kern_clock.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1982, 1986, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/9.0/sys/kern/kern_clock.c 225799 2011-09-27 15:08:59Z mav $");
   39 
   40 #include "opt_kdb.h"
   41 #include "opt_device_polling.h"
   42 #include "opt_hwpmc_hooks.h"
   43 #include "opt_ntp.h"
   44 #include "opt_watchdog.h"
   45 
   46 #include <sys/param.h>
   47 #include <sys/systm.h>
   48 #include <sys/callout.h>
   49 #include <sys/kdb.h>
   50 #include <sys/kernel.h>
   51 #include <sys/kthread.h>
   52 #include <sys/ktr.h>
   53 #include <sys/lock.h>
   54 #include <sys/mutex.h>
   55 #include <sys/proc.h>
   56 #include <sys/resource.h>
   57 #include <sys/resourcevar.h>
   58 #include <sys/sched.h>
   59 #include <sys/signalvar.h>
   60 #include <sys/sleepqueue.h>
   61 #include <sys/smp.h>
   62 #include <vm/vm.h>
   63 #include <vm/pmap.h>
   64 #include <vm/vm_map.h>
   65 #include <sys/sysctl.h>
   66 #include <sys/bus.h>
   67 #include <sys/interrupt.h>
   68 #include <sys/limits.h>
   69 #include <sys/timetc.h>
   70 
   71 #ifdef GPROF
   72 #include <sys/gmon.h>
   73 #endif
   74 
   75 #ifdef HWPMC_HOOKS
   76 #include <sys/pmckern.h>
   77 #endif
   78 
   79 #ifdef DEVICE_POLLING
   80 extern void hardclock_device_poll(void);
   81 #endif /* DEVICE_POLLING */
   82 
   83 static void initclocks(void *dummy);
   84 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL);
   85 
   86 /* Spin-lock protecting profiling statistics. */
   87 static struct mtx time_lock;
   88 
   89 static int
   90 sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
   91 {
   92         int error;
   93         long cp_time[CPUSTATES];
   94 #ifdef SCTL_MASK32
   95         int i;
   96         unsigned int cp_time32[CPUSTATES];
   97 #endif
   98 
   99         read_cpu_time(cp_time);
  100 #ifdef SCTL_MASK32
  101         if (req->flags & SCTL_MASK32) {
  102                 if (!req->oldptr)
  103                         return SYSCTL_OUT(req, 0, sizeof(cp_time32));
  104                 for (i = 0; i < CPUSTATES; i++)
  105                         cp_time32[i] = (unsigned int)cp_time[i];
  106                 error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
  107         } else
  108 #endif
  109         {
  110                 if (!req->oldptr)
  111                         return SYSCTL_OUT(req, 0, sizeof(cp_time));
  112                 error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
  113         }
  114         return error;
  115 }
  116 
  117 SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
  118     0,0, sysctl_kern_cp_time, "LU", "CPU time statistics");
  119 
  120 static long empty[CPUSTATES];
  121 
  122 static int
  123 sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
  124 {
  125         struct pcpu *pcpu;
  126         int error;
  127         int c;
  128         long *cp_time;
  129 #ifdef SCTL_MASK32
  130         unsigned int cp_time32[CPUSTATES];
  131         int i;
  132 #endif
  133 
  134         if (!req->oldptr) {
  135 #ifdef SCTL_MASK32
  136                 if (req->flags & SCTL_MASK32)
  137                         return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1));
  138                 else
  139 #endif
  140                         return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1));
  141         }
  142         for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) {
  143                 if (!CPU_ABSENT(c)) {
  144                         pcpu = pcpu_find(c);
  145                         cp_time = pcpu->pc_cp_time;
  146                 } else {
  147                         cp_time = empty;
  148                 }
  149 #ifdef SCTL_MASK32
  150                 if (req->flags & SCTL_MASK32) {
  151                         for (i = 0; i < CPUSTATES; i++)
  152                                 cp_time32[i] = (unsigned int)cp_time[i];
  153                         error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
  154                 } else
  155 #endif
  156                         error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES);
  157         }
  158         return error;
  159 }
  160 
  161 SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
  162     0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
  163 
  164 #ifdef DEADLKRES
  165 static const char *blessed[] = {
  166         "getblk",
  167         "so_snd_sx",
  168         "so_rcv_sx",
  169         NULL
  170 };
  171 static int slptime_threshold = 1800;
  172 static int blktime_threshold = 900;
  173 static int sleepfreq = 3;
  174 
  175 static void
  176 deadlkres(void)
  177 {
  178         struct proc *p;
  179         struct thread *td;
  180         void *wchan;
  181         int blkticks, i, slpticks, slptype, tryl, tticks;
  182 
  183         tryl = 0;
  184         for (;;) {
  185                 blkticks = blktime_threshold * hz;
  186                 slpticks = slptime_threshold * hz;
  187 
  188                 /*
  189                  * Avoid to sleep on the sx_lock in order to avoid a possible
  190                  * priority inversion problem leading to starvation.
  191                  * If the lock can't be held after 100 tries, panic.
  192                  */
  193                 if (!sx_try_slock(&allproc_lock)) {
  194                         if (tryl > 100)
  195                 panic("%s: possible deadlock detected on allproc_lock\n",
  196                                     __func__);
  197                         tryl++;
  198                         pause("allproc", sleepfreq * hz);
  199                         continue;
  200                 }
  201                 tryl = 0;
  202                 FOREACH_PROC_IN_SYSTEM(p) {
  203                         PROC_LOCK(p);
  204                         if (p->p_state == PRS_NEW) {
  205                                 PROC_UNLOCK(p);
  206                                 continue;
  207                         }
  208                         FOREACH_THREAD_IN_PROC(p, td) {
  209 
  210                                 /*
  211                                  * Once a thread is found in "interesting"
  212                                  * state a possible ticks wrap-up needs to be
  213                                  * checked.
  214                                  */
  215                                 thread_lock(td);
  216                                 if (TD_ON_LOCK(td) && ticks < td->td_blktick) {
  217 
  218                                         /*
  219                                          * The thread should be blocked on a
  220                                          * turnstile, simply check if the
  221                                          * turnstile channel is in good state.
  222                                          */
  223                                         MPASS(td->td_blocked != NULL);
  224 
  225                                         tticks = ticks - td->td_blktick;
  226                                         thread_unlock(td);
  227                                         if (tticks > blkticks) {
  228 
  229                                                 /*
  230                                                  * Accordingly with provided
  231                                                  * thresholds, this thread is
  232                                                  * stuck for too long on a
  233                                                  * turnstile.
  234                                                  */
  235                                                 PROC_UNLOCK(p);
  236                                                 sx_sunlock(&allproc_lock);
  237         panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
  238                                                     __func__, td, tticks);
  239                                         }
  240                                 } else if (TD_IS_SLEEPING(td) &&
  241                                     TD_ON_SLEEPQ(td) &&
  242                                     ticks < td->td_blktick) {
  243 
  244                                         /*
  245                                          * Check if the thread is sleeping on a
  246                                          * lock, otherwise skip the check.
  247                                          * Drop the thread lock in order to
  248                                          * avoid a LOR with the sleepqueue
  249                                          * spinlock.
  250                                          */
  251                                         wchan = td->td_wchan;
  252                                         tticks = ticks - td->td_slptick;
  253                                         thread_unlock(td);
  254                                         slptype = sleepq_type(wchan);
  255                                         if ((slptype == SLEEPQ_SX ||
  256                                             slptype == SLEEPQ_LK) &&
  257                                             tticks > slpticks) {
  258 
  259                                                 /*
  260                                                  * Accordingly with provided
  261                                                  * thresholds, this thread is
  262                                                  * stuck for too long on a
  263                                                  * sleepqueue.
  264                                                  * However, being on a
  265                                                  * sleepqueue, we might still
  266                                                  * check for the blessed
  267                                                  * list.
  268                                                  */
  269                                                 tryl = 0;
  270                                                 for (i = 0; blessed[i] != NULL;
  271                                                     i++) {
  272                                                         if (!strcmp(blessed[i],
  273                                                             td->td_wmesg)) {
  274                                                                 tryl = 1;
  275                                                                 break;
  276                                                         }
  277                                                 }
  278                                                 if (tryl != 0) {
  279                                                         tryl = 0;
  280                                                         continue;
  281                                                 }
  282                                                 PROC_UNLOCK(p);
  283                                                 sx_sunlock(&allproc_lock);
  284         panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
  285                                                     __func__, td, tticks);
  286                                         }
  287                                 } else
  288                                         thread_unlock(td);
  289                         }
  290                         PROC_UNLOCK(p);
  291                 }
  292                 sx_sunlock(&allproc_lock);
  293 
  294                 /* Sleep for sleepfreq seconds. */
  295                 pause("-", sleepfreq * hz);
  296         }
  297 }
  298 
  299 static struct kthread_desc deadlkres_kd = {
  300         "deadlkres",
  301         deadlkres,
  302         (struct thread **)NULL
  303 };
  304 
  305 SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);
  306 
  307 SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver");
  308 SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
  309     &slptime_threshold, 0,
  310     "Number of seconds within is valid to sleep on a sleepqueue");
  311 SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
  312     &blktime_threshold, 0,
  313     "Number of seconds within is valid to block on a turnstile");
  314 SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
  315     "Number of seconds between any deadlock resolver thread run");
  316 #endif  /* DEADLKRES */
  317 
  318 void
  319 read_cpu_time(long *cp_time)
  320 {
  321         struct pcpu *pc;
  322         int i, j;
  323 
  324         /* Sum up global cp_time[]. */
  325         bzero(cp_time, sizeof(long) * CPUSTATES);
  326         CPU_FOREACH(i) {
  327                 pc = pcpu_find(i);
  328                 for (j = 0; j < CPUSTATES; j++)
  329                         cp_time[j] += pc->pc_cp_time[j];
  330         }
  331 }
  332 
  333 #ifdef SW_WATCHDOG
  334 #include <sys/watchdog.h>
  335 
/* Ticks remaining before the software watchdog fires; set by watchdog_config(). */
static int watchdog_ticks;
/* Non-zero while the software watchdog is armed; set by watchdog_config(). */
static int watchdog_enabled;
/* Reports interrupt counts, then enters the debugger or panics. */
static void watchdog_fire(void);
/* watchdog_list event handler, registered from initclocks(). */
static void watchdog_config(void *, u_int, int *);
  340 #endif /* SW_WATCHDOG */
  341 
  342 /*
  343  * Clock handling routines.
  344  *
  345  * This code is written to operate with two timers that run independently of
  346  * each other.
  347  *
  348  * The main timer, running hz times per second, is used to trigger interval
  349  * timers, timeouts and rescheduling as needed.
  350  *
  351  * The second timer handles kernel and user profiling,
  352  * and does resource use estimation.  If the second timer is programmable,
  353  * it is randomized to avoid aliasing between the two clocks.  For example,
  354  * the randomization prevents an adversary from always giving up the cpu
  355  * just before its quantum expires.  Otherwise, it would never accumulate
  356  * cpu ticks.  The mean frequency of the second timer is stathz.
  357  *
  358  * If no second timer exists, stathz will be zero; in this case we drive
  359  * profiling and statistics off the main clock.  This WILL NOT be accurate;
  360  * do not do it unless absolutely necessary.
  361  *
  362  * The statistics clock may (or may not) be run at a higher rate while
  363  * profiling.  This profile clock runs at profhz.  We require that profhz
  364  * be an integral multiple of stathz.
  365  *
  366  * If the statistics clock is running fast, it must be divided by the ratio
  367  * profhz/stathz for statistics.  (For profiling, every tick counts.)
  368  *
  369  * Time-of-day is maintained using a "timecounter", which may or may
  370  * not be related to the hardware generating the above mentioned
  371  * interrupts.
  372  */
  373 
int     stathz;         /* Statistics clock rate; zero if there is no separate timer. */
int     profhz;         /* Profiling clock rate; initclocks() defaults it when zero. */
int     profprocs;      /* Count of processes currently marked P_PROFIL. */
int     ticks;          /* Global tick counter advanced by the hardclock routines. */
int     psratio;        /* profhz divided by (stathz, or hz when stathz is zero). */

static DPCPU_DEFINE(int, pcputicks);    /* Per-CPU version of ticks. */
/* Try-lock flag (0 = free, 1 = held) used by hardclock_anycpu(). */
static int global_hardclock_run = 0;
  382 
  383 /*
  384  * Initialize clock frequencies and start both clocks running.
  385  */
  386 /* ARGSUSED*/
  387 static void
  388 initclocks(dummy)
  389         void *dummy;
  390 {
  391         register int i;
  392 
  393         /*
  394          * Set divisors to 1 (normal case) and let the machine-specific
  395          * code do its bit.
  396          */
  397         mtx_init(&time_lock, "time lock", NULL, MTX_DEF);
  398         cpu_initclocks();
  399 
  400         /*
  401          * Compute profhz/stathz, and fix profhz if needed.
  402          */
  403         i = stathz ? stathz : hz;
  404         if (profhz == 0)
  405                 profhz = i;
  406         psratio = profhz / i;
  407 #ifdef SW_WATCHDOG
  408         EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
  409 #endif
  410 }
  411 
  412 /*
  413  * Each time the real-time timer fires, this function is called on all CPUs.
  414  * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only
  415  * the other CPUs in the system need to call this function.
  416  */
  417 void
  418 hardclock_cpu(int usermode)
  419 {
  420         struct pstats *pstats;
  421         struct thread *td = curthread;
  422         struct proc *p = td->td_proc;
  423         int flags;
  424 
  425         /*
  426          * Run current process's virtual and profile time, as needed.
  427          */
  428         pstats = p->p_stats;
  429         flags = 0;
  430         if (usermode &&
  431             timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
  432                 PROC_SLOCK(p);
  433                 if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
  434                         flags |= TDF_ALRMPEND | TDF_ASTPENDING;
  435                 PROC_SUNLOCK(p);
  436         }
  437         if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
  438                 PROC_SLOCK(p);
  439                 if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
  440                         flags |= TDF_PROFPEND | TDF_ASTPENDING;
  441                 PROC_SUNLOCK(p);
  442         }
  443         thread_lock(td);
  444         sched_tick(1);
  445         td->td_flags |= flags;
  446         thread_unlock(td);
  447 
  448 #ifdef  HWPMC_HOOKS
  449         if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
  450                 PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
  451 #endif
  452         callout_tick();
  453 }
  454 
  455 /*
  456  * The real-time timer, interrupting hz times per second.
  457  */
  458 void
  459 hardclock(int usermode, uintfptr_t pc)
  460 {
  461 
  462         atomic_add_int((volatile int *)&ticks, 1);
  463         hardclock_cpu(usermode);
  464         tc_ticktock(1);
  465         cpu_tick_calibration();
  466         /*
  467          * If no separate statistics clock is available, run it from here.
  468          *
  469          * XXX: this only works for UP
  470          */
  471         if (stathz == 0) {
  472                 profclock(usermode, pc);
  473                 statclock(usermode);
  474         }
  475 #ifdef DEVICE_POLLING
  476         hardclock_device_poll();        /* this is very short and quick */
  477 #endif /* DEVICE_POLLING */
  478 #ifdef SW_WATCHDOG
  479         if (watchdog_enabled > 0 && --watchdog_ticks <= 0)
  480                 watchdog_fire();
  481 #endif /* SW_WATCHDOG */
  482 }
  483 
/*
 * Clock tick handler that accounts for cnt ticks at once on the
 * calling CPU.
 *
 * The calling CPU's private tick count is advanced by cnt and, if that
 * puts it ahead of the global "ticks", the global counter is moved
 * forward to match.  Only a call that moved the global counter goes on
 * to the global duties (timecounter, device polling, watchdog).
 *
 * cnt:      number of ticks to account for.
 * usermode: non-zero if the tick interrupted user-mode execution.
 */
void
hardclock_anycpu(int cnt, int usermode)
{
	struct pstats *pstats;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int *t = DPCPU_PTR(pcputicks);
	int flags, global, newticks;
#ifdef SW_WATCHDOG
	int i;
#endif /* SW_WATCHDOG */

	/*
	 * Update per-CPU and possibly global ticks values.
	 *
	 * newticks ends up as the amount by which this call advanced
	 * the global counter, or 0 if this CPU was not ahead of it.
	 * A CPU that has fallen more than one tick behind is pulled up
	 * to one tick behind the global value.  The compare-and-set is
	 * retried if another CPU changed "ticks" in the meantime.
	 */
	*t += cnt;
	do {
		global = ticks;
		newticks = *t - global;
		if (newticks <= 0) {
			if (newticks < -1)
				*t = global - 1;
			newticks = 0;
			break;
		}
	} while (!atomic_cmpset_int(&ticks, global, *t));

	/*
	 * Run current process's virtual and profile time, as needed.
	 * Each armed timer is charged cnt ticks' worth of time.
	 */
	pstats = p->p_stats;
	flags = 0;
	if (usermode &&
	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
		PROC_SLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
		    tick * cnt) == 0)
			flags |= TDF_ALRMPEND | TDF_ASTPENDING;
		PROC_SUNLOCK(p);
	}
	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
		PROC_SLOCK(p);
		if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
		    tick * cnt) == 0)
			flags |= TDF_PROFPEND | TDF_ASTPENDING;
		PROC_SUNLOCK(p);
	}
	thread_lock(td);
	sched_tick(cnt);
	td->td_flags |= flags;
	thread_unlock(td);

#ifdef  HWPMC_HOOKS
	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
#endif
	callout_tick();
	/* We are in charge to handle this tick duty. */
	if (newticks > 0) {
		/*
		 * Dangerous and no need to call these things concurrently.
		 * global_hardclock_run acts as a try-lock: if another CPU
		 * already holds it, this call skips the section entirely.
		 */
		if (atomic_cmpset_acq_int(&global_hardclock_run, 0, 1)) {
			tc_ticktock(newticks);
#ifdef DEVICE_POLLING
			/* This is very short and quick. */
			hardclock_device_poll();
#endif /* DEVICE_POLLING */
			atomic_store_rel_int(&global_hardclock_run, 0);
		}
#ifdef SW_WATCHDOG
		if (watchdog_enabled > 0) {
			/*
			 * NOTE(review): presumably atomic_fetchadd_int()
			 * returns the value held before the subtraction, so
			 * the test below fires only on the call that takes
			 * the counter from positive to zero or below --
			 * confirm against atomic(9).
			 */
			i = atomic_fetchadd_int(&watchdog_ticks, -newticks);
			if (i > 0 && i <= newticks)
				watchdog_fire();
		}
#endif /* SW_WATCHDOG */
	}
	/* Tick calibration runs only on the CPU that CPU_FIRST() names. */
	if (curcpu == CPU_FIRST())
		cpu_tick_calibration();
}
  563 
  564 void
  565 hardclock_sync(int cpu)
  566 {
  567         int     *t = DPCPU_ID_PTR(cpu, pcputicks);
  568 
  569         *t = ticks;
  570 }
  571 
  572 /*
  573  * Compute number of ticks in the specified amount of time.
  574  */
  575 int
  576 tvtohz(tv)
  577         struct timeval *tv;
  578 {
  579         register unsigned long ticks;
  580         register long sec, usec;
  581 
  582         /*
  583          * If the number of usecs in the whole seconds part of the time
  584          * difference fits in a long, then the total number of usecs will
  585          * fit in an unsigned long.  Compute the total and convert it to
  586          * ticks, rounding up and adding 1 to allow for the current tick
  587          * to expire.  Rounding also depends on unsigned long arithmetic
  588          * to avoid overflow.
  589          *
  590          * Otherwise, if the number of ticks in the whole seconds part of
  591          * the time difference fits in a long, then convert the parts to
  592          * ticks separately and add, using similar rounding methods and
  593          * overflow avoidance.  This method would work in the previous
  594          * case but it is slightly slower and assumes that hz is integral.
  595          *
  596          * Otherwise, round the time difference down to the maximum
  597          * representable value.
  598          *
  599          * If ints have 32 bits, then the maximum value for any timeout in
  600          * 10ms ticks is 248 days.
  601          */
  602         sec = tv->tv_sec;
  603         usec = tv->tv_usec;
  604         if (usec < 0) {
  605                 sec--;
  606                 usec += 1000000;
  607         }
  608         if (sec < 0) {
  609 #ifdef DIAGNOSTIC
  610                 if (usec > 0) {
  611                         sec++;
  612                         usec -= 1000000;
  613                 }
  614                 printf("tvotohz: negative time difference %ld sec %ld usec\n",
  615                        sec, usec);
  616 #endif
  617                 ticks = 1;
  618         } else if (sec <= LONG_MAX / 1000000)
  619                 ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
  620                         / tick + 1;
  621         else if (sec <= LONG_MAX / hz)
  622                 ticks = sec * hz
  623                         + ((unsigned long)usec + (tick - 1)) / tick + 1;
  624         else
  625                 ticks = LONG_MAX;
  626         if (ticks > INT_MAX)
  627                 ticks = INT_MAX;
  628         return ((int)ticks);
  629 }
  630 
  631 /*
  632  * Start profiling on a process.
  633  *
  634  * Kernel profiling passes proc0 which never exits and hence
  635  * keeps the profile clock running constantly.
  636  */
  637 void
  638 startprofclock(p)
  639         register struct proc *p;
  640 {
  641 
  642         PROC_LOCK_ASSERT(p, MA_OWNED);
  643         if (p->p_flag & P_STOPPROF)
  644                 return;
  645         if ((p->p_flag & P_PROFIL) == 0) {
  646                 p->p_flag |= P_PROFIL;
  647                 mtx_lock(&time_lock);
  648                 if (++profprocs == 1)
  649                         cpu_startprofclock();
  650                 mtx_unlock(&time_lock);
  651         }
  652 }
  653 
  654 /*
  655  * Stop profiling on a process.
  656  */
  657 void
  658 stopprofclock(p)
  659         register struct proc *p;
  660 {
  661 
  662         PROC_LOCK_ASSERT(p, MA_OWNED);
  663         if (p->p_flag & P_PROFIL) {
  664                 if (p->p_profthreads != 0) {
  665                         p->p_flag |= P_STOPPROF;
  666                         while (p->p_profthreads != 0)
  667                                 msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
  668                                     "stopprof", 0);
  669                         p->p_flag &= ~P_STOPPROF;
  670                 }
  671                 if ((p->p_flag & P_PROFIL) == 0)
  672                         return;
  673                 p->p_flag &= ~P_PROFIL;
  674                 mtx_lock(&time_lock);
  675                 if (--profprocs == 0)
  676                         cpu_stopprofclock();
  677                 mtx_unlock(&time_lock);
  678         }
  679 }
  680 
  681 /*
  682  * Statistics clock.  Updates rusage information and calls the scheduler
  683  * to adjust priorities of the active thread.
  684  *
  685  * This should be called by all active processors.
  686  */
  687 void
  688 statclock(int usermode)
  689 {
  690         struct rusage *ru;
  691         struct vmspace *vm;
  692         struct thread *td;
  693         struct proc *p;
  694         long rss;
  695         long *cp_time;
  696 
  697         td = curthread;
  698         p = td->td_proc;
  699 
  700         cp_time = (long *)PCPU_PTR(cp_time);
  701         if (usermode) {
  702                 /*
  703                  * Charge the time as appropriate.
  704                  */
  705                 td->td_uticks++;
  706                 if (p->p_nice > NZERO)
  707                         cp_time[CP_NICE]++;
  708                 else
  709                         cp_time[CP_USER]++;
  710         } else {
  711                 /*
  712                  * Came from kernel mode, so we were:
  713                  * - handling an interrupt,
  714                  * - doing syscall or trap work on behalf of the current
  715                  *   user process, or
  716                  * - spinning in the idle loop.
  717                  * Whichever it is, charge the time as appropriate.
  718                  * Note that we charge interrupts to the current process,
  719                  * regardless of whether they are ``for'' that process,
  720                  * so that we know how much of its real time was spent
  721                  * in ``non-process'' (i.e., interrupt) work.
  722                  */
  723                 if ((td->td_pflags & TDP_ITHREAD) ||
  724                     td->td_intr_nesting_level >= 2) {
  725                         td->td_iticks++;
  726                         cp_time[CP_INTR]++;
  727                 } else {
  728                         td->td_pticks++;
  729                         td->td_sticks++;
  730                         if (!TD_IS_IDLETHREAD(td))
  731                                 cp_time[CP_SYS]++;
  732                         else
  733                                 cp_time[CP_IDLE]++;
  734                 }
  735         }
  736 
  737         /* Update resource usage integrals and maximums. */
  738         MPASS(p->p_vmspace != NULL);
  739         vm = p->p_vmspace;
  740         ru = &td->td_ru;
  741         ru->ru_ixrss += pgtok(vm->vm_tsize);
  742         ru->ru_idrss += pgtok(vm->vm_dsize);
  743         ru->ru_isrss += pgtok(vm->vm_ssize);
  744         rss = pgtok(vmspace_resident_count(vm));
  745         if (ru->ru_maxrss < rss)
  746                 ru->ru_maxrss = rss;
  747         KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock",
  748             "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz);
  749         thread_lock_flags(td, MTX_QUIET);
  750         sched_clock(td);
  751         thread_unlock(td);
  752 }
  753 
  754 void
  755 profclock(int usermode, uintfptr_t pc)
  756 {
  757         struct thread *td;
  758 #ifdef GPROF
  759         struct gmonparam *g;
  760         uintfptr_t i;
  761 #endif
  762 
  763         td = curthread;
  764         if (usermode) {
  765                 /*
  766                  * Came from user mode; CPU was in user state.
  767                  * If this process is being profiled, record the tick.
  768                  * if there is no related user location yet, don't
  769                  * bother trying to count it.
  770                  */
  771                 if (td->td_proc->p_flag & P_PROFIL)
  772                         addupc_intr(td, pc, 1);
  773         }
  774 #ifdef GPROF
  775         else {
  776                 /*
  777                  * Kernel statistics are just like addupc_intr, only easier.
  778                  */
  779                 g = &_gmonparam;
  780                 if (g->state == GMON_PROF_ON && pc >= g->lowpc) {
  781                         i = PC_TO_I(g, pc);
  782                         if (i < g->textsize) {
  783                                 KCOUNT(g, i)++;
  784                         }
  785                 }
  786         }
  787 #endif
  788 }
  789 
  790 /*
  791  * Return information about system clocks.
  792  */
  793 static int
  794 sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
  795 {
  796         struct clockinfo clkinfo;
  797         /*
  798          * Construct clockinfo structure.
  799          */
  800         bzero(&clkinfo, sizeof(clkinfo));
  801         clkinfo.hz = hz;
  802         clkinfo.tick = tick;
  803         clkinfo.profhz = profhz;
  804         clkinfo.stathz = stathz ? stathz : hz;
  805         return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
  806 }
  807 
  808 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate,
  809         CTLTYPE_STRUCT|CTLFLAG_RD|CTLFLAG_MPSAFE,
  810         0, 0, sysctl_kern_clockrate, "S,clockinfo",
  811         "Rate and period of various kernel clocks");
  812 
  813 #ifdef SW_WATCHDOG
  814 
  815 static void
  816 watchdog_config(void *unused __unused, u_int cmd, int *error)
  817 {
  818         u_int u;
  819 
  820         u = cmd & WD_INTERVAL;
  821         if (u >= WD_TO_1SEC) {
  822                 watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
  823                 watchdog_enabled = 1;
  824                 *error = 0;
  825         } else {
  826                 watchdog_enabled = 0;
  827         }
  828 }
  829 
  830 /*
  831  * Handle a watchdog timeout by dumping interrupt information and
  832  * then either dropping to DDB or panicking.
  833  */
  834 static void
  835 watchdog_fire(void)
  836 {
  837         int nintr;
  838         uint64_t inttotal;
  839         u_long *curintr;
  840         char *curname;
  841 
  842         curintr = intrcnt;
  843         curname = intrnames;
  844         inttotal = 0;
  845         nintr = sintrcnt / sizeof(u_long);
  846 
  847         printf("interrupt                   total\n");
  848         while (--nintr >= 0) {
  849                 if (*curintr)
  850                         printf("%-12s %20lu\n", curname, *curintr);
  851                 curname += strlen(curname) + 1;
  852                 inttotal += *curintr++;
  853         }
  854         printf("Total        %20ju\n", (uintmax_t)inttotal);
  855 
  856 #if defined(KDB) && !defined(KDB_UNATTENDED)
  857         kdb_backtrace();
  858         kdb_enter(KDB_WHY_WATCHDOG, "watchdog timeout");
  859 #else
  860         panic("watchdog timeout");
  861 #endif
  862 }
  863 
  864 #endif /* SW_WATCHDOG */
Cache object: 35b9ee2035cfc1156d71a2090372131d
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/kern_clock.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_clock.c