FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_timeout.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * (c) UNIX System Laboratories, Inc.
    7  * All or some portions of this file are derived from material licensed
    8  * to the University of California by American Telephone and Telegraph
    9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   10  * the permission of UNIX System Laboratories, Inc.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      From: @(#)kern_clock.c  8.5 (Berkeley) 1/21/94
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 __FBSDID("$FreeBSD$");
   41 
   42 #include "opt_callout_profiling.h"
   43 #include "opt_ddb.h"
   44 #include "opt_rss.h"
   45 
   46 #include <sys/param.h>
   47 #include <sys/systm.h>
   48 #include <sys/bus.h>
   49 #include <sys/callout.h>
   50 #include <sys/domainset.h>
   51 #include <sys/file.h>
   52 #include <sys/interrupt.h>
   53 #include <sys/kernel.h>
   54 #include <sys/ktr.h>
   55 #include <sys/kthread.h>
   56 #include <sys/lock.h>
   57 #include <sys/malloc.h>
   58 #include <sys/mutex.h>
   59 #include <sys/proc.h>
   60 #include <sys/random.h>
   61 #include <sys/sched.h>
   62 #include <sys/sdt.h>
   63 #include <sys/sleepqueue.h>
   64 #include <sys/sysctl.h>
   65 #include <sys/smp.h>
   66 #include <sys/unistd.h>
   67 
   68 #ifdef DDB
   69 #include <ddb/ddb.h>
   70 #include <ddb/db_sym.h>
   71 #include <machine/_inttypes.h>
   72 #endif
   73 
   74 #ifdef SMP
   75 #include <machine/cpu.h>
   76 #endif
   77 
   78 DPCPU_DECLARE(sbintime_t, hardclocktime);
   79 
   80 SDT_PROVIDER_DEFINE(callout_execute);
   81 SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *");
   82 SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *");
   83 
   84 static void     softclock_thread(void *arg);
   85 
   86 #ifdef CALLOUT_PROFILING
   87 static int avg_depth;
   88 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
   89     "Average number of items examined per softclock call. Units = 1/1000");
   90 static int avg_gcalls;
   91 SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
   92     "Average number of Giant callouts made per softclock call. Units = 1/1000");
   93 static int avg_lockcalls;
   94 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
   95     "Average number of lock callouts made per softclock call. Units = 1/1000");
   96 static int avg_mpcalls;
   97 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
   98     "Average number of MP callouts made per softclock call. Units = 1/1000");
   99 static int avg_depth_dir;
  100 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
  101     "Average number of direct callouts examined per callout_process call. "
  102     "Units = 1/1000");
  103 static int avg_lockcalls_dir;
  104 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
  105     &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
  106     "callout_process call. Units = 1/1000");
  107 static int avg_mpcalls_dir;
  108 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
  109     0, "Average number of MP direct callouts made per callout_process call. "
  110     "Units = 1/1000");
  111 #endif
  112 
  113 static int ncallout;
  114 SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0,
  115     "Number of entries in callwheel and size of timeout() preallocation");
  116 
  117 #ifdef  RSS
  118 static int pin_default_swi = 1;
  119 static int pin_pcpu_swi = 1;
  120 #else
  121 static int pin_default_swi = 0;
  122 static int pin_pcpu_swi = 0;
  123 #endif
  124 
  125 SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi,
  126     0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)");
  127 SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi,
  128     0, "Pin the per-CPU swis (except PCPU 0, which is also default)");
  129 
  130 /*
  131  * TODO:
  132  *      allocate more timeout table slots when table overflows.
  133  */
  134 static u_int __read_mostly callwheelsize;
  135 static u_int __read_mostly callwheelmask;
  136 
  137 /*
   138  * The callout cpu exec entities represent the information necessary for
   139  * describing the state of callouts currently running on the CPU and the
   140  * information necessary for migrating callouts to a new callout cpu. In
   141  * particular, the first entry of the cc_exec_entity array holds the state
   142  * for a callout running in SWI thread context, while the second one holds
   143  * the state for a callout running directly from hardware interrupt context.
   144  * The cached state is essential for deferring migration when
   145  * the migrating callout is already running.
  146  */
  147 struct cc_exec {
  148         struct callout          *cc_curr;
  149         callout_func_t          *cc_drain;
  150         void                    *cc_last_func;
  151         void                    *cc_last_arg;
  152 #ifdef SMP
  153         callout_func_t          *ce_migration_func;
  154         void                    *ce_migration_arg;
  155         sbintime_t              ce_migration_time;
  156         sbintime_t              ce_migration_prec;
  157         int                     ce_migration_cpu;
  158 #endif
  159         bool                    cc_cancel;
  160         bool                    cc_waiting;
  161 };
  162 
  163 /*
  164  * There is one struct callout_cpu per cpu, holding all relevant
  165  * state for the callout processing thread on the individual CPU.
  166  */
  167 struct callout_cpu {
  168         struct mtx_padalign     cc_lock;
  169         struct cc_exec          cc_exec_entity[2];
  170         struct callout          *cc_next;
  171         struct callout_list     *cc_callwheel;
  172         struct callout_tailq    cc_expireq;
  173         sbintime_t              cc_firstevent;
  174         sbintime_t              cc_lastscan;
  175         struct thread           *cc_thread;
  176         u_int                   cc_bucket;
  177 #ifdef KTR
  178         char                    cc_ktr_event_name[20];
  179 #endif
  180 };
  181 
  182 #define callout_migrating(c)    ((c)->c_iflags & CALLOUT_DFRMIGRATION)
  183 
  184 #define cc_exec_curr(cc, dir)           cc->cc_exec_entity[dir].cc_curr
  185 #define cc_exec_last_func(cc, dir)      cc->cc_exec_entity[dir].cc_last_func
  186 #define cc_exec_last_arg(cc, dir)       cc->cc_exec_entity[dir].cc_last_arg
  187 #define cc_exec_drain(cc, dir)          cc->cc_exec_entity[dir].cc_drain
  188 #define cc_exec_next(cc)                cc->cc_next
  189 #define cc_exec_cancel(cc, dir)         cc->cc_exec_entity[dir].cc_cancel
  190 #define cc_exec_waiting(cc, dir)        cc->cc_exec_entity[dir].cc_waiting
  191 #ifdef SMP
  192 #define cc_migration_func(cc, dir)      cc->cc_exec_entity[dir].ce_migration_func
  193 #define cc_migration_arg(cc, dir)       cc->cc_exec_entity[dir].ce_migration_arg
  194 #define cc_migration_cpu(cc, dir)       cc->cc_exec_entity[dir].ce_migration_cpu
  195 #define cc_migration_time(cc, dir)      cc->cc_exec_entity[dir].ce_migration_time
  196 #define cc_migration_prec(cc, dir)      cc->cc_exec_entity[dir].ce_migration_prec
  197 
  198 static struct callout_cpu cc_cpu[MAXCPU];
  199 #define CPUBLOCK        MAXCPU
  200 #define CC_CPU(cpu)     (&cc_cpu[(cpu)])
  201 #define CC_SELF()       CC_CPU(PCPU_GET(cpuid))
  202 #else
  203 static struct callout_cpu cc_cpu;
  204 #define CC_CPU(cpu)     (&cc_cpu)
  205 #define CC_SELF()       (&cc_cpu)
  206 #endif
  207 #define CC_LOCK(cc)     mtx_lock_spin(&(cc)->cc_lock)
  208 #define CC_UNLOCK(cc)   mtx_unlock_spin(&(cc)->cc_lock)
  209 #define CC_LOCK_ASSERT(cc)      mtx_assert(&(cc)->cc_lock, MA_OWNED)
  210 
  211 static int __read_mostly cc_default_cpu;
  212 
  213 static void     callout_cpu_init(struct callout_cpu *cc, int cpu);
  214 static void     softclock_call_cc(struct callout *c, struct callout_cpu *cc,
  215 #ifdef CALLOUT_PROFILING
  216                     int *mpcalls, int *lockcalls, int *gcalls,
  217 #endif
  218                     int direct);
  219 
  220 static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
  221 
  222 /**
  223  * Locked by cc_lock:
  224  *   cc_curr         - If a callout is in progress, it is cc_curr.
  225  *                     If cc_curr is non-NULL, threads waiting in
  226  *                     callout_drain() will be woken up as soon as the
  227  *                     relevant callout completes.
  228  *   cc_cancel       - Changing to 1 with both callout_lock and cc_lock held
  229  *                     guarantees that the current callout will not run.
  230  *                     The softclock_call_cc() function sets this to 0 before it
  231  *                     drops callout_lock to acquire c_lock, and it calls
   232  *                     the handler only if cc_cancel is still 0 after
  233  *                     cc_lock is successfully acquired.
  234  *   cc_waiting      - If a thread is waiting in callout_drain(), then
   235  *                     cc_waiting is true.  Set only when
  236  *                     cc_curr is non-NULL.
  237  */
  238 
  239 /*
  240  * Resets the execution entity tied to a specific callout cpu.
  241  */
  242 static void
  243 cc_cce_cleanup(struct callout_cpu *cc, int direct)
  244 {
  245 
  246         cc_exec_curr(cc, direct) = NULL;
  247         cc_exec_cancel(cc, direct) = false;
  248         cc_exec_waiting(cc, direct) = false;
  249 #ifdef SMP
  250         cc_migration_cpu(cc, direct) = CPUBLOCK;
  251         cc_migration_time(cc, direct) = 0;
  252         cc_migration_prec(cc, direct) = 0;
  253         cc_migration_func(cc, direct) = NULL;
  254         cc_migration_arg(cc, direct) = NULL;
  255 #endif
  256 }
  257 
  258 /*
  259  * Checks if migration is requested by a specific callout cpu.
  260  */
  261 static int
  262 cc_cce_migrating(struct callout_cpu *cc, int direct)
  263 {
  264 
  265 #ifdef SMP
  266         return (cc_migration_cpu(cc, direct) != CPUBLOCK);
  267 #else
  268         return (0);
  269 #endif
  270 }
  271 
  272 /*
  273  * Kernel low level callwheel initialization
  274  * called on the BSP during kernel startup.
  275  */
  276 static void
  277 callout_callwheel_init(void *dummy)
  278 {
  279         struct callout_cpu *cc;
  280         int cpu;
  281 
  282         /*
  283          * Calculate the size of the callout wheel and the preallocated
  284          * timeout() structures.
   285          * XXX: Clip ncallout to what the old maxusers-based formula
   286          * would give at maxusers = 384.  This is still huge, but acceptable.
  287          */
  288         ncallout = imin(16 + maxproc + maxfiles, 18508);
  289         TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
  290 
  291         /*
   292          * Calculate the callout wheel size; it is the next power of two higher
  293          * than 'ncallout'.
  294          */
  295         callwheelsize = 1 << fls(ncallout);
  296         callwheelmask = callwheelsize - 1;
  297 
  298         /*
   299          * Fetch whether we're pinning the SWIs or not.
  300          */
  301         TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi);
  302         TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi);
  303 
  304         /*
  305          * Initialize callout wheels.  The software interrupt threads
  306          * are created later.
  307          */
  308         cc_default_cpu = PCPU_GET(cpuid);
  309         CPU_FOREACH(cpu) {
  310                 cc = CC_CPU(cpu);
  311                 callout_cpu_init(cc, cpu);
  312         }
  313 }
  314 SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);
  315 
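To illustrate the sizing above: the wheel is rounded up to the power of two strictly greater than ncallout.  A minimal user-space sketch of that calculation, assuming FreeBSD's fls() from <strings.h> and using the 18508 clamp as the example input:

#include <stdio.h>
#include <strings.h>

int
main(void)
{
	int ncallout = 18508;           /* example: the clamp value used above */
	unsigned callwheelsize, callwheelmask;

	/* Round up to the power of two strictly greater than ncallout. */
	callwheelsize = 1U << fls(ncallout);
	callwheelmask = callwheelsize - 1;
	printf("callwheelsize %u callwheelmask %#x\n",
	    callwheelsize, callwheelmask);
	return (0);
}

With ncallout = 18508, fls() returns 15, so the wheel gets 32768 buckets and callwheelmask becomes 0x7fff.
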
  316 /*
  317  * Initialize the per-cpu callout structures.
  318  */
  319 static void
  320 callout_cpu_init(struct callout_cpu *cc, int cpu)
  321 {
  322         int i;
  323 
  324         mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN);
  325         cc->cc_callwheel = malloc_domainset(sizeof(struct callout_list) *
  326             callwheelsize, M_CALLOUT,
  327             DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK);
  328         for (i = 0; i < callwheelsize; i++)
  329                 LIST_INIT(&cc->cc_callwheel[i]);
  330         TAILQ_INIT(&cc->cc_expireq);
  331         cc->cc_firstevent = SBT_MAX;
  332         for (i = 0; i < 2; i++)
  333                 cc_cce_cleanup(cc, i);
  334 #ifdef KTR
  335         snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
  336             "callwheel cpu %d", cpu);
  337 #endif
  338 }
  339 
  340 #ifdef SMP
  341 /*
  342  * Switches the cpu tied to a specific callout.
   343  * The function expects the incoming callout cpu to be locked and returns
   344  * with the new callout cpu locked.
  345  */
  346 static struct callout_cpu *
  347 callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
  348 {
  349         struct callout_cpu *new_cc;
  350 
  351         MPASS(c != NULL && cc != NULL);
  352         CC_LOCK_ASSERT(cc);
  353 
  354         /*
   355          * Prevent interrupts and preemption from firing while the callout
   356          * cpu is blocked, in order to avoid deadlocks, as the new thread
   357          * may try to acquire the callout cpu lock.
  358          */
  359         c->c_cpu = CPUBLOCK;
  360         spinlock_enter();
  361         CC_UNLOCK(cc);
  362         new_cc = CC_CPU(new_cpu);
  363         CC_LOCK(new_cc);
  364         spinlock_exit();
  365         c->c_cpu = new_cpu;
  366         return (new_cc);
  367 }
  368 #endif
  369 
  370 /*
  371  * Start softclock threads.
  372  */
  373 static void
  374 start_softclock(void *dummy)
  375 {
  376         struct proc *p;
  377         struct thread *td;
  378         struct callout_cpu *cc;
  379         int cpu, error;
  380         bool pin_swi;
  381 
  382         p = NULL;
  383         CPU_FOREACH(cpu) {
  384                 cc = CC_CPU(cpu);
  385                 error = kproc_kthread_add(softclock_thread, cc, &p, &td,
  386                     RFSTOPPED, 0, "clock", "clock (%d)", cpu);
  387                 if (error != 0)
  388                         panic("failed to create softclock thread for cpu %d: %d",
  389                             cpu, error);
  390                 CC_LOCK(cc);
  391                 cc->cc_thread = td;
  392                 thread_lock(td);
  393                 sched_class(td, PRI_ITHD);
  394                 sched_ithread_prio(td, PI_SOFTCLOCK);
  395                 TD_SET_IWAIT(td);
  396                 thread_lock_set(td, (struct mtx *)&cc->cc_lock);
  397                 thread_unlock(td);
  398                 if (cpu == cc_default_cpu)
  399                         pin_swi = pin_default_swi;
  400                 else
  401                         pin_swi = pin_pcpu_swi;
  402                 if (pin_swi) {
  403                         error = cpuset_setithread(td->td_tid, cpu);
  404                         if (error != 0)
  405                                 printf("%s: %s clock couldn't be pinned to cpu %d: %d\n",
  406                                     __func__, cpu == cc_default_cpu ?
  407                                     "default" : "per-cpu", cpu, error);
  408                 }
  409         }
  410 }
  411 SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);
  412 
  413 #define CC_HASH_SHIFT   8
  414 
  415 static inline u_int
  416 callout_hash(sbintime_t sbt)
  417 {
  418 
  419         return (sbt >> (32 - CC_HASH_SHIFT));
  420 }
  421 
  422 static inline u_int
  423 callout_get_bucket(sbintime_t sbt)
  424 {
  425 
  426         return (callout_hash(sbt) & callwheelmask);
  427 }
  428 
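To make the hashing above concrete: sbintime_t is a 32.32 fixed-point count of seconds, so shifting right by 32 - CC_HASH_SHIFT = 24 bits advances the hash by 256 per second, i.e. adjacent buckets are 1/256 s apart.  A small user-space sketch, with the 512-bucket wheel chosen purely as an example (the real size is computed at boot in callout_callwheel_init() above):

#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;             /* 32.32 fixed-point, as in the kernel */
#define SBT_1S          ((sbintime_t)1 << 32)
#define CC_HASH_SHIFT   8

static unsigned
bucket_hash(sbintime_t sbt)
{
	/* Each bucket spans 2^(32 - CC_HASH_SHIFT) sbt units, i.e. 1/256 s. */
	return ((unsigned)(sbt >> (32 - CC_HASH_SHIFT)));
}

int
main(void)
{
	unsigned callwheelmask = 512 - 1;       /* example wheel of 512 buckets */
	sbintime_t t = 3 * SBT_1S + SBT_1S / 2; /* 3.5 s after boot */

	printf("hash %u bucket %u\n", bucket_hash(t),
	    bucket_hash(t) & callwheelmask);
	return (0);
}

This prints hash 896, bucket 384 for a time of 3.5 s, since 3.5 * 256 = 896 and 896 & 511 = 384.
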
  429 void
  430 callout_process(sbintime_t now)
  431 {
  432         struct callout_entropy {
  433                 struct callout_cpu *cc;
  434                 struct thread *td;
  435                 sbintime_t now;
  436         } entropy;
  437         struct callout *c, *next;
  438         struct callout_cpu *cc;
  439         struct callout_list *sc;
  440         struct thread *td;
  441         sbintime_t first, last, lookahead, max, tmp_max;
  442         u_int firstb, lastb, nowb;
  443 #ifdef CALLOUT_PROFILING
  444         int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
  445 #endif
  446 
  447         cc = CC_SELF();
  448         mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
  449 
  450         /* Compute the buckets of the last scan and present times. */
  451         firstb = callout_hash(cc->cc_lastscan);
  452         cc->cc_lastscan = now;
  453         nowb = callout_hash(now);
  454 
  455         /* Compute the last bucket and minimum time of the bucket after it. */
  456         if (nowb == firstb)
  457                 lookahead = (SBT_1S / 16);
  458         else if (nowb - firstb == 1)
  459                 lookahead = (SBT_1S / 8);
  460         else
  461                 lookahead = SBT_1S;
  462         first = last = now;
  463         first += (lookahead / 2);
  464         last += lookahead;
  465         last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
  466         lastb = callout_hash(last) - 1;
  467         max = last;
  468 
  469         /*
  470          * Check if we wrapped around the entire wheel from the last scan.
   471          * If so, we need to scan the entire wheel for pending callouts.
  472          */
  473         if (lastb - firstb >= callwheelsize) {
  474                 lastb = firstb + callwheelsize - 1;
  475                 if (nowb - firstb >= callwheelsize)
  476                         nowb = lastb;
  477         }
  478 
  479         /* Iterate callwheel from firstb to nowb and then up to lastb. */
  480         do {
  481                 sc = &cc->cc_callwheel[firstb & callwheelmask];
  482                 LIST_FOREACH_SAFE(c, sc, c_links.le, next) {
   483                         /* Run the callout if the present time is within its allowed window. */
  484                         if (c->c_time <= now) {
  485                                 /*
  486                                  * Consumer told us the callout may be run
  487                                  * directly from hardware interrupt context.
  488                                  */
  489                                 if (c->c_iflags & CALLOUT_DIRECT) {
  490 #ifdef CALLOUT_PROFILING
  491                                         ++depth_dir;
  492 #endif
  493                                         cc_exec_next(cc) = next;
  494                                         cc->cc_bucket = firstb & callwheelmask;
  495                                         LIST_REMOVE(c, c_links.le);
  496                                         softclock_call_cc(c, cc,
  497 #ifdef CALLOUT_PROFILING
  498                                             &mpcalls_dir, &lockcalls_dir, NULL,
  499 #endif
  500                                             1);
  501                                         next = cc_exec_next(cc);
  502                                         cc_exec_next(cc) = NULL;
  503                                 } else {
  504                                         LIST_REMOVE(c, c_links.le);
  505                                         TAILQ_INSERT_TAIL(&cc->cc_expireq,
  506                                             c, c_links.tqe);
  507                                         c->c_iflags |= CALLOUT_PROCESSED;
  508                                 }
  509                         } else if (c->c_time >= max) {
  510                                 /*
  511                                  * Skip events in the distant future.
  512                                  */
  513                                 ;
  514                         } else if (c->c_time > last) {
  515                                 /*
   516                                  * The event's minimal time is past our present
   517                                  * maximal time, so it cannot be aggregated.
  518                                  */
  519                                 lastb = nowb;
  520                         } else {
  521                                 /*
  522                                  * Update first and last time, respecting this
  523                                  * event.
  524                                  */
  525                                 if (c->c_time < first)
  526                                         first = c->c_time;
  527                                 tmp_max = c->c_time + c->c_precision;
  528                                 if (tmp_max < last)
  529                                         last = tmp_max;
  530                         }
  531                 }
  532                 /* Proceed with the next bucket. */
  533                 firstb++;
  534                 /*
   535                  * Stop if we have looked past the present time and found
   536                  * an event we cannot execute now, or if we have
   537                  * looked far enough into the future.
  538                  */
  539         } while (((int)(firstb - lastb)) <= 0);
  540         cc->cc_firstevent = last;
  541         cpu_new_callout(curcpu, last, first);
  542 
  543 #ifdef CALLOUT_PROFILING
  544         avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
  545         avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
  546         avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
  547 #endif
  548         if (!TAILQ_EMPTY(&cc->cc_expireq)) {
  549                 entropy.cc = cc;
  550                 entropy.td = curthread;
  551                 entropy.now = now;
  552                 random_harvest_queue(&entropy, sizeof(entropy), RANDOM_CALLOUT);
  553 
  554                 td = cc->cc_thread;
  555                 if (TD_AWAITING_INTR(td)) {
  556                         thread_lock_block_wait(td);
  557                         THREAD_LOCK_ASSERT(td, MA_OWNED);
  558                         TD_CLR_IWAIT(td);
  559                         sched_wakeup(td, SRQ_INTR);
  560                 } else
  561                         mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
  562         } else
  563                 mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
  564 }
  565 
  566 static struct callout_cpu *
  567 callout_lock(struct callout *c)
  568 {
  569         struct callout_cpu *cc;
  570         int cpu;
  571 
  572         for (;;) {
  573                 cpu = c->c_cpu;
  574 #ifdef SMP
  575                 if (cpu == CPUBLOCK) {
  576                         while (c->c_cpu == CPUBLOCK)
  577                                 cpu_spinwait();
  578                         continue;
  579                 }
  580 #endif
  581                 cc = CC_CPU(cpu);
  582                 CC_LOCK(cc);
  583                 if (cpu == c->c_cpu)
  584                         break;
  585                 CC_UNLOCK(cc);
  586         }
  587         return (cc);
  588 }
  589 
  590 static void
  591 callout_cc_add(struct callout *c, struct callout_cpu *cc,
  592     sbintime_t sbt, sbintime_t precision, void (*func)(void *),
  593     void *arg, int flags)
  594 {
  595         int bucket;
  596 
  597         CC_LOCK_ASSERT(cc);
  598         if (sbt < cc->cc_lastscan)
  599                 sbt = cc->cc_lastscan;
  600         c->c_arg = arg;
  601         c->c_iflags |= CALLOUT_PENDING;
  602         c->c_iflags &= ~CALLOUT_PROCESSED;
  603         c->c_flags |= CALLOUT_ACTIVE;
  604         if (flags & C_DIRECT_EXEC)
  605                 c->c_iflags |= CALLOUT_DIRECT;
  606         c->c_func = func;
  607         c->c_time = sbt;
  608         c->c_precision = precision;
  609         bucket = callout_get_bucket(c->c_time);
  610         CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
  611             c, (int)(c->c_precision >> 32),
  612             (u_int)(c->c_precision & 0xffffffff));
  613         LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
  614         if (cc->cc_bucket == bucket)
  615                 cc_exec_next(cc) = c;
  616 
  617         /*
  618          * Inform the eventtimers(4) subsystem there's a new callout
  619          * that has been inserted, but only if really required.
  620          */
  621         if (SBT_MAX - c->c_time < c->c_precision)
  622                 c->c_precision = SBT_MAX - c->c_time;
  623         sbt = c->c_time + c->c_precision;
  624         if (sbt < cc->cc_firstevent) {
  625                 cc->cc_firstevent = sbt;
  626                 cpu_new_callout(c->c_cpu, sbt, c->c_time);
  627         }
  628 }
  629 
  630 static void
  631 softclock_call_cc(struct callout *c, struct callout_cpu *cc,
  632 #ifdef CALLOUT_PROFILING
  633     int *mpcalls, int *lockcalls, int *gcalls,
  634 #endif
  635     int direct)
  636 {
  637         struct rm_priotracker tracker;
  638         callout_func_t *c_func, *drain;
  639         void *c_arg;
  640         struct lock_class *class;
  641         struct lock_object *c_lock;
  642         uintptr_t lock_status;
  643         int c_iflags;
  644 #ifdef SMP
  645         struct callout_cpu *new_cc;
  646         callout_func_t *new_func;
  647         void *new_arg;
  648         int flags, new_cpu;
  649         sbintime_t new_prec, new_time;
  650 #endif
  651 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 
  652         sbintime_t sbt1, sbt2;
  653         struct timespec ts2;
  654         static sbintime_t maxdt = 2 * SBT_1MS;  /* 2 msec */
  655         static callout_func_t *lastfunc;
  656 #endif
  657 
  658         KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
  659             ("softclock_call_cc: pend %p %x", c, c->c_iflags));
  660         KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
  661             ("softclock_call_cc: act %p %x", c, c->c_flags));
  662         class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
  663         lock_status = 0;
  664         if (c->c_iflags & CALLOUT_SHAREDLOCK) {
  665                 if (class == &lock_class_rm)
  666                         lock_status = (uintptr_t)&tracker;
  667                 else
  668                         lock_status = 1;
  669         }
  670         c_lock = c->c_lock;
  671         c_func = c->c_func;
  672         c_arg = c->c_arg;
  673         c_iflags = c->c_iflags;
  674         c->c_iflags &= ~CALLOUT_PENDING;
  675 
  676         cc_exec_curr(cc, direct) = c;
  677         cc_exec_last_func(cc, direct) = c_func;
  678         cc_exec_last_arg(cc, direct) = c_arg;
  679         cc_exec_cancel(cc, direct) = false;
  680         cc_exec_drain(cc, direct) = NULL;
  681         CC_UNLOCK(cc);
  682         if (c_lock != NULL) {
  683                 class->lc_lock(c_lock, lock_status);
  684                 /*
  685                  * The callout may have been cancelled
  686                  * while we switched locks.
  687                  */
  688                 if (cc_exec_cancel(cc, direct)) {
  689                         class->lc_unlock(c_lock);
  690                         goto skip;
  691                 }
  692                 /* The callout cannot be stopped now. */
  693                 cc_exec_cancel(cc, direct) = true;
  694                 if (c_lock == &Giant.lock_object) {
  695 #ifdef CALLOUT_PROFILING
  696                         (*gcalls)++;
  697 #endif
  698                         CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
  699                             c, c_func, c_arg);
  700                 } else {
  701 #ifdef CALLOUT_PROFILING
  702                         (*lockcalls)++;
  703 #endif
  704                         CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
  705                             c, c_func, c_arg);
  706                 }
  707         } else {
  708 #ifdef CALLOUT_PROFILING
  709                 (*mpcalls)++;
  710 #endif
  711                 CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
  712                     c, c_func, c_arg);
  713         }
  714         KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
  715             "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
  716 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
  717         sbt1 = sbinuptime();
  718 #endif
  719         THREAD_NO_SLEEPING();
  720         SDT_PROBE1(callout_execute, , , callout__start, c);
  721         c_func(c_arg);
  722         SDT_PROBE1(callout_execute, , , callout__end, c);
  723         THREAD_SLEEPING_OK();
  724 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
  725         sbt2 = sbinuptime();
  726         sbt2 -= sbt1;
  727         if (sbt2 > maxdt) {
  728                 if (lastfunc != c_func || sbt2 > maxdt * 2) {
  729                         ts2 = sbttots(sbt2);
  730                         printf(
  731                 "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
  732                             c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
  733                 }
  734                 maxdt = sbt2;
  735                 lastfunc = c_func;
  736         }
  737 #endif
  738         KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
  739         CTR1(KTR_CALLOUT, "callout %p finished", c);
  740         if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
  741                 class->lc_unlock(c_lock);
  742 skip:
  743         CC_LOCK(cc);
  744         KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
  745         cc_exec_curr(cc, direct) = NULL;
  746         if (cc_exec_drain(cc, direct)) {
  747                 drain = cc_exec_drain(cc, direct);
  748                 cc_exec_drain(cc, direct) = NULL;
  749                 CC_UNLOCK(cc);
  750                 drain(c_arg);
  751                 CC_LOCK(cc);
  752         }
  753         if (cc_exec_waiting(cc, direct)) {
  754                 /*
  755                  * There is someone waiting for the
  756                  * callout to complete.
  757                  * If the callout was scheduled for
  758                  * migration just cancel it.
  759                  */
  760                 if (cc_cce_migrating(cc, direct)) {
  761                         cc_cce_cleanup(cc, direct);
  762 
  763                         /*
   764                          * It would be nice to assert here that the callout
   765                          * is not destroyed, but that is not easy.
  766                          */
  767                         c->c_iflags &= ~CALLOUT_DFRMIGRATION;
  768                 }
  769                 cc_exec_waiting(cc, direct) = false;
  770                 CC_UNLOCK(cc);
  771                 wakeup(&cc_exec_waiting(cc, direct));
  772                 CC_LOCK(cc);
  773         } else if (cc_cce_migrating(cc, direct)) {
  774 #ifdef SMP
  775                 /*
  776                  * If the callout was scheduled for
  777                  * migration just perform it now.
  778                  */
  779                 new_cpu = cc_migration_cpu(cc, direct);
  780                 new_time = cc_migration_time(cc, direct);
  781                 new_prec = cc_migration_prec(cc, direct);
  782                 new_func = cc_migration_func(cc, direct);
  783                 new_arg = cc_migration_arg(cc, direct);
  784                 cc_cce_cleanup(cc, direct);
  785 
  786                 /*
   787                  * It would be nice to assert here that the callout is not
   788                  * destroyed, but that is not easy.
   789                  *
   790                  * First, handle deferred callout stops.
  791                  */
  792                 if (!callout_migrating(c)) {
  793                         CTR3(KTR_CALLOUT,
  794                              "deferred cancelled %p func %p arg %p",
  795                              c, new_func, new_arg);
  796                         return;
  797                 }
  798                 c->c_iflags &= ~CALLOUT_DFRMIGRATION;
  799 
  800                 new_cc = callout_cpu_switch(c, cc, new_cpu);
  801                 flags = (direct) ? C_DIRECT_EXEC : 0;
  802                 callout_cc_add(c, new_cc, new_time, new_prec, new_func,
  803                     new_arg, flags);
  804                 CC_UNLOCK(new_cc);
  805                 CC_LOCK(cc);
  806 #else
  807                 panic("migration should not happen");
  808 #endif
  809         }
  810 }
  811 
  812 /*
  813  * The callout mechanism is based on the work of Adam M. Costello and
  814  * George Varghese, published in a technical report entitled "Redesigning
  815  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
  816  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
  817  * used in this implementation was published by G. Varghese and T. Lauck in
  818  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
  819  * the Efficient Implementation of a Timer Facility" in the Proceedings of
  820  * the 11th ACM Annual Symposium on Operating Systems Principles,
  821  * Austin, Texas Nov 1987.
  822  */
  823 
  824 /*
  825  * Software (low priority) clock interrupt thread handler.
  826  * Run periodic events from timeout queue.
  827  */
  828 static void
  829 softclock_thread(void *arg)
  830 {
  831         struct thread *td = curthread;
  832         struct callout_cpu *cc;
  833         struct callout *c;
  834 #ifdef CALLOUT_PROFILING
  835         int depth, gcalls, lockcalls, mpcalls;
  836 #endif
  837 
  838         cc = (struct callout_cpu *)arg;
  839         CC_LOCK(cc);
  840         for (;;) {
  841                 while (TAILQ_EMPTY(&cc->cc_expireq)) {
  842                         /*
  843                          * Use CC_LOCK(cc) as the thread_lock while
  844                          * idle.
  845                          */
  846                         thread_lock(td);
  847                         thread_lock_set(td, (struct mtx *)&cc->cc_lock);
  848                         TD_SET_IWAIT(td);
  849                         mi_switch(SW_VOL | SWT_IWAIT);
  850 
  851                         /* mi_switch() drops thread_lock(). */
  852                         CC_LOCK(cc);
  853                 }
  854 
  855 #ifdef CALLOUT_PROFILING
  856                 depth = gcalls = lockcalls = mpcalls = 0;
  857 #endif
  858                 while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
  859                         TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
  860                         softclock_call_cc(c, cc,
  861 #ifdef CALLOUT_PROFILING
  862                             &mpcalls, &lockcalls, &gcalls,
  863 #endif
  864                             0);
  865 #ifdef CALLOUT_PROFILING
  866                         ++depth;
  867 #endif
  868                 }
  869 #ifdef CALLOUT_PROFILING
  870                 avg_depth += (depth * 1000 - avg_depth) >> 8;
  871                 avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
  872                 avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
  873                 avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
  874 #endif
  875         }
  876 }
  877 
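The avg_* statistics maintained above (and in callout_process()) exist only under the CALLOUT_PROFILING option; they are exponential moving averages with weight 1/256, kept in units of 1/1000 as the sysctl descriptions note.  A standalone sketch of that update rule, with invented sample values:

#include <stdio.h>

int
main(void)
{
	int avg_depth = 0;
	int samples[] = { 3, 5, 4, 4, 6 };      /* invented per-call depths */
	unsigned i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		/* Same form as above: fold in 1/256 of (sample * 1000). */
		avg_depth += (samples[i] * 1000 - avg_depth) >> 8;
	}
	printf("avg_depth %d (about %d.%03d items per call so far)\n",
	    avg_depth, avg_depth / 1000, avg_depth % 1000);
	return (0);
}

Each iteration mixes 1/256 of the new sample (scaled by 1000) into the running average, so the counters respond slowly to short bursts.
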
  878 void
  879 callout_when(sbintime_t sbt, sbintime_t precision, int flags,
  880     sbintime_t *res, sbintime_t *prec_res)
  881 {
  882         sbintime_t to_sbt, to_pr;
  883 
  884         if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) {
  885                 *res = sbt;
  886                 *prec_res = precision;
  887                 return;
  888         }
  889         if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt)
  890                 sbt = tick_sbt;
  891         if ((flags & C_HARDCLOCK) != 0 || sbt >= sbt_tickthreshold) {
  892                 /*
  893                  * Obtain the time of the last hardclock() call on
  894                  * this CPU directly from the kern_clocksource.c.
  895                  * This value is per-CPU, but it is equal for all
  896                  * active ones.
  897                  */
  898 #ifdef __LP64__
  899                 to_sbt = DPCPU_GET(hardclocktime);
  900 #else
  901                 spinlock_enter();
  902                 to_sbt = DPCPU_GET(hardclocktime);
  903                 spinlock_exit();
  904 #endif
  905                 if (cold && to_sbt == 0)
  906                         to_sbt = sbinuptime();
  907                 if ((flags & C_HARDCLOCK) == 0)
  908                         to_sbt += tick_sbt;
  909         } else
  910                 to_sbt = sbinuptime();
  911         if (SBT_MAX - to_sbt < sbt)
  912                 to_sbt = SBT_MAX;
  913         else
  914                 to_sbt += sbt;
  915         *res = to_sbt;
  916         to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
  917             sbt >> C_PRELGET(flags));
  918         *prec_res = to_pr > precision ? to_pr : precision;
  919 }
  920 
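The precision chosen above scales with the requested interval: a caller passing C_PREL(n) gets sbt >> n of allowed slop, and a caller passing no C_PREL flag falls back to the system-wide tc_precexp shift.  A simplified user-space sketch of just that arithmetic; here prel stands in for the value C_PRELGET(flags) would return, and the hard-coded tc_precexp is purely illustrative (the kernel derives it from the kern.timecounter.alloweddeviation tunable):

#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;
#define SBT_1S  ((sbintime_t)1 << 32)

static sbintime_t
when_precision(sbintime_t sbt, sbintime_t precision, int prel)
{
	int tc_precexp = 5;     /* illustrative stand-in, see note above */
	sbintime_t to_pr;

	/* Mirrors the tail of callout_when() above. */
	to_pr = (prel < 0) ? sbt >> tc_precexp : sbt >> prel;
	return (to_pr > precision ? to_pr : precision);
}

int
main(void)
{
	/* A 1 s relative timeout with C_PREL(8): tolerate about 1/256 s. */
	sbintime_t pr = when_precision(SBT_1S, 0, 8);

	printf("precision %jd sbt units (%.4f s)\n",
	    (intmax_t)pr, (double)pr / SBT_1S);
	return (0);
}

For a 1 s timeout with C_PREL(8) this yields 2^24 sbt units, about 3.9 ms of slack, which lets the event timer aggregate nearby callouts.
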
  921 /*
  922  * New interface; clients allocate their own callout structures.
  923  *
  924  * callout_reset() - establish or change a timeout
  925  * callout_stop() - disestablish a timeout
  926  * callout_init() - initialize a callout structure so that it can
  927  *      safely be passed to callout_reset() and callout_stop()
  928  *
  929  * <sys/callout.h> defines three convenience macros:
  930  *
  931  * callout_active() - returns truth if callout has not been stopped,
  932  *      drained, or deactivated since the last time the callout was
  933  *      reset.
  934  * callout_pending() - returns truth if callout is still waiting for timeout
  935  * callout_deactivate() - marks the callout as having been serviced
   936  * callout_deactivate() - marks the callout as having been serviced
        */
  937 int
  938 callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec,
  939     callout_func_t *ftn, void *arg, int cpu, int flags)
  940 {
  941         sbintime_t to_sbt, precision;
  942         struct callout_cpu *cc;
  943         int cancelled, direct;
  944 
  945         cancelled = 0;
  946         callout_when(sbt, prec, flags, &to_sbt, &precision);
  947 
  948         /* 
   949          * This flag used to be set by callout_cc_add(), but the first
   950          * time this is called we could end up with the wrong direct
   951          * flag if we don't compute it before adding the callout.
  952          */
  953         if (flags & C_DIRECT_EXEC) {
  954                 direct = 1;
  955         } else {
  956                 direct = 0;
  957         }
  958         KASSERT(!direct || c->c_lock == NULL ||
  959             (LOCK_CLASS(c->c_lock)->lc_flags & LC_SPINLOCK),
  960             ("%s: direct callout %p has non-spin lock", __func__, c));
  961 
  962         cc = callout_lock(c);
  963         if (cpu == -1)
  964                 cpu = c->c_cpu;
  965         KASSERT(cpu >= 0 && cpu <= mp_maxid && !CPU_ABSENT(cpu),
  966             ("%s: invalid cpu %d", __func__, cpu));
  967 
  968         if (cc_exec_curr(cc, direct) == c) {
  969                 /*
  970                  * We're being asked to reschedule a callout which is
  971                  * currently in progress.  If there is a lock then we
  972                  * can cancel the callout if it has not really started.
  973                  */
  974                 if (c->c_lock != NULL && !cc_exec_cancel(cc, direct))
  975                         cancelled = cc_exec_cancel(cc, direct) = true;
  976                 if (cc_exec_waiting(cc, direct) || cc_exec_drain(cc, direct)) {
  977                         /*
  978                          * Someone has called callout_drain to kill this
  979                          * callout.  Don't reschedule.
  980                          */
  981                         CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
  982                             cancelled ? "cancelled" : "failed to cancel",
  983                             c, c->c_func, c->c_arg);
  984                         CC_UNLOCK(cc);
  985                         return (cancelled);
  986                 }
  987 #ifdef SMP
  988                 if (callout_migrating(c)) {
  989                         /* 
   990                          * This only occurs when a second callout_reset_sbt_on()
   991                          * call is made after a previous one moved it into
  992                          * deferred migration (below). Note we do *not* change
  993                          * the prev_cpu even though the previous target may
  994                          * be different.
  995                          */
  996                         cc_migration_cpu(cc, direct) = cpu;
  997                         cc_migration_time(cc, direct) = to_sbt;
  998                         cc_migration_prec(cc, direct) = precision;
  999                         cc_migration_func(cc, direct) = ftn;
 1000                         cc_migration_arg(cc, direct) = arg;
 1001                         cancelled = 1;
 1002                         CC_UNLOCK(cc);
 1003                         return (cancelled);
 1004                 }
 1005 #endif
 1006         }
 1007         if (c->c_iflags & CALLOUT_PENDING) {
 1008                 if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 1009                         if (cc_exec_next(cc) == c)
 1010                                 cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 1011                         LIST_REMOVE(c, c_links.le);
 1012                 } else {
 1013                         TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 1014                 }
 1015                 cancelled = 1;
 1016                 c->c_iflags &= ~ CALLOUT_PENDING;
 1017                 c->c_flags &= ~ CALLOUT_ACTIVE;
 1018         }
 1019 
 1020 #ifdef SMP
 1021         /*
 1022          * If the callout must migrate try to perform it immediately.
 1023          * If the callout is currently running, just defer the migration
 1024          * to a more appropriate moment.
 1025          */
 1026         if (c->c_cpu != cpu) {
 1027                 if (cc_exec_curr(cc, direct) == c) {
 1028                         /* 
  1029                          * The pending flag will have been removed, since
  1030                          * we are actually executing the callout on another
  1031                          * CPU. That callout should be waiting on the
  1032                          * lock the caller holds. Once we set both the
  1033                          * active and pending flags and return, and the
  1034                          * executing callout acquires that lock, it will
  1035                          * see that pending is true and return.  When the
  1036                          * actual callout execution returns, the migration
  1037                          * will be performed in softclock_call_cc() and the
  1038                          * callout will be placed on the new CPU via a call
  1039                          * to callout_cpu_switch(), which takes the lock on
  1040                          * the right CPU, followed by a call to
  1041                          * callout_cc_add(), which adds it there
  1042                          * (see above in softclock_call_cc()).
 1043                          */
 1044                         cc_migration_cpu(cc, direct) = cpu;
 1045                         cc_migration_time(cc, direct) = to_sbt;
 1046                         cc_migration_prec(cc, direct) = precision;
 1047                         cc_migration_func(cc, direct) = ftn;
 1048                         cc_migration_arg(cc, direct) = arg;
 1049                         c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
 1050                         c->c_flags |= CALLOUT_ACTIVE;
 1051                         CTR6(KTR_CALLOUT,
 1052                     "migration of %p func %p arg %p in %d.%08x to %u deferred",
 1053                             c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
 1054                             (u_int)(to_sbt & 0xffffffff), cpu);
 1055                         CC_UNLOCK(cc);
 1056                         return (cancelled);
 1057                 }
 1058                 cc = callout_cpu_switch(c, cc, cpu);
 1059         }
 1060 #endif
 1061 
 1062         callout_cc_add(c, cc, to_sbt, precision, ftn, arg, flags);
 1063         CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
 1064             cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
 1065             (u_int)(to_sbt & 0xffffffff));
 1066         CC_UNLOCK(cc);
 1067 
 1068         return (cancelled);
 1069 }
 1070 
 1071 /*
 1072  * Common idioms that can be optimized in the future.
 1073  */
 1074 int
 1075 callout_schedule_on(struct callout *c, int to_ticks, int cpu)
 1076 {
 1077         return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
 1078 }
 1079 
 1080 int
 1081 callout_schedule(struct callout *c, int to_ticks)
 1082 {
 1083         return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
 1084 }
 1085 
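For context, a hypothetical consumer of the interface summarized above might look like the following sketch (the mydev names are invented for illustration and are not part of this file).  Handing the mutex to callout_init_mtx() makes the callout subsystem run the handler with that lock held, which is what lets callout_stop() under the lock, or callout_drain() outside it, synchronize with a firing handler:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/time.h>

struct mydev_softc {
	struct mtx	sc_mtx;
	struct callout	sc_timer;
	int		sc_ticks_fired;
};

static void
mydev_timer(void *arg)
{
	struct mydev_softc *sc = arg;

	/* callout_init_mtx() arranges for sc_mtx to be held here. */
	mtx_assert(&sc->sc_mtx, MA_OWNED);
	sc->sc_ticks_fired++;
	/* Re-arm: fire again in 1 s, allowing about 1/256 s of slop. */
	callout_reset_sbt(&sc->sc_timer, SBT_1S, 0, mydev_timer, sc, C_PREL(8));
}

static void
mydev_start_timer(struct mydev_softc *sc)
{
	mtx_init(&sc->sc_mtx, "mydev", NULL, MTX_DEF);
	callout_init_mtx(&sc->sc_timer, &sc->sc_mtx, 0);
	mtx_lock(&sc->sc_mtx);
	callout_reset_sbt(&sc->sc_timer, SBT_1S, 0, mydev_timer, sc, C_PREL(8));
	mtx_unlock(&sc->sc_mtx);
}

static void
mydev_stop_timer(struct mydev_softc *sc)
{
	/* callout_drain() may sleep, so call it without sc_mtx held. */
	callout_drain(&sc->sc_timer);
	mtx_destroy(&sc->sc_mtx);
}

Re-arming from inside the handler, as mydev_timer() does, is the usual way to build a periodic timer on top of this interface.
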
 1086 int
 1087 _callout_stop_safe(struct callout *c, int flags, callout_func_t *drain)
 1088 {
 1089         struct callout_cpu *cc, *old_cc;
 1090         struct lock_class *class;
 1091         int direct, sq_locked, use_lock;
 1092         int cancelled, not_on_a_list;
 1093 
 1094         if ((flags & CS_DRAIN) != 0)
 1095                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock,
 1096                     "calling %s", __func__);
 1097 
 1098         KASSERT((flags & CS_DRAIN) == 0 || drain == NULL,
 1099             ("Cannot set drain callback and CS_DRAIN flag at the same time"));
 1100 
 1101         /*
 1102          * Some old subsystems don't hold Giant while running a callout_stop(),
 1103          * so just discard this check for the moment.
 1104          */
 1105         if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) {
 1106                 if (c->c_lock == &Giant.lock_object)
 1107                         use_lock = mtx_owned(&Giant);
 1108                 else {
 1109                         use_lock = 1;
 1110                         class = LOCK_CLASS(c->c_lock);
 1111                         class->lc_assert(c->c_lock, LA_XLOCKED);
 1112                 }
 1113         } else
 1114                 use_lock = 0;
 1115         if (c->c_iflags & CALLOUT_DIRECT) {
 1116                 direct = 1;
 1117         } else {
 1118                 direct = 0;
 1119         }
 1120         sq_locked = 0;
 1121         old_cc = NULL;
 1122 again:
 1123         cc = callout_lock(c);
 1124 
 1125         if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
 1126             (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
 1127             ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
 1128                 /*
 1129                  * Special case where this slipped in while we
 1130                  * were migrating *as* the callout is about to
 1131                  * execute. The caller probably holds the lock
 1132                  * the callout wants.
 1133                  *
 1134                  * Get rid of the migration first. Then set
 1135                  * the flag that tells this code *not* to
  1136                  * try to remove it from any lists (it is not
  1137                  * on one yet).  When the callout wheel runs,
 1138                  * it will ignore this callout.
 1139                  */
 1140                 c->c_iflags &= ~CALLOUT_PENDING;
 1141                 c->c_flags &= ~CALLOUT_ACTIVE;
 1142                 not_on_a_list = 1;
 1143         } else {
 1144                 not_on_a_list = 0;
 1145         }
 1146 
 1147         /*
 1148          * If the callout was migrating while the callout cpu lock was
 1149          * dropped,  just drop the sleepqueue lock and check the states
 1150          * again.
 1151          */
 1152         if (sq_locked != 0 && cc != old_cc) {
 1153 #ifdef SMP
 1154                 CC_UNLOCK(cc);
 1155                 sleepq_release(&cc_exec_waiting(old_cc, direct));
 1156                 sq_locked = 0;
 1157                 old_cc = NULL;
 1158                 goto again;
 1159 #else
 1160                 panic("migration should not happen");
 1161 #endif
 1162         }
 1163 
 1164         /*
 1165          * If the callout is running, try to stop it or drain it.
 1166          */
 1167         if (cc_exec_curr(cc, direct) == c) {
 1168                 /*
  1169                  * Whether we succeed in stopping it or not, we must clear
  1170                  * the active flag; this is what API users expect.  If we're
 1171                  * draining and the callout is currently executing, first wait
 1172                  * until it finishes.
 1173                  */
 1174                 if ((flags & CS_DRAIN) == 0)
 1175                         c->c_flags &= ~CALLOUT_ACTIVE;
 1176 
 1177                 if ((flags & CS_DRAIN) != 0) {
 1178                         /*
 1179                          * The current callout is running (or just
 1180                          * about to run) and blocking is allowed, so
 1181                          * just wait for the current invocation to
 1182                          * finish.
 1183                          */
 1184                         if (cc_exec_curr(cc, direct) == c) {
 1185                                 /*
 1186                                  * Use direct calls to sleepqueue interface
 1187                                  * instead of cv/msleep in order to avoid
 1188                                  * a LOR between cc_lock and sleepqueue
 1189                                  * chain spinlocks.  This piece of code
 1190                                  * emulates a msleep_spin() call actually.
  1191                                  * effectively emulates a msleep_spin() call.
 1192                                  * If we already have the sleepqueue chain
 1193                                  * locked, then we can safely block.  If we
 1194                                  * don't already have it locked, however,
 1195                                  * we have to drop the cc_lock to lock
 1196                                  * it.  This opens several races, so we
 1197                                  * restart at the beginning once we have
 1198                                  * both locks.  If nothing has changed, then
 1199                                  * we will end up back here with sq_locked
 1200                                  * set.
 1201                                  */
 1202                                 if (!sq_locked) {
 1203                                         CC_UNLOCK(cc);
 1204                                         sleepq_lock(
 1205                                             &cc_exec_waiting(cc, direct));
 1206                                         sq_locked = 1;
 1207                                         old_cc = cc;
 1208                                         goto again;
 1209                                 }
 1210 
 1211                                 /*
  1212                                  * Migration could be cancelled here, but
  1213                                  * since it is not yet certain when it will
  1214                                  * be wrapped up, just let softclock()
  1215                                  * take care of it.
 1216                                  */
 1217                                 cc_exec_waiting(cc, direct) = true;
 1218                                 DROP_GIANT();
 1219                                 CC_UNLOCK(cc);
 1220                                 sleepq_add(
 1221                                     &cc_exec_waiting(cc, direct),
 1222                                     &cc->cc_lock.lock_object, "codrain",
 1223                                     SLEEPQ_SLEEP, 0);
 1224                                 sleepq_wait(
 1225                                     &cc_exec_waiting(cc, direct),
 1226                                              0);
 1227                                 sq_locked = 0;
 1228                                 old_cc = NULL;
 1229 
 1230                                 /* Reacquire locks previously released. */
 1231                                 PICKUP_GIANT();
 1232                                 goto again;
 1233                         }
 1234                         c->c_flags &= ~CALLOUT_ACTIVE;
 1235                 } else if (use_lock &&
 1236                            !cc_exec_cancel(cc, direct) && (drain == NULL)) {
 1237                         
 1238                         /*
 1239                          * The current callout is waiting for its
 1240                          * lock which we hold.  Cancel the callout
 1241                          * and return.  After our caller drops the
 1242                          * lock, the callout will be skipped in
 1243                          * softclock(). This *only* works with a
 1244                          * callout_stop() *not* callout_drain() or
 1245                          * callout_async_drain().
 1246                          */
 1247                         cc_exec_cancel(cc, direct) = true;
 1248                         CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 1249                             c, c->c_func, c->c_arg);
 1250                         KASSERT(!cc_cce_migrating(cc, direct),
 1251                             ("callout wrongly scheduled for migration"));
 1252                         if (callout_migrating(c)) {
 1253                                 c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 1254 #ifdef SMP
 1255                                 cc_migration_cpu(cc, direct) = CPUBLOCK;
 1256                                 cc_migration_time(cc, direct) = 0;
 1257                                 cc_migration_prec(cc, direct) = 0;
 1258                                 cc_migration_func(cc, direct) = NULL;
 1259                                 cc_migration_arg(cc, direct) = NULL;
 1260 #endif
 1261                         }
 1262                         CC_UNLOCK(cc);
 1263                         KASSERT(!sq_locked, ("sleepqueue chain locked"));
 1264                         return (1);
 1265                 } else if (callout_migrating(c)) {
 1266                         /*
 1267                          * The callout is currently being serviced
 1268                          * and the "next" callout is scheduled at
 1269                          * its completion with a migration. We remove
 1270                          * the migration flag so it *won't* get rescheduled,
 1271                          * but we can't stop the one that is running, so
 1272                          * we return 0.
 1273                          */
 1274                         c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 1275 #ifdef SMP
 1276                         /* 
 1277                          * We can't call cc_cce_cleanup here, since
 1278                          * doing so would clear .ce_curr while the
 1279                          * callout is still running.  That would
 1280                          * prevent a reschedule of the callout when
 1281                          * its execution completes.
 1282                          */
 1283                         cc_migration_cpu(cc, direct) = CPUBLOCK;
 1284                         cc_migration_time(cc, direct) = 0;
 1285                         cc_migration_prec(cc, direct) = 0;
 1286                         cc_migration_func(cc, direct) = NULL;
 1287                         cc_migration_arg(cc, direct) = NULL;
 1288 #endif
 1289                         CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
 1290                             c, c->c_func, c->c_arg);
 1291                         if (drain) {
 1292                                 KASSERT(cc_exec_drain(cc, direct) == NULL,
 1293                                     ("callout drain function already set to %p",
 1294                                     cc_exec_drain(cc, direct)));
 1295                                 cc_exec_drain(cc, direct) = drain;
 1296                         }
 1297                         CC_UNLOCK(cc);
 1298                         return (0);
 1299                 } else {
 1300                         CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 1301                             c, c->c_func, c->c_arg);
 1302                         if (drain) {
 1303                                 KASSERT(cc_exec_drain(cc, direct) == NULL,
 1304                                     ("callout drain function already set to %p",
 1305                                     cc_exec_drain(cc, direct)));
 1306                                 cc_exec_drain(cc, direct) = drain;
 1307                         }
 1308                 }
 1309                 KASSERT(!sq_locked, ("sleepqueue chain still locked"));
 1310                 cancelled = 0;
 1311         } else
 1312                 cancelled = 1;
 1313 
 1314         if (sq_locked)
 1315                 sleepq_release(&cc_exec_waiting(cc, direct));
 1316 
 1317         if ((c->c_iflags & CALLOUT_PENDING) == 0) {
 1318                 CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 1319                     c, c->c_func, c->c_arg);
 1320                 /*
 1321                  * Return a negative value for a callout that is
 1322                  * neither scheduled nor currently executing.
 1323                  */
 1324                 if (cc_exec_curr(cc, direct) != c)
 1325                         cancelled = -1;
 1326                 CC_UNLOCK(cc);
 1327                 return (cancelled);
 1328         }
 1329 
 1330         c->c_iflags &= ~CALLOUT_PENDING;
 1331         c->c_flags &= ~CALLOUT_ACTIVE;
 1332 
 1333         CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 1334             c, c->c_func, c->c_arg);
 1335         if (not_on_a_list == 0) {
 1336                 if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 1337                         if (cc_exec_next(cc) == c)
 1338                                 cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 1339                         LIST_REMOVE(c, c_links.le);
 1340                 } else {
 1341                         TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 1342                 }
 1343         }
 1344         CC_UNLOCK(cc);
 1345         return (cancelled);
 1346 }
 1347 
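The tail of the stop path above is what gives callout_stop() and callout_drain() their return values: 1 when a pending callout was cancelled, 0 when it could not be stopped (typically because it is currently executing), and a negative value when it was neither scheduled nor executing. The sketch below is a hedged illustration of how a teardown path might consume those values; the softc layout, names, and handler are hypothetical and not part of kern_timeout.c, and the callout is assumed to have been set up with callout_init_mtx(&sc->tick, &sc->mtx, 0).

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct example_softc {                  /* hypothetical driver state */
        struct mtx      mtx;
        struct callout  tick;
};

static void
example_teardown(struct example_softc *sc)
{

        mtx_lock(&sc->mtx);
        /*
         * > 0: a pending callout was cancelled; 0: it could not be
         * stopped (e.g. the handler is running); < 0: it was neither
         * scheduled nor executing.
         */
        if (callout_stop(&sc->tick) == 0)
                printf("example: tick handler still in flight\n");
        mtx_unlock(&sc->mtx);

        /*
         * callout_drain() may sleep (on the cc_exec_waiting channel seen
         * above), so it is called here without holding the callout's lock.
         */
        callout_drain(&sc->tick);
}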
 1348 void
 1349 callout_init(struct callout *c, int mpsafe)
 1350 {
 1351         bzero(c, sizeof *c);
 1352         if (mpsafe) {
 1353                 c->c_lock = NULL;
 1354                 c->c_iflags = CALLOUT_RETURNUNLOCKED;
 1355         } else {
 1356                 c->c_lock = &Giant.lock_object;
 1357                 c->c_iflags = 0;
 1358         }
 1359         c->c_cpu = cc_default_cpu;
 1360 }
 1361 
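callout_init() above only distinguishes MP-safe callouts (no associated lock, CALLOUT_RETURNUNLOCKED set) from legacy ones that implicitly take Giant. A minimal hedged sketch of the MP-safe case; example_co, example_tick(), and the one-second period are illustrative only.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>

static struct callout example_co;

static void
example_tick(void *arg)
{

        /* No lock is held on entry because the callout is MP-safe. */
        printf("example: tick\n");
        callout_reset(&example_co, hz, example_tick, arg);      /* re-arm */
}

static void
example_start(void)
{

        callout_init(&example_co, 1);   /* mpsafe != 0: c_lock stays NULL */
        callout_reset(&example_co, hz, example_tick, NULL);
}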
 1362 void
 1363 _callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
 1364 {
 1365         bzero(c, sizeof *c);
 1366         c->c_lock = lock;
 1367         KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
 1368             ("callout_init_lock: bad flags %d", flags));
 1369         KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
 1370             ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
 1371         KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & LC_SLEEPABLE),
 1372             ("%s: callout %p has sleepable lock", __func__, c));
 1373         c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
 1374         c->c_cpu = cc_default_cpu;
 1375 }
 1376 
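Consumers normally reach _callout_init_lock() through wrappers such as callout_init_mtx() and callout_init_rw() in <sys/callout.h>, which pass the lock's lock_object and the flags validated above (only CALLOUT_RETURNUNLOCKED and CALLOUT_SHAREDLOCK are accepted, and sleepable locks are rejected). A hedged sketch of a mutex-protected callout; the names below are illustrative.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

static struct mtx       example_mtx;
static struct callout   example_co;

static void
example_locked_tick(void *arg)
{

        /* softclock acquires example_mtx before invoking this handler. */
        mtx_assert(&example_mtx, MA_OWNED);
}

static void
example_setup(void)
{

        mtx_init(&example_mtx, "example", NULL, MTX_DEF);
        callout_init_mtx(&example_co, &example_mtx, 0);
        callout_reset(&example_co, hz, example_locked_tick, NULL);
}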
 1377 static int
 1378 flssbt(sbintime_t sbt)
 1379 {
 1380 
 1381         sbt += (uint64_t)sbt >> 1;
 1382         if (sizeof(long) >= sizeof(sbintime_t))
 1383                 return (flsl(sbt));
 1384         if (sbt >= SBT_1S)
 1385                 return (flsl(((uint64_t)sbt) >> 32) + 32);
 1386         return (flsl(sbt));
 1387 }
 1388 
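flssbt() biases its argument by 1.5x before taking the highest set bit, so a value is classified into the power-of-two bucket nearest to it rather than simply truncated; the second branch only matters where long is 32 bits, in which case values of one second (SBT_1S) and above are measured from the upper 32-bit word. Below is a hedged userland sketch of the same arithmetic, using int64_t in place of sbintime_t and flsll() from FreeBSD's <strings.h>; the names are illustrative.

#include <stdint.h>
#include <stdio.h>
#include <strings.h>

#define EX_SBT_1S       ((int64_t)1 << 32)      /* same fixed-point format */

static int
example_flssbt(int64_t sbt)
{

        sbt += (uint64_t)sbt >> 1;      /* bias by 1.5x to round the bucket */
        return (flsll(sbt));
}

int
main(void)
{

        /* One second lands in bucket 33, shown by the sysctl table below
         * as 2**32, i.e. 1.000000s. */
        printf("1s  -> bucket %d\n", example_flssbt(EX_SBT_1S));
        /* SBT_1S / 1000 lands in bucket 23, roughly one millisecond. */
        printf("1ms -> bucket %d\n", example_flssbt(EX_SBT_1S / 1000));
        return (0);
}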
 1389 /*
 1390  * Dump an immediate statistics snapshot of the scheduled callouts.
 1391  */
 1392 static int
 1393 sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
 1394 {
 1395         struct callout *tmp;
 1396         struct callout_cpu *cc;
 1397         struct callout_list *sc;
 1398         sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
 1399         int ct[64], cpr[64], ccpbk[32];
 1400         int error, val, i, count, tcum, pcum, maxc, c, medc;
 1401         int cpu;
 1402 
 1403         val = 0;
 1404         error = sysctl_handle_int(oidp, &val, 0, req);
 1405         if (error != 0 || req->newptr == NULL)
 1406                 return (error);
 1407         count = maxc = 0;
 1408         st = spr = maxt = maxpr = 0;
 1409         bzero(ccpbk, sizeof(ccpbk));
 1410         bzero(ct, sizeof(ct));
 1411         bzero(cpr, sizeof(cpr));
 1412         now = sbinuptime();
 1413         CPU_FOREACH(cpu) {
 1414                 cc = CC_CPU(cpu);
 1415                 CC_LOCK(cc);
 1416                 for (i = 0; i < callwheelsize; i++) {
 1417                         sc = &cc->cc_callwheel[i];
 1418                         c = 0;
 1419                         LIST_FOREACH(tmp, sc, c_links.le) {
 1420                                 c++;
 1421                                 t = tmp->c_time - now;
 1422                                 if (t < 0)
 1423                                         t = 0;
 1424                                 st += t / SBT_1US;
 1425                                 spr += tmp->c_precision / SBT_1US;
 1426                                 if (t > maxt)
 1427                                         maxt = t;
 1428                                 if (tmp->c_precision > maxpr)
 1429                                         maxpr = tmp->c_precision;
 1430                                 ct[flssbt(t)]++;
 1431                                 cpr[flssbt(tmp->c_precision)]++;
 1432                         }
 1433                         if (c > maxc)
 1434                                 maxc = c;
 1435                         ccpbk[fls(c + c / 2)]++;
 1436                         count += c;
 1437                 }
 1438                 CC_UNLOCK(cc);
 1439         }
 1440 
 1441         for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
 1442                 tcum += ct[i];
 1443         medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 1444         for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
 1445                 pcum += cpr[i];
 1446         medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 1447         for (i = 0, c = 0; i < 32 && c < count / 2; i++)
 1448                 c += ccpbk[i];
 1449         medc = (i >= 2) ? (1 << (i - 2)) : 0;
 1450 
 1451         printf("Scheduled callouts statistic snapshot:\n");
 1452         printf("  Callouts: %6d  Buckets: %6d*%-3d  Bucket size: 0.%06ds\n",
 1453             count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
 1454         printf("  C/Bk: med %5d         avg %6d.%06jd  max %6d\n",
 1455             medc,
 1456             count / callwheelsize / mp_ncpus,
 1457             (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
 1458             maxc);
 1459         printf("  Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 1460             medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
 1461             (st / count) / 1000000, (st / count) % 1000000,
 1462             maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
 1463         printf("  Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 1464             medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
 1465             (spr / count) / 1000000, (spr / count) % 1000000,
 1466             maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
 1467         printf("  Distribution:       \tbuckets\t   time\t   tcum\t"
 1468             "   prec\t   pcum\n");
 1469         for (i = 0, tcum = pcum = 0; i < 64; i++) {
 1470                 if (ct[i] == 0 && cpr[i] == 0)
 1471                         continue;
 1472                 t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
 1473                 tcum += ct[i];
 1474                 pcum += cpr[i];
 1475                 printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
 1476                     t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
 1477                     i - 1 - (32 - CC_HASH_SHIFT),
 1478                     ct[i], tcum, cpr[i], pcum);
 1479         }
 1480         return (error);
 1481 }
 1482 SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
 1483     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 1484     0, 0, sysctl_kern_callout_stat, "I",
 1485     "Dump immediate statistic snapshot of the scheduled callouts");
 1486 
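sysctl_kern_callout_stat() only produces output when a new value is written (req->newptr must be non-NULL), and the tables go to the kernel message buffer via printf() rather than back to the caller, so the usual way to trigger a snapshot is "sysctl kern.callout_stat=1" as root, followed by reading dmesg. A hedged equivalent from C using sysctlbyname(3):

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>

int
main(void)
{
        int one = 1;

        /*
         * Writing any integer triggers the snapshot; the output appears
         * in the kernel message buffer (dmesg), not in this process.
         */
        if (sysctlbyname("kern.callout_stat", NULL, NULL, &one,
            sizeof(one)) != 0)
                err(1, "sysctlbyname(kern.callout_stat)");
        return (0);
}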
 1487 #ifdef DDB
 1488 static void
 1489 _show_callout(struct callout *c)
 1490 {
 1491 
 1492         db_printf("callout %p\n", c);
 1493 #define C_DB_PRINTF(f, e)       db_printf("   %s = " f "\n", #e, c->e);
 1494         db_printf("   &c_links = %p\n", &(c->c_links));
 1495         C_DB_PRINTF("%" PRId64, c_time);
 1496         C_DB_PRINTF("%" PRId64, c_precision);
 1497         C_DB_PRINTF("%p",       c_arg);
 1498         C_DB_PRINTF("%p",       c_func);
 1499         C_DB_PRINTF("%p",       c_lock);
 1500         C_DB_PRINTF("%#x",      c_flags);
 1501         C_DB_PRINTF("%#x",      c_iflags);
 1502         C_DB_PRINTF("%d",       c_cpu);
 1503 #undef  C_DB_PRINTF
 1504 }
 1505 
 1506 DB_SHOW_COMMAND(callout, db_show_callout)
 1507 {
 1508 
 1509         if (!have_addr) {
 1510                 db_printf("usage: show callout <struct callout *>\n");
 1511                 return;
 1512         }
 1513 
 1514         _show_callout((struct callout *)addr);
 1515 }
 1516 
 1517 static void
 1518 _show_last_callout(int cpu, int direct, const char *dirstr)
 1519 {
 1520         struct callout_cpu *cc;
 1521         void *func, *arg;
 1522 
 1523         cc = CC_CPU(cpu);
 1524         func = cc_exec_last_func(cc, direct);
 1525         arg = cc_exec_last_arg(cc, direct);
 1526         db_printf("cpu %d last%s callout function: %p ", cpu, dirstr, func);
 1527         db_printsym((db_expr_t)func, DB_STGY_ANY);
 1528         db_printf("\ncpu %d last%s callout argument: %p\n", cpu, dirstr, arg);
 1529 }
 1530 
 1531 DB_SHOW_COMMAND_FLAGS(callout_last, db_show_callout_last, DB_CMD_MEMSAFE)
 1532 {
 1533         int cpu, last;
 1534 
 1535         if (have_addr) {
 1536                 if (addr < 0 || addr > mp_maxid || CPU_ABSENT(addr)) {
 1537                         db_printf("no such cpu: %d\n", (int)addr);
 1538                         return;
 1539                 }
 1540                 cpu = last = addr;
 1541         } else {
 1542                 cpu = 0;
 1543                 last = mp_maxid;
 1544         }
 1545 
 1546         while (cpu <= last) {
 1547                 if (!CPU_ABSENT(cpu)) {
 1548                         _show_last_callout(cpu, 0, "");
 1549                         _show_last_callout(cpu, 1, " direct");
 1550                 }
 1551                 cpu++;
 1552         }
 1553 }
 1554 #endif /* DDB */
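With a DDB-enabled kernel (options DDB), the commands defined above are available at the debugger prompt: "show callout <address>" passes the address of a struct callout to _show_callout() and dumps its fields, while "show callout_last [cpu]" prints the function and argument of the last normal and last direct callout executed on the given CPU, or on every present CPU when no address is supplied.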
