FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_timeout.c


    1 /*-
    2  * Copyright (c) 1982, 1986, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      From: @(#)kern_clock.c  8.5 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/10.4/sys/kern/kern_timeout.c 305853 2016-09-16 00:14:26Z hiren $");
   39 
   40 #include "opt_callout_profiling.h"
   41 #include "opt_kdtrace.h"
   42 #include "opt_ddb.h"
   43 #if defined(__arm__)
   44 #include "opt_timer.h"
   45 #endif
   46 
   47 #include <sys/param.h>
   48 #include <sys/systm.h>
   49 #include <sys/bus.h>
   50 #include <sys/callout.h>
   51 #include <sys/file.h>
   52 #include <sys/interrupt.h>
   53 #include <sys/kernel.h>
   54 #include <sys/ktr.h>
   55 #include <sys/lock.h>
   56 #include <sys/malloc.h>
   57 #include <sys/mutex.h>
   58 #include <sys/proc.h>
   59 #include <sys/sdt.h>
   60 #include <sys/sleepqueue.h>
   61 #include <sys/sysctl.h>
   62 #include <sys/smp.h>
   63 
   64 #ifdef DDB
   65 #include <ddb/ddb.h>
   66 #include <machine/_inttypes.h>
   67 #endif
   68 
   69 #ifdef SMP
   70 #include <machine/cpu.h>
   71 #endif
   72 
   73 #ifndef NO_EVENTTIMERS
   74 DPCPU_DECLARE(sbintime_t, hardclocktime);
   75 #endif
   76 
   77 SDT_PROVIDER_DEFINE(callout_execute);
   78 SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *");
   79 SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *");
   80 
   81 #ifdef CALLOUT_PROFILING
   82 static int avg_depth;
   83 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
   84     "Average number of items examined per softclock call. Units = 1/1000");
   85 static int avg_gcalls;
   86 SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
   87     "Average number of Giant callouts made per softclock call. Units = 1/1000");
   88 static int avg_lockcalls;
   89 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
   90     "Average number of lock callouts made per softclock call. Units = 1/1000");
   91 static int avg_mpcalls;
   92 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
   93     "Average number of MP callouts made per softclock call. Units = 1/1000");
   94 static int avg_depth_dir;
   95 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
   96     "Average number of direct callouts examined per callout_process call. "
   97     "Units = 1/1000");
   98 static int avg_lockcalls_dir;
   99 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
  100     &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
  101     "callout_process call. Units = 1/1000");
  102 static int avg_mpcalls_dir;
  103 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
  104     0, "Average number of MP direct callouts made per callout_process call. "
  105     "Units = 1/1000");
  106 #endif
  107 
  108 static int ncallout;
  109 SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN, &ncallout, 0,
  110     "Number of entries in callwheel and size of timeout() preallocation");
  111 
  112 /*
  113  * TODO:
  114  *      allocate more timeout table slots when table overflows.
  115  */
  116 u_int callwheelsize, callwheelmask;
  117 
  118 /*
   119  * The callout cpu exec entities describe the state of the callouts
   120  * currently running on a CPU and the information needed to migrate
   121  * callouts to a new callout cpu.  In particular, the first entry of the
   122  * cc_exec_entity array holds the state for a callout running in SWI
   123  * thread context, while the second holds the state for a callout running
   124  * directly from hardware interrupt context.
   125  * The cached state is essential for deferring migration when the
   126  * migrating callout is already running.
  127  */
  128 struct cc_exec {
  129         struct callout          *cc_curr;
  130 #ifdef SMP
  131         void                    (*ce_migration_func)(void *);
  132         void                    *ce_migration_arg;
  133         int                     ce_migration_cpu;
  134         sbintime_t              ce_migration_time;
  135         sbintime_t              ce_migration_prec;
  136 #endif
  137         bool                    cc_cancel;
  138         bool                    cc_waiting;
  139 };
  140 
  141 /*
  142  * There is one struct callout_cpu per cpu, holding all relevant
  143  * state for the callout processing thread on the individual CPU.
  144  */
  145 struct callout_cpu {
  146         struct mtx_padalign     cc_lock;
  147         struct cc_exec          cc_exec_entity[2];
  148         struct callout          *cc_next;
  149         struct callout          *cc_callout;
  150         struct callout_list     *cc_callwheel;
  151         struct callout_tailq    cc_expireq;
  152         struct callout_slist    cc_callfree;
  153         sbintime_t              cc_firstevent;
  154         sbintime_t              cc_lastscan;
  155         void                    *cc_cookie;
  156         u_int                   cc_bucket;
  157         u_int                   cc_inited;
  158         char                    cc_ktr_event_name[20];
  159 };
  160 
  161 #define callout_migrating(c)    ((c)->c_iflags & CALLOUT_DFRMIGRATION)
  162 
  163 #define cc_exec_curr(cc, dir)           cc->cc_exec_entity[dir].cc_curr
  164 #define cc_exec_next(cc)                cc->cc_next
  165 #define cc_exec_cancel(cc, dir)         cc->cc_exec_entity[dir].cc_cancel
  166 #define cc_exec_waiting(cc, dir)        cc->cc_exec_entity[dir].cc_waiting
  167 #ifdef SMP
  168 #define cc_migration_func(cc, dir)      cc->cc_exec_entity[dir].ce_migration_func
  169 #define cc_migration_arg(cc, dir)       cc->cc_exec_entity[dir].ce_migration_arg
  170 #define cc_migration_cpu(cc, dir)       cc->cc_exec_entity[dir].ce_migration_cpu
  171 #define cc_migration_time(cc, dir)      cc->cc_exec_entity[dir].ce_migration_time
  172 #define cc_migration_prec(cc, dir)      cc->cc_exec_entity[dir].ce_migration_prec
  173 
  174 struct callout_cpu cc_cpu[MAXCPU];
  175 #define CPUBLOCK        MAXCPU
  176 #define CC_CPU(cpu)     (&cc_cpu[(cpu)])
  177 #define CC_SELF()       CC_CPU(PCPU_GET(cpuid))
  178 #else
  179 struct callout_cpu cc_cpu;
  180 #define CC_CPU(cpu)     &cc_cpu
  181 #define CC_SELF()       &cc_cpu
  182 #endif
  183 #define CC_LOCK(cc)     mtx_lock_spin(&(cc)->cc_lock)
  184 #define CC_UNLOCK(cc)   mtx_unlock_spin(&(cc)->cc_lock)
  185 #define CC_LOCK_ASSERT(cc)      mtx_assert(&(cc)->cc_lock, MA_OWNED)
  186 
  187 static int timeout_cpu;
  188 
  189 static void     callout_cpu_init(struct callout_cpu *cc, int cpu);
  190 static void     softclock_call_cc(struct callout *c, struct callout_cpu *cc,
  191 #ifdef CALLOUT_PROFILING
  192                     int *mpcalls, int *lockcalls, int *gcalls,
  193 #endif
  194                     int direct);
  195 
  196 static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
  197 
  198 /**
  199  * Locked by cc_lock:
  200  *   cc_curr         - If a callout is in progress, it is cc_curr.
  201  *                     If cc_curr is non-NULL, threads waiting in
  202  *                     callout_drain() will be woken up as soon as the
  203  *                     relevant callout completes.
   204  *   cc_cancel       - Changing to true with both c_lock and cc_lock held
   205  *                     guarantees that the current callout will not run.
   206  *                     The softclock() function sets this to false before it
   207  *                     drops cc_lock to acquire c_lock, and it calls
   208  *                     the handler only if cc_cancel is still false after
   209  *                     c_lock is successfully acquired.
   210  *   cc_waiting      - If a thread is waiting in callout_drain(), then
   211  *                     cc_waiting is true.  Set only when
   212  *                     cc_curr is non-NULL.
  213  */
  214 
  215 /*
  216  * Resets the execution entity tied to a specific callout cpu.
  217  */
  218 static void
  219 cc_cce_cleanup(struct callout_cpu *cc, int direct)
  220 {
  221 
  222         cc_exec_curr(cc, direct) = NULL;
  223         cc_exec_cancel(cc, direct) = false;
  224         cc_exec_waiting(cc, direct) = false;
  225 #ifdef SMP
  226         cc_migration_cpu(cc, direct) = CPUBLOCK;
  227         cc_migration_time(cc, direct) = 0;
  228         cc_migration_prec(cc, direct) = 0;
  229         cc_migration_func(cc, direct) = NULL;
  230         cc_migration_arg(cc, direct) = NULL;
  231 #endif
  232 }
  233 
  234 /*
  235  * Checks if migration is requested by a specific callout cpu.
  236  */
  237 static int
  238 cc_cce_migrating(struct callout_cpu *cc, int direct)
  239 {
  240 
  241 #ifdef SMP
  242         return (cc_migration_cpu(cc, direct) != CPUBLOCK);
  243 #else
  244         return (0);
  245 #endif
  246 }
  247 
  248 /*
  249  * Kernel low level callwheel initialization
  250  * called on cpu0 during kernel startup.
  251  */
  252 static void
  253 callout_callwheel_init(void *dummy)
  254 {
  255         struct callout_cpu *cc;
  256 
  257         /*
  258          * Calculate the size of the callout wheel and the preallocated
  259          * timeout() structures.
   260          * XXX: Clip ncallout to the value the old maxusers-based formula
   261          * gives at maxusers = 384.  This is still huge, but acceptable.
  262          */
  263         memset(CC_CPU(0), 0, sizeof(cc_cpu));
  264         ncallout = imin(16 + maxproc + maxfiles, 18508);
  265         TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
  266 
  267         /*
  268          * Calculate callout wheel size, should be next power of two higher
  269          * than 'ncallout'.
  270          */
  271         callwheelsize = 1 << fls(ncallout);
  272         callwheelmask = callwheelsize - 1;
  273 
  274         /*
  275          * Only cpu0 handles timeout(9) and receives a preallocation.
  276          *
  277          * XXX: Once all timeout(9) consumers are converted this can
  278          * be removed.
  279          */
  280         timeout_cpu = PCPU_GET(cpuid);
  281         cc = CC_CPU(timeout_cpu);
  282         cc->cc_callout = malloc(ncallout * sizeof(struct callout),
  283             M_CALLOUT, M_WAITOK);
  284         callout_cpu_init(cc, timeout_cpu);
  285 }
  286 SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);
  287 
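A rough userland sketch of the sizing arithmetic above (clamp ncallout, then
round the wheel size up to the next power of two).  xfls() is a local
stand-in for the kernel's fls(), and the maxproc/maxfiles values are
illustrative only:

#include <stdio.h>

/* Local stand-in for the kernel's fls(): 1-based index of the highest set bit. */
static int
xfls(unsigned int mask)
{
        int bit;

        for (bit = 0; mask != 0; bit++)
                mask >>= 1;
        return (bit);
}

int
main(void)
{
        int maxproc = 6164, maxfiles = 12328;   /* illustrative values only */
        int ncallout;
        unsigned int callwheelsize, callwheelmask;

        ncallout = 16 + maxproc + maxfiles;
        if (ncallout > 18508)
                ncallout = 18508;
        callwheelsize = 1U << xfls(ncallout);   /* next power of two above ncallout */
        callwheelmask = callwheelsize - 1;
        printf("ncallout=%d callwheelsize=%u callwheelmask=%#x\n",
            ncallout, callwheelsize, callwheelmask);
        return (0);
}
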
  288 /*
  289  * Initialize the per-cpu callout structures.
  290  */
  291 static void
  292 callout_cpu_init(struct callout_cpu *cc, int cpu)
  293 {
  294         struct callout *c;
  295         int i;
  296 
  297         mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
  298         SLIST_INIT(&cc->cc_callfree);
  299         cc->cc_inited = 1;
  300         cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
  301             M_CALLOUT, M_WAITOK);
  302         for (i = 0; i < callwheelsize; i++)
  303                 LIST_INIT(&cc->cc_callwheel[i]);
  304         TAILQ_INIT(&cc->cc_expireq);
  305         cc->cc_firstevent = SBT_MAX;
  306         for (i = 0; i < 2; i++)
  307                 cc_cce_cleanup(cc, i);
  308         snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
  309             "callwheel cpu %d", cpu);
  310         if (cc->cc_callout == NULL)     /* Only cpu0 handles timeout(9) */
  311                 return;
  312         for (i = 0; i < ncallout; i++) {
  313                 c = &cc->cc_callout[i];
  314                 callout_init(c, 0);
  315                 c->c_iflags = CALLOUT_LOCAL_ALLOC;
  316                 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
  317         }
  318 }
  319 
  320 #ifdef SMP
  321 /*
  322  * Switches the cpu tied to a specific callout.
   323  * The function expects the incoming callout cpu to be locked and returns
   324  * with the new callout cpu locked.
  325  */
  326 static struct callout_cpu *
  327 callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
  328 {
  329         struct callout_cpu *new_cc;
  330 
  331         MPASS(c != NULL && cc != NULL);
  332         CC_LOCK_ASSERT(cc);
  333 
  334         /*
   335          * Block interrupts and preemption while the callout cpu is
   336          * marked blocked, to avoid deadlocks with another thread that
   337          * may be trying to acquire the callout cpu lock.
  338          */
  339         c->c_cpu = CPUBLOCK;
  340         spinlock_enter();
  341         CC_UNLOCK(cc);
  342         new_cc = CC_CPU(new_cpu);
  343         CC_LOCK(new_cc);
  344         spinlock_exit();
  345         c->c_cpu = new_cpu;
  346         return (new_cc);
  347 }
  348 #endif
  349 
  350 /*
  351  * Start standard softclock thread.
  352  */
  353 static void
  354 start_softclock(void *dummy)
  355 {
  356         struct callout_cpu *cc;
  357 #ifdef SMP
  358         int cpu;
  359 #endif
  360 
  361         cc = CC_CPU(timeout_cpu);
  362         if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK,
  363             INTR_MPSAFE, &cc->cc_cookie))
  364                 panic("died while creating standard software ithreads");
  365 #ifdef SMP
  366         CPU_FOREACH(cpu) {
  367                 if (cpu == timeout_cpu)
  368                         continue;
  369                 cc = CC_CPU(cpu);
  370                 cc->cc_callout = NULL;  /* Only cpu0 handles timeout(9). */
  371                 callout_cpu_init(cc, cpu);
  372                 if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK,
  373                     INTR_MPSAFE, &cc->cc_cookie))
  374                         panic("died while creating standard software ithreads");
  375         }
  376 #endif
  377 }
  378 SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);
  379 
  380 #define CC_HASH_SHIFT   8
  381 
  382 static inline u_int
  383 callout_hash(sbintime_t sbt)
  384 {
  385 
  386         return (sbt >> (32 - CC_HASH_SHIFT));
  387 }
  388 
  389 static inline u_int
  390 callout_get_bucket(sbintime_t sbt)
  391 {
  392 
  393         return (callout_hash(sbt) & callwheelmask);
  394 }
  395 
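For illustration, a minimal userland sketch of the bucket arithmetic above,
assuming the 32.32 fixed-point layout of sbintime_t; with CC_HASH_SHIFT = 8,
consecutive buckets are 1/256 of a second apart:

#include <stdint.h>
#include <stdio.h>

#define CC_HASH_SHIFT   8

typedef int64_t xsbintime_t;    /* stand-in for sbintime_t (32.32 fixed point) */

static unsigned int
xcallout_hash(xsbintime_t sbt)
{
        return (sbt >> (32 - CC_HASH_SHIFT));
}

int
main(void)
{
        unsigned int callwheelsize = 1U << 12;          /* must be a power of two */
        unsigned int callwheelmask = callwheelsize - 1;
        xsbintime_t one_sec = (xsbintime_t)1 << 32;     /* 1 second */

        /* Two events 1/256 s apart land in adjacent buckets. */
        printf("bucket(1s)          = %u\n", xcallout_hash(one_sec) & callwheelmask);
        printf("bucket(1s + 1/256s) = %u\n",
            xcallout_hash(one_sec + (one_sec >> CC_HASH_SHIFT)) & callwheelmask);
        return (0);
}
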
  396 void
  397 callout_process(sbintime_t now)
  398 {
  399         struct callout *tmp, *tmpn;
  400         struct callout_cpu *cc;
  401         struct callout_list *sc;
  402         sbintime_t first, last, max, tmp_max;
  403         uint32_t lookahead;
  404         u_int firstb, lastb, nowb;
  405 #ifdef CALLOUT_PROFILING
  406         int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
  407 #endif
  408 
  409         cc = CC_SELF();
  410         mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
  411 
  412         /* Compute the buckets of the last scan and present times. */
  413         firstb = callout_hash(cc->cc_lastscan);
  414         cc->cc_lastscan = now;
  415         nowb = callout_hash(now);
  416 
  417         /* Compute the last bucket and minimum time of the bucket after it. */
  418         if (nowb == firstb)
  419                 lookahead = (SBT_1S / 16);
  420         else if (nowb - firstb == 1)
  421                 lookahead = (SBT_1S / 8);
  422         else
  423                 lookahead = (SBT_1S / 2);
  424         first = last = now;
  425         first += (lookahead / 2);
  426         last += lookahead;
  427         last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
  428         lastb = callout_hash(last) - 1;
  429         max = last;
  430 
  431         /*
  432          * Check if we wrapped around the entire wheel from the last scan.
   433          * If so, we need to scan the entire wheel for pending callouts.
  434          */
  435         if (lastb - firstb >= callwheelsize) {
  436                 lastb = firstb + callwheelsize - 1;
  437                 if (nowb - firstb >= callwheelsize)
  438                         nowb = lastb;
  439         }
  440 
  441         /* Iterate callwheel from firstb to nowb and then up to lastb. */
  442         do {
  443                 sc = &cc->cc_callwheel[firstb & callwheelmask];
  444                 tmp = LIST_FIRST(sc);
  445                 while (tmp != NULL) {
   446                         /* Run the callout if its scheduled time has arrived. */
  447                         if (tmp->c_time <= now) {
  448                                 /*
  449                                  * Consumer told us the callout may be run
  450                                  * directly from hardware interrupt context.
  451                                  */
  452                                 if (tmp->c_iflags & CALLOUT_DIRECT) {
  453 #ifdef CALLOUT_PROFILING
  454                                         ++depth_dir;
  455 #endif
  456                                         cc_exec_next(cc) =
  457                                             LIST_NEXT(tmp, c_links.le);
  458                                         cc->cc_bucket = firstb & callwheelmask;
  459                                         LIST_REMOVE(tmp, c_links.le);
  460                                         softclock_call_cc(tmp, cc,
  461 #ifdef CALLOUT_PROFILING
  462                                             &mpcalls_dir, &lockcalls_dir, NULL,
  463 #endif
  464                                             1);
  465                                         tmp = cc_exec_next(cc);
  466                                         cc_exec_next(cc) = NULL;
  467                                 } else {
  468                                         tmpn = LIST_NEXT(tmp, c_links.le);
  469                                         LIST_REMOVE(tmp, c_links.le);
  470                                         TAILQ_INSERT_TAIL(&cc->cc_expireq,
  471                                             tmp, c_links.tqe);
  472                                         tmp->c_iflags |= CALLOUT_PROCESSED;
  473                                         tmp = tmpn;
  474                                 }
  475                                 continue;
  476                         }
  477                         /* Skip events from distant future. */
  478                         if (tmp->c_time >= max)
  479                                 goto next;
  480                         /*
   481                          * The event's earliest time is later than the
   482                          * current maximum time, so it cannot be aggregated.
  483                          */
  484                         if (tmp->c_time > last) {
  485                                 lastb = nowb;
  486                                 goto next;
  487                         }
  488                         /* Update first and last time, respecting this event. */
  489                         if (tmp->c_time < first)
  490                                 first = tmp->c_time;
  491                         tmp_max = tmp->c_time + tmp->c_precision;
  492                         if (tmp_max < last)
  493                                 last = tmp_max;
  494 next:
  495                         tmp = LIST_NEXT(tmp, c_links.le);
  496                 }
  497                 /* Proceed with the next bucket. */
  498                 firstb++;
  499                 /*
   500                  * Stop if we looked past the present time and found
   501                  * an event we cannot execute now, or if we have
   502                  * looked far enough into the future.
  503                  */
  504         } while (((int)(firstb - lastb)) <= 0);
  505         cc->cc_firstevent = last;
  506 #ifndef NO_EVENTTIMERS
  507         cpu_new_callout(curcpu, last, first);
  508 #endif
  509 #ifdef CALLOUT_PROFILING
  510         avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
  511         avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
  512         avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
  513 #endif
  514         mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
  515         /*
   516          * swi_sched() acquires the thread lock, so we don't want to call
   517          * it with cc_lock held; that would be an incorrect lock order.
  518          */
  519         if (!TAILQ_EMPTY(&cc->cc_expireq))
  520                 swi_sched(cc->cc_cookie, 0);
  521 }
  522 
  523 static struct callout_cpu *
  524 callout_lock(struct callout *c)
  525 {
  526         struct callout_cpu *cc;
  527         int cpu;
  528 
  529         for (;;) {
  530                 cpu = c->c_cpu;
  531 #ifdef SMP
  532                 if (cpu == CPUBLOCK) {
  533                         while (c->c_cpu == CPUBLOCK)
  534                                 cpu_spinwait();
  535                         continue;
  536                 }
  537 #endif
  538                 cc = CC_CPU(cpu);
  539                 CC_LOCK(cc);
  540                 if (cpu == c->c_cpu)
  541                         break;
  542                 CC_UNLOCK(cc);
  543         }
  544         return (cc);
  545 }
  546 
  547 static void
  548 callout_cc_add(struct callout *c, struct callout_cpu *cc,
  549     sbintime_t sbt, sbintime_t precision, void (*func)(void *),
  550     void *arg, int cpu, int flags)
  551 {
  552         int bucket;
  553 
  554         CC_LOCK_ASSERT(cc);
  555         if (sbt < cc->cc_lastscan)
  556                 sbt = cc->cc_lastscan;
  557         c->c_arg = arg;
  558         c->c_iflags |= CALLOUT_PENDING;
  559         c->c_iflags &= ~CALLOUT_PROCESSED;
  560         c->c_flags |= CALLOUT_ACTIVE;
  561         if (flags & C_DIRECT_EXEC)
  562                 c->c_iflags |= CALLOUT_DIRECT;
  563         c->c_func = func;
  564         c->c_time = sbt;
  565         c->c_precision = precision;
  566         bucket = callout_get_bucket(c->c_time);
  567         CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
  568             c, (int)(c->c_precision >> 32),
  569             (u_int)(c->c_precision & 0xffffffff));
  570         LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
  571         if (cc->cc_bucket == bucket)
  572                 cc_exec_next(cc) = c;
  573 #ifndef NO_EVENTTIMERS
  574         /*
  575          * Inform the eventtimers(4) subsystem there's a new callout
  576          * that has been inserted, but only if really required.
  577          */
  578         if (SBT_MAX - c->c_time < c->c_precision)
  579                 c->c_precision = SBT_MAX - c->c_time;
  580         sbt = c->c_time + c->c_precision;
  581         if (sbt < cc->cc_firstevent) {
  582                 cc->cc_firstevent = sbt;
  583                 cpu_new_callout(cpu, sbt, c->c_time);
  584         }
  585 #endif
  586 }
  587 
  588 static void
  589 callout_cc_del(struct callout *c, struct callout_cpu *cc)
  590 {
  591 
  592         if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0)
  593                 return;
  594         c->c_func = NULL;
  595         SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
  596 }
  597 
  598 static void
  599 softclock_call_cc(struct callout *c, struct callout_cpu *cc,
  600 #ifdef CALLOUT_PROFILING
  601     int *mpcalls, int *lockcalls, int *gcalls,
  602 #endif
  603     int direct)
  604 {
  605         struct rm_priotracker tracker;
  606         void (*c_func)(void *);
  607         void *c_arg;
  608         struct lock_class *class;
  609         struct lock_object *c_lock;
  610         uintptr_t lock_status;
  611         int c_iflags;
  612 #ifdef SMP
  613         struct callout_cpu *new_cc;
  614         void (*new_func)(void *);
  615         void *new_arg;
  616         int flags, new_cpu;
  617         sbintime_t new_prec, new_time;
  618 #endif
  619 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 
  620         sbintime_t sbt1, sbt2;
  621         struct timespec ts2;
  622         static sbintime_t maxdt = 2 * SBT_1MS;  /* 2 msec */
  623         static timeout_t *lastfunc;
  624 #endif
  625 
  626         KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
  627             ("softclock_call_cc: pend %p %x", c, c->c_iflags));
  628         KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
  629             ("softclock_call_cc: act %p %x", c, c->c_flags));
  630         class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
  631         lock_status = 0;
  632         if (c->c_flags & CALLOUT_SHAREDLOCK) {
  633                 if (class == &lock_class_rm)
  634                         lock_status = (uintptr_t)&tracker;
  635                 else
  636                         lock_status = 1;
  637         }
  638         c_lock = c->c_lock;
  639         c_func = c->c_func;
  640         c_arg = c->c_arg;
  641         c_iflags = c->c_iflags;
  642         if (c->c_iflags & CALLOUT_LOCAL_ALLOC)
  643                 c->c_iflags = CALLOUT_LOCAL_ALLOC;
  644         else
  645                 c->c_iflags &= ~CALLOUT_PENDING;
  646         
  647         cc_exec_curr(cc, direct) = c;
  648         cc_exec_cancel(cc, direct) = false;
  649         CC_UNLOCK(cc);
  650         if (c_lock != NULL) {
  651                 class->lc_lock(c_lock, lock_status);
  652                 /*
  653                  * The callout may have been cancelled
  654                  * while we switched locks.
  655                  */
  656                 if (cc_exec_cancel(cc, direct)) {
  657                         class->lc_unlock(c_lock);
  658                         goto skip;
  659                 }
  660                 /* The callout cannot be stopped now. */
  661                 cc_exec_cancel(cc, direct) = true;
  662                 if (c_lock == &Giant.lock_object) {
  663 #ifdef CALLOUT_PROFILING
  664                         (*gcalls)++;
  665 #endif
  666                         CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
  667                             c, c_func, c_arg);
  668                 } else {
  669 #ifdef CALLOUT_PROFILING
  670                         (*lockcalls)++;
  671 #endif
  672                         CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
  673                             c, c_func, c_arg);
  674                 }
  675         } else {
  676 #ifdef CALLOUT_PROFILING
  677                 (*mpcalls)++;
  678 #endif
  679                 CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
  680                     c, c_func, c_arg);
  681         }
  682         KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
  683             "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
  684 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
  685         sbt1 = sbinuptime();
  686 #endif
  687         THREAD_NO_SLEEPING();
  688         SDT_PROBE1(callout_execute, , , callout__start, c);
  689         c_func(c_arg);
  690         SDT_PROBE1(callout_execute, , , callout__end, c);
  691         THREAD_SLEEPING_OK();
  692 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
  693         sbt2 = sbinuptime();
  694         sbt2 -= sbt1;
  695         if (sbt2 > maxdt) {
  696                 if (lastfunc != c_func || sbt2 > maxdt * 2) {
  697                         ts2 = sbttots(sbt2);
  698                         printf(
  699                 "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
  700                             c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
  701                 }
  702                 maxdt = sbt2;
  703                 lastfunc = c_func;
  704         }
  705 #endif
  706         KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
  707         CTR1(KTR_CALLOUT, "callout %p finished", c);
  708         if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
  709                 class->lc_unlock(c_lock);
  710 skip:
  711         CC_LOCK(cc);
  712         KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
  713         cc_exec_curr(cc, direct) = NULL;
  714         if (cc_exec_waiting(cc, direct)) {
  715                 /*
  716                  * There is someone waiting for the
  717                  * callout to complete.
  718                  * If the callout was scheduled for
  719                  * migration just cancel it.
  720                  */
  721                 if (cc_cce_migrating(cc, direct)) {
  722                         cc_cce_cleanup(cc, direct);
  723 
  724                         /*
   725                          * It should be asserted here that the callout
   726                          * is not destroyed, but that is not easy.
  727                          */
  728                         c->c_iflags &= ~CALLOUT_DFRMIGRATION;
  729                 }
  730                 cc_exec_waiting(cc, direct) = false;
  731                 CC_UNLOCK(cc);
  732                 wakeup(&cc_exec_waiting(cc, direct));
  733                 CC_LOCK(cc);
  734         } else if (cc_cce_migrating(cc, direct)) {
  735                 KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0,
  736                     ("Migrating legacy callout %p", c));
  737 #ifdef SMP
  738                 /*
  739                  * If the callout was scheduled for
  740                  * migration just perform it now.
  741                  */
  742                 new_cpu = cc_migration_cpu(cc, direct);
  743                 new_time = cc_migration_time(cc, direct);
  744                 new_prec = cc_migration_prec(cc, direct);
  745                 new_func = cc_migration_func(cc, direct);
  746                 new_arg = cc_migration_arg(cc, direct);
  747                 cc_cce_cleanup(cc, direct);
  748 
  749                 /*
   750                  * It should be asserted here that the callout is not
   751                  * destroyed, but that is not easy.
   752                  *
   753                  * First, handle deferred callout stops.
  754                  */
  755                 if (!callout_migrating(c)) {
  756                         CTR3(KTR_CALLOUT,
  757                              "deferred cancelled %p func %p arg %p",
  758                              c, new_func, new_arg);
  759                         callout_cc_del(c, cc);
  760                         return;
  761                 }
  762                 c->c_iflags &= ~CALLOUT_DFRMIGRATION;
  763 
  764                 new_cc = callout_cpu_switch(c, cc, new_cpu);
  765                 flags = (direct) ? C_DIRECT_EXEC : 0;
  766                 callout_cc_add(c, new_cc, new_time, new_prec, new_func,
  767                     new_arg, new_cpu, flags);
  768                 CC_UNLOCK(new_cc);
  769                 CC_LOCK(cc);
  770 #else
  771                 panic("migration should not happen");
  772 #endif
  773         }
  774         /*
  775          * If the current callout is locally allocated (from
  776          * timeout(9)) then put it on the freelist.
  777          *
  778          * Note: we need to check the cached copy of c_iflags because
  779          * if it was not local, then it's not safe to deref the
  780          * callout pointer.
  781          */
  782         KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 ||
  783             c->c_iflags == CALLOUT_LOCAL_ALLOC,
  784             ("corrupted callout"));
  785         if (c_iflags & CALLOUT_LOCAL_ALLOC)
  786                 callout_cc_del(c, cc);
  787 }
  788 
  789 /*
  790  * The callout mechanism is based on the work of Adam M. Costello and
  791  * George Varghese, published in a technical report entitled "Redesigning
  792  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
  793  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
  794  * used in this implementation was published by G. Varghese and T. Lauck in
  795  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
  796  * the Efficient Implementation of a Timer Facility" in the Proceedings of
  797  * the 11th ACM Annual Symposium on Operating Systems Principles,
  798  * Austin, Texas Nov 1987.
  799  */
  800 
  801 /*
  802  * Software (low priority) clock interrupt.
  803  * Run periodic events from timeout queue.
  804  */
  805 void
  806 softclock(void *arg)
  807 {
  808         struct callout_cpu *cc;
  809         struct callout *c;
  810 #ifdef CALLOUT_PROFILING
  811         int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
  812 #endif
  813 
  814         cc = (struct callout_cpu *)arg;
  815         CC_LOCK(cc);
  816         while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
  817                 TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
  818                 softclock_call_cc(c, cc,
  819 #ifdef CALLOUT_PROFILING
  820                     &mpcalls, &lockcalls, &gcalls,
  821 #endif
  822                     0);
  823 #ifdef CALLOUT_PROFILING
  824                 ++depth;
  825 #endif
  826         }
  827 #ifdef CALLOUT_PROFILING
  828         avg_depth += (depth * 1000 - avg_depth) >> 8;
  829         avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
  830         avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
  831         avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
  832 #endif
  833         CC_UNLOCK(cc);
  834 }
  835 
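The CALLOUT_PROFILING averages above are fixed-point exponential moving
averages: each sample is scaled by 1000 (matching the "Units = 1/1000" in the
sysctl descriptions) and folded in with a weight of 1/256.  A small
self-contained sketch of that update rule:

#include <stdio.h>

/* Same update rule as avg += (value * 1000 - avg) >> 8 above. */
static int
ewma_update(int avg, int sample)
{
        return (avg + ((sample * 1000 - avg) >> 8));
}

int
main(void)
{
        int avg_depth = 0, i;

        /*
         * Feed a constant depth of 5; the average climbs toward 5000
         * (i.e. 5.000) and settles once the remaining error drops below 256.
         */
        for (i = 0; i < 2000; i++)
                avg_depth = ewma_update(avg_depth, 5);
        printf("avg_depth = %d (units of 1/1000)\n", avg_depth);
        return (0);
}
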
  836 /*
  837  * timeout --
  838  *      Execute a function after a specified length of time.
  839  *
  840  * untimeout --
  841  *      Cancel previous timeout function call.
  842  *
  843  * callout_handle_init --
  844  *      Initialize a handle so that using it with untimeout is benign.
  845  *
  846  *      See AT&T BCI Driver Reference Manual for specification.  This
  847  *      implementation differs from that one in that although an
  848  *      identification value is returned from timeout, the original
  849  *      arguments to timeout as well as the identifier are used to
  850  *      identify entries for untimeout.
  851  */
  852 struct callout_handle
  853 timeout(ftn, arg, to_ticks)
  854         timeout_t *ftn;
  855         void *arg;
  856         int to_ticks;
  857 {
  858         struct callout_cpu *cc;
  859         struct callout *new;
  860         struct callout_handle handle;
  861 
  862         cc = CC_CPU(timeout_cpu);
  863         CC_LOCK(cc);
  864         /* Fill in the next free callout structure. */
  865         new = SLIST_FIRST(&cc->cc_callfree);
  866         if (new == NULL)
  867                 /* XXX Attempt to malloc first */
  868                 panic("timeout table full");
  869         SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
  870         callout_reset(new, to_ticks, ftn, arg);
  871         handle.callout = new;
  872         CC_UNLOCK(cc);
  873 
  874         return (handle);
  875 }
  876 
  877 void
  878 untimeout(ftn, arg, handle)
  879         timeout_t *ftn;
  880         void *arg;
  881         struct callout_handle handle;
  882 {
  883         struct callout_cpu *cc;
  884 
  885         /*
  886          * Check for a handle that was initialized
  887          * by callout_handle_init, but never used
  888          * for a real timeout.
  889          */
  890         if (handle.callout == NULL)
  891                 return;
  892 
  893         cc = callout_lock(handle.callout);
  894         if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
  895                 callout_stop(handle.callout);
  896         CC_UNLOCK(cc);
  897 }
  898 
  899 void
  900 callout_handle_init(struct callout_handle *handle)
  901 {
  902         handle->callout = NULL;
  903 }
  904 
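A hedged usage sketch of the legacy timeout(9) interface implemented above, in
the form it would take in kernel code; foo_softc and the foo_*() routines are
illustrative names, not part of this file:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>

struct foo_softc {
        struct callout_handle   foo_handle;
};

static void
foo_tick(void *arg)
{
        struct foo_softc *sc = arg;

        /* ... periodic work ...; re-arm for one second from now. */
        sc->foo_handle = timeout(foo_tick, sc, hz);
}

static void
foo_attach(struct foo_softc *sc)
{
        /* Make untimeout() safe even if the timeout was never started. */
        callout_handle_init(&sc->foo_handle);
        sc->foo_handle = timeout(foo_tick, sc, hz);
}

static void
foo_detach(struct foo_softc *sc)
{
        untimeout(foo_tick, sc, sc->foo_handle);
}
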
  905 void
  906 callout_when(sbintime_t sbt, sbintime_t precision, int flags,
  907     sbintime_t *res, sbintime_t *prec_res)
  908 {
  909         sbintime_t to_sbt, to_pr;
  910 
  911         if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) {
  912                 *res = sbt;
  913                 *prec_res = precision;
  914                 return;
  915         }
  916         if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt)
  917                 sbt = tick_sbt;
  918         if ((flags & C_HARDCLOCK) != 0 ||
  919 #ifdef NO_EVENTTIMERS
  920             sbt >= sbt_timethreshold) {
  921                 to_sbt = getsbinuptime();
  922 
  923                 /* Add safety belt for the case of hz > 1000. */
  924                 to_sbt += tc_tick_sbt - tick_sbt;
  925 #else
  926             sbt >= sbt_tickthreshold) {
  927                 /*
  928                  * Obtain the time of the last hardclock() call on
   929                  * this CPU directly from kern_clocksource.c.
   930                  * The value is per-CPU, but it is the same on all
   931                  * active CPUs.
  932                  */
  933 #ifdef __LP64__
  934                 to_sbt = DPCPU_GET(hardclocktime);
  935 #else
  936                 spinlock_enter();
  937                 to_sbt = DPCPU_GET(hardclocktime);
  938                 spinlock_exit();
  939 #endif
  940 #endif
  941                 if ((flags & C_HARDCLOCK) == 0)
  942                         to_sbt += tick_sbt;
  943         } else
  944                 to_sbt = sbinuptime();
  945         if (SBT_MAX - to_sbt < sbt)
  946                 to_sbt = SBT_MAX;
  947         else
  948                 to_sbt += sbt;
  949         *res = to_sbt;
  950         to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
  951             sbt >> C_PRELGET(flags));
  952         *prec_res = to_pr > precision ? to_pr : precision;
  953 }
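
Numerically, the precision selection above keeps the larger of the caller's
explicit precision and a relative share of the requested interval
(sbt >> tc_precexp when the flags carry no relative-precision value).  A
small sketch of that arithmetic; the tc_precexp value of 5 is an assumption
for illustration only:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        int64_t sbt = (int64_t)1 << 32;         /* 1 second, 32.32 fixed point */
        int64_t req_prec = 0;                   /* caller gave no explicit precision */
        int tc_precexp = 5;                     /* assumed: 1/32 of the interval */
        int64_t rel_prec, prec;

        rel_prec = sbt >> tc_precexp;
        prec = (rel_prec > req_prec) ? rel_prec : req_prec;
        printf("precision = 1/%jd of a second\n",
            (intmax_t)(((int64_t)1 << 32) / prec));
        return (0);
}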
  954 
  955 /*
  956  * New interface; clients allocate their own callout structures.
  957  *
  958  * callout_reset() - establish or change a timeout
  959  * callout_stop() - disestablish a timeout
  960  * callout_init() - initialize a callout structure so that it can
  961  *      safely be passed to callout_reset() and callout_stop()
  962  *
  963  * <sys/callout.h> defines three convenience macros:
  964  *
  965  * callout_active() - returns truth if callout has not been stopped,
  966  *      drained, or deactivated since the last time the callout was
  967  *      reset.
  968  * callout_pending() - returns truth if callout is still waiting for timeout
  969  * callout_deactivate() - marks the callout as having been serviced
  970  */
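
A hedged usage sketch of this interface; bar_softc and the bar_*() routines
are illustrative names, not part of this file, and callout_init_mtx() (from
<sys/callout.h>) is used to associate the callout with the driver's own mutex:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct bar_softc {
        struct mtx      bar_mtx;
        struct callout  bar_callout;
        int             bar_ticks_seen;
};

static void
bar_timer(void *arg)
{
        struct bar_softc *sc = arg;

        /* bar_mtx is held on entry because of callout_init_mtx(). */
        mtx_assert(&sc->bar_mtx, MA_OWNED);
        sc->bar_ticks_seen++;
        callout_reset(&sc->bar_callout, hz, bar_timer, sc);    /* re-arm */
}

static void
bar_start(struct bar_softc *sc)
{
        mtx_init(&sc->bar_mtx, "bar", NULL, MTX_DEF);
        callout_init_mtx(&sc->bar_callout, &sc->bar_mtx, 0);
        mtx_lock(&sc->bar_mtx);
        callout_reset(&sc->bar_callout, hz, bar_timer, sc);
        mtx_unlock(&sc->bar_mtx);
}

static void
bar_stop(struct bar_softc *sc)
{
        /* callout_drain() may sleep; do not hold bar_mtx across it. */
        callout_drain(&sc->bar_callout);
        mtx_destroy(&sc->bar_mtx);
}
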
  971 int
  972 callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec,
  973     void (*ftn)(void *), void *arg, int cpu, int flags)
  974 {
  975         sbintime_t to_sbt, precision;
  976         struct callout_cpu *cc;
  977         int cancelled, direct;
  978         int ignore_cpu=0;
  979 
  980         cancelled = 0;
  981         if (cpu == -1) {
  982                 ignore_cpu = 1;
  983         } else if ((cpu >= MAXCPU) ||
  984                    ((CC_CPU(cpu))->cc_inited == 0)) {
  985                 /* Invalid CPU spec */
  986                 panic("Invalid CPU in callout %d", cpu);
  987         }
  988         callout_when(sbt, prec, flags, &to_sbt, &precision);
  989 
  990         /* 
   991          * This flag used to be set by callout_cc_add(), but on the
   992          * first call here we could end up with the wrong direct flag
   993          * if it is not determined before the callout is added.
  994          */
  995         if (flags & C_DIRECT_EXEC) {
  996                 direct = 1;
  997         } else {
  998                 direct = 0;
  999         }
 1000         KASSERT(!direct || c->c_lock == NULL,
 1001             ("%s: direct callout %p has lock", __func__, c));
 1002         cc = callout_lock(c);
 1003         /*
  1004          * Don't allow migration of pre-allocated callouts lest they
  1005          * become unbalanced; also handle the case where the caller
  1006          * does not care which CPU is used.
 1007          */
 1008         if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) ||
 1009             ignore_cpu) {
 1010                 cpu = c->c_cpu;
 1011         }
 1012 
 1013         if (cc_exec_curr(cc, direct) == c) {
 1014                 /*
 1015                  * We're being asked to reschedule a callout which is
 1016                  * currently in progress.  If there is a lock then we
 1017                  * can cancel the callout if it has not really started.
 1018                  */
 1019                 if (c->c_lock != NULL && !cc_exec_cancel(cc, direct))
 1020                         cancelled = cc_exec_cancel(cc, direct) = true;
 1021                 if (cc_exec_waiting(cc, direct)) {
 1022                         /*
 1023                          * Someone has called callout_drain to kill this
 1024                          * callout.  Don't reschedule.
 1025                          */
 1026                         CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
 1027                             cancelled ? "cancelled" : "failed to cancel",
 1028                             c, c->c_func, c->c_arg);
 1029                         CC_UNLOCK(cc);
 1030                         return (cancelled);
 1031                 }
 1032 #ifdef SMP
 1033                 if (callout_migrating(c)) {
 1034                         /* 
 1035                          * This only occurs when a second callout_reset_sbt_on
 1036                          * is made after a previous one moved it into
 1037                          * deferred migration (below). Note we do *not* change
 1038                          * the prev_cpu even though the previous target may
 1039                          * be different.
 1040                          */
 1041                         cc_migration_cpu(cc, direct) = cpu;
 1042                         cc_migration_time(cc, direct) = to_sbt;
 1043                         cc_migration_prec(cc, direct) = precision;
 1044                         cc_migration_func(cc, direct) = ftn;
 1045                         cc_migration_arg(cc, direct) = arg;
 1046                         cancelled = 1;
 1047                         CC_UNLOCK(cc);
 1048                         return (cancelled);
 1049                 }
 1050 #endif
 1051         }
 1052         if (c->c_iflags & CALLOUT_PENDING) {
 1053                 if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 1054                         if (cc_exec_next(cc) == c)
 1055                                 cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 1056                         LIST_REMOVE(c, c_links.le);
 1057                 } else {
 1058                         TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 1059                 }
 1060                 cancelled = 1;
 1061                 c->c_iflags &= ~ CALLOUT_PENDING;
 1062                 c->c_flags &= ~ CALLOUT_ACTIVE;
 1063         }
 1064 
 1065 #ifdef SMP
 1066         /*
 1067          * If the callout must migrate try to perform it immediately.
 1068          * If the callout is currently running, just defer the migration
 1069          * to a more appropriate moment.
 1070          */
 1071         if (c->c_cpu != cpu) {
 1072                 if (cc_exec_curr(cc, direct) == c) {
 1073                         /* 
  1074                          * The pending flag will have been removed, since
  1075                          * this callout is actually executing on another
  1076                          * CPU.  That callout should be waiting on the
  1077                          * lock the caller holds.  Since we set both the
  1078                          * active and pending flags, after we return and
  1079                          * the executing callout acquires that lock, it
  1080                          * will see that pending is true and return.
  1081                          * When the actual callout execution returns, the
  1082                          * migration is performed in softclock_call_cc():
  1083                          * the callout is placed on the new CPU via a
  1084                          * call to callout_cpu_switch(), which takes the
  1085                          * lock of the right CPU, followed by a call to
  1086                          * callout_cc_add(), which adds it there (see
  1087                          * softclock_call_cc() above).
 1088                          */
 1089                         cc_migration_cpu(cc, direct) = cpu;
 1090                         cc_migration_time(cc, direct) = to_sbt;
 1091                         cc_migration_prec(cc, direct) = precision;
 1092                         cc_migration_func(cc, direct) = ftn;
 1093                         cc_migration_arg(cc, direct) = arg;
 1094                         c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
 1095                         c->c_flags |= CALLOUT_ACTIVE;
 1096                         CTR6(KTR_CALLOUT,
 1097                     "migration of %p func %p arg %p in %d.%08x to %u deferred",
 1098                             c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
 1099                             (u_int)(to_sbt & 0xffffffff), cpu);
 1100                         CC_UNLOCK(cc);
 1101                         return (cancelled);
 1102                 }
 1103                 cc = callout_cpu_switch(c, cc, cpu);
 1104         }
 1105 #endif
 1106 
 1107         callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
 1108         CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
 1109             cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
 1110             (u_int)(to_sbt & 0xffffffff));
 1111         CC_UNLOCK(cc);
 1112 
 1113         return (cancelled);
 1114 }
 1115 
 1116 /*
 1117  * Common idioms that can be optimized in the future.
 1118  */
 1119 int
 1120 callout_schedule_on(struct callout *c, int to_ticks, int cpu)
 1121 {
 1122         return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
 1123 }
 1124 
 1125 int
 1126 callout_schedule(struct callout *c, int to_ticks)
 1127 {
 1128         return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
 1129 }
 1130 
 1131 int
 1132 _callout_stop_safe(c, flags)
 1133         struct  callout *c;
 1134         int     flags;
 1135 {
 1136         struct callout_cpu *cc, *old_cc;
 1137         struct lock_class *class;
 1138         int direct, sq_locked, use_lock;
 1139         int not_on_a_list;
 1140 
 1141         /*
 1142          * Some old subsystems don't hold Giant while running a callout_stop(),
 1143          * so just discard this check for the moment.
 1144          */
 1145         if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) {
 1146                 if (c->c_lock == &Giant.lock_object)
 1147                         use_lock = mtx_owned(&Giant);
 1148                 else {
 1149                         use_lock = 1;
 1150                         class = LOCK_CLASS(c->c_lock);
 1151                         class->lc_assert(c->c_lock, LA_XLOCKED);
 1152                 }
 1153         } else
 1154                 use_lock = 0;
 1155         if (c->c_iflags & CALLOUT_DIRECT) {
 1156                 direct = 1;
 1157         } else {
 1158                 direct = 0;
 1159         }
 1160         sq_locked = 0;
 1161         old_cc = NULL;
 1162 again:
 1163         cc = callout_lock(c);
 1164 
 1165         if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
 1166             (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
 1167             ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
 1168                 /*
 1169                  * Special case where this slipped in while we
 1170                  * were migrating *as* the callout is about to
 1171                  * execute. The caller probably holds the lock
 1172                  * the callout wants.
 1173                  *
 1174                  * Get rid of the migration first. Then set
 1175                  * the flag that tells this code *not* to
  1176                  * try to remove it from any lists (it's not
 1177                  * on one yet). When the callout wheel runs,
 1178                  * it will ignore this callout.
 1179                  */
 1180                 c->c_iflags &= ~CALLOUT_PENDING;
 1181                 c->c_flags &= ~CALLOUT_ACTIVE;
 1182                 not_on_a_list = 1;
 1183         } else {
 1184                 not_on_a_list = 0;
 1185         }
 1186 
 1187         /*
 1188          * If the callout was migrating while the callout cpu lock was
 1189          * dropped,  just drop the sleepqueue lock and check the states
 1190          * again.
 1191          */
 1192         if (sq_locked != 0 && cc != old_cc) {
 1193 #ifdef SMP
 1194                 CC_UNLOCK(cc);
 1195                 sleepq_release(&cc_exec_waiting(old_cc, direct));
 1196                 sq_locked = 0;
 1197                 old_cc = NULL;
 1198                 goto again;
 1199 #else
 1200                 panic("migration should not happen");
 1201 #endif
 1202         }
 1203 
 1204         /*
 1205          * If the callout isn't pending, it's not on the queue, so
 1206          * don't attempt to remove it from the queue.  We can try to
 1207          * stop it by other means however.
 1208          */
 1209         if (!(c->c_iflags & CALLOUT_PENDING)) {
 1210                 c->c_flags &= ~CALLOUT_ACTIVE;
 1211 
 1212                 /*
 1213                  * If it wasn't on the queue and it isn't the current
 1214                  * callout, then we can't stop it, so just bail.
 1215                  */
 1216                 if (cc_exec_curr(cc, direct) != c) {
 1217                         CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 1218                             c, c->c_func, c->c_arg);
 1219                         CC_UNLOCK(cc);
 1220                         if (sq_locked)
 1221                                 sleepq_release(&cc_exec_waiting(cc, direct));
 1222                         return (0);
 1223                 }
 1224 
 1225                 if ((flags & CS_DRAIN) != 0) {
 1226                         /*
 1227                          * The current callout is running (or just
 1228                          * about to run) and blocking is allowed, so
 1229                          * just wait for the current invocation to
 1230                          * finish.
 1231                          */
 1232                         while (cc_exec_curr(cc, direct) == c) {
 1233                                 /*
 1234                                  * Use direct calls to sleepqueue interface
 1235                                  * instead of cv/msleep in order to avoid
 1236                                  * a LOR between cc_lock and sleepqueue
 1237                                  * chain spinlocks.  This piece of code
 1238                                  * emulates a msleep_spin() call actually.
 1239                                  *
 1240                                  * If we already have the sleepqueue chain
 1241                                  * locked, then we can safely block.  If we
 1242                                  * don't already have it locked, however,
 1243                                  * we have to drop the cc_lock to lock
 1244                                  * it.  This opens several races, so we
 1245                                  * restart at the beginning once we have
 1246                                  * both locks.  If nothing has changed, then
 1247                                  * we will end up back here with sq_locked
 1248                                  * set.
 1249                                  */
 1250                                 if (!sq_locked) {
 1251                                         CC_UNLOCK(cc);
 1252                                         sleepq_lock(
 1253                                             &cc_exec_waiting(cc, direct));
 1254                                         sq_locked = 1;
 1255                                         old_cc = cc;
 1256                                         goto again;
 1257                                 }
 1258 
 1259                                 /*
  1260                                  * Migration could be cancelled here, but
  1261                                  * since it is not clear when the migration
  1262                                  * state will be torn down, just let
  1263                                  * softclock() take care of it.
 1264                                  */
 1265                                 cc_exec_waiting(cc, direct) = true;
 1266                                 DROP_GIANT();
 1267                                 CC_UNLOCK(cc);
 1268                                 sleepq_add(
 1269                                     &cc_exec_waiting(cc, direct),
 1270                                     &cc->cc_lock.lock_object, "codrain",
 1271                                     SLEEPQ_SLEEP, 0);
 1272                                 sleepq_wait(
 1273                                     &cc_exec_waiting(cc, direct),
 1274                                              0);
 1275                                 sq_locked = 0;
 1276                                 old_cc = NULL;
 1277 
 1278                                 /* Reacquire locks previously released. */
 1279                                 PICKUP_GIANT();
 1280                                 CC_LOCK(cc);
 1281                         }
 1282                 } else if (use_lock &&
 1283                            !cc_exec_cancel(cc, direct)) {
 1284                         
 1285                         /*
 1286                          * The current callout is waiting for its
 1287                          * lock which we hold.  Cancel the callout
 1288                          * and return.  After our caller drops the
 1289                          * lock, the callout will be skipped in
 1290                          * softclock().
 1291                          */
 1292                         cc_exec_cancel(cc, direct) = true;
 1293                         CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 1294                             c, c->c_func, c->c_arg);
 1295                         KASSERT(!cc_cce_migrating(cc, direct),
 1296                             ("callout wrongly scheduled for migration"));
 1297                         if (callout_migrating(c)) {
 1298                                 c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 1299 #ifdef SMP
 1300                                 cc_migration_cpu(cc, direct) = CPUBLOCK;
 1301                                 cc_migration_time(cc, direct) = 0;
 1302                                 cc_migration_prec(cc, direct) = 0;
 1303                                 cc_migration_func(cc, direct) = NULL;
 1304                                 cc_migration_arg(cc, direct) = NULL;
 1305 #endif
 1306                         }
 1307                         CC_UNLOCK(cc);
 1308                         KASSERT(!sq_locked, ("sleepqueue chain locked"));
 1309                         return (1);
 1310                 } else if (callout_migrating(c)) {
 1311                         /*
 1312                          * The callout is currently being serviced
 1313                          * and the "next" callout is scheduled at
 1314                          * its completion with a migration. We remove
 1315                          * the migration flag so it *won't* get rescheduled,
 1316                          * but we can't stop the one that's running, so
 1317                          * we report failure unless CS_MIGRBLOCK is set.
 1318                          */
 1319                         c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 1320 #ifdef SMP
 1321                         /* 
 1322                          * We can't call cc_cce_cleanup() here since
 1323                          * that would clear .ce_curr while the callout
 1324                          * is still running.  Doing so would prevent
 1325                          * the callout from being rescheduled when its
 1326                          * execution completes.
 1327                          */
 1328                         cc_migration_cpu(cc, direct) = CPUBLOCK;
 1329                         cc_migration_time(cc, direct) = 0;
 1330                         cc_migration_prec(cc, direct) = 0;
 1331                         cc_migration_func(cc, direct) = NULL;
 1332                         cc_migration_arg(cc, direct) = NULL;
 1333 #endif
 1334                         CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
 1335                             c, c->c_func, c->c_arg);
 1336                         CC_UNLOCK(cc);
 1337                         return ((flags & CS_MIGRBLOCK) != 0);
 1338                 }
 1339                 CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 1340                     c, c->c_func, c->c_arg);
 1341                 CC_UNLOCK(cc);
 1342                 KASSERT(!sq_locked, ("sleepqueue chain still locked"));
 1343                 return (0);
 1344         }
 1345         if (sq_locked)
 1346                 sleepq_release(&cc_exec_waiting(cc, direct));
 1347 
 1348         c->c_iflags &= ~CALLOUT_PENDING;
 1349         c->c_flags &= ~CALLOUT_ACTIVE;
 1350 
 1351         CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 1352             c, c->c_func, c->c_arg);
 1353         if (not_on_a_list == 0) {
 1354                 if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 1355                         if (cc_exec_next(cc) == c)
 1356                                 cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 1357                         LIST_REMOVE(c, c_links.le);
 1358                 } else {
 1359                         TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 1360                 }
 1361         }
 1362         callout_cc_del(c, cc);
 1363         CC_UNLOCK(cc);
 1364         return (1);
 1365 }
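
The stop/drain logic that ends here is normally reached through the callout_stop(9) and callout_drain(9) front ends.  The following is a minimal sketch, not part of kern_timeout.c, of how a driver typically pairs them with callout_init_mtx(); the "foo_*" names and the softc layout are hypothetical.

/*
 * Sketch for illustration only; not part of kern_timeout.c.
 * Assumes <sys/param.h>, <sys/systm.h>, <sys/lock.h>, <sys/mutex.h>
 * and <sys/callout.h>.  The "foo_*" names are hypothetical.
 */
struct foo_softc {
        struct mtx      sc_mtx;
        struct callout  sc_callout;
};

static void
foo_timeout(void *arg)
{
        struct foo_softc *sc = arg;

        /* sc_mtx is held on entry because of callout_init_mtx() below. */
        callout_reset(&sc->sc_callout, hz, foo_timeout, sc);
}

static void
foo_attach(struct foo_softc *sc)
{
        mtx_init(&sc->sc_mtx, "foo", NULL, MTX_DEF);
        callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
        mtx_lock(&sc->sc_mtx);
        callout_reset(&sc->sc_callout, hz, foo_timeout, sc);
        mtx_unlock(&sc->sc_mtx);
}

static void
foo_detach(struct foo_softc *sc)
{
        /*
         * callout_drain() may sleep until a running handler finishes, so
         * call it without sc_mtx held; callout_stop() may be called with
         * the mutex held and returns non-zero if a pending call was removed.
         */
        callout_drain(&sc->sc_callout);
        mtx_destroy(&sc->sc_mtx);
}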
 1366 
 1367 void
 1368 callout_init(c, mpsafe)
 1369         struct  callout *c;
 1370         int mpsafe;
 1371 {
 1372         bzero(c, sizeof *c);
 1373         if (mpsafe) {
 1374                 c->c_lock = NULL;
 1375                 c->c_iflags = CALLOUT_RETURNUNLOCKED;
 1376         } else {
 1377                 c->c_lock = &Giant.lock_object;
 1378                 c->c_iflags = 0;
 1379         }
 1380         c->c_cpu = timeout_cpu;
 1381 }
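
callout_init() above either leaves the callout lockless (mpsafe non-zero) or binds it to Giant (mpsafe zero), in which case the handler runs with Giant held.  A minimal sketch of the lockless form, with hypothetical "bar_*" names; such a handler must do its own locking:

/* Sketch for illustration only; not part of kern_timeout.c. */
static struct callout bar_callout;

static void
bar_tick(void *arg)
{
        /* mpsafe callout: no lock is held on entry. */
}

static void
bar_start(void)
{
        callout_init(&bar_callout, 1);          /* 1 = MP-safe, no Giant */
        callout_reset(&bar_callout, 5 * hz, bar_tick, NULL);
}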
 1382 
 1383 void
 1384 _callout_init_lock(c, lock, flags)
 1385         struct  callout *c;
 1386         struct  lock_object *lock;
 1387         int flags;
 1388 {
 1389         bzero(c, sizeof *c);
 1390         c->c_lock = lock;
 1391         KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
 1392             ("callout_init_lock: bad flags %d", flags));
 1393         KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
 1394             ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
 1395         KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
 1396             (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
 1397             __func__));
 1398         c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
 1399         c->c_cpu = timeout_cpu;
 1400 }
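
_callout_init_lock() above is usually reached through the callout_init_mtx(9) and callout_init_rw(9) macros in <sys/callout.h>, which pass the embedded lock_object of a mutex or rwlock.  A minimal sketch with hypothetical "baz_*" names, using CALLOUT_SHAREDLOCK so that the handler is expected to run with the rwlock held in shared mode:

/* Sketch for illustration only; not part of kern_timeout.c. */
static struct rwlock    baz_lock;
static struct callout   baz_callout;

static void
baz_tick(void *arg)
{
        /* baz_lock is expected to be held (shared) on entry. */
}

static void
baz_init(void)
{
        rw_init(&baz_lock, "baz");
        callout_init_rw(&baz_callout, &baz_lock, CALLOUT_SHAREDLOCK);
        rw_wlock(&baz_lock);
        callout_reset(&baz_callout, hz, baz_tick, NULL);
        rw_wunlock(&baz_lock);
}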
 1401 
 1402 #ifdef APM_FIXUP_CALLTODO
 1403 /* 
 1404  * Adjust the kernel calltodo timeout list.  This routine is used after
 1405  * an APM resume to recalculate the calltodo timer list values with the
 1406  * number of ticks we have been asleep.  The next hardclock() will detect
 1407  * that there are fired timers and run softclock() to execute them.
 1408  *
 1409  * Please note, I have not done an exhaustive analysis of what code this
 1410  * might break.  I am motivated to have my select()'s and alarm()'s that
 1411  * expired during suspend fire upon resume so that the applications
 1412  * which set the timer can do the maintenance the timer was for as close
 1413  * as possible to the originally intended time.  Testing this code for a
 1414  * week showed that resuming from a suspend resulted in 22 to 25 timers
 1415  * firing, which seemed independent of whether the suspend was 2 hours or
 1416  * 2 days.  Your mileage may vary.   - Ken Key <key@cs.utk.edu>
 1417  */
 1418 void
 1419 adjust_timeout_calltodo(time_change)
 1420     struct timeval *time_change;
 1421 {
 1422         register struct callout *p;
 1423         unsigned long delta_ticks;
 1424 
 1425         /* 
 1426          * How many ticks were we asleep?
 1427          * (stolen from tvtohz()).
 1428          */
 1429 
 1430         /* Nothing to do for a negative time change. */
 1431         if (time_change->tv_sec < 0)
 1432                 return;
 1433         else if (time_change->tv_sec <= LONG_MAX / 1000000)
 1434                 delta_ticks = (time_change->tv_sec * 1000000 +
 1435                                time_change->tv_usec + (tick - 1)) / tick + 1;
 1436         else if (time_change->tv_sec <= LONG_MAX / hz)
 1437                 delta_ticks = time_change->tv_sec * hz +
 1438                               (time_change->tv_usec + (tick - 1)) / tick + 1;
 1439         else
 1440                 delta_ticks = LONG_MAX;
 1441 
 1442         if (delta_ticks > INT_MAX)
 1443                 delta_ticks = INT_MAX;
 1444 
 1445         /* 
 1446          * Now rip through the timer calltodo list looking for timers
 1447          * to expire.
 1448          */
 1449 
 1450         /* don't collide with softclock() */
 1451         CC_LOCK(cc);
 1452         for (p = calltodo.c_next; p != NULL; p = p->c_next) {
 1453                 p->c_time -= delta_ticks;
 1454 
 1455                 /* Break if the timer had more time on it than delta_ticks */
 1456                 if (p->c_time > 0)
 1457                         break;
 1458 
 1459                 /* take back the ticks the timer didn't use (p->c_time <= 0) */
 1460                 delta_ticks = -p->c_time;
 1461         }
 1462         CC_UNLOCK(cc);
 1463 
 1464         return;
 1465 }
 1466 #endif /* APM_FIXUP_CALLTODO */
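
The conversion above mirrors tvtohz(): seconds and microseconds are turned into ticks, rounding up, with overflow guards on the multiplications.  For example, with hz = 1000 (so tick = 1000 us), a 2.5 s suspend gives (2 * 1000000 + 500000 + 999) / 1000 + 1 = 2501 ticks.  A standalone sketch of the same arithmetic, not part of kern_timeout.c, with "hz_val" and "tick_us" standing in for the kernel's hz and tick globals:

/* Sketch for illustration only; not part of kern_timeout.c. */
#include <sys/time.h>
#include <limits.h>

static unsigned long
suspend_ticks(const struct timeval *tv, long hz_val, long tick_us)
{
        if (tv->tv_sec < 0)
                return (0);
        if (tv->tv_sec <= LONG_MAX / 1000000)
                return ((tv->tv_sec * 1000000 + tv->tv_usec +
                    (tick_us - 1)) / tick_us + 1);
        if (tv->tv_sec <= LONG_MAX / hz_val)
                return (tv->tv_sec * hz_val +
                    (tv->tv_usec + (tick_us - 1)) / tick_us + 1);
        return (LONG_MAX);
}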
 1467 
 1468 static int
 1469 flssbt(sbintime_t sbt)
 1470 {
 1471 
 1472         sbt += (uint64_t)sbt >> 1;
 1473         if (sizeof(long) >= sizeof(sbintime_t))
 1474                 return (flsl(sbt));
 1475         if (sbt >= SBT_1S)
 1476                 return (flsl(((uint64_t)sbt) >> 32) + 32);
 1477         return (flsl(sbt));
 1478 }
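
flssbt() scales its argument by 1.5 before taking the highest set bit, so the log2 histogram below bins a value to the nearest power of two instead of always rounding down (the same trick appears as fls(c + c / 2) for the per-bucket counts).  A small userland illustration, not part of kern_timeout.c; flsll() is the bit-scan routine from FreeBSD's <strings.h>:

/* Sketch for illustration only; not part of kern_timeout.c. */
#include <stdint.h>
#include <stdio.h>
#include <strings.h>

int
main(void)
{
        int64_t sbt = (int64_t)3 << 30; /* ~0.75 s in sbintime_t units */

        /* A plain bit scan files 0.75 s under the 2^31 (~0.5 s) bucket... */
        printf("fls bucket:    2^%d\n", flsll(sbt) - 1);
        /* ...the 1.5x scaling moves it to the nearer 2^32 (~1 s) bucket. */
        printf("flssbt bucket: 2^%d\n", flsll(sbt + ((uint64_t)sbt >> 1)) - 1);
        return (0);
}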
 1479 
 1480 /*
 1481  * Dump an immediate statistics snapshot of the scheduled callouts.
 1482  */
 1483 static int
 1484 sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
 1485 {
 1486         struct callout *tmp;
 1487         struct callout_cpu *cc;
 1488         struct callout_list *sc;
 1489         sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
 1490         int ct[64], cpr[64], ccpbk[32];
 1491         int error, val, i, count, tcum, pcum, maxc, c, medc;
 1492 #ifdef SMP
 1493         int cpu;
 1494 #endif
 1495 
 1496         val = 0;
 1497         error = sysctl_handle_int(oidp, &val, 0, req);
 1498         if (error != 0 || req->newptr == NULL)
 1499                 return (error);
 1500         count = maxc = 0;
 1501         st = spr = maxt = maxpr = 0;
 1502         bzero(ccpbk, sizeof(ccpbk));
 1503         bzero(ct, sizeof(ct));
 1504         bzero(cpr, sizeof(cpr));
 1505         now = sbinuptime();
 1506 #ifdef SMP
 1507         CPU_FOREACH(cpu) {
 1508                 cc = CC_CPU(cpu);
 1509 #else
 1510                 cc = CC_CPU(timeout_cpu);
 1511 #endif
 1512                 CC_LOCK(cc);
 1513                 for (i = 0; i < callwheelsize; i++) {
 1514                         sc = &cc->cc_callwheel[i];
 1515                         c = 0;
 1516                         LIST_FOREACH(tmp, sc, c_links.le) {
 1517                                 c++;
 1518                                 t = tmp->c_time - now;
 1519                                 if (t < 0)
 1520                                         t = 0;
 1521                                 st += t / SBT_1US;
 1522                                 spr += tmp->c_precision / SBT_1US;
 1523                                 if (t > maxt)
 1524                                         maxt = t;
 1525                                 if (tmp->c_precision > maxpr)
 1526                                         maxpr = tmp->c_precision;
 1527                                 ct[flssbt(t)]++;
 1528                                 cpr[flssbt(tmp->c_precision)]++;
 1529                         }
 1530                         if (c > maxc)
 1531                                 maxc = c;
 1532                         ccpbk[fls(c + c / 2)]++;
 1533                         count += c;
 1534                 }
 1535                 CC_UNLOCK(cc);
 1536 #ifdef SMP
 1537         }
 1538 #endif
 1539 
 1540         for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
 1541                 tcum += ct[i];
 1542         medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 1543         for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
 1544                 pcum += cpr[i];
 1545         medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 1546         for (i = 0, c = 0; i < 32 && c < count / 2; i++)
 1547                 c += ccpbk[i];
 1548         medc = (i >= 2) ? (1 << (i - 2)) : 0;
 1549 
 1550         printf("Scheduled callouts statistic snapshot:\n");
 1551         printf("  Callouts: %6d  Buckets: %6d*%-3d  Bucket size: 0.%06ds\n",
 1552             count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
 1553         printf("  C/Bk: med %5d         avg %6d.%06jd  max %6d\n",
 1554             medc,
 1555             count / callwheelsize / mp_ncpus,
 1556             (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
 1557             maxc);
 1558         printf("  Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 1559             medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
 1560             (st / count) / 1000000, (st / count) % 1000000,
 1561             maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
 1562         printf("  Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 1563             medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
 1564             (spr / count) / 1000000, (spr / count) % 1000000,
 1565             maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
 1566         printf("  Distribution:       \tbuckets\t   time\t   tcum\t"
 1567             "   prec\t   pcum\n");
 1568         for (i = 0, tcum = pcum = 0; i < 64; i++) {
 1569                 if (ct[i] == 0 && cpr[i] == 0)
 1570                         continue;
 1571                 t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
 1572                 tcum += ct[i];
 1573                 pcum += cpr[i];
 1574                 printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
 1575                     t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
 1576                     i - 1 - (32 - CC_HASH_SHIFT),
 1577                     ct[i], tcum, cpr[i], pcum);
 1578         }
 1579         return (error);
 1580 }
 1581 SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
 1582     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
 1583     0, 0, sysctl_kern_callout_stat, "I",
 1584     "Dump immediate statistic snapshot of the scheduled callouts");
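
The handler above prints its report only when the OID is written (req->newptr != NULL), so the snapshot is requested with, e.g., "sysctl kern.callout_stat=1", and the output goes to the kernel message buffer.  A minimal userland sketch, not part of kern_timeout.c, doing the same through sysctlbyname(3):

/* Sketch for illustration only; not part of kern_timeout.c. */
#include <sys/types.h>
#include <sys/sysctl.h>

int
dump_callout_stat(void)
{
        int one = 1;

        /* Any written value triggers the dump; nothing is read back. */
        return (sysctlbyname("kern.callout_stat", NULL, NULL,
            &one, sizeof(one)));
}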
 1585 
 1586 #ifdef DDB
 1587 static void
 1588 _show_callout(struct callout *c)
 1589 {
 1590 
 1591         db_printf("callout %p\n", c);
 1592 #define C_DB_PRINTF(f, e)       db_printf("   %s = " f "\n", #e, c->e);
 1593         db_printf("   &c_links = %p\n", &(c->c_links));
 1594         C_DB_PRINTF("%" PRId64, c_time);
 1595         C_DB_PRINTF("%" PRId64, c_precision);
 1596         C_DB_PRINTF("%p",       c_arg);
 1597         C_DB_PRINTF("%p",       c_func);
 1598         C_DB_PRINTF("%p",       c_lock);
 1599         C_DB_PRINTF("%#x",      c_flags);
 1600         C_DB_PRINTF("%#x",      c_iflags);
 1601         C_DB_PRINTF("%d",       c_cpu);
 1602 #undef  C_DB_PRINTF
 1603 }
 1604 
 1605 DB_SHOW_COMMAND(callout, db_show_callout)
 1606 {
 1607 
 1608         if (!have_addr) {
 1609                 db_printf("usage: show callout <struct callout *>\n");
 1610                 return;
 1611         }
 1612 
 1613         _show_callout((struct callout *)addr);
 1614 }
 1615 #endif /* DDB */

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.