/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.2/sys/kern/kern_timeout.c 281921 2015-04-24 07:52:21Z bz $");

#include "opt_callout_profiling.h"
#include "opt_kdtrace.h"
#if defined(__arm__)
#include "opt_timer.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/callout.h>
#include <sys/file.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
#include <sys/smp.h>

#ifdef SMP
#include <machine/cpu.h>
#endif

#ifndef NO_EVENTTIMERS
DPCPU_DECLARE(sbintime_t, hardclocktime);
#endif

SDT_PROVIDER_DEFINE(callout_execute);
SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start,
    "struct callout *");
SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end,
    "struct callout *");

#ifdef CALLOUT_PROFILING
static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
    "Average number of items examined per softclock call. Units = 1/1000");
static int avg_gcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
    "Average number of Giant callouts made per softclock call. Units = 1/1000");
static int avg_lockcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
    "Average number of lock callouts made per softclock call. Units = 1/1000");
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
    "Average number of MP callouts made per softclock call. Units = 1/1000");
static int avg_depth_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
    "Average number of direct callouts examined per callout_process call. "
    "Units = 1/1000");
static int avg_lockcalls_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
    &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
    "callout_process call. Units = 1/1000");
static int avg_mpcalls_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
    0, "Average number of MP direct callouts made per callout_process call. "
    "Units = 1/1000");
#endif
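
/*
 * Illustrative note (not part of the original source): the averages
 * above are exponentially weighted moving averages kept in fixed
 * point.  Each sample is scaled by 1000 and blended in with a weight
 * of 1/256, as in the updates of the form
 * avg_depth += (depth * 1000 - avg_depth) >> 8 found in softclock()
 * and callout_process() below.  A sketch of the convergence, assuming
 * a steady per-call depth of 2:
 *
 *	int avg = 0, depth = 2, i;
 *
 *	for (i = 0; i < 4096; i++)
 *		avg += (depth * 1000 - avg) >> 8;
 *	// avg approaches 2000, i.e. 2.000 items per call in 1/1000 units.
 */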

static int ncallout;
SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN, &ncallout, 0,
    "Number of entries in callwheel and size of timeout() preallocation");

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */
u_int callwheelsize, callwheelmask;

/*
 * The callout cpu exec entities represent the information necessary
 * for describing the state of callouts currently running on the CPU
 * and for migrating callouts to a new callout cpu.  In particular,
 * the first entry of the array cc_exec_entity holds the information
 * for a callout running in SWI thread context, while the second one
 * holds the information for a callout running directly from hardware
 * interrupt context.  The cached information is very important for
 * deferring migration when the migrating callout is already running.
 */
struct cc_exec {
	struct callout		*cc_curr;
#ifdef SMP
	void			(*ce_migration_func)(void *);
	void			*ce_migration_arg;
	int			ce_migration_cpu;
	sbintime_t		ce_migration_time;
	sbintime_t		ce_migration_prec;
#endif
	bool			cc_cancel;
	bool			cc_waiting;
};

/*
 * There is one struct callout_cpu per cpu, holding all relevant
 * state for the callout processing thread on the individual CPU.
 */
struct callout_cpu {
	struct mtx_padalign	cc_lock;
	struct cc_exec		cc_exec_entity[2];
	struct callout		*cc_next;
	struct callout		*cc_callout;
	struct callout_list	*cc_callwheel;
	struct callout_tailq	cc_expireq;
	struct callout_slist	cc_callfree;
	sbintime_t		cc_firstevent;
	sbintime_t		cc_lastscan;
	void			*cc_cookie;
	u_int			cc_bucket;
	u_int			cc_inited;
	char			cc_ktr_event_name[20];
};

#define	callout_migrating(c)	((c)->c_iflags & CALLOUT_DFRMIGRATION)

#define	cc_exec_curr(cc, dir)		cc->cc_exec_entity[dir].cc_curr
#define	cc_exec_next(cc)		cc->cc_next
#define	cc_exec_cancel(cc, dir)		cc->cc_exec_entity[dir].cc_cancel
#define	cc_exec_waiting(cc, dir)	cc->cc_exec_entity[dir].cc_waiting
#ifdef SMP
#define	cc_migration_func(cc, dir)	cc->cc_exec_entity[dir].ce_migration_func
#define	cc_migration_arg(cc, dir)	cc->cc_exec_entity[dir].ce_migration_arg
#define	cc_migration_cpu(cc, dir)	cc->cc_exec_entity[dir].ce_migration_cpu
#define	cc_migration_time(cc, dir)	cc->cc_exec_entity[dir].ce_migration_time
#define	cc_migration_prec(cc, dir)	cc->cc_exec_entity[dir].ce_migration_prec

struct callout_cpu cc_cpu[MAXCPU];
#define	CPUBLOCK	MAXCPU
#define	CC_CPU(cpu)	(&cc_cpu[(cpu)])
#define	CC_SELF()	CC_CPU(PCPU_GET(cpuid))
#else
struct callout_cpu cc_cpu;
#define	CC_CPU(cpu)	&cc_cpu
#define	CC_SELF()	&cc_cpu
#endif
#define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
#define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
#define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)

static int timeout_cpu;

static void	callout_cpu_init(struct callout_cpu *cc, int cpu);
static void	softclock_call_cc(struct callout *c, struct callout_cpu *cc,
#ifdef CALLOUT_PROFILING
		    int *mpcalls, int *lockcalls, int *gcalls,
#endif
		    int direct);

static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
/**
 * Locked by cc_lock:
 *   cc_curr    - If a callout is in progress, it is cc_curr.
 *                If cc_curr is non-NULL, threads waiting in
 *                callout_drain() will be woken up as soon as the
 *                relevant callout completes.
 *   cc_cancel  - Changing to true with both callout_lock and cc_lock held
 *                guarantees that the current callout will not run.
 *                The softclock() function sets this to false before it
 *                drops callout_lock to acquire c_lock, and it calls
 *                the handler only if cc_cancel is still false after
 *                cc_lock is successfully acquired.
 *   cc_waiting - If a thread is waiting in callout_drain(), then
 *                cc_waiting is true.  Set only when
 *                cc_curr is non-NULL.
 */

/*
 * Resets the execution entity tied to a specific callout cpu.
 */
static void
cc_cce_cleanup(struct callout_cpu *cc, int direct)
{

	cc_exec_curr(cc, direct) = NULL;
	cc_exec_cancel(cc, direct) = false;
	cc_exec_waiting(cc, direct) = false;
#ifdef SMP
	cc_migration_cpu(cc, direct) = CPUBLOCK;
	cc_migration_time(cc, direct) = 0;
	cc_migration_prec(cc, direct) = 0;
	cc_migration_func(cc, direct) = NULL;
	cc_migration_arg(cc, direct) = NULL;
#endif
}

/*
 * Checks if migration is requested by a specific callout cpu.
 */
static int
cc_cce_migrating(struct callout_cpu *cc, int direct)
{

#ifdef SMP
	return (cc_migration_cpu(cc, direct) != CPUBLOCK);
#else
	return (0);
#endif
}

/*
 * Kernel low level callwheel initialization
 * called on cpu0 during kernel startup.
 */
static void
callout_callwheel_init(void *dummy)
{
	struct callout_cpu *cc;

	/*
	 * Calculate the size of the callout wheel and the preallocated
	 * timeout() structures.
	 * XXX: Clip callout to result of previous function of maxusers
	 * maximum 384.  This is still huge, but acceptable.
	 */
	memset(CC_CPU(0), 0, sizeof(cc_cpu));
	ncallout = imin(16 + maxproc + maxfiles, 18508);
	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);

	/*
	 * Calculate callout wheel size, should be next power of two higher
	 * than 'ncallout'.
	 */
	callwheelsize = 1 << fls(ncallout);
	callwheelmask = callwheelsize - 1;
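	/*
	 * Illustrative note (not part of the original source): fls()
	 * returns the 1-based index of the most significant set bit,
	 * so the shift above rounds up to the next power of two
	 * strictly greater than ncallout.  For example, with the
	 * default clip of ncallout = 18508, fls(18508) == 15, giving
	 * callwheelsize == 1 << 15 == 32768 and
	 * callwheelmask == 0x7fff for folding hashes into the wheel.
	 */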

	/*
	 * Only cpu0 handles timeout(9) and receives a preallocation.
	 *
	 * XXX: Once all timeout(9) consumers are converted this can
	 * be removed.
	 */
	timeout_cpu = PCPU_GET(cpuid);
	cc = CC_CPU(timeout_cpu);
	cc->cc_callout = malloc(ncallout * sizeof(struct callout),
	    M_CALLOUT, M_WAITOK);
	callout_cpu_init(cc, timeout_cpu);
}
SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);

/*
 * Initialize the per-cpu callout structures.
 */
static void
callout_cpu_init(struct callout_cpu *cc, int cpu)
{
	struct callout *c;
	int i;

	mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
	SLIST_INIT(&cc->cc_callfree);
	cc->cc_inited = 1;
	cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
	    M_CALLOUT, M_WAITOK);
	for (i = 0; i < callwheelsize; i++)
		LIST_INIT(&cc->cc_callwheel[i]);
	TAILQ_INIT(&cc->cc_expireq);
	cc->cc_firstevent = INT64_MAX;
	for (i = 0; i < 2; i++)
		cc_cce_cleanup(cc, i);
	snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
	    "callwheel cpu %d", cpu);
	if (cc->cc_callout == NULL)	/* Only cpu0 handles timeout(9) */
		return;
	for (i = 0; i < ncallout; i++) {
		c = &cc->cc_callout[i];
		callout_init(c, 0);
		c->c_iflags = CALLOUT_LOCAL_ALLOC;
		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
	}
}

#ifdef SMP
/*
 * Switches the cpu tied to a specific callout.
 * The function expects the incoming callout cpu to be locked and
 * returns with the outgoing callout cpu locked.
 */
static struct callout_cpu *
callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
{
	struct callout_cpu *new_cc;

	MPASS(c != NULL && cc != NULL);
	CC_LOCK_ASSERT(cc);

	/*
	 * Avoid interrupts and preemption firing after the callout cpu
	 * is blocked in order to avoid deadlocks, as the new thread
	 * may try to acquire the callout cpu lock.
	 */
	c->c_cpu = CPUBLOCK;
	spinlock_enter();
	CC_UNLOCK(cc);
	new_cc = CC_CPU(new_cpu);
	CC_LOCK(new_cc);
	spinlock_exit();
	c->c_cpu = new_cpu;
	return (new_cc);
}
#endif

/*
 * Start standard softclock thread.
 */
static void
start_softclock(void *dummy)
{
	struct callout_cpu *cc;
#ifdef SMP
	int cpu;
#endif

	cc = CC_CPU(timeout_cpu);
	if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK,
	    INTR_MPSAFE, &cc->cc_cookie))
		panic("died while creating standard software ithreads");
#ifdef SMP
	CPU_FOREACH(cpu) {
		if (cpu == timeout_cpu)
			continue;
		cc = CC_CPU(cpu);
		cc->cc_callout = NULL;	/* Only cpu0 handles timeout(9). */
		callout_cpu_init(cc, cpu);
		if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK,
		    INTR_MPSAFE, &cc->cc_cookie))
			panic("died while creating standard software ithreads");
	}
#endif
}
SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);

#define	CC_HASH_SHIFT	8

static inline u_int
callout_hash(sbintime_t sbt)
{

	return (sbt >> (32 - CC_HASH_SHIFT));
}

static inline u_int
callout_get_bucket(sbintime_t sbt)
{

	return (callout_hash(sbt) & callwheelmask);
}
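
/*
 * Illustrative note (not part of the original source): sbintime_t is a
 * 32.32 fixed-point count of seconds, so with CC_HASH_SHIFT of 8 the
 * hash above discards the low 24 fractional bits and each hash step
 * (and thus each wheel bucket) covers 2^24 / 2^32 == 1/256 of a
 * second.  Two events scheduled within the same 1/256 s window, say
 * now + 10 ms and now + 11 ms, will usually land in the same bucket.
 */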

void
callout_process(sbintime_t now)
{
	struct callout *tmp, *tmpn;
	struct callout_cpu *cc;
	struct callout_list *sc;
	sbintime_t first, last, max, tmp_max;
	uint32_t lookahead;
	u_int firstb, lastb, nowb;
#ifdef CALLOUT_PROFILING
	int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
#endif

	cc = CC_SELF();
	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);

	/* Compute the buckets of the last scan and present times. */
	firstb = callout_hash(cc->cc_lastscan);
	cc->cc_lastscan = now;
	nowb = callout_hash(now);

	/* Compute the last bucket and minimum time of the bucket after it. */
	if (nowb == firstb)
		lookahead = (SBT_1S / 16);
	else if (nowb - firstb == 1)
		lookahead = (SBT_1S / 8);
	else
		lookahead = (SBT_1S / 2);
	first = last = now;
	first += (lookahead / 2);
	last += lookahead;
	last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
	lastb = callout_hash(last) - 1;
	max = last;
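	/*
	 * Illustrative note (not part of the original source): if the
	 * last scan happened within the current bucket (nowb == firstb),
	 * the lookahead above is SBT_1S / 16, i.e. 62.5 ms.  The
	 * aggregation window then starts at now + 31.25 ms and ends at
	 * now + 62.5 ms rounded down to a bucket boundary, so events
	 * whose time and precision windows overlap that range can share
	 * a single hardware interrupt.
	 */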

	/*
	 * Check if we wrapped around the entire wheel from the last scan.
	 * If so, we need to scan the entire wheel for pending callouts.
	 */
	if (lastb - firstb >= callwheelsize) {
		lastb = firstb + callwheelsize - 1;
		if (nowb - firstb >= callwheelsize)
			nowb = lastb;
	}

	/* Iterate callwheel from firstb to nowb and then up to lastb. */
	do {
		sc = &cc->cc_callwheel[firstb & callwheelmask];
		tmp = LIST_FIRST(sc);
		while (tmp != NULL) {
			/*
			 * Run the callout if the present time is within
			 * its allowed window.
			 */
			if (tmp->c_time <= now) {
				/*
				 * Consumer told us the callout may be run
				 * directly from hardware interrupt context.
				 */
				if (tmp->c_iflags & CALLOUT_DIRECT) {
#ifdef CALLOUT_PROFILING
					++depth_dir;
#endif
					cc_exec_next(cc) =
					    LIST_NEXT(tmp, c_links.le);
					cc->cc_bucket = firstb & callwheelmask;
					LIST_REMOVE(tmp, c_links.le);
					softclock_call_cc(tmp, cc,
#ifdef CALLOUT_PROFILING
					    &mpcalls_dir, &lockcalls_dir, NULL,
#endif
					    1);
					tmp = cc_exec_next(cc);
					cc_exec_next(cc) = NULL;
				} else {
					tmpn = LIST_NEXT(tmp, c_links.le);
					LIST_REMOVE(tmp, c_links.le);
					TAILQ_INSERT_TAIL(&cc->cc_expireq,
					    tmp, c_links.tqe);
					tmp->c_iflags |= CALLOUT_PROCESSED;
					tmp = tmpn;
				}
				continue;
			}
			/* Skip events too far in the future. */
			if (tmp->c_time >= max)
				goto next;
			/*
			 * The event's earliest time is later than the
			 * present maximal time, so it cannot be aggregated.
			 */
			if (tmp->c_time > last) {
				lastb = nowb;
				goto next;
			}
			/* Update first and last time, respecting this event. */
			if (tmp->c_time < first)
				first = tmp->c_time;
			tmp_max = tmp->c_time + tmp->c_precision;
			if (tmp_max < last)
				last = tmp_max;
next:
			tmp = LIST_NEXT(tmp, c_links.le);
		}
		/* Proceed with the next bucket. */
		firstb++;
		/*
		 * Stop if we looked past the present time and found some
		 * event we can't execute now, or if we looked far enough
		 * into the future.
		 */
	} while (((int)(firstb - lastb)) <= 0);
	cc->cc_firstevent = last;
#ifndef NO_EVENTTIMERS
	cpu_new_callout(curcpu, last, first);
#endif
#ifdef CALLOUT_PROFILING
	avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
	avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
	avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
#endif
	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
	/*
	 * swi_sched acquires the thread lock, so we don't want to call it
	 * with cc_lock held; incorrect locking order.
	 */
	if (!TAILQ_EMPTY(&cc->cc_expireq))
		swi_sched(cc->cc_cookie, 0);
}

static struct callout_cpu *
callout_lock(struct callout *c)
{
	struct callout_cpu *cc;
	int cpu;

	for (;;) {
		cpu = c->c_cpu;
#ifdef SMP
		if (cpu == CPUBLOCK) {
			while (c->c_cpu == CPUBLOCK)
				cpu_spinwait();
			continue;
		}
#endif
		cc = CC_CPU(cpu);
		CC_LOCK(cc);
		if (cpu == c->c_cpu)
			break;
		CC_UNLOCK(cc);
	}
	return (cc);
}

static void
callout_cc_add(struct callout *c, struct callout_cpu *cc,
    sbintime_t sbt, sbintime_t precision, void (*func)(void *),
    void *arg, int cpu, int flags)
{
	int bucket;

	CC_LOCK_ASSERT(cc);
	if (sbt < cc->cc_lastscan)
		sbt = cc->cc_lastscan;
	c->c_arg = arg;
	c->c_iflags |= CALLOUT_PENDING;
	c->c_iflags &= ~CALLOUT_PROCESSED;
	c->c_flags |= CALLOUT_ACTIVE;
	if (flags & C_DIRECT_EXEC)
		c->c_iflags |= CALLOUT_DIRECT;
	c->c_func = func;
	c->c_time = sbt;
	c->c_precision = precision;
	bucket = callout_get_bucket(c->c_time);
	CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
	    c, (int)(c->c_precision >> 32),
	    (u_int)(c->c_precision & 0xffffffff));
	LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
	if (cc->cc_bucket == bucket)
		cc_exec_next(cc) = c;
#ifndef NO_EVENTTIMERS
	/*
	 * Inform the eventtimers(4) subsystem there's a new callout
	 * that has been inserted, but only if really required.
	 */
	if (INT64_MAX - c->c_time < c->c_precision)
		c->c_precision = INT64_MAX - c->c_time;
	sbt = c->c_time + c->c_precision;
	if (sbt < cc->cc_firstevent) {
		cc->cc_firstevent = sbt;
		cpu_new_callout(cpu, sbt, c->c_time);
	}
#endif
}

static void
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{

	if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0)
		return;
	c->c_func = NULL;
	SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}

static void
softclock_call_cc(struct callout *c, struct callout_cpu *cc,
#ifdef CALLOUT_PROFILING
    int *mpcalls, int *lockcalls, int *gcalls,
#endif
    int direct)
{
	struct rm_priotracker tracker;
	void (*c_func)(void *);
	void *c_arg;
	struct lock_class *class;
	struct lock_object *c_lock;
	uintptr_t lock_status;
	int c_iflags;
#ifdef SMP
	struct callout_cpu *new_cc;
	void (*new_func)(void *);
	void *new_arg;
	int flags, new_cpu;
	sbintime_t new_prec, new_time;
#endif
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbintime_t sbt1, sbt2;
	struct timespec ts2;
	static sbintime_t maxdt = 2 * SBT_1MS;	/* 2 msec */
	static timeout_t *lastfunc;
#endif

	KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
	    ("softclock_call_cc: pend %p %x", c, c->c_iflags));
	KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
	    ("softclock_call_cc: act %p %x", c, c->c_flags));
	class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
	lock_status = 0;
	if (c->c_flags & CALLOUT_SHAREDLOCK) {
		if (class == &lock_class_rm)
			lock_status = (uintptr_t)&tracker;
		else
			lock_status = 1;
	}
	c_lock = c->c_lock;
	c_func = c->c_func;
	c_arg = c->c_arg;
	c_iflags = c->c_iflags;
	if (c->c_iflags & CALLOUT_LOCAL_ALLOC)
		c->c_iflags = CALLOUT_LOCAL_ALLOC;
	else
		c->c_iflags &= ~CALLOUT_PENDING;

	cc_exec_curr(cc, direct) = c;
	cc_exec_cancel(cc, direct) = false;
	CC_UNLOCK(cc);
	if (c_lock != NULL) {
		class->lc_lock(c_lock, lock_status);
		/*
		 * The callout may have been cancelled
		 * while we switched locks.
		 */
		if (cc_exec_cancel(cc, direct)) {
			class->lc_unlock(c_lock);
			goto skip;
		}
		/* The callout cannot be stopped now. */
		cc_exec_cancel(cc, direct) = true;
		if (c_lock == &Giant.lock_object) {
#ifdef CALLOUT_PROFILING
			(*gcalls)++;
#endif
			CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
			    c, c_func, c_arg);
		} else {
#ifdef CALLOUT_PROFILING
			(*lockcalls)++;
#endif
			CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
			    c, c_func, c_arg);
		}
	} else {
#ifdef CALLOUT_PROFILING
		(*mpcalls)++;
#endif
		CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
		    c, c_func, c_arg);
	}
	KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
	    "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbt1 = sbinuptime();
#endif
	THREAD_NO_SLEEPING();
	SDT_PROBE(callout_execute, kernel, , callout__start, c, 0, 0, 0, 0);
	c_func(c_arg);
	SDT_PROBE(callout_execute, kernel, , callout__end, c, 0, 0, 0, 0);
	THREAD_SLEEPING_OK();
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbt2 = sbinuptime();
	sbt2 -= sbt1;
	if (sbt2 > maxdt) {
		if (lastfunc != c_func || sbt2 > maxdt * 2) {
			ts2 = sbttots(sbt2);
			printf(
		"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
			    c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
		}
		maxdt = sbt2;
		lastfunc = c_func;
	}
#endif
	KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
	CTR1(KTR_CALLOUT, "callout %p finished", c);
	if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
		class->lc_unlock(c_lock);
skip:
	CC_LOCK(cc);
	KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
	cc_exec_curr(cc, direct) = NULL;
	if (cc_exec_waiting(cc, direct)) {
		/*
		 * There is someone waiting for the
		 * callout to complete.
		 * If the callout was scheduled for
		 * migration just cancel it.
		 */
		if (cc_cce_migrating(cc, direct)) {
			cc_cce_cleanup(cc, direct);

			/*
			 * It would be proper to assert here that the
			 * callout is not destroyed, but that is not easy.
			 */
			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
		}
		cc_exec_waiting(cc, direct) = false;
		CC_UNLOCK(cc);
		wakeup(&cc_exec_waiting(cc, direct));
		CC_LOCK(cc);
	} else if (cc_cce_migrating(cc, direct)) {
		KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0,
		    ("Migrating legacy callout %p", c));
#ifdef SMP
		/*
		 * If the callout was scheduled for
		 * migration just perform it now.
		 */
		new_cpu = cc_migration_cpu(cc, direct);
		new_time = cc_migration_time(cc, direct);
		new_prec = cc_migration_prec(cc, direct);
		new_func = cc_migration_func(cc, direct);
		new_arg = cc_migration_arg(cc, direct);
		cc_cce_cleanup(cc, direct);

		/*
		 * It would be proper to assert here that the callout is
		 * not destroyed, but that is not easy.
		 *
		 * First, handle deferred callout stops.
		 */
		if (!callout_migrating(c)) {
			CTR3(KTR_CALLOUT,
			    "deferred cancelled %p func %p arg %p",
			    c, new_func, new_arg);
			callout_cc_del(c, cc);
			return;
		}
		c->c_iflags &= ~CALLOUT_DFRMIGRATION;

		new_cc = callout_cpu_switch(c, cc, new_cpu);
		flags = (direct) ? C_DIRECT_EXEC : 0;
		callout_cc_add(c, new_cc, new_time, new_prec, new_func,
		    new_arg, new_cpu, flags);
		CC_UNLOCK(new_cc);
		CC_LOCK(cc);
#else
		panic("migration should not happen");
#endif
	}
	/*
	 * If the current callout is locally allocated (from
	 * timeout(9)) then put it on the freelist.
	 *
	 * Note: we need to check the cached copy of c_iflags because
	 * if it was not local, then it's not safe to deref the
	 * callout pointer.
	 */
	KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 ||
	    c->c_iflags == CALLOUT_LOCAL_ALLOC,
	    ("corrupted callout"));
	if (c_iflags & CALLOUT_LOCAL_ALLOC)
		callout_cc_del(c, cc);
}

/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
void
softclock(void *arg)
{
	struct callout_cpu *cc;
	struct callout *c;
#ifdef CALLOUT_PROFILING
	int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
#endif

	cc = (struct callout_cpu *)arg;
	CC_LOCK(cc);
	while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
		TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		softclock_call_cc(c, cc,
#ifdef CALLOUT_PROFILING
		    &mpcalls, &lockcalls, &gcalls,
#endif
		    0);
#ifdef CALLOUT_PROFILING
		++depth;
#endif
	}
#ifdef CALLOUT_PROFILING
	avg_depth += (depth * 1000 - avg_depth) >> 8;
	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
	avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
#endif
	CC_UNLOCK(cc);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * callout_handle_init --
 *	Initialize a handle so that using it with untimeout is benign.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that although an
 *	identification value is returned from timeout, the original
 *	arguments to timeout as well as the identifier are used to
 *	identify entries for untimeout.
 */
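
/*
 * Illustrative sketch (not part of the original source): typical legacy
 * usage of this interface, assuming a hypothetical handler my_timeout()
 * and a softc pointer sc:
 *
 *	struct callout_handle h;
 *
 *	callout_handle_init(&h);
 *	h = timeout(my_timeout, sc, hz);	// fire in ~1 second
 *	...
 *	untimeout(my_timeout, sc, h);		// cancel if still pending
 *
 * Note that untimeout() needs the original function and argument as
 * well as the handle, as described above.
 */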
struct callout_handle
timeout(ftn, arg, to_ticks)
	timeout_t *ftn;
	void *arg;
	int to_ticks;
{
	struct callout_cpu *cc;
	struct callout *new;
	struct callout_handle handle;

	cc = CC_CPU(timeout_cpu);
	CC_LOCK(cc);
	/* Fill in the next free callout structure. */
	new = SLIST_FIRST(&cc->cc_callfree);
	if (new == NULL)
		/* XXX Attempt to malloc first */
		panic("timeout table full");
	SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
	callout_reset(new, to_ticks, ftn, arg);
	handle.callout = new;
	CC_UNLOCK(cc);

	return (handle);
}

void
untimeout(ftn, arg, handle)
	timeout_t *ftn;
	void *arg;
	struct callout_handle handle;
{
	struct callout_cpu *cc;

	/*
	 * Check for a handle that was initialized
	 * by callout_handle_init, but never used
	 * for a real timeout.
	 */
	if (handle.callout == NULL)
		return;

	cc = callout_lock(handle.callout);
	if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
		callout_stop(handle.callout);
	CC_UNLOCK(cc);
}

void
callout_handle_init(struct callout_handle *handle)
{
	handle->callout = NULL;
}

/*
 * New interface; clients allocate their own callout structures.
 *
 * callout_reset() - establish or change a timeout
 * callout_stop() - disestablish a timeout
 * callout_init() - initialize a callout structure so that it can
 *	safely be passed to callout_reset() and callout_stop()
 *
 * <sys/callout.h> defines three convenience macros:
 *
 * callout_active() - returns truth if callout has not been stopped,
 *	drained, or deactivated since the last time the callout was
 *	reset.
 * callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
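
/*
 * Illustrative sketch (not part of the original source): typical modern
 * usage, assuming a hypothetical driver softc with its own mutex and a
 * hypothetical handler my_poll() that reschedules itself:
 *
 *	struct my_softc {
 *		struct mtx	sc_mtx;
 *		struct callout	sc_callout;
 *	};
 *
 *	static void
 *	my_poll(void *arg)
 *	{
 *		struct my_softc *sc = arg;
 *
 *		// sc_mtx is held here thanks to callout_init_mtx().
 *		callout_reset(&sc->sc_callout, hz, my_poll, sc);
 *	}
 *
 *	// Setup:     callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
 *	// Arm:       callout_reset(&sc->sc_callout, hz, my_poll, sc);
 *	// Tear down: callout_drain(&sc->sc_callout);
 */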
int
callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
    void (*ftn)(void *), void *arg, int cpu, int flags)
{
	sbintime_t to_sbt, pr;
	struct callout_cpu *cc;
	int cancelled, direct;
	int ignore_cpu = 0;

	cancelled = 0;
	if (cpu == -1) {
		ignore_cpu = 1;
	} else if ((cpu >= MAXCPU) ||
	    ((CC_CPU(cpu))->cc_inited == 0)) {
		/* Invalid CPU spec */
		panic("Invalid CPU in callout %d", cpu);
	}
	if (flags & C_ABSOLUTE) {
		to_sbt = sbt;
	} else {
		if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
			sbt = tick_sbt;
		if ((flags & C_HARDCLOCK) ||
#ifdef NO_EVENTTIMERS
		    sbt >= sbt_timethreshold) {
			to_sbt = getsbinuptime();

			/* Add safety belt for the case of hz > 1000. */
			to_sbt += tc_tick_sbt - tick_sbt;
#else
		    sbt >= sbt_tickthreshold) {
			/*
			 * Obtain the time of the last hardclock() call on
			 * this CPU directly from kern_clocksource.c.
			 * This value is per-CPU, but it is equal for all
			 * active ones.
			 */
#ifdef __LP64__
			to_sbt = DPCPU_GET(hardclocktime);
#else
			spinlock_enter();
			to_sbt = DPCPU_GET(hardclocktime);
			spinlock_exit();
#endif
#endif
			if ((flags & C_HARDCLOCK) == 0)
				to_sbt += tick_sbt;
		} else
			to_sbt = sbinuptime();
		if (INT64_MAX - to_sbt < sbt)
			to_sbt = INT64_MAX;
		else
			to_sbt += sbt;
		pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
		    sbt >> C_PRELGET(flags));
		if (pr > precision)
			precision = pr;
	}
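	/*
	 * Illustrative note (not part of the original source): for
	 * relative timeouts the precision defaults to a fraction of
	 * the interval.  With sbt equal to one second and a
	 * caller-supplied C_PREL(5), pr above becomes SBT_1S >> 5,
	 * i.e. about 31.25 ms of allowed slop, which lets this callout
	 * be aggregated with its neighbors.
	 */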
	/*
	 * This flag used to be added by callout_cc_add, but the first
	 * time this is called we could end up with the wrong direct
	 * flag if we don't set it before the add.
	 */
	if (flags & C_DIRECT_EXEC) {
		direct = 1;
	} else {
		direct = 0;
	}
	KASSERT(!direct || c->c_lock == NULL,
	    ("%s: direct callout %p has lock", __func__, c));
	cc = callout_lock(c);
	/*
	 * Don't allow migration of pre-allocated callouts lest they
	 * become unbalanced, and handle the case where the user does
	 * not care which CPU is used.
	 */
	if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) ||
	    ignore_cpu) {
		cpu = c->c_cpu;
	}

	if (cc_exec_curr(cc, direct) == c) {
		/*
		 * We're being asked to reschedule a callout which is
		 * currently in progress.  If there is a lock then we
		 * can cancel the callout if it has not really started.
		 */
		if (c->c_lock != NULL && cc_exec_cancel(cc, direct))
			cancelled = cc_exec_cancel(cc, direct) = true;
		if (cc_exec_waiting(cc, direct)) {
			/*
			 * Someone has called callout_drain to kill this
			 * callout.  Don't reschedule.
			 */
			CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
			    cancelled ? "cancelled" : "failed to cancel",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			return (cancelled);
		}
#ifdef SMP
		if (callout_migrating(c)) {
			/*
			 * This only occurs when a second callout_reset_sbt_on
			 * is made after a previous one moved it into
			 * deferred migration (below).  Note we do *not* change
			 * the prev_cpu even though the previous target may
			 * be different.
			 */
			cc_migration_cpu(cc, direct) = cpu;
			cc_migration_time(cc, direct) = to_sbt;
			cc_migration_prec(cc, direct) = precision;
			cc_migration_func(cc, direct) = ftn;
			cc_migration_arg(cc, direct) = arg;
			cancelled = 1;
			CC_UNLOCK(cc);
			return (cancelled);
		}
#endif
	}
	if (c->c_iflags & CALLOUT_PENDING) {
		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
			if (cc_exec_next(cc) == c)
				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
			LIST_REMOVE(c, c_links.le);
		} else {
			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		}
		cancelled = 1;
		c->c_iflags &= ~CALLOUT_PENDING;
		c->c_flags &= ~CALLOUT_ACTIVE;
	}

#ifdef SMP
	/*
	 * If the callout must migrate try to perform it immediately.
	 * If the callout is currently running, just defer the migration
	 * to a more appropriate moment.
	 */
	if (c->c_cpu != cpu) {
		if (cc_exec_curr(cc, direct) == c) {
			/*
			 * Pending will have been removed since we are
			 * actually executing the callout on another
			 * CPU.  That callout should be waiting on the
			 * lock the caller holds.  If we set both
			 * active and pending after we return and the
			 * lock on the executing callout proceeds, it
			 * will then see pending is true and return.
			 * At the return from the actual callout execution
			 * the migration will occur in softclock_call_cc
			 * and this new callout will be placed on the
			 * new CPU via a call to callout_cpu_switch() which
			 * will get the lock on the right CPU followed
			 * by a call to callout_cc_add() which will add it
			 * there.  (See above in softclock_call_cc().)
			 */
			cc_migration_cpu(cc, direct) = cpu;
			cc_migration_time(cc, direct) = to_sbt;
			cc_migration_prec(cc, direct) = precision;
			cc_migration_func(cc, direct) = ftn;
			cc_migration_arg(cc, direct) = arg;
			c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
			c->c_flags |= CALLOUT_ACTIVE;
			CTR6(KTR_CALLOUT,
			    "migration of %p func %p arg %p in %d.%08x to %u deferred",
			    c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
			    (u_int)(to_sbt & 0xffffffff), cpu);
			CC_UNLOCK(cc);
			return (cancelled);
		}
		cc = callout_cpu_switch(c, cc, cpu);
	}
#endif

	callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
	CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
	    cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
	    (u_int)(to_sbt & 0xffffffff));
	CC_UNLOCK(cc);

	return (cancelled);
}

/*
 * Common idioms that can be optimized in the future.
 */
int
callout_schedule_on(struct callout *c, int to_ticks, int cpu)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
}

int
callout_schedule(struct callout *c, int to_ticks)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
}

int
_callout_stop_safe(c, safe)
	struct callout *c;
	int safe;
{
	struct callout_cpu *cc, *old_cc;
	struct lock_class *class;
	int direct, sq_locked, use_lock;
	int not_on_a_list;

	/*
	 * Some old subsystems don't hold Giant while running a callout_stop(),
	 * so just discard this check for the moment.
	 */
	if (!safe && c->c_lock != NULL) {
		if (c->c_lock == &Giant.lock_object)
			use_lock = mtx_owned(&Giant);
		else {
			use_lock = 1;
			class = LOCK_CLASS(c->c_lock);
			class->lc_assert(c->c_lock, LA_XLOCKED);
		}
	} else
		use_lock = 0;
	if (c->c_iflags & CALLOUT_DIRECT) {
		direct = 1;
	} else {
		direct = 0;
	}
	sq_locked = 0;
	old_cc = NULL;
again:
	cc = callout_lock(c);

	if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
	    (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
	    ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
		/*
		 * Special case where this slipped in while we
		 * were migrating *as* the callout is about to
		 * execute.  The caller probably holds the lock
		 * the callout wants.
		 *
		 * Get rid of the migration first.  Then set
		 * the flag that tells this code *not* to
		 * try to remove it from any lists (it's not
		 * on one yet).  When the callout wheel runs,
		 * it will ignore this callout.
		 */
		c->c_iflags &= ~CALLOUT_PENDING;
		c->c_flags &= ~CALLOUT_ACTIVE;
		not_on_a_list = 1;
	} else {
		not_on_a_list = 0;
	}

	/*
	 * If the callout was migrating while the callout cpu lock was
	 * dropped, just drop the sleepqueue lock and check the states
	 * again.
	 */
	if (sq_locked != 0 && cc != old_cc) {
#ifdef SMP
		CC_UNLOCK(cc);
		sleepq_release(&cc_exec_waiting(old_cc, direct));
		sq_locked = 0;
		old_cc = NULL;
		goto again;
#else
		panic("migration should not happen");
#endif
	}

	/*
	 * If the callout isn't pending, it's not on the queue, so
	 * don't attempt to remove it from the queue.  We can try to
	 * stop it by other means however.
	 */
	if (!(c->c_iflags & CALLOUT_PENDING)) {
		c->c_flags &= ~CALLOUT_ACTIVE;

		/*
		 * If it wasn't on the queue and it isn't the current
		 * callout, then we can't stop it, so just bail.
		 */
		if (cc_exec_curr(cc, direct) != c) {
			CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			if (sq_locked)
				sleepq_release(&cc_exec_waiting(cc, direct));
			return (0);
		}

		if (safe) {
			/*
			 * The current callout is running (or just
			 * about to run) and blocking is allowed, so
			 * just wait for the current invocation to
			 * finish.
			 */
			while (cc_exec_curr(cc, direct) == c) {
				/*
				 * Use direct calls to sleepqueue interface
				 * instead of cv/msleep in order to avoid
				 * a LOR between cc_lock and sleepqueue
				 * chain spinlocks.  This piece of code
				 * effectively emulates an msleep_spin()
				 * call.
				 *
				 * If we already have the sleepqueue chain
				 * locked, then we can safely block.  If we
				 * don't already have it locked, however,
				 * we have to drop the cc_lock to lock
				 * it.  This opens several races, so we
				 * restart at the beginning once we have
				 * both locks.  If nothing has changed, then
				 * we will end up back here with sq_locked
				 * set.
				 */
				if (!sq_locked) {
					CC_UNLOCK(cc);
					sleepq_lock(
					    &cc_exec_waiting(cc, direct));
					sq_locked = 1;
					old_cc = cc;
					goto again;
				}

				/*
				 * Migration could be cancelled here, but
				 * since it is not yet certain when it will
				 * be packed up, just let softclock() take
				 * care of it.
				 */
				cc_exec_waiting(cc, direct) = true;
				DROP_GIANT();
				CC_UNLOCK(cc);
				sleepq_add(
				    &cc_exec_waiting(cc, direct),
				    &cc->cc_lock.lock_object, "codrain",
				    SLEEPQ_SLEEP, 0);
				sleepq_wait(
				    &cc_exec_waiting(cc, direct),
				    0);
				sq_locked = 0;
				old_cc = NULL;

				/* Reacquire locks previously released. */
				PICKUP_GIANT();
				CC_LOCK(cc);
			}
		} else if (use_lock &&
		    !cc_exec_cancel(cc, direct)) {

			/*
			 * The current callout is waiting for its
			 * lock which we hold.  Cancel the callout
			 * and return.  After our caller drops the
			 * lock, the callout will be skipped in
			 * softclock().
			 */
			cc_exec_cancel(cc, direct) = true;
			CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			KASSERT(!cc_cce_migrating(cc, direct),
			    ("callout wrongly scheduled for migration"));
			if (callout_migrating(c)) {
				c->c_iflags &= ~CALLOUT_DFRMIGRATION;
#ifdef SMP
				cc_migration_cpu(cc, direct) = CPUBLOCK;
				cc_migration_time(cc, direct) = 0;
				cc_migration_prec(cc, direct) = 0;
				cc_migration_func(cc, direct) = NULL;
				cc_migration_arg(cc, direct) = NULL;
#endif
			}
			CC_UNLOCK(cc);
			KASSERT(!sq_locked, ("sleepqueue chain locked"));
			return (1);
		} else if (callout_migrating(c)) {
			/*
			 * The callout is currently being serviced
			 * and the "next" callout is scheduled at
			 * its completion with a migration.  We remove
			 * the migration flag so it *won't* get rescheduled,
			 * but we can't stop the one that's running so
			 * we return 0.
			 */
			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
#ifdef SMP
			/*
			 * We can't call cc_cce_cleanup here since it
			 * would clear cc_curr while the callout is
			 * still running.  That would prevent a
			 * reschedule of the callout when the
			 * execution completes.
			 */
			cc_migration_cpu(cc, direct) = CPUBLOCK;
			cc_migration_time(cc, direct) = 0;
			cc_migration_prec(cc, direct) = 0;
			cc_migration_func(cc, direct) = NULL;
			cc_migration_arg(cc, direct) = NULL;
#endif
			CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			return (0);
		}
		CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
		    c, c->c_func, c->c_arg);
		CC_UNLOCK(cc);
		KASSERT(!sq_locked, ("sleepqueue chain still locked"));
		return (0);
	}
	if (sq_locked)
		sleepq_release(&cc_exec_waiting(cc, direct));

	c->c_iflags &= ~CALLOUT_PENDING;
	c->c_flags &= ~CALLOUT_ACTIVE;

	CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
	    c, c->c_func, c->c_arg);
	if (not_on_a_list == 0) {
		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
			if (cc_exec_next(cc) == c)
				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
			LIST_REMOVE(c, c_links.le);
		} else {
			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		}
	}
	callout_cc_del(c, cc);
	CC_UNLOCK(cc);
	return (1);
}
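
/*
 * Illustrative note (not part of the original source): in this FreeBSD
 * version, callout_stop() and callout_drain() in <sys/callout.h> map to
 * _callout_stop_safe() with safe set to 0 and 1 respectively.  A sketch
 * of the difference:
 *
 *	callout_stop(&c);	// async: may return 0 while the handler
 *				// is still running on another CPU
 *	callout_drain(&c);	// may sleep until any running handler
 *				// has finished; safe before freeing c
 */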

void
callout_init(c, mpsafe)
	struct callout *c;
	int mpsafe;
{
	bzero(c, sizeof *c);
	if (mpsafe) {
		c->c_lock = NULL;
		c->c_iflags = CALLOUT_RETURNUNLOCKED;
	} else {
		c->c_lock = &Giant.lock_object;
		c->c_iflags = 0;
	}
	c->c_cpu = timeout_cpu;
}

void
_callout_init_lock(c, lock, flags)
	struct callout *c;
	struct lock_object *lock;
	int flags;
{
	bzero(c, sizeof *c);
	c->c_lock = lock;
	KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
	    ("callout_init_lock: bad flags %d", flags));
	KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
	    ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
	KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
	    (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
	    __func__));
	c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
	c->c_cpu = timeout_cpu;
}
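
/*
 * Illustrative sketch (not part of the original source): drivers
 * normally reach _callout_init_lock() through the callout_init_mtx()
 * and callout_init_rw() macros in <sys/callout.h>, e.g. for a
 * hypothetical softc mutex sc_mtx:
 *
 *	callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
 *
 * This has the callout subsystem acquire sc_mtx around the handler,
 * which serializes the handler against callout_stop() callers holding
 * the same lock.
 */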

#ifdef APM_FIXUP_CALLTODO
/*
 * Adjust the kernel calltodo timeout list.  This routine is used after
 * an APM resume to recalculate the calltodo timer list values with the
 * number of hz's we have been sleeping.  The next hardclock() will detect
 * that there are fired timers and run softclock() to execute them.
 *
 * Please note, I have not done an exhaustive analysis of what code this
 * might break.  I am motivated to have my select()'s and alarm()'s that
 * have expired during suspend firing upon resume so that the applications
 * which set the timer can do the maintenance the timer was for as close
 * as possible to the originally intended time.  Testing this code for a
 * week showed that resuming from a suspend resulted in 22 to 25 timers
 * firing, which seemed independent of whether the suspend was 2 hours or
 * 2 days.  Your mileage may vary.   - Ken Key <key@cs.utk.edu>
 */
void
adjust_timeout_calltodo(time_change)
	struct timeval *time_change;
{
	register struct callout *p;
	unsigned long delta_ticks;

	/*
	 * How many ticks were we asleep?
	 * (stolen from tvtohz()).
	 */

	/* Don't do anything */
	if (time_change->tv_sec < 0)
		return;
	else if (time_change->tv_sec <= LONG_MAX / 1000000)
		delta_ticks = (time_change->tv_sec * 1000000 +
		    time_change->tv_usec + (tick - 1)) / tick + 1;
	else if (time_change->tv_sec <= LONG_MAX / hz)
		delta_ticks = time_change->tv_sec * hz +
		    (time_change->tv_usec + (tick - 1)) / tick + 1;
	else
		delta_ticks = LONG_MAX;

	if (delta_ticks > INT_MAX)
		delta_ticks = INT_MAX;

	/*
	 * Now rip through the timer calltodo list looking for timers
	 * to expire.
	 */

	/* don't collide with softclock() */
	CC_LOCK(cc);
	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
		p->c_time -= delta_ticks;

		/* Break if the timer had more time on it than delta_ticks */
		if (p->c_time > 0)
			break;

		/* take back the ticks the timer didn't use (p->c_time <= 0) */
		delta_ticks = -p->c_time;
	}
	CC_UNLOCK(cc);

	return;
}
#endif /* APM_FIXUP_CALLTODO */

static int
flssbt(sbintime_t sbt)
{

	sbt += (uint64_t)sbt >> 1;
	if (sizeof(long) >= sizeof(sbintime_t))
		return (flsl(sbt));
	if (sbt >= SBT_1S)
		return (flsl(((uint64_t)sbt) >> 32) + 32);
	return (flsl(sbt));
}
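
/*
 * Illustrative note (not part of the original source): flssbt()
 * returns the index of the highest set bit after adding half of the
 * value, which rounds to the nearest power of two instead of
 * truncating.  For example, flssbt() of SBT_1S (1 << 32) yields 33,
 * and a value of 3 * SBT_1S / 4 is bumped past SBT_1S first, so it is
 * counted in the same histogram bucket as a full second.
 */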

/*
 * Dump immediate statistic snapshot of the scheduled callouts.
 */
static int
sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
{
	struct callout *tmp;
	struct callout_cpu *cc;
	struct callout_list *sc;
	sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
	int ct[64], cpr[64], ccpbk[32];
	int error, val, i, count, tcum, pcum, maxc, c, medc;
#ifdef SMP
	int cpu;
#endif

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	count = maxc = 0;
	st = spr = maxt = maxpr = 0;
	bzero(ccpbk, sizeof(ccpbk));
	bzero(ct, sizeof(ct));
	bzero(cpr, sizeof(cpr));
	now = sbinuptime();
#ifdef SMP
	CPU_FOREACH(cpu) {
		cc = CC_CPU(cpu);
#else
		cc = CC_CPU(timeout_cpu);
#endif
		CC_LOCK(cc);
		for (i = 0; i < callwheelsize; i++) {
			sc = &cc->cc_callwheel[i];
			c = 0;
			LIST_FOREACH(tmp, sc, c_links.le) {
				c++;
				t = tmp->c_time - now;
				if (t < 0)
					t = 0;
				st += t / SBT_1US;
				spr += tmp->c_precision / SBT_1US;
				if (t > maxt)
					maxt = t;
				if (tmp->c_precision > maxpr)
					maxpr = tmp->c_precision;
				ct[flssbt(t)]++;
				cpr[flssbt(tmp->c_precision)]++;
			}
			if (c > maxc)
				maxc = c;
			ccpbk[fls(c + c / 2)]++;
			count += c;
		}
		CC_UNLOCK(cc);
#ifdef SMP
	}
#endif

	for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
		tcum += ct[i];
	medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
	for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
		pcum += cpr[i];
	medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
	for (i = 0, c = 0; i < 32 && c < count / 2; i++)
		c += ccpbk[i];
	medc = (i >= 2) ? (1 << (i - 2)) : 0;

	printf("Scheduled callouts statistic snapshot:\n");
	printf("  Callouts: %6d  Buckets: %6d*%-3d  Bucket size: 0.%06ds\n",
	    count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
	printf("  C/Bk: med %5d  avg %6d.%06jd  max %6d\n",
	    medc,
	    count / callwheelsize / mp_ncpus,
	    (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
	    maxc);
	printf("  Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
	    medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
	    (st / count) / 1000000, (st / count) % 1000000,
	    maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
	printf("  Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
	    medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
	    (spr / count) / 1000000, (spr / count) % 1000000,
	    maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
	printf("  Distribution:       \tbuckets\t   time\t   tcum\t"
	    "   prec\t   pcum\n");
	for (i = 0, tcum = pcum = 0; i < 64; i++) {
		if (ct[i] == 0 && cpr[i] == 0)
			continue;
		t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
		tcum += ct[i];
		pcum += cpr[i];
		printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
		    i - 1 - (32 - CC_HASH_SHIFT),
		    ct[i], tcum, cpr[i], pcum);
	}
	return (error);
}
SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_callout_stat, "I",
    "Dump immediate statistic snapshot of the scheduled callouts");