FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_synch.c
1 /*-
2 * Copyright (c) 1982, 1986, 1990, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95
39 * $FreeBSD$
40 */
41
42 #include "opt_ktrace.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/proc.h>
47 #include <sys/kernel.h>
48 #include <sys/signalvar.h>
49 #include <sys/resourcevar.h>
50 #include <sys/vmmeter.h>
51 #include <sys/sysctl.h>
52 #include <vm/vm.h>
53 #include <vm/vm_extern.h>
54 #ifdef KTRACE
55 #include <sys/uio.h>
56 #include <sys/ktrace.h>
57 #endif
58
59 #include <machine/cpu.h>
60 #ifdef SMP
61 #include <machine/smp.h>
62 #endif
63 #include <machine/limits.h> /* for UCHAR_MAX = typeof(p_priority)_MAX */
64
/* SYSINIT hooks: set up the run queues, then kick off the scheduler. */
static void rqinit __P((void *));
SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL)
static void sched_setup __P((void *dummy));
SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL)

u_char	curpriority;		/* usrpri of curproc; compared by maybe_resched() */
int	hogticks;		/* kept at 2 * sched_quantum (sleepinit/sysctl) */
int	lbolt;			/* wakeup() is called on &lbolt once a second by schedcpu() */
int	sched_quantum;		/* Roundrobin scheduling quantum in ticks. */

static void	endtsleep __P((void *));
static void	roundrobin __P((void *arg));
static void	schedcpu __P((void *arg));
static void	updatepri __P((struct proc *p));
/*
 * Handler for the kern.quantum sysctl: report and set the scheduling
 * quantum.  sched_quantum is kept in clock ticks; the externally
 * visible value is converted via `tick' (presumably microseconds per
 * tick -- confirm against kern_clock.c) in both directions.  Values
 * smaller than one tick are rejected, and hogticks is kept at twice
 * the quantum.
 */
static int
sysctl_kern_quantum SYSCTL_HANDLER_ARGS
{
	int error, new_val;

	new_val = sched_quantum * tick;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);		/* read-only access or copy error */
	if (new_val < tick)
		return (EINVAL);	/* quantum must be at least one tick */
	sched_quantum = new_val / tick;
	hogticks = 2 * sched_quantum;
	return (0);
}

SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof sched_quantum, sysctl_kern_quantum, "I", "");
98
/* maybe_resched: Decide if you need to reschedule or not
 * taking the priorities and schedulers into account.
 */
static void maybe_resched(struct proc *chk)
{
	struct proc *p = curproc; /* XXX */

	/*
	 * Reschedule when there is no current process, when the candidate
	 * has better (numerically lower) priority within the same scheduler
	 * class, or when the candidate is in a more realtimeish class.
	 *
	 * XXX idle scheduler still broken because process stays on idle
	 * scheduler during waits (such as when getting FS locks).  If a
	 * standard process becomes runaway cpu-bound, the system can lockup
	 * due to idle-scheduler processes in wakeup never getting any cpu.
	 */
	if (p == 0 ||
		(chk->p_priority < curpriority && RTP_PRIO_BASE(p->p_rtprio.type) == RTP_PRIO_BASE(chk->p_rtprio.type)) ||
		RTP_PRIO_BASE(chk->p_rtprio.type) < RTP_PRIO_BASE(p->p_rtprio.type)
	) {
		need_resched();
	}
}
122
/*
 * Report the round-robin scheduling quantum, in clock ticks.
 */
int
roundrobin_interval(void)
{
	return (sched_quantum);
}
128
/*
 * Force switch among equal priority processes every 100ms.
 *
 * Self-rearming timeout handler.  On SMP it always requests a
 * reschedule and calls forward_roundrobin() (presumably to prod the
 * other cpus -- see machine/smp.h); on UP it reschedules only if the
 * current process is in a class that needs round-robin.
 */
/* ARGSUSED */
static void
roundrobin(arg)
	void *arg;
{
#ifndef SMP
	struct proc *p = curproc; /* XXX */
#endif

#ifdef SMP
	need_resched();
	forward_roundrobin();
#else
	if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type))
		need_resched();
#endif

	/* re-arm ourselves for the next quantum */
	timeout(roundrobin, NULL, sched_quantum);
}
151
152 /*
153 * Constants for digital decay and forget:
154 * 90% of (p_estcpu) usage in 5 * loadav time
155 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
156 * Note that, as ps(1) mentions, this can let percentages
157 * total over 100% (I've seen 137.9% for 3 processes).
158 *
159 * Note that statclock() updates p_estcpu and p_cpticks asynchronously.
160 *
161 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
162 * That is, the system wants to compute a value of decay such
163 * that the following for loop:
164 * for (i = 0; i < (5 * loadavg); i++)
165 * p_estcpu *= decay;
166 * will compute
167 * p_estcpu *= 0.1;
168 * for all values of loadavg:
169 *
170 * Mathematically this loop can be expressed by saying:
171 * decay ** (5 * loadavg) ~= .1
172 *
173 * The system computes decay as:
174 * decay = (2 * loadavg) / (2 * loadavg + 1)
175 *
176 * We wish to prove that the system's computation of decay
177 * will always fulfill the equation:
178 * decay ** (5 * loadavg) ~= .1
179 *
180 * If we compute b as:
181 * b = 2 * loadavg
182 * then
183 * decay = b / (b + 1)
184 *
185 * We now need to prove two things:
186 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
187 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
188 *
189 * Facts:
190 * For x close to zero, exp(x) =~ 1 + x, since
191 * exp(x) = 0! + x**1/1! + x**2/2! + ... .
192 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
193 * For x close to zero, ln(1+x) =~ x, since
194 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
195 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
196 * ln(.1) =~ -2.30
197 *
198 * Proof of (1):
199 * Solve (factor)**(power) =~ .1 given power (5*loadav):
200 * solving for factor,
201 * ln(factor) =~ (-2.30/5*loadav), or
202 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
203 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
204 *
205 * Proof of (2):
206 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
207 * solving for power,
208 * power*ln(b/(b+1)) =~ -2.30, or
209 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
210 *
211 * Actual power values for the implemented algorithm are as follows:
212 * loadav: 1 2 3 4
213 * power: 5.68 10.32 14.94 19.55
214 */
215
/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

/*
 * kernel uses `FSCALE', userland (SHOULD) use kern.fscale
 *
 * NOTE(review): this sysctl is declared with a NULL pointer and FSCALE
 * as the value argument, so it appears to export the compile-time
 * constant rather than the `fscale' variable -- confirm against the
 * SYSCTL_INT definition in sys/sysctl.h.
 */
static int	fscale __unused = FSCALE;
SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, "");

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11
241
/*
 * Recompute process priorities, every hz ticks.
 *
 * Self-rearming timeout.  For every process: age its swap and sleep
 * timers, decay p_pctcpu and p_estcpu, recompute the user priority,
 * and move the process to the run queue matching that priority.
 */
/* ARGSUSED */
static void
schedcpu(arg)
	void *arg;
{
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	register struct proc *p;
	register int realstathz, s;
	register unsigned int newcpu;

	/* fall back to hz if the statistics clock is not running */
	realstathz = stathz ? stathz : hz;
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up
		 * (updatepri() will catch it up then).
		 */
		if (p->p_slptime > 1)
			continue;
		s = splhigh();	/* prevent state changes and protect run queue */
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (realstathz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
			100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / realstathz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / realstathz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		/* digital decay of accumulated cpu usage, biased by nice */
		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice;
		p->p_estcpu = min(newcpu, UCHAR_MAX);
		resetpriority(p);
		if (p->p_priority >= PUSER) {
#define	PPQ	(128 / NQS)		/* priorities per queue */
			/*
			 * Requeue only when the new priority lands in a
			 * different run queue; otherwise just update the
			 * priority in place.
			 */
			if ((p != curproc) &&
#ifdef SMP
			    (u_char)p->p_oncpu == 0xff && /* idle */
#endif
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrq(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	vmmeter();
	/* lbolt sleepers get their once-a-second wakeup here */
	wakeup((caddr_t)&lbolt);
	/* re-arm for the next second */
	timeout(schedcpu, (void *)0, hz);
}
310
311 /*
312 * Recalculate the priority of a process after it has slept for a while.
313 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
314 * least six times the loadfactor will decay p_estcpu to zero.
315 */
316 static void
317 updatepri(p)
318 register struct proc *p;
319 {
320 register unsigned int newcpu = p->p_estcpu;
321 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
322
323 if (p->p_slptime > 5 * loadfac)
324 p->p_estcpu = 0;
325 else {
326 p->p_slptime--; /* the first time was done in schedcpu */
327 while (newcpu && --p->p_slptime)
328 newcpu = (int) decay_cpu(loadfac, newcpu);
329 p->p_estcpu = min(newcpu, UCHAR_MAX);
330 }
331 resetpriority(p);
332 }
333
/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE];
/* hash a wait channel address to its sleep-queue bucket */
#define LOOKUP(x)	(((intptr_t)(x) >> 8) & (TABLESIZE - 1))

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;
353
354 void
355 sleepinit(void)
356 {
357 int i;
358
359 sched_quantum = hz/10;
360 hogticks = 2 * sched_quantum;
361 for (i = 0; i < TABLESIZE; i++)
362 TAILQ_INIT(&slpque[i]);
363 }
364
/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal (return EINTR).
 */
int
tsleep(ident, priority, wmesg, timo)
	void *ident;
	int priority, timo;
	const char *wmesg;
{
	struct proc *p = curproc;
	int s, sig, catch = priority & PCATCH;
	struct callout_handle thandle;

#ifdef KTRACE
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return (0);
	}
	KASSERT(p != NULL, ("tsleep1"));
	KASSERT(ident != NULL && p->p_stat == SRUN, ("tsleep"));
	/*
	 * Process may be sitting on a slpque if asleep() was called, remove
	 * it before re-adding.
	 */
	if (p->p_wchan != NULL)
		unsleep(p);

	/* Record the sleep condition and queue ourselves on the hash bucket. */
	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
	if (timo)
		thandle = timeout(endtsleep, (void *)p, timo);
	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p))) {
			/* A signal is already pending: bail out without sleeping. */
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			/* Woken up while stopped inside CURSIG. */
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;	/* count a voluntary context switch */
	mi_switch();
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		/* endtsleep() fired, so no untimeout is needed below. */
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			return (EWOULDBLOCK);
		}
	} else if (timo)
		untimeout(endtsleep, (void *)p, thandle);
	if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
		/* ps_sigintr selects EINTR vs. ERESTART for this signal. */
		if (p->p_sigacts->ps_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	return (0);
}
474
/*
 * asleep() - async sleep call.  Place process on wait queue and return
 * immediately without blocking.  The process stays runnable until await()
 * is called.  If ident is NULL, remove process from wait queue if it is still
 * on one.
 *
 * Only the most recent sleep condition is effective when making successive
 * calls to asleep() or when calling tsleep().
 *
 * The timeout, if any, is not initiated until await() is called.  The sleep
 * priority, signal, and timeout is specified in the asleep() call but may be
 * overridden in the await() call.
 *
 * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>>
 */

int
asleep(void *ident, int priority, const char *wmesg, int timo)
{
	struct proc *p = curproc;
	int s;

	/*
	 * splhigh() while manipulating sleep structures and slpque.
	 *
	 * Remove preexisting wait condition (if any) and place process
	 * on appropriate slpque, but do not put process to sleep.
	 */

	s = splhigh();

	if (p->p_wchan != NULL)
		unsleep(p);

	if (ident) {
		/* Record the condition; await() performs the actual sleep. */
		p->p_wchan = ident;
		p->p_wmesg = wmesg;
		p->p_slptime = 0;
		p->p_asleep.as_priority = priority;
		p->p_asleep.as_timo = timo;
		TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
	}

	splx(s);

	return(0);
}
522
/*
 * await() - wait for async condition to occur.  The process blocks until
 * wakeup() is called on the most recent asleep() address.  If wakeup is called
 * prior to await(), await() winds up being a NOP.
 *
 * If await() is called more than once (without an intervening asleep() call),
 * await() is still effectively a NOP but it calls mi_switch() to give other
 * processes some cpu before returning.  The process is left runnable.
 *
 * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>>
 */

int
await(int priority, int timo)
{
	struct proc *p = curproc;
	int s;

	s = splhigh();

	if (p->p_wchan != NULL) {
		struct callout_handle thandle;
		int sig;
		int catch;

		/*
		 * The call to await() can override defaults specified in
		 * the original asleep().
		 */
		if (priority < 0)
			priority = p->p_asleep.as_priority;
		if (timo < 0)
			timo = p->p_asleep.as_timo;

		/*
		 * Install timeout
		 */

		if (timo)
			thandle = timeout(endtsleep, (void *)p, timo);

		sig = 0;
		catch = priority & PCATCH;

		if (catch) {
			p->p_flag |= P_SINTR;
			if ((sig = CURSIG(p))) {
				/* A signal is already pending: do not sleep. */
				if (p->p_wchan)
					unsleep(p);
				p->p_stat = SRUN;
				goto resume;
			}
			if (p->p_wchan == NULL) {
				/* Woken up while stopped inside CURSIG. */
				catch = 0;
				goto resume;
			}
		}
		p->p_stat = SSLEEP;
		p->p_stats->p_ru.ru_nvcsw++;	/* voluntary context switch */
		mi_switch();
resume:
		curpriority = p->p_usrpri;

		splx(s);
		p->p_flag &= ~P_SINTR;
		if (p->p_flag & P_TIMEOUT) {
			/* endtsleep() fired; no untimeout needed below. */
			p->p_flag &= ~P_TIMEOUT;
			if (sig == 0) {
#ifdef KTRACE
				if (KTRPOINT(p, KTR_CSW))
					ktrcsw(p->p_tracep, 0, 0);
#endif
				return (EWOULDBLOCK);
			}
		} else if (timo)
			untimeout(endtsleep, (void *)p, thandle);
		if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			/* ps_sigintr selects EINTR vs. ERESTART for this signal. */
			if (p->p_sigacts->ps_sigintr & sigmask(sig))
				return (EINTR);
			return (ERESTART);
		}
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
	} else {
		/*
		 * If as_priority is 0, await() has been called without an
		 * intervening asleep().  We are still effectively a NOP,
		 * but we call mi_switch() for safety.
		 */

		if (p->p_asleep.as_priority == 0) {
			p->p_stats->p_ru.ru_nvcsw++;
			mi_switch();
		}
		splx(s);
	}

	/*
	 * clear p_asleep.as_priority as an indication that await() has been
	 * called.  If await() is called again without an intervening asleep(),
	 * await() is still effectively a NOP but the above mi_switch() code
	 * is triggered as a safety.
	 */
	p->p_asleep.as_priority = 0;

	return (0);
}
636
/*
 * Implement timeout for tsleep or asleep()/await()
 *
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
static void
endtsleep(arg)
	void *arg;
{
	register struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		/* tsleep()/await() test P_TIMEOUT to return EWOULDBLOCK */
		p->p_flag |= P_TIMEOUT;
	}
	splx(s);
}
662
663 /*
664 * Remove a process from its wait queue
665 */
666 void
667 unsleep(p)
668 register struct proc *p;
669 {
670 int s;
671
672 s = splhigh();
673 if (p->p_wchan) {
674 TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq);
675 p->p_wchan = 0;
676 }
677 splx(s);
678 }
679
/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct proc *p;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];
restart:
	for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) {
		/* the bucket is hashed; skip procs sleeping on other idents */
		if (p->p_wchan == ident) {
			TAILQ_REMOVE(qp, p, p_procq);
			p->p_wchan = 0;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM) {
					setrunqueue(p);
					maybe_resched(p);
				} else {
					/* swapped out: ask the swapper (proc0) to bring it in */
					p->p_flag |= P_SWAPINREQ;
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */
				/* the list was modified by TAILQ_REMOVE; rescan */
				goto restart;
			}
		}
	}
	splx(s);
}
718
/*
 * Make a process sleeping on the specified identifier runnable.
 * May wake more than one process if a target process is currently
 * swapped out.
 */
void
wakeup_one(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct proc *p;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];

	for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) {
		/* the bucket is hashed; skip procs sleeping on other idents */
		if (p->p_wchan == ident) {
			TAILQ_REMOVE(qp, p, p_procq);
			p->p_wchan = 0;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM) {
					/* one process made runnable: we are done */
					setrunqueue(p);
					maybe_resched(p);
					break;
				} else {
					/* swapped out: wake the swapper and keep looking */
					p->p_flag |= P_SWAPINREQ;
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */
			}
		}
	}
	splx(s);
}
759
/*
 * The machine independent parts of mi_switch().
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	register struct proc *p = curproc;	/* XXX */
	register struct rlimit *rlim;
	int x;

	/*
	 * XXX this spl is almost unnecessary.  It is partly to allow for
	 * sloppy callers that don't do it (issignal() via CURSIG() is the
	 * main offender).  It is partly to work around a bug in the i386
	 * cpu_switch() (the ipl is not preserved).  We ran for years
	 * without it.  I think there was only a interrupt latency problem.
	 * The main caller, tsleep(), does an splx() a couple of instructions
	 * after calling here.  The buggy caller, issignal(), usually calls
	 * here at spl0() and sometimes returns at splhigh().  The process
	 * then runs for a little too long at splhigh().  The ipl gets fixed
	 * when the process returns to user mode (or earlier).
	 *
	 * It would probably be better to always call here at spl0(). Callers
	 * are prepared to give up control to another process, so they must
	 * be prepared to be interrupted.  The clock stuff here may not
	 * actually need splstatclock().
	 */
	x = splstatclock();

#ifdef SIMPLELOCK_DEBUG
	if (p->p_simple_locks)
		printf("sleep: holding simple lock\n");
#endif
	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microuptime(&switchtime);
	p->p_runtime += (switchtime.tv_usec - p->p_switchtime.tv_usec) +
	    (switchtime.tv_sec - p->p_switchtime.tv_sec) * (int64_t)1000000;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.
	 */
	if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY &&
	    p->p_runtime > p->p_limit->p_cpulimit) {
		rlim = &p->p_rlimit[RLIMIT_CPU];
		if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) {
			/* hard limit exceeded: terminate the process */
			killproc(p, "exceeded maximum CPU limit");
		} else {
			/* soft limit: warn with SIGXCPU and bump the limit */
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max) {
				/* XXX: we should make a private copy */
				rlim->rlim_cur += 5;
			}
		}
	}

	/*
	 * Pick a new current process and record its start time.
	 */
	cnt.v_swtch++;
	cpu_switch(p);
	/* execution resumes here when this process is switched back in */
	if (switchtime.tv_sec)
		p->p_switchtime = switchtime;
	else
		microuptime(&p->p_switchtime);
	switchticks = ticks;

	splx(x);
}
833
834 /*
835 * Initialize the (doubly-linked) run queues
836 * to be empty.
837 */
838 /* ARGSUSED*/
839 static void
840 rqinit(dummy)
841 void *dummy;
842 {
843 register int i;
844
845 for (i = 0; i < NQS; i++) {
846 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
847 rtqs[i].ph_link = rtqs[i].ph_rlink = (struct proc *)&rtqs[i];
848 idqs[i].ph_link = idqs[i].ph_rlink = (struct proc *)&idqs[i];
849 }
850 }
851
/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	default:
		/* already runnable, dead, or garbage state: caller error */
		panic("setrunnable");
	case SSTOP:
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	splx(s);
	/* credit a long sleep with decayed cpu usage before rescheduling */
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0) {
		/* swapped out: ask the swapper (proc0) to bring it in */
		p->p_flag |= P_SWAPINREQ;
		wakeup((caddr_t)&proc0);
	}
	else
		maybe_resched(p);
}
892
/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
	register struct proc *p;
{
	register unsigned int newpriority;

	/*
	 * Only RTP_PRIO_NORMAL (timesharing) processes have p_usrpri
	 * recomputed from estimated cpu usage and nice; other classes
	 * are left untouched here.
	 */
	if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
		newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
		newpriority = min(newpriority, MAXPRI);
		p->p_usrpri = newpriority;
	}
	maybe_resched(p);
}
911
/*
 * SYSINIT hook: prime the timeout-driven scheduler events.  Both
 * roundrobin() and schedcpu() re-arm themselves via timeout(), so a
 * single initial call to each keeps them running.
 */
/* ARGSUSED */
static void
sched_setup(dummy)
	void *dummy;
{
	/* Kick off timeout driven events by calling first time. */
	roundrobin(NULL);
	schedcpu(NULL);
}
921
Cache object: 229c508552e938b50e62784cfc36172b
|