FreeBSD/Linux Kernel Cross Reference
sys/port/proc.c
1 #include <u.h>
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "../port/error.h"
7 #include "edf.h"
8 #include <trace.h>
9
10 int schedgain = 30; /* units in seconds */
11 int nrdy;
12 Ref noteidalloc;
13
14 void updatecpu(Proc*);
15 int reprioritize(Proc*);
16
17 ulong delayedscheds; /* statistics */
18 long skipscheds;
19 long preempts;
20 ulong load;
21
22 static Ref pidalloc;
23
24 static struct Procalloc
25 {
26 Lock;
27 Proc* ht[128];
28 Proc* arena;
29 Proc* free;
30 } procalloc;
31
32 enum
33 {
34 Q=10,
35 DQ=4,
36 Scaling=2,
37 };
38
39 Schedq runq[Nrq];
40 ulong runvec;
41
42 char *statename[] =
43 { /* BUG: generate automatically */
44 "Dead",
45 "Moribund",
46 "Ready",
47 "Scheding",
48 "Running",
49 "Queueing",
50 "QueueingR",
51 "QueueingW",
52 "Wakeme",
53 "Broken",
54 "Stopped",
55 "Rendez",
56 "Waitrelease",
57 };
58
59 static void pidhash(Proc*);
60 static void pidunhash(Proc*);
61 static void rebalance(void);
62
63 /*
64 * Always splhi()'ed.
65 */
66 void
67 schedinit(void) /* never returns */
68 {
69 Edf *e;
70
71 setlabel(&m->sched);
72 if(up) {
73 if((e = up->edf) && (e->flags & Admitted))
74 edfrecord(up);
75 m->proc = 0;
76 switch(up->state) {
77 case Running:
78 ready(up);
79 break;
80 case Moribund:
81 up->state = Dead;
82 edfstop(up);
83 if (up->edf)
84 free(up->edf);
85 up->edf = nil;
86
87 /*
88 * Holding locks from pexit:
89 * procalloc
90 * palloc
91 */
92 mmurelease(up);
93
94 up->qnext = procalloc.free;
95 procalloc.free = up;
96
97 unlock(&palloc);
98 unlock(&procalloc);
99 break;
100 }
101 up->mach = nil;
102 updatecpu(up);
103 up = nil;
104 }
105 sched();
106 }
107
108 /*
109 * If changing this routine, look also at sleep(). It
110 * contains a copy of the guts of sched().
111 */
112 void
113 sched(void)
114 {
115 Proc *p;
116
117 if(m->ilockdepth)
118 panic("ilockdepth %d, last lock %#p at %#lux, sched called from %#p",
119 m->ilockdepth, up?up->lastilock:nil,
120 (up && up->lastilock)?up->lastilock->pc:0,
121 getcallerpc(&p+2));
122
123 if(up){
124 /*
125 * Delay the sched until the process gives up the locks
126 * it is holding. This avoids dumb lock loops.
127 * Don't delay if the process is Moribund.
128 * It called sched to die.
129 * But do sched eventually. This avoids a missing unlock
130 * from hanging the entire kernel.
131 * But don't reschedule procs holding palloc or procalloc.
132 * Those are far too important to be holding while asleep.
133 *
134 * This test is not exact. There can still be a few instructions
135 * in the middle of taslock when a process holds a lock
136 * but Lock.p has not yet been initialized.
137 */
138 if(up->nlocks.ref)
139 if(up->state != Moribund)
140 if(up->delaysched < 20
141 || palloc.Lock.p == up
142 || procalloc.Lock.p == up){
143 up->delaysched++;
144 delayedscheds++;
145 return;
146 }
147 up->delaysched = 0;
148
149 splhi();
150
151 /* statistics */
152 m->cs++;
153
154 procsave(up);
155 if(setlabel(&up->sched)){
156 procrestore(up);
157 spllo();
158 return;
159 }
160 gotolabel(&m->sched);
161 }
162 p = runproc();
163 if(!p->edf){
164 updatecpu(p);
165 p->priority = reprioritize(p);
166 }
167 if(p != m->readied)
168 m->schedticks = m->ticks + HZ/10;
169 m->readied = 0;
170 up = p;
171 up->state = Running;
172 up->mach = MACHP(m->machno);
173 m->proc = up;
174 mmuswitch(up);
175 gotolabel(&up->sched);
176 }
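/*
 * The context switch in sched() happens in two halves: setlabel(&up->sched)
 * saves the outgoing process's registers and returns 0, so control falls
 * through to gotolabel(&m->sched), which resumes schedinit() on this
 * processor's scheduler stack to ready (or bury) the old process and pick
 * a new one. When runproc() later selects the saved process again, the
 * gotolabel(&up->sched) at the bottom of sched() resumes it as if its
 * setlabel had returned non-zero, and it restores state and returns.
 */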
177
178 int
179 anyready(void)
180 {
181 return runvec;
182 }
183
184 int
185 anyhigher(void)
186 {
187 return runvec & ~((1<<(up->priority+1))-1);
188 }
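/*
 * runvec holds one bit per priority queue: queueproc() sets bit pri when
 * runq[pri] gains a process and dequeueproc() clears it when the queue
 * empties, so anyready() is a single non-zero test. anyhigher() masks off
 * bit up->priority and everything below it; e.g. with up->priority == 3,
 * ~((1<<4)-1) clears bits 0-3, and any bit left set means a strictly
 * higher-priority process is ready to run.
 */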
189
190 /*
191 * here once per clock tick to see if we should resched
192 */
193 void
194 hzsched(void)
195 {
196 /* once a second, rebalance will reprioritize ready procs */
197 if(m->machno == 0)
198 rebalance();
199
200 /* unless preempted, get to run for at least 100ms */
201 if(anyhigher()
202 || (!up->fixedpri && m->ticks > m->schedticks && anyready())){
203 m->readied = nil; /* avoid cooperative scheduling */
204 up->delaysched++;
205 }
206 }
207
208 /*
209 * here at the end of non-clock interrupts to see if we should preempt the
210 * current process. Returns 1 if preempted, 0 otherwise.
211 */
212 int
213 preempted(void)
214 {
215 if(up && up->state == Running)
216 if(up->preempted == 0)
217 if(anyhigher())
218 if(!active.exiting){
219 m->readied = nil; /* avoid cooperative scheduling */
220 up->preempted = 1;
221 sched();
222 splhi();
223 up->preempted = 0;
224 return 1;
225 }
226 return 0;
227 }
228
229 /*
230 * Update the cpu time average for this particular process,
231 * which is about to change from up -> not up or vice versa.
232 * p->lastupdate is the last time an updatecpu happened.
233 *
234 * The cpu time average is a decaying average that lasts
235 * about D clock ticks. D is chosen to be approximately
236 * the cpu time of a cpu-intensive "quick job". A job has to run
237 * for approximately D clock ticks before we home in on its
238 * actual cpu usage. Thus if you manage to get in and get out
239 * quickly, you won't be penalized during your burst. Once you
240 * start using your share of the cpu for more than about D
241 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up
242 * below all the other quick jobs. Interactive tasks, because
243 * they basically always use less than their fair share of cpu,
244 * will be rewarded.
245 *
246 * If the process has not been running, then we want to
247 * apply the filter
248 *
249 * cpu = cpu * (D-1)/D
250 *
251 * n times, yielding
252 *
253 * cpu = cpu * ((D-1)/D)^n
254 *
255 * but D is big enough that this is approximately
256 *
257 * cpu = cpu * (D-n)/D
258 *
259 * so we use that instead.
260 *
261 * If the process has been running, we apply the filter to
262 * 1 - cpu, yielding a similar equation. Note that cpu is
263 * stored in fixed point (* 1000).
264 *
265 * Updatecpu must be called before changing up, in order
266 * to maintain accurate cpu usage statistics. It can be called
267 * at any time to bring the stats for a given proc up-to-date.
268 */
269 void
270 updatecpu(Proc *p)
271 {
272 int n, t, ocpu;
273 int D = schedgain*HZ*Scaling;
274
275 if(p->edf)
276 return;
277
278 t = MACHP(0)->ticks*Scaling + Scaling/2;
279 n = t - p->lastupdate;
280 p->lastupdate = t;
281
282 if(n == 0)
283 return;
284 if(n > D)
285 n = D;
286
287 ocpu = p->cpu;
288 if(p != up)
289 p->cpu = (ocpu*(D-n))/D;
290 else{
291 t = 1000 - ocpu;
292 t = (t*(D-n))/D;
293 p->cpu = 1000 - t;
294 }
295
296 //iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu);
297 }
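/*
 * Stand-alone sketch of the filter (illustrative only; decaycpu is not a
 * kernel routine and HZ is assumed to be 100 for the arithmetic): one
 * update moves p->cpu toward 0 for an idle process and toward 1000 for a
 * running one, by the fraction n/D of the remaining distance. With
 * D = schedgain*HZ*Scaling = 30*100*2 = 6000 and n = 600 scaled ticks,
 * an idle process decays from cpu 500 to 500*5400/6000 = 450, while a
 * running one climbs from 500 to 1000 - 500*5400/6000 = 550.
 */
static int
decaycpu(int cpu, int n, int D, int running)
{
	if(running)
		return 1000 - ((1000-cpu)*(D-n))/D;	/* decay 1 - cpu */
	return (cpu*(D-n))/D;				/* decay cpu */
}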
298
299 /*
300 * On average, p has used p->cpu of a cpu recently.
301 * Its fair share is conf.nmach/m->load of a cpu. If it has been getting
302 * too much, penalize it. If it has not been getting enough, reward it.
303 * I don't think you can get much more than your fair share that
304 * often, so most of the queues are for using less. Having a priority
305 * of 3 means you're just right. Having a higher priority (up to p->basepri)
306 * means you're not using as much as you could.
307 */
308 int
309 reprioritize(Proc *p)
310 {
311 int fairshare, n, load, ratio;
312
313 load = MACHP(0)->load;
314 if(load == 0)
315 return p->basepri;
316
317 /*
318 * fairshare = 1.000 * conf.nmach * 1.000/load,
319 * except the decimal point is moved three places
320 * on both load and fairshare.
321 */
322 fairshare = (conf.nmach*1000*1000)/load;
323 n = p->cpu;
324 if(n == 0)
325 n = 1;
326 ratio = (fairshare+n/2) / n;
327 if(ratio > p->basepri)
328 ratio = p->basepri;
329 if(ratio < 0)
330 panic("reprioritize");
331 //iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio);
332 return ratio;
333 }
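/*
 * Worked example (numbers are illustrative, assuming one processor):
 * with load = 1000 (about one runnable process's worth), fairshare =
 * (1*1000*1000)/1000 = 1000. A light user with cpu = 250 gets
 * ratio = (1000+125)/250 = 4, while a hog with cpu = 900 gets
 * (1000+450)/900 = 1 and sinks to the bottom queues; the cap at
 * p->basepri keeps a nearly idle process from rising above its class.
 */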
334
335 /*
336 * add a process to a scheduling queue
337 */
338 void
339 queueproc(Schedq *rq, Proc *p)
340 {
341 int pri;
342
343 pri = rq - runq;
344 lock(runq);
345 p->priority = pri;
346 p->rnext = 0;
347 if(rq->tail)
348 rq->tail->rnext = p;
349 else
350 rq->head = p;
351 rq->tail = p;
352 rq->n++;
353 nrdy++;
354 runvec |= 1<<pri;
355 unlock(runq);
356 }
357
358 /*
359 * try to remove a process from a scheduling queue (called splhi)
360 */
361 Proc*
362 dequeueproc(Schedq *rq, Proc *tp)
363 {
364 Proc *l, *p;
365
366 if(!canlock(runq))
367 return nil;
368
369 /*
370 * the queue may have changed before we locked runq,
371 * refind the target process.
372 */
373 l = 0;
374 for(p = rq->head; p; p = p->rnext){
375 if(p == tp)
376 break;
377 l = p;
378 }
379
380 /*
381 * p->mach==0 only when process state is saved
382 */
383 if(p == 0 || p->mach){
384 unlock(runq);
385 return nil;
386 }
387 if(p->rnext == 0)
388 rq->tail = l;
389 if(l)
390 l->rnext = p->rnext;
391 else
392 rq->head = p->rnext;
393 if(rq->head == nil)
394 runvec &= ~(1<<(rq-runq));
395 rq->n--;
396 nrdy--;
397 if(p->state != Ready)
398 print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);
399
400 unlock(runq);
401 return p;
402 }
403
404 /*
405 * ready(p) picks a new priority for a process and sticks it in the
406 * runq for that priority.
407 */
408 void
409 ready(Proc *p)
410 {
411 int s, pri;
412 Schedq *rq;
413 void (*pt)(Proc*, int, vlong);
414
415 s = splhi();
416 if(edfready(p)){
417 splx(s);
418 return;
419 }
420
421 if(up != p)
422 m->readied = p; /* group scheduling */
423
424 updatecpu(p);
425 pri = reprioritize(p);
426 p->priority = pri;
427 rq = &runq[pri];
428 p->state = Ready;
429 queueproc(rq, p);
430 pt = proctrace;
431 if(pt)
432 pt(p, SReady, 0);
433 splx(s);
434 }
435
436 /*
437 * yield the processor and drop our priority
438 */
439 void
440 yield(void)
441 {
442 if(anyready()){
443 /* pretend we just used 1/2 tick */
444 up->lastupdate -= Scaling/2;
445 sched();
446 }
447 }
448
449 /*
450 * recalculate priorities once a second. We need to do this
451 * since priorities will otherwise only be recalculated when
452 * the running process blocks.
453 */
454 ulong balancetime;
455
456 static void
457 rebalance(void)
458 {
459 int pri, npri, t, x;
460 Schedq *rq;
461 Proc *p;
462
463 t = m->ticks;
464 if(t - balancetime < HZ)
465 return;
466 balancetime = t;
467
468 for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
469 another:
470 p = rq->head;
471 if(p == nil)
472 continue;
473 if(p->mp != MACHP(m->machno))
474 continue;
475 if(pri == p->basepri)
476 continue;
477 updatecpu(p);
478 npri = reprioritize(p);
479 if(npri != pri){
480 x = splhi();
481 p = dequeueproc(rq, p);
482 if(p)
483 queueproc(&runq[npri], p);
484 splx(x);
485 goto another;
486 }
487 }
488 }
489
490
491 /*
492 * pick a process to run
493 */
494 Proc*
495 runproc(void)
496 {
497 Schedq *rq;
498 Proc *p;
499 ulong start, now;
500 int i;
501 void (*pt)(Proc*, int, vlong);
502
503 start = perfticks();
504
505 /* cooperative scheduling until the clock ticks */
506 if((p=m->readied) && p->mach==0 && p->state==Ready
507 && runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){
508 skipscheds++;
509 rq = &runq[p->priority];
510 goto found;
511 }
512
513 preempts++;
514
515 loop:
516 /*
517 * find a process that last ran on this processor (affinity),
518 * or one that hasn't moved in a while (load balancing). Every
519 * time around the loop affinity goes down.
520 */
521 spllo();
522 for(i = 0;; i++){
523 /*
524 * find the highest priority target process that this
525 * processor can run given affinity constraints.
526 *
527 */
528 for(rq = &runq[Nrq-1]; rq >= runq; rq--){
529 for(p = rq->head; p; p = p->rnext){
530 if(p->mp == nil || p->mp == MACHP(m->machno)
531 || (!p->wired && i > 0))
532 goto found;
533 }
534 }
535
536 /* waste time or halt the CPU */
537 idlehands();
538
539 /* remember how much time we spend idling here */
540 now = perfticks();
541 m->perf.inidle += now-start;
542 start = now;
543 }
544
545 found:
546 splhi();
547 p = dequeueproc(rq, p);
548 if(p == nil)
549 goto loop;
550
551 p->state = Scheding;
552 p->mp = MACHP(m->machno);
553
554 if(edflock(p)){
555 edfrun(p, rq == &runq[PriEdf]); /* start deadline timer and do admin */
556 edfunlock();
557 }
558 pt = proctrace;
559 if(pt)
560 pt(p, SRun, 0);
561 return p;
562 }
563
564 int
565 canpage(Proc *p)
566 {
567 int ok = 0;
568
569 splhi();
570 lock(runq);
571 /* Only reliable way to see if we are Running */
572 if(p->mach == 0) {
573 p->newtlb = 1;
574 ok = 1;
575 }
576 unlock(runq);
577 spllo();
578
579 return ok;
580 }
581
582 Proc*
583 newproc(void)
584 {
585 char msg[64];
586 Proc *p;
587
588 lock(&procalloc);
589 for(;;) {
590 if(p = procalloc.free)
591 break;
592
593 snprint(msg, sizeof msg, "no procs; %s forking",
594 up? up->text: "kernel");
595 unlock(&procalloc);
596 resrcwait(msg);
597 lock(&procalloc);
598 }
599 procalloc.free = p->qnext;
600 unlock(&procalloc);
601
602 p->state = Scheding;
603 p->psstate = "New";
604 p->mach = 0;
605 p->qnext = 0;
606 p->nchild = 0;
607 p->nwait = 0;
608 p->waitq = 0;
609 p->parent = 0;
610 p->pgrp = 0;
611 p->egrp = 0;
612 p->fgrp = 0;
613 p->rgrp = 0;
614 p->pdbg = 0;
615 p->fpstate = FPinit;
616 p->kp = 0;
617 p->procctl = 0;
618 p->notepending = 0;
619 p->ureg = 0;
620 p->privatemem = 0;
621 p->noswap = 0;
622 p->errstr = p->errbuf0;
623 p->syserrstr = p->errbuf1;
624 p->errbuf0[0] = '\0';
625 p->errbuf1[0] = '\0';
626 p->nlocks.ref = 0;
627 p->delaysched = 0;
628 p->trace = 0;
629 kstrdup(&p->user, "*nouser");
630 kstrdup(&p->text, "*notext");
631 kstrdup(&p->args, "");
632 p->nargs = 0;
633 p->setargs = 0;
634 memset(p->seg, 0, sizeof p->seg);
635 p->pid = incref(&pidalloc);
636 pidhash(p);
637 p->noteid = incref(¬eidalloc);
638 if(p->pid==0 || p->noteid==0)
639 panic("pidalloc");
640 if(p->kstack == 0)
641 p->kstack = smalloc(KSTACK);
642
643 /* sched params */
644 p->mp = 0;
645 p->wired = 0;
646 procpriority(p, PriNormal, 0);
647 p->cpu = 0;
648 p->lastupdate = MACHP(0)->ticks*Scaling;
649 p->edf = nil;
650
651 return p;
652 }
653
654 /*
655 * wire this proc to a machine
656 */
657 void
658 procwired(Proc *p, int bm)
659 {
660 Proc *pp;
661 int i;
662 char nwired[MAXMACH];
663 Mach *wm;
664
665 if(bm < 0){
666 /* pick a machine to wire to */
667 memset(nwired, 0, sizeof(nwired));
668 p->wired = 0;
669 pp = proctab(0);
670 for(i=0; i<conf.nproc; i++, pp++){
671 wm = pp->wired;
672 if(wm && pp->pid)
673 nwired[wm->machno]++;
674 }
675 bm = 0;
676 for(i=0; i<conf.nmach; i++)
677 if(nwired[i] < nwired[bm])
678 bm = i;
679 } else {
680 /* use the virtual machine requested */
681 bm = bm % conf.nmach;
682 }
683
684 p->wired = MACHP(bm);
685 p->mp = p->wired;
686 }
687
688 void
689 procpriority(Proc *p, int pri, int fixed)
690 {
691 if(pri >= Npriq)
692 pri = Npriq - 1;
693 else if(pri < 0)
694 pri = 0;
695 p->basepri = pri;
696 p->priority = pri;
697 if(fixed){
698 p->fixedpri = 1;
699 } else {
700 p->fixedpri = 0;
701 }
702 }
703
704 void
705 procinit0(void) /* bad planning - clashes with devproc.c */
706 {
707 Proc *p;
708 int i;
709
710 procalloc.free = xalloc(conf.nproc*sizeof(Proc));
711 if(procalloc.free == nil){
712 xsummary();
713 panic("cannot allocate %lud procs (%ludMB)\n", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024));
714 }
715 procalloc.arena = procalloc.free;
716
717 p = procalloc.free;
718 for(i=0; i<conf.nproc-1; i++,p++)
719 p->qnext = p+1;
720 p->qnext = 0;
721 }
722
723 /*
724 * sleep if a condition is not true. Another process will
725 * awaken us after it sets the condition. When we awaken
726 * the condition may no longer be true.
727 *
728 * we lock both the process and the rendezvous to keep r->p
729 * and p->r synchronized.
730 */
731 void
732 sleep(Rendez *r, int (*f)(void*), void *arg)
733 {
734 int s;
735 void (*pt)(Proc*, int, vlong);
736
737 s = splhi();
738
739 if(up->nlocks.ref)
740 print("process %lud sleeps with %lud locks held, last lock %#p locked at pc %#lux, sleep called from %#p\n",
741 up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r));
742 lock(r);
743 lock(&up->rlock);
744 if(r->p){
745 print("double sleep called from %#p, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid);
746 dumpstack();
747 }
748
749 /*
750 * Wakeup only knows there may be something to do by testing
751 * r->p in order to get something to lock on.
752 * Flush that information out to memory in case the sleep is
753 * committed.
754 */
755 r->p = up;
756
757 if((*f)(arg) || up->notepending){
758 /*
759 * if condition happened or a note is pending
760 * never mind
761 */
762 r->p = nil;
763 unlock(&up->rlock);
764 unlock(r);
765 } else {
766 /*
767 * now we are committed to
768 * change state and call scheduler
769 */
770 pt = proctrace;
771 if(pt)
772 pt(up, SSleep, 0);
773 up->state = Wakeme;
774 up->r = r;
775
776 /* statistics */
777 m->cs++;
778
779 procsave(up);
780 if(setlabel(&up->sched)) {
781 /*
782 * here when the process is awakened
783 */
784 procrestore(up);
785 spllo();
786 } else {
787 /*
788 * here to go to sleep (i.e. stop Running)
789 */
790 unlock(&up->rlock);
791 unlock(r);
792 gotolabel(&m->sched);
793 }
794 }
795
796 if(up->notepending) {
797 up->notepending = 0;
798 splx(s);
799 if(up->procctl == Proc_exitme && up->closingfgrp)
800 forceclosefgrp();
801 error(Eintr);
802 }
803
804 splx(s);
805 }
806
807 static int
808 tfn(void *arg)
809 {
810 return up->trend == nil || up->tfn(arg);
811 }
812
813 void
814 twakeup(Ureg*, Timer *t)
815 {
816 Proc *p;
817 Rendez *trend;
818
819 p = t->ta;
820 trend = p->trend;
821 p->trend = 0;
822 if(trend)
823 wakeup(trend);
824 }
825
826 void
827 tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
828 {
829 if (up->tt){
830 print("tsleep: timer active: mode %d, tf %#p\n", up->tmode, up->tf);
831 timerdel(up);
832 }
833 up->tns = MS2NS(ms);
834 up->tf = twakeup;
835 up->tmode = Trelative;
836 up->ta = up;
837 up->trend = r;
838 up->tfn = fn;
839 timeradd(up);
840
841 if(waserror()){
842 timerdel(up);
843 nexterror();
844 }
845 sleep(r, tfn, arg);
846 if (up->tt)
847 timerdel(up);
848 up->twhen = 0;
849 poperror();
850 }
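/*
 * Usage sketch (the queue names are hypothetical): a caller that only
 * wants to pause passes a condition that never fires, such as the port
 * helper return0(), so only the timer wakes it; a caller waiting for
 * real work passes the same condition it would give sleep(), with the
 * timeout bounding the wait:
 *
 *	tsleep(&up->sleep, return0, 0, 500);	pause for about 500ms
 *	tsleep(&q->r, notempty, q, 1000);	wait for data, at most 1s
 */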
851
852 /*
853 * Expects that only one process can call wakeup for any given Rendez.
854 * We hold both locks to ensure that r->p and p->r remain consistent.
855 * Richard Miller has a better solution that doesn't require both to
856 * be held simultaneously, but I'm a paranoid - presotto.
857 */
858 Proc*
859 wakeup(Rendez *r)
860 {
861 Proc *p;
862 int s;
863
864 s = splhi();
865
866 lock(r);
867 p = r->p;
868
869 if(p != nil){
870 lock(&p->rlock);
871 if(p->state != Wakeme || p->r != r){
872 iprint("%p %p %d\n", p->r, r, p->state);
873 panic("wakeup: state");
874 }
875 r->p = nil;
876 p->r = nil;
877 ready(p);
878 unlock(&p->rlock);
879 }
880 unlock(r);
881
882 splx(s);
883
884 return p;
885 }
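/*
 * Minimal sleep/wakeup usage sketch (hypothetical names, not part of
 * proc.c): the sleeper re-checks a shared flag via its condition
 * function and the waker sets the flag before calling wakeup, so a
 * wakeup that arrives just before the sleep commits is not lost -
 * sleep() sees the condition already true and returns at once.
 */
static Rendez donerendez;	/* illustrative only */
static int done;

static int
isdone(void *a)
{
	return *(int*)a;
}

static void
waitfordone(void)
{
	while(!done)
		sleep(&donerendez, isdone, &done);
}

static void
postdone(void)		/* e.g. called from an interrupt handler */
{
	done = 1;
	wakeup(&donerendez);
}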
886
887 /*
888 * if waking a sleeping process, this routine must hold both
889 * p->rlock and r->lock. However, it can't know them in
890 * the same order as wakeup causing a possible lock ordering
891 * deadlock. We break the deadlock by giving up the p->rlock
892 * lock if we can't get the r->lock and retrying.
893 */
894 int
895 postnote(Proc *p, int dolock, char *n, int flag)
896 {
897 int s, ret;
898 Rendez *r;
899 Proc *d, **l;
900
901 if(dolock)
902 qlock(&p->debug);
903
904 if(flag != NUser && (p->notify == 0 || p->notified))
905 p->nnote = 0;
906
907 ret = 0;
908 if(p->nnote < NNOTE) {
909 strcpy(p->note[p->nnote].msg, n);
910 p->note[p->nnote++].flag = flag;
911 ret = 1;
912 }
913 p->notepending = 1;
914 if(dolock)
915 qunlock(&p->debug);
916
917 /* this loop is to avoid lock ordering problems. */
918 for(;;){
919 s = splhi();
920 lock(&p->rlock);
921 r = p->r;
922
923 /* waiting for a wakeup? */
924 if(r == nil)
925 break; /* no */
926
927 /* try for the second lock */
928 if(canlock(r)){
929 if(p->state != Wakeme || r->p != p)
930 panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state);
931 p->r = nil;
932 r->p = nil;
933 ready(p);
934 unlock(r);
935 break;
936 }
937
938 /* give other process time to get out of critical section and try again */
939 unlock(&p->rlock);
940 splx(s);
941 sched();
942 }
943 unlock(&p->rlock);
944 splx(s);
945
946 if(p->state != Rendezvous)
947 return ret;
948
949 /* Try and pull out of a rendezvous */
950 lock(p->rgrp);
951 if(p->state == Rendezvous) {
952 p->rendval = ~0;
953 l = &REND(p->rgrp, p->rendtag);
954 for(d = *l; d; d = d->rendhash) {
955 if(d == p) {
956 *l = p->rendhash;
957 break;
958 }
959 l = &d->rendhash;
960 }
961 ready(p);
962 }
963 unlock(p->rgrp);
964 return ret;
965 }
966
967 /*
968 * weird thing: keep at most NBROKEN around
969 */
970 #define NBROKEN 4
971 struct
972 {
973 QLock;
974 int n;
975 Proc *p[NBROKEN];
976 }broken;
977
978 void
979 addbroken(Proc *p)
980 {
981 qlock(&broken);
982 if(broken.n == NBROKEN) {
983 ready(broken.p[0]);
984 memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1));
985 --broken.n;
986 }
987 broken.p[broken.n++] = p;
988 qunlock(&broken);
989
990 edfstop(up);
991 p->state = Broken;
992 p->psstate = 0;
993 sched();
994 }
995
996 void
997 unbreak(Proc *p)
998 {
999 int b;
1000
1001 qlock(&broken);
1002 for(b=0; b < broken.n; b++)
1003 if(broken.p[b] == p) {
1004 broken.n--;
1005 memmove(&broken.p[b], &broken.p[b+1],
1006 sizeof(Proc*)*(NBROKEN-(b+1)));
1007 ready(p);
1008 break;
1009 }
1010 qunlock(&broken);
1011 }
1012
1013 int
1014 freebroken(void)
1015 {
1016 int i, n;
1017
1018 qlock(&broken);
1019 n = broken.n;
1020 for(i=0; i<n; i++) {
1021 ready(broken.p[i]);
1022 broken.p[i] = 0;
1023 }
1024 broken.n = 0;
1025 qunlock(&broken);
1026 return n;
1027 }
1028
1029 void
1030 pexit(char *exitstr, int freemem)
1031 {
1032 Proc *p;
1033 Segment **s, **es;
1034 long utime, stime;
1035 Waitq *wq, *f, *next;
1036 Fgrp *fgrp;
1037 Egrp *egrp;
1038 Rgrp *rgrp;
1039 Pgrp *pgrp;
1040 Chan *dot;
1041 void (*pt)(Proc*, int, vlong);
1042
1043 up->alarm = 0;
1044 if (up->tt)
1045 timerdel(up);
1046 pt = proctrace;
1047 if(pt)
1048 pt(up, SDead, 0);
1049
1050 /* nil out all the resources under lock (free later) */
1051 qlock(&up->debug);
1052 fgrp = up->fgrp;
1053 up->fgrp = nil;
1054 egrp = up->egrp;
1055 up->egrp = nil;
1056 rgrp = up->rgrp;
1057 up->rgrp = nil;
1058 pgrp = up->pgrp;
1059 up->pgrp = nil;
1060 dot = up->dot;
1061 up->dot = nil;
1062 qunlock(&up->debug);
1063
1064 if(fgrp)
1065 closefgrp(fgrp);
1066 if(egrp)
1067 closeegrp(egrp);
1068 if(rgrp)
1069 closergrp(rgrp);
1070 if(dot)
1071 cclose(dot);
1072 if(pgrp)
1073 closepgrp(pgrp);
1074
1075 /*
1076 * if not a kernel process and have a parent,
1077 * do some housekeeping.
1078 */
1079 if(up->kp == 0) {
1080 p = up->parent;
1081 if(p == 0) {
1082 if(exitstr == 0)
1083 exitstr = "unknown";
1084 panic("boot process died: %s", exitstr);
1085 }
1086
1087 while(waserror())
1088 ;
1089
1090 wq = smalloc(sizeof(Waitq));
1091 poperror();
1092
1093 wq->w.pid = up->pid;
1094 utime = up->time[TUser] + up->time[TCUser];
1095 stime = up->time[TSys] + up->time[TCSys];
1096 wq->w.time[TUser] = tk2ms(utime);
1097 wq->w.time[TSys] = tk2ms(stime);
1098 wq->w.time[TReal] = tk2ms(MACHP(0)->ticks - up->time[TReal]);
1099 if(exitstr && exitstr[0])
1100 snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr);
1101 else
1102 wq->w.msg[0] = '\0';
1103
1104 lock(&p->exl);
1105 /*
1106 * Check that parent is still alive.
1107 */
1108 if(p->pid == up->parentpid && p->state != Broken) {
1109 p->nchild--;
1110 p->time[TCUser] += utime;
1111 p->time[TCSys] += stime;
1112 /*
1113 * If there would be more than 128 wait records
1114 * queued for my parent, then don't leave a wait
1115 * record behind. This helps prevent badly written
1116 * daemon processes from accumulating lots of wait
1117 * records.
1118 */
1119 if(p->nwait < 128) {
1120 wq->next = p->waitq;
1121 p->waitq = wq;
1122 p->nwait++;
1123 wq = nil;
1124 wakeup(&p->waitr);
1125 }
1126 }
1127 unlock(&p->exl);
1128 if(wq)
1129 free(wq);
1130 }
1131
1132 if(!freemem)
1133 addbroken(up);
1134
1135 qlock(&up->seglock);
1136 es = &up->seg[NSEG];
1137 for(s = up->seg; s < es; s++) {
1138 if(*s) {
1139 putseg(*s);
1140 *s = 0;
1141 }
1142 }
1143 qunlock(&up->seglock);
1144
1145 lock(&up->exl); /* Prevent my children from leaving waits */
1146 pidunhash(up);
1147 up->pid = 0;
1148 wakeup(&up->waitr);
1149 unlock(&up->exl);
1150
1151 for(f = up->waitq; f; f = next) {
1152 next = f->next;
1153 free(f);
1154 }
1155
1156 /* release debuggers */
1157 qlock(&up->debug);
1158 if(up->pdbg) {
1159 wakeup(&up->pdbg->sleep);
1160 up->pdbg = 0;
1161 }
1162 qunlock(&up->debug);
1163
1164 /* Sched must not loop for these locks */
1165 lock(&procalloc);
1166 lock(&palloc);
1167
1168 edfstop(up);
1169 up->state = Moribund;
1170 sched();
1171 panic("pexit");
1172 }
1173
1174 int
1175 haswaitq(void *x)
1176 {
1177 Proc *p;
1178
1179 p = (Proc *)x;
1180 return p->waitq != 0;
1181 }
1182
1183 ulong
1184 pwait(Waitmsg *w)
1185 {
1186 ulong cpid;
1187 Waitq *wq;
1188
1189 if(!canqlock(&up->qwaitr))
1190 error(Einuse);
1191
1192 if(waserror()) {
1193 qunlock(&up->qwaitr);
1194 nexterror();
1195 }
1196
1197 lock(&up->exl);
1198 if(up->nchild == 0 && up->waitq == 0) {
1199 unlock(&up->exl);
1200 error(Enochild);
1201 }
1202 unlock(&up->exl);
1203
1204 sleep(&up->waitr, haswaitq, up);
1205
1206 lock(&up->exl);
1207 wq = up->waitq;
1208 up->waitq = wq->next;
1209 up->nwait--;
1210 unlock(&up->exl);
1211
1212 qunlock(&up->qwaitr);
1213 poperror();
1214
1215 if(w)
1216 memmove(w, &wq->w, sizeof(Waitmsg));
1217 cpid = wq->w.pid;
1218 free(wq);
1219 return cpid;
1220 }
1221
1222 Proc*
1223 proctab(int i)
1224 {
1225 return &procalloc.arena[i];
1226 }
1227
1228 void
1229 dumpaproc(Proc *p)
1230 {
1231 ulong bss;
1232 char *s;
1233
1234 if(p == 0)
1235 return;
1236
1237 bss = 0;
1238 if(p->seg[BSEG])
1239 bss = p->seg[BSEG]->top;
1240
1241 s = p->psstate;
1242 if(s == 0)
1243 s = statename[p->state];
1244 print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n",
1245 p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state],
1246 p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref, p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority);
1247 }
1248
1249 void
1250 procdump(void)
1251 {
1252 int i;
1253 Proc *p;
1254
1255 if(up)
1256 print("up %lud\n", up->pid);
1257 else
1258 print("no current process\n");
1259 for(i=0; i<conf.nproc; i++) {
1260 p = &procalloc.arena[i];
1261 if(p->state == Dead)
1262 continue;
1263
1264 dumpaproc(p);
1265 }
1266 }
1267
1268 /*
1269 * wait till all processes have flushed their mmu
1270 * state about segment s
1271 */
1272 void
1273 procflushseg(Segment *s)
1274 {
1275 int i, ns, nm, nwait;
1276 Proc *p;
1277
1278 /*
1279 * tell all processes with this
1280 * segment to flush their mmu's
1281 */
1282 nwait = 0;
1283 for(i=0; i<conf.nproc; i++) {
1284 p = &procalloc.arena[i];
1285 if(p->state == Dead)
1286 continue;
1287 for(ns = 0; ns < NSEG; ns++)
1288 if(p->seg[ns] == s){
1289 p->newtlb = 1;
1290 for(nm = 0; nm < conf.nmach; nm++){
1291 if(MACHP(nm)->proc == p){
1292 MACHP(nm)->flushmmu = 1;
1293 nwait++;
1294 }
1295 }
1296 break;
1297 }
1298 }
1299
1300 if(nwait == 0)
1301 return;
1302
1303 /*
1304 * wait for all processors to take a clock interrupt
1305 * and flush their mmu's
1306 */
1307 for(nm = 0; nm < conf.nmach; nm++)
1308 if(MACHP(nm) != m)
1309 while(MACHP(nm)->flushmmu)
1310 sched();
1311 }
1312
1313 void
1314 scheddump(void)
1315 {
1316 Proc *p;
1317 Schedq *rq;
1318
1319 for(rq = &runq[Nrq-1]; rq >= runq; rq--){
1320 if(rq->head == 0)
1321 continue;
1322 print("rq%ld:", rq-runq);
1323 for(p = rq->head; p; p = p->rnext)
1324 print(" %lud(%lud)", p->pid, m->ticks - p->readytime);
1325 print("\n");
1326 delay(150);
1327 }
1328 print("nrdy %d\n", nrdy);
1329 }
1330
1331 void
1332 kproc(char *name, void (*func)(void *), void *arg)
1333 {
1334 Proc *p;
1335 static Pgrp *kpgrp;
1336
1337 p = newproc();
1338 p->psstate = 0;
1339 p->procmode = 0640;
1340 p->kp = 1;
1341 p->noswap = 1;
1342
1343 p->fpsave = up->fpsave;
1344 p->scallnr = up->scallnr;
1345 p->s = up->s;
1346 p->nerrlab = 0;
1347 p->slash = up->slash;
1348 p->dot = up->dot;
1349 if(p->dot)
1350 incref(p->dot);
1351
1352 memmove(p->note, up->note, sizeof(p->note));
1353 p->nnote = up->nnote;
1354 p->notified = 0;
1355 p->lastnote = up->lastnote;
1356 p->notify = up->notify;
1357 p->ureg = 0;
1358 p->dbgreg = 0;
1359
1360 procpriority(p, PriKproc, 0);
1361
1362 kprocchild(p, func, arg);
1363
1364 kstrdup(&p->user, eve);
1365 kstrdup(&p->text, name);
1366 if(kpgrp == 0)
1367 kpgrp = newpgrp();
1368 p->pgrp = kpgrp;
1369 incref(kpgrp);
1370
1371 memset(p->time, 0, sizeof(p->time));
1372 p->time[TReal] = MACHP(0)->ticks;
1373 ready(p);
1374 /*
1375 * since the bss/data segments are now shareable,
1376 * any mmu info about this process is now stale
1377 * and has to be discarded.
1378 */
1379 p->newtlb = 1;
1380 flushmmu();
1381 }
1382
1383 /*
1384 * called splhi() by notify(). See comment in notify for the
1385 * reasoning.
1386 */
1387 void
1388 procctl(Proc *p)
1389 {
1390 char *state;
1391 ulong s;
1392
1393 switch(p->procctl) {
1394 case Proc_exitbig:
1395 spllo();
1396 pexit("Killed: Insufficient physical memory", 1);
1397
1398 case Proc_exitme:
1399 spllo(); /* pexit has locks in it */
1400 pexit("Killed", 1);
1401
1402 case Proc_traceme:
1403 if(p->nnote == 0)
1404 return;
1405 /* No break */
1406
1407 case Proc_stopme:
1408 p->procctl = 0;
1409 state = p->psstate;
1410 p->psstate = "Stopped";
1411 /* free a waiting debugger */
1412 s = spllo();
1413 qlock(&p->debug);
1414 if(p->pdbg) {
1415 wakeup(&p->pdbg->sleep);
1416 p->pdbg = 0;
1417 }
1418 qunlock(&p->debug);
1419 splhi();
1420 p->state = Stopped;
1421 sched();
1422 p->psstate = state;
1423 splx(s);
1424 return;
1425 }
1426 }
1427
1428 #include "errstr.h"
1429
1430 void
1431 error(char *err)
1432 {
1433 spllo();
1434
1435 assert(up->nerrlab < NERR);
1436 kstrcpy(up->errstr, err, ERRMAX);
1437 setlabel(&up->errlab[NERR-1]);
1438 nexterror();
1439 }
1440
1441 void
1442 nexterror(void)
1443 {
1444 gotolabel(&up->errlab[--up->nerrlab]);
1445 }
1446
1447 void
1448 exhausted(char *resource)
1449 {
1450 char buf[ERRMAX];
1451
1452 sprint(buf, "no free %s", resource);
1453 iprint("%s\n", buf);
1454 error(buf);
1455 }
1456
1457 void
1458 killbig(char *why)
1459 {
1460 int i;
1461 Segment *s;
1462 ulong l, max;
1463 Proc *p, *ep, *kp;
1464
1465 max = 0;
1466 kp = 0;
1467 ep = procalloc.arena+conf.nproc;
1468 for(p = procalloc.arena; p < ep; p++) {
1469 if(p->state == Dead || p->kp)
1470 continue;
1471 l = 0;
1472 for(i=1; i<NSEG; i++) {
1473 s = p->seg[i];
1474 if(s != 0)
1475 l += s->top - s->base;
1476 }
1477 if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) {
1478 kp = p;
1479 max = l;
1480 }
1481 }
1482
1483 print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
1484 for(p = procalloc.arena; p < ep; p++) {
1485 if(p->state == Dead || p->kp)
1486 continue;
1487 if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG])
1488 p->procctl = Proc_exitbig;
1489 }
1490 kp->procctl = Proc_exitbig;
1491 for(i = 0; i < NSEG; i++) {
1492 s = kp->seg[i];
1493 if(s != 0 && canqlock(&s->lk)) {
1494 mfreeseg(s, s->base, (s->top - s->base)/BY2PG);
1495 qunlock(&s->lk);
1496 }
1497 }
1498 }
1499
1500 /*
1501 * change ownership to 'new' of all processes owned by 'old'. Used when
1502 * eve changes.
1503 */
1504 void
1505 renameuser(char *old, char *new)
1506 {
1507 Proc *p, *ep;
1508
1509 ep = procalloc.arena+conf.nproc;
1510 for(p = procalloc.arena; p < ep; p++)
1511 if(p->user!=nil && strcmp(old, p->user)==0)
1512 kstrdup(&p->user, new);
1513 }
1514
1515 /*
1516 * time accounting called by clock() splhi'd
1517 */
1518 void
1519 accounttime(void)
1520 {
1521 Proc *p;
1522 ulong n, per;
1523 static ulong nrun;
1524
1525 p = m->proc;
1526 if(p) {
1527 nrun++;
1528 p->time[p->insyscall]++;
1529 }
1530
1531 /* calculate decaying duty cycles */
1532 n = perfticks();
1533 per = n - m->perf.last;
1534 m->perf.last = n;
1535 per = (m->perf.period*(HZ-1) + per)/HZ;
1536 if(per != 0)
1537 m->perf.period = per;
1538
1539 m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ;
1540 m->perf.inidle = 0;
1541
1542 m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ;
1543 m->perf.inintr = 0;
1544
1545 /* only one processor gets to compute system load averages */
1546 if(m->machno != 0)
1547 return;
1548
1549 /*
1550 * calculate decaying load average.
1551 * if we decay by (n-1)/n then it takes
1552 * n clock ticks to go from load L to .36 L once
1553 * things quiet down. it takes about 5 n clock
1554 * ticks to go to zero. so using HZ means this is
1555 * approximately the load over the last second,
1556 * with a tail lasting about 5 seconds.
1557 */
1558 n = nrun;
1559 nrun = 0;
1560 n = (nrdy+n)*1000;
1561 m->load = (m->load*(HZ-1)+n)/HZ;
1562 }
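/*
 * Worked example (assuming HZ = 100 for the arithmetic): each tick the
 * average keeps (HZ-1)/HZ = 99% of its old value and mixes in 1% of the
 * instantaneous sample, so a steady load of 2000 that suddenly drops to
 * zero decays to about 2000*0.99^100 ~= 732 after one second, i.e. the
 * 1/e tail described in the comment above.
 */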
1563
1564 static void
1565 pidhash(Proc *p)
1566 {
1567 int h;
1568
1569 h = p->pid % nelem(procalloc.ht);
1570 lock(&procalloc);
1571 p->pidhash = procalloc.ht[h];
1572 procalloc.ht[h] = p;
1573 unlock(&procalloc);
1574 }
1575
1576 static void
1577 pidunhash(Proc *p)
1578 {
1579 int h;
1580 Proc **l;
1581
1582 h = p->pid % nelem(procalloc.ht);
1583 lock(&procalloc);
1584 for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash)
1585 if(*l == p){
1586 *l = p->pidhash;
1587 break;
1588 }
1589 unlock(&procalloc);
1590 }
1591
1592 int
1593 procindex(ulong pid)
1594 {
1595 Proc *p;
1596 int h;
1597 int s;
1598
1599 s = -1;
1600 h = pid % nelem(procalloc.ht);
1601 lock(&procalloc);
1602 for(p = procalloc.ht[h]; p != nil; p = p->pidhash)
1603 if(p->pid == pid){
1604 s = p - procalloc.arena;
1605 break;
1606 }
1607 unlock(&procalloc);
1608 return s;
1609 }