1 /*
2 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26 * DAMAGE.
27 *
28 * $FreeBSD: releng/5.0/sys/kern/kern_thread.c 107719 2002-12-10 02:33:45Z julian $
29 */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/malloc.h>
36 #include <sys/mutex.h>
37 #include <sys/proc.h>
38 #include <sys/smp.h>
39 #include <sys/sysctl.h>
40 #include <sys/sysproto.h>
41 #include <sys/filedesc.h>
42 #include <sys/sched.h>
43 #include <sys/signalvar.h>
44 #include <sys/sx.h>
45 #include <sys/tty.h>
46 #include <sys/user.h>
47 #include <sys/jail.h>
48 #include <sys/kse.h>
49 #include <sys/ktr.h>
50 #include <sys/ucontext.h>
51
52 #include <vm/vm.h>
53 #include <vm/vm_object.h>
54 #include <vm/pmap.h>
55 #include <vm/uma.h>
56 #include <vm/vm_map.h>
57
58 #include <machine/frame.h>
59
60 /*
61 * KSEGRP related storage.
62 */
63 static uma_zone_t ksegrp_zone;
64 static uma_zone_t kse_zone;
65 static uma_zone_t thread_zone;
66
67 /* DEBUG ONLY */
68 SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
69 static int thread_debug = 0;
70 SYSCTL_INT(_kern_threads, OID_AUTO, debug, CTLFLAG_RW,
71 &thread_debug, 0, "thread debug");
72
73 static int max_threads_per_proc = 30;
74 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
75 &max_threads_per_proc, 0, "Limit on threads per proc");
76
77 static int max_groups_per_proc = 5;
78 SYSCTL_INT(_kern_threads, OID_AUTO, max_groups_per_proc, CTLFLAG_RW,
79 &max_groups_per_proc, 0, "Limit on thread groups per proc");
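
/*
 * Both limits are run-time tunable via sysctl(8); a hypothetical example
 * (the value 150 is illustrative only, the defaults above apply otherwise):
 *
 *	sysctl kern.threads.max_threads_per_proc=150
 */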
80
81 #define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
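
/*
 * For example, kse_create() below clears the zeroed section of a new
 * ksegrp in a single call:
 *
 *	bzero(&newkg->kg_startzero,
 *	    RANGEOF(struct ksegrp, kg_startzero, kg_endzero));
 *
 * This relies on the kg_startzero..kg_endzero members being declared
 * contiguously in struct ksegrp.
 */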
82
83 struct threadqueue zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
84 TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses);
85 TAILQ_HEAD(, ksegrp) zombie_ksegrps = TAILQ_HEAD_INITIALIZER(zombie_ksegrps);
86 struct mtx zombie_thread_lock;
87 MTX_SYSINIT(zombie_thread_lock, &zombie_thread_lock,
88 "zombie_thread_lock", MTX_SPIN);
89
90 static void kse_purge(struct proc *p, struct thread *td);
91
92 /*
93 * Prepare a thread for use.
94 */
95 static void
96 thread_ctor(void *mem, int size, void *arg)
97 {
98 struct thread *td;
99
100 td = (struct thread *)mem;
101 td->td_state = TDS_INACTIVE;
102 td->td_flags |= TDF_UNBOUND;
103 }
104
105 /*
106 * Reclaim a thread after use.
107 */
108 static void
109 thread_dtor(void *mem, int size, void *arg)
110 {
111 struct thread *td;
112
113 td = (struct thread *)mem;
114
115 #ifdef INVARIANTS
116 /* Verify that this thread is in a safe state to free. */
117 switch (td->td_state) {
118 case TDS_INHIBITED:
119 case TDS_RUNNING:
120 case TDS_CAN_RUN:
121 case TDS_RUNQ:
122 /*
123 * We must never unlink a thread that is in one of
124 * these states, because it is currently active.
125 */
126 panic("bad state for thread unlinking");
127 /* NOTREACHED */
128 case TDS_INACTIVE:
129 break;
130 default:
131 panic("bad thread state");
132 /* NOTREACHED */
133 }
134 #endif
135 }
136
137 /*
138 * Initialize type-stable parts of a thread (when newly created).
139 */
140 static void
141 thread_init(void *mem, int size)
142 {
143 struct thread *td;
144
145 td = (struct thread *)mem;
146 mtx_lock(&Giant);
147 pmap_new_thread(td, 0);
148 mtx_unlock(&Giant);
149 cpu_thread_setup(td);
150 td->td_sched = (struct td_sched *)&td[1];
151 }
152
153 /*
154 * Tear down type-stable parts of a thread (just before being discarded).
155 */
156 static void
157 thread_fini(void *mem, int size)
158 {
159 struct thread *td;
160
161 td = (struct thread *)mem;
162 pmap_dispose_thread(td);
163 }

/*
165 * Initialize type-stable parts of a kse (when newly created).
166 */
167 static void
168 kse_init(void *mem, int size)
169 {
170 struct kse *ke;
171
172 ke = (struct kse *)mem;
173 ke->ke_sched = (struct ke_sched *)&ke[1];
174 }

/*
176 * Initialize type-stable parts of a ksegrp (when newly created).
177 */
178 static void
179 ksegrp_init(void *mem, int size)
180 {
181 struct ksegrp *kg;
182
183 kg = (struct ksegrp *)mem;
184 kg->kg_sched = (struct kg_sched *)&kg[1];
185 }
186
187 /*
188 * KSE is linked onto the idle queue.
189 */
190 void
191 kse_link(struct kse *ke, struct ksegrp *kg)
192 {
193 struct proc *p = kg->kg_proc;
194
195 TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist);
196 kg->kg_kses++;
197 ke->ke_state = KES_UNQUEUED;
198 ke->ke_proc = p;
199 ke->ke_ksegrp = kg;
200 ke->ke_thread = NULL;
201 ke->ke_oncpu = NOCPU;
202 }
203
204 void
205 kse_unlink(struct kse *ke)
206 {
207 struct ksegrp *kg;
208
209 mtx_assert(&sched_lock, MA_OWNED);
210 kg = ke->ke_ksegrp;
211 if (ke->ke_state == KES_IDLE) {
212 kg->kg_idle_kses--;
213 TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
214 }
215
216 TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
217 if (--kg->kg_kses == 0) {
218 ksegrp_unlink(kg);
219 }
220 /*
221 * Aggregate stats from the KSE
222 */
223 kse_stash(ke);
224 }
225
226 void
227 ksegrp_link(struct ksegrp *kg, struct proc *p)
228 {
229
230 TAILQ_INIT(&kg->kg_threads);
231 TAILQ_INIT(&kg->kg_runq); /* links with td_runq */
232 TAILQ_INIT(&kg->kg_slpq); /* links with td_runq */
233 TAILQ_INIT(&kg->kg_kseq); /* all kses in ksegrp */
234 TAILQ_INIT(&kg->kg_iq); /* idle kses in ksegrp */
235 TAILQ_INIT(&kg->kg_lq); /* loan kses in ksegrp */
236 kg->kg_proc = p;
	/*
	 * The following counters are in the -zero- section
	 * and may not need clearing.
	 */
238 kg->kg_numthreads = 0;
239 kg->kg_runnable = 0;
240 kg->kg_kses = 0;
241 kg->kg_idle_kses = 0;
242 kg->kg_loan_kses = 0;
243 kg->kg_runq_kses = 0; /* XXXKSE change name */
244 /* link it in now that it's consistent */
245 p->p_numksegrps++;
246 TAILQ_INSERT_HEAD(&p->p_ksegrps, kg, kg_ksegrp);
247 }
248
249 void
250 ksegrp_unlink(struct ksegrp *kg)
251 {
252 struct proc *p;
253
254 mtx_assert(&sched_lock, MA_OWNED);
255 p = kg->kg_proc;
	KASSERT(((kg->kg_numthreads == 0) && (kg->kg_kses == 0)),
	    ("ksegrp_unlink: residual threads or KSEs"));
258 TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
259 p->p_numksegrps--;
260 /*
261 * Aggregate stats from the KSE
262 */
263 ksegrp_stash(kg);
264 }
265
/*
 * For a newly created process,
 * link up the structure and its initial threads etc.
 */
270 void
271 proc_linkup(struct proc *p, struct ksegrp *kg,
272 struct kse *ke, struct thread *td)
273 {
274
275 TAILQ_INIT(&p->p_ksegrps); /* all ksegrps in proc */
276 TAILQ_INIT(&p->p_threads); /* all threads in proc */
277 TAILQ_INIT(&p->p_suspended); /* Threads suspended */
278 p->p_numksegrps = 0;
279 p->p_numthreads = 0;
280
281 ksegrp_link(kg, p);
282 kse_link(ke, kg);
283 thread_link(td, kg);
284 }
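
/*
 * A minimal sketch of the intended call pattern (hypothetical caller,
 * for illustration; in practice this runs when a new struct proc is
 * being set up):
 *
 *	struct ksegrp *kg = ksegrp_alloc();
 *	struct kse *ke = kse_alloc();
 *	struct thread *td = thread_alloc();
 *
 *	proc_linkup(p, kg, ke, td);
 *
 * Afterwards the process owns exactly one ksegrp, one KSE and one thread.
 */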
285
286 int
287 kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
288 {
289 struct proc *p;
290 struct thread *td2;
291
292 p = td->td_proc;
293 /* KSE-enabled processes only, please. */
294 if (!(p->p_flag & P_KSES))
295 return (EINVAL);
296 if (uap->tmbx == NULL)
297 return (EINVAL);
298 mtx_lock_spin(&sched_lock);
299 FOREACH_THREAD_IN_PROC(p, td2) {
300 if (td2->td_mailbox == uap->tmbx) {
301 td2->td_flags |= TDF_INTERRUPT;
302 if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR)) {
303 if (td2->td_flags & TDF_CVWAITQ)
304 cv_abort(td2);
305 else
306 abortsleep(td2);
307 }
308 mtx_unlock_spin(&sched_lock);
309 td->td_retval[0] = 0;
310 td->td_retval[1] = 0;
311 return (0);
312 }
313 }
314 mtx_unlock_spin(&sched_lock);
315 return (ESRCH);
316 }
317
318 int
319 kse_exit(struct thread *td, struct kse_exit_args *uap)
320 {
321 struct proc *p;
322 struct ksegrp *kg;
323
324 p = td->td_proc;
325 /* KSE-enabled processes only, please. */
326 if (!(p->p_flag & P_KSES))
327 return (EINVAL);
328 /* must be a bound thread */
329 if (td->td_flags & TDF_UNBOUND)
330 return (EINVAL);
331 kg = td->td_ksegrp;
332 /* serialize killing kse */
333 PROC_LOCK(p);
334 mtx_lock_spin(&sched_lock);
335 if ((kg->kg_kses == 1) && (kg->kg_numthreads > 1)) {
336 mtx_unlock_spin(&sched_lock);
337 PROC_UNLOCK(p);
338 return (EDEADLK);
339 }
340 if ((p->p_numthreads == 1) && (p->p_numksegrps == 1)) {
		/* XXXKSE what if >1 KSE? check.... */
342 p->p_flag &= ~P_KSES;
343 mtx_unlock_spin(&sched_lock);
344 PROC_UNLOCK(p);
345 } else {
346 td->td_kse->ke_flags |= KEF_EXIT;
347 thread_exit();
348 /* NOTREACHED */
349 }
350 return (0);
351 }
352
353 /*
354 * Either returns as an upcall or exits
355 */
356 int
kse_release(struct thread *td, struct kse_release_args *uap)
358 {
359 struct proc *p;
360 struct ksegrp *kg;
361
362 p = td->td_proc;
363 kg = td->td_ksegrp;
	/*
	 * Must be a bound thread, and the KSE must have a mailbox ready;
	 * if not, the KSE cannot generate an upcall.
	 */
368 if (!(p->p_flag & P_KSES) ||
369 (td->td_flags & TDF_UNBOUND) ||
370 (td->td_kse->ke_mailbox == NULL))
371 return (EINVAL);
372 PROC_LOCK(p);
373 mtx_lock_spin(&sched_lock);
374 if (kg->kg_completed == NULL) {
375 #if 1 /* temp until signals make new threads */
376 if (p->p_numthreads == 1) {
377 /* change OURSELF to become an upcall */
378 td->td_flags = TDF_UPCALLING;
379 mtx_unlock_spin(&sched_lock);
380 PROC_UNLOCK(p);
381 /*
382 * msleep will not call thread_sched_upcall
383 * because thread is not UNBOUND.
384 */
385 msleep(p->p_sigacts, NULL,
386 PPAUSE | PCATCH, "ksepause", 0);
387 return (0);
388 }
389 #endif /* end temp */
390 thread_exit();
391 }
392 /* change OURSELF to become an upcall */
393 td->td_flags = TDF_UPCALLING;
394 mtx_unlock_spin(&sched_lock);
395 PROC_UNLOCK(p);
396 return (0);
397 }
398
399 /* struct kse_wakeup_args {
400 struct kse_mailbox *mbx;
401 }; */
402 int
403 kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
404 {
405 struct proc *p;
406 struct kse *ke, *ke2;
407 struct ksegrp *kg;
408
409 p = td->td_proc;
410 /* KSE-enabled processes only, please. */
411 if (!(p->p_flag & P_KSES))
412 return EINVAL;
413 ke = NULL;
414 mtx_lock_spin(&sched_lock);
415 if (uap->mbx) {
416 FOREACH_KSEGRP_IN_PROC(p, kg) {
417 FOREACH_KSE_IN_GROUP(kg, ke2) {
418 if (ke2->ke_mailbox != uap->mbx)
419 continue;
420 if (ke2->ke_state == KES_IDLE) {
421 ke = ke2;
422 goto found;
423 } else {
424 mtx_unlock_spin(&sched_lock);
425 td->td_retval[0] = 0;
426 td->td_retval[1] = 0;
427 return (0);
428 }
429 }
430 }
431 } else {
432 kg = td->td_ksegrp;
433 ke = TAILQ_FIRST(&kg->kg_iq);
434 }
435 if (ke == NULL) {
436 mtx_unlock_spin(&sched_lock);
437 return (ESRCH);
438 }
439 found:
440 thread_schedule_upcall(td, ke);
441 mtx_unlock_spin(&sched_lock);
442 td->td_retval[0] = 0;
443 td->td_retval[1] = 0;
444 return (0);
445 }
446
/*
 * No new KSEG: first call: use current KSE, don't schedule an upcall.
 * In all other situations, allocate a new KSE and schedule an upcall on it.
 */
451 /* struct kse_create_args {
452 struct kse_mailbox *mbx;
453 int newgroup;
454 }; */
455 int
456 kse_create(struct thread *td, struct kse_create_args *uap)
457 {
458 struct kse *newke;
459 struct kse *ke;
460 struct ksegrp *newkg;
461 struct ksegrp *kg;
462 struct proc *p;
463 struct kse_mailbox mbx;
464 int err;
465
466 p = td->td_proc;
467 if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
468 return (err);
469
470 p->p_flag |= P_KSES; /* easier to just set it than to test and set */
471 kg = td->td_ksegrp;
472 if (uap->newgroup) {
473 if (p->p_numksegrps >= max_groups_per_proc)
474 return (EPROCLIM);
		/*
		 * If we want a new KSEGRP it doesn't matter whether
		 * we have already fired up KSE mode before or not.
		 * We put the process in KSE mode and create a new KSEGRP
		 * and KSE. If our KSE has not got a mailbox yet then
		 * that doesn't matter; just leave it that way. It will
		 * ensure that this thread stays BOUND. It's possible
		 * that the call came from a threaded library and the main
		 * program knows nothing of threads.
		 */
485 newkg = ksegrp_alloc();
486 bzero(&newkg->kg_startzero, RANGEOF(struct ksegrp,
487 kg_startzero, kg_endzero));
488 bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
489 RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
490 newke = kse_alloc();
491 } else {
		/*
		 * Otherwise, if we have already set this KSE
		 * to have a mailbox, we want to make another KSE here,
		 * but only if we are not already at the limit, which
		 * is 1 per CPU max.
		 *
		 * If the current KSE doesn't have a mailbox we just use it
		 * and give it one.
		 *
		 * Because we don't like to access the KSE outside of
		 * schedlock if we are UNBOUND (it can change if we are
		 * preempted by an interrupt), we can deduce that the KSE
		 * has a mailbox if we are UNBOUND, and only need to
		 * actually look at it if we are BOUND, which is safe.
		 */
508 if ((td->td_flags & TDF_UNBOUND) || td->td_kse->ke_mailbox) {
509 if (thread_debug == 0) { /* if debugging, allow more */
510 #ifdef SMP
511 if (kg->kg_kses > mp_ncpus)
512 #endif
513 return (EPROCLIM);
514 }
515 newke = kse_alloc();
516 } else {
517 newke = NULL;
518 }
519 newkg = NULL;
520 }
521 if (newke) {
522 bzero(&newke->ke_startzero, RANGEOF(struct kse,
523 ke_startzero, ke_endzero));
524 #if 0
525 bcopy(&ke->ke_startcopy, &newke->ke_startcopy,
526 RANGEOF(struct kse, ke_startcopy, ke_endcopy));
527 #endif
528 /* For the first call this may not have been set */
529 if (td->td_standin == NULL) {
530 td->td_standin = thread_alloc();
531 }
532 mtx_lock_spin(&sched_lock);
533 if (newkg) {
534 if (p->p_numksegrps >= max_groups_per_proc) {
535 mtx_unlock_spin(&sched_lock);
536 ksegrp_free(newkg);
537 kse_free(newke);
538 return (EPROCLIM);
539 }
540 ksegrp_link(newkg, p);
		} else
			newkg = kg;
544 kse_link(newke, newkg);
545 if (p->p_sflag & PS_NEEDSIGCHK)
546 newke->ke_flags |= KEF_ASTPENDING;
547 newke->ke_mailbox = uap->mbx;
548 newke->ke_upcall = mbx.km_func;
549 bcopy(&mbx.km_stack, &newke->ke_stack, sizeof(stack_t));
550 thread_schedule_upcall(td, newke);
551 mtx_unlock_spin(&sched_lock);
552 } else {
		/*
		 * If we didn't allocate a new KSE then we are using
		 * the existing (BOUND) kse.
		 */
557 ke = td->td_kse;
558 ke->ke_mailbox = uap->mbx;
559 ke->ke_upcall = mbx.km_func;
560 bcopy(&mbx.km_stack, &ke->ke_stack, sizeof(stack_t));
561 }
562 /*
563 * Fill out the KSE-mode specific fields of the new kse.
564 */
565
566 td->td_retval[0] = 0;
567 td->td_retval[1] = 0;
568 return (0);
569 }
570
571 /*
572 * Fill a ucontext_t with a thread's context information.
573 *
574 * This is an analogue to getcontext(3).
575 */
576 void
577 thread_getcontext(struct thread *td, ucontext_t *uc)
578 {
579
	/*
	 * XXX this is declared in an MD include file, i386/include/ucontext.h,
	 * but is used in MI code.
	 */
584 #ifdef __i386__
585 get_mcontext(td, &uc->uc_mcontext);
586 #endif
587 uc->uc_sigmask = td->td_proc->p_sigmask;
588 }
589
590 /*
591 * Set a thread's context from a ucontext_t.
592 *
593 * This is an analogue to setcontext(3).
594 */
595 int
596 thread_setcontext(struct thread *td, ucontext_t *uc)
597 {
598 int ret;
599
	/*
	 * XXX this is declared in an MD include file, i386/include/ucontext.h,
	 * but is used in MI code.
	 */
604 #ifdef __i386__
605 ret = set_mcontext(td, &uc->uc_mcontext);
606 #else
607 ret = ENOSYS;
608 #endif
609 if (ret == 0) {
610 SIG_CANTMASK(uc->uc_sigmask);
611 PROC_LOCK(td->td_proc);
612 td->td_proc->p_sigmask = uc->uc_sigmask;
613 PROC_UNLOCK(td->td_proc);
614 }
615 return (ret);
616 }
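
/*
 * A minimal, hypothetical sketch of how the two routines above pair up
 * (illustrative only):
 *
 *	ucontext_t uc;
 *
 *	thread_getcontext(td, &uc);		(snapshot td's context)
 *	error = thread_setcontext(td, &uc);	(restore it later)
 *
 * thread_export_context() below uses thread_getcontext() to fill in the
 * context saved in the thread's user-space mailbox.
 */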
617
618 /*
619 * Initialize global thread allocation resources.
620 */
621 void
622 threadinit(void)
623 {
624
625 #ifndef __ia64__
626 thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
627 thread_ctor, thread_dtor, thread_init, thread_fini,
628 UMA_ALIGN_CACHE, 0);
629 #else
	/*
	 * XXX the ia64 kstack allocator is really lame and is at the mercy
	 * of contigmalloc(). This hackery is to pre-construct a whole
	 * pile of thread structures with associated kernel stacks early
	 * in the system startup while contigmalloc() still works. Once we
	 * have them, keep them. Sigh.
	 */
637 thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
638 thread_ctor, thread_dtor, thread_init, thread_fini,
639 UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
	uma_prealloc(thread_zone, 512);	/* XXX arbitrary */
641 #endif
642 ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
643 NULL, NULL, ksegrp_init, NULL,
644 UMA_ALIGN_CACHE, 0);
645 kse_zone = uma_zcreate("KSE", sched_sizeof_kse(),
646 NULL, NULL, kse_init, NULL,
647 UMA_ALIGN_CACHE, 0);
648 }
649
/*
 * Stash an embarrassing extra thread into the zombie thread queue.
 */
653 void
654 thread_stash(struct thread *td)
655 {
656 mtx_lock_spin(&zombie_thread_lock);
657 TAILQ_INSERT_HEAD(&zombie_threads, td, td_runq);
658 mtx_unlock_spin(&zombie_thread_lock);
659 }
660
/*
 * Stash an embarrassing extra kse into the zombie kse queue.
 */
664 void
665 kse_stash(struct kse *ke)
666 {
667 mtx_lock_spin(&zombie_thread_lock);
668 TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq);
669 mtx_unlock_spin(&zombie_thread_lock);
670 }
671
/*
 * Stash an embarrassing extra ksegrp into the zombie ksegrp queue.
 */
675 void
676 ksegrp_stash(struct ksegrp *kg)
677 {
678 mtx_lock_spin(&zombie_thread_lock);
679 TAILQ_INSERT_HEAD(&zombie_ksegrps, kg, kg_ksegrp);
680 mtx_unlock_spin(&zombie_thread_lock);
681 }
682
683 /*
684 * Reap zombie threads.
685 */
686 void
687 thread_reap(void)
688 {
689 struct thread *td_first, *td_next;
690 struct kse *ke_first, *ke_next;
	struct ksegrp *kg_first, *kg_next;
692
	/*
	 * Don't even bother to lock if none at this instant;
	 * we really don't care about the next instant.
	 */
697 if ((!TAILQ_EMPTY(&zombie_threads))
698 || (!TAILQ_EMPTY(&zombie_kses))
699 || (!TAILQ_EMPTY(&zombie_ksegrps))) {
700 mtx_lock_spin(&zombie_thread_lock);
701 td_first = TAILQ_FIRST(&zombie_threads);
702 ke_first = TAILQ_FIRST(&zombie_kses);
703 kg_first = TAILQ_FIRST(&zombie_ksegrps);
704 if (td_first)
705 TAILQ_INIT(&zombie_threads);
706 if (ke_first)
707 TAILQ_INIT(&zombie_kses);
708 if (kg_first)
709 TAILQ_INIT(&zombie_ksegrps);
710 mtx_unlock_spin(&zombie_thread_lock);
711 while (td_first) {
712 td_next = TAILQ_NEXT(td_first, td_runq);
713 thread_free(td_first);
714 td_first = td_next;
715 }
716 while (ke_first) {
717 ke_next = TAILQ_NEXT(ke_first, ke_procq);
718 kse_free(ke_first);
719 ke_first = ke_next;
720 }
721 while (kg_first) {
722 kg_next = TAILQ_NEXT(kg_first, kg_ksegrp);
723 ksegrp_free(kg_first);
724 kg_first = kg_next;
725 }
726 }
727 }
728
729 /*
730 * Allocate a ksegrp.
731 */
732 struct ksegrp *
733 ksegrp_alloc(void)
734 {
735 return (uma_zalloc(ksegrp_zone, M_WAITOK));
736 }
737
738 /*
739 * Allocate a kse.
740 */
741 struct kse *
742 kse_alloc(void)
743 {
744 return (uma_zalloc(kse_zone, M_WAITOK));
745 }
746
747 /*
748 * Allocate a thread.
749 */
750 struct thread *
751 thread_alloc(void)
752 {
753 thread_reap(); /* check if any zombies to get */
754 return (uma_zalloc(thread_zone, M_WAITOK));
755 }
756
757 /*
758 * Deallocate a ksegrp.
759 */
void
ksegrp_free(struct ksegrp *kg)
{
	uma_zfree(ksegrp_zone, kg);
}
765
766 /*
767 * Deallocate a kse.
768 */
void
kse_free(struct kse *ke)
{
	uma_zfree(kse_zone, ke);
}
774
775 /*
776 * Deallocate a thread.
777 */
778 void
779 thread_free(struct thread *td)
780 {
781
782 cpu_thread_clean(td);
783 uma_zfree(thread_zone, td);
784 }
785
/*
 * Store the thread context in the UTS's mailbox,
 * then add the mailbox at the head of a list we are building in user space.
 * The list is anchored in the ksegrp structure.
 */
791 int
792 thread_export_context(struct thread *td)
793 {
794 struct proc *p;
795 struct ksegrp *kg;
796 uintptr_t mbx;
797 void *addr;
798 int error;
799 ucontext_t uc;
800 uint temp;
801
802 p = td->td_proc;
803 kg = td->td_ksegrp;
804
805 /* Export the user/machine context. */
806 #if 0
807 addr = (caddr_t)td->td_mailbox +
808 offsetof(struct kse_thr_mailbox, tm_context);
809 #else /* if user pointer arithmetic is valid in the kernel */
810 addr = (void *)(&td->td_mailbox->tm_context);
811 #endif
812 error = copyin(addr, &uc, sizeof(ucontext_t));
813 if (error == 0) {
814 thread_getcontext(td, &uc);
815 error = copyout(&uc, addr, sizeof(ucontext_t));
816
817 }
818 if (error) {
819 PROC_LOCK(p);
820 psignal(p, SIGSEGV);
821 PROC_UNLOCK(p);
822 return (error);
823 }
	/* Get the address of the list pointer in the latest mailbox. */
825 #if 0
826 addr = (caddr_t)td->td_mailbox
827 + offsetof(struct kse_thr_mailbox , tm_next);
828 #else /* if user pointer arithmetic is valid in the kernel */
829 addr = (void *)(&td->td_mailbox->tm_next);
830 #endif
831 /*
832 * Put the saved address of the previous first
833 * entry into this one
834 */
835 for (;;) {
836 mbx = (uintptr_t)kg->kg_completed;
837 if (suword(addr, mbx)) {
838 goto bad;
839 }
840 PROC_LOCK(p);
841 if (mbx == (uintptr_t)kg->kg_completed) {
842 kg->kg_completed = td->td_mailbox;
843 PROC_UNLOCK(p);
844 break;
845 }
846 PROC_UNLOCK(p);
847 }
848 addr = (caddr_t)td->td_mailbox
849 + offsetof(struct kse_thr_mailbox, tm_sticks);
850 temp = fuword(addr) + td->td_usticks;
851 if (suword(addr, temp))
852 goto bad;
853 return (0);
854
855 bad:
856 PROC_LOCK(p);
857 psignal(p, SIGSEGV);
858 PROC_UNLOCK(p);
859 return (EFAULT);
860 }
861
862 /*
863 * Take the list of completed mailboxes for this KSEGRP and put them on this
864 * KSE's mailbox as it's the next one going up.
865 */
866 static int
867 thread_link_mboxes(struct ksegrp *kg, struct kse *ke)
868 {
869 struct proc *p = kg->kg_proc;
870 void *addr;
871 uintptr_t mbx;
872
873 #if 0
874 addr = (caddr_t)ke->ke_mailbox
875 + offsetof(struct kse_mailbox, km_completed);
876 #else /* if user pointer arithmetic is valid in the kernel */
877 addr = (void *)(&ke->ke_mailbox->km_completed);
878 #endif
879 for (;;) {
880 mbx = (uintptr_t)kg->kg_completed;
881 if (suword(addr, mbx)) {
882 PROC_LOCK(p);
883 psignal(p, SIGSEGV);
884 PROC_UNLOCK(p);
885 return (EFAULT);
886 }
887 /* XXXKSE could use atomic CMPXCH here */
888 PROC_LOCK(p);
889 if (mbx == (uintptr_t)kg->kg_completed) {
890 kg->kg_completed = NULL;
891 PROC_UNLOCK(p);
892 break;
893 }
894 PROC_UNLOCK(p);
895 }
896 return (0);
897 }
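
/*
 * Note on the loop above: kg_completed is published with an optimistic
 * write-then-verify scheme. The current head is written into the
 * user-visible link with suword() while no lock is held (suword() may
 * fault and sleep), then kg_completed is re-checked under PROC_LOCK();
 * if it changed, another thread exported a context in the meantime and
 * we retry. thread_export_context() above uses the same pattern when
 * pushing a single mailbox onto the list.
 */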
898
899 /*
900 * This function should be called at statclock interrupt time
901 */
902 int
903 thread_add_ticks_intr(int user, uint ticks)
904 {
905 struct thread *td = curthread;
906 struct kse *ke = td->td_kse;
907
908 if (ke->ke_mailbox == NULL)
909 return -1;
910 if (user) {
		/* Currently always done via ast() */
912 ke->ke_flags |= KEF_ASTPENDING;
913 ke->ke_uuticks += ticks;
914 } else {
915 if (td->td_mailbox != NULL)
916 td->td_usticks += ticks;
917 else
918 ke->ke_usticks += ticks;
919 }
920 return 0;
921 }
922
923 static int
924 thread_update_uticks(void)
925 {
926 struct thread *td = curthread;
927 struct proc *p = td->td_proc;
928 struct kse *ke = td->td_kse;
929 struct kse_thr_mailbox *tmbx;
930 caddr_t addr;
931 uint uticks, sticks;
932
933 KASSERT(!(td->td_flags & TDF_UNBOUND), ("thread not bound."));
934
935 if (ke->ke_mailbox == NULL)
936 return 0;
937
938 uticks = ke->ke_uuticks;
939 ke->ke_uuticks = 0;
940 sticks = ke->ke_usticks;
941 ke->ke_usticks = 0;
942 tmbx = (void *)fuword((caddr_t)ke->ke_mailbox
943 + offsetof(struct kse_mailbox, km_curthread));
944 if ((tmbx == NULL) || (tmbx == (void *)-1))
945 return 0;
946 if (uticks) {
947 addr = (caddr_t)tmbx + offsetof(struct kse_thr_mailbox, tm_uticks);
948 uticks += fuword(addr);
949 if (suword(addr, uticks))
950 goto bad;
951 }
952 if (sticks) {
953 addr = (caddr_t)tmbx + offsetof(struct kse_thr_mailbox, tm_sticks);
954 sticks += fuword(addr);
955 if (suword(addr, sticks))
956 goto bad;
957 }
958 return 0;
959 bad:
960 PROC_LOCK(p);
961 psignal(p, SIGSEGV);
962 PROC_UNLOCK(p);
963 return -1;
964 }
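
/*
 * Tick accounting summary: statclock ticks are buffered kernel-side by
 * thread_add_ticks_intr() above (in ke_uuticks/ke_usticks, or in
 * td_usticks when the thread has its own mailbox). The KSE counters are
 * flushed here into the tm_uticks/tm_sticks fields of the current thread
 * mailbox on the way back to user mode; td_usticks is flushed by
 * thread_export_context().
 */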
965
966 /*
967 * Discard the current thread and exit from its context.
968 *
969 * Because we can't free a thread while we're operating under its context,
970 * push the current thread into our CPU's deadthread holder. This means
971 * we needn't worry about someone else grabbing our context before we
972 * do a cpu_throw().
973 */
974 void
975 thread_exit(void)
976 {
977 struct thread *td;
978 struct kse *ke;
979 struct proc *p;
980 struct ksegrp *kg;
981
982 td = curthread;
983 kg = td->td_ksegrp;
984 p = td->td_proc;
985 ke = td->td_kse;
986
987 mtx_assert(&sched_lock, MA_OWNED);
988 KASSERT(p != NULL, ("thread exiting without a process"));
989 KASSERT(ke != NULL, ("thread exiting without a kse"));
990 KASSERT(kg != NULL, ("thread exiting without a kse group"));
991 PROC_LOCK_ASSERT(p, MA_OWNED);
992 CTR1(KTR_PROC, "thread_exit: thread %p", td);
993 KASSERT(!mtx_owned(&Giant), ("dying thread owns giant"));
994
995 if (td->td_standin != NULL) {
996 thread_stash(td->td_standin);
997 td->td_standin = NULL;
998 }
999
1000 cpu_thread_exit(td); /* XXXSMP */
1001
	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff.
	 */
1007 if (p->p_numthreads > 1) {
1008 /*
1009 * Unlink this thread from its proc and the kseg.
1010 * In keeping with the other structs we probably should
1011 * have a thread_unlink() that does some of this but it
1012 * would only be called from here (I think) so it would
1013 * be a waste. (might be useful for proc_fini() as well.)
1014 */
1015 TAILQ_REMOVE(&p->p_threads, td, td_plist);
1016 p->p_numthreads--;
1017 TAILQ_REMOVE(&kg->kg_threads, td, td_kglist);
1018 kg->kg_numthreads--;
		/*
		 * The test below is NOT true if we are the
		 * sole exiting thread. P_STOPPED_SINGLE is unset
		 * in exit1() after it is the only survivor.
		 */
1024 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
1025 if (p->p_numthreads == p->p_suspcount) {
1026 thread_unsuspend_one(p->p_singlethread);
1027 }
1028 }
1029
1030 /* Reassign this thread's KSE. */
1031 ke->ke_thread = NULL;
1032 td->td_kse = NULL;
1033 ke->ke_state = KES_UNQUEUED;
1034 KASSERT((ke->ke_bound != td),
1035 ("thread_exit: entered with ke_bound set"));
1036
1037 /*
1038 * decide what to do with the KSE attached to this thread.
1039 */
1040 if (ke->ke_flags & KEF_EXIT) {
1041 kse_unlink(ke);
1042 } else {
1043 kse_reassign(ke);
1044 }
1045 PROC_UNLOCK(p);
1046 td->td_state = TDS_INACTIVE;
1047 td->td_proc = NULL;
1048 td->td_ksegrp = NULL;
1049 td->td_last_kse = NULL;
1050 PCPU_SET(deadthread, td);
1051 } else {
1052 PROC_UNLOCK(p);
1053 }
1054 cpu_throw();
1055 /* NOTREACHED */
1056 }
1057
/*
 * Do any thread-specific cleanups that may be needed in wait().
 * Called with Giant held; proc and schedlock not held.
 */
1062 void
1063 thread_wait(struct proc *p)
1064 {
1065 struct thread *td;
1066
	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	KASSERT((p->p_numksegrps == 1), ("Multiple ksegrps in wait1()"));
1069 FOREACH_THREAD_IN_PROC(p, td) {
1070 if (td->td_standin != NULL) {
1071 thread_free(td->td_standin);
1072 td->td_standin = NULL;
1073 }
1074 cpu_thread_clean(td);
1075 }
1076 thread_reap(); /* check for zombie threads etc. */
1077 }
1078
/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 *
 * Note that we do not link to the proc's ucred here.
 * The thread is linked as if running but no KSE assigned.
 */
1087 void
1088 thread_link(struct thread *td, struct ksegrp *kg)
1089 {
1090 struct proc *p;
1091
1092 p = kg->kg_proc;
1093 td->td_state = TDS_INACTIVE;
1094 td->td_proc = p;
1095 td->td_ksegrp = kg;
1096 td->td_last_kse = NULL;
1097
1098 LIST_INIT(&td->td_contested);
1099 callout_init(&td->td_slpcallout, 1);
1100 TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
1101 TAILQ_INSERT_HEAD(&kg->kg_threads, td, td_kglist);
1102 p->p_numthreads++;
1103 kg->kg_numthreads++;
1104 td->td_kse = NULL;
1105 }
1106
1107 void
1108 kse_purge(struct proc *p, struct thread *td)
1109 {
1110 struct kse *ke;
1111 struct ksegrp *kg;
1112
1113 KASSERT(p->p_numthreads == 1, ("bad thread number"));
1114 mtx_lock_spin(&sched_lock);
1115 while ((kg = TAILQ_FIRST(&p->p_ksegrps)) != NULL) {
1116 while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) {
1117 TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
1118 kg->kg_idle_kses--;
1119 TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist);
1120 kg->kg_kses--;
1121 kse_stash(ke);
1122 }
1123 TAILQ_REMOVE(&p->p_ksegrps, kg, kg_ksegrp);
1124 p->p_numksegrps--;
1125 KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) ||
1126 ((kg->kg_kses == 1) && (kg == td->td_ksegrp)),
1127 ("wrong kg_kses"));
1128 if (kg != td->td_ksegrp) {
1129 ksegrp_stash(kg);
1130 }
1131 }
1132 TAILQ_INSERT_HEAD(&p->p_ksegrps, td->td_ksegrp, kg_ksegrp);
1133 p->p_numksegrps++;
1134 mtx_unlock_spin(&sched_lock);
1135 }
1136
1137
1138 /*
1139 * Create a thread and schedule it for upcall on the KSE given.
1140 */
1141 struct thread *
1142 thread_schedule_upcall(struct thread *td, struct kse *ke)
1143 {
1144 struct thread *td2;
1145 struct ksegrp *kg;
1146 int newkse;
1147
1148 mtx_assert(&sched_lock, MA_OWNED);
1149 newkse = (ke != td->td_kse);
1150
	/*
	 * If the kse is already owned by another thread then we can't
	 * schedule an upcall because the other thread must be BOUND
	 * which means it is not in a position to take an upcall.
	 * We must be borrowing the KSE to allow us to complete some
	 * in-kernel work. When we complete, the bound thread will have
	 * the chance to complete. This thread will sleep as planned.
	 * Hopefully there will eventually be an unbound thread that can
	 * be converted to an upcall to report the completion of this
	 * thread.
	 */
1161 if (ke->ke_bound && ((ke->ke_bound->td_flags & TDF_UNBOUND) == 0)) {
1162 return (NULL);
1163 }
1164 KASSERT((ke->ke_bound == NULL), ("kse already bound"));
1165
1166 if (ke->ke_state == KES_IDLE) {
1167 kg = ke->ke_ksegrp;
1168 TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
1169 kg->kg_idle_kses--;
1170 ke->ke_state = KES_UNQUEUED;
1171 }
1172 if ((td2 = td->td_standin) != NULL) {
1173 td->td_standin = NULL;
1174 } else {
1175 if (newkse)
1176 panic("no reserve thread when called with a new kse");
1177 /*
1178 * If called from (e.g.) sleep and we do not have
1179 * a reserve thread, then we've used it, so do not
1180 * create an upcall.
1181 */
1182 return (NULL);
1183 }
1184 CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
1185 td2, td->td_proc->p_pid, td->td_proc->p_comm);
1186 bzero(&td2->td_startzero,
1187 (unsigned)RANGEOF(struct thread, td_startzero, td_endzero));
1188 bcopy(&td->td_startcopy, &td2->td_startcopy,
1189 (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
1190 thread_link(td2, ke->ke_ksegrp);
1191 cpu_set_upcall(td2, td->td_pcb);
1192
1193 /*
1194 * XXXKSE do we really need this? (default values for the
1195 * frame).
1196 */
1197 bcopy(td->td_frame, td2->td_frame, sizeof(struct trapframe));
1198
1199 /*
1200 * Bind the new thread to the KSE,
1201 * and if it's our KSE, lend it back to ourself
1202 * so we can continue running.
1203 */
1204 td2->td_ucred = crhold(td->td_ucred);
1205 td2->td_flags = TDF_UPCALLING; /* note: BOUND */
1206 td2->td_kse = ke;
1207 td2->td_state = TDS_CAN_RUN;
1208 td2->td_inhibitors = 0;
1209 /*
1210 * If called from msleep(), we are working on the current
1211 * KSE so fake that we borrowed it. If called from
1212 * kse_create(), don't, as we have a new kse too.
1213 */
1214 if (!newkse) {
		/*
		 * This thread will be scheduled when the current thread
		 * blocks, exits or tries to enter userspace (whichever
		 * happens first). When that happens the KSE will "revert"
		 * to this thread in a BOUND manner. Since we are called
		 * from msleep() this is going to be "very soon" in nearly
		 * all cases.
		 */
1223 ke->ke_bound = td2;
1224 TD_SET_LOAN(td2);
1225 } else {
1226 ke->ke_bound = NULL;
1227 ke->ke_thread = td2;
1228 ke->ke_state = KES_THREAD;
1229 setrunqueue(td2);
1230 }
1231 return (td2); /* bogus.. should be a void function */
1232 }
1233
/*
 * Schedule an upcall to notify a KSE process that it has received signals.
 *
 * XXX - Modifying a sigset_t like this is totally bogus.
 */
1239 struct thread *
1240 signal_upcall(struct proc *p, int sig)
1241 {
1242 struct thread *td, *td2;
1243 struct kse *ke;
1244 sigset_t ss;
1245 int error;
1246
1247 PROC_LOCK_ASSERT(p, MA_OWNED);
1248 return (NULL);
1249
1250 td = FIRST_THREAD_IN_PROC(p);
1251 ke = td->td_kse;
1252 PROC_UNLOCK(p);
1253 error = copyin(&ke->ke_mailbox->km_sigscaught, &ss, sizeof(sigset_t));
1254 PROC_LOCK(p);
1255 if (error)
1256 return (NULL);
1257 SIGADDSET(ss, sig);
1258 PROC_UNLOCK(p);
1259 error = copyout(&ss, &ke->ke_mailbox->km_sigscaught, sizeof(sigset_t));
1260 PROC_LOCK(p);
1261 if (error)
1262 return (NULL);
1263 if (td->td_standin == NULL)
1264 td->td_standin = thread_alloc();
1265 mtx_lock_spin(&sched_lock);
1266 td2 = thread_schedule_upcall(td, ke); /* Bogus JRE */
1267 mtx_unlock_spin(&sched_lock);
1268 return (td2);
1269 }
1270
1271 /*
1272 * setup done on the thread when it enters the kernel.
1273 * XXXKSE Presently only for syscalls but eventually all kernel entries.
1274 */
1275 void
1276 thread_user_enter(struct proc *p, struct thread *td)
1277 {
1278 struct kse *ke;
1279
1280 /*
1281 * First check that we shouldn't just abort.
1282 * But check if we are the single thread first!
1283 * XXX p_singlethread not locked, but should be safe.
1284 */
1285 if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
1286 PROC_LOCK(p);
1287 mtx_lock_spin(&sched_lock);
1288 thread_exit();
1289 /* NOTREACHED */
1290 }
1291
1292 /*
1293 * If we are doing a syscall in a KSE environment,
1294 * note where our mailbox is. There is always the
1295 * possibility that we could do this lazily (in sleep()),
1296 * but for now do it every time.
1297 */
1298 ke = td->td_kse;
1299 if (ke->ke_mailbox != NULL) {
1300 #if 0
1301 td->td_mailbox = (void *)fuword((caddr_t)ke->ke_mailbox
1302 + offsetof(struct kse_mailbox, km_curthread));
1303 #else /* if user pointer arithmetic is ok in the kernel */
1304 td->td_mailbox =
1305 (void *)fuword( (void *)&ke->ke_mailbox->km_curthread);
1306 #endif
1307 if ((td->td_mailbox == NULL) ||
1308 (td->td_mailbox == (void *)-1)) {
1309 td->td_mailbox = NULL; /* single thread it.. */
1310 mtx_lock_spin(&sched_lock);
1311 td->td_flags &= ~TDF_UNBOUND;
1312 mtx_unlock_spin(&sched_lock);
1313 } else {
			/*
			 * If the thread limit has been reached, act as
			 * though the thread has already done an upcall.
			 */
1318 if (p->p_numthreads > max_threads_per_proc) {
1319 if (td->td_standin != NULL)
1320 thread_stash(td->td_standin);
1321 td->td_standin = NULL;
1322 } else {
1323 if (td->td_standin == NULL)
1324 td->td_standin = thread_alloc();
1325 }
1326 mtx_lock_spin(&sched_lock);
1327 td->td_flags |= TDF_UNBOUND;
1328 mtx_unlock_spin(&sched_lock);
1329 td->td_usticks = 0;
1330 }
1331 }
1332 }
1333
/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first). If we were in a critical section, we should
 * just return to let it finish. Same if we were in the UTS (in
 * which case the mailbox's context's busy indicator will be set).
 * The only traps we support will have set the mailbox.
 * We will clear it here.
 */
1346 int
1347 thread_userret(struct thread *td, struct trapframe *frame)
1348 {
1349 int error;
1350 int unbound;
1351 struct kse *ke;
1352 struct ksegrp *kg;
1353 struct thread *td2;
1354 struct proc *p;
1355 struct timespec ts;
1356
1357 error = 0;
1358
1359 unbound = td->td_flags & TDF_UNBOUND;
1360
1361 kg = td->td_ksegrp;
1362 p = td->td_proc;
1363
	/*
	 * Originally bound threads never upcall but they may
	 * loan out their KSE at this point.
	 * Upcalls imply bound. They also may want to do some philanthropy.
	 * Unbound threads on the other hand either yield to other work
	 * or transform into an upcall.
	 * (having saved their context to user space in both cases)
	 */
1372 if (unbound) {
1373 /*
1374 * We are an unbound thread, looking to return to
1375 * user space.
		 * There are several possibilities:
1377 * 1) we are using a borrowed KSE. save state and exit.
1378 * kse_reassign() will recycle the kse as needed,
1379 * 2) we are not.. save state, and then convert ourself
1380 * to be an upcall, bound to the KSE.
1381 * if there are others that need the kse,
1382 * give them a chance by doing an mi_switch().
1383 * Because we are bound, control will eventually return
1384 * to us here.
1385 * ***
1386 * Save the thread's context, and link it
1387 * into the KSEGRP's list of completed threads.
1388 */
1389 error = thread_export_context(td);
1390 td->td_mailbox = NULL;
1391 td->td_usticks = 0;
1392 if (error) {
1393 /*
1394 * If we are not running on a borrowed KSE, then
1395 * failing to do the KSE operation just defaults
			 * back to synchronous operation, so just return from
1397 * the syscall. If it IS borrowed, there is nothing
1398 * we can do. We just lose that context. We
1399 * probably should note this somewhere and send
1400 * the process a signal.
1401 */
1402 PROC_LOCK(td->td_proc);
1403 psignal(td->td_proc, SIGSEGV);
1404 mtx_lock_spin(&sched_lock);
1405 if (td->td_kse->ke_bound == NULL) {
1406 td->td_flags &= ~TDF_UNBOUND;
1407 PROC_UNLOCK(td->td_proc);
1408 mtx_unlock_spin(&sched_lock);
1409 thread_update_uticks();
1410 return (error); /* go sync */
1411 }
1412 thread_exit();
1413 }
1414
1415 /*
1416 * if the KSE is owned and we are borrowing it,
1417 * don't make an upcall, just exit so that the owner
1418 * can get its KSE if it wants it.
1419 * Our context is already safely stored for later
1420 * use by the UTS.
1421 */
1422 PROC_LOCK(p);
1423 mtx_lock_spin(&sched_lock);
1424 if (td->td_kse->ke_bound) {
1425 thread_exit();
1426 }
1427 PROC_UNLOCK(p);
1428
1429 /*
1430 * Turn ourself into a bound upcall.
1431 * We will rely on kse_reassign()
1432 * to make us run at a later time.
		 * We should look just like a scheduled upcall
1434 * from msleep() or cv_wait().
1435 */
1436 td->td_flags &= ~TDF_UNBOUND;
1437 td->td_flags |= TDF_UPCALLING;
1438 /* Only get here if we have become an upcall */
1439
1440 } else {
1441 mtx_lock_spin(&sched_lock);
1442 }
1443 /*
1444 * We ARE going back to userland with this KSE.
1445 * Check for threads that need to borrow it.
1446 * Optimisation: don't call mi_switch if no-one wants the KSE.
1447 * Any other thread that comes ready after this missed the boat.
1448 */
1449 ke = td->td_kse;
1450 if ((td2 = kg->kg_last_assigned))
1451 td2 = TAILQ_NEXT(td2, td_runq);
1452 else
1453 td2 = TAILQ_FIRST(&kg->kg_runq);
1454 if (td2) {
		/*
		 * Force a switch to more urgent 'in kernel'
		 * work. Control will return to this thread
		 * when there is no more work to do.
		 * kse_reassign() will do that for us.
		 */
1461 TD_SET_LOAN(td);
1462 ke->ke_bound = td;
1463 ke->ke_thread = NULL;
1464 p->p_stats->p_ru.ru_nvcsw++;
1465 mi_switch(); /* kse_reassign() will (re)find td2 */
1466 }
1467 mtx_unlock_spin(&sched_lock);
1468
1469 /*
1470 * Optimisation:
1471 * Ensure that we have a spare thread available,
1472 * for when we re-enter the kernel.
1473 */
1474 if (td->td_standin == NULL) {
1475 td->td_standin = thread_alloc();
1476 }
1477
1478 thread_update_uticks();
1479 /*
1480 * To get here, we know there is no other need for our
1481 * KSE so we can proceed. If not upcalling, go back to
1482 * userspace. If we are, get the upcall set up.
1483 */
1484 if ((td->td_flags & TDF_UPCALLING) == 0)
1485 return (0);
1486
	/*
	 * We must be an upcall to get this far.
	 * There is no more work to do and we are going to ride
	 * this thread/KSE up to userland as an upcall.
	 * Do the last parts of the setup needed for the upcall.
	 */
1493 CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
1494 td, td->td_proc->p_pid, td->td_proc->p_comm);
1495
1496 /*
1497 * Set user context to the UTS.
1498 * Will use Giant in cpu_thread_clean() because it uses
1499 * kmem_free(kernel_map, ...)
1500 */
1501 cpu_set_upcall_kse(td, ke);
1502
1503 /*
1504 * Put any completed mailboxes on this KSE's list.
1505 */
1506 error = thread_link_mboxes(kg, ke);
1507 if (error)
1508 goto bad;
1509
	/*
	 * Set state and mailbox.
	 * From now on we are just a bound outgoing process.
	 * **Problem** userret is often called several times.
	 * It would be nice if this all happened only on the first time
	 * through (the scan for extra work etc.).
	 */
1517 mtx_lock_spin(&sched_lock);
1518 td->td_flags &= ~TDF_UPCALLING;
1519 mtx_unlock_spin(&sched_lock);
1520 #if 0
1521 error = suword((caddr_t)ke->ke_mailbox +
1522 offsetof(struct kse_mailbox, km_curthread), 0);
1523 #else /* if user pointer arithmetic is ok in the kernel */
1524 error = suword((caddr_t)&ke->ke_mailbox->km_curthread, 0);
1525 #endif
1526 ke->ke_uuticks = ke->ke_usticks = 0;
1527 if (!error) {
1528 nanotime(&ts);
1529 if (copyout(&ts, (caddr_t)&ke->ke_mailbox->km_timeofday,
1530 sizeof(ts))) {
1531 goto bad;
1532 }
1533 }
1534 return (0);
1535
1536 bad:
	/*
	 * Things are going to be so screwed we should just kill the process.
	 * How do we do that?
	 */
1541 PROC_LOCK(td->td_proc);
1542 psignal(td->td_proc, SIGSEGV);
1543 PROC_UNLOCK(td->td_proc);
1544 return (error); /* go sync */
1545 }
1546
/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single-threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may
 * however be accelerated in reaching the user boundary as we will wake
 * up any sleeping threads that are interruptible (PCATCH).
 */
1560 int
1561 thread_single(int force_exit)
1562 {
1563 struct thread *td;
1564 struct thread *td2;
1565 struct proc *p;
1566
1567 td = curthread;
1568 p = td->td_proc;
1569 mtx_assert(&Giant, MA_OWNED);
1570 PROC_LOCK_ASSERT(p, MA_OWNED);
1571 KASSERT((td != NULL), ("curthread is NULL"));
1572
1573 if ((p->p_flag & P_KSES) == 0)
1574 return (0);
1575
1576 /* Is someone already single threading? */
1577 if (p->p_singlethread)
1578 return (1);
1579
1580 if (force_exit == SINGLE_EXIT)
1581 p->p_flag |= P_SINGLE_EXIT;
1582 else
1583 p->p_flag &= ~P_SINGLE_EXIT;
1584 p->p_flag |= P_STOPPED_SINGLE;
1585 p->p_singlethread = td;
1586 /* XXXKSE Which lock protects the below values? */
1587 while ((p->p_numthreads - p->p_suspcount) != 1) {
1588 mtx_lock_spin(&sched_lock);
1589 FOREACH_THREAD_IN_PROC(p, td2) {
1590 if (td2 == td)
1591 continue;
1592 if (TD_IS_INHIBITED(td2)) {
1593 if (force_exit == SINGLE_EXIT) {
1594 if (TD_IS_SUSPENDED(td2)) {
1595 thread_unsuspend_one(td2);
1596 }
1597 if (TD_ON_SLEEPQ(td2) &&
1598 (td2->td_flags & TDF_SINTR)) {
1599 if (td2->td_flags & TDF_CVWAITQ)
1600 cv_abort(td2);
1601 else
1602 abortsleep(td2);
1603 }
1604 } else {
1605 if (TD_IS_SUSPENDED(td2))
1606 continue;
					/* maybe other inhibited states too? */
1608 if (TD_IS_SLEEPING(td2))
1609 thread_suspend_one(td2);
1610 }
1611 }
1612 }
1613 /*
1614 * Maybe we suspended some threads.. was it enough?
1615 */
1616 if ((p->p_numthreads - p->p_suspcount) == 1) {
1617 mtx_unlock_spin(&sched_lock);
1618 break;
1619 }
1620
		/*
		 * Wake us up when everyone else has suspended.
		 * In the meantime we suspend as well.
		 */
1625 thread_suspend_one(td);
1626 mtx_unlock(&Giant);
1627 PROC_UNLOCK(p);
1628 p->p_stats->p_ru.ru_nvcsw++;
1629 mi_switch();
1630 mtx_unlock_spin(&sched_lock);
1631 mtx_lock(&Giant);
1632 PROC_LOCK(p);
1633 }
1634 if (force_exit == SINGLE_EXIT)
1635 kse_purge(p, td);
1636 return (0);
1637 }
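
/*
 * A minimal, hypothetical sketch of the calling convention (illustrative
 * only, not lifted from a real call site):
 *
 *	mtx_lock(&Giant);
 *	PROC_LOCK(p);
 *	if (thread_single(SINGLE_EXIT)) {
 *		(another thread is already single-threading; back out)
 *	}
 *
 * Giant and the proc lock must be held on entry, as asserted above.
 */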
1638
/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * The 'return_instead' argument tells the function whether it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead; if it is non-zero, the caller must be able to accept
 * 0 (it may continue) or 1 (it must abort) as a result.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 *
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is in effect, even a single-threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
1672 int
1673 thread_suspend_check(int return_instead)
1674 {
1675 struct thread *td;
1676 struct proc *p;
1677 struct kse *ke;
1678 struct ksegrp *kg;
1679
1680 td = curthread;
1681 p = td->td_proc;
1682 kg = td->td_ksegrp;
1683 PROC_LOCK_ASSERT(p, MA_OWNED);
1684 while (P_SHOULDSTOP(p)) {
1685 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
1686 KASSERT(p->p_singlethread != NULL,
1687 ("singlethread not set"));
1688 /*
1689 * The only suspension in action is a
1690 * single-threading. Single threader need not stop.
1691 * XXX Should be safe to access unlocked
1692 * as it can only be set to be true by us.
1693 */
1694 if (p->p_singlethread == td)
1695 return (0); /* Exempt from stopping. */
1696 }
1697 if (return_instead)
1698 return (1);
1699
1700 /*
1701 * If the process is waiting for us to exit,
1702 * this thread should just suicide.
1703 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
1704 */
1705 if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
1706 mtx_lock_spin(&sched_lock);
1707 while (mtx_owned(&Giant))
1708 mtx_unlock(&Giant);
			/*
			 * Free extra KSEs and ksegrps. We needn't worry
			 * about whether the current thread is in the same
			 * ksegrp as p_singlethread and the last KSE in the
			 * group could be killed; this is protected by
			 * kg_numthreads, from which we deduce that in this
			 * case kg_numthreads must be > 1.
			 */
1716 ke = td->td_kse;
1717 if (ke->ke_bound == NULL &&
1718 ((kg->kg_kses != 1) || (kg->kg_numthreads == 1)))
1719 ke->ke_flags |= KEF_EXIT;
1720 thread_exit();
1721 }
1722
		/*
		 * When a thread suspends, it just
		 * moves to the process's suspend queue
		 * and stays there.
		 *
		 * XXXKSE if TDF_BOUND is true
		 * it will not release its KSE which might
		 * lead to deadlock if there are not enough KSEs
		 * to complete all waiting threads.
		 * Maybe be able to 'lend' it out again.
		 * (lent kse's can not go back to userland?)
		 * and can only be lent in STOPPED state.
		 */
1736 mtx_lock_spin(&sched_lock);
1737 if ((p->p_flag & P_STOPPED_SIG) &&
1738 (p->p_suspcount+1 == p->p_numthreads)) {
1739 mtx_unlock_spin(&sched_lock);
1740 PROC_LOCK(p->p_pptr);
1741 if ((p->p_pptr->p_procsig->ps_flag &
1742 PS_NOCLDSTOP) == 0) {
1743 psignal(p->p_pptr, SIGCHLD);
1744 }
1745 PROC_UNLOCK(p->p_pptr);
1746 mtx_lock_spin(&sched_lock);
1747 }
1748 mtx_assert(&Giant, MA_NOTOWNED);
1749 thread_suspend_one(td);
1750 PROC_UNLOCK(p);
1751 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
1752 if (p->p_numthreads == p->p_suspcount) {
1753 thread_unsuspend_one(p->p_singlethread);
1754 }
1755 }
1756 p->p_stats->p_ru.ru_nivcsw++;
1757 mi_switch();
1758 mtx_unlock_spin(&sched_lock);
1759 PROC_LOCK(p);
1760 }
1761 return (0);
1762 }
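
/*
 * A hypothetical usage sketch (illustrative only): code that must not
 * stop mid-operation passes return_instead != 0 and backs out,
 *
 *	PROC_LOCK(p);
 *	if (thread_suspend_check(1)) {
 *		PROC_UNLOCK(p);
 *		(back out of the operation; the process is stopping)
 *	}
 *	PROC_UNLOCK(p);
 *
 * while callers at safe points (e.g. userret()) pass 0 and simply park
 * in mi_switch() above until the single-threading event ends.
 */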
1763
1764 void
1765 thread_suspend_one(struct thread *td)
1766 {
1767 struct proc *p = td->td_proc;
1768
1769 mtx_assert(&sched_lock, MA_OWNED);
1770 p->p_suspcount++;
1771 TD_SET_SUSPENDED(td);
1772 TAILQ_INSERT_TAIL(&p->p_suspended, td, td_runq);
	/*
	 * Hack: If we are suspending but are on the sleep queue
	 * then we are in msleep or the cv equivalent. We
	 * want to look like we have two Inhibitors.
	 * It may already be set; that doesn't matter.
	 */
1779 if (TD_ON_SLEEPQ(td))
1780 TD_SET_SLEEPING(td);
1781 }
1782
1783 void
1784 thread_unsuspend_one(struct thread *td)
1785 {
1786 struct proc *p = td->td_proc;
1787
1788 mtx_assert(&sched_lock, MA_OWNED);
1789 TAILQ_REMOVE(&p->p_suspended, td, td_runq);
1790 TD_CLR_SUSPENDED(td);
1791 p->p_suspcount--;
1792 setrunnable(td);
1793 }
1794
1795 /*
1796 * Allow all threads blocked by single threading to continue running.
1797 */
1798 void
1799 thread_unsuspend(struct proc *p)
1800 {
1801 struct thread *td;
1802
1803 mtx_assert(&sched_lock, MA_OWNED);
1804 PROC_LOCK_ASSERT(p, MA_OWNED);
1805 if (!P_SHOULDSTOP(p)) {
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
1807 thread_unsuspend_one(td);
1808 }
1809 } else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
1810 (p->p_numthreads == p->p_suspcount)) {
1811 /*
1812 * Stopping everything also did the job for the single
1813 * threading request. Now we've downgraded to single-threaded,
1814 * let it continue.
1815 */
1816 thread_unsuspend_one(p->p_singlethread);
1817 }
1818 }
1819
1820 void
1821 thread_single_end(void)
1822 {
1823 struct thread *td;
1824 struct proc *p;
1825
1826 td = curthread;
1827 p = td->td_proc;
1828 PROC_LOCK_ASSERT(p, MA_OWNED);
1829 p->p_flag &= ~P_STOPPED_SINGLE;
1830 p->p_singlethread = NULL;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
1837 if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
1838 mtx_lock_spin(&sched_lock);
		while ((td = TAILQ_FIRST(&p->p_suspended))) {
1840 thread_unsuspend_one(td);
1841 }
1842 mtx_unlock_spin(&sched_lock);
1843 }
1844 }
1845
1846