The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_kse.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
    3  *  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice(s), this list of conditions and the following disclaimer as
   10  *    the first lines of this file unmodified other than the possible
   11  *    addition of one or more copyright notices.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice(s), this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
   17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
   20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
   26  * DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD: releng/5.4/sys/kern/kern_kse.c 145335 2005-04-20 19:11:07Z cvs2svn $");
   31 
   32 #include <sys/param.h>
   33 #include <sys/systm.h>
   34 #include <sys/kernel.h>
   35 #include <sys/lock.h>
   36 #include <sys/mutex.h>
   37 #include <sys/proc.h>
   38 #include <sys/ptrace.h>
   39 #include <sys/smp.h>
   40 #include <sys/sysproto.h>
   41 #include <sys/sched.h>
   42 #include <sys/signalvar.h>
   43 #include <sys/sleepqueue.h>
   44 #include <sys/kse.h>
   45 #include <sys/ktr.h>
   46 #include <vm/uma.h>
   47 
    48 /*
    49  * KSEGRP related storage.
    50  */
    51 static uma_zone_t upcall_zone;	/* backs struct kse_upcall allocations; created in kseinit() */
    52 
    53 /* DEBUG ONLY */
    54 extern int virtual_cpu;
    55 extern int thread_debug;
    56 
/* Tunables and shared state defined elsewhere in the kernel thread code. */
    57 extern int max_threads_per_proc;
    58 extern int max_groups_per_proc;
    59 extern int max_threads_hits;
    60 extern struct mtx kse_zombie_lock;
    61 
    62 
/* Upcalls queued here by upcall_stash() until reaped by kse_GC(). */
    63 TAILQ_HEAD(, kse_upcall) zombie_upcalls =
    64         TAILQ_HEAD_INITIALIZER(zombie_upcalls);
    65 
/* Forward declarations; definitions are later in this file. */
    66 static int thread_update_usr_ticks(struct thread *td);
    67 static void thread_alloc_spare(struct thread *td);
   69 struct kse_upcall *
   70 upcall_alloc(void)
   71 {
   72         struct kse_upcall *ku;
   73 
   74         ku = uma_zalloc(upcall_zone, M_WAITOK | M_ZERO);
   75         return (ku);
   76 }
   77 
   78 void
   79 upcall_free(struct kse_upcall *ku)
   80 {
   81 
   82         uma_zfree(upcall_zone, ku);
   83 }
   84 
   85 void
   86 upcall_link(struct kse_upcall *ku, struct ksegrp *kg)
   87 {
   88 
   89         mtx_assert(&sched_lock, MA_OWNED);
   90         TAILQ_INSERT_TAIL(&kg->kg_upcalls, ku, ku_link);
   91         ku->ku_ksegrp = kg;
   92         kg->kg_numupcalls++;
   93 }
   94 
   95 void
   96 upcall_unlink(struct kse_upcall *ku)
   97 {
   98         struct ksegrp *kg = ku->ku_ksegrp;
   99 
  100         mtx_assert(&sched_lock, MA_OWNED);
  101         KASSERT(ku->ku_owner == NULL, ("%s: have owner", __func__));
  102         TAILQ_REMOVE(&kg->kg_upcalls, ku, ku_link);
  103         kg->kg_numupcalls--;
  104         upcall_stash(ku);
  105 }
  106 
  107 void
  108 upcall_remove(struct thread *td)
  109 {
  110 
  111         if (td->td_upcall) {
  112                 td->td_upcall->ku_owner = NULL;
  113                 upcall_unlink(td->td_upcall);
  114                 td->td_upcall = 0;
  115         }
  116 }
  117 
   118 #ifndef _SYS_SYSPROTO_H_
   119 struct kse_switchin_args {
   120         struct kse_thr_mailbox *tmbx;
   121         int flags;
   122 };
   123 #endif
   124 
/*
 * kse_switchin: system call.
 *
 * Switch the calling thread to the user context stored in the thread
 * mailbox *uap->tmbx.  With KSE_SWITCHIN_SETTMBX the mailbox pointer is
 * also published in the KSE mailbox (km_curthread), remembered in
 * td_mailbox, and TDP_CAN_UNBIND is set.  If the process is traced, the
 * mailbox debug flags may request single stepping (TMDF_SSTEP) or an
 * upcall on suspension (TMDF_SUSPEND).  Returns EJUSTRETURN on success
 * so normal syscall return does not clobber the freshly set context.
 */
   125 int
   126 kse_switchin(struct thread *td, struct kse_switchin_args *uap)
   127 {
   128         struct kse_thr_mailbox tmbx;
   129         struct kse_upcall *ku;
   130         int error;
   131 
                /* Only a bound thread (has an upcall, can't unbind) may call. */
   132         if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
   133                 return (EINVAL);
                /* Error-cascade: each step runs only if all prior ones succeeded. */
   134         error = (uap->tmbx == NULL) ? EINVAL : 0;
   135         if (!error)
   136                 error = copyin(uap->tmbx, &tmbx, sizeof(tmbx));
   137         if (!error && (uap->flags & KSE_SWITCHIN_SETTMBX))
   138                 error = (suword(&ku->ku_mailbox->km_curthread,
   139                          (long)uap->tmbx) != 0 ? EINVAL : 0);
   140         if (!error)
   141                 error = set_mcontext(td, &tmbx.tm_context.uc_mcontext);
   142         if (!error) {
                        /* Let userland/debugger see which LWP runs this mailbox. */
   143                 suword32(&uap->tmbx->tm_lwp, td->td_tid);
   144                 if (uap->flags & KSE_SWITCHIN_SETTMBX) {
   145                         td->td_mailbox = uap->tmbx;
   146                         td->td_pflags |= TDP_CAN_UNBIND;
   147                 }
   148                 if (td->td_proc->p_flag & P_TRACED) {
   149                         if (tmbx.tm_dflags & TMDF_SSTEP)
   150                                 ptrace_single_step(td);
   151                         else
   152                                 ptrace_clear_single_step(td);
   153                         if (tmbx.tm_dflags & TMDF_SUSPEND) {
   154                                 mtx_lock_spin(&sched_lock);
   155                                 /* fuword can block, check again */
   156                                 if (td->td_upcall)
   157                                         ku->ku_flags |= KUF_DOUPCALL;
   158                                 mtx_unlock_spin(&sched_lock);
   159                         }
   160                 }
   161         }
   162         return ((error == 0) ? EJUSTRETURN : error);
   163 }
  164 
   165 /*
   166 struct kse_thr_interrupt_args {
   167         struct kse_thr_mailbox * tmbx;
   168         int cmd;
   169         long data;
   170 };
   171 */
/*
 * kse_thr_interrupt: system call.
 *
 * Deliver an interrupt/signal/restart request to the thread identified
 * by its thread mailbox (uap->tmbx), or act on the calling thread for
 * KSE_INTR_SIGEXIT and KSE_INTR_DBSUSPEND.  Only valid for P_SA
 * (KSE-threaded) processes.
 */
   172 int
   173 kse_thr_interrupt(struct thread *td, struct kse_thr_interrupt_args *uap)
   174 {
   175         struct proc *p;
   176         struct thread *td2;
   177         struct kse_upcall *ku;
   178         struct kse_thr_mailbox *tmbx;
   179         uint32_t flags;
   180 
   181         p = td->td_proc;
   182 
   183         if (!(p->p_flag & P_SA))
   184                 return (EINVAL);
   185 
   186         switch (uap->cmd) {
   187         case KSE_INTR_SENDSIG:
   188                 if (uap->data < 0 || uap->data > _SIG_MAXSIG)
   189                         return (EINVAL);
                /*
                 * FALLTHROUGH: SENDSIG shares the mailbox-lookup code below;
                 * the cmd is re-checked after the target thread is found.
                 */
   190         case KSE_INTR_INTERRUPT:
   191         case KSE_INTR_RESTART:
   192                 PROC_LOCK(p);
   193                 mtx_lock_spin(&sched_lock);
                        /* Find the thread owning the given thread mailbox. */
   194                 FOREACH_THREAD_IN_PROC(p, td2) {
   195                         if (td2->td_mailbox == uap->tmbx)
   196                                 break;
   197                 }
   198                 if (td2 == NULL) {
   199                         mtx_unlock_spin(&sched_lock);
   200                         PROC_UNLOCK(p);
   201                         return (ESRCH);
   202                 }
   203                 if (uap->cmd == KSE_INTR_SENDSIG) {
   204                         if (uap->data > 0) {
   205                                 td2->td_flags &= ~TDF_INTERRUPT;
   206                                 mtx_unlock_spin(&sched_lock);
   207                                 tdsignal(td2, (int)uap->data, SIGTARGET_TD);
   208                         } else {
                                        /* Signal 0: probe only, nothing to send. */
   209                                 mtx_unlock_spin(&sched_lock);
   210                         }
   211                 } else {
                                /* Mark interrupted and kick it off any sleep queue. */
   212                         td2->td_flags |= TDF_INTERRUPT | TDF_ASTPENDING;
   213                         if (TD_CAN_UNBIND(td2))
   214                                 td2->td_upcall->ku_flags |= KUF_DOUPCALL;
   215                         if (uap->cmd == KSE_INTR_INTERRUPT)
   216                                 td2->td_intrval = EINTR;
   217                         else
   218                                 td2->td_intrval = ERESTART;
   219                         if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR))
   220                                 sleepq_abort(td2);
   221                         mtx_unlock_spin(&sched_lock);
   222                 }
   223                 PROC_UNLOCK(p);
   224                 break;
   225         case KSE_INTR_SIGEXIT:
   226                 if (uap->data < 1 || uap->data > _SIG_MAXSIG)
   227                         return (EINVAL);
   228                 PROC_LOCK(p);
                        /* sigexit() does not return; it exits the process. */
   229                 sigexit(td, (int)uap->data);
   230                 break;
   231 
   232         case KSE_INTR_DBSUSPEND:
   233                 /* this sub-function is only for bound thread */
   234                 if (td->td_pflags & TDP_SA)
   235                         return (EINVAL);
                        /* NOTE(review): ku is not checked for NULL here — confirm
                         * a bound thread always has an upcall at this point. */
   236                 ku = td->td_upcall;
   237                 tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
   238                 if (tmbx == NULL || tmbx == (void *)-1)
   239                         return (EINVAL);
   240                 flags = 0;
                        /* Stay suspended while the debugger keeps TMDF_SUSPEND set. */
   241                 while ((p->p_flag & P_TRACED) && !(p->p_flag & P_SINGLE_EXIT)) {
   242                         flags = fuword32(&tmbx->tm_dflags);
   243                         if (!(flags & TMDF_SUSPEND))
   244                                 break;
   245                         PROC_LOCK(p);
   246                         mtx_lock_spin(&sched_lock);
   247                         thread_stopped(p);
   248                         thread_suspend_one(td);
   249                         PROC_UNLOCK(p);
                                /* mi_switch() releases sched_lock while we sleep and
                                 * reacquires it before returning. */
   250                         mi_switch(SW_VOL, NULL);
   251                         mtx_unlock_spin(&sched_lock);
   252                 }
   253                 return (0);
   254 
   255         default:
   256                 return (EINVAL);
   257         }
   258         return (0);
   259 }
  260 
   261 /*
   262 struct kse_exit_args {
   263         register_t dummy;
   264 };
   265 */
/*
 * kse_exit: system call.
 *
 * Retire the calling upcall (UTS) thread.  Refuses (EDEADLK) if this is
 * the last non-exiting upcall in the group while other threads remain.
 * Marks the UTS mailbox KMF_DONE, removes the upcall, and either exits
 * the thread or — if it is the very last thread — drops back out of
 * threading mode and returns to the user.
 */
   266 int
   267 kse_exit(struct thread *td, struct kse_exit_args *uap)
   268 {
   269         struct proc *p;
   270         struct ksegrp *kg;
   271         struct kse_upcall *ku, *ku2;
   272         int    error, count;
   273 
   274         p = td->td_proc;
   275         /* 
   276          * Ensure that this is only called from the UTS
   277          */
   278         if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
   279                 return (EINVAL);
   280 
   281         kg = td->td_ksegrp;
   282         count = 0;
   283 
   284         /*
   285          * Calculate the existing non-exiting upcalls in this ksegroup.
   286          * If we are the last upcall but there are still other threads,
   287          * then do not exit. We need the other threads to be able to 
   288          * complete whatever they are doing.
   289          * XXX This relies on the userland knowing what to do if we return.
   290          * It may be a better choice to convert ourselves into a kse_release
   291          * ( or similar) and wait in the kernel to be needed.
   292          */
   293         PROC_LOCK(p);
   294         mtx_lock_spin(&sched_lock);
   295         FOREACH_UPCALL_IN_GROUP(kg, ku2) {
   296                 if (ku2->ku_flags & KUF_EXITING)
   297                         count++;
   298         }
   299         if ((kg->kg_numupcalls - count) == 1 &&
   300             (kg->kg_numthreads > 1)) {
   301                 mtx_unlock_spin(&sched_lock);
   302                 PROC_UNLOCK(p);
   303                 return (EDEADLK);
   304         }
   305         ku->ku_flags |= KUF_EXITING;
   306         mtx_unlock_spin(&sched_lock);
   307         PROC_UNLOCK(p);
   308 
   309         /* 
   310          * Mark the UTS mailbox as having been finished with.
   311          * If that fails then just go for a segfault.
   312          * XXX need to check that it can be delivered without a mailbox.
   313          */
   314         error = suword32(&ku->ku_mailbox->km_flags, ku->ku_mflags|KMF_DONE);
                /* A bound thread also clears the LWP id in its thread mailbox. */
   315         if (!(td->td_pflags & TDP_SA))
   316                 if (suword32(&td->td_mailbox->tm_lwp, 0))
   317                         error = EFAULT;
   318         PROC_LOCK(p);
   319         if (error)
   320                 psignal(p, SIGSEGV);
   321         mtx_lock_spin(&sched_lock);
   322         upcall_remove(td);
   323         if (p->p_numthreads != 1) {
   324                 /*
   325                  * If we are not the last thread, but we are the last
   326                  * thread in this ksegrp, then by definition this is not
   327                  * the last group and we need to clean it up as well.
   328                  * thread_exit will clean up the kseg as needed.
   329                  */
   330                 thread_stopped(p);
   331                 thread_exit();
   332                 /* NOTREACHED */
   333         }
   334         /*
   335          * This is the last thread. Just return to the user.
   336          * We know that there is only one ksegrp too, as any others
   337          * would have been discarded in previous calls to thread_exit().
   338          * Effectively we have left threading mode..
   339          * The only real thing left to do is ensure that the
   340          * scheduler sets our concurrency back to 1 as that may be a
   341          * resource leak otherwise.
   342          * This is an A[PB]I issue.. what SHOULD we do?
   343          * One possibility is to return to the user. It may not cope well.
   344          * The other possibility would be to let the process exit.
   345          */
   346         thread_unthread(td);
   347         mtx_unlock_spin(&sched_lock);
   348         PROC_UNLOCK(p);
   349 #if 1
   350         return (0);
   351 #else
   352         exit1(td, 0);
   353 #endif
   354 }
  355 
   356 /*
   357  * Either becomes an upcall or waits for an awakening event and
   358  * then becomes an upcall. Only error cases return.
   359  */
   360 /*
   361 struct kse_release_args {
   362         struct timespec *timeout;
   363 };
   364 */
/*
 * kse_release: system call.
 *
 * An SA thread marks itself TDP_UPCALLING; a bound thread re-reads its
 * KSE mailbox flags.  Depending on KMF_WAITSIGEVENT the thread then
 * sleeps either for a signal event (copying the caught-signal set out
 * to the mailbox afterwards) or for completed-thread notifications,
 * honoring the optional timeout.  Any pending KUF_DOUPCALL request is
 * consumed before returning.
 */
   365 int
   366 kse_release(struct thread *td, struct kse_release_args *uap)
   367 {
   368         struct proc *p;
   369         struct ksegrp *kg;
   370         struct kse_upcall *ku;
   371         struct timespec timeout;
   372         struct timeval tv;
   373         sigset_t sigset;
   374         int error;
   375 
   376         p = td->td_proc;
   377         kg = td->td_ksegrp;
                /* Only callable from a UTS (upcall-owning, unbindable) thread. */
   378         if ((ku = td->td_upcall) == NULL || TD_CAN_UNBIND(td))
   379                 return (EINVAL);
   380         if (uap->timeout != NULL) {
   381                 if ((error = copyin(uap->timeout, &timeout, sizeof(timeout))))
   382                         return (error);
   383                 TIMESPEC_TO_TIMEVAL(&tv, &timeout);
   384         }
   385         if (td->td_pflags & TDP_SA)
   386                 td->td_pflags |= TDP_UPCALLING;
   387         else {
   388                 ku->ku_mflags = fuword32(&ku->ku_mailbox->km_flags);
   389                 if (ku->ku_mflags == -1) {
                                /* Mailbox unreadable: fatal for the process. */
   390                         PROC_LOCK(p);
   391                         sigexit(td, SIGSEGV);
   392                 }
   393         }
   394         PROC_LOCK(p);
   395         if (ku->ku_mflags & KMF_WAITSIGEVENT) {
   396                 /* UTS wants to wait for signal event */
   397                 if (!(p->p_flag & P_SIGEVENT) &&
   398                     !(ku->ku_flags & KUF_DOUPCALL)) {
   399                         td->td_kflags |= TDK_KSERELSIG;
   400                         error = msleep(&p->p_siglist, &p->p_mtx, PPAUSE|PCATCH,
   401                             "ksesigwait", (uap->timeout ? tvtohz(&tv) : 0));
   402                         td->td_kflags &= ~(TDK_KSERELSIG | TDK_WAKEUP);
   403                 }
   404                 p->p_flag &= ~P_SIGEVENT;
   405                 sigset = p->p_siglist;
   406                 PROC_UNLOCK(p);
                        /* Report which signals were caught while we waited. */
   407                 error = copyout(&sigset, &ku->ku_mailbox->km_sigscaught,
   408                     sizeof(sigset));
   409         } else {
                        /* Sleep until a thread completes (unless one already has
                         * and the UTS cares, or an upcall was requested). */
   410                 if ((ku->ku_flags & KUF_DOUPCALL) == 0 &&
   411                     ((ku->ku_mflags & KMF_NOCOMPLETED) ||
   412                      (kg->kg_completed == NULL))) {
   413                         kg->kg_upsleeps++;
   414                         td->td_kflags |= TDK_KSEREL;
   415                         error = msleep(&kg->kg_completed, &p->p_mtx,
   416                                 PPAUSE|PCATCH, "kserel",
   417                                 (uap->timeout ? tvtohz(&tv) : 0));
   418                         td->td_kflags &= ~(TDK_KSEREL | TDK_WAKEUP);
   419                         kg->kg_upsleeps--;
   420                 }
   421                 PROC_UNLOCK(p);
   422         }
   423         if (ku->ku_flags & KUF_DOUPCALL) {
   424                 mtx_lock_spin(&sched_lock);
   425                 ku->ku_flags &= ~KUF_DOUPCALL;
   426                 mtx_unlock_spin(&sched_lock);
   427         }
   428         return (0);
   429 }
  430 
   431 /* struct kse_wakeup_args {
   432         struct kse_mailbox *mbx;
   433 }; */
/*
 * kse_wakeup: system call.
 *
 * Wake the upcall thread associated with KSE mailbox uap->mbx (or, with
 * a NULL mbx, any sleeper in the caller's ksegrp).  If the owner is
 * sleeping in kse_release() it is pulled off its sleep queue; otherwise
 * the upcall is flagged KUF_DOUPCALL for later.  ESRCH if no matching
 * upcall exists.
 */
   434 int
   435 kse_wakeup(struct thread *td, struct kse_wakeup_args *uap)
   436 {
   437         struct proc *p;
   438         struct ksegrp *kg;
   439         struct kse_upcall *ku;
   440         struct thread *td2;
   441 
   442         p = td->td_proc;
   443         td2 = NULL;
   444         ku = NULL;
   445         /* KSE-enabled processes only, please. */
   446         if (!(p->p_flag & P_SA))
   447                 return (EINVAL);
   448         PROC_LOCK(p);
   449         mtx_lock_spin(&sched_lock);
   450         if (uap->mbx) {
                        /* Search every group for the upcall owning this mailbox. */
   451                 FOREACH_KSEGRP_IN_PROC(p, kg) {
   452                         FOREACH_UPCALL_IN_GROUP(kg, ku) {
   453                                 if (ku->ku_mailbox == uap->mbx)
   454                                         break;
   455                         }
   456                         if (ku)
   457                                 break;
   458                 }
   459         } else {
   460                 kg = td->td_ksegrp;
   461                 if (kg->kg_upsleeps) {
                                /* Someone is asleep in kse_release(); wake them all. */
   462                         mtx_unlock_spin(&sched_lock);
   463                         wakeup(&kg->kg_completed);
   464                         PROC_UNLOCK(p);
   465                         return (0);
   466                 }
   467                 ku = TAILQ_FIRST(&kg->kg_upcalls);
   468         }
   469         if (ku == NULL) {
   470                 mtx_unlock_spin(&sched_lock);
   471                 PROC_UNLOCK(p);
   472                 return (ESRCH);
   473         }
   474         if ((td2 = ku->ku_owner) == NULL) {
   475                 mtx_unlock_spin(&sched_lock);
                        /* Every linked upcall must have an owner thread. */
   476                 panic("%s: no owner", __func__);
   477         } else if (td2->td_kflags & (TDK_KSEREL | TDK_KSERELSIG)) {
   478                 mtx_unlock_spin(&sched_lock);
   479                 if (!(td2->td_kflags & TDK_WAKEUP)) {
   480                         td2->td_kflags |= TDK_WAKEUP;
                                /* Remove from whichever wait channel it slept on. */
   481                         if (td2->td_kflags & TDK_KSEREL)
   482                                 sleepq_remove(td2, &kg->kg_completed);
   483                         else
   484                                 sleepq_remove(td2, &p->p_siglist);
   485                 }
   486         } else {
                        /* Owner is running; ask for an upcall at the next chance. */
   487                 ku->ku_flags |= KUF_DOUPCALL;
   488                 mtx_unlock_spin(&sched_lock);
   489         }
   490         PROC_UNLOCK(p);
   491         return (0);
   492 }
  493 
   494 /*
   495  * No new KSEG: first call: use current KSE, don't schedule an upcall
   496  * All other situations, do allocate max new KSEs and schedule an upcall.
   497  *
   498  * XXX should be changed so that 'first' behaviour lasts for as long
   499  * as you have not made a kse in this ksegrp. i.e. as long as we do not have
   500  * a mailbox..
   501  */
   502 /* struct kse_create_args {
   503         struct kse_mailbox *mbx;
   504         int newgroup;
   505 }; */
   506 int
   507 kse_create(struct thread *td, struct kse_create_args *uap)
   508 {
   509         struct ksegrp *newkg;
   510         struct ksegrp *kg;
   511         struct proc *p;
   512         struct kse_mailbox mbx;
   513         struct kse_upcall *newku;
   514         int err, ncpus, sa = 0, first = 0;
   515         struct thread *newtd;
   516 
   517         p = td->td_proc;
   518         kg = td->td_ksegrp;
   519         if ((err = copyin(uap->mbx, &mbx, sizeof(mbx))))
   520                 return (err);
   521 
                /* Concurrency cap: real CPUs unless overridden for debugging. */
   522         ncpus = mp_ncpus;
   523         if (virtual_cpu != 0)
   524                 ncpus = virtual_cpu;
   525         /*
   526          * If the new UTS mailbox says that this
   527          * will be a BOUND lwp, then it had better
   528          * have its thread mailbox already there.
   529          * In addition, this ksegrp will be limited to
   530          * a concurrency of 1. There is more on this later.
   531          */
   532         if (mbx.km_flags & KMF_BOUND) {
   533                 if (mbx.km_curthread == NULL) 
   534                         return (EINVAL);
   535                 ncpus = 1;
   536         } else {
   537                 sa = TDP_SA;
   538         }
   539 
   540         PROC_LOCK(p);
   541         /*
   542          * Processes using the other threading model can't
   543          * suddenly start calling this one
   544          */
   545         if ((p->p_flag & (P_SA|P_HADTHREADS)) == P_HADTHREADS) {
   546                 PROC_UNLOCK(p);
   547                 return (EINVAL);
   548         }
   549 
   550         /*
   551          * Limit it to NCPU upcall contexts per ksegrp in any case.
   552          * There is a small race here as we don't hold proclock
   553          * until we inc the ksegrp count, but it's not really a big problem
   554          * if we get one too many, but we save a proc lock.
   555          */
   556         if ((!uap->newgroup) && (kg->kg_numupcalls >= ncpus)) {
   557                 PROC_UNLOCK(p);
   558                 return (EPROCLIM);
   559         }
   560 
                /* First kse_create for this process: enter threading mode. */
   561         if (!(p->p_flag & P_SA)) {
   562                 first = 1;
   563                 p->p_flag |= P_SA|P_HADTHREADS;
   564         }
   565 
   566         PROC_UNLOCK(p);
   567         /*
   568          * Now pay attention!
   569          * If we are going to be bound, then we need to be either
   570          * a new group, or the first call ever. In either
   571          * case we will be creating (or be) the only thread in a group.
   572          * and the concurrency will be set to 1.
   573          * This is not quite right, as we may still make ourself 
   574          * bound after making other ksegrps but it will do for now.
   575          * The library will only try do this much.
   576          */
   577         if (!sa && !(uap->newgroup || first))
   578                 return (EINVAL);
   579 
   580         if (uap->newgroup) {
                        /* Build a fresh ksegrp cloned from the caller's. */
   581                 newkg = ksegrp_alloc();
   582                 bzero(&newkg->kg_startzero,
   583                     __rangeof(struct ksegrp, kg_startzero, kg_endzero));
   584                 bcopy(&kg->kg_startcopy, &newkg->kg_startcopy,
   585                     __rangeof(struct ksegrp, kg_startcopy, kg_endcopy));
   586                 sched_init_concurrency(newkg);
   587                 PROC_LOCK(p);
   588                 if (p->p_numksegrps >= max_groups_per_proc) {
   589                         PROC_UNLOCK(p);
   590                         ksegrp_free(newkg);
   591                         return (EPROCLIM);
   592                 }
   593                 ksegrp_link(newkg, p);
   594                 mtx_lock_spin(&sched_lock);
   595                 sched_fork_ksegrp(td, newkg);
   596                 mtx_unlock_spin(&sched_lock);
   597                 PROC_UNLOCK(p);
   598         } else {
   599                 /*
   600                  * We want to make a thread in our own ksegrp.
   601                  * If we are just the first call, either kind
   602                  * is ok, but if not then either we must be 
   603                  * already an upcallable thread to make another,
   604                  * or a bound thread to make one of those.
   605                  * Once again, not quite right but good enough for now.. XXXKSE
   606                  */
   607                 if (!first && ((td->td_pflags & TDP_SA) != sa))
   608                         return (EINVAL);
   609 
   610                 newkg = kg;
   611         }
   612 
   613         /* 
   614          * This test is a bit "indirect".
   615          * It might simplify things if we made a direct way of testing
   616          * if a ksegrp has been worked on before.
   617          * In the case of a bound request and the concurrency being set to 
   618          * one, the concurrency will already be 1 so it's just inefficient
   619          * but not dangerous to call this again. XXX
   620          */
   621         if (newkg->kg_numupcalls == 0) {
   622                 /*
   623                  * Initialize KSE group with the appropriate
   624                  * concurrency.
   625                  *
   626                  * For a multiplexed group, create as much concurrency
   627                  * as the number of physical cpus.
   628                  * This increases concurrency in the kernel even if the
   629                  * userland is not MP safe and can only run on a single CPU.
   630                  * In an ideal world, every physical cpu should execute a
   631                  * thread.  If there is enough concurrency, threads in the
   632                  * kernel can be executed parallel on different cpus at
   633                  * full speed without being restricted by the number of
   634                  * upcalls the userland provides.
   635                  * Adding more upcall structures only increases concurrency
   636                  * in userland.
   637                  *
   638                  * For a bound thread group, because there is only one thread
   639                  * in the group, we only set the concurrency for the group 
   640                  * to 1.  A thread in this kind of group will never schedule
   641                  * an upcall when blocked.  This simulates pthread system
   642                  * scope thread behaviour.
   643                  */
   644                 sched_set_concurrency(newkg, ncpus);
   645         }
   646         /* 
   647          * Even bound LWPs get a mailbox and an upcall to hold it.
   648          */
   649         newku = upcall_alloc();
   650         newku->ku_mailbox = uap->mbx;
   651         newku->ku_func = mbx.km_func;
   652         bcopy(&mbx.km_stack, &newku->ku_stack, sizeof(stack_t));
   653 
   654         /*
   655          * For the first call this may not have been set.
   656          * Of course nor may it actually be needed.
   657          */
   658         if (td->td_standin == NULL)
   659                 thread_alloc_spare(td);
   660 
   661         PROC_LOCK(p);
                /* Re-check the cap; another thread may have raced us here. */
   662         if (newkg->kg_numupcalls >= ncpus) {
   663                 PROC_UNLOCK(p);
   664                 upcall_free(newku);
   665                 return (EPROCLIM);
   666         }
   667 
   668         /*
   669          * If we are the first time, and a normal thread,
   670          * then transfer all the signals back to the 'process'.
   671          * SA threading will make a special thread to handle them.
   672          */
   673         if (first && sa) {
   674                 SIGSETOR(p->p_siglist, td->td_siglist);
   675                 SIGEMPTYSET(td->td_siglist);
   676                 SIGFILLSET(td->td_sigmask);
   677                 SIG_CANTMASK(td->td_sigmask);
   678         }
   679 
   680         /*
   681          * Make the new upcall available to the ksegrp.
   682          * It may or may not use it, but it's available.
   683          */
   684         mtx_lock_spin(&sched_lock);
   685         PROC_UNLOCK(p);
   686         upcall_link(newku, newkg);
   687         if (mbx.km_quantum)
   688                 newkg->kg_upquantum = max(1, mbx.km_quantum/tick);
   689 
   690         /*
   691          * Each upcall structure has an owner thread, find which
   692          * one owns it.
   693          */
   694         if (uap->newgroup) {
   695                 /*
   696                  * Because the new ksegrp hasn't a thread,
   697                  * create an initial upcall thread to own it.
   698                  */
   699                 newtd = thread_schedule_upcall(td, newku);
   700         } else {
   701                 /*
   702                  * If the current thread hasn't an upcall structure,
   703                  * just assign the upcall to it.
   704                  * It'll just return.
   705                  */
   706                 if (td->td_upcall == NULL) {
   707                         newku->ku_owner = td;
   708                         td->td_upcall = newku;
   709                         newtd = td;
   710                 } else {
   711                         /*
   712                          * Create a new upcall thread to own it.
   713                          */
   714                         newtd = thread_schedule_upcall(td, newku);
   715                 }
   716         }
   717         mtx_unlock_spin(&sched_lock);
   718 
   719         /*
   720          * Let the UTS instance know its LWPID.
   721          * It doesn't really care. But the debugger will.
   722          */
   723         suword32(&newku->ku_mailbox->km_lwp, newtd->td_tid);
   724 
   725         /*
   726          * In the same manner, if the UTS has a current user thread, 
   727          * then it is also running on this LWP so set it as well.
   728          * The library could do that of course.. but why not..
   729          */
   730         if (mbx.km_curthread)
   731                 suword32(&mbx.km_curthread->tm_lwp, newtd->td_tid);
   732 
   733         
   734         if (sa) {
   735                 newtd->td_pflags |= TDP_SA;
   736         } else {
   737                 newtd->td_pflags &= ~TDP_SA;
   738 
   739                 /*
   740                  * Since a library will use the mailbox pointer to 
   741                  * identify even a bound thread, and the mailbox pointer
   742                  * will never be allowed to change after this syscall
   743                  * for a bound thread, set it here so the library can
   744                  * find the thread after the syscall returns.
   745                  */
   746                 newtd->td_mailbox = mbx.km_curthread;
   747 
   748                 if (newtd != td) {
   749                         /*
   750                          * If we did create a new thread then
   751                          * make sure it goes to the right place
   752                          * when it starts up, and make sure that it runs 
   753                          * at full speed when it gets there. 
   754                          * thread_schedule_upcall() copies all cpu state
   755                          * to the new thread, so we should clear single step
   756                          * flag here.
   757                          */
   758                         cpu_set_upcall_kse(newtd, newku);
   759                         if (p->p_flag & P_TRACED)
   760                                 ptrace_clear_single_step(newtd);
   761                 }
   762         }
   763         
   764         /* 
   765          * If we are starting a new thread, kick it off.
   766          */
   767         if (newtd != td) {
   768                 mtx_lock_spin(&sched_lock);
   769                 setrunqueue(newtd, SRQ_BORING);
   770                 mtx_unlock_spin(&sched_lock);
   771         }
   772         return (0);
   773 }
  774 
  775 /*
  776  * Initialize global thread allocation resources.
  777  */
  778 void
  779 kseinit(void)
  780 {
  781 
  782         upcall_zone = uma_zcreate("UPCALL", sizeof(struct kse_upcall),
  783             NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
  784 }
  785 
/*
 * Stash an embarrassingly extra upcall on the zombie upcall queue,
 * to be freed later by kse_GC().  kse_zombie_lock protects the queue.
 */

void
upcall_stash(struct kse_upcall *ku)
{
	mtx_lock_spin(&kse_zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_upcalls, ku, ku_link);
	mtx_unlock_spin(&kse_zombie_lock);
}
  797 
  798 /*
  799  * Reap zombie kse resource.
  800  */
  801 void
  802 kse_GC(void)
  803 {
  804         struct kse_upcall *ku_first, *ku_next;
  805 
  806         /*
  807          * Don't even bother to lock if none at this instant,
  808          * we really don't care about the next instant..
  809          */
  810         if (!TAILQ_EMPTY(&zombie_upcalls)) {
  811                 mtx_lock_spin(&kse_zombie_lock);
  812                 ku_first = TAILQ_FIRST(&zombie_upcalls);
  813                 if (ku_first)
  814                         TAILQ_INIT(&zombie_upcalls);
  815                 mtx_unlock_spin(&kse_zombie_lock);
  816                 while (ku_first) {
  817                         ku_next = TAILQ_NEXT(ku_first, ku_link);
  818                         upcall_free(ku_first);
  819                         ku_first = ku_next;
  820                 }
  821         }
  822 }
  823 
/*
 * Store the thread's user context in the UTS's mailbox, then push that
 * mailbox onto the head of the "completed" list being built in user
 * space.  The list is anchored in the ksegrp structure (kg_completed).
 * On success the kernel forgets the mailbox (td_mailbox is cleared)
 * since userland now owns it.  If 'willexit' is set, all signals are
 * masked on the thread because it is about to exit.  Any failure to
 * write the userland mailbox is fatal: the thread is killed via
 * sigexit(SIGILL).
 */
int
thread_export_context(struct thread *td, int willexit)
{
	struct proc *p;
	struct ksegrp *kg;
	uintptr_t mbx;
	void *addr;
	int error = 0, sig;
	mcontext_t mc;

	p = td->td_proc;
	kg = td->td_ksegrp;

	/*
	 * Post sync signal, or process SIGKILL and SIGSTOP.
	 * For a sync signal this is only possible when the signal is
	 * not caught by userland or the process is being debugged.
	 */
	PROC_LOCK(p);
	if (td->td_flags & TDF_NEEDSIGCHK) {
		/* td_flags is protected by sched_lock. */
		mtx_lock_spin(&sched_lock);
		td->td_flags &= ~TDF_NEEDSIGCHK;
		mtx_unlock_spin(&sched_lock);
		mtx_lock(&p->p_sigacts->ps_mtx);
		while ((sig = cursig(td)) != 0)
			postsig(sig);
		mtx_unlock(&p->p_sigacts->ps_mtx);
	}
	/* Exiting thread takes no further signals. */
	if (willexit)
		SIGFILLSET(td->td_sigmask);
	PROC_UNLOCK(p);

	/* Export the user/machine context. */
	get_mcontext(td, &mc, 0);
	addr = (void *)(&td->td_mailbox->tm_context.uc_mcontext);
	error = copyout(&mc, addr, sizeof(mcontext_t));
	if (error)
		goto bad;

	/* Mark the mailbox as no longer running on any LWP. */
	addr = (caddr_t)(&td->td_mailbox->tm_lwp);
	if (suword32(addr, 0)) {
		error = EFAULT;
		goto bad;
	}

	/* Get address in latest mbox of list pointer */
	addr = (void *)(&td->td_mailbox->tm_next);
	/*
	 * Push this mailbox ahead of the current list head.  The head
	 * (kg_completed) is sampled unlocked, stored into the userland
	 * mailbox, then re-checked under the process lock; if it moved
	 * underneath us, retry the userland store (lock-free push).
	 */
	for (;;) {
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			error = EFAULT;
			goto bad;
		}
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = td->td_mailbox;
			/*
			 * The thread context may be taken away by
			 * other upcall threads once we unlock the
			 * process lock; it is no longer valid to
			 * use it again anywhere else.
			 */
			td->td_mailbox = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	td->td_usticks = 0;
	return (0);

bad:
	/* Userland mailbox is unusable; terminate with SIGILL. */
	PROC_LOCK(p);
	sigexit(td, SIGILL);
	return (error);
}
  909 
/*
 * Take the list of completed mailboxes for this KSEGRP and put them on
 * this upcall's mailbox as it's the next one going up to userland.
 * Uses the same sample/store/recheck loop as thread_export_context()
 * to cope with concurrent list updates.  Returns 0 on success, or
 * EFAULT (after posting SIGSEGV to the process) if the userland store
 * fails.
 */
static int
thread_link_mboxes(struct ksegrp *kg, struct kse_upcall *ku)
{
	struct proc *p = kg->kg_proc;
	void *addr;
	uintptr_t mbx;

	addr = (void *)(&ku->ku_mailbox->km_completed);
	for (;;) {
		/* Sample the head unlocked and store it to userland... */
		mbx = (uintptr_t)kg->kg_completed;
		if (suword(addr, mbx)) {
			PROC_LOCK(p);
			psignal(p, SIGSEGV);
			PROC_UNLOCK(p);
			return (EFAULT);
		}
		/* ...then claim the list only if the head is unchanged. */
		PROC_LOCK(p);
		if (mbx == (uintptr_t)kg->kg_completed) {
			kg->kg_completed = NULL;
			PROC_UNLOCK(p);
			break;
		}
		PROC_UNLOCK(p);
	}
	return (0);
}
  940 
  941 /*
  942  * This function should be called at statclock interrupt time
  943  */
  944 int
  945 thread_statclock(int user)
  946 {
  947         struct thread *td = curthread;
  948 
  949         if (!(td->td_pflags & TDP_SA))
  950                 return (0);
  951         if (user) {
  952                 /* Current always do via ast() */
  953                 mtx_lock_spin(&sched_lock);
  954                 td->td_flags |= TDF_ASTPENDING;
  955                 mtx_unlock_spin(&sched_lock);
  956                 td->td_uuticks++;
  957         } else if (td->td_mailbox != NULL)
  958                 td->td_usticks++;
  959         return (0);
  960 }
  961 
  962 /*
  963  * Export state clock ticks for userland
  964  */
  965 static int
  966 thread_update_usr_ticks(struct thread *td)
  967 {
  968         struct proc *p = td->td_proc;
  969         caddr_t addr;
  970         u_int uticks;
  971 
  972         if (td->td_mailbox == NULL)
  973                 return (-1);
  974 
  975         if ((uticks = td->td_uuticks) != 0) {
  976                 td->td_uuticks = 0;
  977                 addr = (caddr_t)&td->td_mailbox->tm_uticks;
  978                 if (suword32(addr, uticks+fuword32(addr)))
  979                         goto error;
  980         }
  981         if ((uticks = td->td_usticks) != 0) {
  982                 td->td_usticks = 0;
  983                 addr = (caddr_t)&td->td_mailbox->tm_sticks;
  984                 if (suword32(addr, uticks+fuword32(addr)))
  985                         goto error;
  986         }
  987         return (0);
  988 
  989 error:
  990         PROC_LOCK(p);
  991         psignal(p, SIGSEGV);
  992         PROC_UNLOCK(p);
  993         return (-2);
  994 }
  995 
  996 /*
  997  * This function is intended to be used to initialize a spare thread
  998  * for upcall. Initialize thread's large data area outside sched_lock
  999  * for thread_schedule_upcall(). The crhold is also here to get it out
 1000  * from the schedlock as it has a mutex op itself.
 1001  * XXX BUG.. we need to get the cr ref after the thread has 
 1002  * checked and chenged its own, not 6 months before...  
 1003  */
 1004 void
 1005 thread_alloc_spare(struct thread *td)
 1006 {
 1007         struct thread *spare;
 1008 
 1009         if (td->td_standin)
 1010                 return;
 1011         spare = thread_alloc();
 1012         td->td_standin = spare;
 1013         bzero(&spare->td_startzero,
 1014             __rangeof(struct thread, td_startzero, td_endzero));
 1015         spare->td_proc = td->td_proc;
 1016         spare->td_ucred = crhold(td->td_ucred);
 1017 }
 1018 
/*
 * Create a thread and schedule it for upcall on the KSE given.
 * Use our thread's standin so that we don't have to allocate one:
 * sched_lock is held, so no allocation may happen here (see
 * thread_alloc_spare()).  Returns the new upcall thread, which is
 * left TDS_CAN_RUN; the caller is responsible for putting it on a
 * run queue.
 */
struct thread *
thread_schedule_upcall(struct thread *td, struct kse_upcall *ku)
{
	struct thread *td2;

	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Schedule an upcall thread on specified kse_upcall,
	 * the kse_upcall must be free.
	 * td must have a spare thread.
	 */
	KASSERT(ku->ku_owner == NULL, ("%s: upcall has owner", __func__));
	if ((td2 = td->td_standin) != NULL) {
		td->td_standin = NULL;
	} else {
		panic("no reserve thread when scheduling an upcall");
		return (NULL);	/* not reached; placates the compiler */
	}
	CTR3(KTR_PROC, "thread_schedule_upcall: thread %p (pid %d, %s)",
	     td2, td->td_proc->p_pid, td->td_proc->p_comm);
	/*
	 * Bzero already done in thread_alloc_spare() because we can't
	 * do the crhold here because we are in schedlock already.
	 */
	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));
	thread_link(td2, ku->ku_ksegrp);
	/* inherit parts of blocked thread's context as a good template */
	cpu_set_upcall(td2, td);
	/* Let the new thread become owner of the upcall */
	ku->ku_owner   = td2;
	td2->td_upcall = ku;
	/* Fresh thread state: SA upcall pending, runnable, uninhibited. */
	td2->td_flags  = 0;
	td2->td_pflags = TDP_SA|TDP_UPCALLING;
	td2->td_state  = TDS_CAN_RUN;
	td2->td_inhibitors = 0;
	/* Upcall threads run with all blockable signals masked. */
	SIGFILLSET(td2->td_sigmask);
	SIG_CANTMASK(td2->td_sigmask);
	sched_fork_thread(td, td2);
	return (td2);	/* callers (e.g. thread_switchout()) use this */
}
 1065 
/*
 * Deliver a synchronous signal's siginfo into the thread's mailbox and
 * mask that signal on the thread.  It is only used when the thread
 * generated a trap and the process is being debugged.  Entered with
 * the process lock and ps_mtx held; both are dropped around the
 * (possibly sleeping) copyout and reacquired before returning.  A
 * copyout failure kills the process via sigexit(SIGSEGV).
 */
void
thread_signal_add(struct thread *td, int sig)
{
	struct proc *p;
	siginfo_t siginfo;
	struct sigacts *ps;
	int error;

	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	ps = p->p_sigacts;
	mtx_assert(&ps->ps_mtx, MA_OWNED);

	cpu_thread_siginfo(sig, 0, &siginfo);
	mtx_unlock(&ps->ps_mtx);
	SIGADDSET(td->td_sigmask, sig);
	PROC_UNLOCK(p);
	/* copyout may sleep, hence the lock juggling above. */
	error = copyout(&siginfo, &td->td_mailbox->tm_syncsig, sizeof(siginfo));
	if (error) {
		PROC_LOCK(p);
		sigexit(td, SIGSEGV);
	}
	PROC_LOCK(p);
	mtx_lock(&ps->ps_mtx);
}
#include "opt_sched.h"
/*
 * Called in the context-switch path for a thread that is about to
 * block (sched_lock held).  May hand the outgoing thread's upcall off
 * to a freshly scheduled upcall thread so the UTS can keep this
 * virtual CPU busy.  Returns the thread the caller should consider
 * running next (under SCHED_4BSD this may be the new upcall thread).
 */
struct thread *
thread_switchout(struct thread *td, int flags, struct thread *nextthread)
{
	struct kse_upcall *ku;
	struct thread *td2;

	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * If the outgoing thread is in threaded group and has never
	 * scheduled an upcall, decide whether this is a short
	 * or long term event and thus whether or not to schedule
	 * an upcall.
	 * If it is a short term event, just suspend it in
	 * a way that takes its KSE with it.
	 * Select the events for which we want to schedule upcalls.
	 * For now it's just sleep or if thread is suspended but
	 * process wide suspending flag is not set (debugger
	 * suspends thread).
	 * XXXKSE eventually almost any inhibition could do.
	 */
	if (TD_CAN_UNBIND(td) && (td->td_standin) &&
	    (TD_ON_SLEEPQ(td) || (TD_IS_SUSPENDED(td) &&
	     !P_SHOULDSTOP(td->td_proc)))) {
		/*
		 * Release ownership of upcall, and schedule an upcall
		 * thread, this new upcall thread becomes the owner of
		 * the upcall structure. It will be ahead of us in the
		 * run queue, so as we are stopping, it should either
		 * start up immediately, or at least before us if
		 * we release our slot.
		 */
		ku = td->td_upcall;
		ku->ku_owner = NULL;
		td->td_upcall = NULL;
		td->td_pflags &= ~TDP_CAN_UNBIND;
		td2 = thread_schedule_upcall(td, ku);
#ifdef SCHED_4BSD
		if (flags & SW_INVOL || nextthread) {
			setrunqueue(td2, SRQ_YIELDING);
		} else {
			/* Keep up with reality.. we have one extra thread
			 * in the picture.. and it's 'running'.
			 */
			return td2;
		}
#else
		setrunqueue(td2, SRQ_YIELDING);
#endif
	}
	return (nextthread);
}
 1148 
/*
 * Setup done on the thread when it enters the kernel: for SA threads,
 * fetch the upcall mailbox flags and locate the current user thread
 * mailbox, recording it in td_mailbox (and allowing the thread to be
 * unbound) unless userland has marked a critical region.
 */
void
thread_user_enter(struct thread *td)
{
	struct proc *p = td->td_proc;
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct kse_thr_mailbox *tmbx;
	uint32_t flags;

	/*
	 * First check that we shouldn't just abort. we
	 * can suspend it here or just exit.
	 */
	if (__predict_false(P_SHOULDSTOP(p))) {
		PROC_LOCK(p);
		thread_suspend_check(0);
		PROC_UNLOCK(p);
	}

	/* Bound (non-SA) threads need no mailbox bookkeeping. */
	if (!(td->td_pflags & TDP_SA))
		return;

	/*
	 * If we are doing a syscall in a KSE environment,
	 * note where our mailbox is.
	 */

	kg = td->td_ksegrp;
	ku = td->td_upcall;

	KASSERT(ku != NULL, ("no upcall owned"));
	KASSERT(ku->ku_owner == td, ("wrong owner"));
	KASSERT(!TD_CAN_UNBIND(td), ("can unbind"));

	/* Make sure a standin exists before it might be needed. */
	if (td->td_standin == NULL)
		thread_alloc_spare(td);
	ku->ku_mflags = fuword32((void *)&ku->ku_mailbox->km_flags);
	tmbx = (void *)fuword((void *)&ku->ku_mailbox->km_curthread);
	/*
	 * No current user thread, a fetch fault (-1), or an explicit
	 * KMF_NOUPCALL request all mean we stay bound for this entry.
	 */
	if ((tmbx == NULL) || (tmbx == (void *)-1L) ||
	    (ku->ku_mflags & KMF_NOUPCALL)) {
		td->td_mailbox = NULL;
	} else {
		flags = fuword32(&tmbx->tm_flags);
		/*
		 * On some architectures, the TP register points to the
		 * thread mailbox but not to the kse mailbox, and
		 * userland can not atomically clear km_curthread, but
		 * can use the TP register and set TMF_NOUPCALL in the
		 * thread flags to indicate a critical region.
		 */
		if (flags & TMF_NOUPCALL) {
			td->td_mailbox = NULL;
		} else {
			td->td_mailbox = tmbx;
			td->td_pflags |= TDP_CAN_UNBIND;
			if (__predict_false(p->p_flag & P_TRACED)) {
				flags = fuword32(&tmbx->tm_dflags);
				if (flags & TMDF_SUSPEND) {
					mtx_lock_spin(&sched_lock);
					/* fuword can block, check again */
					if (td->td_upcall)
						ku->ku_flags |= KUF_DOUPCALL;
					mtx_unlock_spin(&sched_lock);
				}
			}
		}
	}
}
 1220 
/*
 * The extra work we go through if we are a threaded process when we
 * return to userland.
 *
 * If we are a KSE process and returning to user mode, check for
 * extra work to do before we return (e.g. for more syscalls
 * to complete first).  If we were in a critical section, we should
 * just return to let it finish. Same if we were in the UTS (in
 * which case the mailbox's context's busy indicator will be set).
 * The only traps we support will have set the mailbox.
 * We will clear it here.
 */
int
thread_userret(struct thread *td, struct trapframe *frame)
{
	struct kse_upcall *ku;
	struct ksegrp *kg, *kg2;
	struct proc *p;
	struct timespec ts;
	int error = 0, upcalls, uts_crit;

	/* Nothing to do with bound thread */
	if (!(td->td_pflags & TDP_SA))
		return (0);

	/*
	 * Update stat clock count for userland.  No mailbox means we
	 * were interrupted in the UTS, i.e. a critical region.
	 */
	if (td->td_mailbox != NULL) {
		thread_update_usr_ticks(td);
		uts_crit = 0;
	} else {
		uts_crit = 1;
	}

	p = td->td_proc;
	kg = td->td_ksegrp;
	ku = td->td_upcall;

	/*
	 * Optimisation:
	 * This thread has not started any upcall.
	 * If there is no work to report other than ourself,
	 * then it can return direct to userland.
	 */
	if (TD_CAN_UNBIND(td)) {
		td->td_pflags &= ~TDP_CAN_UNBIND;
		/*
		 * Fast path: no pending signals, no completed threads,
		 * no upcall requested, and our quantum not expired —
		 * just refresh the mailbox timestamp and return.
		 */
		if ((td->td_flags & TDF_NEEDSIGCHK) == 0 &&
		    (kg->kg_completed == NULL) &&
		    (ku->ku_flags & KUF_DOUPCALL) == 0 &&
		    (kg->kg_upquantum && ticks < kg->kg_nextupcall)) {
			nanotime(&ts);
			error = copyout(&ts,
				(caddr_t)&ku->ku_mailbox->km_timeofday,
				sizeof(ts));
			td->td_mailbox = 0;
			ku->ku_mflags = 0;
			if (error)
				goto out;
			return (0);
		}
		thread_export_context(td, 0);
		/*
		 * There is something to report, and we own an upcall
		 * structure, we can go to userland.
		 * Turn ourself into an upcall thread.
		 */
		td->td_pflags |= TDP_UPCALLING;
	} else if (td->td_mailbox && (ku == NULL)) {
		/*
		 * Completed unbound thread with no upcall to ride:
		 * export our context for the UTS, wake any upcall
		 * sleeping for completions, and exit this kernel
		 * thread.
		 */
		thread_export_context(td, 1);
		PROC_LOCK(p);
		if (kg->kg_upsleeps)
			wakeup(&kg->kg_completed);
		mtx_lock_spin(&sched_lock);
		thread_stopped(p);
		thread_exit();
		/* NOTREACHED */
	}

	KASSERT(ku != NULL, ("upcall is NULL"));
	KASSERT(TD_CAN_UNBIND(td) == 0, ("can unbind"));

	/*
	 * Throttle: if the process has grown too many threads, wait
	 * here until enough of them drain away (or the wait is
	 * interrupted, or the threads are mostly upcalls anyway).
	 */
	if (p->p_numthreads > max_threads_per_proc) {
		max_threads_hits++;
		PROC_LOCK(p);
		mtx_lock_spin(&sched_lock);
		p->p_maxthrwaits++;
		while (p->p_numthreads > max_threads_per_proc) {
			upcalls = 0;
			FOREACH_KSEGRP_IN_PROC(p, kg2) {
				if (kg2->kg_numupcalls == 0)
					upcalls++;
				else
					upcalls += kg2->kg_numupcalls;
			}
			if (upcalls >= max_threads_per_proc)
				break;
			mtx_unlock_spin(&sched_lock);
			if (msleep(&p->p_numthreads, &p->p_mtx, PPAUSE|PCATCH,
			    "maxthreads", 0)) {
				mtx_lock_spin(&sched_lock);
				break;
			} else {
				mtx_lock_spin(&sched_lock);
			}
		}
		p->p_maxthrwaits--;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	}

	if (td->td_pflags & TDP_UPCALLING) {
		uts_crit = 0;
		kg->kg_nextupcall = ticks+kg->kg_upquantum;
		/*
		 * There is no more work to do and we are going to ride
		 * this thread up to userland as an upcall.
		 * Do the last parts of the setup needed for the upcall.
		 */
		CTR3(KTR_PROC, "userret: upcall thread %p (pid %d, %s)",
		    td, p->p_pid, td->td_proc->p_comm);

		td->td_pflags &= ~TDP_UPCALLING;
		if (ku->ku_flags & KUF_DOUPCALL) {
			/* ku_flags is protected by sched_lock. */
			mtx_lock_spin(&sched_lock);
			ku->ku_flags &= ~KUF_DOUPCALL;
			mtx_unlock_spin(&sched_lock);
		}
		/*
		 * Set user context to the UTS, unless userland asked
		 * for no upcall (KMF_NOUPCALL).
		 */
		if (!(ku->ku_mflags & KMF_NOUPCALL)) {
			cpu_set_upcall_kse(td, ku);
			if (p->p_flag & P_TRACED)
				ptrace_clear_single_step(td);
			error = suword32(&ku->ku_mailbox->km_lwp,
					td->td_tid);
			if (error)
				goto out;
			error = suword(&ku->ku_mailbox->km_curthread, 0);
			if (error)
				goto out;
		}

		/*
		 * Unhook the list of completed threads.
		 * anything that completes after this gets to
		 * come in next time.
		 * Put the list of completed thread mailboxes on
		 * this KSE's mailbox.
		 */
		if (!(ku->ku_mflags & KMF_NOCOMPLETED) &&
		    (error = thread_link_mboxes(kg, ku)) != 0)
			goto out;
	}
	if (!uts_crit) {
		nanotime(&ts);
		error = copyout(&ts, &ku->ku_mailbox->km_timeofday, sizeof(ts));
	}

out:
	if (error) {
		/*
		 * Things are going to be so screwed we should just kill
		 * the process.
		 * how do we do that?
		 */
		PROC_LOCK(p);
		psignal(p, SIGSEGV);
		PROC_UNLOCK(p);
	} else {
		/*
		 * Optimisation:
		 * Ensure that we have a spare thread available,
		 * for when we re-enter the kernel.
		 */
		if (td->td_standin == NULL)
			thread_alloc_spare(td);
	}

	/* Reset per-trip mailbox/upcall state before returning. */
	ku->ku_mflags = 0;
	td->td_mailbox = NULL;
	td->td_usticks = 0;
	return (error);	/* go sync */
}
 1406 
 1407 int
 1408 thread_upcall_check(struct thread *td)
 1409 {
 1410         PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 1411         if (td->td_kflags & TDK_WAKEUP)
 1412                 return (1);
 1413         else
 1414                 return (0);
 1415 }
 1416 
/*
 * Called after ptrace resumed a process: force all
 * virtual CPUs to schedule an upcall for the SA process,
 * because the debugger may have changed something in userland and
 * we should notify the UTS as soon as possible.  Called with the
 * process lock and sched_lock held.
 */
void
thread_continued(struct proc *p)
{
	struct ksegrp *kg;
	struct kse_upcall *ku;
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);

	if (!(p->p_flag & P_SA))
		return;

	if (p->p_flag & P_TRACED) {
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			td = TAILQ_FIRST(&kg->kg_threads);
			if (td == NULL)
				continue;
			/* not a SA group, nothing to do */
			if (!(td->td_pflags & TDP_SA))
				continue;
			FOREACH_UPCALL_IN_GROUP(kg, ku) {
				/* Request an upcall and kick its owner. */
				ku->ku_flags |= KUF_DOUPCALL;
				wakeup(&kg->kg_completed);
				if (TD_IS_SUSPENDED(ku->ku_owner)) {
					thread_unsuspend_one(ku->ku_owner);
				}
			}
		}
	}
}

Cache object: d517d4a0a3f18034e00318698ae12d13


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.