FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_thread.c

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
    5  *  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice(s), this list of conditions and the following disclaimer as
   12  *    the first lines of this file unmodified other than the possible
   13  *    addition of one or more copyright notices.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice(s), this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
   19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   21  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
   22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   25  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
   28  * DAMAGE.
   29  */
   30 
   31 #include "opt_witness.h"
   32 #include "opt_hwpmc_hooks.h"
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/kernel.h>
   40 #include <sys/lock.h>
   41 #include <sys/mutex.h>
   42 #include <sys/proc.h>
   43 #include <sys/bitstring.h>
   44 #include <sys/epoch.h>
   45 #include <sys/rangelock.h>
   46 #include <sys/resourcevar.h>
   47 #include <sys/sdt.h>
   48 #include <sys/smp.h>
   49 #include <sys/sched.h>
   50 #include <sys/sleepqueue.h>
   51 #include <sys/selinfo.h>
   52 #include <sys/syscallsubr.h>
   53 #include <sys/dtrace_bsd.h>
   54 #include <sys/sysent.h>
   55 #include <sys/turnstile.h>
   56 #include <sys/taskqueue.h>
   57 #include <sys/ktr.h>
   58 #include <sys/rwlock.h>
   59 #include <sys/umtxvar.h>
   60 #include <sys/vmmeter.h>
   61 #include <sys/cpuset.h>
   62 #ifdef  HWPMC_HOOKS
   63 #include <sys/pmckern.h>
   64 #endif
   65 #include <sys/priv.h>
   66 
   67 #include <security/audit/audit.h>
   68 
   69 #include <vm/pmap.h>
   70 #include <vm/vm.h>
   71 #include <vm/vm_extern.h>
   72 #include <vm/uma.h>
   73 #include <vm/vm_phys.h>
   74 #include <sys/eventhandler.h>
   75 
   76 /*
   77  * Asserts below verify the stability of struct thread and struct proc
   78  * layout, as exposed by KBI to modules.  On head, the KBI is allowed
   79  * to drift; changes to the structures must be accompanied by an
   80  * update of the asserts.
   81  *
   82  * On the stable branches after KBI freeze, conditions must not be
   83  * violated.  Typically new fields are moved to the end of the
   84  * structures.
   85  */
   86 #ifdef __amd64__
   87 _Static_assert(offsetof(struct thread, td_flags) == 0xfc,
   88     "struct thread KBI td_flags");
   89 _Static_assert(offsetof(struct thread, td_pflags) == 0x104,
   90     "struct thread KBI td_pflags");
   91 _Static_assert(offsetof(struct thread, td_frame) == 0x4a0,
   92     "struct thread KBI td_frame");
   93 _Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
   94     "struct thread KBI td_emuldata");
   95 _Static_assert(offsetof(struct proc, p_flag) == 0xb8,
   96     "struct proc KBI p_flag");
   97 _Static_assert(offsetof(struct proc, p_pid) == 0xc4,
   98     "struct proc KBI p_pid");
   99 _Static_assert(offsetof(struct proc, p_filemon) == 0x3c0,
  100     "struct proc KBI p_filemon");
  101 _Static_assert(offsetof(struct proc, p_comm) == 0x3d8,
  102     "struct proc KBI p_comm");
  103 _Static_assert(offsetof(struct proc, p_emuldata) == 0x4b8,
  104     "struct proc KBI p_emuldata");
  105 #endif
  106 #ifdef __i386__
  107 _Static_assert(offsetof(struct thread, td_flags) == 0x98,
  108     "struct thread KBI td_flags");
  109 _Static_assert(offsetof(struct thread, td_pflags) == 0xa0,
  110     "struct thread KBI td_pflags");
  111 _Static_assert(offsetof(struct thread, td_frame) == 0x300,
  112     "struct thread KBI td_frame");
  113 _Static_assert(offsetof(struct thread, td_emuldata) == 0x344,
  114     "struct thread KBI td_emuldata");
  115 _Static_assert(offsetof(struct proc, p_flag) == 0x6c,
  116     "struct proc KBI p_flag");
  117 _Static_assert(offsetof(struct proc, p_pid) == 0x78,
  118     "struct proc KBI p_pid");
  119 _Static_assert(offsetof(struct proc, p_filemon) == 0x26c,
  120     "struct proc KBI p_filemon");
  121 _Static_assert(offsetof(struct proc, p_comm) == 0x280,
  122     "struct proc KBI p_comm");
  123 _Static_assert(offsetof(struct proc, p_emuldata) == 0x30c,
  124     "struct proc KBI p_emuldata");
  125 #endif
  126 
  127 SDT_PROVIDER_DECLARE(proc);
  128 SDT_PROBE_DEFINE(proc, , , lwp__exit);
  129 
  130 /*
  131  * Thread-related storage.
  132  */
  133 static uma_zone_t thread_zone;
  134 
  135 struct thread_domain_data {
  136         struct thread   *tdd_zombies;
  137         int             tdd_reapticks;
  138 } __aligned(CACHE_LINE_SIZE);
  139 
  140 static struct thread_domain_data thread_domain_data[MAXMEMDOM];
  141 
  142 static struct task      thread_reap_task;
  143 static struct callout   thread_reap_callout;
  144 
  145 static void thread_zombie(struct thread *);
  146 static void thread_reap(void);
  147 static void thread_reap_all(void);
  148 static void thread_reap_task_cb(void *, int);
  149 static void thread_reap_callout_cb(void *);
  150 static int thread_unsuspend_one(struct thread *td, struct proc *p,
  151     bool boundary);
  152 static void thread_free_batched(struct thread *td);
  153 
  154 static __exclusive_cache_line struct mtx tid_lock;
  155 static bitstr_t *tid_bitmap;
  156 
  157 static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");
  158 
  159 static int maxthread;
  160 SYSCTL_INT(_kern, OID_AUTO, maxthread, CTLFLAG_RDTUN,
  161     &maxthread, 0, "Maximum number of threads");
  162 
  163 static __exclusive_cache_line int nthreads;
  164 
  165 static LIST_HEAD(tidhashhead, thread) *tidhashtbl;
  166 static u_long   tidhash;
  167 static u_long   tidhashlock;
  168 static struct   rwlock *tidhashtbl_lock;
  169 #define TIDHASH(tid)            (&tidhashtbl[(tid) & tidhash])
  170 #define TIDHASHLOCK(tid)        (&tidhashtbl_lock[(tid) & tidhashlock])
  171 
  172 EVENTHANDLER_LIST_DEFINE(thread_ctor);
  173 EVENTHANDLER_LIST_DEFINE(thread_dtor);
  174 EVENTHANDLER_LIST_DEFINE(thread_init);
  175 EVENTHANDLER_LIST_DEFINE(thread_fini);
  176 
  177 static bool
  178 thread_count_inc_try(void)
  179 {
  180         int nthreads_new;
  181 
  182         nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
  183         if (nthreads_new >= maxthread - 100) {
  184                 if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 ||
  185                     nthreads_new >= maxthread) {
  186                         atomic_subtract_int(&nthreads, 1);
  187                         return (false);
  188                 }
  189         }
  190         return (true);
  191 }
  192 
  193 static bool
  194 thread_count_inc(void)
  195 {
  196         static struct timeval lastfail;
  197         static int curfail;
  198 
  199         thread_reap();
  200         if (thread_count_inc_try()) {
  201                 return (true);
  202         }
  203 
  204         thread_reap_all();
  205         if (thread_count_inc_try()) {
  206                 return (true);
  207         }
  208 
  209         if (ppsratecheck(&lastfail, &curfail, 1)) {
  210                 printf("maxthread limit exceeded by uid %u "
  211                     "(pid %d); consider increasing kern.maxthread\n",
  212                     curthread->td_ucred->cr_ruid, curproc->p_pid);
  213         }
  214         return (false);
  215 }
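/*
 * Note on the accounting above: thread_count_inc_try() optimistically bumps
 * the global count and backs it out on failure.  The last 100 slots below
 * maxthread are reserved for credentials that pass the PRIV_MAXPROC check,
 * and the hard maxthread limit is never exceeded.  thread_count_inc()
 * retries once after reaping the local domain and once more after reaping
 * every domain before giving up; the failure message is rate-limited by
 * ppsratecheck().
 */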
  216 
  217 static void
  218 thread_count_sub(int n)
  219 {
  220 
  221         atomic_subtract_int(&nthreads, n);
  222 }
  223 
  224 static void
  225 thread_count_dec(void)
  226 {
  227 
  228         thread_count_sub(1);
  229 }
  230 
  231 static lwpid_t
  232 tid_alloc(void)
  233 {
  234         static lwpid_t trytid;
  235         lwpid_t tid;
  236 
  237         mtx_lock(&tid_lock);
  238         /*
  239          * It is an invariant that the bitmap is big enough to hold maxthread
  240          * IDs. If we got to this point there has to be at least one free.
  241          */
  242         if (trytid >= maxthread)
  243                 trytid = 0;
  244         bit_ffc_at(tid_bitmap, trytid, maxthread, &tid);
  245         if (tid == -1) {
  246                 KASSERT(trytid != 0, ("unexpectedly ran out of IDs"));
  247                 trytid = 0;
  248                 bit_ffc_at(tid_bitmap, trytid, maxthread, &tid);
  249                 KASSERT(tid != -1, ("unexpectedly ran out of IDs"));
  250         }
  251         bit_set(tid_bitmap, tid);
  252         trytid = tid + 1;
  253         mtx_unlock(&tid_lock);
  254         return (tid + NO_PID);
  255 }
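/*
 * Note: the tid bitmap has exactly maxthread bits, so a free bit must exist
 * whenever the thread count reservation above succeeded.  The search starts
 * at the rotating hint 'trytid' and wraps around once via bit_ffc_at().
 * The returned value is biased by NO_PID, which keeps thread IDs above the
 * process ID range (PID_MAX < NO_PID), so tids and pids never collide.
 */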
  256 
  257 static void
  258 tid_free_locked(lwpid_t rtid)
  259 {
  260         lwpid_t tid;
  261 
  262         mtx_assert(&tid_lock, MA_OWNED);
  263         KASSERT(rtid >= NO_PID,
  264             ("%s: invalid tid %d\n", __func__, rtid));
  265         tid = rtid - NO_PID;
  266         KASSERT(bit_test(tid_bitmap, tid) != 0,
  267             ("thread ID %d not allocated\n", rtid));
  268         bit_clear(tid_bitmap, tid);
  269 }
  270 
  271 static void
  272 tid_free(lwpid_t rtid)
  273 {
  274 
  275         mtx_lock(&tid_lock);
  276         tid_free_locked(rtid);
  277         mtx_unlock(&tid_lock);
  278 }
  279 
  280 static void
  281 tid_free_batch(lwpid_t *batch, int n)
  282 {
  283         int i;
  284 
  285         mtx_lock(&tid_lock);
  286         for (i = 0; i < n; i++) {
  287                 tid_free_locked(batch[i]);
  288         }
  289         mtx_unlock(&tid_lock);
  290 }
  291 
  292 /*
  293  * Batching for thread reaping.
  294  */
  295 struct tidbatch {
  296         lwpid_t tab[16];
  297         int n;
  298 };
  299 
  300 static void
  301 tidbatch_prep(struct tidbatch *tb)
  302 {
  303 
  304         tb->n = 0;
  305 }
  306 
  307 static void
  308 tidbatch_add(struct tidbatch *tb, struct thread *td)
  309 {
  310 
  311         KASSERT(tb->n < nitems(tb->tab),
  312             ("%s: count too high %d", __func__, tb->n));
  313         tb->tab[tb->n] = td->td_tid;
  314         tb->n++;
  315 }
  316 
  317 static void
  318 tidbatch_process(struct tidbatch *tb)
  319 {
  320 
  321         KASSERT(tb->n <= nitems(tb->tab),
  322             ("%s: count too high %d", __func__, tb->n));
  323         if (tb->n == nitems(tb->tab)) {
  324                 tid_free_batch(tb->tab, tb->n);
  325                 tb->n = 0;
  326         }
  327 }
  328 
  329 static void
  330 tidbatch_final(struct tidbatch *tb)
  331 {
  332 
  333         KASSERT(tb->n <= nitems(tb->tab),
  334             ("%s: count too high %d", __func__, tb->n));
  335         if (tb->n != 0) {
  336                 tid_free_batch(tb->tab, tb->n);
  337         }
  338 }
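/*
 * Note: the tidbatch helpers exist to amortize tid_lock acquisitions during
 * reaping.  IDs accumulate in a 16-entry array; tidbatch_process() flushes
 * only when the array is full, and tidbatch_final() releases whatever
 * remains, so the lock is taken roughly once per 16 reaped threads.
 */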
  339 
  340 /*
  341  * Prepare a thread for use.
  342  */
  343 static int
  344 thread_ctor(void *mem, int size, void *arg, int flags)
  345 {
  346         struct thread   *td;
  347 
  348         td = (struct thread *)mem;
  349         td->td_state = TDS_INACTIVE;
  350         td->td_lastcpu = td->td_oncpu = NOCPU;
  351 
  352         /*
  353          * Note that td_critnest begins life as 1 because the thread is not
  354          * running and is thereby implicitly waiting to be on the receiving
  355          * end of a context switch.
  356          */
  357         td->td_critnest = 1;
  358         td->td_lend_user_pri = PRI_MAX;
  359 #ifdef AUDIT
  360         audit_thread_alloc(td);
  361 #endif
  362 #ifdef KDTRACE_HOOKS
  363         kdtrace_thread_ctor(td);
  364 #endif
  365         umtx_thread_alloc(td);
  366         MPASS(td->td_sel == NULL);
  367         return (0);
  368 }
  369 
  370 /*
  371  * Reclaim a thread after use.
  372  */
  373 static void
  374 thread_dtor(void *mem, int size, void *arg)
  375 {
  376         struct thread *td;
  377 
  378         td = (struct thread *)mem;
  379 
  380 #ifdef INVARIANTS
  381         /* Verify that this thread is in a safe state to free. */
  382         switch (td->td_state) {
  383         case TDS_INHIBITED:
  384         case TDS_RUNNING:
  385         case TDS_CAN_RUN:
  386         case TDS_RUNQ:
  387                 /*
  388                  * We must never unlink a thread that is in one of
  389                  * these states, because it is currently active.
  390                  */
  391                 panic("bad state for thread unlinking");
  392                 /* NOTREACHED */
  393         case TDS_INACTIVE:
  394                 break;
  395         default:
  396                 panic("bad thread state");
  397                 /* NOTREACHED */
  398         }
  399 #endif
  400 #ifdef AUDIT
  401         audit_thread_free(td);
  402 #endif
  403 #ifdef KDTRACE_HOOKS
  404         kdtrace_thread_dtor(td);
  405 #endif
  406         /* Free all OSD associated with this thread. */
  407         osd_thread_exit(td);
  408         td_softdep_cleanup(td);
  409         MPASS(td->td_su == NULL);
  410         seltdfini(td);
  411 }
  412 
  413 /*
  414  * Initialize type-stable parts of a thread (when newly created).
  415  */
  416 static int
  417 thread_init(void *mem, int size, int flags)
  418 {
  419         struct thread *td;
  420 
  421         td = (struct thread *)mem;
  422 
  423         td->td_allocdomain = vm_phys_domain(vtophys(td));
  424         td->td_sleepqueue = sleepq_alloc();
  425         td->td_turnstile = turnstile_alloc();
  426         td->td_rlqe = NULL;
  427         EVENTHANDLER_DIRECT_INVOKE(thread_init, td);
  428         umtx_thread_init(td);
  429         td->td_kstack = 0;
  430         td->td_sel = NULL;
  431         return (0);
  432 }
  433 
  434 /*
  435  * Tear down type-stable parts of a thread (just before being discarded).
  436  */
  437 static void
  438 thread_fini(void *mem, int size)
  439 {
  440         struct thread *td;
  441 
  442         td = (struct thread *)mem;
  443         EVENTHANDLER_DIRECT_INVOKE(thread_fini, td);
  444         rlqentry_free(td->td_rlqe);
  445         turnstile_free(td->td_turnstile);
  446         sleepq_free(td->td_sleepqueue);
  447         umtx_thread_fini(td);
  448         MPASS(td->td_sel == NULL);
  449 }
  450 
  451 /*
  452  * For a newly created process,
  453  * link up all the structures and its initial threads etc.
  454  * Called from:
  455  * {arch}/{arch}/machdep.c   {arch}_init(), init386() etc.
  456  * proc_dtor() (should go away)
  457  * proc_init()
  458  */
  459 void
  460 proc_linkup0(struct proc *p, struct thread *td)
  461 {
  462         TAILQ_INIT(&p->p_threads);           /* all threads in proc */
  463         proc_linkup(p, td);
  464 }
  465 
  466 void
  467 proc_linkup(struct proc *p, struct thread *td)
  468 {
  469 
  470         sigqueue_init(&p->p_sigqueue, p);
  471         p->p_ksi = ksiginfo_alloc(M_WAITOK);
  472         if (p->p_ksi != NULL) {
  473                 /* XXX p_ksi may be null if ksiginfo zone is not ready */
  474                 p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
  475         }
  476         LIST_INIT(&p->p_mqnotifier);
  477         p->p_numthreads = 0;
  478         thread_link(td, p);
  479 }
  480 
  481 extern int max_threads_per_proc;
  482 
  483 /*
  484  * Initialize global thread allocation resources.
  485  */
  486 void
  487 threadinit(void)
  488 {
  489         u_long i;
  490         lwpid_t tid0;
  491         uint32_t flags;
  492 
  493         /*
  494          * Place an upper limit on threads which can be allocated.
  495          *
  496          * Note that other factors may make the de facto limit much lower.
  497          *
  498          * Platform limits are somewhat arbitrary but deemed "more than good
  499  * enough" for the foreseeable future.
  500          */
  501         if (maxthread == 0) {
  502 #ifdef _LP64
  503                 maxthread = MIN(maxproc * max_threads_per_proc, 1000000);
  504 #else
  505                 maxthread = MIN(maxproc * max_threads_per_proc, 100000);
  506 #endif
  507         }
  508 
  509         mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
  510         tid_bitmap = bit_alloc(maxthread, M_TIDHASH, M_WAITOK);
  511         /*
  512          * Handle thread0.
  513          */
  514         thread_count_inc();
  515         tid0 = tid_alloc();
  516         if (tid0 != THREAD0_TID)
  517                 panic("tid0 %d != %d\n", tid0, THREAD0_TID);
  518 
  519         flags = UMA_ZONE_NOFREE;
  520 #ifdef __aarch64__
  521         /*
  522          * Force thread structures to be allocated from the direct map.
  523          * Otherwise, superpage promotions and demotions may temporarily
  524          * invalidate thread structure mappings.  For most dynamically allocated
  525          * structures this is not a problem, but translation faults cannot be
  526          * handled without accessing curthread.
  527          */
  528         flags |= UMA_ZONE_CONTIG;
  529 #endif
  530         thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
  531             thread_ctor, thread_dtor, thread_init, thread_fini,
  532             32 - 1, flags);
  533         tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
  534         tidhashlock = (tidhash + 1) / 64;
  535         if (tidhashlock > 0)
  536                 tidhashlock--;
  537         tidhashtbl_lock = malloc(sizeof(*tidhashtbl_lock) * (tidhashlock + 1),
  538             M_TIDHASH, M_WAITOK | M_ZERO);
  539         for (i = 0; i < tidhashlock + 1; i++)
  540                 rw_init(&tidhashtbl_lock[i], "tidhash");
  541 
  542         TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL);
  543         callout_init(&thread_reap_callout, 1);
  544         callout_reset(&thread_reap_callout, 5 * hz,
  545             thread_reap_callout_cb, NULL);
  546 }
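/*
 * Note on the sizing above: unless overridden by the kern.maxthread tunable,
 * maxthread defaults to maxproc * max_threads_per_proc, capped at 1,000,000
 * on 64-bit and 100,000 on 32-bit platforms.  thread0 consumes the first
 * count reservation and must receive THREAD0_TID.  The tid hash is sized by
 * hashinit() to about maxproc / 2 buckets and is protected by roughly one
 * rwlock per 64 buckets via the TIDHASHLOCK() mapping.
 */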
  547 
  548 /*
  549  * Place an unused thread on the zombie list.
  550  */
  551 void
  552 thread_zombie(struct thread *td)
  553 {
  554         struct thread_domain_data *tdd;
  555         struct thread *ztd;
  556 
  557         tdd = &thread_domain_data[td->td_allocdomain];
  558         ztd = atomic_load_ptr(&tdd->tdd_zombies);
  559         for (;;) {
  560                 td->td_zombie = ztd;
  561                 if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
  562                     (uintptr_t *)&ztd, (uintptr_t)td))
  563                         break;
  564                 continue;
  565         }
  566 }
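/*
 * Note: the loop above is a lock-free LIFO push onto the per-domain zombie
 * list.  atomic_fcmpset_rel_ptr() reloads 'ztd' on failure, and the release
 * semantics guarantee that the thread's fields are visible before the thread
 * becomes reachable through tdd_zombies.  The list is drained later by
 * thread_reap_domain().
 */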
  567 
  568 /*
  569  * Release a thread that has exited after cpu_throw().
  570  */
  571 void
  572 thread_stash(struct thread *td)
  573 {
  574         atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
  575         thread_zombie(td);
  576 }
  577 
  578 /*
  579  * Reap zombies from passed domain.
  580  */
  581 static void
  582 thread_reap_domain(struct thread_domain_data *tdd)
  583 {
  584         struct thread *itd, *ntd;
  585         struct tidbatch tidbatch;
  586         struct credbatch credbatch;
  587         int tdcount;
  588         struct plimit *lim;
  589         int limcount;
  590 
  591         /*
  592          * Reading upfront is pessimal if followed by concurrent atomic_swap,
  593          * but most of the time the list is empty.
  594          */
  595         if (tdd->tdd_zombies == NULL)
  596                 return;
  597 
  598         itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies,
  599             (uintptr_t)NULL);
  600         if (itd == NULL)
  601                 return;
  602 
  603         /*
  604          * Multiple CPUs can get here; the race is fine as ticks is only
  605          * advisory.
  606          */
  607         tdd->tdd_reapticks = ticks;
  608 
  609         tidbatch_prep(&tidbatch);
  610         credbatch_prep(&credbatch);
  611         tdcount = 0;
  612         lim = NULL;
  613         limcount = 0;
  614 
  615         while (itd != NULL) {
  616                 ntd = itd->td_zombie;
  617                 EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd);
  618                 tidbatch_add(&tidbatch, itd);
  619                 credbatch_add(&credbatch, itd);
  620                 MPASS(itd->td_limit != NULL);
  621                 if (lim != itd->td_limit) {
  622                         if (limcount != 0) {
  623                                 lim_freen(lim, limcount);
  624                                 limcount = 0;
  625                         }
  626                 }
  627                 lim = itd->td_limit;
  628                 limcount++;
  629                 thread_free_batched(itd);
  630                 tidbatch_process(&tidbatch);
  631                 credbatch_process(&credbatch);
  632                 tdcount++;
  633                 if (tdcount == 32) {
  634                         thread_count_sub(tdcount);
  635                         tdcount = 0;
  636                 }
  637                 itd = ntd;
  638         }
  639 
  640         tidbatch_final(&tidbatch);
  641         credbatch_final(&credbatch);
  642         if (tdcount != 0) {
  643                 thread_count_sub(tdcount);
  644         }
  645         MPASS(limcount != 0);
  646         lim_freen(lim, limcount);
  647 }
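/*
 * Note: the reaper detaches the whole zombie list with a single atomic_swap,
 * so exiting threads pushing new zombies are never blocked.  Resources are
 * then released in batches: tids and credentials through their batch
 * helpers, the global thread count in chunks of 32, and runs of identical
 * plimit references coalesced into lim_freen() calls.
 */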
  648 
  649 /*
  650  * Reap zombies from all domains.
  651  */
  652 static void
  653 thread_reap_all(void)
  654 {
  655         struct thread_domain_data *tdd;
  656         int i, domain;
  657 
  658         domain = PCPU_GET(domain);
  659         for (i = 0; i < vm_ndomains; i++) {
  660                 tdd = &thread_domain_data[(i + domain) % vm_ndomains];
  661                 thread_reap_domain(tdd);
  662         }
  663 }
  664 
  665 /*
  666  * Reap zombies from local domain.
  667  */
  668 static void
  669 thread_reap(void)
  670 {
  671         struct thread_domain_data *tdd;
  672         int domain;
  673 
  674         domain = PCPU_GET(domain);
  675         tdd = &thread_domain_data[domain];
  676 
  677         thread_reap_domain(tdd);
  678 }
  679 
  680 static void
  681 thread_reap_task_cb(void *arg __unused, int pending __unused)
  682 {
  683 
  684         thread_reap_all();
  685 }
  686 
  687 static void
  688 thread_reap_callout_cb(void *arg __unused)
  689 {
  690         struct thread_domain_data *tdd;
  691         int i, cticks, lticks;
  692         bool wantreap;
  693 
  694         wantreap = false;
  695         cticks = atomic_load_int(&ticks);
  696         for (i = 0; i < vm_ndomains; i++) {
  697                 tdd = &thread_domain_data[i];
  698                 lticks = tdd->tdd_reapticks;
  699                 if (tdd->tdd_zombies != NULL &&
  700                     (u_int)(cticks - lticks) > 5 * hz) {
  701                         wantreap = true;
  702                         break;
  703                 }
  704         }
  705 
  706         if (wantreap)
  707                 taskqueue_enqueue(taskqueue_thread, &thread_reap_task);
  708         callout_reset(&thread_reap_callout, 5 * hz,
  709             thread_reap_callout_cb, NULL);
  710 }
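/*
 * Note: reaping normally happens inline from thread_count_inc() and
 * thread_wait().  The callout above is a backstop that fires every 5 * hz
 * ticks; if a domain still has zombies and has not been reaped for more than
 * five seconds, it enqueues thread_reap_task, and thread_reap_task_cb() then
 * reaps every domain from a taskqueue thread.
 */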
  711 
  712 /*
  713  * Calling this function guarantees that any thread that exited before
  714  * the call is reaped when the function returns.  By 'exited' we mean
  715  * a thread removed from the process linkage with thread_unlink().
  716  * Practically this means that the caller must lock/unlock the corresponding
  717  * process lock before the call, to synchronize with thread_exit().
  718  */
  719 void
  720 thread_reap_barrier(void)
  721 {
  722         struct task *t;
  723 
  724         /*
  725          * First do context switches to each CPU to ensure that all
  726          * PCPU pc_deadthreads are moved to zombie list.
  727          */
  728         quiesce_all_cpus("", PDROP);
  729 
  730         /*
  731          * Second, fire the task in the same thread as normal
  732          * thread_reap() is done, to serialize reaping.
  733          */
  734         t = malloc(sizeof(*t), M_TEMP, M_WAITOK);
  735         TASK_INIT(t, 0, thread_reap_task_cb, t);
  736         taskqueue_enqueue(taskqueue_thread, t);
  737         taskqueue_drain(taskqueue_thread, t);
  738         free(t, M_TEMP);
  739 }
  740 
  741 /*
  742  * Allocate a thread.
  743  */
  744 struct thread *
  745 thread_alloc(int pages)
  746 {
  747         struct thread *td;
  748         lwpid_t tid;
  749 
  750         if (!thread_count_inc()) {
  751                 return (NULL);
  752         }
  753 
  754         tid = tid_alloc();
  755         td = uma_zalloc(thread_zone, M_WAITOK);
  756         KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
  757         if (!vm_thread_new(td, pages)) {
  758                 uma_zfree(thread_zone, td);
  759                 tid_free(tid);
  760                 thread_count_dec();
  761                 return (NULL);
  762         }
  763         td->td_tid = tid;
  764         bzero(&td->td_sa.args, sizeof(td->td_sa.args));
  765         cpu_thread_alloc(td);
  766         EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td);
  767         return (td);
  768 }
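/*
 * Note on the ordering above: the global count is reserved first, then a
 * tid, then the zone object, and finally the kernel stack.  If
 * vm_thread_new() fails, everything already acquired is rolled back and
 * NULL is returned, so callers must be prepared for allocation failure.
 */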
  769 
  770 int
  771 thread_alloc_stack(struct thread *td, int pages)
  772 {
  773 
  774         KASSERT(td->td_kstack == 0,
  775             ("thread_alloc_stack called on a thread with kstack"));
  776         if (!vm_thread_new(td, pages))
  777                 return (0);
  778         cpu_thread_alloc(td);
  779         return (1);
  780 }
  781 
  782 /*
  783  * Deallocate a thread.
  784  */
  785 static void
  786 thread_free_batched(struct thread *td)
  787 {
  788 
  789         lock_profile_thread_exit(td);
  790         if (td->td_cpuset)
  791                 cpuset_rel(td->td_cpuset);
  792         td->td_cpuset = NULL;
  793         cpu_thread_free(td);
  794         if (td->td_kstack != 0)
  795                 vm_thread_dispose(td);
  796         callout_drain(&td->td_slpcallout);
  797         /*
  798          * Freeing handled by the caller.
  799          */
  800         td->td_tid = -1;
  801         uma_zfree(thread_zone, td);
  802 }
  803 
  804 void
  805 thread_free(struct thread *td)
  806 {
  807         lwpid_t tid;
  808 
  809         EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td);
  810         tid = td->td_tid;
  811         thread_free_batched(td);
  812         tid_free(tid);
  813         thread_count_dec();
  814 }
  815 
  816 void
  817 thread_cow_get_proc(struct thread *newtd, struct proc *p)
  818 {
  819 
  820         PROC_LOCK_ASSERT(p, MA_OWNED);
  821         newtd->td_realucred = crcowget(p->p_ucred);
  822         newtd->td_ucred = newtd->td_realucred;
  823         newtd->td_limit = lim_hold(p->p_limit);
  824         newtd->td_cowgen = p->p_cowgen;
  825 }
  826 
  827 void
  828 thread_cow_get(struct thread *newtd, struct thread *td)
  829 {
  830 
  831         MPASS(td->td_realucred == td->td_ucred);
  832         newtd->td_realucred = crcowget(td->td_realucred);
  833         newtd->td_ucred = newtd->td_realucred;
  834         newtd->td_limit = lim_hold(td->td_limit);
  835         newtd->td_cowgen = td->td_cowgen;
  836 }
  837 
  838 void
  839 thread_cow_free(struct thread *td)
  840 {
  841 
  842         if (td->td_realucred != NULL)
  843                 crcowfree(td);
  844         if (td->td_limit != NULL)
  845                 lim_free(td->td_limit);
  846 }
  847 
  848 void
  849 thread_cow_update(struct thread *td)
  850 {
  851         struct proc *p;
  852         struct ucred *oldcred;
  853         struct plimit *oldlimit;
  854 
  855         p = td->td_proc;
  856         oldlimit = NULL;
  857         PROC_LOCK(p);
  858         oldcred = crcowsync();
  859         if (td->td_limit != p->p_limit) {
  860                 oldlimit = td->td_limit;
  861                 td->td_limit = lim_hold(p->p_limit);
  862         }
  863         td->td_cowgen = p->p_cowgen;
  864         PROC_UNLOCK(p);
  865         if (oldcred != NULL)
  866                 crfree(oldcred);
  867         if (oldlimit != NULL)
  868                 lim_free(oldlimit);
  869 }
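/*
 * Note: td_realucred/td_ucred and td_limit are per-thread cached references
 * to copy-on-write process data, stamped with td_cowgen.  When the
 * process-wide copies are replaced, p_cowgen is advanced and each thread
 * lazily resynchronizes through thread_cow_update(), typically once it
 * notices td_cowgen != p_cowgen on its way back to user mode.
 */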
  870 
  871 /*
  872  * Discard the current thread and exit from its context.
  873  * Always called with scheduler locked.
  874  *
  875  * Because we can't free a thread while we're operating under its context,
  876  * push the current thread into our CPU's deadthread holder. This means
  877  * we needn't worry about someone else grabbing our context before we
  878  * do a cpu_throw().
  879  */
  880 void
  881 thread_exit(void)
  882 {
  883         uint64_t runtime, new_switchtime;
  884         struct thread *td;
  885         struct thread *td2;
  886         struct proc *p;
  887         int wakeup_swapper;
  888 
  889         td = curthread;
  890         p = td->td_proc;
  891 
  892         PROC_SLOCK_ASSERT(p, MA_OWNED);
  893         mtx_assert(&Giant, MA_NOTOWNED);
  894 
  895         PROC_LOCK_ASSERT(p, MA_OWNED);
  896         KASSERT(p != NULL, ("thread exiting without a process"));
  897         CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
  898             (long)p->p_pid, td->td_name);
  899         SDT_PROBE0(proc, , , lwp__exit);
  900         KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
  901         MPASS(td->td_realucred == td->td_ucred);
  902 
  903         /*
  904          * Drop FPU & debug register state storage, or any other
  905          * architecture-specific resources that
  906          * would not be on a new untouched process.
  907          */
  908         cpu_thread_exit(td);
  909 
  910         /*
  911          * The last thread is left attached to the process
  912          * so that the whole bundle gets recycled. Skip
  913          * all this stuff if we never had threads.
  914          * EXIT clears all signs of other threads when
  915          * it goes to single threading, so the last thread always
  916          * takes the short path.
  917          */
  918         if (p->p_flag & P_HADTHREADS) {
  919                 if (p->p_numthreads > 1) {
  920                         atomic_add_int(&td->td_proc->p_exitthreads, 1);
  921                         thread_unlink(td);
  922                         td2 = FIRST_THREAD_IN_PROC(p);
  923                         sched_exit_thread(td2, td);
  924 
  925                         /*
  926                          * The test below is NOT true if we are the
  927                          * sole exiting thread. P_STOPPED_SINGLE is unset
  928                          * in exit1() after it is the only survivor.
  929                          */
  930                         if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
  931                                 if (p->p_numthreads == p->p_suspcount) {
  932                                         thread_lock(p->p_singlethread);
  933                                         wakeup_swapper = thread_unsuspend_one(
  934                                                 p->p_singlethread, p, false);
  935                                         if (wakeup_swapper)
  936                                                 kick_proc0();
  937                                 }
  938                         }
  939 
  940                         PCPU_SET(deadthread, td);
  941                 } else {
  942                         /*
  943                          * The last thread is exiting, but not through exit().
  944                          */
  945                         panic("thread_exit: Last thread exiting on its own");
  946                 }
  947         } 
  948 #ifdef  HWPMC_HOOKS
  949         /*
  950          * If this thread is part of a process that is being tracked by hwpmc(4),
  951          * inform the module of the thread's impending exit.
  952          */
  953         if (PMC_PROC_IS_USING_PMCS(td->td_proc)) {
  954                 PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
  955                 PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT, NULL);
  956         } else if (PMC_SYSTEM_SAMPLING_ACTIVE())
  957                 PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT_LOG, NULL);
  958 #endif
  959         PROC_UNLOCK(p);
  960         PROC_STATLOCK(p);
  961         thread_lock(td);
  962         PROC_SUNLOCK(p);
  963 
  964         /* Do the same timestamp bookkeeping that mi_switch() would do. */
  965         new_switchtime = cpu_ticks();
  966         runtime = new_switchtime - PCPU_GET(switchtime);
  967         td->td_runtime += runtime;
  968         td->td_incruntime += runtime;
  969         PCPU_SET(switchtime, new_switchtime);
  970         PCPU_SET(switchticks, ticks);
  971         VM_CNT_INC(v_swtch);
  972 
  973         /* Save our resource usage in our process. */
  974         td->td_ru.ru_nvcsw++;
  975         ruxagg_locked(p, td);
  976         rucollect(&p->p_ru, &td->td_ru);
  977         PROC_STATUNLOCK(p);
  978 
  979         td->td_state = TDS_INACTIVE;
  980 #ifdef WITNESS
  981         witness_thread_exit(td);
  982 #endif
  983         CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
  984         sched_throw(td);
  985         panic("I'm a teapot!");
  986         /* NOTREACHED */
  987 }
  988 
  989 /*
  990  * Do any thread-specific cleanups that may be needed in wait().
  991  * Called with Giant, proc and schedlock not held.
  992  */
  993 void
  994 thread_wait(struct proc *p)
  995 {
  996         struct thread *td;
  997 
  998         mtx_assert(&Giant, MA_NOTOWNED);
  999         KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()"));
 1000         KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking"));
 1001         td = FIRST_THREAD_IN_PROC(p);
 1002         /* Lock the last thread so we spin until it exits cpu_throw(). */
 1003         thread_lock(td);
 1004         thread_unlock(td);
 1005         lock_profile_thread_exit(td);
 1006         cpuset_rel(td->td_cpuset);
 1007         td->td_cpuset = NULL;
 1008         cpu_thread_clean(td);
 1009         thread_cow_free(td);
 1010         callout_drain(&td->td_slpcallout);
 1011         thread_reap();  /* check for zombie threads etc. */
 1012 }
 1013 
 1014 /*
 1015  * Link a thread to a process.
 1016  * Set up anything that needs to be initialized for it to
 1017  * be used by the process.
 1018  */
 1019 void
 1020 thread_link(struct thread *td, struct proc *p)
 1021 {
 1022 
 1023         /*
 1024          * XXX This can't be enabled because it's called for proc0 before
 1025          * its lock has been created.
 1026          * PROC_LOCK_ASSERT(p, MA_OWNED);
 1027          */
 1028         td->td_state    = TDS_INACTIVE;
 1029         td->td_proc     = p;
 1030         td->td_flags    = TDF_INMEM;
 1031 
 1032         LIST_INIT(&td->td_contested);
 1033         LIST_INIT(&td->td_lprof[0]);
 1034         LIST_INIT(&td->td_lprof[1]);
 1035 #ifdef EPOCH_TRACE
 1036         SLIST_INIT(&td->td_epochs);
 1037 #endif
 1038         sigqueue_init(&td->td_sigqueue, p);
 1039         callout_init(&td->td_slpcallout, 1);
 1040         TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
 1041         p->p_numthreads++;
 1042 }
 1043 
 1044 /*
 1045  * Called from:
 1046  *  thread_exit()
 1047  */
 1048 void
 1049 thread_unlink(struct thread *td)
 1050 {
 1051         struct proc *p = td->td_proc;
 1052 
 1053         PROC_LOCK_ASSERT(p, MA_OWNED);
 1054 #ifdef EPOCH_TRACE
 1055         MPASS(SLIST_EMPTY(&td->td_epochs));
 1056 #endif
 1057 
 1058         TAILQ_REMOVE(&p->p_threads, td, td_plist);
 1059         p->p_numthreads--;
 1060         /* could clear a few other things here */
 1061         /* Must  NOT clear links to proc! */
 1062 }
 1063 
 1064 static int
 1065 calc_remaining(struct proc *p, int mode)
 1066 {
 1067         int remaining;
 1068 
 1069         PROC_LOCK_ASSERT(p, MA_OWNED);
 1070         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1071         if (mode == SINGLE_EXIT)
 1072                 remaining = p->p_numthreads;
 1073         else if (mode == SINGLE_BOUNDARY)
 1074                 remaining = p->p_numthreads - p->p_boundary_count;
 1075         else if (mode == SINGLE_NO_EXIT || mode == SINGLE_ALLPROC)
 1076                 remaining = p->p_numthreads - p->p_suspcount;
 1077         else
 1078                 panic("calc_remaining: wrong mode %d", mode);
 1079         return (remaining);
 1080 }
 1081 
 1082 static int
 1083 remain_for_mode(int mode)
 1084 {
 1085 
 1086         return (mode == SINGLE_ALLPROC ? 0 : 1);
 1087 }
 1088 
 1089 static int
 1090 weed_inhib(int mode, struct thread *td2, struct proc *p)
 1091 {
 1092         int wakeup_swapper;
 1093 
 1094         PROC_LOCK_ASSERT(p, MA_OWNED);
 1095         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1096         THREAD_LOCK_ASSERT(td2, MA_OWNED);
 1097 
 1098         wakeup_swapper = 0;
 1099 
 1100         /*
 1101          * Since the thread lock is dropped by the scheduler we have
 1102          * to retry to check for races.
 1103          */
 1104 restart:
 1105         switch (mode) {
 1106         case SINGLE_EXIT:
 1107                 if (TD_IS_SUSPENDED(td2)) {
 1108                         wakeup_swapper |= thread_unsuspend_one(td2, p, true);
 1109                         thread_lock(td2);
 1110                         goto restart;
 1111                 }
 1112                 if (TD_CAN_ABORT(td2)) {
 1113                         wakeup_swapper |= sleepq_abort(td2, EINTR);
 1114                         return (wakeup_swapper);
 1115                 }
 1116                 break;
 1117         case SINGLE_BOUNDARY:
 1118         case SINGLE_NO_EXIT:
 1119                 if (TD_IS_SUSPENDED(td2) &&
 1120                     (td2->td_flags & TDF_BOUNDARY) == 0) {
 1121                         wakeup_swapper |= thread_unsuspend_one(td2, p, false);
 1122                         thread_lock(td2);
 1123                         goto restart;
 1124                 }
 1125                 if (TD_CAN_ABORT(td2)) {
 1126                         wakeup_swapper |= sleepq_abort(td2, ERESTART);
 1127                         return (wakeup_swapper);
 1128                 }
 1129                 break;
 1130         case SINGLE_ALLPROC:
 1131                 /*
 1132                  * ALLPROC suspend tries to avoid spurious EINTR for
 1133                  * threads sleeping interruptibly, by suspending the
 1134                  * thread directly, similarly to sig_suspend_threads().
 1135                  * Since such a sleep is not necessarily performed at the user
 1136                  * boundary, TDF_ALLPROCSUSP is used to avoid immediate
 1137                  * un-suspend.
 1138                  */
 1139                 if (TD_IS_SUSPENDED(td2) && (td2->td_flags &
 1140                     TDF_ALLPROCSUSP) == 0) {
 1141                         wakeup_swapper |= thread_unsuspend_one(td2, p, false);
 1142                         thread_lock(td2);
 1143                         goto restart;
 1144                 }
 1145                 if (TD_CAN_ABORT(td2)) {
 1146                         td2->td_flags |= TDF_ALLPROCSUSP;
 1147                         wakeup_swapper |= sleepq_abort(td2, ERESTART);
 1148                         return (wakeup_swapper);
 1149                 }
 1150                 break;
 1151         default:
 1152                 break;
 1153         }
 1154         thread_unlock(td2);
 1155         return (wakeup_swapper);
 1156 }
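/*
 * Note: weed_inhib() nudges one inhibited thread toward the single-threading
 * goal.  Suspended threads are woken so they can re-evaluate the request,
 * and sleeping threads that can abort their sleep are interrupted with
 * EINTR for SINGLE_EXIT or ERESTART for the other modes; SINGLE_ALLPROC
 * additionally marks the thread with TDF_ALLPROCSUSP so it is not
 * immediately un-suspended.
 */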
 1157 
 1158 /*
 1159  * Enforce single-threading.
 1160  *
 1161  * Returns 1 if the caller must abort (another thread is waiting to
 1162  * exit the process or similar). Process is locked!
 1163  * Returns 0 when you are successfully the only thread running.
 1164  * A process has successfully single-threaded in the suspend mode when
 1165  * there are no threads in user mode. Threads in the kernel must be
 1166  * allowed to continue until they get to the user boundary. They may even
 1167  * copy out their return values and data before suspending. They may,
 1168  * however, be accelerated in reaching the user boundary as we will wake up
 1169  * any sleeping threads that are interruptible (PCATCH).
 1170  */
 1171 int
 1172 thread_single(struct proc *p, int mode)
 1173 {
 1174         struct thread *td;
 1175         struct thread *td2;
 1176         int remaining, wakeup_swapper;
 1177 
 1178         td = curthread;
 1179         KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
 1180             mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
 1181             ("invalid mode %d", mode));
 1182         /*
 1183          * If allowing non-ALLPROC singlethreading for non-curproc
 1184          * callers, calc_remaining() and remain_for_mode() should be
 1185          * adjusted to also account for td->td_proc != p.  For now
 1186          * this is not implemented because it is not used.
 1187          */
 1188         KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) ||
 1189             (mode != SINGLE_ALLPROC && td->td_proc == p),
 1190             ("mode %d proc %p curproc %p", mode, p, td->td_proc));
 1191         mtx_assert(&Giant, MA_NOTOWNED);
 1192         PROC_LOCK_ASSERT(p, MA_OWNED);
 1193 
 1194         /*
 1195          * Is someone already single threading?
 1196          * Or maybe singlethreading is not needed at all.
 1197          */
 1198         if (mode == SINGLE_ALLPROC) {
 1199                 while ((p->p_flag & P_STOPPED_SINGLE) != 0) {
 1200                         if ((p->p_flag2 & P2_WEXIT) != 0)
 1201                                 return (1);
 1202                         msleep(&p->p_flag, &p->p_mtx, PCATCH, "thrsgl", 0);
 1203                 }
 1204         } else if ((p->p_flag & P_HADTHREADS) == 0)
 1205                 return (0);
 1206         if (p->p_singlethread != NULL && p->p_singlethread != td)
 1207                 return (1);
 1208 
 1209         if (mode == SINGLE_EXIT) {
 1210                 p->p_flag |= P_SINGLE_EXIT;
 1211                 p->p_flag &= ~P_SINGLE_BOUNDARY;
 1212         } else {
 1213                 p->p_flag &= ~P_SINGLE_EXIT;
 1214                 if (mode == SINGLE_BOUNDARY)
 1215                         p->p_flag |= P_SINGLE_BOUNDARY;
 1216                 else
 1217                         p->p_flag &= ~P_SINGLE_BOUNDARY;
 1218         }
 1219         if (mode == SINGLE_ALLPROC)
 1220                 p->p_flag |= P_TOTAL_STOP;
 1221         p->p_flag |= P_STOPPED_SINGLE;
 1222         PROC_SLOCK(p);
 1223         p->p_singlethread = td;
 1224         remaining = calc_remaining(p, mode);
 1225         while (remaining != remain_for_mode(mode)) {
 1226                 if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
 1227                         goto stopme;
 1228                 wakeup_swapper = 0;
 1229                 FOREACH_THREAD_IN_PROC(p, td2) {
 1230                         if (td2 == td)
 1231                                 continue;
 1232                         thread_lock(td2);
 1233                         td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
 1234                         if (TD_IS_INHIBITED(td2)) {
 1235                                 wakeup_swapper |= weed_inhib(mode, td2, p);
 1236 #ifdef SMP
 1237                         } else if (TD_IS_RUNNING(td2)) {
 1238                                 forward_signal(td2);
 1239                                 thread_unlock(td2);
 1240 #endif
 1241                         } else
 1242                                 thread_unlock(td2);
 1243                 }
 1244                 if (wakeup_swapper)
 1245                         kick_proc0();
 1246                 remaining = calc_remaining(p, mode);
 1247 
 1248                 /*
 1249                  * Maybe we suspended some threads; was it enough?
 1250                  */
 1251                 if (remaining == remain_for_mode(mode))
 1252                         break;
 1253 
 1254 stopme:
 1255                 /*
 1256                  * Wake us up when everyone else has suspended.
 1257                  * In the meantime we suspend as well.
 1258                  */
 1259                 thread_suspend_switch(td, p);
 1260                 remaining = calc_remaining(p, mode);
 1261         }
 1262         if (mode == SINGLE_EXIT) {
 1263                 /*
 1264                  * Convert the process to an unthreaded process.  The
 1265                  * SINGLE_EXIT is called by exit1() or execve(), in
 1266                  * both cases other threads must be retired.
 1267                  */
 1268                 KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads"));
 1269                 p->p_singlethread = NULL;
 1270                 p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS);
 1271 
 1272                 /*
 1273                  * Wait for any remaining threads to exit cpu_throw().
 1274                  */
 1275                 while (p->p_exitthreads != 0) {
 1276                         PROC_SUNLOCK(p);
 1277                         PROC_UNLOCK(p);
 1278                         sched_relinquish(td);
 1279                         PROC_LOCK(p);
 1280                         PROC_SLOCK(p);
 1281                 }
 1282         } else if (mode == SINGLE_BOUNDARY) {
 1283                 /*
 1284                  * Wait until all suspended threads are removed from
 1285                  * the processors.  The thread_suspend_check()
 1286                  * increments p_boundary_count while it is still
 1287                  * running, which would otherwise make it possible for
 1288                  * execve() to destroy the vmspace while our other
 1289                  * threads are still using the address space.
 1290                  *
 1291                  * We lock the thread, which is only allowed to
 1292                  * succeed after context switch code finished using
 1293                  * the address space.
 1294                  */
 1295                 FOREACH_THREAD_IN_PROC(p, td2) {
 1296                         if (td2 == td)
 1297                                 continue;
 1298                         thread_lock(td2);
 1299                         KASSERT((td2->td_flags & TDF_BOUNDARY) != 0,
 1300                             ("td %p not on boundary", td2));
 1301                         KASSERT(TD_IS_SUSPENDED(td2),
 1302                             ("td %p is not suspended", td2));
 1303                         thread_unlock(td2);
 1304                 }
 1305         }
 1306         PROC_SUNLOCK(p);
 1307         return (0);
 1308 }
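/*
 * Note on the loop above: after P_STOPPED_SINGLE is set, every other thread
 * is pushed toward suspension: TDF_NEEDSUSPCHK makes it stop in
 * thread_suspend_check(), weed_inhib() unblocks threads that are already
 * inhibited, and forward_signal() IPIs threads running on other CPUs so they
 * notice the request.  The caller then suspends itself via
 * thread_suspend_switch() until calc_remaining() matches remain_for_mode().
 * SINGLE_EXIT additionally waits for p_exitthreads to drain, and
 * SINGLE_BOUNDARY waits until the suspended threads are off their CPUs and
 * done with the address space.
 */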
 1309 
 1310 bool
 1311 thread_suspend_check_needed(void)
 1312 {
 1313         struct proc *p;
 1314         struct thread *td;
 1315 
 1316         td = curthread;
 1317         p = td->td_proc;
 1318         PROC_LOCK_ASSERT(p, MA_OWNED);
 1319         return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 &&
 1320             (td->td_dbgflags & TDB_SUSPEND) != 0));
 1321 }
 1322 
 1323 /*
 1324  * Called in from locations that can safely check to see
 1325  * whether we have to suspend or at least throttle for a
 1326  * single-thread event (e.g. fork).
 1327  *
 1328  * Such locations include userret().
 1329  * If the "return_instead" argument is non-zero, the thread must be able to
 1330  * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 1331  *
 1332  * The 'return_instead' argument tells the function if it may do a
 1333  * thread_exit() or suspend, or whether the caller must abort and back
 1334  * out instead.
 1335  *
 1336  * If the thread that set the single_threading request has set the
 1337  * P_SINGLE_EXIT bit in the process flags then this call will never return
 1338  * if 'return_instead' is false, but will exit.
 1339  *
 1340  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 1341  *---------------+--------------------+---------------------
 1342  *       0       | returns 0          |   returns 0 or 1
 1343  *               | when ST ends       |   immediately
 1344  *---------------+--------------------+---------------------
 1345  *       1       | thread exits       |   returns 1
 1346  *               |                    |  immediately
 1347  * 0 = thread_exit() or suspension ok,
 1348  * other = return error instead of stopping the thread.
 1349  *
 1350  * While a full suspension is under effect, even a single threading
 1351  * thread would be suspended if it made this call (but it shouldn't).
 1352  * This call should only be made from places where
 1353  * thread_exit() would be safe as that may be the outcome unless
 1354  * return_instead is set.
 1355  */
 1356 int
 1357 thread_suspend_check(int return_instead)
 1358 {
 1359         struct thread *td;
 1360         struct proc *p;
 1361         int wakeup_swapper;
 1362 
 1363         td = curthread;
 1364         p = td->td_proc;
 1365         mtx_assert(&Giant, MA_NOTOWNED);
 1366         PROC_LOCK_ASSERT(p, MA_OWNED);
 1367         while (thread_suspend_check_needed()) {
 1368                 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 1369                         KASSERT(p->p_singlethread != NULL,
 1370                             ("singlethread not set"));
 1371                         /*
 1372                          * The only suspension in action is a
 1373                          * single-threading; the single threader need not stop.
 1374                          * It is safe to access p->p_singlethread unlocked
 1375                          * because it can only be set to our address by us.
 1376                          */
 1377                         if (p->p_singlethread == td)
 1378                                 return (0);     /* Exempt from stopping. */
 1379                 }
 1380                 if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
 1381                         return (EINTR);
 1382 
 1383                 /* Should we go to the user boundary if we didn't come from there? */
 1384                 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
 1385                     (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
 1386                         return (ERESTART);
 1387 
 1388                 /*
 1389                  * Ignore suspend requests if they are deferred.
 1390                  */
 1391                 if ((td->td_flags & TDF_SBDRY) != 0) {
 1392                         KASSERT(return_instead,
 1393                             ("TDF_SBDRY set for unsafe thread_suspend_check"));
 1394                         KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) !=
 1395                             (TDF_SEINTR | TDF_SERESTART),
 1396                             ("both TDF_SEINTR and TDF_SERESTART"));
 1397                         return (TD_SBDRY_INTR(td) ? TD_SBDRY_ERRNO(td) : 0);
 1398                 }
 1399 
 1400                 /*
 1401                  * If the process is waiting for us to exit,
 1402                  * this thread should just suicide.
 1403                  * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
 1404                  */
 1405                 if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
 1406                         PROC_UNLOCK(p);
 1407 
 1408                         /*
 1409                          * Allow Linux emulation layer to do some work
 1410                          * before thread suicide.
 1411                          */
 1412                         if (__predict_false(p->p_sysent->sv_thread_detach != NULL))
 1413                                 (p->p_sysent->sv_thread_detach)(td);
 1414                         umtx_thread_exit(td);
 1415                         kern_thr_exit(td);
 1416                         panic("stopped thread did not exit");
 1417                 }
 1418 
 1419                 PROC_SLOCK(p);
 1420                 thread_stopped(p);
 1421                 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 1422                         if (p->p_numthreads == p->p_suspcount + 1) {
 1423                                 thread_lock(p->p_singlethread);
 1424                                 wakeup_swapper = thread_unsuspend_one(
 1425                                     p->p_singlethread, p, false);
 1426                                 if (wakeup_swapper)
 1427                                         kick_proc0();
 1428                         }
 1429                 }
 1430                 PROC_UNLOCK(p);
 1431                 thread_lock(td);
 1432                 /*
 1433                  * When a thread suspends, it just
 1434                  * gets taken off all queues.
 1435                  */
 1436                 thread_suspend_one(td);
 1437                 if (return_instead == 0) {
 1438                         p->p_boundary_count++;
 1439                         td->td_flags |= TDF_BOUNDARY;
 1440                 }
 1441                 PROC_SUNLOCK(p);
 1442                 mi_switch(SW_INVOL | SWT_SUSPEND);
 1443                 PROC_LOCK(p);
 1444         }
 1445         return (0);
 1446 }
 1447 
 1448 /*
 1449  * Check for possible stops and suspensions while executing a
 1450  * casueword or similar transiently failing operation.
 1451  *
 1452  * The sleep argument controls whether the function can handle a stop
 1453  * request itself or it should return ERESTART and the request is
 1454  * processed at the kernel/user boundary in ast.
 1455  *
 1456  * Typically, when retrying due to casueword(9) failure (rv == 1), we
 1457  * should handle the stop requests there, with the exception of cases when
 1458  * the thread owns a kernel resource, for instance busied the umtx
 1459  * key, or when functions return immediately if thread_check_susp()
 1460  * returned non-zero.  On the other hand, when retrying the whole lock
 1461  * operation, we had better not stop there but delegate the handling to
 1462  * ast.
 1463  *
 1464  * If the request is for thread termination P_SINGLE_EXIT, we cannot
 1465  * handle it at all, and simply return EINTR.
 1466  */
 1467 int
 1468 thread_check_susp(struct thread *td, bool sleep)
 1469 {
 1470         struct proc *p;
 1471         int error;
 1472 
 1473         /*
 1474          * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
 1475          * eventually break the lockstep loop.
 1476          */
 1477         if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
 1478                 return (0);
 1479         error = 0;
 1480         p = td->td_proc;
 1481         PROC_LOCK(p);
 1482         if (p->p_flag & P_SINGLE_EXIT)
 1483                 error = EINTR;
 1484         else if (P_SHOULDSTOP(p) ||
 1485             ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND)))
 1486                 error = sleep ? thread_suspend_check(0) : ERESTART;
 1487         PROC_UNLOCK(p);
 1488         return (error);
 1489 }
 1490 
 1491 void
 1492 thread_suspend_switch(struct thread *td, struct proc *p)
 1493 {
 1494 
 1495         KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
 1496         PROC_LOCK_ASSERT(p, MA_OWNED);
 1497         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1498         /*
 1499          * We implement thread_suspend_one in stages here to avoid
 1500          * dropping the proc lock while the thread lock is owned.
 1501          */
 1502         if (p == td->td_proc) {
 1503                 thread_stopped(p);
 1504                 p->p_suspcount++;
 1505         }
 1506         PROC_UNLOCK(p);
 1507         thread_lock(td);
 1508         td->td_flags &= ~TDF_NEEDSUSPCHK;
 1509         TD_SET_SUSPENDED(td);
 1510         sched_sleep(td, 0);
 1511         PROC_SUNLOCK(p);
 1512         DROP_GIANT();
 1513         mi_switch(SW_VOL | SWT_SUSPEND);
 1514         PICKUP_GIANT();
 1515         PROC_LOCK(p);
 1516         PROC_SLOCK(p);
 1517 }
 1518 
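/*
 * Mark td as suspended and account for it in p_suspcount.  The caller
 * must hold the process spinlock and the thread lock; the actual
 * context switch away from the suspended thread is done by the caller
 * (see, e.g., the mi_switch() call in thread_suspend_check() above).
 */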
 1519 void
 1520 thread_suspend_one(struct thread *td)
 1521 {
 1522         struct proc *p;
 1523 
 1524         p = td->td_proc;
 1525         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1526         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1527         KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
 1528         p->p_suspcount++;
 1529         td->td_flags &= ~TDF_NEEDSUSPCHK;
 1530         TD_SET_SUSPENDED(td);
 1531         sched_sleep(td, 0);
 1532 }
 1533 
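/*
 * Undo a single suspension: clear the suspended state, drop the
 * process suspension count when td belongs to p, and release the
 * boundary accounting if requested.  Returns non-zero when the caller
 * must kick the swapper (proc0), as reported by setrunnable().
 */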
 1534 static int
 1535 thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary)
 1536 {
 1537 
 1538         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1539         KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
 1540         TD_CLR_SUSPENDED(td);
 1541         td->td_flags &= ~TDF_ALLPROCSUSP;
 1542         if (td->td_proc == p) {
 1543                 PROC_SLOCK_ASSERT(p, MA_OWNED);
 1544                 p->p_suspcount--;
 1545                 if (boundary && (td->td_flags & TDF_BOUNDARY) != 0) {
 1546                         td->td_flags &= ~TDF_BOUNDARY;
 1547                         p->p_boundary_count--;
 1548                 }
 1549         }
 1550         return (setrunnable(td, 0));
 1551 }
 1552 
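/*
 * Make a single suspended thread runnable immediately, pulling it off
 * its sleep queue if necessary.  The caller holds the proc lock; the
 * suspension count is dropped under the process spinlock and the
 * swapper is kicked if setrunnable() asks for it.
 */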
 1553 void
 1554 thread_run_flash(struct thread *td)
 1555 {
 1556         struct proc *p;
 1557 
 1558         p = td->td_proc;
 1559         PROC_LOCK_ASSERT(p, MA_OWNED);
 1560 
 1561         if (TD_ON_SLEEPQ(td))
 1562                 sleepq_remove_nested(td);
 1563         else
 1564                 thread_lock(td);
 1565 
 1566         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1567         KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
 1568 
 1569         TD_CLR_SUSPENDED(td);
 1570         PROC_SLOCK(p);
 1571         MPASS(p->p_suspcount > 0);
 1572         p->p_suspcount--;
 1573         PROC_SUNLOCK(p);
 1574         if (setrunnable(td, 0))
 1575                 kick_proc0();
 1576 }
 1577 
 1578 /*
 1579  * Allow all threads blocked by single threading to continue running.
 1580  */
 1581 void
 1582 thread_unsuspend(struct proc *p)
 1583 {
 1584         struct thread *td;
 1585         int wakeup_swapper;
 1586 
 1587         PROC_LOCK_ASSERT(p, MA_OWNED);
 1588         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1589         wakeup_swapper = 0;
 1590         if (!P_SHOULDSTOP(p)) {
 1591                 FOREACH_THREAD_IN_PROC(p, td) {
 1592                         thread_lock(td);
 1593                         if (TD_IS_SUSPENDED(td))
 1594                                 wakeup_swapper |= thread_unsuspend_one(td, p,
 1595                                     true);
 1596                         else
 1597                                 thread_unlock(td);
 1598                 }
 1599         } else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
 1600             p->p_numthreads == p->p_suspcount) {
 1601                 /*
 1602                  * Stopping everything also did the job for the single
 1603                  * threading request.  Now that we have downgraded to
 1604                  * single-threaded, let it continue.
 1605                  */
 1606                 if (p->p_singlethread->td_proc == p) {
 1607                         thread_lock(p->p_singlethread);
 1608                         wakeup_swapper = thread_unsuspend_one(
 1609                             p->p_singlethread, p, false);
 1610                 }
 1611         }
 1612         if (wakeup_swapper)
 1613                 kick_proc0();
 1614 }
 1615 
 1616 /*
 1617  * End the single-threading mode.
 1618  */
 1619 void
 1620 thread_single_end(struct proc *p, int mode)
 1621 {
 1622         struct thread *td;
 1623         int wakeup_swapper;
 1624 
 1625         KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
 1626             mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
 1627             ("invalid mode %d", mode));
 1628         PROC_LOCK_ASSERT(p, MA_OWNED);
 1629         KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) ||
 1630             (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0),
 1631             ("mode %d does not match P_TOTAL_STOP", mode));
 1632         KASSERT(mode == SINGLE_ALLPROC || p->p_singlethread == curthread,
 1633             ("thread_single_end from other thread %p %p",
 1634             curthread, p->p_singlethread));
 1635         KASSERT(mode != SINGLE_BOUNDARY ||
 1636             (p->p_flag & P_SINGLE_BOUNDARY) != 0,
 1637             ("mis-matched SINGLE_BOUNDARY flags %x", p->p_flag));
 1638         p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY |
 1639             P_TOTAL_STOP);
 1640         PROC_SLOCK(p);
 1641         p->p_singlethread = NULL;
 1642         wakeup_swapper = 0;
 1643         /*
 1644          * If there are other threads, they may now run, unless of
 1645          * course there is a blanket 'stop order' on the process.
 1646          * The single-threading thread, however, must be allowed to
 1647          * continue, as this is a bad place to stop.
 1648          */
 1649         if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) {
 1650                 FOREACH_THREAD_IN_PROC(p, td) {
 1651                         thread_lock(td);
 1652                         if (TD_IS_SUSPENDED(td)) {
 1653                                 wakeup_swapper |= thread_unsuspend_one(td, p,
 1654                                     true);
 1655                         } else
 1656                                 thread_unlock(td);
 1657                 }
 1658         }
 1659         KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
 1660             ("inconsistent boundary count %d", p->p_boundary_count));
 1661         PROC_SUNLOCK(p);
 1662         if (wakeup_swapper)
 1663                 kick_proc0();
 1664         wakeup(&p->p_flag);
 1665 }
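
/*
 * Illustrative sketch (editor's addition, not part of kern_thread.c):
 * the usual bracketing of a process-wide operation with single
 * threading.  "example_singlethreaded_op" and the work in the middle
 * are hypothetical; execve() uses this pattern with the
 * SINGLE_BOUNDARY mode.
 */
static int
example_singlethreaded_op(struct proc *p)
{

        PROC_LOCK(p);
        if (thread_single(p, SINGLE_BOUNDARY)) {
                /* Another thread is single-threading this process. */
                PROC_UNLOCK(p);
                return (ERESTART);
        }
        PROC_UNLOCK(p);

        /*
         * Other threads are now parked at the kernel/user boundary;
         * perform the process-wide operation here.
         */

        PROC_LOCK(p);
        thread_single_end(p, SINGLE_BOUNDARY);
        PROC_UNLOCK(p);
        return (0);
}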
 1666 
 1667 /*
 1668  * Locate a thread by number and return with proc lock held.
 1669  *
 1670  * Thread exit establishes a proc -> tidhash lock ordering, but the
 1671  * lookup takes tidhash first and needs to return a locked proc.
 1672  *
 1673  * The problem is worked around by relying on the type-safety of both
 1674  * structures and doing the work in two steps:
 1675  * - tidhash-locked lookup which saves both thread and proc pointers
 1676  * - proc-locked verification that the found thread still matches
 1677  */
 1678 static bool
 1679 tdfind_hash(lwpid_t tid, pid_t pid, struct proc **pp, struct thread **tdp)
 1680 {
 1681 #define RUN_THRESH      16
 1682         struct proc *p;
 1683         struct thread *td;
 1684         int run;
 1685         bool locked;
 1686 
 1687         run = 0;
 1688         rw_rlock(TIDHASHLOCK(tid));
 1689         locked = true;
 1690         LIST_FOREACH(td, TIDHASH(tid), td_hash) {
 1691                 if (td->td_tid != tid) {
 1692                         run++;
 1693                         continue;
 1694                 }
 1695                 p = td->td_proc;
 1696                 if (pid != -1 && p->p_pid != pid) {
 1697                         td = NULL;
 1698                         break;
 1699                 }
 1700                 if (run > RUN_THRESH) {
 1701                         if (rw_try_upgrade(TIDHASHLOCK(tid))) {
 1702                                 LIST_REMOVE(td, td_hash);
 1703                                 LIST_INSERT_HEAD(TIDHASH(td->td_tid),
 1704                                         td, td_hash);
 1705                                 rw_wunlock(TIDHASHLOCK(tid));
 1706                                 locked = false;
 1707                                 break;
 1708                         }
 1709                 }
 1710                 break;
 1711         }
 1712         if (locked)
 1713                 rw_runlock(TIDHASHLOCK(tid));
 1714         if (td == NULL)
 1715                 return (false);
 1716         *pp = p;
 1717         *tdp = td;
 1718         return (true);
 1719 }
 1720 
 1721 struct thread *
 1722 tdfind(lwpid_t tid, pid_t pid)
 1723 {
 1724         struct proc *p;
 1725         struct thread *td;
 1726 
 1727         td = curthread;
 1728         if (td->td_tid == tid) {
 1729                 if (pid != -1 && td->td_proc->p_pid != pid)
 1730                         return (NULL);
 1731                 PROC_LOCK(td->td_proc);
 1732                 return (td);
 1733         }
 1734 
 1735         for (;;) {
 1736                 if (!tdfind_hash(tid, pid, &p, &td))
 1737                         return (NULL);
 1738                 PROC_LOCK(p);
 1739                 if (td->td_tid != tid) {
 1740                         PROC_UNLOCK(p);
 1741                         continue;
 1742                 }
 1743                 if (td->td_proc != p) {
 1744                         PROC_UNLOCK(p);
 1745                         continue;
 1746                 }
 1747                 if (p->p_state == PRS_NEW) {
 1748                         PROC_UNLOCK(p);
 1749                         return (NULL);
 1750                 }
 1751                 return (td);
 1752         }
 1753 }
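
/*
 * Illustrative sketch (editor's addition, not part of kern_thread.c):
 * resolving a thread id with tdfind() and honouring its locking
 * contract.  "example_find_thread" is hypothetical; similar lookups
 * appear in, e.g., kern_thr.c.
 */
static int
example_find_thread(lwpid_t tid, pid_t pid)
{
        struct thread *ttd;

        /* On success the thread's process is returned locked. */
        ttd = tdfind(tid, pid);
        if (ttd == NULL)
                return (ESRCH);

        /* ... inspect or signal ttd while ttd->td_proc stays locked ... */

        PROC_UNLOCK(ttd->td_proc);
        return (0);
}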
 1754 
 1755 void
 1756 tidhash_add(struct thread *td)
 1757 {
 1758         rw_wlock(TIDHASHLOCK(td->td_tid));
 1759         LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
 1760         rw_wunlock(TIDHASHLOCK(td->td_tid));
 1761 }
 1762 
 1763 void
 1764 tidhash_remove(struct thread *td)
 1765 {
 1766 
 1767         rw_wlock(TIDHASHLOCK(td->td_tid));
 1768         LIST_REMOVE(td, td_hash);
 1769         rw_wunlock(TIDHASHLOCK(td->td_tid));
 1770 }
