FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_thread.c

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
    5  *  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice(s), this list of conditions and the following disclaimer as
   12  *    the first lines of this file unmodified other than the possible
   13  *    addition of one or more copyright notices.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice(s), this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
   19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   21  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
   22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   25  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
   28  * DAMAGE.
   29  */
   30 
   31 #include "opt_witness.h"
   32 #include "opt_hwpmc_hooks.h"
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/kernel.h>
   40 #include <sys/lock.h>
   41 #include <sys/mutex.h>
   42 #include <sys/proc.h>
   43 #include <sys/bitstring.h>
   44 #include <sys/epoch.h>
   45 #include <sys/rangelock.h>
   46 #include <sys/resourcevar.h>
   47 #include <sys/sdt.h>
   48 #include <sys/smp.h>
   49 #include <sys/sched.h>
   50 #include <sys/sleepqueue.h>
   51 #include <sys/selinfo.h>
   52 #include <sys/syscallsubr.h>
   53 #include <sys/dtrace_bsd.h>
   54 #include <sys/sysent.h>
   55 #include <sys/turnstile.h>
   56 #include <sys/taskqueue.h>
   57 #include <sys/ktr.h>
   58 #include <sys/rwlock.h>
   59 #include <sys/umtx.h>
   60 #include <sys/vmmeter.h>
   61 #include <sys/cpuset.h>
   62 #ifdef  HWPMC_HOOKS
   63 #include <sys/pmckern.h>
   64 #endif
   65 #include <sys/priv.h>
   66 
   67 #include <security/audit/audit.h>
   68 
   69 #include <vm/pmap.h>
   70 #include <vm/vm.h>
   71 #include <vm/vm_extern.h>
   72 #include <vm/uma.h>
   73 #include <vm/vm_phys.h>
   74 #include <sys/eventhandler.h>
   75 
   76 /*
   77  * Asserts below verify the stability of struct thread and struct proc
   78  * layout, as exposed by KBI to modules.  On head, the KBI is allowed
    79  * to drift; changes to the structures must be accompanied by
    80  * updates to the asserts.
   81  *
   82  * On the stable branches after KBI freeze, conditions must not be
   83  * violated.  Typically new fields are moved to the end of the
   84  * structures.
   85  */
   86 #ifdef __amd64__
   87 _Static_assert(offsetof(struct thread, td_flags) == 0xfc,
   88     "struct thread KBI td_flags");
   89 _Static_assert(offsetof(struct thread, td_pflags) == 0x104,
   90     "struct thread KBI td_pflags");
   91 _Static_assert(offsetof(struct thread, td_frame) == 0x4a0,
   92     "struct thread KBI td_frame");
   93 _Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
   94     "struct thread KBI td_emuldata");
   95 _Static_assert(offsetof(struct proc, p_flag) == 0xb8,
   96     "struct proc KBI p_flag");
   97 _Static_assert(offsetof(struct proc, p_pid) == 0xc4,
   98     "struct proc KBI p_pid");
   99 _Static_assert(offsetof(struct proc, p_filemon) == 0x3c0,
  100     "struct proc KBI p_filemon");
  101 _Static_assert(offsetof(struct proc, p_comm) == 0x3d8,
  102     "struct proc KBI p_comm");
  103 _Static_assert(offsetof(struct proc, p_emuldata) == 0x4b8,
  104     "struct proc KBI p_emuldata");
  105 #endif
  106 #ifdef __i386__
  107 _Static_assert(offsetof(struct thread, td_flags) == 0x98,
  108     "struct thread KBI td_flags");
  109 _Static_assert(offsetof(struct thread, td_pflags) == 0xa0,
  110     "struct thread KBI td_pflags");
  111 _Static_assert(offsetof(struct thread, td_frame) == 0x300,
  112     "struct thread KBI td_frame");
  113 _Static_assert(offsetof(struct thread, td_emuldata) == 0x344,
  114     "struct thread KBI td_emuldata");
  115 _Static_assert(offsetof(struct proc, p_flag) == 0x6c,
  116     "struct proc KBI p_flag");
  117 _Static_assert(offsetof(struct proc, p_pid) == 0x78,
  118     "struct proc KBI p_pid");
  119 _Static_assert(offsetof(struct proc, p_filemon) == 0x26c,
  120     "struct proc KBI p_filemon");
  121 _Static_assert(offsetof(struct proc, p_comm) == 0x280,
  122     "struct proc KBI p_comm");
  123 _Static_assert(offsetof(struct proc, p_emuldata) == 0x30c,
  124     "struct proc KBI p_emuldata");
  125 #endif
  126 
  127 SDT_PROVIDER_DECLARE(proc);
  128 SDT_PROBE_DEFINE(proc, , , lwp__exit);
  129 
  130 /*
   131  * Thread-related storage.
  132  */
  133 static uma_zone_t thread_zone;
  134 
  135 struct thread_domain_data {
  136         struct thread   *tdd_zombies;
  137         int             tdd_reapticks;
  138 } __aligned(CACHE_LINE_SIZE);
  139 
  140 static struct thread_domain_data thread_domain_data[MAXMEMDOM];
  141 
  142 static struct task      thread_reap_task;
  143 static struct callout   thread_reap_callout;
  144 
  145 static void thread_zombie(struct thread *);
  146 static void thread_reap(void);
  147 static void thread_reap_all(void);
  148 static void thread_reap_task_cb(void *, int);
  149 static void thread_reap_callout_cb(void *);
  150 static int thread_unsuspend_one(struct thread *td, struct proc *p,
  151     bool boundary);
  152 static void thread_free_batched(struct thread *td);
  153 
  154 static __exclusive_cache_line struct mtx tid_lock;
  155 static bitstr_t *tid_bitmap;
  156 
  157 static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");
  158 
  159 static int maxthread;
  160 SYSCTL_INT(_kern, OID_AUTO, maxthread, CTLFLAG_RDTUN,
  161     &maxthread, 0, "Maximum number of threads");
  162 
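       /*
        * Usage note (a sketch, values illustrative): because the sysctl above
        * is CTLFLAG_RDTUN, kern.maxthread can be read at run time with
        * "sysctl kern.maxthread", but it can only be raised as a loader
        * tunable, e.g. by adding a line such as
        *
        *       kern.maxthread=2000000
        *
        * to /boot/loader.conf and rebooting.  When the tunable is left unset,
        * threadinit() below computes the default.
        */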
  163 static __exclusive_cache_line int nthreads;
  164 
  165 static LIST_HEAD(tidhashhead, thread) *tidhashtbl;
  166 static u_long   tidhash;
  167 static u_long   tidhashlock;
  168 static struct   rwlock *tidhashtbl_lock;
  169 #define TIDHASH(tid)            (&tidhashtbl[(tid) & tidhash])
  170 #define TIDHASHLOCK(tid)        (&tidhashtbl_lock[(tid) & tidhashlock])
  171 
  172 EVENTHANDLER_LIST_DEFINE(thread_ctor);
  173 EVENTHANDLER_LIST_DEFINE(thread_dtor);
  174 EVENTHANDLER_LIST_DEFINE(thread_init);
  175 EVENTHANDLER_LIST_DEFINE(thread_fini);
  176 
  177 static bool
  178 thread_count_inc_try(void)
  179 {
  180         int nthreads_new;
  181 
  182         nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
  183         if (nthreads_new >= maxthread - 100) {
  184                 if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 ||
  185                     nthreads_new >= maxthread) {
  186                         atomic_subtract_int(&nthreads, 1);
  187                         return (false);
  188                 }
  189         }
  190         return (true);
  191 }
  192 
  193 static bool
  194 thread_count_inc(void)
  195 {
  196         static struct timeval lastfail;
  197         static int curfail;
  198 
  199         thread_reap();
  200         if (thread_count_inc_try()) {
  201                 return (true);
  202         }
  203 
  204         thread_reap_all();
  205         if (thread_count_inc_try()) {
  206                 return (true);
  207         }
  208 
  209         if (ppsratecheck(&lastfail, &curfail, 1)) {
  210                 printf("maxthread limit exceeded by uid %u "
  211                     "(pid %d); consider increasing kern.maxthread\n",
  212                     curthread->td_ucred->cr_ruid, curproc->p_pid);
  213         }
  214         return (false);
  215 }
  216 
  217 static void
  218 thread_count_sub(int n)
  219 {
  220 
  221         atomic_subtract_int(&nthreads, n);
  222 }
  223 
  224 static void
  225 thread_count_dec(void)
  226 {
  227 
  228         thread_count_sub(1);
  229 }
  230 
  231 static lwpid_t
  232 tid_alloc(void)
  233 {
  234         static lwpid_t trytid;
  235         lwpid_t tid;
  236 
  237         mtx_lock(&tid_lock);
  238         /*
  239          * It is an invariant that the bitmap is big enough to hold maxthread
  240          * IDs. If we got to this point there has to be at least one free.
  241          */
  242         if (trytid >= maxthread)
  243                 trytid = 0;
  244         bit_ffc_at(tid_bitmap, trytid, maxthread, &tid);
  245         if (tid == -1) {
  246                 KASSERT(trytid != 0, ("unexpectedly ran out of IDs"));
  247                 trytid = 0;
  248                 bit_ffc_at(tid_bitmap, trytid, maxthread, &tid);
  249                 KASSERT(tid != -1, ("unexpectedly ran out of IDs"));
  250         }
  251         bit_set(tid_bitmap, tid);
  252         trytid = tid + 1;
  253         mtx_unlock(&tid_lock);
  254         return (tid + NO_PID);
  255 }
  256 
  257 static void
  258 tid_free_locked(lwpid_t rtid)
  259 {
  260         lwpid_t tid;
  261 
  262         mtx_assert(&tid_lock, MA_OWNED);
  263         KASSERT(rtid >= NO_PID,
  264             ("%s: invalid tid %d\n", __func__, rtid));
  265         tid = rtid - NO_PID;
  266         KASSERT(bit_test(tid_bitmap, tid) != 0,
  267             ("thread ID %d not allocated\n", rtid));
  268         bit_clear(tid_bitmap, tid);
  269 }
  270 
  271 static void
  272 tid_free(lwpid_t rtid)
  273 {
  274 
  275         mtx_lock(&tid_lock);
  276         tid_free_locked(rtid);
  277         mtx_unlock(&tid_lock);
  278 }
  279 
  280 static void
  281 tid_free_batch(lwpid_t *batch, int n)
  282 {
  283         int i;
  284 
  285         mtx_lock(&tid_lock);
  286         for (i = 0; i < n; i++) {
  287                 tid_free_locked(batch[i]);
  288         }
  289         mtx_unlock(&tid_lock);
  290 }
  291 
  292 /*
   293  * Batching for thread reaping.
  294  */
  295 struct tidbatch {
  296         lwpid_t tab[16];
  297         int n;
  298 };
  299 
  300 static void
  301 tidbatch_prep(struct tidbatch *tb)
  302 {
  303 
  304         tb->n = 0;
  305 }
  306 
  307 static void
  308 tidbatch_add(struct tidbatch *tb, struct thread *td)
  309 {
  310 
  311         KASSERT(tb->n < nitems(tb->tab),
  312             ("%s: count too high %d", __func__, tb->n));
  313         tb->tab[tb->n] = td->td_tid;
  314         tb->n++;
  315 }
  316 
  317 static void
  318 tidbatch_process(struct tidbatch *tb)
  319 {
  320 
  321         KASSERT(tb->n <= nitems(tb->tab),
  322             ("%s: count too high %d", __func__, tb->n));
  323         if (tb->n == nitems(tb->tab)) {
  324                 tid_free_batch(tb->tab, tb->n);
  325                 tb->n = 0;
  326         }
  327 }
  328 
  329 static void
  330 tidbatch_final(struct tidbatch *tb)
  331 {
  332 
  333         KASSERT(tb->n <= nitems(tb->tab),
  334             ("%s: count too high %d", __func__, tb->n));
  335         if (tb->n != 0) {
  336                 tid_free_batch(tb->tab, tb->n);
  337         }
  338 }
  339 
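       /*
        * The tidbatch helpers above form a small accumulate-and-flush API that
        * amortizes tid_lock acquisitions across up to nitems(tb->tab) frees.
        * thread_reap_domain() below is the real consumer; a minimal usage
        * sketch:
        *
        *       struct tidbatch tb;
        *
        *       tidbatch_prep(&tb);
        *       for each exited thread td {
        *               tidbatch_add(&tb, td);
        *               tidbatch_process(&tb);  (flushes only when the batch is full)
        *       }
        *       tidbatch_final(&tb);            (flushes any remainder)
        */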
  340 /*
  341  * Prepare a thread for use.
  342  */
  343 static int
  344 thread_ctor(void *mem, int size, void *arg, int flags)
  345 {
  346         struct thread   *td;
  347 
  348         td = (struct thread *)mem;
  349         td->td_state = TDS_INACTIVE;
  350         td->td_lastcpu = td->td_oncpu = NOCPU;
  351 
  352         /*
  353          * Note that td_critnest begins life as 1 because the thread is not
  354          * running and is thereby implicitly waiting to be on the receiving
  355          * end of a context switch.
  356          */
  357         td->td_critnest = 1;
  358         td->td_lend_user_pri = PRI_MAX;
  359 #ifdef AUDIT
  360         audit_thread_alloc(td);
  361 #endif
  362 #ifdef KDTRACE_HOOKS
  363         kdtrace_thread_ctor(td);
  364 #endif
  365         umtx_thread_alloc(td);
  366         MPASS(td->td_sel == NULL);
  367         return (0);
  368 }
  369 
  370 /*
  371  * Reclaim a thread after use.
  372  */
  373 static void
  374 thread_dtor(void *mem, int size, void *arg)
  375 {
  376         struct thread *td;
  377 
  378         td = (struct thread *)mem;
  379 
  380 #ifdef INVARIANTS
  381         /* Verify that this thread is in a safe state to free. */
  382         switch (td->td_state) {
  383         case TDS_INHIBITED:
  384         case TDS_RUNNING:
  385         case TDS_CAN_RUN:
  386         case TDS_RUNQ:
  387                 /*
  388                  * We must never unlink a thread that is in one of
  389                  * these states, because it is currently active.
  390                  */
  391                 panic("bad state for thread unlinking");
  392                 /* NOTREACHED */
  393         case TDS_INACTIVE:
  394                 break;
  395         default:
  396                 panic("bad thread state");
  397                 /* NOTREACHED */
  398         }
  399 #endif
  400 #ifdef AUDIT
  401         audit_thread_free(td);
  402 #endif
  403 #ifdef KDTRACE_HOOKS
  404         kdtrace_thread_dtor(td);
  405 #endif
   406         /* Free all OSD associated with this thread. */
  407         osd_thread_exit(td);
  408         td_softdep_cleanup(td);
  409         MPASS(td->td_su == NULL);
  410         seltdfini(td);
  411 }
  412 
  413 /*
  414  * Initialize type-stable parts of a thread (when newly created).
  415  */
  416 static int
  417 thread_init(void *mem, int size, int flags)
  418 {
  419         struct thread *td;
  420 
  421         td = (struct thread *)mem;
  422 
  423         td->td_allocdomain = vm_phys_domain(vtophys(td));
  424         td->td_sleepqueue = sleepq_alloc();
  425         td->td_turnstile = turnstile_alloc();
  426         td->td_rlqe = NULL;
  427         EVENTHANDLER_DIRECT_INVOKE(thread_init, td);
  428         umtx_thread_init(td);
  429         td->td_kstack = 0;
  430         td->td_sel = NULL;
  431         return (0);
  432 }
  433 
  434 /*
  435  * Tear down type-stable parts of a thread (just before being discarded).
  436  */
  437 static void
  438 thread_fini(void *mem, int size)
  439 {
  440         struct thread *td;
  441 
  442         td = (struct thread *)mem;
  443         EVENTHANDLER_DIRECT_INVOKE(thread_fini, td);
  444         rlqentry_free(td->td_rlqe);
  445         turnstile_free(td->td_turnstile);
  446         sleepq_free(td->td_sleepqueue);
  447         umtx_thread_fini(td);
  448         MPASS(td->td_sel == NULL);
  449 }
  450 
  451 /*
  452  * For a newly created process,
   453  * link up all the structures and its initial threads, etc.
   454  * Called from:
  455  * {arch}/{arch}/machdep.c   {arch}_init(), init386() etc.
  456  * proc_dtor() (should go away)
  457  * proc_init()
  458  */
  459 void
  460 proc_linkup0(struct proc *p, struct thread *td)
  461 {
  462         TAILQ_INIT(&p->p_threads);           /* all threads in proc */
  463         proc_linkup(p, td);
  464 }
  465 
  466 void
  467 proc_linkup(struct proc *p, struct thread *td)
  468 {
  469 
  470         sigqueue_init(&p->p_sigqueue, p);
  471         p->p_ksi = ksiginfo_alloc(1);
  472         if (p->p_ksi != NULL) {
  473                 /* XXX p_ksi may be null if ksiginfo zone is not ready */
  474                 p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
  475         }
  476         LIST_INIT(&p->p_mqnotifier);
  477         p->p_numthreads = 0;
  478         thread_link(td, p);
  479 }
  480 
  481 extern int max_threads_per_proc;
  482 
  483 /*
  484  * Initialize global thread allocation resources.
  485  */
  486 void
  487 threadinit(void)
  488 {
  489         u_long i;
  490         lwpid_t tid0;
  491         uint32_t flags;
  492 
  493         /*
  494          * Place an upper limit on threads which can be allocated.
  495          *
  496          * Note that other factors may make the de facto limit much lower.
  497          *
  498          * Platform limits are somewhat arbitrary but deemed "more than good
   499          * enough" for the foreseeable future.
  500          */
  501         if (maxthread == 0) {
  502 #ifdef _LP64
  503                 maxthread = MIN(maxproc * max_threads_per_proc, 1000000);
  504 #else
  505                 maxthread = MIN(maxproc * max_threads_per_proc, 100000);
  506 #endif
  507         }
  508 
  509         mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
  510         tid_bitmap = bit_alloc(maxthread, M_TIDHASH, M_WAITOK);
  511         /*
  512          * Handle thread0.
  513          */
  514         thread_count_inc();
  515         tid0 = tid_alloc();
  516         if (tid0 != THREAD0_TID)
  517                 panic("tid0 %d != %d\n", tid0, THREAD0_TID);
  518 
  519         flags = UMA_ZONE_NOFREE;
  520 #ifdef __aarch64__
  521         /*
  522          * Force thread structures to be allocated from the direct map.
  523          * Otherwise, superpage promotions and demotions may temporarily
  524          * invalidate thread structure mappings.  For most dynamically allocated
  525          * structures this is not a problem, but translation faults cannot be
  526          * handled without accessing curthread.
  527          */
  528         flags |= UMA_ZONE_CONTIG;
  529 #endif
  530         thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
  531             thread_ctor, thread_dtor, thread_init, thread_fini,
  532             32 - 1, flags);
  533         tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
  534         tidhashlock = (tidhash + 1) / 64;
  535         if (tidhashlock > 0)
  536                 tidhashlock--;
  537         tidhashtbl_lock = malloc(sizeof(*tidhashtbl_lock) * (tidhashlock + 1),
  538             M_TIDHASH, M_WAITOK | M_ZERO);
  539         for (i = 0; i < tidhashlock + 1; i++)
  540                 rw_init(&tidhashtbl_lock[i], "tidhash");
  541 
  542         TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL);
  543         callout_init(&thread_reap_callout, 1);
  544         callout_reset(&thread_reap_callout, 5 * hz,
  545             thread_reap_callout_cb, NULL);
  546 }
  547 
  548 /*
  549  * Place an unused thread on the zombie list.
  550  */
  551 void
  552 thread_zombie(struct thread *td)
  553 {
  554         struct thread_domain_data *tdd;
  555         struct thread *ztd;
  556 
  557         tdd = &thread_domain_data[td->td_allocdomain];
  558         ztd = atomic_load_ptr(&tdd->tdd_zombies);
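                /*
                 * Lock-free LIFO push: on failure atomic_fcmpset_rel_ptr
                 * refreshes ztd with the head it observed, so the loop
                 * simply retries against the current value.
                 */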
  559         for (;;) {
  560                 td->td_zombie = ztd;
  561                 if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
  562                     (uintptr_t *)&ztd, (uintptr_t)td))
  563                         break;
  564                 continue;
  565         }
  566 }
  567 
  568 /*
  569  * Release a thread that has exited after cpu_throw().
  570  */
  571 void
  572 thread_stash(struct thread *td)
  573 {
  574         atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
  575         thread_zombie(td);
  576 }
  577 
  578 /*
  579  * Reap zombies from passed domain.
  580  */
  581 static void
  582 thread_reap_domain(struct thread_domain_data *tdd)
  583 {
  584         struct thread *itd, *ntd;
  585         struct tidbatch tidbatch;
  586         struct credbatch credbatch;
  587         int tdcount;
  588         struct plimit *lim;
  589         int limcount;
  590 
  591         /*
  592          * Reading upfront is pessimal if followed by concurrent atomic_swap,
  593          * but most of the time the list is empty.
  594          */
  595         if (tdd->tdd_zombies == NULL)
  596                 return;
  597 
  598         itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies,
  599             (uintptr_t)NULL);
  600         if (itd == NULL)
  601                 return;
  602 
  603         /*
   604          * Multiple CPUs can get here; the race is fine, as ticks is only
  605          * advisory.
  606          */
  607         tdd->tdd_reapticks = ticks;
  608 
  609         tidbatch_prep(&tidbatch);
  610         credbatch_prep(&credbatch);
  611         tdcount = 0;
  612         lim = NULL;
  613         limcount = 0;
  614 
  615         while (itd != NULL) {
  616                 ntd = itd->td_zombie;
  617                 EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd);
  618                 tidbatch_add(&tidbatch, itd);
  619                 credbatch_add(&credbatch, itd);
  620                 MPASS(itd->td_limit != NULL);
  621                 if (lim != itd->td_limit) {
  622                         if (limcount != 0) {
  623                                 lim_freen(lim, limcount);
  624                                 limcount = 0;
  625                         }
  626                 }
  627                 lim = itd->td_limit;
  628                 limcount++;
  629                 thread_free_batched(itd);
  630                 tidbatch_process(&tidbatch);
  631                 credbatch_process(&credbatch);
  632                 tdcount++;
  633                 if (tdcount == 32) {
  634                         thread_count_sub(tdcount);
  635                         tdcount = 0;
  636                 }
  637                 itd = ntd;
  638         }
  639 
  640         tidbatch_final(&tidbatch);
  641         credbatch_final(&credbatch);
  642         if (tdcount != 0) {
  643                 thread_count_sub(tdcount);
  644         }
  645         MPASS(limcount != 0);
  646         lim_freen(lim, limcount);
  647 }
  648 
  649 /*
  650  * Reap zombies from all domains.
  651  */
  652 static void
  653 thread_reap_all(void)
  654 {
  655         struct thread_domain_data *tdd;
  656         int i, domain;
  657 
  658         domain = PCPU_GET(domain);
  659         for (i = 0; i < vm_ndomains; i++) {
  660                 tdd = &thread_domain_data[(i + domain) % vm_ndomains];
  661                 thread_reap_domain(tdd);
  662         }
  663 }
  664 
  665 /*
  666  * Reap zombies from local domain.
  667  */
  668 static void
  669 thread_reap(void)
  670 {
  671         struct thread_domain_data *tdd;
  672         int domain;
  673 
  674         domain = PCPU_GET(domain);
  675         tdd = &thread_domain_data[domain];
  676 
  677         thread_reap_domain(tdd);
  678 }
  679 
  680 static void
  681 thread_reap_task_cb(void *arg __unused, int pending __unused)
  682 {
  683 
  684         thread_reap_all();
  685 }
  686 
  687 static void
  688 thread_reap_callout_cb(void *arg __unused)
  689 {
  690         struct thread_domain_data *tdd;
  691         int i, cticks, lticks;
  692         bool wantreap;
  693 
  694         wantreap = false;
  695         cticks = atomic_load_int(&ticks);
  696         for (i = 0; i < vm_ndomains; i++) {
  697                 tdd = &thread_domain_data[i];
  698                 lticks = tdd->tdd_reapticks;
  699                 if (tdd->tdd_zombies != NULL &&
  700                     (u_int)(cticks - lticks) > 5 * hz) {
  701                         wantreap = true;
  702                         break;
  703                 }
  704         }
  705 
  706         if (wantreap)
  707                 taskqueue_enqueue(taskqueue_thread, &thread_reap_task);
  708         callout_reset(&thread_reap_callout, 5 * hz,
  709             thread_reap_callout_cb, NULL);
  710 }
  711 
  712 /*
  713  * Calling this function guarantees that any thread that exited before
  714  * the call is reaped when the function returns.  By 'exited' we mean
  715  * a thread removed from the process linkage with thread_unlink().
   716  * Practically this means that the caller must lock/unlock the corresponding
  717  * process lock before the call, to synchronize with thread_exit().
  718  */
  719 void
  720 thread_reap_barrier(void)
  721 {
  722         struct task *t;
  723 
  724         /*
  725          * First do context switches to each CPU to ensure that all
   726          * PCPU pc_deadthreads are moved to the zombie list.
  727          */
  728         quiesce_all_cpus("", PDROP);
  729 
  730         /*
  731          * Second, fire the task in the same thread as normal
  732          * thread_reap() is done, to serialize reaping.
  733          */
  734         t = malloc(sizeof(*t), M_TEMP, M_WAITOK);
  735         TASK_INIT(t, 0, thread_reap_task_cb, t);
  736         taskqueue_enqueue(taskqueue_thread, t);
  737         taskqueue_drain(taskqueue_thread, t);
  738         free(t, M_TEMP);
  739 }
  740 
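       /*
        * A minimal sketch of the intended calling pattern (hypothetical
        * caller; the barrier itself takes no locks on behalf of p):
        *
        *       PROC_LOCK(p);
        *       ... observe or detach per-thread state hanging off p ...
        *       PROC_UNLOCK(p);
        *       thread_reap_barrier();
        *       ... every thread unlinked before the unlock is now reaped ...
        */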
  741 /*
  742  * Allocate a thread.
  743  */
  744 struct thread *
  745 thread_alloc(int pages)
  746 {
  747         struct thread *td;
  748         lwpid_t tid;
  749 
  750         if (!thread_count_inc()) {
  751                 return (NULL);
  752         }
  753 
  754         tid = tid_alloc();
  755         td = uma_zalloc(thread_zone, M_WAITOK);
  756         KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
  757         if (!vm_thread_new(td, pages)) {
  758                 uma_zfree(thread_zone, td);
  759                 tid_free(tid);
  760                 thread_count_dec();
  761                 return (NULL);
  762         }
  763         td->td_tid = tid;
  764         cpu_thread_alloc(td);
  765         EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td);
  766         return (td);
  767 }
  768 
  769 int
  770 thread_alloc_stack(struct thread *td, int pages)
  771 {
  772 
  773         KASSERT(td->td_kstack == 0,
  774             ("thread_alloc_stack called on a thread with kstack"));
  775         if (!vm_thread_new(td, pages))
  776                 return (0);
  777         cpu_thread_alloc(td);
  778         return (1);
  779 }
  780 
  781 /*
  782  * Deallocate a thread.
  783  */
  784 static void
  785 thread_free_batched(struct thread *td)
  786 {
  787 
  788         lock_profile_thread_exit(td);
  789         if (td->td_cpuset)
  790                 cpuset_rel(td->td_cpuset);
  791         td->td_cpuset = NULL;
  792         cpu_thread_free(td);
  793         if (td->td_kstack != 0)
  794                 vm_thread_dispose(td);
  795         callout_drain(&td->td_slpcallout);
  796         /*
  797          * Freeing handled by the caller.
  798          */
  799         td->td_tid = -1;
  800         uma_zfree(thread_zone, td);
  801 }
  802 
  803 void
  804 thread_free(struct thread *td)
  805 {
  806         lwpid_t tid;
  807 
  808         EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td);
  809         tid = td->td_tid;
  810         thread_free_batched(td);
  811         tid_free(tid);
  812         thread_count_dec();
  813 }
  814 
  815 void
  816 thread_cow_get_proc(struct thread *newtd, struct proc *p)
  817 {
  818 
  819         PROC_LOCK_ASSERT(p, MA_OWNED);
  820         newtd->td_realucred = crcowget(p->p_ucred);
  821         newtd->td_ucred = newtd->td_realucred;
  822         newtd->td_limit = lim_hold(p->p_limit);
  823         newtd->td_cowgen = p->p_cowgen;
  824 }
  825 
  826 void
  827 thread_cow_get(struct thread *newtd, struct thread *td)
  828 {
  829 
  830         MPASS(td->td_realucred == td->td_ucred);
  831         newtd->td_realucred = crcowget(td->td_realucred);
  832         newtd->td_ucred = newtd->td_realucred;
  833         newtd->td_limit = lim_hold(td->td_limit);
  834         newtd->td_cowgen = td->td_cowgen;
  835 }
  836 
  837 void
  838 thread_cow_free(struct thread *td)
  839 {
  840 
  841         if (td->td_realucred != NULL)
  842                 crcowfree(td);
  843         if (td->td_limit != NULL)
  844                 lim_free(td->td_limit);
  845 }
  846 
  847 void
  848 thread_cow_update(struct thread *td)
  849 {
  850         struct proc *p;
  851         struct ucred *oldcred;
  852         struct plimit *oldlimit;
  853 
  854         p = td->td_proc;
  855         oldlimit = NULL;
  856         PROC_LOCK(p);
  857         oldcred = crcowsync();
  858         if (td->td_limit != p->p_limit) {
  859                 oldlimit = td->td_limit;
  860                 td->td_limit = lim_hold(p->p_limit);
  861         }
  862         td->td_cowgen = p->p_cowgen;
  863         PROC_UNLOCK(p);
  864         if (oldcred != NULL)
  865                 crfree(oldcred);
  866         if (oldlimit != NULL)
  867                 lim_free(oldlimit);
  868 }
  869 
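       /*
        * Consumers of the COW scheme compare the generation counters on a hot
        * path and resynchronize only when they differ, along the lines of the
        * following sketch (simplified; e.g. the syscall entry path performs
        * such a check):
        *
        *       if (__predict_false(td->td_cowgen != p->p_cowgen))
        *               thread_cow_update(td);
        */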
  870 /*
  871  * Discard the current thread and exit from its context.
  872  * Always called with scheduler locked.
  873  *
  874  * Because we can't free a thread while we're operating under its context,
  875  * push the current thread into our CPU's deadthread holder. This means
  876  * we needn't worry about someone else grabbing our context before we
  877  * do a cpu_throw().
  878  */
  879 void
  880 thread_exit(void)
  881 {
  882         uint64_t runtime, new_switchtime;
  883         struct thread *td;
  884         struct thread *td2;
  885         struct proc *p;
  886         int wakeup_swapper;
  887 
  888         td = curthread;
  889         p = td->td_proc;
  890 
  891         PROC_SLOCK_ASSERT(p, MA_OWNED);
  892         mtx_assert(&Giant, MA_NOTOWNED);
  893 
  894         PROC_LOCK_ASSERT(p, MA_OWNED);
  895         KASSERT(p != NULL, ("thread exiting without a process"));
  896         CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
  897             (long)p->p_pid, td->td_name);
  898         SDT_PROBE0(proc, , , lwp__exit);
  899         KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
  900         MPASS(td->td_realucred == td->td_ucred);
  901 
  902         /*
   903          * Drop FPU & debug register state storage, or any other
   904          * architecture-specific resources that
   905          * would not be present in a new, untouched process.
  906          */
  907         cpu_thread_exit(td);
  908 
  909         /*
  910          * The last thread is left attached to the process
   911          * so that the whole bundle gets recycled.  Skip
   912          * all this stuff if we never had threads.
   913          * EXIT clears all signs of other threads when
  914          * it goes to single threading, so the last thread always
  915          * takes the short path.
  916          */
  917         if (p->p_flag & P_HADTHREADS) {
  918                 if (p->p_numthreads > 1) {
  919                         atomic_add_int(&td->td_proc->p_exitthreads, 1);
  920                         thread_unlink(td);
  921                         td2 = FIRST_THREAD_IN_PROC(p);
  922                         sched_exit_thread(td2, td);
  923 
  924                         /*
  925                          * The test below is NOT true if we are the
  926                          * sole exiting thread. P_STOPPED_SINGLE is unset
  927                          * in exit1() after it is the only survivor.
  928                          */
  929                         if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
  930                                 if (p->p_numthreads == p->p_suspcount) {
  931                                         thread_lock(p->p_singlethread);
  932                                         wakeup_swapper = thread_unsuspend_one(
  933                                                 p->p_singlethread, p, false);
  934                                         if (wakeup_swapper)
  935                                                 kick_proc0();
  936                                 }
  937                         }
  938 
  939                         PCPU_SET(deadthread, td);
  940                 } else {
  941                         /*
   942                          * The last thread is exiting, but not through exit().
   943                          */
   944                         panic("thread_exit: Last thread exiting on its own");
  945                 }
  946         } 
  947 #ifdef  HWPMC_HOOKS
  948         /*
  949          * If this thread is part of a process that is being tracked by hwpmc(4),
  950          * inform the module of the thread's impending exit.
  951          */
  952         if (PMC_PROC_IS_USING_PMCS(td->td_proc)) {
  953                 PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
  954                 PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT, NULL);
  955         } else if (PMC_SYSTEM_SAMPLING_ACTIVE())
  956                 PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT_LOG, NULL);
  957 #endif
  958         PROC_UNLOCK(p);
  959         PROC_STATLOCK(p);
  960         thread_lock(td);
  961         PROC_SUNLOCK(p);
  962 
  963         /* Do the same timestamp bookkeeping that mi_switch() would do. */
  964         new_switchtime = cpu_ticks();
  965         runtime = new_switchtime - PCPU_GET(switchtime);
  966         td->td_runtime += runtime;
  967         td->td_incruntime += runtime;
  968         PCPU_SET(switchtime, new_switchtime);
  969         PCPU_SET(switchticks, ticks);
  970         VM_CNT_INC(v_swtch);
  971 
  972         /* Save our resource usage in our process. */
  973         td->td_ru.ru_nvcsw++;
  974         ruxagg_locked(p, td);
  975         rucollect(&p->p_ru, &td->td_ru);
  976         PROC_STATUNLOCK(p);
  977 
  978         td->td_state = TDS_INACTIVE;
  979 #ifdef WITNESS
  980         witness_thread_exit(td);
  981 #endif
  982         CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
  983         sched_throw(td);
  984         panic("I'm a teapot!");
  985         /* NOTREACHED */
  986 }
  987 
  988 /*
   989  * Do any thread-specific cleanups that may be needed in wait().
   990  * Called with Giant, proc and schedlock not held.
  991  */
  992 void
  993 thread_wait(struct proc *p)
  994 {
  995         struct thread *td;
  996 
  997         mtx_assert(&Giant, MA_NOTOWNED);
  998         KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()"));
  999         KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking"));
 1000         td = FIRST_THREAD_IN_PROC(p);
 1001         /* Lock the last thread so we spin until it exits cpu_throw(). */
 1002         thread_lock(td);
 1003         thread_unlock(td);
 1004         lock_profile_thread_exit(td);
 1005         cpuset_rel(td->td_cpuset);
 1006         td->td_cpuset = NULL;
 1007         cpu_thread_clean(td);
 1008         thread_cow_free(td);
 1009         callout_drain(&td->td_slpcallout);
 1010         thread_reap();  /* check for zombie threads etc. */
 1011 }
 1012 
 1013 /*
 1014  * Link a thread to a process.
  1015  * Set up anything that needs to be initialized for it to
 1016  * be used by the process.
 1017  */
 1018 void
 1019 thread_link(struct thread *td, struct proc *p)
 1020 {
 1021 
 1022         /*
 1023          * XXX This can't be enabled because it's called for proc0 before
 1024          * its lock has been created.
 1025          * PROC_LOCK_ASSERT(p, MA_OWNED);
 1026          */
 1027         td->td_state    = TDS_INACTIVE;
 1028         td->td_proc     = p;
 1029         td->td_flags    = TDF_INMEM;
 1030 
 1031         LIST_INIT(&td->td_contested);
 1032         LIST_INIT(&td->td_lprof[0]);
 1033         LIST_INIT(&td->td_lprof[1]);
 1034 #ifdef EPOCH_TRACE
 1035         SLIST_INIT(&td->td_epochs);
 1036 #endif
 1037         sigqueue_init(&td->td_sigqueue, p);
 1038         callout_init(&td->td_slpcallout, 1);
 1039         TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
 1040         p->p_numthreads++;
 1041 }
 1042 
 1043 /*
 1044  * Called from:
 1045  *  thread_exit()
 1046  */
 1047 void
 1048 thread_unlink(struct thread *td)
 1049 {
 1050         struct proc *p = td->td_proc;
 1051 
 1052         PROC_LOCK_ASSERT(p, MA_OWNED);
 1053 #ifdef EPOCH_TRACE
 1054         MPASS(SLIST_EMPTY(&td->td_epochs));
 1055 #endif
 1056 
 1057         TAILQ_REMOVE(&p->p_threads, td, td_plist);
 1058         p->p_numthreads--;
 1059         /* could clear a few other things here */
  1060         /* Must NOT clear links to proc! */
 1061 }
 1062 
 1063 static int
 1064 calc_remaining(struct proc *p, int mode)
 1065 {
 1066         int remaining;
 1067 
 1068         PROC_LOCK_ASSERT(p, MA_OWNED);
 1069         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1070         if (mode == SINGLE_EXIT)
 1071                 remaining = p->p_numthreads;
 1072         else if (mode == SINGLE_BOUNDARY)
 1073                 remaining = p->p_numthreads - p->p_boundary_count;
 1074         else if (mode == SINGLE_NO_EXIT || mode == SINGLE_ALLPROC)
 1075                 remaining = p->p_numthreads - p->p_suspcount;
 1076         else
 1077                 panic("calc_remaining: wrong mode %d", mode);
 1078         return (remaining);
 1079 }
 1080 
 1081 static int
 1082 remain_for_mode(int mode)
 1083 {
 1084 
 1085         return (mode == SINGLE_ALLPROC ? 0 : 1);
 1086 }
 1087 
 1088 static int
 1089 weed_inhib(int mode, struct thread *td2, struct proc *p)
 1090 {
 1091         int wakeup_swapper;
 1092 
 1093         PROC_LOCK_ASSERT(p, MA_OWNED);
 1094         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1095         THREAD_LOCK_ASSERT(td2, MA_OWNED);
 1096 
 1097         wakeup_swapper = 0;
 1098 
 1099         /*
 1100          * Since the thread lock is dropped by the scheduler we have
 1101          * to retry to check for races.
 1102          */
 1103 restart:
 1104         switch (mode) {
 1105         case SINGLE_EXIT:
 1106                 if (TD_IS_SUSPENDED(td2)) {
 1107                         wakeup_swapper |= thread_unsuspend_one(td2, p, true);
 1108                         thread_lock(td2);
 1109                         goto restart;
 1110                 }
 1111                 if (TD_CAN_ABORT(td2)) {
 1112                         wakeup_swapper |= sleepq_abort(td2, EINTR);
 1113                         return (wakeup_swapper);
 1114                 }
 1115                 break;
 1116         case SINGLE_BOUNDARY:
 1117         case SINGLE_NO_EXIT:
 1118                 if (TD_IS_SUSPENDED(td2) &&
 1119                     (td2->td_flags & TDF_BOUNDARY) == 0) {
 1120                         wakeup_swapper |= thread_unsuspend_one(td2, p, false);
 1121                         thread_lock(td2);
 1122                         goto restart;
 1123                 }
 1124                 if (TD_CAN_ABORT(td2)) {
 1125                         wakeup_swapper |= sleepq_abort(td2, ERESTART);
 1126                         return (wakeup_swapper);
 1127                 }
 1128                 break;
 1129         case SINGLE_ALLPROC:
 1130                 /*
 1131                  * ALLPROC suspend tries to avoid spurious EINTR for
  1132          * threads sleeping interruptible, by suspending the
 1133                  * thread directly, similarly to sig_suspend_threads().
 1134                  * Since such sleep is not performed at the user
 1135                  * boundary, TDF_BOUNDARY flag is not set, and TDF_ALLPROCSUSP
 1136                  * is used to avoid immediate un-suspend.
 1137                  */
 1138                 if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY |
 1139                     TDF_ALLPROCSUSP)) == 0) {
 1140                         wakeup_swapper |= thread_unsuspend_one(td2, p, false);
 1141                         thread_lock(td2);
 1142                         goto restart;
 1143                 }
 1144                 if (TD_CAN_ABORT(td2)) {
 1145                         if ((td2->td_flags & TDF_SBDRY) == 0) {
 1146                                 thread_suspend_one(td2);
 1147                                 td2->td_flags |= TDF_ALLPROCSUSP;
 1148                         } else {
 1149                                 wakeup_swapper |= sleepq_abort(td2, ERESTART);
 1150                                 return (wakeup_swapper);
 1151                         }
 1152                 }
 1153                 break;
 1154         default:
 1155                 break;
 1156         }
 1157         thread_unlock(td2);
 1158         return (wakeup_swapper);
 1159 }
 1160 
 1161 /*
 1162  * Enforce single-threading.
 1163  *
 1164  * Returns 1 if the caller must abort (another thread is waiting to
 1165  * exit the process or similar). Process is locked!
 1166  * Returns 0 when you are successfully the only thread running.
  1167  * A process has successfully single-threaded in the suspend mode when
  1168  * there are no threads in user mode.  Threads in the kernel must be
  1169  * allowed to continue until they get to the user boundary.  They may even
  1170  * copy out their return values and data before suspending.  They may,
  1171  * however, be accelerated in reaching the user boundary, as we will wake
  1172  * up any sleeping threads that are interruptible (PCATCH).
 1173  */
 1174 int
 1175 thread_single(struct proc *p, int mode)
 1176 {
 1177         struct thread *td;
 1178         struct thread *td2;
 1179         int remaining, wakeup_swapper;
 1180 
 1181         td = curthread;
 1182         KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
 1183             mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
 1184             ("invalid mode %d", mode));
 1185         /*
 1186          * If allowing non-ALLPROC singlethreading for non-curproc
 1187          * callers, calc_remaining() and remain_for_mode() should be
 1188          * adjusted to also account for td->td_proc != p.  For now
 1189          * this is not implemented because it is not used.
 1190          */
 1191         KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) ||
 1192             (mode != SINGLE_ALLPROC && td->td_proc == p),
 1193             ("mode %d proc %p curproc %p", mode, p, td->td_proc));
 1194         mtx_assert(&Giant, MA_NOTOWNED);
 1195         PROC_LOCK_ASSERT(p, MA_OWNED);
 1196 
 1197         if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_ALLPROC)
 1198                 return (0);
 1199 
 1200         /* Is someone already single threading? */
 1201         if (p->p_singlethread != NULL && p->p_singlethread != td)
 1202                 return (1);
 1203 
 1204         if (mode == SINGLE_EXIT) {
 1205                 p->p_flag |= P_SINGLE_EXIT;
 1206                 p->p_flag &= ~P_SINGLE_BOUNDARY;
 1207         } else {
 1208                 p->p_flag &= ~P_SINGLE_EXIT;
 1209                 if (mode == SINGLE_BOUNDARY)
 1210                         p->p_flag |= P_SINGLE_BOUNDARY;
 1211                 else
 1212                         p->p_flag &= ~P_SINGLE_BOUNDARY;
 1213         }
 1214         if (mode == SINGLE_ALLPROC)
 1215                 p->p_flag |= P_TOTAL_STOP;
 1216         p->p_flag |= P_STOPPED_SINGLE;
 1217         PROC_SLOCK(p);
 1218         p->p_singlethread = td;
 1219         remaining = calc_remaining(p, mode);
 1220         while (remaining != remain_for_mode(mode)) {
 1221                 if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
 1222                         goto stopme;
 1223                 wakeup_swapper = 0;
 1224                 FOREACH_THREAD_IN_PROC(p, td2) {
 1225                         if (td2 == td)
 1226                                 continue;
 1227                         thread_lock(td2);
 1228                         td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
 1229                         if (TD_IS_INHIBITED(td2)) {
 1230                                 wakeup_swapper |= weed_inhib(mode, td2, p);
 1231 #ifdef SMP
 1232                         } else if (TD_IS_RUNNING(td2) && td != td2) {
 1233                                 forward_signal(td2);
 1234                                 thread_unlock(td2);
 1235 #endif
 1236                         } else
 1237                                 thread_unlock(td2);
 1238                 }
 1239                 if (wakeup_swapper)
 1240                         kick_proc0();
 1241                 remaining = calc_remaining(p, mode);
 1242 
 1243                 /*
  1244                  * Maybe we suspended some threads; was that enough?
 1245                  */
 1246                 if (remaining == remain_for_mode(mode))
 1247                         break;
 1248 
 1249 stopme:
 1250                 /*
 1251                  * Wake us up when everyone else has suspended.
  1252                  * In the meantime we suspend as well.
 1253                  */
 1254                 thread_suspend_switch(td, p);
 1255                 remaining = calc_remaining(p, mode);
 1256         }
 1257         if (mode == SINGLE_EXIT) {
 1258                 /*
 1259                  * Convert the process to an unthreaded process.  The
  1260                  * SINGLE_EXIT is called by exit1() or execve(); in
 1261                  * both cases other threads must be retired.
 1262                  */
 1263                 KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads"));
 1264                 p->p_singlethread = NULL;
 1265                 p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS);
 1266 
 1267                 /*
 1268                  * Wait for any remaining threads to exit cpu_throw().
 1269                  */
 1270                 while (p->p_exitthreads != 0) {
 1271                         PROC_SUNLOCK(p);
 1272                         PROC_UNLOCK(p);
 1273                         sched_relinquish(td);
 1274                         PROC_LOCK(p);
 1275                         PROC_SLOCK(p);
 1276                 }
 1277         } else if (mode == SINGLE_BOUNDARY) {
 1278                 /*
 1279                  * Wait until all suspended threads are removed from
 1280                  * the processors.  The thread_suspend_check()
 1281                  * increments p_boundary_count while it is still
 1282                  * running, which makes it possible for the execve()
 1283                  * to destroy vmspace while our other threads are
 1284                  * still using the address space.
 1285                  *
 1286                  * We lock the thread, which is only allowed to
 1287                  * succeed after context switch code finished using
 1288                  * the address space.
 1289                  */
 1290                 FOREACH_THREAD_IN_PROC(p, td2) {
 1291                         if (td2 == td)
 1292                                 continue;
 1293                         thread_lock(td2);
 1294                         KASSERT((td2->td_flags & TDF_BOUNDARY) != 0,
 1295                             ("td %p not on boundary", td2));
 1296                         KASSERT(TD_IS_SUSPENDED(td2),
 1297                             ("td %p is not suspended", td2));
 1298                         thread_unlock(td2);
 1299                 }
 1300         }
 1301         PROC_SUNLOCK(p);
 1302         return (0);
 1303 }
 1304 
 1305 bool
 1306 thread_suspend_check_needed(void)
 1307 {
 1308         struct proc *p;
 1309         struct thread *td;
 1310 
 1311         td = curthread;
 1312         p = td->td_proc;
 1313         PROC_LOCK_ASSERT(p, MA_OWNED);
 1314         return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 &&
 1315             (td->td_dbgflags & TDB_SUSPEND) != 0));
 1316 }
 1317 
 1318 /*
  1319  * Called from locations that can safely check to see
 1320  * whether we have to suspend or at least throttle for a
 1321  * single-thread event (e.g. fork).
 1322  *
 1323  * Such locations include userret().
  1324  * If the "return_instead" argument is nonzero, the caller must be able to
  1325  * accept 0 (it may continue) or 1 (it must abort) as a result.
 1326  *
 1327  * The 'return_instead' argument tells the function if it may do a
 1328  * thread_exit() or suspend, or whether the caller must abort and back
 1329  * out instead.
 1330  *
 1331  * If the thread that set the single_threading request has set the
 1332  * P_SINGLE_EXIT bit in the process flags then this call will never return
 1333  * if 'return_instead' is false, but will exit.
 1334  *
 1335  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 1336  *---------------+--------------------+---------------------
 1337  *       0       | returns 0          |   returns 0 or 1
 1338  *               | when ST ends       |   immediately
 1339  *---------------+--------------------+---------------------
 1340  *       1       | thread exits       |   returns 1
 1341  *               |                    |  immediately
 1342  * 0 = thread_exit() or suspension ok,
 1343  * other = return error instead of stopping the thread.
 1344  *
  1345  * While a full suspension is in effect, even a single-threading
  1346  * thread would be suspended if it made this call (but it shouldn't).
 1347  * This call should only be made from places where
 1348  * thread_exit() would be safe as that may be the outcome unless
 1349  * return_instead is set.
 1350  */
 1351 int
 1352 thread_suspend_check(int return_instead)
 1353 {
 1354         struct thread *td;
 1355         struct proc *p;
 1356         int wakeup_swapper;
 1357 
 1358         td = curthread;
 1359         p = td->td_proc;
 1360         mtx_assert(&Giant, MA_NOTOWNED);
 1361         PROC_LOCK_ASSERT(p, MA_OWNED);
 1362         while (thread_suspend_check_needed()) {
 1363                 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 1364                         KASSERT(p->p_singlethread != NULL,
 1365                             ("singlethread not set"));
 1366                         /*
 1367                          * The only suspension in action is a
 1368                          * single-threading. Single threader need not stop.
 1369                          * It is safe to access p->p_singlethread unlocked
 1370                          * because it can only be set to our address by us.
 1371                          */
 1372                         if (p->p_singlethread == td)
 1373                                 return (0);     /* Exempt from stopping. */
 1374                 }
 1375                 if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
 1376                         return (EINTR);
 1377 
  1378                 /* Should we go to the user boundary if we didn't come from there? */
 1379                 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
 1380                     (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
 1381                         return (ERESTART);
 1382 
 1383                 /*
 1384                  * Ignore suspend requests if they are deferred.
 1385                  */
 1386                 if ((td->td_flags & TDF_SBDRY) != 0) {
 1387                         KASSERT(return_instead,
 1388                             ("TDF_SBDRY set for unsafe thread_suspend_check"));
 1389                         KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) !=
 1390                             (TDF_SEINTR | TDF_SERESTART),
 1391                             ("both TDF_SEINTR and TDF_SERESTART"));
 1392                         return (TD_SBDRY_INTR(td) ? TD_SBDRY_ERRNO(td) : 0);
 1393                 }
 1394 
 1395                 /*
 1396                  * If the process is waiting for us to exit,
 1397                  * this thread should just suicide.
 1398                  * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
 1399                  */
 1400                 if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
 1401                         PROC_UNLOCK(p);
 1402 
 1403                         /*
 1404                          * Allow Linux emulation layer to do some work
 1405                          * before thread suicide.
 1406                          */
 1407                         if (__predict_false(p->p_sysent->sv_thread_detach != NULL))
 1408                                 (p->p_sysent->sv_thread_detach)(td);
 1409                         umtx_thread_exit(td);
 1410                         kern_thr_exit(td);
 1411                         panic("stopped thread did not exit");
 1412                 }
 1413 
 1414                 PROC_SLOCK(p);
 1415                 thread_stopped(p);
 1416                 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 1417                         if (p->p_numthreads == p->p_suspcount + 1) {
 1418                                 thread_lock(p->p_singlethread);
 1419                                 wakeup_swapper = thread_unsuspend_one(
 1420                                     p->p_singlethread, p, false);
 1421                                 if (wakeup_swapper)
 1422                                         kick_proc0();
 1423                         }
 1424                 }
 1425                 PROC_UNLOCK(p);
 1426                 thread_lock(td);
 1427                 /*
 1428                  * When a thread suspends, it just
 1429                  * gets taken off all queues.
 1430                  */
 1431                 thread_suspend_one(td);
 1432                 if (return_instead == 0) {
 1433                         p->p_boundary_count++;
 1434                         td->td_flags |= TDF_BOUNDARY;
 1435                 }
 1436                 PROC_SUNLOCK(p);
 1437                 mi_switch(SW_INVOL | SWT_SUSPEND);
 1438                 PROC_LOCK(p);
 1439         }
 1440         return (0);
 1441 }
 1442 
 1443 /*
 1444  * Check for possible stops and suspensions while executing a
 1445  * casueword or similar transiently failing operation.
 1446  *
 1447  * The sleep argument controls whether the function can handle a stop
  1448  * request itself or whether it should return ERESTART so that the request
  1449  * is processed at the kernel/user boundary in ast.
 1450  *
 1451  * Typically, when retrying due to casueword(9) failure (rv == 1), we
  1452  * should handle the stop requests there, with the exception of cases when
  1453  * the thread owns a kernel resource, for instance has busied the umtx
  1454  * key, or when the function returns immediately if thread_check_susp()
  1455  * returned non-zero.  On the other hand, when retrying the whole lock
  1456  * operation, we had better not stop there but delegate the handling to
  1457  * ast.
 1458  *
 1459  * If the request is for thread termination P_SINGLE_EXIT, we cannot
 1460  * handle it at all, and simply return EINTR.
 1461  */
 1462 int
 1463 thread_check_susp(struct thread *td, bool sleep)
 1464 {
 1465         struct proc *p;
 1466         int error;
 1467 
 1468         /*
 1469          * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
 1470          * eventually break the lockstep loop.
 1471          */
 1472         if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
 1473                 return (0);
 1474         error = 0;
 1475         p = td->td_proc;
 1476         PROC_LOCK(p);
 1477         if (p->p_flag & P_SINGLE_EXIT)
 1478                 error = EINTR;
 1479         else if (P_SHOULDSTOP(p) ||
 1480             ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND)))
 1481                 error = sleep ? thread_suspend_check(0) : ERESTART;
 1482         PROC_UNLOCK(p);
 1483         return (error);
 1484 }
 1485 
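       /*
        * A sketch of the retry pattern described above (simplified, not the
        * verbatim umtx code): a transiently failing casueword(9) loop calls
        * thread_check_susp() before retrying, so that a pending stop or
        * single-threading request can make progress:
        *
        *       for (;;) {
        *               rv = casueword32(uaddr, owner, &owner, newval);
        *               if (rv == -1)
        *                       return (EFAULT);
        *               if (rv == 0)
        *                       break;
        *               error = thread_check_susp(td, true);
        *               if (error != 0)
        *                       return (error);
        *       }
        */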
 1486 void
 1487 thread_suspend_switch(struct thread *td, struct proc *p)
 1488 {
 1489 
 1490         KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
 1491         PROC_LOCK_ASSERT(p, MA_OWNED);
 1492         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1493         /*
 1494          * We implement thread_suspend_one in stages here to avoid
 1495          * dropping the proc lock while the thread lock is owned.
 1496          */
 1497         if (p == td->td_proc) {
 1498                 thread_stopped(p);
 1499                 p->p_suspcount++;
 1500         }
 1501         PROC_UNLOCK(p);
 1502         thread_lock(td);
 1503         td->td_flags &= ~TDF_NEEDSUSPCHK;
 1504         TD_SET_SUSPENDED(td);
 1505         sched_sleep(td, 0);
 1506         PROC_SUNLOCK(p);
 1507         DROP_GIANT();
 1508         mi_switch(SW_VOL | SWT_SUSPEND);
 1509         PICKUP_GIANT();
 1510         PROC_LOCK(p);
 1511         PROC_SLOCK(p);
 1512 }
 1513 
 1514 void
 1515 thread_suspend_one(struct thread *td)
 1516 {
 1517         struct proc *p;
 1518 
 1519         p = td->td_proc;
 1520         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1521         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1522         KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
 1523         p->p_suspcount++;
 1524         td->td_flags &= ~TDF_NEEDSUSPCHK;
 1525         TD_SET_SUSPENDED(td);
 1526         sched_sleep(td, 0);
 1527 }
 1528 
 1529 static int
 1530 thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary)
 1531 {
 1532 
 1533         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1534         KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
 1535         TD_CLR_SUSPENDED(td);
 1536         td->td_flags &= ~TDF_ALLPROCSUSP;
 1537         if (td->td_proc == p) {
 1538                 PROC_SLOCK_ASSERT(p, MA_OWNED);
 1539                 p->p_suspcount--;
 1540                 if (boundary && (td->td_flags & TDF_BOUNDARY) != 0) {
 1541                         td->td_flags &= ~TDF_BOUNDARY;
 1542                         p->p_boundary_count--;
 1543                 }
 1544         }
 1545         return (setrunnable(td, 0));
 1546 }
 1547 
 1548 void
 1549 thread_run_flash(struct thread *td)
 1550 {
 1551         struct proc *p;
 1552 
 1553         p = td->td_proc;
 1554         PROC_LOCK_ASSERT(p, MA_OWNED);
 1555 
 1556         if (TD_ON_SLEEPQ(td))
 1557                 sleepq_remove_nested(td);
 1558         else
 1559                 thread_lock(td);
 1560 
 1561         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1562         KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
 1563 
 1564         TD_CLR_SUSPENDED(td);
 1565         PROC_SLOCK(p);
 1566         MPASS(p->p_suspcount > 0);
 1567         p->p_suspcount--;
 1568         PROC_SUNLOCK(p);
 1569         if (setrunnable(td, 0))
 1570                 kick_proc0();
 1571 }
 1572 
 1573 /*
 1574  * Allow all threads blocked by single threading to continue running.
 1575  */
 1576 void
 1577 thread_unsuspend(struct proc *p)
 1578 {
 1579         struct thread *td;
 1580         int wakeup_swapper;
 1581 
 1582         PROC_LOCK_ASSERT(p, MA_OWNED);
 1583         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1584         wakeup_swapper = 0;
 1585         if (!P_SHOULDSTOP(p)) {
 1586                 FOREACH_THREAD_IN_PROC(p, td) {
 1587                         thread_lock(td);
 1588                         if (TD_IS_SUSPENDED(td)) {
 1589                                 wakeup_swapper |= thread_unsuspend_one(td, p,
 1590                                     true);
 1591                         } else
 1592                                 thread_unlock(td);
 1593                 }
 1594         } else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
 1595             p->p_numthreads == p->p_suspcount) {
 1596                 /*
 1597                  * Stopping everything also did the job for the
 1598                  * single-threading request.  Now that we have
 1599                  * downgraded to single-threaded, let it continue.
 1600                  */
 1601                 if (p->p_singlethread->td_proc == p) {
 1602                         thread_lock(p->p_singlethread);
 1603                         wakeup_swapper = thread_unsuspend_one(
 1604                             p->p_singlethread, p, false);
 1605                 }
 1606         }
 1607         if (wakeup_swapper)
 1608                 kick_proc0();
 1609 }
 1610 
 1611 /*
 1612  * End the single-threading mode.
 1613  */
 1614 void
 1615 thread_single_end(struct proc *p, int mode)
 1616 {
 1617         struct thread *td;
 1618         int wakeup_swapper;
 1619 
 1620         KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
 1621             mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
 1622             ("invalid mode %d", mode));
 1623         PROC_LOCK_ASSERT(p, MA_OWNED);
 1624         KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) ||
 1625             (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0),
 1626             ("mode %d does not match P_TOTAL_STOP", mode));
 1627         KASSERT(mode == SINGLE_ALLPROC || p->p_singlethread == curthread,
 1628             ("thread_single_end from other thread %p %p",
 1629             curthread, p->p_singlethread));
 1630         KASSERT(mode != SINGLE_BOUNDARY ||
 1631             (p->p_flag & P_SINGLE_BOUNDARY) != 0,
 1632             ("mis-matched SINGLE_BOUNDARY flags %x", p->p_flag));
 1633         p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY |
 1634             P_TOTAL_STOP);
 1635         PROC_SLOCK(p);
 1636         p->p_singlethread = NULL;
 1637         wakeup_swapper = 0;
 1638         /*
 1639          * If there are other threads, they may now run, unless of
 1640          * course there is a blanket 'stop order' on the process.
 1641          * The single threader, however, must be allowed to continue,
 1642          * as this is a bad place to stop.
 1643          */
 1644         if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) {
 1645                 FOREACH_THREAD_IN_PROC(p, td) {
 1646                         thread_lock(td);
 1647                         if (TD_IS_SUSPENDED(td)) {
 1648                                 wakeup_swapper |= thread_unsuspend_one(td, p,
 1649                                     mode == SINGLE_BOUNDARY);
 1650                         } else
 1651                                 thread_unlock(td);
 1652                 }
 1653         }
 1654         KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
 1655             ("inconsistent boundary count %d", p->p_boundary_count));
 1656         PROC_SUNLOCK(p);
 1657         if (wakeup_swapper)
 1658                 kick_proc0();
 1659 }
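
/*
 * Editor's illustrative sketch -- not part of sys/kern/kern_thread.c.
 * thread_single_end() pairs with thread_single(); a caller that needs
 * its process temporarily single-threaded typically brackets the work
 * as below.  The helper name and the choice of SINGLE_BOUNDARY are
 * hypothetical; error handling is minimal, and p is assumed to be the
 * calling thread's own process.
 */
static int
example_single_threaded_work(struct proc *p)
{
        int error;

        PROC_LOCK(p);
        error = thread_single(p, SINGLE_BOUNDARY);
        if (error != 0) {
                PROC_UNLOCK(p);
                return (error);
        }
        /* ... all other threads are now held at the boundary ... */
        thread_single_end(p, SINGLE_BOUNDARY);
        PROC_UNLOCK(p);
        return (0);
}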
 1660 
 1661 /*
 1662  * Locate a thread by number and return with proc lock held.
 1663  *
 1664  * Thread exit establishes the proc -> tidhash lock ordering, but
 1665  * lookup takes tidhash first and needs to return a locked proc.
 1666  *
 1667  * The problem is worked around by relying on the type-stability of
 1668  * both structures and doing the work in two steps:
 1669  * - tidhash-locked lookup which saves both thread and proc pointers
 1670  * - proc-locked verification that the found thread still matches
 1671  */
 1672 static bool
 1673 tdfind_hash(lwpid_t tid, pid_t pid, struct proc **pp, struct thread **tdp)
 1674 {
 1675 #define RUN_THRESH      16
 1676         struct proc *p;
 1677         struct thread *td;
 1678         int run;
 1679         bool locked;
 1680 
 1681         run = 0;
 1682         rw_rlock(TIDHASHLOCK(tid));
 1683         locked = true;
 1684         LIST_FOREACH(td, TIDHASH(tid), td_hash) {
 1685                 if (td->td_tid != tid) {
 1686                         run++;
 1687                         continue;
 1688                 }
 1689                 p = td->td_proc;
 1690                 if (pid != -1 && p->p_pid != pid) {
 1691                         td = NULL;
 1692                         break;
 1693                 }
 1694                 if (run > RUN_THRESH) {
 1695                         if (rw_try_upgrade(TIDHASHLOCK(tid))) {
 1696                                 LIST_REMOVE(td, td_hash);
 1697                                 LIST_INSERT_HEAD(TIDHASH(td->td_tid),
 1698                                         td, td_hash);
 1699                                 rw_wunlock(TIDHASHLOCK(tid));
 1700                                 locked = false;
 1701                                 break;
 1702                         }
 1703                 }
 1704                 break;
 1705         }
 1706         if (locked)
 1707                 rw_runlock(TIDHASHLOCK(tid));
 1708         if (td == NULL)
 1709                 return (false);
 1710         *pp = p;
 1711         *tdp = td;
 1712         return (true);
 1713 }
 1714 
 1715 struct thread *
 1716 tdfind(lwpid_t tid, pid_t pid)
 1717 {
 1718         struct proc *p;
 1719         struct thread *td;
 1720 
 1721         td = curthread;
 1722         if (td->td_tid == tid) {
 1723                 if (pid != -1 && td->td_proc->p_pid != pid)
 1724                         return (NULL);
 1725                 PROC_LOCK(td->td_proc);
 1726                 return (td);
 1727         }
 1728 
 1729         for (;;) {
 1730                 if (!tdfind_hash(tid, pid, &p, &td))
 1731                         return (NULL);
 1732                 PROC_LOCK(p);
 1733                 if (td->td_tid != tid) {
 1734                         PROC_UNLOCK(p);
 1735                         continue;
 1736                 }
 1737                 if (td->td_proc != p) {
 1738                         PROC_UNLOCK(p);
 1739                         continue;
 1740                 }
 1741                 if (p->p_state == PRS_NEW) {
 1742                         PROC_UNLOCK(p);
 1743                         return (NULL);
 1744                 }
 1745                 return (td);
 1746         }
 1747 }
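
/*
 * Editor's illustrative sketch -- not part of sys/kern/kern_thread.c.
 * tdfind() returns with the containing process locked, so a typical
 * caller looks the thread up, works on it under the proc lock, and
 * then drops that lock.  The helper name and the ESRCH convention are
 * hypothetical.
 */
static int
example_operate_on_tid(lwpid_t tid, pid_t pid)
{
        struct thread *td;
        struct proc *p;

        td = tdfind(tid, pid);          /* pid may be -1 for any process */
        if (td == NULL)
                return (ESRCH);
        p = td->td_proc;
        PROC_LOCK_ASSERT(p, MA_OWNED);
        /* ... inspect or signal td while the proc lock keeps it stable ... */
        PROC_UNLOCK(p);
        return (0);
}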
 1748 
 1749 void
 1750 tidhash_add(struct thread *td)
 1751 {
 1752         rw_wlock(TIDHASHLOCK(td->td_tid));
 1753         LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
 1754         rw_wunlock(TIDHASHLOCK(td->td_tid));
 1755 }
 1756 
 1757 void
 1758 tidhash_remove(struct thread *td)
 1759 {
 1760 
 1761         rw_wlock(TIDHASHLOCK(td->td_tid));
 1762         LIST_REMOVE(td, td_hash);
 1763         rw_wunlock(TIDHASHLOCK(td->td_tid));
 1764 }
