
FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_thread.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
    5  *  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice(s), this list of conditions and the following disclaimer as
   12  *    the first lines of this file unmodified other than the possible
   13  *    addition of one or more copyright notices.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice(s), this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
   19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   21  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
   22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   25  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
   28  * DAMAGE.
   29  */
   30 
   31 #include "opt_witness.h"
   32 #include "opt_hwpmc_hooks.h"
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/kernel.h>
   40 #include <sys/lock.h>
   41 #include <sys/mutex.h>
   42 #include <sys/proc.h>
   43 #include <sys/bitstring.h>
   44 #include <sys/epoch.h>
   45 #include <sys/rangelock.h>
   46 #include <sys/resourcevar.h>
   47 #include <sys/sdt.h>
   48 #include <sys/smp.h>
   49 #include <sys/sched.h>
   50 #include <sys/sleepqueue.h>
   51 #include <sys/selinfo.h>
   52 #include <sys/syscallsubr.h>
   53 #include <sys/dtrace_bsd.h>
   54 #include <sys/sysent.h>
   55 #include <sys/turnstile.h>
   56 #include <sys/taskqueue.h>
   57 #include <sys/ktr.h>
   58 #include <sys/rwlock.h>
   59 #include <sys/umtx.h>
   60 #include <sys/vmmeter.h>
   61 #include <sys/cpuset.h>
   62 #ifdef  HWPMC_HOOKS
   63 #include <sys/pmckern.h>
   64 #endif
   65 #include <sys/priv.h>
   66 
   67 #include <security/audit/audit.h>
   68 
   69 #include <vm/pmap.h>
   70 #include <vm/vm.h>
   71 #include <vm/vm_extern.h>
   72 #include <vm/uma.h>
   73 #include <vm/vm_phys.h>
   74 #include <sys/eventhandler.h>
   75 
   76 /*
    77  * The asserts below verify the stability of the struct thread and
    78  * struct proc layouts, as exposed by the KBI to modules.  On head,
    79  * the KBI is allowed to drift; changes to the structures must be
    80  * accompanied by matching updates to the asserts.
    81  *
    82  * On the stable branches after the KBI freeze, these conditions must
    83  * not be violated.  Typically, new fields are added at the end of
    84  * the structures.
   85  */
   86 #ifdef __amd64__
   87 _Static_assert(offsetof(struct thread, td_flags) == 0xfc,
   88     "struct thread KBI td_flags");
   89 _Static_assert(offsetof(struct thread, td_pflags) == 0x104,
   90     "struct thread KBI td_pflags");
   91 _Static_assert(offsetof(struct thread, td_frame) == 0x4a0,
   92     "struct thread KBI td_frame");
   93 _Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0,
   94     "struct thread KBI td_emuldata");
   95 _Static_assert(offsetof(struct proc, p_flag) == 0xb8,
   96     "struct proc KBI p_flag");
   97 _Static_assert(offsetof(struct proc, p_pid) == 0xc4,
   98     "struct proc KBI p_pid");
   99 _Static_assert(offsetof(struct proc, p_filemon) == 0x3c0,
  100     "struct proc KBI p_filemon");
  101 _Static_assert(offsetof(struct proc, p_comm) == 0x3d8,
  102     "struct proc KBI p_comm");
  103 _Static_assert(offsetof(struct proc, p_emuldata) == 0x4b8,
  104     "struct proc KBI p_emuldata");
  105 #endif
  106 #ifdef __i386__
  107 _Static_assert(offsetof(struct thread, td_flags) == 0x98,
  108     "struct thread KBI td_flags");
  109 _Static_assert(offsetof(struct thread, td_pflags) == 0xa0,
  110     "struct thread KBI td_pflags");
  111 _Static_assert(offsetof(struct thread, td_frame) == 0x300,
  112     "struct thread KBI td_frame");
  113 _Static_assert(offsetof(struct thread, td_emuldata) == 0x344,
  114     "struct thread KBI td_emuldata");
  115 _Static_assert(offsetof(struct proc, p_flag) == 0x6c,
  116     "struct proc KBI p_flag");
  117 _Static_assert(offsetof(struct proc, p_pid) == 0x78,
  118     "struct proc KBI p_pid");
  119 _Static_assert(offsetof(struct proc, p_filemon) == 0x26c,
  120     "struct proc KBI p_filemon");
  121 _Static_assert(offsetof(struct proc, p_comm) == 0x280,
  122     "struct proc KBI p_comm");
  123 _Static_assert(offsetof(struct proc, p_emuldata) == 0x30c,
  124     "struct proc KBI p_emuldata");
  125 #endif
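/*
 * Sketch of how the pattern above extends (the TD_TID_KBI_OFFSET constant
 * is hypothetical, not taken from this file): any other KBI-sensitive
 * member can be pinned the same way, so a layout change breaks the build
 * instead of silently breaking binary modules compiled against the frozen
 * KBI.
 *
 *	_Static_assert(offsetof(struct thread, td_tid) == TD_TID_KBI_OFFSET,
 *	    "struct thread KBI td_tid");
 */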
  126 
  127 SDT_PROVIDER_DECLARE(proc);
  128 SDT_PROBE_DEFINE(proc, , , lwp__exit);
  129 
  130 /*
   131  * Thread-related storage.
  132  */
  133 static uma_zone_t thread_zone;
  134 
  135 struct thread_domain_data {
  136         struct thread   *tdd_zombies;
  137         int             tdd_reapticks;
  138 } __aligned(CACHE_LINE_SIZE);
  139 
  140 static struct thread_domain_data thread_domain_data[MAXMEMDOM];
  141 
  142 static struct task      thread_reap_task;
  143 static struct callout   thread_reap_callout;
  144 
  145 static void thread_zombie(struct thread *);
  146 static void thread_reap(void);
  147 static void thread_reap_all(void);
  148 static void thread_reap_task_cb(void *, int);
  149 static void thread_reap_callout_cb(void *);
  150 static int thread_unsuspend_one(struct thread *td, struct proc *p,
  151     bool boundary);
  152 static void thread_free_batched(struct thread *td);
  153 
  154 static __exclusive_cache_line struct mtx tid_lock;
  155 static bitstr_t *tid_bitmap;
  156 
  157 static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");
  158 
  159 static int maxthread;
  160 SYSCTL_INT(_kern, OID_AUTO, maxthread, CTLFLAG_RDTUN,
  161     &maxthread, 0, "Maximum number of threads");
  162 
  163 static __exclusive_cache_line int nthreads;
  164 
  165 static LIST_HEAD(tidhashhead, thread) *tidhashtbl;
  166 static u_long   tidhash;
  167 static u_long   tidhashlock;
  168 static struct   rwlock *tidhashtbl_lock;
  169 #define TIDHASH(tid)            (&tidhashtbl[(tid) & tidhash])
  170 #define TIDHASHLOCK(tid)        (&tidhashtbl_lock[(tid) & tidhashlock])
  171 
  172 EVENTHANDLER_LIST_DEFINE(thread_ctor);
  173 EVENTHANDLER_LIST_DEFINE(thread_dtor);
  174 EVENTHANDLER_LIST_DEFINE(thread_init);
  175 EVENTHANDLER_LIST_DEFINE(thread_fini);
  176 
  177 static bool
  178 thread_count_inc_try(void)
  179 {
  180         int nthreads_new;
  181 
  182         nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
  183         if (nthreads_new >= maxthread - 100) {
  184                 if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 ||
  185                     nthreads_new >= maxthread) {
  186                         atomic_subtract_int(&nthreads, 1);
  187                         return (false);
  188                 }
  189         }
  190         return (true);
  191 }
  192 
  193 static bool
  194 thread_count_inc(void)
  195 {
  196         static struct timeval lastfail;
  197         static int curfail;
  198 
  199         thread_reap();
  200         if (thread_count_inc_try()) {
  201                 return (true);
  202         }
  203 
  204         thread_reap_all();
  205         if (thread_count_inc_try()) {
  206                 return (true);
  207         }
  208 
  209         if (ppsratecheck(&lastfail, &curfail, 1)) {
  210                 printf("maxthread limit exceeded by uid %u "
  211                     "(pid %d); consider increasing kern.maxthread\n",
  212                     curthread->td_ucred->cr_ruid, curproc->p_pid);
  213         }
  214         return (false);
  215 }
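/*
 * Worked example of the limits above (the maxthread value is hypothetical):
 * with maxthread = 10000, a prospective new total of 9900 or more triggers
 * the PRIV_MAXPROC check, so unprivileged creation fails once only the last
 * 100 slots remain; a prospective new total of 10000 or more is rejected
 * for everyone.  In both failure cases the speculative increment is rolled
 * back.
 */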
  216 
  217 static void
  218 thread_count_sub(int n)
  219 {
  220 
  221         atomic_subtract_int(&nthreads, n);
  222 }
  223 
  224 static void
  225 thread_count_dec(void)
  226 {
  227 
  228         thread_count_sub(1);
  229 }
  230 
  231 static lwpid_t
  232 tid_alloc(void)
  233 {
  234         static lwpid_t trytid;
  235         lwpid_t tid;
  236 
  237         mtx_lock(&tid_lock);
  238         /*
  239          * It is an invariant that the bitmap is big enough to hold maxthread
  240          * IDs. If we got to this point there has to be at least one free.
  241          */
  242         if (trytid >= maxthread)
  243                 trytid = 0;
  244         bit_ffc_at(tid_bitmap, trytid, maxthread, &tid);
  245         if (tid == -1) {
  246                 KASSERT(trytid != 0, ("unexpectedly ran out of IDs"));
  247                 trytid = 0;
  248                 bit_ffc_at(tid_bitmap, trytid, maxthread, &tid);
  249                 KASSERT(tid != -1, ("unexpectedly ran out of IDs"));
  250         }
  251         bit_set(tid_bitmap, tid);
  252         trytid = tid + 1;
  253         mtx_unlock(&tid_lock);
  254         return (tid + NO_PID);
  255 }
  256 
  257 static void
  258 tid_free_locked(lwpid_t rtid)
  259 {
  260         lwpid_t tid;
  261 
  262         mtx_assert(&tid_lock, MA_OWNED);
  263         KASSERT(rtid >= NO_PID,
  264             ("%s: invalid tid %d\n", __func__, rtid));
  265         tid = rtid - NO_PID;
  266         KASSERT(bit_test(tid_bitmap, tid) != 0,
  267             ("thread ID %d not allocated\n", rtid));
  268         bit_clear(tid_bitmap, tid);
  269 }
  270 
  271 static void
  272 tid_free(lwpid_t rtid)
  273 {
  274 
  275         mtx_lock(&tid_lock);
  276         tid_free_locked(rtid);
  277         mtx_unlock(&tid_lock);
  278 }
  279 
  280 static void
  281 tid_free_batch(lwpid_t *batch, int n)
  282 {
  283         int i;
  284 
  285         mtx_lock(&tid_lock);
  286         for (i = 0; i < n; i++) {
  287                 tid_free_locked(batch[i]);
  288         }
  289         mtx_unlock(&tid_lock);
  290 }
  291 
  292 /*
   293  * Batching for thread reaping.
  294  */
  295 struct tidbatch {
  296         lwpid_t tab[16];
  297         int n;
  298 };
  299 
  300 static void
  301 tidbatch_prep(struct tidbatch *tb)
  302 {
  303 
  304         tb->n = 0;
  305 }
  306 
  307 static void
  308 tidbatch_add(struct tidbatch *tb, struct thread *td)
  309 {
  310 
  311         KASSERT(tb->n < nitems(tb->tab),
  312             ("%s: count too high %d", __func__, tb->n));
  313         tb->tab[tb->n] = td->td_tid;
  314         tb->n++;
  315 }
  316 
  317 static void
  318 tidbatch_process(struct tidbatch *tb)
  319 {
  320 
  321         KASSERT(tb->n <= nitems(tb->tab),
  322             ("%s: count too high %d", __func__, tb->n));
  323         if (tb->n == nitems(tb->tab)) {
  324                 tid_free_batch(tb->tab, tb->n);
  325                 tb->n = 0;
  326         }
  327 }
  328 
  329 static void
  330 tidbatch_final(struct tidbatch *tb)
  331 {
  332 
  333         KASSERT(tb->n <= nitems(tb->tab),
  334             ("%s: count too high %d", __func__, tb->n));
  335         if (tb->n != 0) {
  336                 tid_free_batch(tb->tab, tb->n);
  337         }
  338 }
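/*
 * Usage sketch of the tidbatch helpers (condensed from thread_reap_domain()
 * below; next_zombie() is a hypothetical iterator): IDs are accumulated
 * locally and tid_lock is taken only once per full batch, plus once at the
 * end for any remainder.
 *
 *	struct tidbatch tb;
 *
 *	tidbatch_prep(&tb);
 *	while ((td = next_zombie()) != NULL) {
 *		tidbatch_add(&tb, td);
 *		tidbatch_process(&tb);	(flushes only when the batch fills)
 *	}
 *	tidbatch_final(&tb);		(flushes whatever is left)
 */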
  339 
  340 /*
  341  * Prepare a thread for use.
  342  */
  343 static int
  344 thread_ctor(void *mem, int size, void *arg, int flags)
  345 {
  346         struct thread   *td;
  347 
  348         td = (struct thread *)mem;
  349         td->td_state = TDS_INACTIVE;
  350         td->td_lastcpu = td->td_oncpu = NOCPU;
  351 
  352         /*
  353          * Note that td_critnest begins life as 1 because the thread is not
  354          * running and is thereby implicitly waiting to be on the receiving
  355          * end of a context switch.
  356          */
  357         td->td_critnest = 1;
  358         td->td_lend_user_pri = PRI_MAX;
  359 #ifdef AUDIT
  360         audit_thread_alloc(td);
  361 #endif
  362 #ifdef KDTRACE_HOOKS
  363         kdtrace_thread_ctor(td);
  364 #endif
  365         umtx_thread_alloc(td);
  366         MPASS(td->td_sel == NULL);
  367         return (0);
  368 }
  369 
  370 /*
  371  * Reclaim a thread after use.
  372  */
  373 static void
  374 thread_dtor(void *mem, int size, void *arg)
  375 {
  376         struct thread *td;
  377 
  378         td = (struct thread *)mem;
  379 
  380 #ifdef INVARIANTS
  381         /* Verify that this thread is in a safe state to free. */
  382         switch (td->td_state) {
  383         case TDS_INHIBITED:
  384         case TDS_RUNNING:
  385         case TDS_CAN_RUN:
  386         case TDS_RUNQ:
  387                 /*
  388                  * We must never unlink a thread that is in one of
  389                  * these states, because it is currently active.
  390                  */
  391                 panic("bad state for thread unlinking");
  392                 /* NOTREACHED */
  393         case TDS_INACTIVE:
  394                 break;
  395         default:
  396                 panic("bad thread state");
  397                 /* NOTREACHED */
  398         }
  399 #endif
  400 #ifdef AUDIT
  401         audit_thread_free(td);
  402 #endif
  403 #ifdef KDTRACE_HOOKS
  404         kdtrace_thread_dtor(td);
  405 #endif
   406         /* Free all OSD associated with this thread. */
  407         osd_thread_exit(td);
  408         td_softdep_cleanup(td);
  409         MPASS(td->td_su == NULL);
  410         seltdfini(td);
  411 }
  412 
  413 /*
  414  * Initialize type-stable parts of a thread (when newly created).
  415  */
  416 static int
  417 thread_init(void *mem, int size, int flags)
  418 {
  419         struct thread *td;
  420 
  421         td = (struct thread *)mem;
  422 
  423         td->td_allocdomain = vm_phys_domain(vtophys(td));
  424         td->td_sleepqueue = sleepq_alloc();
  425         td->td_turnstile = turnstile_alloc();
  426         td->td_rlqe = NULL;
  427         EVENTHANDLER_DIRECT_INVOKE(thread_init, td);
  428         umtx_thread_init(td);
  429         td->td_kstack = 0;
  430         td->td_sel = NULL;
  431         return (0);
  432 }
  433 
  434 /*
  435  * Tear down type-stable parts of a thread (just before being discarded).
  436  */
  437 static void
  438 thread_fini(void *mem, int size)
  439 {
  440         struct thread *td;
  441 
  442         td = (struct thread *)mem;
  443         EVENTHANDLER_DIRECT_INVOKE(thread_fini, td);
  444         rlqentry_free(td->td_rlqe);
  445         turnstile_free(td->td_turnstile);
  446         sleepq_free(td->td_sleepqueue);
  447         umtx_thread_fini(td);
  448         MPASS(td->td_sel == NULL);
  449 }
  450 
  451 /*
   452  * For a newly created process, link up all the structures and its
   453  * initial thread.
   454  * Called from:
  455  * {arch}/{arch}/machdep.c   {arch}_init(), init386() etc.
  456  * proc_dtor() (should go away)
  457  * proc_init()
  458  */
  459 void
  460 proc_linkup0(struct proc *p, struct thread *td)
  461 {
  462         TAILQ_INIT(&p->p_threads);           /* all threads in proc */
  463         proc_linkup(p, td);
  464 }
  465 
  466 void
  467 proc_linkup(struct proc *p, struct thread *td)
  468 {
  469 
  470         sigqueue_init(&p->p_sigqueue, p);
  471         p->p_ksi = ksiginfo_alloc(1);
  472         if (p->p_ksi != NULL) {
  473                 /* XXX p_ksi may be null if ksiginfo zone is not ready */
  474                 p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
  475         }
  476         LIST_INIT(&p->p_mqnotifier);
  477         p->p_numthreads = 0;
  478         thread_link(td, p);
  479 }
  480 
  481 extern int max_threads_per_proc;
  482 
  483 /*
  484  * Initialize global thread allocation resources.
  485  */
  486 void
  487 threadinit(void)
  488 {
  489         u_long i;
  490         lwpid_t tid0;
  491         uint32_t flags;
  492 
  493         /*
  494          * Place an upper limit on threads which can be allocated.
  495          *
  496          * Note that other factors may make the de facto limit much lower.
  497          *
  498          * Platform limits are somewhat arbitrary but deemed "more than good
   499  * enough" for the foreseeable future.
  500          */
  501         if (maxthread == 0) {
  502 #ifdef _LP64
  503                 maxthread = MIN(maxproc * max_threads_per_proc, 1000000);
  504 #else
  505                 maxthread = MIN(maxproc * max_threads_per_proc, 100000);
  506 #endif
  507         }
  508 
  509         mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
  510         tid_bitmap = bit_alloc(maxthread, M_TIDHASH, M_WAITOK);
  511         /*
  512          * Handle thread0.
  513          */
  514         thread_count_inc();
  515         tid0 = tid_alloc();
  516         if (tid0 != THREAD0_TID)
  517                 panic("tid0 %d != %d\n", tid0, THREAD0_TID);
  518 
  519         flags = UMA_ZONE_NOFREE;
  520 #ifdef __aarch64__
  521         /*
  522          * Force thread structures to be allocated from the direct map.
  523          * Otherwise, superpage promotions and demotions may temporarily
  524          * invalidate thread structure mappings.  For most dynamically allocated
  525          * structures this is not a problem, but translation faults cannot be
  526          * handled without accessing curthread.
  527          */
  528         flags |= UMA_ZONE_CONTIG;
  529 #endif
  530         thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
  531             thread_ctor, thread_dtor, thread_init, thread_fini,
  532             32 - 1, flags);
  533         tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
  534         tidhashlock = (tidhash + 1) / 64;
  535         if (tidhashlock > 0)
  536                 tidhashlock--;
  537         tidhashtbl_lock = malloc(sizeof(*tidhashtbl_lock) * (tidhashlock + 1),
  538             M_TIDHASH, M_WAITOK | M_ZERO);
  539         for (i = 0; i < tidhashlock + 1; i++)
  540                 rw_init(&tidhashtbl_lock[i], "tidhash");
  541 
  542         TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL);
  543         callout_init(&thread_reap_callout, 1);
  544         callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL);
  545 }
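/*
 * Worked example of the sizing above (numbers illustrative, assuming
 * hashinit() rounds its element hint down to a power of two): with
 * maxproc = 100000, hashinit(50000, ...) yields 32768 buckets and
 * tidhash = 32767 (a mask).  Then (tidhash + 1) / 64 = 512 locks, kept as
 * the mask tidhashlock = 511, i.e. each rwlock covers 64 consecutive hash
 * buckets.
 */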
  546 
  547 /*
  548  * Place an unused thread on the zombie list.
  549  */
  550 void
  551 thread_zombie(struct thread *td)
  552 {
  553         struct thread_domain_data *tdd;
  554         struct thread *ztd;
  555 
  556         tdd = &thread_domain_data[td->td_allocdomain];
  557         ztd = atomic_load_ptr(&tdd->tdd_zombies);
  558         for (;;) {
  559                 td->td_zombie = ztd;
  560                 if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
  561                     (uintptr_t *)&ztd, (uintptr_t)td))
  562                         break;
  563                 continue;
  564         }
  565 }
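/*
 * Sketch of how the lock-free push above pairs with its consumer (see
 * thread_reap_domain() below): producers link new zombies onto the head
 * with a release CAS, while the consumer detaches the entire chain in a
 * single atomic swap and then walks it without further synchronization.
 *
 *	itd = (struct thread *)atomic_swap_ptr(
 *	    (uintptr_t *)&tdd->tdd_zombies, (uintptr_t)NULL);
 *	while (itd != NULL) {
 *		ntd = itd->td_zombie;
 *		...free itd...
 *		itd = ntd;
 *	}
 */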
  566 
  567 /*
  568  * Release a thread that has exited after cpu_throw().
  569  */
  570 void
  571 thread_stash(struct thread *td)
  572 {
  573         atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
  574         thread_zombie(td);
  575 }
  576 
  577 /*
  578  * Reap zombies from passed domain.
  579  */
  580 static void
  581 thread_reap_domain(struct thread_domain_data *tdd)
  582 {
  583         struct thread *itd, *ntd;
  584         struct tidbatch tidbatch;
  585         struct credbatch credbatch;
  586         int tdcount;
  587         struct plimit *lim;
  588         int limcount;
  589 
  590         /*
  591          * Reading upfront is pessimal if followed by concurrent atomic_swap,
  592          * but most of the time the list is empty.
  593          */
  594         if (tdd->tdd_zombies == NULL)
  595                 return;
  596 
  597         itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies,
  598             (uintptr_t)NULL);
  599         if (itd == NULL)
  600                 return;
  601 
  602         /*
   603          * Multiple CPUs can get here; the race is fine, as ticks is
   604          * only advisory.
  605          */
  606         tdd->tdd_reapticks = ticks;
  607 
  608         tidbatch_prep(&tidbatch);
  609         credbatch_prep(&credbatch);
  610         tdcount = 0;
  611         lim = NULL;
  612         limcount = 0;
  613 
  614         while (itd != NULL) {
  615                 ntd = itd->td_zombie;
  616                 EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd);
  617                 tidbatch_add(&tidbatch, itd);
  618                 credbatch_add(&credbatch, itd);
  619                 MPASS(itd->td_limit != NULL);
  620                 if (lim != itd->td_limit) {
  621                         if (limcount != 0) {
  622                                 lim_freen(lim, limcount);
  623                                 limcount = 0;
  624                         }
  625                 }
  626                 lim = itd->td_limit;
  627                 limcount++;
  628                 thread_free_batched(itd);
  629                 tidbatch_process(&tidbatch);
  630                 credbatch_process(&credbatch);
  631                 tdcount++;
  632                 if (tdcount == 32) {
  633                         thread_count_sub(tdcount);
  634                         tdcount = 0;
  635                 }
  636                 itd = ntd;
  637         }
  638 
  639         tidbatch_final(&tidbatch);
  640         credbatch_final(&credbatch);
  641         if (tdcount != 0) {
  642                 thread_count_sub(tdcount);
  643         }
  644         MPASS(limcount != 0);
  645         lim_freen(lim, limcount);
  646 }
  647 
  648 /*
  649  * Reap zombies from all domains.
  650  */
  651 static void
  652 thread_reap_all(void)
  653 {
  654         struct thread_domain_data *tdd;
  655         int i, domain;
  656 
  657         domain = PCPU_GET(domain);
  658         for (i = 0; i < vm_ndomains; i++) {
  659                 tdd = &thread_domain_data[(i + domain) % vm_ndomains];
  660                 thread_reap_domain(tdd);
  661         }
  662 }
  663 
  664 /*
  665  * Reap zombies from local domain.
  666  */
  667 static void
  668 thread_reap(void)
  669 {
  670         struct thread_domain_data *tdd;
  671         int domain;
  672 
  673         domain = PCPU_GET(domain);
  674         tdd = &thread_domain_data[domain];
  675 
  676         thread_reap_domain(tdd);
  677 }
  678 
  679 static void
  680 thread_reap_task_cb(void *arg __unused, int pending __unused)
  681 {
  682 
  683         thread_reap_all();
  684 }
  685 
  686 static void
  687 thread_reap_callout_cb(void *arg __unused)
  688 {
  689         struct thread_domain_data *tdd;
  690         int i, cticks, lticks;
  691         bool wantreap;
  692 
  693         wantreap = false;
  694         cticks = atomic_load_int(&ticks);
  695         for (i = 0; i < vm_ndomains; i++) {
  696                 tdd = &thread_domain_data[i];
  697                 lticks = tdd->tdd_reapticks;
  698                 if (tdd->tdd_zombies != NULL &&
  699                     (u_int)(cticks - lticks) > 5 * hz) {
  700                         wantreap = true;
  701                         break;
  702                 }
  703         }
  704 
  705         if (wantreap)
  706                 taskqueue_enqueue(taskqueue_thread, &thread_reap_task);
  707         callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL);
  708 }
  709 
  710 /*
  711  * Allocate a thread.
  712  */
  713 struct thread *
  714 thread_alloc(int pages)
  715 {
  716         struct thread *td;
  717         lwpid_t tid;
  718 
  719         if (!thread_count_inc()) {
  720                 return (NULL);
  721         }
  722 
  723         tid = tid_alloc();
  724         td = uma_zalloc(thread_zone, M_WAITOK);
  725         KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
  726         if (!vm_thread_new(td, pages)) {
  727                 uma_zfree(thread_zone, td);
  728                 tid_free(tid);
  729                 thread_count_dec();
  730                 return (NULL);
  731         }
  732         td->td_tid = tid;
  733         cpu_thread_alloc(td);
  734         EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td);
  735         return (td);
  736 }
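/*
 * Usage sketch for a hypothetical caller (in the spirit of the thr_new()
 * and fork() paths; the error value chosen here is illustrative):
 * thread_alloc() returns NULL when the thread limit is hit or when the
 * kernel stack cannot be allocated, so callers must handle failure.
 *
 *	newtd = thread_alloc(0);	(0 selects the default kstack size)
 *	if (newtd == NULL)
 *		return (EPROCLIM);
 *	...set up newtd and attach it with thread_link()...
 */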
  737 
  738 int
  739 thread_alloc_stack(struct thread *td, int pages)
  740 {
  741 
  742         KASSERT(td->td_kstack == 0,
  743             ("thread_alloc_stack called on a thread with kstack"));
  744         if (!vm_thread_new(td, pages))
  745                 return (0);
  746         cpu_thread_alloc(td);
  747         return (1);
  748 }
  749 
  750 /*
  751  * Deallocate a thread.
  752  */
  753 static void
  754 thread_free_batched(struct thread *td)
  755 {
  756 
  757         lock_profile_thread_exit(td);
  758         if (td->td_cpuset)
  759                 cpuset_rel(td->td_cpuset);
  760         td->td_cpuset = NULL;
  761         cpu_thread_free(td);
  762         if (td->td_kstack != 0)
  763                 vm_thread_dispose(td);
  764         callout_drain(&td->td_slpcallout);
  765         /*
  766          * Freeing handled by the caller.
  767          */
  768         td->td_tid = -1;
  769         uma_zfree(thread_zone, td);
  770 }
  771 
  772 void
  773 thread_free(struct thread *td)
  774 {
  775         lwpid_t tid;
  776 
  777         EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td);
  778         tid = td->td_tid;
  779         thread_free_batched(td);
  780         tid_free(tid);
  781         thread_count_dec();
  782 }
  783 
  784 void
  785 thread_cow_get_proc(struct thread *newtd, struct proc *p)
  786 {
  787 
  788         PROC_LOCK_ASSERT(p, MA_OWNED);
  789         newtd->td_realucred = crcowget(p->p_ucred);
  790         newtd->td_ucred = newtd->td_realucred;
  791         newtd->td_limit = lim_hold(p->p_limit);
  792         newtd->td_cowgen = p->p_cowgen;
  793 }
  794 
  795 void
  796 thread_cow_get(struct thread *newtd, struct thread *td)
  797 {
  798 
  799         MPASS(td->td_realucred == td->td_ucred);
  800         newtd->td_realucred = crcowget(td->td_realucred);
  801         newtd->td_ucred = newtd->td_realucred;
  802         newtd->td_limit = lim_hold(td->td_limit);
  803         newtd->td_cowgen = td->td_cowgen;
  804 }
  805 
  806 void
  807 thread_cow_free(struct thread *td)
  808 {
  809 
  810         if (td->td_realucred != NULL)
  811                 crcowfree(td);
  812         if (td->td_limit != NULL)
  813                 lim_free(td->td_limit);
  814 }
  815 
  816 void
  817 thread_cow_update(struct thread *td)
  818 {
  819         struct proc *p;
  820         struct ucred *oldcred;
  821         struct plimit *oldlimit;
  822 
  823         p = td->td_proc;
  824         oldlimit = NULL;
  825         PROC_LOCK(p);
  826         oldcred = crcowsync();
  827         if (td->td_limit != p->p_limit) {
  828                 oldlimit = td->td_limit;
  829                 td->td_limit = lim_hold(p->p_limit);
  830         }
  831         td->td_cowgen = p->p_cowgen;
  832         PROC_UNLOCK(p);
  833         if (oldcred != NULL)
  834                 crfree(oldcred);
  835         if (oldlimit != NULL)
  836                 lim_free(oldlimit);
  837 }
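/*
 * Sketch of the consumer side of the COW generation counter (simplified
 * from the return-to-userspace pattern; the exact call site is not in this
 * file): once some per-process COW data has been replaced, p_cowgen no
 * longer matches the thread's cached td_cowgen, and the thread refreshes
 * its references before going back to user mode.
 *
 *	if (td->td_cowgen != p->p_cowgen)
 *		thread_cow_update(td);
 */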
  838 
  839 /*
  840  * Discard the current thread and exit from its context.
  841  * Always called with scheduler locked.
  842  *
  843  * Because we can't free a thread while we're operating under its context,
  844  * push the current thread into our CPU's deadthread holder. This means
  845  * we needn't worry about someone else grabbing our context before we
  846  * do a cpu_throw().
  847  */
  848 void
  849 thread_exit(void)
  850 {
  851         uint64_t runtime, new_switchtime;
  852         struct thread *td;
  853         struct thread *td2;
  854         struct proc *p;
  855         int wakeup_swapper;
  856 
  857         td = curthread;
  858         p = td->td_proc;
  859 
  860         PROC_SLOCK_ASSERT(p, MA_OWNED);
  861         mtx_assert(&Giant, MA_NOTOWNED);
  862 
  863         PROC_LOCK_ASSERT(p, MA_OWNED);
  864         KASSERT(p != NULL, ("thread exiting without a process"));
  865         CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
  866             (long)p->p_pid, td->td_name);
  867         SDT_PROBE0(proc, , , lwp__exit);
  868         KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
  869         MPASS(td->td_realucred == td->td_ucred);
  870 
  871         /*
   872          * Drop FPU and debug register state storage, or any other
   873          * architecture-specific resources that would not be present
   874          * in a new, untouched process.
  875          */
  876         cpu_thread_exit(td);
  877 
  878         /*
   879          * The last thread is left attached to the process so that
   880          * the whole bundle gets recycled.  Skip all this stuff if we
   881          * never had threads.
   882          * EXIT clears all sign of other threads when it goes to
   883          * single threading, so the last thread always takes the
   884          * short path.
  885          */
  886         if (p->p_flag & P_HADTHREADS) {
  887                 if (p->p_numthreads > 1) {
  888                         atomic_add_int(&td->td_proc->p_exitthreads, 1);
  889                         thread_unlink(td);
  890                         td2 = FIRST_THREAD_IN_PROC(p);
  891                         sched_exit_thread(td2, td);
  892 
  893                         /*
  894                          * The test below is NOT true if we are the
  895                          * sole exiting thread. P_STOPPED_SINGLE is unset
  896                          * in exit1() after it is the only survivor.
  897                          */
  898                         if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
  899                                 if (p->p_numthreads == p->p_suspcount) {
  900                                         thread_lock(p->p_singlethread);
  901                                         wakeup_swapper = thread_unsuspend_one(
  902                                                 p->p_singlethread, p, false);
  903                                         if (wakeup_swapper)
  904                                                 kick_proc0();
  905                                 }
  906                         }
  907 
  908                         PCPU_SET(deadthread, td);
  909                 } else {
  910                         /*
   911                          * The last thread is exiting, but not through exit().
  912                          */
  913                         panic ("thread_exit: Last thread exiting on its own");
  914                 }
  915         } 
  916 #ifdef  HWPMC_HOOKS
  917         /*
  918          * If this thread is part of a process that is being tracked by hwpmc(4),
  919          * inform the module of the thread's impending exit.
  920          */
  921         if (PMC_PROC_IS_USING_PMCS(td->td_proc)) {
  922                 PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
  923                 PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT, NULL);
  924         } else if (PMC_SYSTEM_SAMPLING_ACTIVE())
  925                 PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT_LOG, NULL);
  926 #endif
  927         PROC_UNLOCK(p);
  928         PROC_STATLOCK(p);
  929         thread_lock(td);
  930         PROC_SUNLOCK(p);
  931 
  932         /* Do the same timestamp bookkeeping that mi_switch() would do. */
  933         new_switchtime = cpu_ticks();
  934         runtime = new_switchtime - PCPU_GET(switchtime);
  935         td->td_runtime += runtime;
  936         td->td_incruntime += runtime;
  937         PCPU_SET(switchtime, new_switchtime);
  938         PCPU_SET(switchticks, ticks);
  939         VM_CNT_INC(v_swtch);
  940 
  941         /* Save our resource usage in our process. */
  942         td->td_ru.ru_nvcsw++;
  943         ruxagg_locked(p, td);
  944         rucollect(&p->p_ru, &td->td_ru);
  945         PROC_STATUNLOCK(p);
  946 
  947         td->td_state = TDS_INACTIVE;
  948 #ifdef WITNESS
  949         witness_thread_exit(td);
  950 #endif
  951         CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
  952         sched_throw(td);
  953         panic("I'm a teapot!");
  954         /* NOTREACHED */
  955 }
  956 
  957 /*
   958  * Do any thread-specific cleanups that may be needed in wait().
   959  * Called with Giant, proc and schedlock not held.
  960  */
  961 void
  962 thread_wait(struct proc *p)
  963 {
  964         struct thread *td;
  965 
  966         mtx_assert(&Giant, MA_NOTOWNED);
  967         KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()"));
  968         KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking"));
  969         td = FIRST_THREAD_IN_PROC(p);
  970         /* Lock the last thread so we spin until it exits cpu_throw(). */
  971         thread_lock(td);
  972         thread_unlock(td);
  973         lock_profile_thread_exit(td);
  974         cpuset_rel(td->td_cpuset);
  975         td->td_cpuset = NULL;
  976         cpu_thread_clean(td);
  977         thread_cow_free(td);
  978         callout_drain(&td->td_slpcallout);
  979         thread_reap();  /* check for zombie threads etc. */
  980 }
  981 
  982 /*
  983  * Link a thread to a process.
   984  * Set up anything that needs to be initialized for it to
  985  * be used by the process.
  986  */
  987 void
  988 thread_link(struct thread *td, struct proc *p)
  989 {
  990 
  991         /*
  992          * XXX This can't be enabled because it's called for proc0 before
  993          * its lock has been created.
  994          * PROC_LOCK_ASSERT(p, MA_OWNED);
  995          */
  996         td->td_state    = TDS_INACTIVE;
  997         td->td_proc     = p;
  998         td->td_flags    = TDF_INMEM;
  999 
 1000         LIST_INIT(&td->td_contested);
 1001         LIST_INIT(&td->td_lprof[0]);
 1002         LIST_INIT(&td->td_lprof[1]);
 1003 #ifdef EPOCH_TRACE
 1004         SLIST_INIT(&td->td_epochs);
 1005 #endif
 1006         sigqueue_init(&td->td_sigqueue, p);
 1007         callout_init(&td->td_slpcallout, 1);
 1008         TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
 1009         p->p_numthreads++;
 1010 }
 1011 
 1012 /*
 1013  * Called from:
 1014  *  thread_exit()
 1015  */
 1016 void
 1017 thread_unlink(struct thread *td)
 1018 {
 1019         struct proc *p = td->td_proc;
 1020 
 1021         PROC_LOCK_ASSERT(p, MA_OWNED);
 1022 #ifdef EPOCH_TRACE
 1023         MPASS(SLIST_EMPTY(&td->td_epochs));
 1024 #endif
 1025 
 1026         TAILQ_REMOVE(&p->p_threads, td, td_plist);
 1027         p->p_numthreads--;
 1028         /* could clear a few other things here */
 1029         /* Must  NOT clear links to proc! */
 1030 }
 1031 
 1032 static int
 1033 calc_remaining(struct proc *p, int mode)
 1034 {
 1035         int remaining;
 1036 
 1037         PROC_LOCK_ASSERT(p, MA_OWNED);
 1038         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1039         if (mode == SINGLE_EXIT)
 1040                 remaining = p->p_numthreads;
 1041         else if (mode == SINGLE_BOUNDARY)
 1042                 remaining = p->p_numthreads - p->p_boundary_count;
 1043         else if (mode == SINGLE_NO_EXIT || mode == SINGLE_ALLPROC)
 1044                 remaining = p->p_numthreads - p->p_suspcount;
 1045         else
 1046                 panic("calc_remaining: wrong mode %d", mode);
 1047         return (remaining);
 1048 }
 1049 
 1050 static int
 1051 remain_for_mode(int mode)
 1052 {
 1053 
 1054         return (mode == SINGLE_ALLPROC ? 0 : 1);
 1055 }
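/*
 * Worked example (numbers illustrative): a process with 4 threads, 2 of
 * which are already suspended at the user boundary, being put into
 * SINGLE_BOUNDARY mode: calc_remaining() = 4 - 2 = 2 while
 * remain_for_mode(SINGLE_BOUNDARY) = 1, so the loop in thread_single()
 * keeps pushing until only the requesting thread itself has not reached
 * the boundary.
 */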
 1056 
 1057 static int
 1058 weed_inhib(int mode, struct thread *td2, struct proc *p)
 1059 {
 1060         int wakeup_swapper;
 1061 
 1062         PROC_LOCK_ASSERT(p, MA_OWNED);
 1063         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1064         THREAD_LOCK_ASSERT(td2, MA_OWNED);
 1065 
 1066         wakeup_swapper = 0;
 1067 
 1068         /*
 1069          * Since the thread lock is dropped by the scheduler we have
 1070          * to retry to check for races.
 1071          */
 1072 restart:
 1073         switch (mode) {
 1074         case SINGLE_EXIT:
 1075                 if (TD_IS_SUSPENDED(td2)) {
 1076                         wakeup_swapper |= thread_unsuspend_one(td2, p, true);
 1077                         thread_lock(td2);
 1078                         goto restart;
 1079                 }
 1080                 if (TD_CAN_ABORT(td2)) {
 1081                         wakeup_swapper |= sleepq_abort(td2, EINTR);
 1082                         return (wakeup_swapper);
 1083                 }
 1084                 break;
 1085         case SINGLE_BOUNDARY:
 1086         case SINGLE_NO_EXIT:
 1087                 if (TD_IS_SUSPENDED(td2) &&
 1088                     (td2->td_flags & TDF_BOUNDARY) == 0) {
 1089                         wakeup_swapper |= thread_unsuspend_one(td2, p, false);
 1090                         thread_lock(td2);
 1091                         goto restart;
 1092                 }
 1093                 if (TD_CAN_ABORT(td2)) {
 1094                         wakeup_swapper |= sleepq_abort(td2, ERESTART);
 1095                         return (wakeup_swapper);
 1096                 }
 1097                 break;
 1098         case SINGLE_ALLPROC:
 1099                 /*
 1100                  * ALLPROC suspend tries to avoid spurious EINTR for
  1101                  * threads sleeping interruptibly, by suspending the
  1102                  * thread directly, similarly to sig_suspend_threads().
  1103                  * Since such a sleep is not performed at the user
  1104                  * boundary, the TDF_BOUNDARY flag is not set, and
  1105                  * TDF_ALLPROCSUSP is used to avoid an immediate un-suspend.
 1106                  */
 1107                 if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY |
 1108                     TDF_ALLPROCSUSP)) == 0) {
 1109                         wakeup_swapper |= thread_unsuspend_one(td2, p, false);
 1110                         thread_lock(td2);
 1111                         goto restart;
 1112                 }
 1113                 if (TD_CAN_ABORT(td2)) {
 1114                         if ((td2->td_flags & TDF_SBDRY) == 0) {
 1115                                 thread_suspend_one(td2);
 1116                                 td2->td_flags |= TDF_ALLPROCSUSP;
 1117                         } else {
 1118                                 wakeup_swapper |= sleepq_abort(td2, ERESTART);
 1119                                 return (wakeup_swapper);
 1120                         }
 1121                 }
 1122                 break;
 1123         default:
 1124                 break;
 1125         }
 1126         thread_unlock(td2);
 1127         return (wakeup_swapper);
 1128 }
 1129 
 1130 /*
 1131  * Enforce single-threading.
 1132  *
  1133  * Returns 1 if the caller must abort (another thread is waiting to
  1134  * exit the process or similar).  The process must be locked.
  1135  * Returns 0 when the caller is successfully the only thread running.
  1136  * A process has successfully single-threaded in the suspend mode when
  1137  * there are no threads in user mode.  Threads in the kernel must be
  1138  * allowed to continue until they get to the user boundary.  They may
  1139  * even copy out their return values and data before suspending.  They
  1140  * may, however, be accelerated in reaching the user boundary, as we
  1141  * will wake up any sleeping threads that are interruptible (PCATCH).
 1142  */
 1143 int
 1144 thread_single(struct proc *p, int mode)
 1145 {
 1146         struct thread *td;
 1147         struct thread *td2;
 1148         int remaining, wakeup_swapper;
 1149 
 1150         td = curthread;
 1151         KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
 1152             mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
 1153             ("invalid mode %d", mode));
 1154         /*
 1155          * If allowing non-ALLPROC singlethreading for non-curproc
 1156          * callers, calc_remaining() and remain_for_mode() should be
 1157          * adjusted to also account for td->td_proc != p.  For now
 1158          * this is not implemented because it is not used.
 1159          */
 1160         KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) ||
 1161             (mode != SINGLE_ALLPROC && td->td_proc == p),
 1162             ("mode %d proc %p curproc %p", mode, p, td->td_proc));
 1163         mtx_assert(&Giant, MA_NOTOWNED);
 1164         PROC_LOCK_ASSERT(p, MA_OWNED);
 1165 
 1166         if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_ALLPROC)
 1167                 return (0);
 1168 
 1169         /* Is someone already single threading? */
 1170         if (p->p_singlethread != NULL && p->p_singlethread != td)
 1171                 return (1);
 1172 
 1173         if (mode == SINGLE_EXIT) {
 1174                 p->p_flag |= P_SINGLE_EXIT;
 1175                 p->p_flag &= ~P_SINGLE_BOUNDARY;
 1176         } else {
 1177                 p->p_flag &= ~P_SINGLE_EXIT;
 1178                 if (mode == SINGLE_BOUNDARY)
 1179                         p->p_flag |= P_SINGLE_BOUNDARY;
 1180                 else
 1181                         p->p_flag &= ~P_SINGLE_BOUNDARY;
 1182         }
 1183         if (mode == SINGLE_ALLPROC)
 1184                 p->p_flag |= P_TOTAL_STOP;
 1185         p->p_flag |= P_STOPPED_SINGLE;
 1186         PROC_SLOCK(p);
 1187         p->p_singlethread = td;
 1188         remaining = calc_remaining(p, mode);
 1189         while (remaining != remain_for_mode(mode)) {
 1190                 if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
 1191                         goto stopme;
 1192                 wakeup_swapper = 0;
 1193                 FOREACH_THREAD_IN_PROC(p, td2) {
 1194                         if (td2 == td)
 1195                                 continue;
 1196                         thread_lock(td2);
 1197                         td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
 1198                         if (TD_IS_INHIBITED(td2)) {
 1199                                 wakeup_swapper |= weed_inhib(mode, td2, p);
 1200 #ifdef SMP
 1201                         } else if (TD_IS_RUNNING(td2) && td != td2) {
 1202                                 forward_signal(td2);
 1203                                 thread_unlock(td2);
 1204 #endif
 1205                         } else
 1206                                 thread_unlock(td2);
 1207                 }
 1208                 if (wakeup_swapper)
 1209                         kick_proc0();
 1210                 remaining = calc_remaining(p, mode);
 1211 
 1212                 /*
  1213                  * Maybe we suspended some threads; was it enough?
 1214                  */
 1215                 if (remaining == remain_for_mode(mode))
 1216                         break;
 1217 
 1218 stopme:
 1219                 /*
 1220                  * Wake us up when everyone else has suspended.
  1221                  * In the meantime, we suspend as well.
 1222                  */
 1223                 thread_suspend_switch(td, p);
 1224                 remaining = calc_remaining(p, mode);
 1225         }
 1226         if (mode == SINGLE_EXIT) {
 1227                 /*
 1228                  * Convert the process to an unthreaded process.  The
 1229                  * SINGLE_EXIT is called by exit1() or execve(), in
 1230                  * both cases other threads must be retired.
 1231                  */
 1232                 KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads"));
 1233                 p->p_singlethread = NULL;
 1234                 p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS);
 1235 
 1236                 /*
 1237                  * Wait for any remaining threads to exit cpu_throw().
 1238                  */
 1239                 while (p->p_exitthreads != 0) {
 1240                         PROC_SUNLOCK(p);
 1241                         PROC_UNLOCK(p);
 1242                         sched_relinquish(td);
 1243                         PROC_LOCK(p);
 1244                         PROC_SLOCK(p);
 1245                 }
 1246         } else if (mode == SINGLE_BOUNDARY) {
 1247                 /*
 1248                  * Wait until all suspended threads are removed from
 1249                  * the processors.  The thread_suspend_check()
 1250                  * increments p_boundary_count while it is still
 1251                  * running, which makes it possible for the execve()
 1252                  * to destroy vmspace while our other threads are
 1253                  * still using the address space.
 1254                  *
 1255                  * We lock the thread, which is only allowed to
 1256                  * succeed after context switch code finished using
 1257                  * the address space.
 1258                  */
 1259                 FOREACH_THREAD_IN_PROC(p, td2) {
 1260                         if (td2 == td)
 1261                                 continue;
 1262                         thread_lock(td2);
 1263                         KASSERT((td2->td_flags & TDF_BOUNDARY) != 0,
 1264                             ("td %p not on boundary", td2));
 1265                         KASSERT(TD_IS_SUSPENDED(td2),
 1266                             ("td %p is not suspended", td2));
 1267                         thread_unlock(td2);
 1268                 }
 1269         }
 1270         PROC_SUNLOCK(p);
 1271         return (0);
 1272 }
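/*
 * Usage sketch (in the style of the exit1()/execve() callers mentioned
 * above; simplified, with unrelated setup elided):
 *
 *	PROC_LOCK(p);
 *	if (thread_single(p, SINGLE_EXIT) != 0) {
 *		...another thread is already single-threading; abort...
 *	}
 *	(on success, the caller is the only thread left running in p)
 */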
 1273 
 1274 bool
 1275 thread_suspend_check_needed(void)
 1276 {
 1277         struct proc *p;
 1278         struct thread *td;
 1279 
 1280         td = curthread;
 1281         p = td->td_proc;
 1282         PROC_LOCK_ASSERT(p, MA_OWNED);
 1283         return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 &&
 1284             (td->td_dbgflags & TDB_SUSPEND) != 0));
 1285 }
 1286 
 1287 /*
  1288  * Called from locations that can safely check to see
 1289  * whether we have to suspend or at least throttle for a
 1290  * single-thread event (e.g. fork).
 1291  *
 1292  * Such locations include userret().
  1293  * If the "return_instead" argument is non-zero, the thread must be able to
 1294  * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 1295  *
 1296  * The 'return_instead' argument tells the function if it may do a
 1297  * thread_exit() or suspend, or whether the caller must abort and back
 1298  * out instead.
 1299  *
 1300  * If the thread that set the single_threading request has set the
 1301  * P_SINGLE_EXIT bit in the process flags then this call will never return
 1302  * if 'return_instead' is false, but will exit.
 1303  *
 1304  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 1305  *---------------+--------------------+---------------------
 1306  *       0       | returns 0          |   returns 0 or 1
 1307  *               | when ST ends       |   immediately
 1308  *---------------+--------------------+---------------------
 1309  *       1       | thread exits       |   returns 1
 1310  *               |                    |  immediately
 1311  * 0 = thread_exit() or suspension ok,
 1312  * other = return error instead of stopping the thread.
 1313  *
 1314  * While a full suspension is under effect, even a single threading
 1315  * thread would be suspended if it made this call (but it shouldn't).
 1316  * This call should only be made from places where
 1317  * thread_exit() would be safe as that may be the outcome unless
 1318  * return_instead is set.
 1319  */
 1320 int
 1321 thread_suspend_check(int return_instead)
 1322 {
 1323         struct thread *td;
 1324         struct proc *p;
 1325         int wakeup_swapper;
 1326 
 1327         td = curthread;
 1328         p = td->td_proc;
 1329         mtx_assert(&Giant, MA_NOTOWNED);
 1330         PROC_LOCK_ASSERT(p, MA_OWNED);
 1331         while (thread_suspend_check_needed()) {
 1332                 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 1333                         KASSERT(p->p_singlethread != NULL,
 1334                             ("singlethread not set"));
 1335                         /*
  1336                          * The only suspension in action is a
  1337                          * single-threading; the single threader need not stop.
 1338                          * It is safe to access p->p_singlethread unlocked
 1339                          * because it can only be set to our address by us.
 1340                          */
 1341                         if (p->p_singlethread == td)
 1342                                 return (0);     /* Exempt from stopping. */
 1343                 }
 1344                 if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
 1345                         return (EINTR);
 1346 
 1347                 /* Should we goto user boundary if we didn't come from there? */
 1348                 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
 1349                     (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
 1350                         return (ERESTART);
 1351 
 1352                 /*
 1353                  * Ignore suspend requests if they are deferred.
 1354                  */
 1355                 if ((td->td_flags & TDF_SBDRY) != 0) {
 1356                         KASSERT(return_instead,
 1357                             ("TDF_SBDRY set for unsafe thread_suspend_check"));
 1358                         KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) !=
 1359                             (TDF_SEINTR | TDF_SERESTART),
 1360                             ("both TDF_SEINTR and TDF_SERESTART"));
 1361                         return (TD_SBDRY_INTR(td) ? TD_SBDRY_ERRNO(td) : 0);
 1362                 }
 1363 
 1364                 /*
 1365                  * If the process is waiting for us to exit,
 1366                  * this thread should just suicide.
 1367                  * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
 1368                  */
 1369                 if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
 1370                         PROC_UNLOCK(p);
 1371 
 1372                         /*
 1373                          * Allow Linux emulation layer to do some work
 1374                          * before thread suicide.
 1375                          */
 1376                         if (__predict_false(p->p_sysent->sv_thread_detach != NULL))
 1377                                 (p->p_sysent->sv_thread_detach)(td);
 1378                         umtx_thread_exit(td);
 1379                         kern_thr_exit(td);
 1380                         panic("stopped thread did not exit");
 1381                 }
 1382 
 1383                 PROC_SLOCK(p);
 1384                 thread_stopped(p);
 1385                 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 1386                         if (p->p_numthreads == p->p_suspcount + 1) {
 1387                                 thread_lock(p->p_singlethread);
 1388                                 wakeup_swapper = thread_unsuspend_one(
 1389                                     p->p_singlethread, p, false);
 1390                                 if (wakeup_swapper)
 1391                                         kick_proc0();
 1392                         }
 1393                 }
 1394                 PROC_UNLOCK(p);
 1395                 thread_lock(td);
 1396                 /*
 1397                  * When a thread suspends, it just
 1398                  * gets taken off all queues.
 1399                  */
 1400                 thread_suspend_one(td);
 1401                 if (return_instead == 0) {
 1402                         p->p_boundary_count++;
 1403                         td->td_flags |= TDF_BOUNDARY;
 1404                 }
 1405                 PROC_SUNLOCK(p);
 1406                 mi_switch(SW_INVOL | SWT_SUSPEND);
 1407                 PROC_LOCK(p);
 1408         }
 1409         return (0);
 1410 }
 1411 
 1412 /*
 1413  * Check for possible stops and suspensions while executing a
 1414  * casueword or similar transiently failing operation.
 1415  *
  1416  * The sleep argument controls whether the function can handle a stop
  1417  * request itself or whether it should return ERESTART and let the
  1418  * request be processed at the kernel/user boundary in ast.
  1419  *
  1420  * Typically, when retrying due to casueword(9) failure (rv == 1), we
  1421  * should handle the stop requests there, with the exception of cases
  1422  * when the thread owns a kernel resource, for instance having busied
  1423  * the umtx key, or when functions return immediately if
  1424  * thread_check_susp() returned non-zero.  On the other hand, when
  1425  * retrying the whole lock operation, we had better not stop there but
  1426  * delegate the handling to ast.
 1427  *
 1428  * If the request is for thread termination P_SINGLE_EXIT, we cannot
 1429  * handle it at all, and simply return EINTR.
 1430  */
 1431 int
 1432 thread_check_susp(struct thread *td, bool sleep)
 1433 {
 1434         struct proc *p;
 1435         int error;
 1436 
 1437         /*
 1438          * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
 1439          * eventually break the lockstep loop.
 1440          */
 1441         if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
 1442                 return (0);
 1443         error = 0;
 1444         p = td->td_proc;
 1445         PROC_LOCK(p);
 1446         if (p->p_flag & P_SINGLE_EXIT)
 1447                 error = EINTR;
 1448         else if (P_SHOULDSTOP(p) ||
 1449             ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND)))
 1450                 error = sleep ? thread_suspend_check(0) : ERESTART;
 1451         PROC_UNLOCK(p);
 1452         return (error);
 1453 }
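/*
 * Usage sketch of the casueword(9) retry pattern described above
 * (simplified from the umtx-style lock loops; variable names are
 * illustrative):
 *
 *	for (;;) {
 *		rv = casueword32(&m->m_owner, owner, &owner, id);
 *		if (rv == -1)
 *			return (EFAULT);
 *		if (rv == 0)
 *			break;		(the store succeeded)
 *		error = thread_check_susp(td, true);
 *		if (error != 0)
 *			return (error);
 *	}
 */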
 1454 
 1455 void
 1456 thread_suspend_switch(struct thread *td, struct proc *p)
 1457 {
 1458 
 1459         KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
 1460         PROC_LOCK_ASSERT(p, MA_OWNED);
 1461         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1462         /*
 1463          * We implement thread_suspend_one in stages here to avoid
 1464          * dropping the proc lock while the thread lock is owned.
 1465          */
 1466         if (p == td->td_proc) {
 1467                 thread_stopped(p);
 1468                 p->p_suspcount++;
 1469         }
 1470         PROC_UNLOCK(p);
 1471         thread_lock(td);
 1472         td->td_flags &= ~TDF_NEEDSUSPCHK;
 1473         TD_SET_SUSPENDED(td);
 1474         sched_sleep(td, 0);
 1475         PROC_SUNLOCK(p);
 1476         DROP_GIANT();
 1477         mi_switch(SW_VOL | SWT_SUSPEND);
 1478         PICKUP_GIANT();
 1479         PROC_LOCK(p);
 1480         PROC_SLOCK(p);
 1481 }
 1482 
 1483 void
 1484 thread_suspend_one(struct thread *td)
 1485 {
 1486         struct proc *p;
 1487 
 1488         p = td->td_proc;
 1489         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1490         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1491         KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
 1492         p->p_suspcount++;
 1493         td->td_flags &= ~TDF_NEEDSUSPCHK;
 1494         TD_SET_SUSPENDED(td);
 1495         sched_sleep(td, 0);
 1496 }
 1497 
 1498 static int
 1499 thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary)
 1500 {
 1501 
 1502         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1503         KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
 1504         TD_CLR_SUSPENDED(td);
 1505         td->td_flags &= ~TDF_ALLPROCSUSP;
 1506         if (td->td_proc == p) {
 1507                 PROC_SLOCK_ASSERT(p, MA_OWNED);
 1508                 p->p_suspcount--;
 1509                 if (boundary && (td->td_flags & TDF_BOUNDARY) != 0) {
 1510                         td->td_flags &= ~TDF_BOUNDARY;
 1511                         p->p_boundary_count--;
 1512                 }
 1513         }
 1514         return (setrunnable(td, 0));
 1515 }
 1516 
 1517 /*
 1518  * Allow all threads blocked by single threading to continue running.
 1519  */
 1520 void
 1521 thread_unsuspend(struct proc *p)
 1522 {
 1523         struct thread *td;
 1524         int wakeup_swapper;
 1525 
 1526         PROC_LOCK_ASSERT(p, MA_OWNED);
 1527         PROC_SLOCK_ASSERT(p, MA_OWNED);
 1528         wakeup_swapper = 0;
 1529         if (!P_SHOULDSTOP(p)) {
 1530                 FOREACH_THREAD_IN_PROC(p, td) {
 1531                         thread_lock(td);
 1532                         if (TD_IS_SUSPENDED(td)) {
 1533                                 wakeup_swapper |= thread_unsuspend_one(td, p,
 1534                                     true);
 1535                         } else
 1536                                 thread_unlock(td);
 1537                 }
 1538         } else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
 1539             p->p_numthreads == p->p_suspcount) {
 1540                 /*
 1541                  * Stopping everything also did the job for the single-
 1542                  * threading request.  Now that we have downgraded to
 1543                  * single-threading, let it continue.
 1544                  */
 1545                 if (p->p_singlethread->td_proc == p) {
 1546                         thread_lock(p->p_singlethread);
 1547                         wakeup_swapper = thread_unsuspend_one(
 1548                             p->p_singlethread, p, false);
 1549                 }
 1550         }
 1551         if (wakeup_swapper)
 1552                 kick_proc0();
 1553 }
 1554 
 1555 /*
 1556  * End the single-threading mode.
 1557  */
 1558 void
 1559 thread_single_end(struct proc *p, int mode)
 1560 {
 1561         struct thread *td;
 1562         int wakeup_swapper;
 1563 
 1564         KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
 1565             mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
 1566             ("invalid mode %d", mode));
 1567         PROC_LOCK_ASSERT(p, MA_OWNED);
 1568         KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) ||
 1569             (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0),
 1570             ("mode %d does not match P_TOTAL_STOP", mode));
 1571         KASSERT(mode == SINGLE_ALLPROC || p->p_singlethread == curthread,
 1572             ("thread_single_end from other thread %p %p",
 1573             curthread, p->p_singlethread));
 1574         KASSERT(mode != SINGLE_BOUNDARY ||
 1575             (p->p_flag & P_SINGLE_BOUNDARY) != 0,
 1576             ("mis-matched SINGLE_BOUNDARY flags %x", p->p_flag));
 1577         p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY |
 1578             P_TOTAL_STOP);
 1579         PROC_SLOCK(p);
 1580         p->p_singlethread = NULL;
 1581         wakeup_swapper = 0;
 1582         /*
 1583          * If there are other threads, they may now run,
 1584          * unless of course there is a blanket 'stop order'
 1585          * on the process.  The single threader must be allowed
 1586          * to continue, however, as this is a bad place to stop.
 1587          */
 1588         if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) {
 1589                 FOREACH_THREAD_IN_PROC(p, td) {
 1590                         thread_lock(td);
 1591                         if (TD_IS_SUSPENDED(td)) {
 1592                                 wakeup_swapper |= thread_unsuspend_one(td, p,
 1593                                     mode == SINGLE_BOUNDARY);
 1594                         } else
 1595                                 thread_unlock(td);
 1596                 }
 1597         }
 1598         KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
 1599             ("inconsistent boundary count %d", p->p_boundary_count));
 1600         PROC_SUNLOCK(p);
 1601         if (wakeup_swapper)
 1602                 kick_proc0();
 1603 }
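
thread_single_end() is the counterpart of thread_single() earlier in this file; a hedged sketch of the usual pairing under the process lock (the error handling shown is illustrative, patterned on boundary-mode callers such as exec):

        PROC_LOCK(p);
        if (thread_single(p, SINGLE_BOUNDARY) != 0) {
                /* Interrupted before all other threads reached the boundary. */
                PROC_UNLOCK(p);
                return (ERESTART);
        }
        /* ... work that requires the process to be single-threaded ... */
        thread_single_end(p, SINGLE_BOUNDARY);
        PROC_UNLOCK(p);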
 1604 
 1605 /*
 1606  * Locate a thread by number and return with proc lock held.
 1607  *
 1608  * Thread exit establishes the proc -> tidhash lock ordering, but the
 1609  * lookup takes the tidhash lock first and needs to return a locked proc.
 1610  *
 1611  * The problem is worked around by relying on the type-safety of both
 1612  * structures and doing the work in two steps:
 1613  * - a tidhash-locked lookup which saves both the thread and proc pointers
 1614  * - a proc-locked verification that the found thread still matches
 1615  */
 1616 static bool
 1617 tdfind_hash(lwpid_t tid, pid_t pid, struct proc **pp, struct thread **tdp)
 1618 {
 1619 #define RUN_THRESH      16
 1620         struct proc *p;
 1621         struct thread *td;
 1622         int run;
 1623         bool locked;
 1624 
 1625         run = 0;
 1626         rw_rlock(TIDHASHLOCK(tid));
 1627         locked = true;
 1628         LIST_FOREACH(td, TIDHASH(tid), td_hash) {
 1629                 if (td->td_tid != tid) {
 1630                         run++;
 1631                         continue;
 1632                 }
 1633                 p = td->td_proc;
 1634                 if (pid != -1 && p->p_pid != pid) {
 1635                         td = NULL;
 1636                         break;
 1637                 }
 1638                 if (run > RUN_THRESH) {
 1639                         if (rw_try_upgrade(TIDHASHLOCK(tid))) {
 1640                                 LIST_REMOVE(td, td_hash);
 1641                                 LIST_INSERT_HEAD(TIDHASH(td->td_tid),
 1642                                         td, td_hash);
 1643                                 rw_wunlock(TIDHASHLOCK(tid));
 1644                                 locked = false;
 1645                                 break;
 1646                         }
 1647                 }
 1648                 break;
 1649         }
 1650         if (locked)
 1651                 rw_runlock(TIDHASHLOCK(tid));
 1652         if (td == NULL)
 1653                 return (false);
 1654         *pp = p;
 1655         *tdp = td;
 1656         return (true);
 1657 }
 1658 
 1659 struct thread *
 1660 tdfind(lwpid_t tid, pid_t pid)
 1661 {
 1662         struct proc *p;
 1663         struct thread *td;
 1664 
 1665         td = curthread;
 1666         if (td->td_tid == tid) {
 1667                 if (pid != -1 && td->td_proc->p_pid != pid)
 1668                         return (NULL);
 1669                 PROC_LOCK(td->td_proc);
 1670                 return (td);
 1671         }
 1672 
 1673         for (;;) {
 1674                 if (!tdfind_hash(tid, pid, &p, &td))
 1675                         return (NULL);
 1676                 PROC_LOCK(p);
 1677                 if (td->td_tid != tid) {
 1678                         PROC_UNLOCK(p);
 1679                         continue;
 1680                 }
 1681                 if (td->td_proc != p) {
 1682                         PROC_UNLOCK(p);
 1683                         continue;
 1684                 }
 1685                 if (p->p_state == PRS_NEW) {
 1686                         PROC_UNLOCK(p);
 1687                         return (NULL);
 1688                 }
 1689                 return (td);
 1690         }
 1691 }
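
A hedged usage sketch for tdfind(): the caller gets the thread back with its process lock held and must drop that lock itself; the helper name and errno choice below are illustrative:

static int
example_thread_lookup(lwpid_t tid, pid_t pid)
{
        struct thread *td;

        /* Pass pid == -1 to accept a matching tid in any process. */
        td = tdfind(tid, pid);
        if (td == NULL)
                return (ESRCH);
        /* td->td_proc is locked here; inspect td, then drop the lock. */
        PROC_UNLOCK(td->td_proc);
        return (0);
}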
 1692 
 1693 void
 1694 tidhash_add(struct thread *td)
 1695 {
 1696         rw_wlock(TIDHASHLOCK(td->td_tid));
 1697         LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
 1698         rw_wunlock(TIDHASHLOCK(td->td_tid));
 1699 }
 1700 
 1701 void
 1702 tidhash_remove(struct thread *td)
 1703 {
 1704 
 1705         rw_wlock(TIDHASHLOCK(td->td_tid));
 1706         LIST_REMOVE(td, td_hash);
 1707         rw_wunlock(TIDHASHLOCK(td->td_tid));
 1708 }
