
FreeBSD/Linux Kernel Cross Reference
sys/compat/sa/compat_sa.c


    1 /*      $NetBSD: compat_sa.c,v 1.6.2.4 2009/03/12 23:11:32 snj Exp $    */
    2 
    3 /*-
    4  * Copyright (c) 2001, 2004, 2005, 2006 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Nathan J. Williams, and by Andrew Doran.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *        This product includes software developed by the NetBSD
   21  *        Foundation, Inc. and its contributors.
   22  * 4. Neither the name of The NetBSD Foundation nor the names of its
   23  *    contributors may be used to endorse or promote products derived
   24  *    from this software without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36  * POSSIBILITY OF SUCH DAMAGE.
   37  */
   38 
   39 #include <sys/cdefs.h>
   40 
   41 #include "opt_ktrace.h"
   42 #include "opt_multiprocessor.h"
   43 #include "opt_sa.h"
   44 __KERNEL_RCSID(0, "$NetBSD: compat_sa.c,v 1.6.2.4 2009/03/12 23:11:32 snj Exp $");
   45 
   46 #include <sys/param.h>
   47 #include <sys/systm.h>
   48 #include <sys/cpu.h>
   49 #include <sys/pool.h>
   50 #include <sys/proc.h>
   51 #include <sys/types.h>
   52 #include <sys/ucontext.h>
   53 #include <sys/kernel.h>
   54 #include <sys/kmem.h>
   55 #include <sys/mount.h>
   56 #include <sys/sa.h>
   57 #include <sys/savar.h>
   58 #include <sys/syscallargs.h>
   59 #include <sys/ktrace.h>
   60 #include <sys/sched.h>
   61 #include <sys/sleepq.h>
   62 #include <sys/atomic.h> /* for membar_producer() */
   63 
   64 #include <uvm/uvm_extern.h>
   65 
   66 /*
    67  * Now handle building with SA disabled. We always compile this file;
    68  * if SA is disabled we merely build stub routines for the call
   69  * entry points we still need.
   70  */
   71 #ifdef KERN_SA
   72 
   73 /*
    74  * SA_CONCURRENCY is buggy and can lead to kernel crashes.
   75  */
   76 #ifdef SA_CONCURRENCY
   77 #ifndef MULTIPROCESSOR
   78         #error "SA_CONCURRENCY is only valid on MULTIPROCESSOR kernels"
   79 #endif
   80 #endif
   81 
   82 /*
   83  * memory pool for sadata structures
   84  */
   85 static POOL_INIT(sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl",
   86     &pool_allocator_nointr, IPL_NONE);
   87 
   88 /*
   89  * memory pool for pending upcalls
   90  */
   91 static POOL_INIT(saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0,
   92     "saupcpl", &pool_allocator_nointr, IPL_NONE);
   93 
   94 /*
   95  * memory pool for sastack structs
   96  */
   97 static POOL_INIT(sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl",
   98     &pool_allocator_nointr, IPL_NONE);
   99 
  100 /*
  101  * memory pool for sadata_vp structures
  102  */
  103 static POOL_INIT(savp_pool, sizeof(struct sadata_vp), 0, 0, 0, "savppl",
  104     &pool_allocator_nointr, IPL_NONE);
  105 
  106 static struct sadata_vp *sa_newsavp(struct proc *);
  107 static void sa_freevp(struct proc *, struct sadata *, struct sadata_vp *);
  108 static inline int sa_stackused(struct sastack *, struct sadata *);
  109 static inline void sa_setstackfree(struct sastack *, struct sadata *);
  110 static struct sastack *sa_getstack(struct sadata *);
  111 static inline struct sastack *sa_getstack0(struct sadata *);
  112 static inline int sast_compare(struct sastack *, struct sastack *);
  113 #ifdef SA_CONCURRENCY
  114 static int sa_increaseconcurrency(struct lwp *, int);
  115 #endif
  116 static void sa_switchcall(void *);
  117 static void sa_neverrun(void *);
  118 static int sa_newcachelwp(struct lwp *, struct sadata_vp *);
  119 static void sa_makeupcalls(struct lwp *, struct sadata_upcall *);
  120 
  121 static inline int sa_pagefault(struct lwp *, ucontext_t *);
  122 
  123 static void sa_upcall0(struct sadata_upcall *, int, struct lwp *, struct lwp *,
  124     size_t, void *, void (*)(void *));
  125 static void sa_upcall_getstate(union sau_state *, struct lwp *, int);
  126 
  127 void    sa_putcachelwp(struct proc *, struct lwp *);
  128 struct lwp *sa_getcachelwp(struct proc *, struct sadata_vp *);
  129 static void     sa_setrunning(struct lwp *);
  130 
  131 #define SA_DEBUG
  132 
  133 #ifdef SA_DEBUG
  134 #define DPRINTF(x)      do { if (sadebug) printf_nolog x; } while (0)
  135 #define DPRINTFN(n,x)   do { if (sadebug & (1<<(n-1))) printf_nolog x; } while (0)
  136 int     sadebug = 0;
  137 #else
  138 #define DPRINTF(x)
  139 #define DPRINTFN(n,x)
  140 #endif
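
/*
 * Illustration of the debug mask (values only; how sadebug gets set,
 * e.g. from ddb, is not shown here): DPRINTFN(n, ...) fires when bit
 * (n-1) of sadebug is set, so each debug level used in this file can be
 * enabled independently:
 *
 *	sadebug = 1 << (9 - 1);			0x100: level-9 messages only
 *	sadebug = (1 << 1) | (1 << 10);		levels 2 and 11 together
 */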
  141 
  142 static syncobj_t sa_sobj = {
  143         SOBJ_SLEEPQ_FIFO,
  144         sleepq_unsleep,
  145         sleepq_changepri,
  146         sleepq_lendpri,
  147         syncobj_noowner,
  148 };
  149 
  150 static const char *sa_lwpcache_wmesg = "lwpcache";
  151 static const char *sa_lwpwoken_wmesg = "lwpublk";
  152 
  153 #define SA_LWP_STATE_LOCK(l, f) do {                            \
  154         (f) = ~(l)->l_pflag & LP_SA_NOBLOCK;                    \
  155         (l)->l_pflag |= LP_SA_NOBLOCK;                          \
  156 } while (/*CONSTCOND*/ 0)
  157 
  158 #define SA_LWP_STATE_UNLOCK(l, f) do {                          \
  159         (l)->l_pflag ^= (f);                                    \
  160 } while (/*CONSTCOND*/ 0)
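
/*
 * Usage sketch for the state-lock pair above (the surrounding code is
 * hypothetical; the pattern matches the real uses later in this file).
 * LOCK records whether LP_SA_NOBLOCK was already set and then sets it;
 * UNLOCK xors the saved value back, restoring exactly the prior state,
 * so the pair nests safely:
 *
 *	int f;
 *
 *	SA_LWP_STATE_LOCK(curlwp, f);
 *	... work that must not generate upcalls ...
 *	SA_LWP_STATE_UNLOCK(curlwp, f);
 */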
  161 
  162 RB_PROTOTYPE(sasttree, sastack, sast_node, sast_compare);
  163 RB_GENERATE(sasttree, sastack, sast_node, sast_compare);
  164 
  165 kmutex_t        saupcall_mutex;
  166 SIMPLEQ_HEAD(, sadata_upcall) saupcall_freelist;
  167 
  168 /*
  169  * sa_critpath API
  170  * permit other parts of the kernel to make SA_LWP_STATE_{UN,}LOCK calls.
  171  */
  172 void
  173 sa_critpath_enter(struct lwp *l1, sa_critpath_t *f1)
  174 {
  175         SA_LWP_STATE_LOCK(l1, *f1);
  176 }
  177 void
  178 sa_critpath_exit(struct lwp *l1, sa_critpath_t *f1)
  179 {
  180         SA_LWP_STATE_UNLOCK(l1, *f1);
  181 }
  182 
  183 
  184 /*
  185  * sadata_upcall_alloc:
  186  *
  187  *      Allocate an sadata_upcall structure.
  188  */
  189 struct sadata_upcall *
  190 sadata_upcall_alloc(int waitok)
  191 {
  192         struct sadata_upcall *sau;
  193 
  194         sau = NULL;
  195         if (waitok && !SIMPLEQ_EMPTY(&saupcall_freelist)) {
  196                 mutex_enter(&saupcall_mutex);
  197                 if ((sau = SIMPLEQ_FIRST(&saupcall_freelist)) != NULL)
  198                         SIMPLEQ_REMOVE_HEAD(&saupcall_freelist, sau_next);
  199                 mutex_exit(&saupcall_mutex);
  200                 if (sau != NULL && sau->sau_arg != NULL)
  201                         (*sau->sau_argfreefunc)(sau->sau_arg);
  202         }
  203 
  204         if (sau == NULL)
  205                 sau = pool_get(&saupcall_pool, waitok ? PR_WAITOK : PR_NOWAIT);
  206         if (sau != NULL)
  207                 sau->sau_arg = NULL;
  208 
  209         return sau;
  210 }
  211 
  212 /*
  213  * sadata_upcall_free:
  214  *
  215  *      Free an sadata_upcall structure and any associated argument data.
  216  */
  217 void
  218 sadata_upcall_free(struct sadata_upcall *sau)
  219 {
  220         if (sau == NULL)
  221                 return;
  222 
  223         /*
  224          * If our current synchronisation object is a sleep queue or
  225          * similar, we must not put the object back to the pool as
   226          * doing so could acquire sleep locks.  That could trigger
  227          * a recursive sleep.
  228          */
  229         if (curlwp->l_syncobj == &sched_syncobj) {
  230                 if (sau->sau_arg)
  231                         (*sau->sau_argfreefunc)(sau->sau_arg);
  232                 pool_put(&saupcall_pool, sau);
  233                 sadata_upcall_drain();
  234         } else {
  235                 mutex_enter(&saupcall_mutex);
  236                 SIMPLEQ_INSERT_HEAD(&saupcall_freelist, sau, sau_next);
  237                 mutex_exit(&saupcall_mutex);
  238         }
  239 }
  240 
  241 /*
  242  * sadata_upcall_drain:
  243  *
  244  *      Put freed upcall structures back to the pool.
  245  */
  246 void
  247 sadata_upcall_drain(void)
  248 {
  249         struct sadata_upcall *sau;
  250 
  251         sau = SIMPLEQ_FIRST(&saupcall_freelist);
  252         while (sau != NULL) {
  253                 mutex_enter(&saupcall_mutex);
  254                 if ((sau = SIMPLEQ_FIRST(&saupcall_freelist)) != NULL)
  255                         SIMPLEQ_REMOVE_HEAD(&saupcall_freelist, sau_next);
  256                 mutex_exit(&saupcall_mutex);
  257                 if (sau != NULL) /* XXX sau_arg free needs a call! */
  258                         pool_put(&saupcall_pool, sau);
  259         }
  260 }
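
/*
 * Design note on the three routines above: sadata_upcall_free() parks
 * structures on saupcall_freelist when the current synchronisation
 * object shows we may be on a sleep queue and so must not take pool
 * locks; sadata_upcall_alloc() opportunistically reuses those parked
 * entries, and sadata_upcall_drain() pushes the remainder back into
 * saupcall_pool once it is safe to do so.
 */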
  261 
  262 /*
  263  * sa_newsavp
  264  *
  265  * Allocate a new virtual processor structure, do some simple
  266  * initialization and add it to the passed-in sa. Pre-allocate
  267  * an upcall event data structure for when the main thread on
  268  * this vp blocks.
  269  *
   270  * We lock p_lock and sa_mutex while manipulating the list of vp's.
  271  *
  272  * We allocate the lwp to run on this separately. In the case of the
  273  * first lwp/vp for a process, the lwp already exists. It's the
  274  * main (only) lwp of the process.
  275  */
  276 static struct sadata_vp *
  277 sa_newsavp(struct proc *p)
  278 {
  279         struct sadata *sa = p->p_sa;
  280         struct sadata_vp *vp, *qvp;
  281         struct sadata_upcall *sau;
  282 
  283         /* Allocate virtual processor data structure */
  284         vp = pool_get(&savp_pool, PR_WAITOK);
  285         /* And preallocate an upcall data structure for sleeping */
  286         sau = sadata_upcall_alloc(1);
  287         /* Initialize. */
  288         memset(vp, 0, sizeof(*vp));
  289         /* Lock has to be IPL_SCHED, since we use it in the
  290          * hooks from the scheduler code */
  291         vp->savp_lwp = NULL;
  292         vp->savp_faultaddr = 0;
  293         vp->savp_ofaultaddr = 0;
  294         vp->savp_woken_count = 0;
  295         vp->savp_lwpcache_count = 0;
  296         vp->savp_pflags = 0;
  297         vp->savp_sleeper_upcall = sau;
  298         mutex_init(&vp->savp_mutex, MUTEX_DEFAULT, IPL_SCHED);
  299         sleepq_init(&vp->savp_lwpcache);
  300         sleepq_init(&vp->savp_woken);
  301         SIMPLEQ_INIT(&vp->savp_upcalls);
  302 
  303         /* We're writing sa_savps, so lock both locks */
  304         mutex_enter(p->p_lock);
  305         mutex_enter(&sa->sa_mutex);
  306         /* find first free savp_id and add vp to sorted slist */
  307         if (SLIST_EMPTY(&sa->sa_vps) ||
  308             SLIST_FIRST(&sa->sa_vps)->savp_id != 0) {
  309                 vp->savp_id = 0;
  310                 SLIST_INSERT_HEAD(&sa->sa_vps, vp, savp_next);
  311         } else {
  312                 SLIST_FOREACH(qvp, &sa->sa_vps, savp_next) {
  313                         if (SLIST_NEXT(qvp, savp_next) == NULL ||
  314                             SLIST_NEXT(qvp, savp_next)->savp_id !=
  315                             qvp->savp_id + 1)
  316                                 break;
  317                 }
  318                 vp->savp_id = qvp->savp_id + 1;
  319                 SLIST_INSERT_AFTER(qvp, vp, savp_next);
  320         }
  321         mutex_exit(&sa->sa_mutex);
  322         mutex_exit(p->p_lock);
  323 
  324         DPRINTFN(1, ("sa_newsavp(%d) allocated vp %p\n", p->p_pid, vp));
  325 
  326         return (vp);
  327 }
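
/*
 * Worked example of the savp_id assignment above (ids are illustrative):
 * with existing ids {0, 1, 3} the SLIST_FOREACH stops at the vp with
 * id 1, since its successor's id is 3 rather than 2, so the new vp gets
 * id 2 and is inserted after it, keeping the list sorted: {0, 1, 2, 3}.
 * With existing ids {1, 2} the head's id is non-zero, so the new vp
 * gets id 0 and goes to the head of the list.
 */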
  328 
  329 /*
  330  * sa_freevp:
  331  *
  332  *      Deallocate a vp. Must be called with no locks held.
  333  * Will lock and unlock p_lock.
  334  */
  335 static void
  336 sa_freevp(struct proc *p, struct sadata *sa, struct sadata_vp *vp)
  337 {
  338         DPRINTFN(1, ("sa_freevp(%d) freeing vp %p\n", p->p_pid, vp));
  339 
  340         mutex_enter(p->p_lock);
  341 
  342         DPRINTFN(1, ("sa_freevp(%d) about to unlink in vp %p\n", p->p_pid, vp));
  343         SLIST_REMOVE(&sa->sa_vps, vp, sadata_vp, savp_next);
  344         DPRINTFN(1, ("sa_freevp(%d) done unlink in vp %p\n", p->p_pid, vp));
  345 
  346         if (vp->savp_sleeper_upcall) {
  347                 sadata_upcall_free(vp->savp_sleeper_upcall);
  348                 vp->savp_sleeper_upcall = NULL;
  349         }
  350         DPRINTFN(1, ("sa_freevp(%d) about to mut_det in vp %p\n", p->p_pid, vp));
  351 
  352         mutex_destroy(&vp->savp_mutex);
  353 
  354         mutex_exit(p->p_lock);
  355 
  356         pool_put(&savp_pool, vp);
  357 }
  358 
  359 /*
   360  * Global switch: when sa_system_disabled is set, dosa_register() refuses
   361  * new registrations.
  361  */
  362 int sa_system_disabled = 0;
  363 
  364 /*
  365  * sys_sa_register
  366  *      Handle copyin and copyout of info for registering the
  367  * upcall handler address.
  368  */
  369 int
  370 sys_sa_register(struct lwp *l, const struct sys_sa_register_args *uap,
  371     register_t *retval)
  372 {
  373         int error;
  374         sa_upcall_t prev;
  375 
  376         error = dosa_register(l, SCARG(uap, new), &prev, SCARG(uap, flags),
  377             SCARG(uap, stackinfo_offset));
  378         if (error)
  379                 return error;
  380 
  381         if (SCARG(uap, old))
  382                 return copyout(&prev, SCARG(uap, old),
  383                     sizeof(prev));
  384         return 0;
  385 }
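
/*
 * Rough userland view of this call, inferred from the SCARG usage above
 * (my_upcall and my_stack_layout are placeholder names; the exact
 * userland prototype is not reproduced here):
 *
 *	sa_upcall_t old;
 *
 *	error = sa_register(my_upcall, &old, SA_FLAG_STACKINFO,
 *	    offsetof(struct my_stack_layout, si));
 *
 * The stackinfo_offset argument only matters when SA_FLAG_STACKINFO is
 * given; it tells the kernel where, relative to each upcall stack's
 * ss_sp, to find the struct sa_stackinfo_t used by sa_fetchstackgen()
 * for the stack generation handshake.
 */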
  386 
  387 /*
  388  * dosa_register
  389  *
  390  *      Change the upcall address for the process. If needed, allocate
  391  * an sadata structure (and initialize it) for the process. If initializing,
  392  * set the flags in the sadata structure to those passed in. Flags will
   393  * be ignored if the sadata structure already exists (dosa_register was
  394  * already called).
  395  *
  396  * Note: changing the upcall handler address for a process that has
  397  * concurrency greater than one can yield ambiguous results. The one
  398  * guarantee we can offer is that any upcalls generated on all CPUs
  399  * after this routine finishes will use the new upcall handler. Note
  400  * that any upcalls delivered upon return to user level by the
  401  * sys_sa_register() system call that called this routine will use the
  402  * new upcall handler. Note that any such upcalls will be delivered
  403  * before the old upcall handling address has been returned to
  404  * the application.
  405  */
  406 int
  407 dosa_register(struct lwp *l, sa_upcall_t new, sa_upcall_t *prev, int flags,
  408     ssize_t stackinfo_offset)
  409 {
  410         struct proc *p = l->l_proc;
  411         struct sadata *sa;
  412 
  413         if (sa_system_disabled)
  414                 return EINVAL;
  415 
  416         if (p->p_sa == NULL) {
  417                 /* Allocate scheduler activations data structure */
  418                 sa = pool_get(&sadata_pool, PR_WAITOK);
  419                 memset(sa, 0, sizeof(*sa));
  420 
  421                 /* WRS: not sure if need SCHED. need to audit lockers */
  422                 mutex_init(&sa->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
  423                 mutex_enter(p->p_lock);
  424                 if ((p->p_sflag & PS_NOSA) != 0) {
  425                         mutex_exit(p->p_lock);
  426                         mutex_destroy(&sa->sa_mutex);
  427                         pool_put(&sadata_pool, sa);
  428                         return EINVAL;
  429                 }
  430 
  431                 /* Initialize. */
  432                 sa->sa_flag = flags & SA_FLAG_ALL;
  433                 sa->sa_maxconcurrency = 1;
  434                 sa->sa_concurrency = 1;
  435                 RB_INIT(&sa->sa_stackstree);
  436                 sa->sa_stacknext = NULL;
  437                 if (flags & SA_FLAG_STACKINFO)
  438                         sa->sa_stackinfo_offset = stackinfo_offset;
  439                 else
  440                         sa->sa_stackinfo_offset = 0;
  441                 sa->sa_nstacks = 0;
  442                 sigemptyset(&sa->sa_sigmask);
  443                 sigplusset(&l->l_sigmask, &sa->sa_sigmask);
  444                 sigemptyset(&l->l_sigmask);
  445                 SLIST_INIT(&sa->sa_vps);
  446                 cv_init(&sa->sa_cv, "sawait");
  447                 membar_producer();
  448                 p->p_sa = sa;
  449                 KASSERT(l->l_savp == NULL);
  450                 mutex_exit(p->p_lock);
  451         }
  452         if (l->l_savp == NULL) {        /* XXXSMP */
  453                 l->l_savp = sa_newsavp(p);
  454                 sa_newcachelwp(l, NULL);
  455         }
  456 
  457         *prev = p->p_sa->sa_upcall;
  458         p->p_sa->sa_upcall = new;
  459 
  460         return (0);
  461 }
  462 
  463 void
  464 sa_release(struct proc *p)
  465 {
  466         struct sadata *sa;
  467         struct sastack *sast, *next;
  468         struct sadata_vp *vp;
  469         struct lwp *l;
  470 
  471         sa = p->p_sa;
  472         KASSERT(sa != NULL);
  473         KASSERT(p->p_nlwps <= 1);
  474 
  475         for (sast = RB_MIN(sasttree, &sa->sa_stackstree); sast != NULL;
  476              sast = next) {
  477                 next = RB_NEXT(sasttree, &sa->sa_stackstree, sast);
  478                 RB_REMOVE(sasttree, &sa->sa_stackstree, sast);
  479                 pool_put(&sastack_pool, sast);
  480         }
  481 
  482         mutex_enter(p->p_lock);
  483         p->p_sflag = (p->p_sflag & ~PS_SA) | PS_NOSA;
  484         p->p_sa = NULL;
  485         l = LIST_FIRST(&p->p_lwps);
  486         if (l) {
  487                 lwp_lock(l);
  488                 KASSERT(LIST_NEXT(l, l_sibling) == NULL);
  489                 l->l_savp = NULL;
  490                 lwp_unlock(l);
  491         }
  492         mutex_exit(p->p_lock);
  493 
  494         while ((vp = SLIST_FIRST(&sa->sa_vps)) != NULL) {
  495                 sa_freevp(p, sa, vp);
  496         }
  497 
  498         DPRINTFN(1, ("sa_release(%d) done vps\n", p->p_pid));
  499 
  500         mutex_destroy(&sa->sa_mutex);
  501         cv_destroy(&sa->sa_cv);
  502         pool_put(&sadata_pool, sa);
  503 
  504         DPRINTFN(1, ("sa_release(%d) put sa\n", p->p_pid));
  505 
  506         mutex_enter(p->p_lock);
  507         p->p_sflag &= ~PS_NOSA;
  508         mutex_exit(p->p_lock);
  509 }
  510 
  511 /*
  512  * sa_fetchstackgen
  513  *
  514  *      copyin the generation number for the stack in question.
  515  *
  516  * WRS: I think this routine needs the SA_LWP_STATE_LOCK() dance, either
  517  * here or in its caller.
  518  *
  519  * Must be called with sa_mutex locked.
  520  */
  521 static int
  522 sa_fetchstackgen(struct sastack *sast, struct sadata *sa, unsigned int *gen)
  523 {
  524         int error;
  525 
   526         /* COMPAT_NETBSD32: believe it or not, the following is ok */
  527         mutex_exit(&sa->sa_mutex);
  528         error = copyin(&((struct sa_stackinfo_t *)
  529             ((char *)sast->sast_stack.ss_sp +
  530             sa->sa_stackinfo_offset))->sasi_stackgen, gen, sizeof(*gen));
  531         mutex_enter(&sa->sa_mutex);
  532 
  533         return error;
  534 }
  535 
  536 /*
  537  * sa_stackused
  538  *
  539  *      Convenience routine to determine if a given stack has been used
  540  * or not. We consider a stack to be unused if the kernel's concept
  541  * of its generation number matches that of userland.
  542  *      We kill the application with SIGILL if there is an error copying
  543  * in the userland generation number.
  544  */
  545 static inline int
  546 sa_stackused(struct sastack *sast, struct sadata *sa)
  547 {
  548         unsigned int gen;
  549 
  550         KASSERT(mutex_owned(&sa->sa_mutex));
  551 
  552         if (sa_fetchstackgen(sast, sa, &gen)) {
  553                 sigexit(curlwp, SIGILL);
  554                 /* NOTREACHED */
  555         }
  556         return (sast->sast_gen != gen);
  557 }
  558 
  559 /*
  560  * sa_setstackfree
  561  *
  562  *      Convenience routine to mark a stack as unused in the kernel's
  563  * eyes. We do this by setting the kernel's generation number for the stack
  564  * to that of userland.
  565  *      We kill the application with SIGILL if there is an error copying
  566  * in the userland generation number.
  567  */
  568 static inline void
  569 sa_setstackfree(struct sastack *sast, struct sadata *sa)
  570 {
  571         unsigned int gen;
  572 
  573         KASSERT(mutex_owned(&sa->sa_mutex));
  574 
  575         if (sa_fetchstackgen(sast, sa, &gen)) {
  576                 sigexit(curlwp, SIGILL);
  577                 /* NOTREACHED */
  578         }
  579         sast->sast_gen = gen;
  580 }
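
/*
 * Worked example of the generation handshake (numbers are illustrative;
 * the userland half is assumed to follow the same convention):
 *
 *	sast_gen == 5, userland sasi_stackgen == 5  -> stack is free
 *	sa_getstack() below hands the stack out and bumps sast_gen to 6
 *	    -> the values differ, so sa_stackused() reports it used
 *	the threading library finishes with the stack and bumps its
 *	    sasi_stackgen to 6 -> the values match again, stack is free
 *
 * sa_setstackfree() simply copies the userland value into sast_gen,
 * forcing the free state regardless of history.
 */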
  581 
  582 /*
  583  * sa_getstack
  584  *
  585  * Find next free stack, starting at sa->sa_stacknext.  Must be called
  586  * with sa->sa_mutex held, and will release while checking for stack
  587  * availability.
  588  *
  589  * Caller should have set LP_SA_NOBLOCK for our thread. This is not the time
  590  * to go generating upcalls as we aren't in a position to deliver another one.
  591  */
  592 static struct sastack *
  593 sa_getstack(struct sadata *sa)
  594 {
  595         struct sastack *sast;
  596         int chg;
  597 
  598         KASSERT(mutex_owned(&sa->sa_mutex));
  599 
  600         do {
  601                 chg = sa->sa_stackchg;
  602                 sast = sa->sa_stacknext;
  603                 if (sast == NULL || sa_stackused(sast, sa))
  604                         sast = sa_getstack0(sa);
  605         } while (chg != sa->sa_stackchg);
  606 
  607         if (sast == NULL)
  608                 return NULL;
  609 
  610         sast->sast_gen++;
  611         sa->sa_stackchg++;
  612 
  613         return sast;
  614 }
  615 
  616 /*
  617  * sa_getstack0 -- get the lowest numbered sa stack
  618  *
   619  *      We walk the red-black tree in order and find the lowest-numbered
   620  * (as defined by RB_MIN() and RB_NEXT() ordering) stack that
  621  * is unused.
  622  */
  623 static inline struct sastack *
  624 sa_getstack0(struct sadata *sa)
  625 {
  626         struct sastack *start;
  627         int chg;
  628 
  629         KASSERT(mutex_owned(&sa->sa_mutex));
  630 
  631  retry:
  632         chg = sa->sa_stackchg;
  633         if (sa->sa_stacknext == NULL) {
  634                 sa->sa_stacknext = RB_MIN(sasttree, &sa->sa_stackstree);
  635                 if (sa->sa_stacknext == NULL)
  636                         return NULL;
  637         }
  638         start = sa->sa_stacknext;
  639 
  640         while (sa_stackused(sa->sa_stacknext, sa)) {
  641                 if (sa->sa_stackchg != chg)
  642                         goto retry;
  643                 sa->sa_stacknext = RB_NEXT(sasttree, &sa->sa_stackstree,
  644                     sa->sa_stacknext);
  645                 if (sa->sa_stacknext == NULL)
  646                         sa->sa_stacknext = RB_MIN(sasttree,
  647                             &sa->sa_stackstree);
  648                 if (sa->sa_stacknext == start)
  649                         return NULL;
  650         }
  651         return sa->sa_stacknext;
  652 }
  653 
  654 /*
  655  * sast_compare - compare two sastacks
  656  *
  657  *      We sort stacks according to their userspace addresses.
  658  * Stacks are "equal" if their start + size overlap.
  659  */
  660 static inline int
  661 sast_compare(struct sastack *a, struct sastack *b)
  662 {
  663 
  664         if ((vaddr_t)a->sast_stack.ss_sp + a->sast_stack.ss_size <=
  665             (vaddr_t)b->sast_stack.ss_sp)
  666                 return (-1);
  667         if ((vaddr_t)a->sast_stack.ss_sp >=
  668             (vaddr_t)b->sast_stack.ss_sp + b->sast_stack.ss_size)
  669                 return (1);
  670         return (0);
  671 }
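
/*
 * Example of the overlap rule above (addresses are made up): a stack
 * spanning [0x1000, 0x5000) compares equal to any key whose range
 * intersects it, so a lookup key with ss_sp = 0x2345 and ss_size = 1
 * finds that stack.  sa_pagefault() relies on exactly this trick to ask
 * "is this stack pointer inside any registered upcall stack?" with a
 * single RB_FIND().
 */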
  672 
  673 /*
  674  * sa_copyin_stack -- copyin a stack.
  675  */
  676 static int
  677 sa_copyin_stack(stack_t *stacks, int index, stack_t *dest)
  678 {
  679         return copyin(stacks + index, dest, sizeof(stack_t));
  680 }
  681 
  682 /*
  683  * sys_sa_stacks -- the user level threading library is passing us stacks
  684  *
  685  * We copy in some arguments then call sa_stacks1() to do the main
  686  * work. NETBSD32 has its own front-end for this call.
  687  */
  688 int
  689 sys_sa_stacks(struct lwp *l, const struct sys_sa_stacks_args *uap,
  690         register_t *retval)
  691 {
  692         return sa_stacks1(l, retval, SCARG(uap, num), SCARG(uap, stacks),
  693             sa_copyin_stack);
  694 }
  695 
  696 /*
  697  * sa_stacks1
   698  *      Process stacks passed in by the user threading library. At
   699  * present we use sa_mutex to lock the red-black stack tree, which we
   700  * manipulate to load in the stacks.
  701  *
  702  *      It is an error to pass in a stack that we already know about
  703  * and which hasn't been used. Passing in a known-but-used one is fine.
  704  * We accept up to SA_MAXNUMSTACKS per desired vp (concurrency level).
  705  */
  706 int
  707 sa_stacks1(struct lwp *l, register_t *retval, int num, stack_t *stacks,
  708     sa_copyin_stack_t do_sa_copyin_stack)
  709 {
  710         struct sadata *sa = l->l_proc->p_sa;
  711         struct sastack *sast, *new;
  712         int count, error, f, i, chg;
  713 
  714         /* We have to be using scheduler activations */
  715         if (sa == NULL)
  716                 return (EINVAL);
  717 
  718         count = num;
  719         if (count < 0)
  720                 return (EINVAL);
  721 
  722         SA_LWP_STATE_LOCK(l, f);
  723 
  724         error = 0;
  725 
  726         for (i = 0; i < count; i++) {
  727                 new = pool_get(&sastack_pool, PR_WAITOK);
  728                 error = do_sa_copyin_stack(stacks, i, &new->sast_stack);
  729                 if (error) {
  730                         count = i;
  731                         break;
  732                 }
  733                 mutex_enter(&sa->sa_mutex);
  734          restart:
  735                 chg = sa->sa_stackchg;
  736                 sa_setstackfree(new, sa);
  737                 sast = RB_FIND(sasttree, &sa->sa_stackstree, new);
  738                 if (sast != NULL) {
  739                         DPRINTFN(9, ("sa_stacks(%d.%d) returning stack %p\n",
  740                                      l->l_proc->p_pid, l->l_lid,
  741                                      new->sast_stack.ss_sp));
  742                         if (sa_stackused(sast, sa) == 0) {
  743                                 count = i;
  744                                 error = EEXIST;
  745                                 mutex_exit(&sa->sa_mutex);
  746                                 pool_put(&sastack_pool, new);
  747                                 break;
  748                         }
  749                         if (chg != sa->sa_stackchg)
  750                                 goto restart;
  751                 } else if (sa->sa_nstacks >=
  752                     SA_MAXNUMSTACKS * sa->sa_concurrency) {
  753                         DPRINTFN(9,
  754                             ("sa_stacks(%d.%d) already using %d stacks\n",
  755                             l->l_proc->p_pid, l->l_lid,
  756                             SA_MAXNUMSTACKS * sa->sa_concurrency));
  757                         count = i;
  758                         error = ENOMEM;
  759                         mutex_exit(&sa->sa_mutex);
  760                         pool_put(&sastack_pool, new);
  761                         break;
  762                 } else {
  763                         DPRINTFN(9, ("sa_stacks(%d.%d) adding stack %p\n",
  764                                      l->l_proc->p_pid, l->l_lid,
  765                                      new->sast_stack.ss_sp));
  766                         RB_INSERT(sasttree, &sa->sa_stackstree, new);
  767                         sa->sa_nstacks++;
  768                         sa->sa_stackchg++;
  769                 }
  770                 mutex_exit(&sa->sa_mutex);
  771         }
  772 
  773         SA_LWP_STATE_UNLOCK(l, f);
  774 
  775         *retval = count;
  776         return (error);
  777 }
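
/*
 * Rough userland view of the stack hand-off (placeholder names, error
 * handling omitted; the exact userland prototype is not reproduced
 * here):
 *
 *	stack_t st[4];
 *
 *	for (i = 0; i < 4; i++) {
 *		st[i].ss_sp = malloc(MY_UPCALL_STACK_SIZE);
 *		st[i].ss_size = MY_UPCALL_STACK_SIZE;
 *		st[i].ss_flags = 0;
 *	}
 *	sa_stacks(4, st);
 *
 * The kernel may accept fewer stacks than requested; the number actually
 * accepted is what sa_stacks1() returns through *retval above.
 */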
  778 
  779 
  780 /*
  781  * sys_sa_enable - throw the switch & enable SA
  782  *
   783  * Fairly simple. Make sure the sadata and vp have been set up for this
  784  * process, assign this thread to the vp and initiate the first upcall
  785  * (SA_UPCALL_NEWPROC).
  786  */
  787 int
  788 sys_sa_enable(struct lwp *l, const void *v, register_t *retval)
  789 {
  790         struct proc *p = l->l_proc;
  791         struct sadata *sa = p->p_sa;
  792         struct sadata_vp *vp = l->l_savp;
  793         int error;
  794 
  795         DPRINTF(("sys_sa_enable(%d.%d)\n", l->l_proc->p_pid,
  796             l->l_lid));
  797 
  798         /* We have to be using scheduler activations */
  799         if (sa == NULL || vp == NULL)
  800                 return (EINVAL);
  801 
  802         if (p->p_sflag & PS_SA) /* Already running! */
  803                 return (EBUSY);
  804 
  805         error = sa_upcall(l, SA_UPCALL_NEWPROC, l, NULL, 0, NULL, NULL);
  806         if (error)
  807                 return (error);
  808 
  809         /* Assign this LWP to the virtual processor */
  810         mutex_enter(p->p_lock);
  811         vp->savp_lwp = l;
  812         p->p_sflag |= PS_SA;
  813         lwp_lock(l);
  814         l->l_flag |= LW_SA; /* We are now an activation LWP */
  815         lwp_unlock(l);
  816         mutex_exit(p->p_lock);
  817 
  818         /*
  819          * This will return to the SA handler previously registered.
  820          */
  821         return (0);
  822 }
  823 
  824 
  825 /*
  826  * sa_increaseconcurrency
  827  *      Raise the process's maximum concurrency level to the
   828  * requested level. Does nothing if the current maximum concurrency
   829  * is greater than the requested level.
  830  *      Must be called with sa_mutex locked. Will unlock and relock as
  831  * needed, and will lock p_lock. Will exit with sa_mutex locked.
  832  */
  833 #ifdef SA_CONCURRENCY
  834 
  835 static int
  836 sa_increaseconcurrency(struct lwp *l, int concurrency)
  837 {
  838         struct proc *p;
  839         struct lwp *l2;
  840         struct sadata *sa;
  841         struct sadata_vp *vp;
  842         struct sadata_upcall *sau;
  843         int addedconcurrency, error;
  844 
  845         p = l->l_proc;
  846         sa = p->p_sa;
  847 
  848         KASSERT(mutex_owned(&sa->sa_mutex));
  849 
  850         addedconcurrency = 0;
  851         while (sa->sa_maxconcurrency < concurrency) {
  852                 sa->sa_maxconcurrency++;
  853                 sa->sa_concurrency++;
  854                 mutex_exit(&sa->sa_mutex);
  855 
  856                 vp = sa_newsavp(p);
  857                 error = sa_newcachelwp(l, vp);
  858                 if (error) {
  859                         /* reset concurrency */
  860                         mutex_enter(&sa->sa_mutex);
  861                         sa->sa_maxconcurrency--;
  862                         sa->sa_concurrency--;
  863                         return (addedconcurrency);
  864                 }
  865                 mutex_enter(&vp->savp_mutex);
  866                 l2 = sa_getcachelwp(p, vp);
  867                 vp->savp_lwp = l2;
  868 
  869                 sau = vp->savp_sleeper_upcall;
  870                 vp->savp_sleeper_upcall = NULL;
  871                 KASSERT(sau != NULL);
  872 
  873                 cpu_setfunc(l2, sa_switchcall, sau);
  874                 sa_upcall0(sau, SA_UPCALL_NEWPROC, NULL, NULL,
  875                     0, NULL, NULL);
  876 
  877                 if (error) {
  878                         /* put l2 into l's VP LWP cache */
  879                         mutex_exit(&vp->savp_mutex);
  880                         lwp_lock(l2);
  881                         l2->l_savp = l->l_savp;
  882                         cpu_setfunc(l2, sa_neverrun, NULL);
  883                         lwp_unlock(l2);
  884                         mutex_enter(&l->l_savp->savp_mutex);
  885                         sa_putcachelwp(p, l2);
  886                         mutex_exit(&l->l_savp->savp_mutex);
  887 
  888                         /* Free new savp */
  889                         sa_freevp(p, sa, vp);
  890 
  891                         /* reset concurrency */
  892                         mutex_enter(&sa->sa_mutex);
  893                         sa->sa_maxconcurrency--;
  894                         sa->sa_concurrency--;
  895                         return (addedconcurrency);
  896                 }
  897                 /* Run the LWP, locked since its mutex is still savp_mutex */
  898                 sa_setrunning(l2);
  899                 uvm_lwp_rele(l2);
  900                 mutex_exit(&vp->savp_mutex);
  901 
  902                 mutex_enter(&sa->sa_mutex);
  903                 addedconcurrency++;
  904         }
  905 
  906         return (addedconcurrency);
  907 }
  908 #endif
  909 
  910 /*
  911  * sys_sa_setconcurrency
  912  *      The user threading library wants to increase the number
   913  * of active virtual CPUs we assign to it. We return the number of virtual
   914  * CPUs we assigned to the process. We limit concurrency to the number
  915  * of CPUs in the system.
  916  *
  917  * WRS: at present, this system call serves two purposes. The first is
  918  * for an application to indicate that it wants a certain concurrency
  919  * level. The second is for the application to request that the kernel
  920  * reactivate previously allocated virtual CPUs.
  921  */
  922 int
  923 sys_sa_setconcurrency(struct lwp *l, const struct sys_sa_setconcurrency_args *uap,
  924         register_t *retval)
  925 {
  926         struct proc *p = l->l_proc;
  927         struct sadata *sa = p->p_sa;
  928 #ifdef SA_CONCURRENCY
  929         struct sadata_vp *vp = l->l_savp;
  930         struct lwp *l2;
  931         int ncpus;
  932         struct cpu_info *ci;
  933         CPU_INFO_ITERATOR cii;
  934 #endif
  935 
  936         DPRINTFN(11,("sys_sa_concurrency(%d.%d)\n", p->p_pid,
  937                      l->l_lid));
  938 
  939         /* We have to be using scheduler activations */
  940         if (sa == NULL)
  941                 return (EINVAL);
  942 
  943         if ((p->p_sflag & PS_SA) == 0)
  944                 return (EINVAL);
  945 
  946         if (SCARG(uap, concurrency) < 1)
  947                 return (EINVAL);
  948 
  949         *retval = 0;
  950         /*
  951          * Concurrency greater than the number of physical CPUs does
  952          * not make sense.
  953          * XXX Should we ever support hot-plug CPUs, this will need
  954          * adjustment.
  955          */
  956 #ifdef SA_CONCURRENCY
  957         mutex_enter(&sa->sa_mutex);
  958 
  959         if (SCARG(uap, concurrency) > sa->sa_maxconcurrency) {
  960                 ncpus = 0;
  961                 for (CPU_INFO_FOREACH(cii, ci))
  962                         ncpus++;
  963                 *retval += sa_increaseconcurrency(l,
  964                     min(SCARG(uap, concurrency), ncpus));
  965         }
  966 #endif
  967 
  968         DPRINTFN(11,("sys_sa_concurrency(%d.%d) want %d, have %d, max %d\n",
  969                      p->p_pid, l->l_lid, SCARG(uap, concurrency),
  970                      sa->sa_concurrency, sa->sa_maxconcurrency));
  971 #ifdef SA_CONCURRENCY
  972         if (SCARG(uap, concurrency) <= sa->sa_concurrency) {
  973                 mutex_exit(&sa->sa_mutex);
  974                 return 0;
  975         }
  976         SLIST_FOREACH(vp, &sa->sa_vps, savp_next) {
  977                 l2 = vp->savp_lwp;
  978                 lwp_lock(l2);
  979                 if (l2->l_flag & LW_SA_IDLE) {
  980                         l2->l_flag &= ~(LW_SA_IDLE|LW_SA_YIELD|LW_SINTR);
  981                         lwp_unlock(l2);
  982                         DPRINTFN(11,("sys_sa_concurrency(%d.%d) NEWPROC vp %d\n",
  983                                      p->p_pid, l->l_lid, vp->savp_id));
  984                         sa->sa_concurrency++;
  985                         mutex_exit(&sa->sa_mutex);
  986                         /* error = */ sa_upcall(l2, SA_UPCALL_NEWPROC, NULL,
  987                             NULL, 0, NULL, NULL);
  988                         lwp_lock(l2);
  989                         /* lwp_unsleep() will unlock the LWP */
  990                         lwp_unsleep(vp->savp_lwp, true);
  991                         KASSERT((l2->l_flag & LW_SINTR) == 0);
  992                         (*retval)++;
  993                         mutex_enter(&sa->sa_mutex);
  994                 } else
  995                         lwp_unlock(l2);
  996                 if (sa->sa_concurrency == SCARG(uap, concurrency))
  997                         break;
  998         }
  999         mutex_exit(&sa->sa_mutex);
 1000 #endif
 1001         return 0;
 1002 }
 1003 
 1004 /*
 1005  * sys_sa_yield
 1006  *      application has nothing for this lwp to do, so let it linger in
 1007  * the kernel.
 1008  */
 1009 int
 1010 sys_sa_yield(struct lwp *l, const void *v, register_t *retval)
 1011 {
 1012         struct proc *p = l->l_proc;
 1013 
 1014         mutex_enter(p->p_lock);
 1015         if (p->p_sa == NULL || !(p->p_sflag & PS_SA)) {
 1016                 mutex_exit(p->p_lock);
 1017                 DPRINTFN(2,
 1018                     ("sys_sa_yield(%d.%d) proc %p not SA (p_sa %p, flag %s)\n",
 1019                     p->p_pid, l->l_lid, p, p->p_sa,
 1020                     p->p_sflag & PS_SA ? "T" : "F"));
 1021                 return (EINVAL);
 1022         }
 1023 
 1024         mutex_exit(p->p_lock);
 1025 
 1026         sa_yield(l);
 1027 
 1028         return (EJUSTRETURN);
 1029 }
 1030 
 1031 /*
 1032  * sa_yield
 1033  *      This lwp has nothing to do, so hang around. Assuming we
 1034  * are the lwp "on" our vp, sleep in "sawait" until there's something
 1035  * to do.
 1036  *
 1037  *      Unfortunately some subsystems can't directly tell us if there's an
  1038  * upcall going to happen when we get woken up. Work gets deferred to
 1039  * userret() and that work may trigger an upcall. So we have to try
 1040  * calling userret() (by calling upcallret()) and see if makeupcalls()
 1041  * delivered an upcall. It will clear LW_SA_YIELD if it did.
 1042  */
 1043 void
 1044 sa_yield(struct lwp *l)
 1045 {
 1046         struct proc *p = l->l_proc;
 1047         struct sadata *sa = p->p_sa;
 1048         struct sadata_vp *vp = l->l_savp;
 1049         int ret;
 1050 
 1051         lwp_lock(l);
 1052 
 1053         if (vp->savp_lwp != l) {
 1054                 lwp_unlock(l);
 1055 
 1056                 /*
  1057                  * We lost the VP on our way here; this happens, for
  1058                  * instance, when we sleep in systrace.  This will end
 1059                  * in an SA_UNBLOCKED_UPCALL in sa_unblock_userret().
 1060                  */
 1061                 DPRINTFN(2,("sa_yield(%d.%d) lost VP\n",
 1062                              p->p_pid, l->l_lid));
 1063                 KASSERT(l->l_flag & LW_SA_BLOCKING);
 1064                 return;
 1065         }
 1066 
 1067         /*
 1068          * If we're the last running LWP, stick around to receive
 1069          * signals.
 1070          */
 1071         KASSERT((l->l_flag & LW_SA_YIELD) == 0);
 1072         DPRINTFN(2,("sa_yield(%d.%d) going dormant\n",
 1073                      p->p_pid, l->l_lid));
 1074         /*
 1075          * A signal will probably wake us up. Worst case, the upcall
 1076          * happens and just causes the process to yield again.
 1077          */
 1078         KASSERT(vp->savp_lwp == l);
 1079 
 1080         /*
 1081          * If we were told to make an upcall or exit already
 1082          * make sure we process it (by returning and letting userret() do
 1083          * the right thing). Otherwise set LW_SA_YIELD and go to sleep.
 1084          */
 1085         ret = 0;
 1086         if (l->l_flag & LW_SA_UPCALL) {
 1087                 lwp_unlock(l);
 1088                 return;
 1089         }
 1090         l->l_flag |= LW_SA_YIELD;
 1091 
 1092         do {
 1093                 lwp_unlock(l);
 1094                 DPRINTFN(2,("sa_yield(%d.%d) really going dormant\n",
 1095                              p->p_pid, l->l_lid));
 1096 
 1097                 mutex_enter(&sa->sa_mutex);
 1098                 sa->sa_concurrency--;
 1099                 ret = cv_wait_sig(&sa->sa_cv, &sa->sa_mutex);
 1100                 sa->sa_concurrency++;
 1101                 mutex_exit(&sa->sa_mutex);
 1102                 DPRINTFN(2,("sa_yield(%d.%d) woke\n",
 1103                              p->p_pid, l->l_lid));
 1104 
 1105                 KASSERT(vp->savp_lwp == l || p->p_sflag & PS_WEXIT);
 1106 
 1107                 /*
 1108                  * We get woken in two different ways. Most code
 1109                  * calls setrunnable() which clears LW_SA_IDLE,
 1110                  * but leaves LW_SA_YIELD. Some call points
 1111                  * (in this file) however also clear LW_SA_YIELD, mainly
 1112                  * as the code knows there is an upcall to be delivered.
 1113                  *
 1114                  * As noted above, except in the cases where other code
 1115                  * in this file cleared LW_SA_YIELD already, we have to
 1116                  * try calling upcallret() & seeing if upcalls happen.
 1117                  * if so, tell userret() NOT to deliver more upcalls on
 1118                  * the way out!
 1119                  */
 1120                 if (l->l_flag & LW_SA_YIELD) {
 1121                         upcallret(l);
 1122                         if (~l->l_flag & LW_SA_YIELD) {
 1123                                 /*
 1124                                  * Ok, we made an upcall. We will exit. Tell
 1125                                  * sa_upcall_userret() to NOT make any more
 1126                                  * upcalls.
 1127                                  */
 1128                                 vp->savp_pflags |= SAVP_FLAG_NOUPCALLS;
 1129                                 /*
 1130                                  * Now force us to call into sa_upcall_userret()
 1131                                  * which will clear SAVP_FLAG_NOUPCALLS
 1132                                  */
 1133                                 lwp_lock(l);
 1134                                 l->l_flag |= LW_SA_UPCALL;
 1135                                 lwp_unlock(l);
 1136                         }
 1137                 }
 1138 
 1139                 lwp_lock(l);
 1140         } while (l->l_flag & LW_SA_YIELD);
 1141 
 1142         DPRINTFN(2,("sa_yield(%d.%d) returned, ret %d\n",
 1143                      p->p_pid, l->l_lid, ret));
 1144 
 1145         lwp_unlock(l);
 1146 }
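
/*
 * Flag summary for the yield path, derived from the code above and from
 * sa_switch() below: LW_SA_YIELD marks an lwp that has nothing to do and
 * is parked in sa_yield(); LW_SA_IDLE is additionally set by sa_switch()
 * once such an lwp has gone to sleep with no pending work, and is what
 * sys_sa_setconcurrency() looks for when reactivating idle virtual
 * processors.
 */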
 1147 
 1148 
 1149 /*
 1150  * sys_sa_preempt - preempt a running thread
 1151  *
 1152  * Given an lwp id, send it a user upcall. This is a way for libpthread to
 1153  * kick something into the upcall handler.
 1154  */
 1155 int
 1156 sys_sa_preempt(struct lwp *l, const struct sys_sa_preempt_args *uap,
 1157     register_t *retval)
 1158 {
 1159 /* Not yet ready */
 1160 #if 0
 1161         struct proc             *p = l->l_proc;
 1162         struct sadata           *sa = p->p_sa;
 1163         struct lwp              *t;
 1164         int                     target, error;
 1165 
 1166         DPRINTFN(11,("sys_sa_preempt(%d.%d)\n", l->l_proc->p_pid,
 1167                      l->l_lid));
 1168 
 1169         /* We have to be using scheduler activations */
 1170         if (sa == NULL)
 1171                 return (EINVAL);
 1172 
 1173         if ((p->p_sflag & PS_SA) == 0)
 1174                 return (EINVAL);
 1175 
 1176         if ((target = SCARG(uap, sa_id)) < 1)
 1177                 return (EINVAL);
 1178 
 1179         mutex_enter(p->p_lock);
 1180 
 1181         LIST_FOREACH(t, &l->l_proc->p_lwps, l_sibling)
 1182                 if (t->l_lid == target)
 1183                         break;
 1184 
 1185         if (t == NULL) {
 1186                 error = ESRCH;
 1187                 goto exit_lock;
 1188         }
 1189 
 1190         /* XXX WRS We really need all of this locking documented */
 1191         mutex_exit(p->p_lock);
 1192 
 1193         error = sa_upcall(l, SA_UPCALL_USER | SA_UPCALL_DEFER_EVENT, l, NULL,
 1194                 0, NULL, NULL);
 1195         if (error)
 1196                 return error;
 1197 
 1198         return 0;
 1199 
 1200 exit_lock:
 1201         mutex_exit(p->p_lock);
 1202 
 1203         return error;
 1204 #else
 1205         /* Just return an error */
 1206         return (sys_nosys(l, (const void *)uap, retval));
 1207 #endif
 1208 }
 1209 
 1210 
 1211 /* XXX Hm, naming collision. */
 1212 /*
 1213  * sa_preempt(). In the 4.0 code, this routine is called when we 
 1214  * are in preempt() and the caller informed us it does NOT
 1215  * have more work to do (it's going to userland after we return).
 1216  * If mi_switch() tells us we switched to another thread, we
 1217  * generate a BLOCKED upcall. Since we are returning to userland
 1218  * we then will immediately generate an UNBLOCKED upcall as well.
 1219  *      The only place that actually didn't tell preempt() that
 1220  * we had more to do was sys_sched_yield() (well, midi did too, but
 1221  * that was a bug).
 1222  *
  1223  * For simplicity, in 5.0+ code, just call this routine in
 1224  * sys_sched_yield after we preempt(). The BLOCKED/UNBLOCKED
 1225  * upcall sequence will get delivered when we return to userland
 1226  * and will ensure that the SA scheduler has an opportunity to
 1227  * effectively preempt the thread that was running in userland.
 1228  *
 1229  * Of course, it would be simpler for libpthread to just intercept
  1230  * this call, but we do this to ensure binary compatibility. Plus
 1231  * it's not hard to do.
 1232  *
 1233  * We are called and return with no locks held.
 1234  */
 1235 void
 1236 sa_preempt(struct lwp *l)
 1237 {
 1238         struct proc *p = l->l_proc;
 1239         struct sadata *sa = p->p_sa;
 1240 
 1241         /*
 1242          * Defer saving the lwp's state because on some ports
 1243          * preemption can occur between generating an unblocked upcall
 1244          * and processing the upcall queue.
 1245          */
 1246         if (sa->sa_flag & SA_FLAG_PREEMPT)
 1247                 sa_upcall(l, SA_UPCALL_PREEMPTED | SA_UPCALL_DEFER_EVENT,
 1248                     l, NULL, 0, NULL, NULL);
 1249 }
 1250 
 1251 
 1252 /*
 1253  * Set up the user-level stack and trapframe to do an upcall.
 1254  *
 1255  * NOTE: This routine WILL FREE "arg" in the case of failure!  Callers
  1256  * should not touch the "arg" pointer after calling sa_upcall().
 1257  */
 1258 int
 1259 sa_upcall(struct lwp *l, int type, struct lwp *event, struct lwp *interrupted,
 1260         size_t argsize, void *arg, void (*func)(void *))
 1261 {
 1262         struct sadata_upcall *sau;
 1263         struct sadata *sa = l->l_proc->p_sa;
 1264         struct sadata_vp *vp = l->l_savp;
 1265         struct sastack *sast;
 1266         int f, error;
 1267 
 1268         KASSERT((type & (SA_UPCALL_LOCKED_EVENT | SA_UPCALL_LOCKED_INTERRUPTED))
 1269                 == 0);
 1270 
 1271         /* XXX prevent recursive upcalls if we sleep for memory */
 1272         SA_LWP_STATE_LOCK(curlwp, f);
 1273         sau = sadata_upcall_alloc(1);
 1274         mutex_enter(&sa->sa_mutex);
 1275         sast = sa_getstack(sa);
 1276         mutex_exit(&sa->sa_mutex);
 1277         SA_LWP_STATE_UNLOCK(curlwp, f);
 1278 
 1279         if (sau == NULL || sast == NULL) {
 1280                 if (sast != NULL) {
 1281                         mutex_enter(&sa->sa_mutex);
 1282                         sa_setstackfree(sast, sa);
 1283                         mutex_exit(&sa->sa_mutex);
 1284                 }
 1285                 if (sau != NULL)
 1286                         sadata_upcall_free(sau);
 1287                 return (ENOMEM);
 1288         }
 1289         DPRINTFN(9,("sa_upcall(%d.%d) using stack %p\n",
 1290             l->l_proc->p_pid, l->l_lid, sast->sast_stack.ss_sp));
 1291 
 1292         if (l->l_proc->p_emul->e_sa->sae_upcallconv) {
 1293                 error = (*l->l_proc->p_emul->e_sa->sae_upcallconv)(l, type,
 1294                     &argsize, &arg, &func);
 1295                 if (error) {
 1296                         mutex_enter(&sa->sa_mutex);
 1297                         sa_setstackfree(sast, sa);
 1298                         mutex_exit(&sa->sa_mutex);
 1299                         sadata_upcall_free(sau);
 1300                         return error;
 1301                 }
 1302         }
 1303 
 1304         sa_upcall0(sau, type, event, interrupted, argsize, arg, func);
 1305         sau->sau_stack = sast->sast_stack;
 1306         mutex_enter(&vp->savp_mutex);
 1307         SIMPLEQ_INSERT_TAIL(&vp->savp_upcalls, sau, sau_next);
 1308         lwp_lock(l);
 1309         l->l_flag |= LW_SA_UPCALL;
 1310         lwp_unlock(l);
 1311         mutex_exit(&vp->savp_mutex);
 1312 
 1313         return (0);
 1314 }
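
/*
 * Typical call, as sys_sa_enable() above does when turning SA on: queue
 * a NEWPROC upcall for the current lwp, with no interrupted lwp and no
 * argument payload:
 *
 *	error = sa_upcall(l, SA_UPCALL_NEWPROC, l, NULL, 0, NULL, NULL);
 *
 * The upcall is only constructed on the user stack and delivered later,
 * on the way back to userland (see the sa_makeupcalls() prototype at the
 * top of this file).
 */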
 1315 
 1316 static void
 1317 sa_upcall0(struct sadata_upcall *sau, int type, struct lwp *event,
 1318     struct lwp *interrupted, size_t argsize, void *arg, void (*func)(void *))
 1319 {
 1320         DPRINTFN(12,("sa_upcall0: event %p interrupted %p type %x\n",
 1321             event, interrupted, type));
 1322 
 1323         KASSERT((event == NULL) || (event != interrupted));
 1324 
 1325         sau->sau_flags = 0;
 1326 
 1327         if (type & SA_UPCALL_DEFER_EVENT) {
 1328                 sau->sau_event.ss_deferred.ss_lwp = event;
 1329                 sau->sau_flags |= SAU_FLAG_DEFERRED_EVENT;
 1330         } else
 1331                 sa_upcall_getstate(&sau->sau_event, event,
 1332                         type & SA_UPCALL_LOCKED_EVENT);
 1333         if (type & SA_UPCALL_DEFER_INTERRUPTED) {
 1334                 sau->sau_interrupted.ss_deferred.ss_lwp = interrupted;
 1335                 sau->sau_flags |= SAU_FLAG_DEFERRED_INTERRUPTED;
 1336         } else
 1337                 sa_upcall_getstate(&sau->sau_interrupted, interrupted,
 1338                         type & SA_UPCALL_LOCKED_INTERRUPTED);
 1339 
 1340         sau->sau_type = type & SA_UPCALL_TYPE_MASK;
 1341         sau->sau_argsize = argsize;
 1342         sau->sau_arg = arg;
 1343         sau->sau_argfreefunc = func;
 1344 }
 1345 
 1346 /*
 1347  * sa_ucsp
 1348  *      return the stack pointer (??) for a given context as
 1349  * reported by the _UC_MACHINE_SP() macro.
 1350  */
 1351 void *
 1352 sa_ucsp(void *arg)
 1353 {
 1354         ucontext_t *uc = arg;
 1355 
 1356         return (void *)(uintptr_t)_UC_MACHINE_SP(uc);
 1357 }
 1358 
 1359 /*
 1360  * sa_upcall_getstate
 1361  *      Fill in the given sau_state with info for the passed-in
 1362  * lwp, and update the lwp accordingly.
 1363  *      We set LW_SA_SWITCHING on the target lwp, and so we have to hold
 1364  * l's lock in this call. l must be already locked, or it must be unlocked
 1365  * and locking it must not cause deadlock.
 1366  */
 1367 static void
 1368 sa_upcall_getstate(union sau_state *ss, struct lwp *l, int isLocked)
 1369 {
 1370         uint8_t *sp;
 1371         size_t ucsize;
 1372 
 1373         if (l) {
 1374                 if (isLocked == 0)
 1375                         lwp_lock(l);
 1376                 l->l_flag |= LW_SA_SWITCHING;
 1377                 if (isLocked == 0)
 1378                         lwp_unlock(l);
 1379                 (*l->l_proc->p_emul->e_sa->sae_getucontext)(l,
 1380                     (void *)&ss->ss_captured.ss_ctx);
 1381                 if (isLocked == 0)
 1382                         lwp_lock(l);
 1383                 l->l_flag &= ~LW_SA_SWITCHING;
 1384                 if (isLocked == 0)
 1385                         lwp_unlock(l);
 1386                 sp = (*l->l_proc->p_emul->e_sa->sae_ucsp)
 1387                     (&ss->ss_captured.ss_ctx);
 1388                 /* XXX COMPAT_NETBSD32: _UC_UCONTEXT_ALIGN */
 1389                 sp = STACK_ALIGN(sp, ~_UC_UCONTEXT_ALIGN);
 1390                 ucsize = roundup(l->l_proc->p_emul->e_sa->sae_ucsize,
 1391                     (~_UC_UCONTEXT_ALIGN) + 1);
 1392                 ss->ss_captured.ss_sa.sa_context =
 1393                     (ucontext_t *)STACK_ALLOC(sp, ucsize);
 1394                 ss->ss_captured.ss_sa.sa_id = l->l_lid;
 1395                 ss->ss_captured.ss_sa.sa_cpu = l->l_savp->savp_id;
 1396         } else
 1397                 ss->ss_captured.ss_sa.sa_context = NULL;
 1398 }
 1399 
 1400 
 1401 /*
 1402  * sa_pagefault
 1403  *
 1404  * Detect double pagefaults and pagefaults on upcalls.
 1405  * - double pagefaults are detected by comparing the previous faultaddr
 1406  *   against the current faultaddr
 1407  * - pagefaults on upcalls are detected by checking if the userspace
 1408  *   thread is running on an upcall stack
 1409  */
 1410 static inline int
 1411 sa_pagefault(struct lwp *l, ucontext_t *l_ctx)
 1412 {
 1413         struct proc *p;
 1414         struct sadata *sa;
 1415         struct sadata_vp *vp;
 1416         struct sastack sast;
 1417         int found;
 1418 
 1419         p = l->l_proc;
 1420         sa = p->p_sa;
 1421         vp = l->l_savp;
 1422 
 1423         KASSERT(mutex_owned(&sa->sa_mutex));
 1424         KASSERT(vp->savp_lwp == l);
 1425 
 1426         if (vp->savp_faultaddr == vp->savp_ofaultaddr) {
 1427                 DPRINTFN(10,("sa_pagefault(%d.%d) double page fault\n",
 1428                              p->p_pid, l->l_lid));
 1429                 return 1;
 1430         }
 1431 
 1432         sast.sast_stack.ss_sp = (*p->p_emul->e_sa->sae_ucsp)(l_ctx);
 1433         sast.sast_stack.ss_size = 1;
 1434         found = (RB_FIND(sasttree, &sa->sa_stackstree, &sast) != NULL);
 1435 
 1436         if (found) {
 1437                 DPRINTFN(10,("sa_pagefault(%d.%d) upcall page fault\n",
 1438                              p->p_pid, l->l_lid));
 1439                 return 1;
 1440         }
 1441 
 1442         vp->savp_ofaultaddr = vp->savp_faultaddr;
 1443         return 0;
 1444 }
 1445 
 1446 
 1447 /*
 1448  * sa_switch
 1449  *
 1450  * Called by sleepq_block() when it wants to call mi_switch().
 1451  * Block current LWP and switch to another.
 1452  *
 1453  * WE ARE NOT ALLOWED TO SLEEP HERE!  WE ARE CALLED FROM WITHIN
 1454  * SLEEPQ_BLOCK() ITSELF!  We are called with the LWP locked, and must
 1455  * keep it locked right through the mi_switch() call.
 1456  *
 1457  * We return with the scheduler unlocked.
 1458  *
 1459  * We are called in one of three conditions, matching the case labels below:
 1460  *
 1461  * 0:           We are an sa_yield thread. If there are any UNBLOCKED
 1462  *      upcalls to deliver, deliver them (by exiting) instead of sleeping.
 1463  * 1:           We are the main lwp (we're the lwp on our vp). Trigger
 1464  *      delivery of a BLOCKED upcall.
 1465  * 2:           We are not the main lwp on our vp. Chances are we got
 1466  *      woken up but the sleeper turned around and went back to sleep.
 1467  *      It seems that select and poll do this a lot. So just go back to sleep.
 1468  */
 1469 
 1470 void
 1471 sa_switch(struct lwp *l)
 1472 {
 1473         struct proc *p = l->l_proc;
 1474         struct sadata_vp *vp = l->l_savp;
 1475         struct sadata_upcall *sau = NULL;
 1476         struct lwp *l2;
 1477 
 1478         KASSERT(lwp_locked(l, NULL));
 1479 
 1480         DPRINTFN(4,("sa_switch(%d.%d VP %d)\n", p->p_pid, l->l_lid,
 1481             vp->savp_lwp ? vp->savp_lwp->l_lid : 0));
 1482 
 1483         if ((l->l_flag & LW_WEXIT) || (p->p_sflag & (PS_WCORE | PS_WEXIT))) {
 1484                 mi_switch(l);
 1485                 return;
 1486         }
 1487 
 1488         /*
 1489          * We need to hold two locks from here on out. Since you can
 1490          * sleepq_block() on ANY lock, there really can't be a locking
 1491          * hierarchy relative to savp_mutex. So if we can't get the mutex,
 1492          * drop the lwp lock, get the mutex, and carry on.
 1493          *
 1494          * Assumes the lwp lock can never be a sleeping mutex.
 1495          *
 1496          * We do, however, try hard to always get savp_mutex on the first
 1497          * try. The only times we lock it are either when we are the blessed
 1498          * lwp for our vp, or when a blocked lwp is adding itself to the
 1499          * savp_woken list. So contention should be rare.
 1500          */
 1501         if (!mutex_tryenter(&vp->savp_mutex)) {
 1502                 lwp_unlock(l);
 1503                 mutex_enter(&vp->savp_mutex);
 1504                 lwp_lock(l);
 1505         }
 1506         if (l->l_stat == LSONPROC) {
 1507                 /* Oops! We woke before we got to sleep. Ok, back we go! */
 1508                 lwp_unlock(l);
 1509                 mutex_exit(&vp->savp_mutex);
 1510                 return;
 1511         }
 1512 
 1513         if (l->l_flag & LW_SA_YIELD) {
 1514                 /*
 1515                  * Case 0: we're blocking in sa_yield
 1516                  */
 1517                 DPRINTFN(4,("sa_switch(%d.%d) yield, flags %x pflag %x\n",
 1518                     p->p_pid, l->l_lid, l->l_flag, l->l_pflag));
 1519                 if (vp->savp_woken_count == 0 && p->p_timerpend == 0) {
 1520                         DPRINTFN(4,("sa_switch(%d.%d) setting idle\n",
 1521                             p->p_pid, l->l_lid));
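                              /*
                               * Nothing to deliver: mark ourselves idle so that
                               * sa_unblock_userret() knows it may wake this lwp
                               * directly when a blocked lwp comes back.
                               */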
 1522                         l->l_flag |= LW_SA_IDLE;
 1523                         mutex_exit(&vp->savp_mutex);
 1524                         mi_switch(l);
 1525                 } else {
 1526                         /*
 1527                          * Make us running again. lwp_unsleep() will
 1528                          * release the lock.
 1529                          */
 1530                         mutex_exit(&vp->savp_mutex);
 1531                         lwp_unsleep(l, true);
 1532                 }
 1533                 return;
 1534         }
 1535 
 1536         if (vp->savp_lwp == l) {
 1537                 if (vp->savp_pflags & SAVP_FLAG_DELIVERING) {
 1538                         /*
 1539                          * We've exited sa_switchcall() but NOT
 1540                          * made it into a new system call. Don't make
 1541                          * a BLOCKED upcall.
 1542                          */
 1543                         mutex_exit(&vp->savp_mutex);
 1544                         mi_switch(l);
 1545                         return;
 1546                 }
 1547                 /*
 1548                  * Case 1: we're blocking for the first time; generate
 1549                  * a SA_BLOCKED upcall and allocate resources for the
 1550                  * UNBLOCKED upcall.
 1551                  */
 1552                 if (vp->savp_sleeper_upcall) {
 1553                         sau = vp->savp_sleeper_upcall;
 1554                         vp->savp_sleeper_upcall = NULL;
 1555                 }
 1556 
 1557                 if (sau == NULL) {
 1558 #ifdef DIAGNOSTIC
 1559                         printf("sa_switch(%d.%d): no upcall data.\n",
 1560                             p->p_pid, l->l_lid);
 1561 #endif
 1562                         panic("Oops! Don't have a sleeper!\n");
 1563                         /* XXXWRS Shouldn't we just kill the app here? */
 1564                         mutex_exit(&vp->savp_mutex);
 1565                         mi_switch(l);
 1566                         return;
 1567                 }
 1568 
 1569                 /*
 1570                  * The process of allocating a new LWP could cause
 1571                  * sleeps. We're called from inside sleep, so that
 1572                  * would be Bad. Therefore, we must use a cached new
 1573                  * LWP. The first thing that this new LWP must do is
 1574                  * allocate another LWP for the cache.
 1575                  */
 1576                 l2 = sa_getcachelwp(p, vp);
 1577                 if (l2 == NULL) {
 1578                         /* XXXSMP */
 1579                         /* No upcall for you! */
 1580                         /* XXX The consequences of this are more subtle and
 1581                          * XXX the recovery from this situation deserves
 1582                          * XXX more thought.
 1583                          */
 1584 
 1585                         /* XXXUPSXXX Should only happen with concurrency > 1 */
 1586                         mutex_exit(&vp->savp_mutex);
 1587                         mi_switch(l);
 1588                         sadata_upcall_free(sau);
 1589                         return;
 1590                 }
 1591 
 1592                 cpu_setfunc(l2, sa_switchcall, sau);
 1593                 sa_upcall0(sau, SA_UPCALL_BLOCKED | SA_UPCALL_LOCKED_EVENT, l,
 1594                         NULL, 0, NULL, NULL);
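                      /*
                       * When l2 first runs it will enter sa_switchcall(), which
                       * (given an upcall stack) queues this BLOCKED upcall on the
                       * vp and flags l2 with LW_SA_UPCALL so the upcall is
                       * delivered on the next return to userland.
                       */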
 1595 
 1596                 /*
 1597                  * Perform the double/upcall pagefault check.
 1598                  * We do this only here since we need l's ucontext to
 1599                  * get l's userspace stack. sa_upcall0 above has saved
 1600                  * it for us.
 1601                  * The LP_SA_PAGEFAULT flag is set in the MD
 1602                  * pagefault code to indicate a pagefault.  The MD
 1603                  * pagefault code also saves the faultaddr for us.
 1604                  *
 1605                  * If the double check is true, turn this into a non-upcall
 1606                  * block.
 1607                  */
 1608                 if ((l->l_flag & LP_SA_PAGEFAULT) && sa_pagefault(l,
 1609                         &sau->sau_event.ss_captured.ss_ctx) != 0) {
 1610                         cpu_setfunc(l2, sa_neverrun, NULL);
 1611                         sa_putcachelwp(p, l2); /* uvm_lwp_hold from sa_getcachelwp */
 1612                         mutex_exit(&vp->savp_mutex);
 1613                         DPRINTFN(4,("sa_switch(%d.%d) Pagefault\n",
 1614                             p->p_pid, l->l_lid));
 1615                         mi_switch(l);
 1616                         /*
 1617                          * WRS: not sure how vp->savp_sleeper_upcall could be
 1618                          * non-NULL here, but be careful nonetheless.
 1619                          */
 1620                         if (vp->savp_sleeper_upcall == NULL)
 1621                                 vp->savp_sleeper_upcall = sau;
 1622                         else
 1623                                 sadata_upcall_free(sau);
 1624                         DPRINTFN(10,("sa_switch(%d.%d) page fault resolved\n",
 1625                                      p->p_pid, l->l_lid));
 1626                         mutex_enter(&vp->savp_mutex);
 1627                         if (vp->savp_faultaddr == vp->savp_ofaultaddr)
 1628                                 vp->savp_ofaultaddr = -1;
 1629                         mutex_exit(&vp->savp_mutex);
 1630                         return;
 1631                 }
 1632 
 1633                 DPRINTFN(8,("sa_switch(%d.%d) blocked upcall %d\n",
 1634                              p->p_pid, l->l_lid, l2->l_lid));
 1635 
 1636                 l->l_flag |= LW_SA_BLOCKING;
 1637                 vp->savp_blocker = l;
 1638                 vp->savp_lwp = l2;
 1639 
 1640                 sa_setrunning(l2);
 1641 
 1642                 /* Remove the artificial hold-count */
 1643                 uvm_lwp_rele(l2);
 1644 
 1645                 KASSERT(l2 != l);
 1646         } else if (vp->savp_lwp != NULL) {
 1647 
 1648                 /*
 1649                  * Case 2: We've been woken up while another LWP was
 1650                  * on the VP, but we're going back to sleep without
 1651                  * having returned to userland and delivering the
 1652                  * SA_UNBLOCKED upcall (select and poll cause this
 1653                  * kind of behavior a lot).
 1654                  */
 1655                 l2 = NULL;
 1656         } else {
 1657                 /* NOTREACHED */
 1658                 mutex_exit(&vp->savp_mutex);
 1659                 lwp_unlock(l);
 1660                 panic("sa_vp empty");
 1661         }
 1662 
 1663         DPRINTFN(4,("sa_switch(%d.%d) switching to LWP %d.\n",
 1664             p->p_pid, l->l_lid, l2 ? l2->l_lid : 0));
 1665         /* WRS need to add code to make sure we switch to l2 */
 1666         mutex_exit(&vp->savp_mutex);
 1667         mi_switch(l);
 1668         DPRINTFN(4,("sa_switch(%d.%d flag %x) returned.\n",
 1669             p->p_pid, l->l_lid, l->l_flag));
 1670         KASSERT(l->l_wchan == 0);
 1671 }
 1672 
 1673 /*
 1674  * sa_neverrun
 1675  *
 1676  *      Start routine for threads that have never run. New, never-run
 1677  * cache lwps get pointed at this routine; if one is actually scheduled,
 1678  * it simply calls lwp_exit().
 1679  */
 1680 static void
 1681 sa_neverrun(void *arg)
 1682 {
 1683         struct lwp *l;
 1684 
 1685         l = curlwp;
 1686 
 1687         DPRINTFN(1,("sa_neverrun(%d.%d %x) exiting\n", l->l_proc->p_pid,
 1688             l->l_lid, l->l_flag));
 1689 
 1690         lwp_exit(l);
 1691 }
 1692 
 1693 /*
 1694  * sa_switchcall
 1695  *
 1696  * We need to pass an upcall to userland. We are now
 1697  * running on a spare lwp pulled from the cache, so we first replenish
 1698  * the cache if needed. Also, if we were passed an sa upcall, we need to
 1699  * dispatch it to the app.
 1700  */
 1701 static void
 1702 sa_switchcall(void *arg)
 1703 {
 1704         struct lwp *l, *l2;
 1705         struct proc *p;
 1706         struct sadata_vp *vp;
 1707         struct sadata_upcall *sau;
 1708         struct sastack *sast;
 1709         struct sadata *sa;
 1710 
 1711         l2 = curlwp;
 1712         p = l2->l_proc;
 1713         vp = l2->l_savp;
 1714         sau = arg;
 1715         sa = p->p_sa;
 1716 
 1717         lwp_lock(l2);
 1718         KASSERT(vp->savp_lwp == l2);
 1719         if ((l2->l_flag & LW_WEXIT) || (p->p_sflag & (PS_WCORE | PS_WEXIT))) {
 1720                 lwp_unlock(l2);
 1721                 sadata_upcall_free(sau);
 1722                 lwp_exit(l2);
 1723         }
 1724 
 1725         KASSERT(vp->savp_lwp == l2);
 1726         DPRINTFN(6,("sa_switchcall(%d.%d)\n", p->p_pid, l2->l_lid));
 1727 
 1728         l2->l_flag |= LW_SA;
 1729         lwp_unlock(l2);
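              /*
               * LP_SA_NOBLOCK keeps us from generating a nested BLOCKED
               * upcall while this one is still being assembled; it is
               * cleared again near the end of this function, just before
               * we return to userland.
               */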
 1730         l2->l_pflag |= LP_SA_NOBLOCK;
 1731 
 1732         if (vp->savp_lwpcache_count == 0) {
 1733                 /* Allocate the next cache LWP */
 1734                 DPRINTFN(6,("sa_switchcall(%d.%d) allocating LWP\n",
 1735                     p->p_pid, l2->l_lid));
 1736                 sa_newcachelwp(l2, NULL);
 1737         }
 1738 
 1739         if (sau) {
 1740                 mutex_enter(&sa->sa_mutex);
 1741                 sast = sa_getstack(p->p_sa);
 1742                 mutex_exit(&sa->sa_mutex);
 1743                 mutex_enter(&vp->savp_mutex);
 1744                 l = vp->savp_blocker;
 1745                 if (sast) {
 1746                         sau->sau_stack = sast->sast_stack;
 1747                         SIMPLEQ_INSERT_TAIL(&vp->savp_upcalls, sau, sau_next);
 1748                         mutex_exit(&vp->savp_mutex);
 1749                         lwp_lock(l2);
 1750                         l2->l_flag |= LW_SA_UPCALL;
 1751                         lwp_unlock(l2);
 1752                 } else {
 1753                         /*
 1754                          * Oops! We're in trouble. The app hasn't
 1755                          * passed us any stacks on which to deliver
 1756                          * the upcall.
 1757                          *
 1758                          * WRS: I think this code is wrong. If we can't
 1759                          * get a stack, we are dead. We either need
 1760                          * to block waiting for one (assuming there's a
 1761                          * live vp still in userland that can hand back
 1762                          * stacks), or we should just kill the process,
 1763                          * as we're deadlocked.
 1764                          */
 1765                         if (vp->savp_sleeper_upcall == NULL)
 1766                                 vp->savp_sleeper_upcall = sau;
 1767                         else
 1768                                 sadata_upcall_free(sau);
 1769                         uvm_lwp_hold(l2);
 1770                         sa_putcachelwp(p, l2); /* sets LW_SA */
 1771                         mutex_exit(&vp->savp_mutex);
 1772                         lwp_lock(l);
 1773                         vp->savp_lwp = l;
 1774                         l->l_flag &= ~LW_SA_BLOCKING;
 1775                         lwp_unlock(l);
 1776                         //mutex_enter(p->p_lock);       /* XXXAD */
 1777                         //p->p_nrlwps--;
 1778                         //mutex_exit(p->p_lock);
 1779                         lwp_lock(l2);
 1780                         mi_switch(l2);
 1781                         /* mostly NOTREACHED */
 1782                         lwp_exit(l2);
 1783                 }
 1784         }
 1785 
 1786         upcallret(l2);
 1787 
 1788         /*
 1789          * Ok, clear LP_SA_NOBLOCK. However it'd be VERY BAD to generate
 1790          * a blocked upcall before this upcall makes it to libpthread.
 1791          * So disable BLOCKED upcalls until this vp enters a syscall.
 1792          */
 1793         l2->l_pflag &= ~LP_SA_NOBLOCK;
 1794         vp->savp_pflags |= SAVP_FLAG_DELIVERING;
 1795 }
 1796 
 1797 /*
 1798  * sa_newcachelwp
 1799  *      Allocate a new lwp, attach it to either the given vp or to l's vp,
 1800  * and add it to its vp's idle cache.
 1801  *      Assumes no locks (other than kernel lock) on entry and exit.
 1802  * Takes p_lock and the vp's savp_mutex internally.
 1803  *      Returns 0 on success or if process is exiting. Returns ENOMEM
 1804  * if it is unable to allocate a new uarea.
 1805  */
 1806 static int
 1807 sa_newcachelwp(struct lwp *l, struct sadata_vp *targ_vp)
 1808 {
 1809         struct proc *p;
 1810         struct lwp *l2;
 1811         struct sadata_vp *vp;
 1812         vaddr_t uaddr;
 1813         boolean_t inmem;
 1814         int error;
 1815 
 1816         p = l->l_proc;
 1817         if (p->p_sflag & (PS_WCORE | PS_WEXIT))
 1818                 return (0);
 1819 
 1820         inmem = uvm_uarea_alloc(&uaddr);
 1821         if (__predict_false(uaddr == 0)) {
 1822                 return (ENOMEM);
 1823         }
 1824 
 1825         error = lwp_create(l, p, uaddr, inmem, 0, NULL, 0,
 1826             sa_neverrun, NULL, &l2, l->l_class);
 1827         if (error) {
 1828                 uvm_uarea_free(uaddr, curcpu());
 1829                 return error;
 1830         }
 1831 
 1832         /* We don't want this LWP on the process's main LWP list, but
 1833          * lwp_create() helpfully puts it there. Unclear if lwp_create()
 1834          * should be tweaked.
 1835          */
 1836         mutex_enter(p->p_lock);
 1837         p->p_nrlwps++;
 1838         mutex_exit(p->p_lock);
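              /*
               * Take an artificial hold on the new lwp while it sits idle
               * in the cache; sa_switch() drops the hold (uvm_lwp_rele)
               * when the lwp is taken out of the cache and set running.
               */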
 1839         uvm_lwp_hold(l2);
 1840         vp = (targ_vp) ? targ_vp : l->l_savp;
 1841         mutex_enter(&vp->savp_mutex);
 1842         l2->l_savp = vp;
 1843         sa_putcachelwp(p, l2);
 1844         mutex_exit(&vp->savp_mutex);
 1845 
 1846         return 0;
 1847 }
 1848 
 1849 /*
 1850  * sa_putcachelwp
 1851  *      Take a normal process LWP and place it in the SA cache.
 1852  * LWP must not be running, or it must be our caller.
 1853  *      sadata_vp::savp_mutex held on entry and exit.
 1854  *
 1855  *      Previous NetBSD versions removed queued lwps from the list of
 1856  * visible lwps. This made ps cleaner, and hid implementation details.
 1857  * At present, this implementation no longer does that.
 1858  */
 1859 void
 1860 sa_putcachelwp(struct proc *p, struct lwp *l)
 1861 {
 1862         struct sadata_vp *vp;
 1863         sleepq_t        *sq;
 1864 
 1865         vp = l->l_savp;
 1866         sq = &vp->savp_lwpcache;
 1867 
 1868         KASSERT(mutex_owned(&vp->savp_mutex));
 1869 
 1870 #if 0 /* not now, leave lwp visible to all */
 1871         LIST_REMOVE(l, l_sibling);
 1872         p->p_nlwps--;
 1873         l->l_prflag |= LPR_DETACHED;
 1874 #endif
 1875         l->l_flag |= LW_SA;
 1876         membar_producer();
 1877         DPRINTFN(5,("sa_putcachelwp(%d.%d) Adding LWP %d to cache\n",
 1878             p->p_pid, curlwp->l_lid, l->l_lid));
 1879 
 1880         /*
 1881          * Hand-rolled call of the form:
 1882          * sleepq_enter(&vp->savp_woken, l, &vp->savp_mutex);
 1883          * adapted to take into account the fact that (1) l and the mutex
 1884          * we want to lend it are both locked, and (2) we don't have
 1885          * any other locks.
 1886          */
 1887         l->l_mutex = &vp->savp_mutex;
 1888 
 1889         /*
 1890          * XXXWRS: Following is a hand-rolled call of the form:
 1891          * sleepq_enqueue(sq, (void *)sq, "lwpcache", sa_sobj); but
 1892          * hand-done since l might not be curlwp.
 1893          */
 1894 
 1895         l->l_syncobj = &sa_sobj;
 1896         l->l_wchan = sq;
 1897         l->l_sleepq = sq;
 1898         l->l_wmesg = sa_lwpcache_wmesg;
 1899         l->l_slptime = 0;
 1900         l->l_stat = LSSLEEP;
 1901         l->l_sleeperr = 0;
 1902          
 1903         vp->savp_lwpcache_count++;
 1904         sleepq_insert(sq, l, &sa_sobj);
 1905 }
 1906 
 1907 /*
 1908  * sa_getcachelwp
 1909  *      Fetch a LWP from the cache.
 1910  * Called with savp_mutex held.
 1911  */
 1912 struct lwp *
 1913 sa_getcachelwp(struct proc *p, struct sadata_vp *vp)
 1914 {
 1915         struct lwp      *l;
 1916         sleepq_t        *sq = &vp->savp_lwpcache;
 1917 
 1918         KASSERT(mutex_owned(&vp->savp_mutex));
 1919         KASSERT(vp->savp_lwpcache_count > 0);
 1920 
 1921         vp->savp_lwpcache_count--;
 1922         l = TAILQ_FIRST(sq);
 1923 
 1924         /*
 1925          * Now we have a hand-unrolled version of part of sleepq_remove.
 1926          * The main issue is we do NOT want to make the lwp runnable yet
 1927          * since we need to set up the upcall first (we know our caller(s)).
 1928          */
 1929 
 1930         TAILQ_REMOVE(sq, l, l_sleepchain);
 1931         l->l_syncobj = &sched_syncobj;
 1932         l->l_wchan = NULL;
 1933         l->l_sleepq = NULL;
 1934         l->l_flag &= ~LW_SINTR;
 1935 
 1936 #if 0 /* Not now, for now leave lwps in lwp list */
 1937         LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
 1938 #endif
 1939         DPRINTFN(5,("sa_getcachelwp(%d.%d) Got LWP %d from cache.\n",
 1940                 p->p_pid, curlwp->l_lid, l->l_lid));
 1941         return l;
 1942 }
 1943 
 1944 /*
 1945  * sa_setrunning:
 1946  *
 1947  *      Make runnable an lwp we pulled out of the cache with
 1948  * sa_getcachelwp() above. This routine and sa_getcachelwp() must
 1949  * perform all the work of sleepq_remove().
 1950  */
 1951 static void
 1952 sa_setrunning(struct lwp *l)
 1953 {
 1954         struct schedstate_percpu *spc;
 1955         struct cpu_info *ci;
 1956 
 1957         KASSERT(mutex_owned(&l->l_savp->savp_mutex));
 1958 
 1959         /* Update sleep time delta, call the wake-up handler of scheduler */
 1960         l->l_slpticksum += (hardclock_ticks - l->l_slpticks);
 1961         sched_wakeup(l);
 1962 
 1963         /*
 1964          * Since l was on the sleep queue, we locked it
 1965          * when we locked savp_mutex. Now set it running.
 1966          * This is the second-part of sleepq_remove().
 1967          */
 1968         l->l_priority = MAXPRI_USER; /* XXX WRS needs thought, used to be l_usrpri */
 1969         /* Look for a CPU to wake up */
 1970         l->l_cpu = sched_takecpu(l);
 1971         ci = l->l_cpu;
 1972         spc = &ci->ci_schedstate;
 1973 
 1974         spc_lock(ci);
 1975         lwp_setlock(l, spc->spc_mutex);
 1976         sched_setrunnable(l);
 1977         l->l_stat = LSRUN;
 1978         l->l_slptime = 0;
 1979         sched_enqueue(l, true);
 1980         spc_unlock(ci);
 1981 }
 1982 
 1983 /*
 1984  * sa_upcall_userret
 1985  *      We are about to exit the kernel and return to userland, and
 1986  * userret() noticed we have upcalls pending. So deliver them.
 1987  *
 1988  *      This is the place where unblocking upcalls get generated. We
 1989  * allocate the stack & upcall event here. We may block doing so, but
 1990  * we lock our LWP state (clear LW_SA for the moment) while doing so.
 1991  *
 1992  *      In the case of delivering multiple upcall events, we will end up
 1993  * writing multiple stacks out to userland at once. The last one we send
 1994  * out will be the first one run, then it will notice the others and
 1995  * run them.
 1996  *
 1997  * No locks held on entry or exit. We take various locks internally.
 1998  */
 1999 void
 2000 sa_upcall_userret(struct lwp *l)
 2001 {
 2002         struct lwp *l2;
 2003         struct proc *p;
 2004         struct sadata *sa;
 2005         struct sadata_vp *vp;
 2006         struct sadata_upcall *sau;
 2007         struct sastack *sast;
 2008         sleepq_t *sq;
 2009         int f;
 2010 
 2011         p = l->l_proc;
 2012         sa = p->p_sa;
 2013         vp = l->l_savp;
 2014 
 2015         if (vp->savp_pflags & SAVP_FLAG_NOUPCALLS) {
 2016                 int     do_clear = 0;
 2017                 /*
 2018                  * We made upcalls in sa_yield() (otherwise we would
 2019                  * still be in the loop there!). Don't do it again.
 2020                  * Clear LW_SA_UPCALL unless there are upcalls to deliver;
 2021                  * they will get delivered next time we return to user mode.
 2022                  */
 2023                 vp->savp_pflags &= ~SAVP_FLAG_NOUPCALLS;
 2024                 mutex_enter(&vp->savp_mutex);
 2025                 if ((vp->savp_woken_count == 0)
 2026                     && SIMPLEQ_EMPTY(&vp->savp_upcalls)) {
 2027                         do_clear = 1;
 2028                 }
 2029                 mutex_exit(&vp->savp_mutex);
 2030                 if (do_clear) {
 2031                         lwp_lock(l);
 2032                         l->l_flag &= ~LW_SA_UPCALL;
 2033                         lwp_unlock(l);
 2034                 }
 2035                 DPRINTFN(7,("sa_upcall_userret(%d.%d %x) skipping processing\n",
 2036                     p->p_pid, l->l_lid, l->l_flag));
 2037                 return;
 2038         }
 2039 
 2040         SA_LWP_STATE_LOCK(l, f);
 2041 
 2042         DPRINTFN(7,("sa_upcall_userret(%d.%d %x) empty %d, woken %d\n",
 2043             p->p_pid, l->l_lid, l->l_flag, SIMPLEQ_EMPTY(&vp->savp_upcalls),
 2044             vp->savp_woken_count));
 2045 
 2046         KASSERT((l->l_flag & LW_SA_BLOCKING) == 0);
 2047 
 2048         mutex_enter(&vp->savp_mutex);
 2049         sast = NULL;
 2050         if (SIMPLEQ_EMPTY(&vp->savp_upcalls) &&
 2051             vp->savp_woken_count != 0) {
 2052                 mutex_exit(&vp->savp_mutex);
 2053                 mutex_enter(&sa->sa_mutex);
 2054                 sast = sa_getstack(sa);
 2055                 mutex_exit(&sa->sa_mutex);
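                      /*
                       * If the application has not handed us a free upcall
                       * stack, back off: restore our LWP state, yield the
                       * cpu, and retry on a later return to userland.
                       */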
 2056                 if (sast == NULL) {
 2057                         lwp_lock(l);
 2058                         SA_LWP_STATE_UNLOCK(l, f);
 2059                         lwp_unlock(l);
 2060                         preempt();
 2061                         return;
 2062                 }
 2063                 mutex_enter(&vp->savp_mutex);
 2064         }
 2065         if (SIMPLEQ_EMPTY(&vp->savp_upcalls) &&
 2066             vp->savp_woken_count != 0 && sast != NULL) {
 2067                 /*
 2068                  * Invoke an "unblocked" upcall. We create a message
 2069                  * with the first unblock listed here, and then
 2070                  * string along a number of other unblocked stacks when
 2071                  * we deliver the call.
 2072                  */
 2073                 l2 = TAILQ_FIRST(&vp->savp_woken);
 2074                 TAILQ_REMOVE(&vp->savp_woken, l2, l_sleepchain);
 2075                 vp->savp_woken_count--;
 2076                 mutex_exit(&vp->savp_mutex);
 2077 
 2078                 DPRINTFN(9,("sa_upcall_userret(%d.%d) using stack %p\n",
 2079                     l->l_proc->p_pid, l->l_lid, sast->sast_stack.ss_sp));
 2080 
 2081                 if ((l->l_flag & LW_WEXIT)
 2082                     || (p->p_sflag & (PS_WCORE | PS_WEXIT))) {
 2083                         lwp_exit(l);
 2084                         /* NOTREACHED */
 2085                 }
 2086 
 2087                 DPRINTFN(8,("sa_upcall_userret(%d.%d) unblocking %d\n",
 2088                     p->p_pid, l->l_lid, l2->l_lid));
 2089 
 2090                 sau = sadata_upcall_alloc(1);
 2091                 if ((l->l_flag & LW_WEXIT)
 2092                     || (p->p_sflag & (PS_WCORE | PS_WEXIT))) {
 2093                         sadata_upcall_free(sau);
 2094                         lwp_exit(l);
 2095                         /* NOTREACHED */
 2096                 }
 2097 
 2098                 sa_upcall0(sau, SA_UPCALL_UNBLOCKED, l2, l, 0, NULL, NULL);
 2099                 sau->sau_stack = sast->sast_stack;
 2100                 mutex_enter(&vp->savp_mutex);
 2101                 SIMPLEQ_INSERT_TAIL(&vp->savp_upcalls, sau, sau_next);
 2102                 l2->l_flag &= ~LW_SA_BLOCKING;
 2103 
 2104                 /* Now return l2 to the cache. Mutex already set */
 2105                 sq = &vp->savp_lwpcache;
 2106                 l2->l_wchan = sq;
 2107                 l2->l_wmesg = sa_lwpcache_wmesg;
 2108                 vp->savp_lwpcache_count++;
 2109                 sleepq_insert(sq, l2, &sa_sobj);
 2110                         /* uvm_lwp_hold from sa_unblock_userret */
 2111         } else if (sast)
 2112                 sa_setstackfree(sast, sa);
 2113 
 2114         KASSERT(vp->savp_lwp == l);
 2115 
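              /*
               * Drain the vp's queue of pending upcalls. savp_mutex is
               * dropped around each sa_makeupcalls() call, since delivery
               * copies data out to userland and may sleep.
               */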
 2116         while ((sau = SIMPLEQ_FIRST(&vp->savp_upcalls)) != NULL) {
 2117                 SIMPLEQ_REMOVE_HEAD(&vp->savp_upcalls, sau_next);
 2118                 mutex_exit(&vp->savp_mutex);
 2119                 sa_makeupcalls(l, sau);
 2120                 mutex_enter(&vp->savp_mutex);
 2121         }
 2122         mutex_exit(&vp->savp_mutex);
 2123 
 2124         lwp_lock(l);
 2125 
 2126         if (vp->savp_woken_count == 0) {
 2127                 l->l_flag &= ~LW_SA_UPCALL;
 2128         }
 2129 
 2130         lwp_unlock(l);
 2131 
 2132         SA_LWP_STATE_UNLOCK(l, f);
 2133 
 2134         return;
 2135 }
 2136 
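      /*
       * Copy a kernel structure out to the user stack, using the emulation's
       * translating copyout hook when one is provided and falling back to a
       * plain copyout() otherwise.
       */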
 2137 #define SACOPYOUT(sae, type, kp, up) \
 2138         (((sae)->sae_sacopyout != NULL) ? \
 2139         (*(sae)->sae_sacopyout)((type), (kp), (void *)(up)) : \
 2140         copyout((kp), (void *)(up), sizeof(*(kp))))
 2141 
 2142 /*
 2143  * sa_makeupcalls
 2144  *      We're delivering the first upcall on lwp l, so
 2145  * copy everything out. We assigned the stack for this upcall
 2146  * when we enqueued it.
 2147  *
 2148  * SA_LWP_STATE should be locked (LP_SA_NOBLOCK set).
 2149  *
 2150  *      If the enqueued event was DEFERRED, this is the time when we set
 2151  * up the upcall event's state.
 2152  */
 2153 static void
 2154 sa_makeupcalls(struct lwp *l, struct sadata_upcall *sau)
 2155 {
 2156         struct lwp *l2;
 2157         struct proc *p;
 2158         const struct sa_emul *sae;
 2159         struct sadata *sa;
 2160         struct sadata_vp *vp;
 2161         sleepq_t *sq;
 2162         uintptr_t sapp, sap;
 2163         struct sa_t self_sa;
 2164         struct sa_t *sas[3];
 2165         struct sa_t **ksapp = NULL;
 2166         void *stack, *ap;
 2167         union sau_state *e_ss;
 2168         ucontext_t *kup, *up;
 2169         size_t sz, ucsize;
 2170         int i, nint, nevents, type, error;
 2171 
 2172         p = l->l_proc;
 2173         sae = p->p_emul->e_sa;
 2174         sa = p->p_sa;
 2175         vp = l->l_savp;
 2176         ucsize = sae->sae_ucsize;
 2177 
 2178         if (sau->sau_flags & SAU_FLAG_DEFERRED_EVENT)
 2179                 sa_upcall_getstate(&sau->sau_event,
 2180                     sau->sau_event.ss_deferred.ss_lwp, 0);
 2181         if (sau->sau_flags & SAU_FLAG_DEFERRED_INTERRUPTED)
 2182                 sa_upcall_getstate(&sau->sau_interrupted,
 2183                     sau->sau_interrupted.ss_deferred.ss_lwp, 0);
 2184 
 2185 #ifdef __MACHINE_STACK_GROWS_UP
 2186         stack = sau->sau_stack.ss_sp;
 2187 #else
 2188         stack = (char *)sau->sau_stack.ss_sp + sau->sau_stack.ss_size;
 2189 #endif
 2190         stack = STACK_ALIGN(stack, ALIGNBYTES);
 2191 
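              /*
               * Build the upcall frame on the user stack, working in the
               * machine's stack-growth direction: first the activation's
               * ucontext, then the sa_t records, then the array of pointers
               * to them, and finally the optional argument block.
               */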
 2192         self_sa.sa_id = l->l_lid;
 2193         self_sa.sa_cpu = vp->savp_id;
 2194         sas[0] = &self_sa;
 2195         nevents = 0;
 2196         nint = 0;
 2197         if (sau->sau_event.ss_captured.ss_sa.sa_context != NULL) {
 2198                 if (copyout(&sau->sau_event.ss_captured.ss_ctx,
 2199                     sau->sau_event.ss_captured.ss_sa.sa_context,
 2200                     ucsize) != 0) {
 2201                         sigexit(l, SIGILL);
 2202                         /* NOTREACHED */
 2203                 }
 2204                 sas[1] = &sau->sau_event.ss_captured.ss_sa;
 2205                 nevents = 1;
 2206         }
 2207         if (sau->sau_interrupted.ss_captured.ss_sa.sa_context != NULL) {
 2208                 KASSERT(sau->sau_interrupted.ss_captured.ss_sa.sa_context !=
 2209                     sau->sau_event.ss_captured.ss_sa.sa_context);
 2210                 if (copyout(&sau->sau_interrupted.ss_captured.ss_ctx,
 2211                     sau->sau_interrupted.ss_captured.ss_sa.sa_context,
 2212                     ucsize) != 0) {
 2213                         sigexit(l, SIGILL);
 2214                         /* NOTREACHED */
 2215                 }
 2216                 sas[2] = &sau->sau_interrupted.ss_captured.ss_sa;
 2217                 nint = 1;
 2218         }
 2219 #if 0
 2220         /* For now, limit ourselves to one unblock at once. */
 2221         if (sau->sau_type == SA_UPCALL_UNBLOCKED) {
 2222                 mutex_enter(&vp->savp_mutex);
 2223                 nevents += vp->savp_woken_count;
 2224                 mutex_exit(&vp->savp_mutex);
 2225                 /* XXX WRS Need to limit # unblocks we copy out at once! */
 2226         }
 2227 #endif
 2228 
 2229         /* Copy out the activation's ucontext */
 2230         up = (void *)STACK_ALLOC(stack, ucsize);
 2231         stack = STACK_GROW(stack, ucsize);
 2232         kup = kmem_zalloc(sizeof(*kup), KM_SLEEP);
 2233         KASSERT(kup != NULL);
 2234         kup->uc_stack = sau->sau_stack;
 2235         kup->uc_flags = _UC_STACK;
 2236         error = SACOPYOUT(sae, SAOUT_UCONTEXT, kup, up);
 2237         kmem_free(kup, sizeof(*kup));
 2238         if (error) {
 2239                 sadata_upcall_free(sau);
 2240                 sigexit(l, SIGILL);
 2241                 /* NOTREACHED */
 2242         }
 2243         sas[0]->sa_context = up;
 2244 
 2245         /* Next, copy out the sa_t's and pointers to them. */
 2246 
 2247         sz = (1 + nevents + nint) * sae->sae_sasize;
 2248         sap = (uintptr_t)STACK_ALLOC(stack, sz);
 2249         sap += sz;
 2250         stack = STACK_GROW(stack, sz);
 2251 
 2252         sz = (1 + nevents + nint) * sae->sae_sapsize;
 2253         sapp = (uintptr_t)STACK_ALLOC(stack, sz);
 2254         sapp += sz;
 2255         stack = STACK_GROW(stack, sz);
 2256 
 2257         if (KTRPOINT(p, KTR_SAUPCALL))
 2258                 ksapp = kmem_alloc(sizeof(struct sa_t *) * (nevents + nint + 1),
 2259                     KM_SLEEP);
 2260 
 2261         KASSERT(nint <= 1);
 2262         e_ss = NULL;
 2263         for (i = nevents + nint; i >= 0; i--) {
 2264                 struct sa_t *sasp;
 2265 
 2266                 sap -= sae->sae_sasize;
 2267                 sapp -= sae->sae_sapsize;
 2268                 error = 0;
 2269                 if (i == 1 + nevents)   /* interrupted sa */
 2270                         sasp = sas[2];
 2271                 else if (i <= 1)        /* self_sa and event sa */
 2272                         sasp = sas[i];
 2273                 else {                  /* extra sas */
 2274                         KASSERT(sau->sau_type == SA_UPCALL_UNBLOCKED);
 2275 
 2276                         if (e_ss == NULL) {
 2277                                 e_ss = kmem_alloc(sizeof(*e_ss), KM_SLEEP);
 2278                         }
 2279                         /* Lock vp and all savp_woken lwps */
 2280                         mutex_enter(&vp->savp_mutex);
 2281                         sq = &vp->savp_woken;
 2282                         KASSERT(vp->savp_woken_count > 0);
 2283                         l2 = TAILQ_FIRST(sq);
 2284                         KASSERT(l2 != NULL);
 2285                         TAILQ_REMOVE(sq, l2, l_sleepchain);
 2286                         vp->savp_woken_count--;
 2287 
 2288                         DPRINTFN(8,
 2289                             ("sa_makeupcalls(%d.%d) unblocking extra %d\n",
 2290                             p->p_pid, l->l_lid, l2->l_lid));
 2291                         /*
 2292                          * Since l2 was on savp_woken, we locked it when
 2293                          * we locked savp_mutex
 2294                          */
 2295                         sa_upcall_getstate(e_ss, l2, 1);
 2296                         l2->l_flag &= ~LW_SA_BLOCKING;
 2297 
 2298                         /* Now return l2 to the cache. Mutex already set */
 2299                         sq = &vp->savp_lwpcache;
 2300                         l2->l_wchan = sq;
 2301                         l2->l_wmesg = sa_lwpcache_wmesg;
 2302                         vp->savp_lwpcache_count++;
 2303                         sleepq_insert(sq, l2, &sa_sobj);
 2304                                 /* uvm_lwp_hold from sa_unblock_userret */
 2305                         mutex_exit(&vp->savp_mutex);
 2306 
 2307                         error = copyout(&e_ss->ss_captured.ss_ctx,
 2308                             e_ss->ss_captured.ss_sa.sa_context, ucsize);
 2309                         sasp = &e_ss->ss_captured.ss_sa;
 2310                 }
 2311                 if (error != 0 ||
 2312                     SACOPYOUT(sae, SAOUT_SA_T, sasp, sap) ||
 2313                     SACOPYOUT(sae, SAOUT_SAP_T, &sap, sapp)) {
 2314                         /* Copying onto the stack didn't work. Die. */
 2315                         sadata_upcall_free(sau);
 2316                         if (e_ss != NULL) {
 2317                                 kmem_free(e_ss, sizeof(*e_ss));
 2318                         }
 2319                         goto fail;
 2320                 }
 2321                 if (KTRPOINT(p, KTR_SAUPCALL))
 2322                         ksapp[i] = sasp;
 2323         }
 2324         if (e_ss != NULL) {
 2325                 kmem_free(e_ss, sizeof(*e_ss));
 2326         }
 2327 
 2328         /* Copy out the arg, if any */
 2329         /* xxx assume alignment works out; everything so far has been
 2330          * a structure, so...
 2331          */
 2332         if (sau->sau_arg) {
 2333                 ap = STACK_ALLOC(stack, sau->sau_argsize);
 2334                 stack = STACK_GROW(stack, sau->sau_argsize);
 2335                 if (copyout(sau->sau_arg, ap, sau->sau_argsize) != 0) {
 2336                         /* Copying onto the stack didn't work. Die. */
 2337                         sadata_upcall_free(sau);
 2338                         goto fail;
 2339                 }
 2340         } else {
 2341                 ap = NULL;
 2342 #ifdef __hppa__
 2343                 stack = STACK_ALIGN(stack, HPPA_FRAME_SIZE);
 2344 #endif
 2345         }
 2346         type = sau->sau_type;
 2347 
 2348         if (vp->savp_sleeper_upcall == NULL)
 2349                 vp->savp_sleeper_upcall = sau;
 2350         else
 2351                 sadata_upcall_free(sau);
 2352 
 2353         DPRINTFN(7,("sa_makeupcalls(%d.%d): type %d\n", p->p_pid,
 2354             l->l_lid, type));
 2355 
 2356         if (KTRPOINT(p, KTR_SAUPCALL)) {
 2357                 ktrsaupcall(l, type, nevents, nint, (void *)sapp, ap, ksapp);
 2358                 kmem_free(ksapp, sizeof(struct sa_t *) * (nevents + nint + 1));
 2359         }
 2360 
 2361         (*sae->sae_upcall)(l, type, nevents, nint, (void *)sapp, ap, stack,
 2362             sa->sa_upcall);
 2363 
 2364         lwp_lock(l);
 2365         l->l_flag &= ~LW_SA_YIELD;
 2366         lwp_unlock(l);
 2367         return;
 2368 
 2369 fail:
 2370         if (KTRPOINT(p, KTR_SAUPCALL))
 2371                 kmem_free(ksapp, sizeof(struct sa_t *) * (nevents + nint + 1));
 2372         sigexit(l, SIGILL);
 2373         /* NOTREACHED */
 2374 }
 2375 
 2376 /*
 2377  * sa_unblock_userret:
 2378  *
 2379  *      Our lwp is in the process of returning to userland, and
 2380  * userret noticed LW_SA_BLOCKING is set for us. This indicates that
 2381  * we were at one time the blessed lwp for our vp and we blocked.
 2382  * An upcall was delivered to our process indicating that we blocked.
 2383  *      Since then, we have unblocked in the kernel, and proceeded
 2384  * to finish whatever work needed to be done. For instance, pages
 2385  * have been faulted in for a trap, or system call results have been
 2386  * saved out for a system call.
 2387  *      We now need to simultaneously do two things. First, we have to
 2388  * cause an UNBLOCKED upcall to be generated. Second, we actually
 2389  * have to STOP executing. When the blocked upcall was generated, a
 2390  * new lwp was given to our application. Thus if we simply returned,
 2391  * we would be exceeding our concurrency.
 2392  *       So we put ourselves on our vp's savp_woken list and take
 2393  * steps to make sure the blessed lwp will notice us. Note: we maintain
 2394  * loose concurrency controls, so the blessed lwp for our vp could in
 2395  * fact be running on another cpu in the system.
 2396  */
 2397 void
 2398 sa_unblock_userret(struct lwp *l)
 2399 {
 2400         struct lwp *l2, *vp_lwp;
 2401         struct proc *p;
 2402         struct sadata *sa;
 2403         struct sadata_vp *vp;
 2404         int swapper;
 2405 
 2406         p = l->l_proc;
 2407         sa = p->p_sa;
 2408         vp = l->l_savp;
 2409 
 2410         if ((l->l_flag & LW_WEXIT) || (p->p_sflag & (PS_WCORE | PS_WEXIT)))
 2411                 return;
 2412 
 2413         if ((l->l_flag & LW_SA_BLOCKING) == 0)
 2414                 return;
 2415 
 2416         DPRINTFN(7,("sa_unblock_userret(%d.%d %x) \n", p->p_pid, l->l_lid,
 2417             l->l_flag));
 2418 
 2419         p = l->l_proc;
 2420         sa = p->p_sa;
 2421         vp = l->l_savp;
 2422         vp_lwp = vp->savp_lwp;
 2423         l2 = NULL;
 2424         swapper = 0;
 2425 
 2426         KASSERT(vp_lwp != NULL);
 2427         DPRINTFN(3,("sa_unblock_userret(%d.%d) woken, flags %x, vp %d\n",
 2428                      l->l_proc->p_pid, l->l_lid, l->l_flag,
 2429                      vp_lwp->l_lid));
 2430 
 2431 #if notyet
 2432         if (vp_lwp->l_flag & LW_SA_IDLE) {
 2433                 KASSERT((vp_lwp->l_flag & LW_SA_UPCALL) == 0);
 2434                 KASSERT(vp->savp_wokenq_head == NULL);
 2435                 DPRINTFN(3,
 2436                     ("sa_unblock_userret(%d.%d) repossess: idle vp_lwp %d state %d\n",
 2437                     l->l_proc->p_pid, l->l_lid,
 2438                     vp_lwp->l_lid, vp_lwp->l_stat));
 2439                 vp_lwp->l_flag &= ~LW_SA_IDLE;
 2440                 uvm_lwp_rele(l);
 2441                 return;
 2442         }
 2443 #endif
 2444 
 2445         DPRINTFN(3,(
 2446             "sa_unblock_userret(%d.%d) put on wokenq: vp_lwp %d state %d flags %x\n",
 2447                      l->l_proc->p_pid, l->l_lid, vp_lwp->l_lid,
 2448                      vp_lwp->l_stat, vp_lwp->l_flag));
 2449 
 2450         lwp_lock(vp_lwp);
 2451 
 2452         if (!mutex_tryenter(&vp->savp_mutex)) {
 2453                 lwp_unlock(vp_lwp);
 2454                 mutex_enter(&vp->savp_mutex);
 2455                 /* savp_lwp may have changed. We'll be ok even if it did */
 2456                 vp_lwp = vp->savp_lwp;
 2457                 lwp_lock(vp_lwp);
 2458         }
 2459         
 2460 
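              /*
               * Poke the vp's blessed lwp so it will notice us on savp_woken
               * and generate the UNBLOCKED upcall; how we poke it depends on
               * the state that lwp is in right now.
               */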
 2461         switch (vp_lwp->l_stat) {
 2462         case LSONPROC:
 2463                 if (vp_lwp->l_flag & LW_SA_UPCALL)
 2464                         break;
 2465                 vp_lwp->l_flag |= LW_SA_UPCALL;
 2466                 if (vp_lwp->l_flag & LW_SA_YIELD)
 2467                         break;
 2468                 spc_lock(vp_lwp->l_cpu);
 2469                 cpu_need_resched(vp_lwp->l_cpu, RESCHED_IMMED);
 2470                 spc_unlock(vp_lwp->l_cpu);
 2471                 break;
 2472         case LSSLEEP:
 2473                 if (vp_lwp->l_flag & LW_SA_IDLE) {
 2474                         vp_lwp->l_flag &= ~(LW_SA_IDLE|LW_SA_YIELD|LW_SINTR);
 2475                         vp_lwp->l_flag |= LW_SA_UPCALL;
 2476                         /* lwp_unsleep() will unlock the LWP */
 2477                         lwp_unsleep(vp_lwp, true);
 2478                         DPRINTFN(3,(
 2479                             "sa_unblock_userret(%d.%d) woke vp: %d state %d\n",
 2480                              l->l_proc->p_pid, l->l_lid, vp_lwp->l_lid,
 2481                              vp_lwp->l_stat));
 2482                         vp_lwp = NULL;
 2483                         break;
 2484                 }
 2485                 vp_lwp->l_flag |= LW_SA_UPCALL;
 2486                 break;
 2487         case LSSUSPENDED:
 2488                 break;
 2489         case LSSTOP:
 2490                 vp_lwp->l_flag |= LW_SA_UPCALL;
 2491                 break;
 2492         case LSRUN:
 2493                 if (vp_lwp->l_flag & LW_SA_UPCALL)
 2494                         break;
 2495                 vp_lwp->l_flag |= LW_SA_UPCALL;
 2496                 if (vp_lwp->l_flag & LW_SA_YIELD)
 2497                         break;
 2498 #if 0
 2499                 if (vp_lwp->l_slptime > 1) {
 2500                         void updatepri(struct lwp *);
 2501                         updatepri(vp_lwp);
 2502                 }
 2503 #endif
 2504                 vp_lwp->l_slptime = 0;
 2505                 if (vp_lwp->l_flag & LW_INMEM) {
 2506                         if (vp_lwp->l_cpu == curcpu())
 2507                                 l2 = vp_lwp;
 2508                         else {
 2509                                 /*
 2510                                  * don't need to spc_lock the other cpu
 2511                                  * as runnable lwps have the cpu as their
 2512                                  * mutex.
 2513                                  */
 2514                                 /* spc_lock(vp_lwp->l_cpu); */
 2515                                 cpu_need_resched(vp_lwp->l_cpu, 0);
 2516                                 /* spc_unlock(vp_lwp->l_cpu); */
 2517                         }
 2518                 } else
 2519                         swapper = 1;
 2520                 break;
 2521         default:
 2522                 panic("sa_vp LWP not sleeping/onproc/runnable");
 2523         }
 2524 
 2525         if (vp_lwp != NULL)
 2526                 lwp_unlock(vp_lwp);
 2527 
 2528         if (swapper)
 2529                 wakeup(&proc0);
 2530 
 2531         /*
 2532          * Add ourselves to the savp_woken queue. Still on p_lwps.
 2533          *
 2534          * We now don't unlock savp_mutex since it now is l's mutex,
 2535          * and it will be released in mi_switch().
 2536          */
 2537         sleepq_enter(&vp->savp_woken, l, &vp->savp_mutex);
 2538         sleepq_enqueue(&vp->savp_woken, &vp->savp_woken, sa_lwpwoken_wmesg,
 2539             &sa_sobj);
 2540         uvm_lwp_hold(l);
 2541         vp->savp_woken_count++;
 2542         //l->l_stat = LSSUSPENDED;
 2543         mi_switch(l);
 2544 
 2545         /*
 2546          * We suspended ourselves and put ourselves on the savp_woken
 2547          * list. The only way we come back from mi_switch() to this
 2548          * routine is if we were put back on the run queues, which only
 2549          * happens if the process is exiting. So just exit.
 2550          *
 2551          * In the normal lwp lifecycle, cpu_setfunc() will make this lwp
 2552          * run in a different routine by the time we next run.
 2553          */
 2554         lwp_exit(l);
 2555         /* NOTREACHED */
 2556 }
 2557 
 2558 
 2559 
 2560 #ifdef DEBUG
 2561 int debug_print_sa(struct proc *);
 2562 int debug_print_proc(int);
 2563 
 2564 int
 2565 debug_print_proc(int pid)
 2566 {
 2567         struct proc *p;
 2568 
 2569         p = pfind(pid);
 2570         if (p == NULL)
 2571                 printf("No process %d\n", pid);
 2572         else
 2573                 debug_print_sa(p);
 2574 
 2575         return 0;
 2576 }
 2577 
 2578 int
 2579 debug_print_sa(struct proc *p)
 2580 {
 2581         struct sadata *sa;
 2582         struct sadata_vp *vp;
 2583 
 2584         printf("Process %d (%s), state %d, address %p, flags %x\n",
 2585             p->p_pid, p->p_comm, p->p_stat, p, p->p_sflag);
 2586         printf("LWPs: %d (%d running, %d zombies)\n", p->p_nlwps, p->p_nrlwps,
 2587             p->p_nzlwps);
 2588         sa = p->p_sa;
 2589         if (sa) {
 2590                 SLIST_FOREACH(vp, &sa->sa_vps, savp_next) {
 2591                         if (vp->savp_lwp)
 2592                                 printf("SA VP: %d %s\n", vp->savp_lwp->l_lid,
 2593                                     vp->savp_lwp->l_flag & LW_SA_YIELD ?
 2594                                     (vp->savp_lwp->l_flag & LW_SA_IDLE ?
 2595                                         "idle" : "yielding") : "");
 2596                         printf("SAs: %d cached LWPs\n",
 2597                                         vp->savp_lwpcache_count);
 2598                         printf("SAs: %d woken LWPs\n",
 2599                                         vp->savp_woken_count);
 2600                 }
 2601         }
 2602 
 2603         return 0;
 2604 }
 2605 
 2606 #endif
 2607 
 2608 #endif /* KERN_SA */
