The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
    3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice unmodified, this list of conditions, and the following
   11  *    disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 
   31 #include "opt_compat.h"
   32 #include "opt_umtx_profiling.h"
   33 
   34 #include <sys/param.h>
   35 #include <sys/kernel.h>
   36 #include <sys/limits.h>
   37 #include <sys/lock.h>
   38 #include <sys/malloc.h>
   39 #include <sys/mutex.h>
   40 #include <sys/priv.h>
   41 #include <sys/proc.h>
   42 #include <sys/sched.h>
   43 #include <sys/smp.h>
   44 #include <sys/sysctl.h>
   45 #include <sys/sysent.h>
   46 #include <sys/systm.h>
   47 #include <sys/sysproto.h>
   48 #include <sys/syscallsubr.h>
   49 #include <sys/eventhandler.h>
   50 #include <sys/umtx.h>
   51 
   52 #include <vm/vm.h>
   53 #include <vm/vm_param.h>
   54 #include <vm/pmap.h>
   55 #include <vm/vm_map.h>
   56 #include <vm/vm_object.h>
   57 
   58 #include <machine/cpu.h>
   59 
   60 #ifdef COMPAT_FREEBSD32
   61 #include <compat/freebsd32/freebsd32_proto.h>
   62 #endif
   63 
   64 #define _UMUTEX_TRY             1
   65 #define _UMUTEX_WAIT            2
   66 
   67 /* Priority inheritance mutex info. */
/*
 * Priority inheritance mutex info.
 *
 * Kernel-side state for one userland PI mutex: its current owner, the
 * threads blocked on it, and the linkage that puts it on the owner's
 * held-PI list and on the hash chain's uc_pi_list.
 */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry to link umtx holding by thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash (chain's uc_pi_list) */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters (umtx_q's blocked on this mutex) */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
   87 
   88 /* A userland synchronous object user. */
/*
 * A userland synchronous object user.
 *
 * One per thread (reached via td->td_umtxq); it is the element placed
 * on a per-key wait queue (umtxq_queue) while the thread sleeps on a
 * userland object.
 */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* set while queued on a wait queue */

	/* The thread waits on. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or umtx_lock, write must have both chain lock and
	 * umtx_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Thread contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;

	/* Spare queue ready to be reused (see umtxq_insert_queue()). */
	struct umtxq_queue	*uq_spare_queue;

	/* The queue we are on (valid only while UQF_UMTXQ is set). */
	struct umtxq_queue	*uq_cur_queue;
};
  125 
TAILQ_HEAD(umtxq_head, umtx_q);

/*
 * Per-key wait-queue: all umtx_q waiters sharing one umtx_key, with a
 * cached length so wakeup decisions do not walk the list.
 */
struct umtxq_queue {
	struct umtxq_head	head;	/* the waiting threads */
	struct umtx_key		key;	/* key every waiter here matches */
	LIST_ENTRY(umtxq_queue)	link;	/* chain queue / spare-list linkage */
	int			length;	/* number of entries on head */
};

LIST_HEAD(umtxq_list, umtxq_queue);
  137 
/*
 * Userland lock object's wait-queue chain.
 *
 * One hash bucket; all keys hashing here share it.  uc_lock protects
 * the lists, uc_busy marks the chain reserved across operations that
 * may block (see umtxq_busy()/umtxq_unbusy()).
 */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues, indexed by queue class. */
	struct umtxq_list	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Spare umtxq_queue's parked here by umtxq_insert_queue(). */
	LIST_HEAD(, umtxq_queue) uc_spare_queue;

	/* Busy flag */
	char			uc_busy;

	/* Chain lock waiters (sleepers in umtxq_busy()) */
	int			uc_waiters;

	/* All PI in the list */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;

#ifdef UMTX_PROFILING
	/* Current and high-water waiter counts for this chain. */
	int			length;
	int			max_length;
#endif
};
  164 
  165 #define UMTXQ_LOCKED_ASSERT(uc)         mtx_assert(&(uc)->uc_lock, MA_OWNED)
  166 #define UMTXQ_BUSY_ASSERT(uc)   KASSERT(&(uc)->uc_busy, ("umtx chain is not busy"))
  167 
  168 /*
  169  * Don't propagate time-sharing priority, there is a security reason,
  170  * a user can simply introduce PI-mutex, let thread A lock the mutex,
  171  * and let another thread B block on the mutex, because B is
  172  * sleeping, its priority will be boosted, this causes A's priority to
  173  * be boosted via priority propagating too and will never be lowered even
  174  * if it is using 100%CPU, this is unfair to other processes.
  175  */
  176 
/*
 * Clamp a time-sharing thread's user priority to PRI_MAX_TIMESHARE
 * before it is propagated (see the rationale above).
 */
#define UPRI(td)        (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
                          (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
                         PRI_MAX_TIMESHARE : (td)->td_user_pri)

/* Multiplier for Fibonacci-style hashing in umtxq_hash(). */
#define GOLDEN_RATIO_PRIME      2654404609U
/* Buckets per chain table. */
#define UMTX_CHAINS             512
/* Shift keeping the top 9 bits of the hash product (512 buckets). */
#define UMTX_SHIFTS             (__WORD_BIT - 9)

/* Map umtx flag bits to the key sharing mode. */
#define GET_SHARE(flags)        \
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

/* Spin iterations on a busy chain before sleeping (SMP only). */
#define BUSY_SPINS              200
  189 
  190 static uma_zone_t               umtx_pi_zone;
  191 static struct umtxq_chain       umtxq_chains[2][UMTX_CHAINS];
  192 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
  193 static int                      umtx_pi_allocated;
  194 
  195 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
  196 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
  197     &umtx_pi_allocated, 0, "Allocated umtx_pi");
  198 
  199 #ifdef UMTX_PROFILING
  200 static long max_length;
  201 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
  202 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
  203 #endif
  204 
  205 static void umtxq_sysinit(void *);
  206 static void umtxq_hash(struct umtx_key *key);
  207 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
  208 static void umtxq_lock(struct umtx_key *key);
  209 static void umtxq_unlock(struct umtx_key *key);
  210 static void umtxq_busy(struct umtx_key *key);
  211 static void umtxq_unbusy(struct umtx_key *key);
  212 static void umtxq_insert_queue(struct umtx_q *uq, int q);
  213 static void umtxq_remove_queue(struct umtx_q *uq, int q);
  214 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
  215 static int umtxq_count(struct umtx_key *key);
  216 static struct umtx_pi *umtx_pi_alloc(int);
  217 static void umtx_pi_free(struct umtx_pi *pi);
  218 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
  219 static void umtx_thread_cleanup(struct thread *td);
  220 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
  221         struct image_params *imgp __unused);
  222 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
  223 
  224 #define umtxq_signal(key, nwake)        umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
  225 #define umtxq_insert(uq)        umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
  226 #define umtxq_remove(uq)        umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
  227 
  228 static struct mtx umtx_lock;
  229 
  230 #ifdef UMTX_PROFILING
  231 static void
  232 umtx_init_profiling(void) 
  233 {
  234         struct sysctl_oid *chain_oid;
  235         char chain_name[10];
  236         int i;
  237 
  238         for (i = 0; i < UMTX_CHAINS; ++i) {
  239                 snprintf(chain_name, sizeof(chain_name), "%d", i);
  240                 chain_oid = SYSCTL_ADD_NODE(NULL, 
  241                     SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 
  242                     chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
  243                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  244                     "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
  245                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  246                     "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
  247         }
  248 }
  249 #endif
  250 
/*
 * Subsystem initialization, run via SYSINIT at SI_SUB_EVENTHANDLER+1:
 * create the umtx_pi UMA zone, initialize both tables of hash chains,
 * the global umtx_lock, and hook process exec for cleanup.
 */
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			/* MTX_DUPOK: two chain locks may be held at once. */
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
#ifdef UMTX_PROFILING
			umtxq_chains[i][j].length = 0;
			umtxq_chains[i][j].max_length = 0;	
#endif
		}
	}
#ifdef UMTX_PROFILING
	umtx_init_profiling();
#endif
	/* Spin mutex guarding PI state (see uq_pi_blocked comment). */
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}
  281 
  282 struct umtx_q *
  283 umtxq_alloc(void)
  284 {
  285         struct umtx_q *uq;
  286 
  287         uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
  288         uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
  289         TAILQ_INIT(&uq->uq_spare_queue->head);
  290         TAILQ_INIT(&uq->uq_pi_contested);
  291         uq->uq_inherited_pri = PRI_MAX;
  292         return (uq);
  293 }
  294 
  295 void
  296 umtxq_free(struct umtx_q *uq)
  297 {
  298         MPASS(uq->uq_spare_queue != NULL);
  299         free(uq->uq_spare_queue, M_UMTX);
  300         free(uq, M_UMTX);
  301 }
  302 
  303 static inline void
  304 umtxq_hash(struct umtx_key *key)
  305 {
  306         unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
  307         key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
  308 }
  309 
  310 static inline struct umtxq_chain *
  311 umtxq_getchain(struct umtx_key *key)
  312 {
  313         if (key->type <= TYPE_SEM)
  314                 return (&umtxq_chains[1][key->hash]);
  315         return (&umtxq_chains[0][key->hash]);
  316 }
  317 
  318 /*
  319  * Lock a chain.
  320  */
  321 static inline void
  322 umtxq_lock(struct umtx_key *key)
  323 {
  324         struct umtxq_chain *uc;
  325 
  326         uc = umtxq_getchain(key);
  327         mtx_lock(&uc->uc_lock);
  328 }
  329 
  330 /*
  331  * Unlock a chain.
  332  */
  333 static inline void
  334 umtxq_unlock(struct umtx_key *key)
  335 {
  336         struct umtxq_chain *uc;
  337 
  338         uc = umtxq_getchain(key);
  339         mtx_unlock(&uc->uc_lock);
  340 }
  341 
/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 *
 * Entered and exited with the chain lock held, but the lock may be
 * dropped in between (while spinning and inside msleep()), so the
 * caller must revalidate any chain state afterwards.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		/*
		 * On SMP, spin briefly with the chain lock dropped in the
		 * hope the holder unbusies soon.  uc_busy is read without
		 * the lock here; that is only an optimization, since it
		 * is rechecked under the lock below.
		 */
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		/* Sleep until the holder clears uc_busy and wakes us. */
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
  373 
  374 /*
  375  * Unbusy a chain.
  376  */
  377 static inline void
  378 umtxq_unbusy(struct umtx_key *key)
  379 {
  380         struct umtxq_chain *uc;
  381 
  382         uc = umtxq_getchain(key);
  383         mtx_assert(&uc->uc_lock, MA_OWNED);
  384         KASSERT(uc->uc_busy != 0, ("not busy"));
  385         uc->uc_busy = 0;
  386         if (uc->uc_waiters)
  387                 wakeup_one(uc);
  388 }
  389 
  390 static struct umtxq_queue *
  391 umtxq_queue_lookup(struct umtx_key *key, int q)
  392 {
  393         struct umtxq_queue *uh;
  394         struct umtxq_chain *uc;
  395 
  396         uc = umtxq_getchain(key);
  397         UMTXQ_LOCKED_ASSERT(uc);
  398         LIST_FOREACH(uh, &uc->uc_queue[q], link) {
  399                 if (umtx_key_match(&uh->key, key))
  400                         return (uh);
  401         }
  402 
  403         return (NULL);
  404 }
  405 
  406 static inline void
  407 umtxq_insert_queue(struct umtx_q *uq, int q)
  408 {
  409         struct umtxq_queue *uh;
  410         struct umtxq_chain *uc;
  411 
  412         uc = umtxq_getchain(&uq->uq_key);
  413         UMTXQ_LOCKED_ASSERT(uc);
  414         KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
  415         uh = umtxq_queue_lookup(&uq->uq_key, q);
  416         if (uh != NULL) {
  417                 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
  418         } else {
  419                 uh = uq->uq_spare_queue;
  420                 uh->key = uq->uq_key;
  421                 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
  422         }
  423         uq->uq_spare_queue = NULL;
  424 
  425         TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
  426         uh->length++;
  427 #ifdef UMTX_PROFILING
  428         uc->length++;
  429         if (uc->length > uc->max_length) {
  430                 uc->max_length = uc->length;
  431                 if (uc->max_length > max_length)
  432                         max_length = uc->max_length;    
  433         }
  434 #endif
  435         uq->uq_flags |= UQF_UMTXQ;
  436         uq->uq_cur_queue = uh;
  437         return;
  438 }
  439 
/*
 * Unlink uq from wait queue 'q' if it is queued, and reclaim a spare
 * umtxq_queue for it: the queue head itself when uq was the last
 * waiter, otherwise one parked on the chain's spare list (every
 * queued waiter donated one in umtxq_insert_queue(), so the spare
 * list cannot be empty here).  Chain lock must be held.
 */
static inline void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
	struct umtxq_chain *uc;
	struct umtxq_queue *uh;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (uq->uq_flags & UQF_UMTXQ) {
		uh = uq->uq_cur_queue;
		TAILQ_REMOVE(&uh->head, uq, uq_link);
		uh->length--;
#ifdef UMTX_PROFILING
		uc->length--;
#endif
		uq->uq_flags &= ~UQF_UMTXQ;
		if (TAILQ_EMPTY(&uh->head)) {
			KASSERT(uh->length == 0,
			    ("inconsistent umtxq_queue length"));
			/* Last waiter: the queue head becomes our spare. */
			LIST_REMOVE(uh, link);
		} else {
			/* Otherwise take a spare parked on the chain. */
			uh = LIST_FIRST(&uc->uc_spare_queue);
			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
			LIST_REMOVE(uh, link);
		}
		uq->uq_spare_queue = uh;
		uq->uq_cur_queue = NULL;
	}
}
  469 
  470 /*
  471  * Check if there are multiple waiters
  472  */
  473 static int
  474 umtxq_count(struct umtx_key *key)
  475 {
  476         struct umtxq_chain *uc;
  477         struct umtxq_queue *uh;
  478 
  479         uc = umtxq_getchain(key);
  480         UMTXQ_LOCKED_ASSERT(uc);
  481         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  482         if (uh != NULL)
  483                 return (uh->length);
  484         return (0);
  485 }
  486 
  487 /*
  488  * Check if there are multiple PI waiters and returns first
  489  * waiter.
  490  */
  491 static int
  492 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
  493 {
  494         struct umtxq_chain *uc;
  495         struct umtxq_queue *uh;
  496 
  497         *first = NULL;
  498         uc = umtxq_getchain(key);
  499         UMTXQ_LOCKED_ASSERT(uc);
  500         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  501         if (uh != NULL) {
  502                 *first = TAILQ_FIRST(&uh->head);
  503                 return (uh->length);
  504         }
  505         return (0);
  506 }
  507 
  508 static int
  509 umtxq_check_susp(struct thread *td)
  510 {
  511         struct proc *p;
  512         int error;
  513 
  514         /*
  515          * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
  516          * eventually break the lockstep loop.
  517          */
  518         if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
  519                 return (0);
  520         error = 0;
  521         p = td->td_proc;
  522         PROC_LOCK(p);
  523         if (P_SHOULDSTOP(p) ||
  524             ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
  525                 if (p->p_flag & P_SINGLE_EXIT)
  526                         error = EINTR;
  527                 else
  528                         error = ERESTART;
  529         }
  530         PROC_UNLOCK(p);
  531         return (error);
  532 }
  533 
  534 /*
  535  * Wake up threads waiting on an userland object.
  536  */
  537 
  538 static int
  539 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
  540 {
  541         struct umtxq_chain *uc;
  542         struct umtxq_queue *uh;
  543         struct umtx_q *uq;
  544         int ret;
  545 
  546         ret = 0;
  547         uc = umtxq_getchain(key);
  548         UMTXQ_LOCKED_ASSERT(uc);
  549         uh = umtxq_queue_lookup(key, q);
  550         if (uh != NULL) {
  551                 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
  552                         umtxq_remove_queue(uq, q);
  553                         wakeup(uq);
  554                         if (++ret >= n_wake)
  555                                 return (ret);
  556                 }
  557         }
  558         return (ret);
  559 }
  560 
  561 
  562 /*
  563  * Wake up specified thread.
  564  */
  565 static inline void
  566 umtxq_signal_thread(struct umtx_q *uq)
  567 {
  568         struct umtxq_chain *uc;
  569 
  570         uc = umtxq_getchain(&uq->uq_key);
  571         UMTXQ_LOCKED_ASSERT(uc);
  572         umtxq_remove(uq);
  573         wakeup(uq);
  574 }
  575 
  576 /*
  577  * Put thread into sleep state, before sleeping, check if
  578  * thread was removed from umtx queue.
  579  */
  580 static inline int
  581 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
  582 {
  583         struct umtxq_chain *uc;
  584         int error;
  585 
  586         uc = umtxq_getchain(&uq->uq_key);
  587         UMTXQ_LOCKED_ASSERT(uc);
  588         if (!(uq->uq_flags & UQF_UMTXQ))
  589                 return (0);
  590         error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
  591         if (error == EWOULDBLOCK)
  592                 error = ETIMEDOUT;
  593         return (error);
  594 }
  595 
/*
 * Convert userspace address into unique logical address.
 *
 * THREAD_SHARE keys are identified by (vmspace, address).  Shared
 * keys are identified by the backing VM object and an object-relative
 * offset, so different mappings of the same object produce matching
 * keys; a reference is taken on the object and dropped by
 * umtx_key_release().  AUTO_SHARE chooses based on the map entry's
 * inheritance.  Returns 0, or EFAULT when the address does not
 * resolve to a writable mapping.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/* Value depending only on the object and position,
			   so all mappings of the same location agree. */
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
  641 
  642 /*
  643  * Release key.
  644  */
  645 void
  646 umtx_key_release(struct umtx_key *key)
  647 {
  648         if (key->shared)
  649                 vm_object_deallocate(key->info.shared.object);
  650 }
  651 
  652 /*
  653  * Lock a umtx object.
  654  */
  655 static int
  656 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
  657 {
  658         struct umtx_q *uq;
  659         u_long owner;
  660         u_long old;
  661         int error = 0;
  662 
  663         uq = td->td_umtxq;
  664 
  665         /*
  666          * Care must be exercised when dealing with umtx structure. It
  667          * can fault on any access.
  668          */
  669         for (;;) {
  670                 /*
  671                  * Try the uncontested case.  This should be done in userland.
  672                  */
  673                 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
  674 
  675                 /* The acquire succeeded. */
  676                 if (owner == UMTX_UNOWNED)
  677                         return (0);
  678 
  679                 /* The address was invalid. */
  680                 if (owner == -1)
  681                         return (EFAULT);
  682 
  683                 /* If no one owns it but it is contested try to acquire it. */
  684                 if (owner == UMTX_CONTESTED) {
  685                         owner = casuword(&umtx->u_owner,
  686                             UMTX_CONTESTED, id | UMTX_CONTESTED);
  687 
  688                         if (owner == UMTX_CONTESTED)
  689                                 return (0);
  690 
  691                         /* The address was invalid. */
  692                         if (owner == -1)
  693                                 return (EFAULT);
  694 
  695                         error = umtxq_check_susp(td);
  696                         if (error != 0)
  697                                 break;
  698 
  699                         /* If this failed the lock has changed, restart. */
  700                         continue;
  701                 }
  702 
  703                 /*
  704                  * If we caught a signal, we have retried and now
  705                  * exit immediately.
  706                  */
  707                 if (error != 0)
  708                         return (error);
  709 
  710                 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
  711                         AUTO_SHARE, &uq->uq_key)) != 0)
  712                         return (error);
  713 
  714                 umtxq_lock(&uq->uq_key);
  715                 umtxq_busy(&uq->uq_key);
  716                 umtxq_insert(uq);
  717                 umtxq_unbusy(&uq->uq_key);
  718                 umtxq_unlock(&uq->uq_key);
  719 
  720                 /*
  721                  * Set the contested bit so that a release in user space
  722                  * knows to use the system call for unlock.  If this fails
  723                  * either some one else has acquired the lock or it has been
  724                  * released.
  725                  */
  726                 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
  727 
  728                 /* The address was invalid. */
  729                 if (old == -1) {
  730                         umtxq_lock(&uq->uq_key);
  731                         umtxq_remove(uq);
  732                         umtxq_unlock(&uq->uq_key);
  733                         umtx_key_release(&uq->uq_key);
  734                         return (EFAULT);
  735                 }
  736 
  737                 /*
  738                  * We set the contested bit, sleep. Otherwise the lock changed
  739                  * and we need to retry or we lost a race to the thread
  740                  * unlocking the umtx.
  741                  */
  742                 umtxq_lock(&uq->uq_key);
  743                 if (old == owner)
  744                         error = umtxq_sleep(uq, "umtx", timo);
  745                 umtxq_remove(uq);
  746                 umtxq_unlock(&uq->uq_key);
  747                 umtx_key_release(&uq->uq_key);
  748 
  749                 if (error == 0)
  750                         error = umtxq_check_susp(td);
  751         }
  752 
  753         return (0);
  754 }
  755 
/*
 * Lock a umtx object.
 *
 * With timeout == NULL, sleep indefinitely and convert EINTR to
 * ERESTART so the syscall restarts transparently.  With a timeout,
 * track an absolute deadline on the uptime clock, retrying the sleep
 * with the remaining interval; ERESTART is converted to EINTR since
 * timed locking is not restarted.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts becomes the absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry the sleep with the remaining time only. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
  795 
/*
 * Unlock a umtx object.
 *
 * Verifies the caller owns the lock (EPERM otherwise).  The
 * uncontested case is released with a single CAS; a contested lock is
 * handed over with the chain busied: the word is set to UMTX_UNOWNED
 * or kept UMTX_CONTESTED depending on how many waiters remain, and
 * one waiter is woken.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lock became contested between fuword and CAS; fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so the waiter count stays meaningful. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
  856 
  857 #ifdef COMPAT_FREEBSD32
  858 
  859 /*
  860  * Lock a umtx object.
  861  */
  862 static int
  863 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
  864 {
  865         struct umtx_q *uq;
  866         uint32_t owner;
  867         uint32_t old;
  868         int error = 0;
  869 
  870         uq = td->td_umtxq;
  871 
  872         /*
  873          * Care must be exercised when dealing with umtx structure. It
  874          * can fault on any access.
  875          */
  876         for (;;) {
  877                 /*
  878                  * Try the uncontested case.  This should be done in userland.
  879                  */
  880                 owner = casuword32(m, UMUTEX_UNOWNED, id);
  881 
  882                 /* The acquire succeeded. */
  883                 if (owner == UMUTEX_UNOWNED)
  884                         return (0);
  885 
  886                 /* The address was invalid. */
  887                 if (owner == -1)
  888                         return (EFAULT);
  889 
  890                 /* If no one owns it but it is contested try to acquire it. */
  891                 if (owner == UMUTEX_CONTESTED) {
  892                         owner = casuword32(m,
  893                             UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
  894                         if (owner == UMUTEX_CONTESTED)
  895                                 return (0);
  896 
  897                         /* The address was invalid. */
  898                         if (owner == -1)
  899                                 return (EFAULT);
  900 
  901                         error = umtxq_check_susp(td);
  902                         if (error != 0)
  903                                 break;
  904 
  905                         /* If this failed the lock has changed, restart. */
  906                         continue;
  907                 }
  908 
  909                 /*
  910                  * If we caught a signal, we have retried and now
  911                  * exit immediately.
  912                  */
  913                 if (error != 0)
  914                         return (error);
  915 
  916                 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
  917                         AUTO_SHARE, &uq->uq_key)) != 0)
  918                         return (error);
  919 
  920                 umtxq_lock(&uq->uq_key);
  921                 umtxq_busy(&uq->uq_key);
  922                 umtxq_insert(uq);
  923                 umtxq_unbusy(&uq->uq_key);
  924                 umtxq_unlock(&uq->uq_key);
  925 
  926                 /*
  927                  * Set the contested bit so that a release in user space
  928                  * knows to use the system call for unlock.  If this fails
  929                  * either some one else has acquired the lock or it has been
  930                  * released.
  931                  */
  932                 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
  933 
  934                 /* The address was invalid. */
  935                 if (old == -1) {
  936                         umtxq_lock(&uq->uq_key);
  937                         umtxq_remove(uq);
  938                         umtxq_unlock(&uq->uq_key);
  939                         umtx_key_release(&uq->uq_key);
  940                         return (EFAULT);
  941                 }
  942 
  943                 /*
  944                  * We set the contested bit, sleep. Otherwise the lock changed
  945                  * and we need to retry or we lost a race to the thread
  946                  * unlocking the umtx.
  947                  */
  948                 umtxq_lock(&uq->uq_key);
  949                 if (old == owner)
  950                         error = umtxq_sleep(uq, "umtx", timo);
  951                 umtxq_remove(uq);
  952                 umtxq_unlock(&uq->uq_key);
  953                 umtx_key_release(&uq->uq_key);
  954 
  955                 if (error == 0)
  956                         error = umtxq_check_susp(td);
  957         }
  958 
  959         return (0);
  960 }
  961 
  962 /*
  963  * Lock a umtx object.
  964  */
  965 static int
  966 do_lock_umtx32(struct thread *td, void *m, uint32_t id,
  967         struct timespec *timeout)
  968 {
  969         struct timespec ts, ts2, ts3;
  970         struct timeval tv;
  971         int error;
  972 
  973         if (timeout == NULL) {
  974                 error = _do_lock_umtx32(td, m, id, 0);
  975                 /* Mutex locking is restarted if it is interrupted. */
  976                 if (error == EINTR)
  977                         error = ERESTART;
  978         } else {
  979                 getnanouptime(&ts);
  980                 timespecadd(&ts, timeout);
  981                 TIMESPEC_TO_TIMEVAL(&tv, timeout);
  982                 for (;;) {
  983                         error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
  984                         if (error != ETIMEDOUT)
  985                                 break;
  986                         getnanouptime(&ts2);
  987                         if (timespeccmp(&ts2, &ts, >=)) {
  988                                 error = ETIMEDOUT;
  989                                 break;
  990                         }
  991                         ts3 = ts;
  992                         timespecsub(&ts3, &ts2);
  993                         TIMESPEC_TO_TIMEVAL(&tv, &ts3);
  994                 }
  995                 /* Timed-locking is not restarted. */
  996                 if (error == ERESTART)
  997                         error = EINTR;
  998         }
  999         return (error);
 1000 }
 1001 
/*
 * Unlock a 32-bit umtx object (COMPAT_FREEBSD32 counterpart of
 * do_unlock_umtx).
 *
 * Returns 0 on success, EFAULT on a userland fault, EPERM if the
 * caller does not own the lock, or EINVAL if the lock word changed
 * unexpectedly during the contested hand-off.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		/* Uncontested: release with a single compare-and-set. */
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* CAS failed: the contested bit was set meanwhile. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the queue so waiters cannot race the hand-off below. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	/* Wake one waiter; it will retry the acquisition. */
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
 1062 #endif
 1063 
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * addr is a userland word (32-bit when compat32 is set, long-sized
 * otherwise); the thread only goes to sleep while the word still
 * equals id.  is_private selects a per-process (THREAD_SHARE) rather
 * than an address-based (AUTO_SHARE) wait key.  timeout, when
 * non-NULL, is relative; ERESTART is converted to EINTR so waits are
 * not transparently restarted after a signal.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	/*
	 * Enqueue first, then re-read the word: a waker changing the
	 * value after this point will find us on the queue.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		/* Value already changed; do not sleep at all. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* Convert the relative timeout into an absolute deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ)) {
				/* Dequeued by a waker: success. */
				error = 0;
				break;
			}
			if (error != ETIMEDOUT)
				break;
			/* Tick timeout; check the absolute deadline. */
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			/* Not expired yet; sleep for the remainder. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
 1131 
 1132 /*
 1133  * Wake up threads sleeping on the specified address.
 1134  */
 1135 int
 1136 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
 1137 {
 1138         struct umtx_key key;
 1139         int ret;
 1140         
 1141         if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
 1142                 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
 1143                 return (ret);
 1144         umtxq_lock(&key);
 1145         ret = umtxq_signal(&key, n_wake);
 1146         umtxq_unlock(&key);
 1147         umtx_key_release(&key);
 1148         return (0);
 1149 }
 1150 
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * mode selects the flavour: _UMUTEX_TRY fails with EBUSY instead of
 * sleeping, _UMUTEX_WAIT waits for the mutex to become available
 * without acquiring it.  timo is the sleep timeout in ticks
 * (0 == forever).  Returns 0 on success, EFAULT on a userland fault,
 * EDEADLK for self-relock with UMUTEX_ERROR_CHECK, EBUSY for a
 * failed try, or a sleep/suspension error.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			/* Wait mode: done as soon as the mutex is free. */
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				error = umtxq_check_susp(td);
				if (error != 0)
					return (error);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/* Stay busied across the CAS so a release cannot race us. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		if (error == 0)
			error = umtxq_check_susp(td);
	}

	/* NOTREACHED: every exit path returns from inside the loop. */
	return (0);
}
 1269 
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
/*
 * Returns 0 on success, EFAULT on a userland fault, EPERM if the
 * calling thread does not own the mutex, or EINVAL if the owner word
 * changed unexpectedly during the contested hand-off.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* Uncontested: release with a single compare-and-set. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* CAS failed: the contested bit was set meanwhile. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Busy the queue so waiters cannot race the hand-off below. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	/* Wake one waiter; it will retry the acquisition. */
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
 1332 
 1333 /*
 1334  * Check if the mutex is available and wake up a waiter,
 1335  * only for simple mutex.
 1336  */
 1337 static int
 1338 do_wake_umutex(struct thread *td, struct umutex *m)
 1339 {
 1340         struct umtx_key key;
 1341         uint32_t owner;
 1342         uint32_t flags;
 1343         int error;
 1344         int count;
 1345 
 1346         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1347         if (owner == -1)
 1348                 return (EFAULT);
 1349 
 1350         if ((owner & ~UMUTEX_CONTESTED) != 0)
 1351                 return (0);
 1352 
 1353         flags = fuword32(&m->m_flags);
 1354 
 1355         /* We should only ever be in here for contested locks */
 1356         if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
 1357             &key)) != 0)
 1358                 return (error);
 1359 
 1360         umtxq_lock(&key);
 1361         umtxq_busy(&key);
 1362         count = umtxq_count(&key);
 1363         umtxq_unlock(&key);
 1364 
 1365         if (count <= 1)
 1366                 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
 1367 
 1368         umtxq_lock(&key);
 1369         if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
 1370                 umtxq_signal(&key, 1);
 1371         umtxq_unbusy(&key);
 1372         umtxq_unlock(&key);
 1373         umtx_key_release(&key);
 1374         return (0);
 1375 }
 1376 
/*
 * Check if the mutex has waiters and tries to fix contention bit.
 *
 * Used to repair the UMUTEX_CONTESTED bit so that the owner's unlock
 * goes through the kernel and wakes the waiters.  Returns 0, EINVAL
 * for an unknown mutex type in flags, EFAULT if the owner word
 * faults, or an error from umtx_key_get()/umtxq_check_susp().
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	/* Map the protocol flags onto the matching key type. */
	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);
	/*
	 * Only repair contention bit if there is a waiter, this means the mutex
	 * is still being referenced by userland code, otherwise don't update
	 * any memory.
	 */
	if (count > 1) {
		/* Several waiters: force the contested bit on. */
		owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
		while ((owner & UMUTEX_CONTESTED) ==0) {
			old = casuword32(&m->m_owner, owner,
			    owner|UMUTEX_CONTESTED);
			if (old == owner)
				break;
			owner = old;
			if (old == -1)
				break;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	} else if (count == 1) {
		/*
		 * Single waiter: only set the bit while the mutex is
		 * still owned; an unowned mutex is handled by waking
		 * the waiter below instead.
		 */
		owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
		while ((owner & ~UMUTEX_CONTESTED) != 0 &&
		       (owner & UMUTEX_CONTESTED) == 0) {
			old = casuword32(&m->m_owner, owner,
			    owner|UMUTEX_CONTESTED);
			if (old == owner)
				break;
			owner = old;
			if (old == -1)
				break;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
	}
	umtxq_lock(&key);
	if (owner == -1) {
		/* The word faulted: report it and flush all waiters. */
		error = EFAULT;
		umtxq_signal(&key, INT_MAX);
	}
	else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
 1458 
 1459 static inline struct umtx_pi *
 1460 umtx_pi_alloc(int flags)
 1461 {
 1462         struct umtx_pi *pi;
 1463 
 1464         pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
 1465         TAILQ_INIT(&pi->pi_blocked);
 1466         atomic_add_int(&umtx_pi_allocated, 1);
 1467         return (pi);
 1468 }
 1469 
 1470 static inline void
 1471 umtx_pi_free(struct umtx_pi *pi)
 1472 {
 1473         uma_zfree(umtx_pi_zone, pi);
 1474         atomic_add_int(&umtx_pi_allocated, -1);
 1475 }
 1476 
/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 *
 * The pi_blocked list is kept sorted by ascending user priority
 * value; td is re-inserted when it is out of order relative to its
 * neighbours.  The global umtx_lock must be held.  Returns 0 when
 * pi is NULL, 1 otherwise.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Stop at the first strictly weaker thread. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
 1521 
/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.
 *
 * Walks the chain of lock owners starting from the mutex td is
 * blocked on, lending td's priority to each owner whose current lent
 * priority value is numerically higher (i.e. weaker).  Stops at a
 * lock with no recorded owner, at curthread, or once an owner is
 * already running at least as strong.  The global umtx_lock must be
 * held.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			/* Owner already at least as strong; stop here. */
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}
 1568 
/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
 *
 * Recomputes each owner's lent priority from scratch: the strongest
 * head waiter over all PI mutexes the owner holds, clamped by the
 * owner's own inherited priority, then continues up the blocking
 * chain.  The global umtx_lock must be held.
 */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/* Strongest waiter over every mutex the owner holds. */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Continue with the lock the owner itself is blocked on. */
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}
 1603 
 1604 /*
 1605  * Insert a PI mutex into owned list.
 1606  */
 1607 static void
 1608 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
 1609 {
 1610         struct umtx_q *uq_owner;
 1611 
 1612         uq_owner = owner->td_umtxq;
 1613         mtx_assert(&umtx_lock, MA_OWNED);
 1614         if (pi->pi_owner != NULL)
 1615                 panic("pi_ower != NULL");
 1616         pi->pi_owner = owner;
 1617         TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
 1618 }
 1619 
 1620 /*
 1621  * Claim ownership of a PI mutex.
 1622  */
 1623 static int
 1624 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
 1625 {
 1626         struct umtx_q *uq, *uq_owner;
 1627 
 1628         uq_owner = owner->td_umtxq;
 1629         mtx_lock_spin(&umtx_lock);
 1630         if (pi->pi_owner == owner) {
 1631                 mtx_unlock_spin(&umtx_lock);
 1632                 return (0);
 1633         }
 1634 
 1635         if (pi->pi_owner != NULL) {
 1636                 /*
 1637                  * userland may have already messed the mutex, sigh.
 1638                  */
 1639                 mtx_unlock_spin(&umtx_lock);
 1640                 return (EPERM);
 1641         }
 1642         umtx_pi_setowner(pi, owner);
 1643         uq = TAILQ_FIRST(&pi->pi_blocked);
 1644         if (uq != NULL) {
 1645                 int pri;
 1646 
 1647                 pri = UPRI(uq->uq_thread);
 1648                 thread_lock(owner);
 1649                 if (pri < UPRI(owner))
 1650                         sched_lend_user_prio(owner, pri);
 1651                 thread_unlock(owner);
 1652         }
 1653         mtx_unlock_spin(&umtx_lock);
 1654         return (0);
 1655 }
 1656 
 1657 /*
 1658  * Adjust a thread's order position in its blocked PI mutex,
 1659  * this may result new priority propagating process.
 1660  */
 1661 void
 1662 umtx_pi_adjust(struct thread *td, u_char oldpri)
 1663 {
 1664         struct umtx_q *uq;
 1665         struct umtx_pi *pi;
 1666 
 1667         uq = td->td_umtxq;
 1668         mtx_lock_spin(&umtx_lock);
 1669         /*
 1670          * Pick up the lock that td is blocked on.
 1671          */
 1672         pi = uq->uq_pi_blocked;
 1673         if (pi != NULL) {
 1674                 umtx_pi_adjust_thread(pi, td);
 1675                 umtx_repropagate_priority(pi);
 1676         }
 1677         mtx_unlock_spin(&umtx_lock);
 1678 }
 1679 
/*
 * Sleep on a PI mutex.
 *
 * Enqueues uq on the mutex's wait queue and on the pi's
 * priority-sorted blocked list, propagates the sleeper's priority to
 * the owner chain, then sleeps.  owner is the user-visible owner id,
 * used to resolve the owning thread when pi has none recorded yet.
 * Called with the chain locked and busied; returns with both
 * released.  Returns 0, ETIMEDOUT, or a signal-related error from
 * msleep().
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/*
		 * Resolve the owner tid into a thread; the spin lock
		 * must be dropped around tdfind(), so recheck
		 * pi_owner after reacquiring it.
		 */
		mtx_unlock_spin(&umtx_lock);
		/* XXX Only look up thread in current process. */
		td1 = tdfind(owner, curproc->p_pid);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			/* tdfind() returned td1 with its proc locked. */
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/* Keep pi_blocked sorted by ascending priority value. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			/* Still queued: not woken by a signal path. */
			umtxq_remove(uq);
		}
	}
	/* Undo the blocked state and give back any lent priority. */
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
 1751 
 1752 /*
 1753  * Add reference count for a PI mutex.
 1754  */
 1755 static void
 1756 umtx_pi_ref(struct umtx_pi *pi)
 1757 {
 1758         struct umtxq_chain *uc;
 1759 
 1760         uc = umtxq_getchain(&pi->pi_key);
 1761         UMTXQ_LOCKED_ASSERT(uc);
 1762         pi->pi_refcount++;
 1763 }
 1764 
 1765 /*
 1766  * Decrease reference count for a PI mutex, if the counter
 1767  * is decreased to zero, its memory space is freed.
 1768  */ 
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		/* Detach from the owner's list of contested PI mutexes. */
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		/* Unhash and free; chain lock still held by caller. */
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}
 1791 
 1792 /*
 1793  * Find a PI mutex in hash table.
 1794  */
 1795 static struct umtx_pi *
 1796 umtx_pi_lookup(struct umtx_key *key)
 1797 {
 1798         struct umtxq_chain *uc;
 1799         struct umtx_pi *pi;
 1800 
 1801         uc = umtxq_getchain(key);
 1802         UMTXQ_LOCKED_ASSERT(uc);
 1803 
 1804         TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
 1805                 if (umtx_key_match(&pi->pi_key, key)) {
 1806                         return (pi);
 1807                 }
 1808         }
 1809         return (NULL);
 1810 }
 1811 
 1812 /*
 1813  * Insert a PI mutex into hash table.
 1814  */
 1815 static inline void
 1816 umtx_pi_insert(struct umtx_pi *pi)
 1817 {
 1818         struct umtxq_chain *uc;
 1819 
 1820         uc = umtxq_getchain(&pi->pi_key);
 1821         UMTXQ_LOCKED_ASSERT(uc);
 1822         TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
 1823 }
 1824 
 1825 /*
 1826  * Lock a PI mutex.
 1827  */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/*
	 * Find or create the kernel-side PI state for this mutex.
	 * Try a non-sleeping allocation under the chain lock first;
	 * if that fails, drop the lock, allocate with M_WAITOK and
	 * re-check for a racing insertion.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				/* Lost the race; discard our copy. */
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	/* Hold a reference so pi survives while we may sleep on it. */
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/* We became owner; record it in pi. */
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			error = umtxq_check_susp(td);
			if (error != 0)
				break;

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}

		/*
		 * NOTE(review): this assignment overwrites any error
		 * (e.g. ETIMEDOUT) returned by umtxq_sleep_pi() above,
		 * causing the loop to retry instead of reporting it --
		 * verify this is the intended behavior.
		 */
		error = umtxq_check_susp(td);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
 1981 
 1982 /*
 1983  * Unlock a PI mutex.
 1984  */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race with a locker setting the contested bit. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		/* Disown the PI state before waking a waiter. */
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute our own lent priority from the waiters on
		 * the PI mutexes we still own.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_lend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
 2081 
 2082 /*
 2083  * Lock a PP mutex.
 2084  */
 2085 static int
 2086 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
 2087         int try)
 2088 {
 2089         struct umtx_q *uq, *uq2;
 2090         struct umtx_pi *pi;
 2091         uint32_t ceiling;
 2092         uint32_t owner, id;
 2093         int error, pri, old_inherited_pri, su;
 2094 
 2095         id = td->td_tid;
 2096         uq = td->td_umtxq;
 2097         if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
 2098             &uq->uq_key)) != 0)
 2099                 return (error);
 2100         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
 2101         for (;;) {
 2102                 old_inherited_pri = uq->uq_inherited_pri;
 2103                 umtxq_lock(&uq->uq_key);
 2104                 umtxq_busy(&uq->uq_key);
 2105                 umtxq_unlock(&uq->uq_key);
 2106 
 2107                 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
 2108                 if (ceiling > RTP_PRIO_MAX) {
 2109                         error = EINVAL;
 2110                         goto out;
 2111                 }
 2112 
 2113                 mtx_lock_spin(&umtx_lock);
 2114                 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
 2115                         mtx_unlock_spin(&umtx_lock);
 2116                         error = EINVAL;
 2117                         goto out;
 2118                 }
 2119                 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
 2120                         uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
 2121                         thread_lock(td);
 2122                         if (uq->uq_inherited_pri < UPRI(td))
 2123                                 sched_lend_user_prio(td, uq->uq_inherited_pri);
 2124                         thread_unlock(td);
 2125                 }
 2126                 mtx_unlock_spin(&umtx_lock);
 2127 
 2128                 owner = casuword32(&m->m_owner,
 2129                     UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 2130 
 2131                 if (owner == UMUTEX_CONTESTED) {
 2132                         error = 0;
 2133                         break;
 2134                 }
 2135 
 2136                 /* The address was invalid. */
 2137                 if (owner == -1) {
 2138                         error = EFAULT;
 2139                         break;
 2140                 }
 2141 
 2142                 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
 2143                     (owner & ~UMUTEX_CONTESTED) == id) {
 2144                         error = EDEADLK;
 2145                         break;
 2146                 }
 2147 
 2148                 if (try != 0) {
 2149                         error = EBUSY;
 2150                         break;
 2151                 }
 2152 
 2153                 /*
 2154                  * If we caught a signal, we have retried and now
 2155                  * exit immediately.
 2156                  */
 2157                 if (error != 0)
 2158                         break;
 2159 
 2160                 umtxq_lock(&uq->uq_key);
 2161                 umtxq_insert(uq);
 2162                 umtxq_unbusy(&uq->uq_key);
 2163                 error = umtxq_sleep(uq, "umtxpp", timo);
 2164                 umtxq_remove(uq);
 2165                 umtxq_unlock(&uq->uq_key);
 2166 
 2167                 mtx_lock_spin(&umtx_lock);
 2168                 uq->uq_inherited_pri = old_inherited_pri;
 2169                 pri = PRI_MAX;
 2170                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2171                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2172                         if (uq2 != NULL) {
 2173                                 if (pri > UPRI(uq2->uq_thread))
 2174                                         pri = UPRI(uq2->uq_thread);
 2175                         }
 2176                 }
 2177                 if (pri > uq->uq_inherited_pri)
 2178                         pri = uq->uq_inherited_pri;
 2179                 thread_lock(td);
 2180                 sched_lend_user_prio(td, pri);
 2181                 thread_unlock(td);
 2182                 mtx_unlock_spin(&umtx_lock);
 2183         }
 2184 
 2185         if (error != 0) {
 2186                 mtx_lock_spin(&umtx_lock);
 2187                 uq->uq_inherited_pri = old_inherited_pri;
 2188                 pri = PRI_MAX;
 2189                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2190                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2191                         if (uq2 != NULL) {
 2192                                 if (pri > UPRI(uq2->uq_thread))
 2193                                         pri = UPRI(uq2->uq_thread);
 2194                         }
 2195                 }
 2196                 if (pri > uq->uq_inherited_pri)
 2197                         pri = uq->uq_inherited_pri;
 2198                 thread_lock(td);
 2199                 sched_lend_user_prio(td, pri);
 2200                 thread_unlock(td);
 2201                 mtx_unlock_spin(&umtx_lock);
 2202         }
 2203 
 2204 out:
 2205         umtxq_lock(&uq->uq_key);
 2206         umtxq_unbusy(&uq->uq_key);
 2207         umtxq_unlock(&uq->uq_key);
 2208         umtx_key_release(&uq->uq_key);
 2209         return (error);
 2210 }
 2211 
 2212 /*
 2213  * Unlock a PP mutex.
 2214  */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	/* Only privileged threads carry a boosted priority to restore. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		/* Map the userland ceiling onto the realtime range. */
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/* Recompute our lent priority now the ceiling is dropped. */
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
 2297 
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	/* Ceilings only apply to priority-protected mutexes. */
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Take the mutex so the ceiling changes atomically. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/*
			 * NOTE(review): the suword32() results below are
			 * not checked; a fault here would be silently
			 * ignored -- verify this is acceptable.
			 */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already own it: just store the new ceiling. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	/* Report the previous ceiling back to userland on success. */
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
 2377 
 2378 static int
 2379 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
 2380         int mode)
 2381 {
 2382         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2383         case 0:
 2384                 return (_do_lock_normal(td, m, flags, timo, mode));
 2385         case UMUTEX_PRIO_INHERIT:
 2386                 return (_do_lock_pi(td, m, flags, timo, mode));
 2387         case UMUTEX_PRIO_PROTECT:
 2388                 return (_do_lock_pp(td, m, flags, timo, mode));
 2389         }
 2390         return (EINVAL);
 2391 }
 2392 
 2393 /*
 2394  * Lock a userland POSIX mutex.
 2395  */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int mode)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, mode);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* ts becomes the absolute (uptime-based) deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
			if (error != ETIMEDOUT)
				break;
			/* Timed out: has the deadline actually passed? */
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Not yet; retry with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
 2437 
 2438 /*
 2439  * Unlock a userland POSIX mutex.
 2440  */
 2441 static int
 2442 do_unlock_umutex(struct thread *td, struct umutex *m)
 2443 {
 2444         uint32_t flags;
 2445 
 2446         flags = fuword32(&m->m_flags);
 2447         if (flags == -1)
 2448                 return (EFAULT);
 2449 
 2450         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2451         case 0:
 2452                 return (do_unlock_normal(td, m, flags));
 2453         case UMUTEX_PRIO_INHERIT:
 2454                 return (do_unlock_pi(td, m, flags));
 2455         case UMUTEX_PRIO_PROTECT:
 2456                 return (do_unlock_pp(td, m, flags));
 2457         }
 2458 
 2459         return (EINVAL);
 2460 }
 2461 
 2462 static int
 2463 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
 2464         struct timespec *timeout, u_long wflags)
 2465 {
 2466         struct umtx_q *uq;
 2467         struct timeval tv;
 2468         struct timespec cts, ets, tts;
 2469         uint32_t flags;
 2470         uint32_t clockid;
 2471         int error;
 2472 
 2473         uq = td->td_umtxq;
 2474         flags = fuword32(&cv->c_flags);
 2475         error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
 2476         if (error != 0)
 2477                 return (error);
 2478 
 2479         if ((wflags & CVWAIT_CLOCKID) != 0) {
 2480                 clockid = fuword32(&cv->c_clockid);
 2481                 if (clockid < CLOCK_REALTIME ||
 2482                     clockid >= CLOCK_THREAD_CPUTIME_ID) {
 2483                         /* hmm, only HW clock id will work. */
 2484                         return (EINVAL);
 2485                 }
 2486         } else {
 2487                 clockid = CLOCK_REALTIME;
 2488         }
 2489 
 2490         umtxq_lock(&uq->uq_key);
 2491         umtxq_busy(&uq->uq_key);
 2492         umtxq_insert(uq);
 2493         umtxq_unlock(&uq->uq_key);
 2494 
 2495         /*
 2496          * Set c_has_waiters to 1 before releasing user mutex, also
 2497          * don't modify cache line when unnecessary.
 2498          */
 2499         if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
 2500                 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
 2501 
 2502         umtxq_lock(&uq->uq_key);
 2503         umtxq_unbusy(&uq->uq_key);
 2504         umtxq_unlock(&uq->uq_key);
 2505 
 2506         error = do_unlock_umutex(td, m);
 2507         
 2508         umtxq_lock(&uq->uq_key);
 2509         if (error == 0) {
 2510                 if (timeout == NULL) {
 2511                         error = umtxq_sleep(uq, "ucond", 0);
 2512                 } else {
 2513                         if ((wflags & CVWAIT_ABSTIME) == 0) {
 2514                                 kern_clock_gettime(td, clockid, &ets);
 2515                                 timespecadd(&ets, timeout);
 2516                                 tts = *timeout;
 2517                         } else { /* absolute time */
 2518                                 ets = *timeout;
 2519                                 tts = *timeout;
 2520                                 kern_clock_gettime(td, clockid, &cts);
 2521                                 timespecsub(&tts, &cts);
 2522                         }
 2523                         TIMESPEC_TO_TIMEVAL(&tv, &tts);
 2524                         for (;;) {
 2525                                 error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
 2526                                 if (error != ETIMEDOUT)
 2527                                         break;
 2528                                 kern_clock_gettime(td, clockid, &cts);
 2529                                 if (timespeccmp(&cts, &ets, >=)) {
 2530                                         error = ETIMEDOUT;
 2531                                         break;
 2532                                 }
 2533                                 tts = ets;
 2534                                 timespecsub(&tts, &cts);
 2535                                 TIMESPEC_TO_TIMEVAL(&tv, &tts);
 2536                         }
 2537                 }
 2538         }
 2539 
 2540         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 2541                 error = 0;
 2542         else {
 2543                 /*
 2544                  * This must be timeout,interrupted by signal or
 2545                  * surprious wakeup, clear c_has_waiter flag when
 2546                  * necessary.
 2547                  */
 2548                 umtxq_busy(&uq->uq_key);
 2549                 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
 2550                         int oldlen = uq->uq_cur_queue->length;
 2551                         umtxq_remove(uq);
 2552                         if (oldlen == 1) {
 2553                                 umtxq_unlock(&uq->uq_key);
 2554                                 suword32(
 2555                                     __DEVOLATILE(uint32_t *,
 2556                                          &cv->c_has_waiters), 0);
 2557                                 umtxq_lock(&uq->uq_key);
 2558                         }
 2559                 }
 2560                 umtxq_unbusy(&uq->uq_key);
 2561                 if (error == ERESTART)
 2562                         error = EINTR;
 2563         }
 2564 
 2565         umtxq_unlock(&uq->uq_key);
 2566         umtx_key_release(&uq->uq_key);
 2567         return (error);
 2568 }
 2569 
 2570 /*
 2571  * Signal a userland condition variable.
 2572  */
 2573 static int
 2574 do_cv_signal(struct thread *td, struct ucond *cv)
 2575 {
 2576         struct umtx_key key;
 2577         int error, cnt, nwake;
 2578         uint32_t flags;
 2579 
 2580         flags = fuword32(&cv->c_flags);
 2581         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 2582                 return (error); 
 2583         umtxq_lock(&key);
 2584         umtxq_busy(&key);
 2585         cnt = umtxq_count(&key);
 2586         nwake = umtxq_signal(&key, 1);
 2587         if (cnt <= nwake) {
 2588                 umtxq_unlock(&key);
 2589                 error = suword32(
 2590                     __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
 2591                 umtxq_lock(&key);
 2592         }
 2593         umtxq_unbusy(&key);
 2594         umtxq_unlock(&key);
 2595         umtx_key_release(&key);
 2596         return (error);
 2597 }
 2598 
/*
 * Broadcast a userland condition variable: wake every waiter and
 * clear the userland c_has_waiters hint.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error); 

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/*
	 * The chain is still marked busy across this user-space store;
	 * the queue lock itself cannot be held over a faulting access.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}
 2624 
/*
 * Lock a userland rwlock for reading.  The thread sleeps on the
 * shared umtx queue while the lock cannot be granted; 'timo' is a
 * tick-based sleep timeout (0 means sleep until woken).
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	/*
	 * Bits a reader must wait on: a write owner always blocks
	 * readers; queued writers do too unless either the caller or
	 * the lock itself prefers readers.
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			/* state + 1 bumps the reader count held in state. */
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			/* CAS lost a race; honor pending suspension first. */
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* set read contention bit */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timo);

			/* Re-busy the chain before dequeueing ourselves. */
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* decrease read waiter count, and may clear read contention bit */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_READ_WAITERS);
				if (oldstate == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error = umtxq_check_susp(td);
				if (error != 0)
					break;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	return (error);
}
 2763 
 2764 static int
 2765 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
 2766 {
 2767         struct timespec ts, ts2, ts3;
 2768         struct timeval tv;
 2769         int error;
 2770 
 2771         getnanouptime(&ts);
 2772         timespecadd(&ts, timeout);
 2773         TIMESPEC_TO_TIMEVAL(&tv, timeout);
 2774         for (;;) {
 2775                 error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
 2776                 if (error != ETIMEDOUT)
 2777                         break;
 2778                 getnanouptime(&ts2);
 2779                 if (timespeccmp(&ts2, &ts, >=)) {
 2780                         error = ETIMEDOUT;
 2781                         break;
 2782                 }
 2783                 ts3 = ts;
 2784                 timespecsub(&ts3, &ts2);
 2785                 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
 2786         }
 2787         if (error == ERESTART)
 2788                 error = EINTR;
 2789         return (error);
 2790 }
 2791 
/*
 * Lock a userland rwlock for writing.  The thread sleeps on the
 * exclusive umtx queue while a writer owns the lock or readers are
 * active; 'timo' is a tick-based sleep timeout (0 = no timeout).
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	blocked_readers = 0;
	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* Try to acquire: no write owner and no active readers. */
		while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
			if (oldstate == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}

		if (error) {
			/*
			 * Bailing out with blocked readers recorded from a
			 * previous pass: if no writer owns or waits on the
			 * lock any more, wake the shared queue so readers
			 * can retry.
			 */
			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* Set the write-contention bit while the lock is still held. */
		while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
		       (state & URWLOCK_WRITE_WAITERS) == 0) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
			if (oldstate == -1) {
				error = EFAULT;
				break;
			}
			if (oldstate == state)
				goto sleep;
			state = oldstate;
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			break;
		}

		/* Lock was released while setting the bit; retry the fast path. */
		if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = umtxq_check_susp(td);
			if (error != 0)
				break;
			continue;
		}
sleep:
		/* Announce ourselves as a blocked writer before sleeping. */
		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

		while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		blocked_writers = fuword32(&rwlock->rw_blocked_writers);
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			/* Last blocked writer: clear the contention bit. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_WRITE_WAITERS);
				if (oldstate == -1) {
					error = EFAULT;
					break;
				}
				if (oldstate == state)
					break;
				state = oldstate;
				error = umtxq_check_susp(td);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * behind, but this should not harm the
				 * correctness.
				 */
				if (error != 0)
					break;
			}
			blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		} else
			blocked_readers = 0;

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	return (error);
}
 2935 
 2936 static int
 2937 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
 2938 {
 2939         struct timespec ts, ts2, ts3;
 2940         struct timeval tv;
 2941         int error;
 2942 
 2943         getnanouptime(&ts);
 2944         timespecadd(&ts, timeout);
 2945         TIMESPEC_TO_TIMEVAL(&tv, timeout);
 2946         for (;;) {
 2947                 error = do_rw_wrlock(td, obj, tvtohz(&tv));
 2948                 if (error != ETIMEDOUT)
 2949                         break;
 2950                 getnanouptime(&ts2);
 2951                 if (timespeccmp(&ts2, &ts, >=)) {
 2952                         error = ETIMEDOUT;
 2953                         break;
 2954                 }
 2955                 ts3 = ts;
 2956                 timespecsub(&ts3, &ts2);
 2957                 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
 2958         }
 2959         if (error == ERESTART)
 2960                 error = EINTR;
 2961         return (error);
 2962 }
 2963 
/*
 * Unlock a userland rwlock held by the caller: release either the
 * write ownership or one read reference, then wake waiters according
 * to the lock's preference policy.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, q, count;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
	if (state & URWLOCK_WRITE_OWNER) {
		/* Write-locked: clear the owner bit. */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state, 
				state & ~URWLOCK_WRITE_OWNER);
			if (oldstate == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				/* Owner bit already gone: lock is not ours. */
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read-locked: drop one reader reference (state - 1). */
		for (;;) {
			oldstate = casuword32(&rwlock->rw_state, state,
				state - 1);
			if (oldstate == -1) {
				error = EFAULT;
				goto out;
			}
			if (oldstate != state) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = umtxq_check_susp(td);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		/* Neither write- nor read-locked: nothing to release. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Choose whom to wake: by default one writer takes priority
	 * over all readers; with URWLOCK_PREFER_READER the readers
	 * are checked first.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
 3055 
/*
 * Wait on a userland semaphore (_usem) until its count is non-zero.
 * 'timeout', if given, is relative to the current uptime.
 */
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags, count;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&sem->_flags);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	/* Queue ourselves and busy the chain before touching userland. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/* Publish the waiter hint if it is not already set. */
	if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0)
		casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);

	/* Re-check the count; non-zero means we need not sleep at all. */
	count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
	if (count != 0) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (0);
	}

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	umtxq_lock(&uq->uq_key);
	if (timeout == NULL) {
		error = umtxq_sleep(uq, "usem", 0);
	} else {
		/* Convert the relative timeout to an absolute deadline. */
		getnanouptime(&ets);
		timespecadd(&ets, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = umtxq_sleep(uq, "usem", tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&cts);
			if (timespeccmp(&cts, &ets, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Tick timeout fired early; sleep for the remainder. */
			tts = ets;
			timespecsub(&tts, &cts);
			TIMESPEC_TO_TIMEVAL(&tv, &tts);
		}
	}

	/* No longer on the queue: a waker removed us, report success. */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
 3126 
 3127 /*
 3128  * Signal a userland condition variable.
 3129  */
 3130 static int
 3131 do_sem_wake(struct thread *td, struct _usem *sem)
 3132 {
 3133         struct umtx_key key;
 3134         int error, cnt, nwake;
 3135         uint32_t flags;
 3136 
 3137         flags = fuword32(&sem->_flags);
 3138         if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
 3139                 return (error); 
 3140         umtxq_lock(&key);
 3141         umtxq_busy(&key);
 3142         cnt = umtxq_count(&key);
 3143         nwake = umtxq_signal(&key, 1);
 3144         if (cnt <= nwake) {
 3145                 umtxq_unlock(&key);
 3146                 error = suword32(
 3147                     __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
 3148                 umtxq_lock(&key);
 3149         }
 3150         umtxq_unbusy(&key);
 3151         umtxq_unlock(&key);
 3152         umtx_key_release(&key);
 3153         return (error);
 3154 }
 3155 
 3156 int
 3157 sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
 3158     /* struct umtx *umtx */
 3159 {
 3160         return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
 3161 }
 3162 
 3163 int
 3164 sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
 3165     /* struct umtx *umtx */
 3166 {
 3167         return do_unlock_umtx(td, uap->umtx, td->td_tid);
 3168 }
 3169 
 3170 inline int
 3171 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
 3172 {
 3173         int error;
 3174 
 3175         error = copyin(addr, tsp, sizeof(struct timespec));
 3176         if (error == 0) {
 3177                 if (tsp->tv_sec < 0 ||
 3178                     tsp->tv_nsec >= 1000000000 ||
 3179                     tsp->tv_nsec < 0)
 3180                         error = EINVAL;
 3181         }
 3182         return (error);
 3183 }
 3184 
 3185 static int
 3186 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
 3187 {
 3188         struct timespec *ts, timeout;
 3189         int error;
 3190 
 3191         /* Allow a null timespec (wait forever). */
 3192         if (uap->uaddr2 == NULL)
 3193                 ts = NULL;
 3194         else {
 3195                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3196                 if (error != 0)
 3197                         return (error);
 3198                 ts = &timeout;
 3199         }
 3200         return (do_lock_umtx(td, uap->obj, uap->val, ts));
 3201 }
 3202 
 3203 static int
 3204 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
 3205 {
 3206         return (do_unlock_umtx(td, uap->obj, uap->val));
 3207 }
 3208 
 3209 static int
 3210 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
 3211 {
 3212         struct timespec *ts, timeout;
 3213         int error;
 3214 
 3215         if (uap->uaddr2 == NULL)
 3216                 ts = NULL;
 3217         else {
 3218                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3219                 if (error != 0)
 3220                         return (error);
 3221                 ts = &timeout;
 3222         }
 3223         return do_wait(td, uap->obj, uap->val, ts, 0, 0);
 3224 }
 3225 
 3226 static int
 3227 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
 3228 {
 3229         struct timespec *ts, timeout;
 3230         int error;
 3231 
 3232         if (uap->uaddr2 == NULL)
 3233                 ts = NULL;
 3234         else {
 3235                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3236                 if (error != 0)
 3237                         return (error);
 3238                 ts = &timeout;
 3239         }
 3240         return do_wait(td, uap->obj, uap->val, ts, 1, 0);
 3241 }
 3242 
 3243 static int
 3244 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
 3245 {
 3246         struct timespec *ts, timeout;
 3247         int error;
 3248 
 3249         if (uap->uaddr2 == NULL)
 3250                 ts = NULL;
 3251         else {
 3252                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3253                 if (error != 0)
 3254                         return (error);
 3255                 ts = &timeout;
 3256         }
 3257         return do_wait(td, uap->obj, uap->val, ts, 1, 1);
 3258 }
 3259 
 3260 static int
 3261 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
 3262 {
 3263         return (kern_umtx_wake(td, uap->obj, uap->val, 0));
 3264 }
 3265 
 3266 #define BATCH_SIZE      128
 3267 static int
 3268 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
 3269 {
 3270         int count = uap->val;
 3271         void *uaddrs[BATCH_SIZE];
 3272         char **upp = (char **)uap->obj;
 3273         int tocopy;
 3274         int error = 0;
 3275         int i, pos = 0;
 3276 
 3277         while (count > 0) {
 3278                 tocopy = count;
 3279                 if (tocopy > BATCH_SIZE)
 3280                         tocopy = BATCH_SIZE;
 3281                 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
 3282                 if (error != 0)
 3283                         break;
 3284                 for (i = 0; i < tocopy; ++i)
 3285                         kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
 3286                 count -= tocopy;
 3287                 pos += tocopy;
 3288         }
 3289         return (error);
 3290 }
 3291 
 3292 static int
 3293 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
 3294 {
 3295         return (kern_umtx_wake(td, uap->obj, uap->val, 1));
 3296 }
 3297 
 3298 static int
 3299 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
 3300 {
 3301         struct timespec *ts, timeout;
 3302         int error;
 3303 
 3304         /* Allow a null timespec (wait forever). */
 3305         if (uap->uaddr2 == NULL)
 3306                 ts = NULL;
 3307         else {
 3308                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3309                 if (error != 0)
 3310                         return (error);
 3311                 ts = &timeout;
 3312         }
 3313         return do_lock_umutex(td, uap->obj, ts, 0);
 3314 }
 3315 
 3316 static int
 3317 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
 3318 {
 3319         return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
 3320 }
 3321 
 3322 static int
 3323 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
 3324 {
 3325         struct timespec *ts, timeout;
 3326         int error;
 3327 
 3328         /* Allow a null timespec (wait forever). */
 3329         if (uap->uaddr2 == NULL)
 3330                 ts = NULL;
 3331         else {
 3332                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3333                 if (error != 0)
 3334                         return (error);
 3335                 ts = &timeout;
 3336         }
 3337         return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
 3338 }
 3339 
 3340 static int
 3341 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
 3342 {
 3343         return do_wake_umutex(td, uap->obj);
 3344 }
 3345 
 3346 static int
 3347 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
 3348 {
 3349         return do_unlock_umutex(td, uap->obj);
 3350 }
 3351 
 3352 static int
 3353 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
 3354 {
 3355         return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
 3356 }
 3357 
 3358 static int
 3359 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
 3360 {
 3361         struct timespec *ts, timeout;
 3362         int error;
 3363 
 3364         /* Allow a null timespec (wait forever). */
 3365         if (uap->uaddr2 == NULL)
 3366                 ts = NULL;
 3367         else {
 3368                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3369                 if (error != 0)
 3370                         return (error);
 3371                 ts = &timeout;
 3372         }
 3373         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 3374 }
 3375 
 3376 static int
 3377 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
 3378 {
 3379         return do_cv_signal(td, uap->obj);
 3380 }
 3381 
 3382 static int
 3383 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
 3384 {
 3385         return do_cv_broadcast(td, uap->obj);
 3386 }
 3387 
 3388 static int
 3389 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
 3390 {
 3391         struct timespec timeout;
 3392         int error;
 3393 
 3394         /* Allow a null timespec (wait forever). */
 3395         if (uap->uaddr2 == NULL) {
 3396                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3397         } else {
 3398                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3399                 if (error != 0)
 3400                         return (error);
 3401                 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
 3402         }
 3403         return (error);
 3404 }
 3405 
 3406 static int
 3407 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
 3408 {
 3409         struct timespec timeout;
 3410         int error;
 3411 
 3412         /* Allow a null timespec (wait forever). */
 3413         if (uap->uaddr2 == NULL) {
 3414                 error = do_rw_wrlock(td, uap->obj, 0);
 3415         } else {
 3416                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3417                 if (error != 0)
 3418                         return (error);
 3419 
 3420                 error = do_rw_wrlock2(td, uap->obj, &timeout);
 3421         }
 3422         return (error);
 3423 }
 3424 
 3425 static int
 3426 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
 3427 {
 3428         return do_rw_unlock(td, uap->obj);
 3429 }
 3430 
 3431 static int
 3432 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
 3433 {
 3434         struct timespec *ts, timeout;
 3435         int error;
 3436 
 3437         /* Allow a null timespec (wait forever). */
 3438         if (uap->uaddr2 == NULL)
 3439                 ts = NULL;
 3440         else {
 3441                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3442                 if (error != 0)
 3443                         return (error);
 3444                 ts = &timeout;
 3445         }
 3446         return (do_sem_wait(td, uap->obj, ts));
 3447 }
 3448 
 3449 static int
 3450 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
 3451 {
 3452         return do_sem_wake(td, uap->obj);
 3453 }
 3454 
 3455 static int
 3456 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
 3457 {
 3458         return do_wake2_umutex(td, uap->obj, uap->val);
 3459 }
 3460 
 3461 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
 3462 
/*
 * Dispatch table for the native _umtx_op(2) syscall, indexed by the
 * UMTX_OP_* request code.  Order must match the UMTX_OP_* definitions;
 * sys__umtx_op() bounds-checks the index against UMTX_OP_MAX.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint,		/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock,		/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock,		/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex,		/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait,		/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex		/* UMTX_OP_UMUTEX_WAKE2 */
};
 3488 
 3489 int
 3490 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
 3491 {
 3492         if ((unsigned)uap->op < UMTX_OP_MAX)
 3493                 return (*op_table[uap->op])(td, uap);
 3494         return (EINVAL);
 3495 }
 3496 
 3497 #ifdef COMPAT_FREEBSD32
 3498 int
 3499 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
 3500     /* struct umtx *umtx */
 3501 {
 3502         return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
 3503 }
 3504 
 3505 int
 3506 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
 3507     /* struct umtx *umtx */
 3508 {
 3509         return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
 3510 }
 3511 
/*
 * 32-bit ABI layout of struct timespec, used when copying timeouts in
 * from 32-bit processes under COMPAT_FREEBSD32.
 */
struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};
 3516 
 3517 static inline int
 3518 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
 3519 {
 3520         struct timespec32 ts32;
 3521         int error;
 3522 
 3523         error = copyin(addr, &ts32, sizeof(struct timespec32));
 3524         if (error == 0) {
 3525                 if (ts32.tv_sec < 0 ||
 3526                     ts32.tv_nsec >= 1000000000 ||
 3527                     ts32.tv_nsec < 0)
 3528                         error = EINVAL;
 3529                 else {
 3530                         tsp->tv_sec = ts32.tv_sec;
 3531                         tsp->tv_nsec = ts32.tv_nsec;
 3532                 }
 3533         }
 3534         return (error);
 3535 }
 3536 
 3537 static int
 3538 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3539 {
 3540         struct timespec *ts, timeout;
 3541         int error;
 3542 
 3543         /* Allow a null timespec (wait forever). */
 3544         if (uap->uaddr2 == NULL)
 3545                 ts = NULL;
 3546         else {
 3547                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3548                 if (error != 0)
 3549                         return (error);
 3550                 ts = &timeout;
 3551         }
 3552         return (do_lock_umtx32(td, uap->obj, uap->val, ts));
 3553 }
 3554 
 3555 static int
 3556 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3557 {
 3558         return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
 3559 }
 3560 
 3561 static int
 3562 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3563 {
 3564         struct timespec *ts, timeout;
 3565         int error;
 3566 
 3567         if (uap->uaddr2 == NULL)
 3568                 ts = NULL;
 3569         else {
 3570                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3571                 if (error != 0)
 3572                         return (error);
 3573                 ts = &timeout;
 3574         }
 3575         return do_wait(td, uap->obj, uap->val, ts, 1, 0);
 3576 }
 3577 
 3578 static int
 3579 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3580 {
 3581         struct timespec *ts, timeout;
 3582         int error;
 3583 
 3584         /* Allow a null timespec (wait forever). */
 3585         if (uap->uaddr2 == NULL)
 3586                 ts = NULL;
 3587         else {
 3588                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3589                 if (error != 0)
 3590                         return (error);
 3591                 ts = &timeout;
 3592         }
 3593         return do_lock_umutex(td, uap->obj, ts, 0);
 3594 }
 3595 
 3596 static int
 3597 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3598 {
 3599         struct timespec *ts, timeout;
 3600         int error;
 3601 
 3602         /* Allow a null timespec (wait forever). */
 3603         if (uap->uaddr2 == NULL)
 3604                 ts = NULL;
 3605         else {
 3606                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3607                 if (error != 0)
 3608                         return (error);
 3609                 ts = &timeout;
 3610         }
 3611         return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
 3612 }
 3613 
 3614 static int
 3615 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3616 {
 3617         struct timespec *ts, timeout;
 3618         int error;
 3619 
 3620         /* Allow a null timespec (wait forever). */
 3621         if (uap->uaddr2 == NULL)
 3622                 ts = NULL;
 3623         else {
 3624                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3625                 if (error != 0)
 3626                         return (error);
 3627                 ts = &timeout;
 3628         }
 3629         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 3630 }
 3631 
 3632 static int
 3633 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3634 {
 3635         struct timespec timeout;
 3636         int error;
 3637 
 3638         /* Allow a null timespec (wait forever). */
 3639         if (uap->uaddr2 == NULL) {
 3640                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3641         } else {
 3642                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3643                 if (error != 0)
 3644                         return (error);
 3645                 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
 3646         }
 3647         return (error);
 3648 }
 3649 
 3650 static int
 3651 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3652 {
 3653         struct timespec timeout;
 3654         int error;
 3655 
 3656         /* Allow a null timespec (wait forever). */
 3657         if (uap->uaddr2 == NULL) {
 3658                 error = do_rw_wrlock(td, uap->obj, 0);
 3659         } else {
 3660                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3661                 if (error != 0)
 3662                         return (error);
 3663 
 3664                 error = do_rw_wrlock2(td, uap->obj, &timeout);
 3665         }
 3666         return (error);
 3667 }
 3668 
 3669 static int
 3670 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
 3671 {
 3672         struct timespec *ts, timeout;
 3673         int error;
 3674 
 3675         if (uap->uaddr2 == NULL)
 3676                 ts = NULL;
 3677         else {
 3678                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3679                 if (error != 0)
 3680                         return (error);
 3681                 ts = &timeout;
 3682         }
 3683         return do_wait(td, uap->obj, uap->val, ts, 1, 1);
 3684 }
 3685 
 3686 static int
 3687 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3688 {
 3689         struct timespec *ts, timeout;
 3690         int error;
 3691 
 3692         /* Allow a null timespec (wait forever). */
 3693         if (uap->uaddr2 == NULL)
 3694                 ts = NULL;
 3695         else {
 3696                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3697                 if (error != 0)
 3698                         return (error);
 3699                 ts = &timeout;
 3700         }
 3701         return (do_sem_wait(td, uap->obj, ts));
 3702 }
 3703 
 3704 static int
 3705 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
 3706 {
 3707         int count = uap->val;
 3708         uint32_t uaddrs[BATCH_SIZE];
 3709         uint32_t **upp = (uint32_t **)uap->obj;
 3710         int tocopy;
 3711         int error = 0;
 3712         int i, pos = 0;
 3713 
 3714         while (count > 0) {
 3715                 tocopy = count;
 3716                 if (tocopy > BATCH_SIZE)
 3717                         tocopy = BATCH_SIZE;
 3718                 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
 3719                 if (error != 0)
 3720                         break;
 3721                 for (i = 0; i < tocopy; ++i)
 3722                         kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
 3723                                 INT_MAX, 1);
 3724                 count -= tocopy;
 3725                 pos += tocopy;
 3726         }
 3727         return (error);
 3728 }
 3729 
/*
 * Dispatch table for _umtx_op(2) issued by 32-bit processes.  Order must
 * match the UMTX_OP_* definitions (same order as the native op_table);
 * freebsd32_umtx_op() bounds-checks the index against UMTX_OP_MAX.
 *
 * NOTE: the MUTEX_TRYLOCK/MUTEX_LOCK comments were previously swapped
 * relative to the native table; the function order here is correct
 * (trylock at index 4, lock at index 5) and the labels now match it.
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT_UINT */
	__umtx_op_rw_rdlock_compat32,	/* UMTX_OP_RW_RDLOCK */
	__umtx_op_rw_wrlock_compat32,	/* UMTX_OP_RW_WRLOCK */
	__umtx_op_rw_unlock,		/* UMTX_OP_RW_UNLOCK */
	__umtx_op_wait_uint_private_compat32,	/* UMTX_OP_WAIT_UINT_PRIVATE */
	__umtx_op_wake_private,		/* UMTX_OP_WAKE_PRIVATE */
	__umtx_op_wait_umutex_compat32,	/* UMTX_OP_UMUTEX_WAIT */
	__umtx_op_wake_umutex,		/* UMTX_OP_UMUTEX_WAKE */
	__umtx_op_sem_wait_compat32,	/* UMTX_OP_SEM_WAIT */
	__umtx_op_sem_wake,		/* UMTX_OP_SEM_WAKE */
	__umtx_op_nwake_private32,	/* UMTX_OP_NWAKE_PRIVATE */
	__umtx_op_wake2_umutex		/* UMTX_OP_UMUTEX_WAKE2 */
};
 3755 
 3756 int
 3757 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
 3758 {
 3759         if ((unsigned)uap->op < UMTX_OP_MAX)
 3760                 return (*op_table_compat32[uap->op])(td,
 3761                         (struct _umtx_op_args *)uap);
 3762         return (EINVAL);
 3763 }
 3764 #endif
 3765 
 3766 void
 3767 umtx_thread_init(struct thread *td)
 3768 {
 3769         td->td_umtxq = umtxq_alloc();
 3770         td->td_umtxq->uq_thread = td;
 3771 }
 3772 
 3773 void
 3774 umtx_thread_fini(struct thread *td)
 3775 {
 3776         umtxq_free(td->td_umtxq);
 3777 }
 3778 
/*
 * Called when a new thread is created, e.g. fork(): reset the inherited
 * priority and sanity-check that the recycled umtx queue carries no
 * stale state from a previous thread.
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	/* PRI_MAX means no priority is currently being inherited. */
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
 3795 
/*
 * exec() hook: drop any umtx state the current thread still holds
 * before its address space is replaced.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}
 3805 
/*
 * thread_exit() hook: drop any umtx state the exiting thread holds.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
 3814 
/*
 * Clean up per-thread umtx data: disown every priority-inheritance
 * mutex the thread still holds contested, reset the inherited priority,
 * and return the thread's lent user priority to PRI_MAX.  Called from
 * the exec() hook and from thread exit.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	/* Nothing to do if the thread never had a umtx queue. */
	if ((uq = td->td_umtxq) == NULL)
		return;

	/* umtx_lock protects the PI state; taken before the thread lock. */
	mtx_lock_spin(&umtx_lock);
	uq->uq_inherited_pri = PRI_MAX;
	/* Detach each contested PI mutex so it no longer has an owner. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	mtx_unlock_spin(&umtx_lock);
	/* Give back any priority that was lent through PI. */
	thread_lock(td);
	sched_lend_user_prio(td, PRI_MAX);
	thread_unlock(td);
}

Cache object: 55ce3e09d0b36d8bd9572d3828b3a069


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.