The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
    3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice unmodified, this list of conditions, and the following
   11  *    disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/8.3/sys/kern/kern_umtx.c 228439 2011-12-12 17:33:38Z pho $");
   30 
   31 #include "opt_compat.h"
   32 #include <sys/param.h>
   33 #include <sys/kernel.h>
   34 #include <sys/limits.h>
   35 #include <sys/lock.h>
   36 #include <sys/malloc.h>
   37 #include <sys/mutex.h>
   38 #include <sys/priv.h>
   39 #include <sys/proc.h>
   40 #include <sys/sched.h>
   41 #include <sys/smp.h>
   42 #include <sys/sysctl.h>
   43 #include <sys/sysent.h>
   44 #include <sys/systm.h>
   45 #include <sys/sysproto.h>
   46 #include <sys/eventhandler.h>
   47 #include <sys/umtx.h>
   48 
   49 #include <vm/vm.h>
   50 #include <vm/vm_param.h>
   51 #include <vm/pmap.h>
   52 #include <vm/vm_map.h>
   53 #include <vm/vm_object.h>
   54 
   55 #include <machine/cpu.h>
   56 
   57 #ifdef COMPAT_FREEBSD32
   58 #include <compat/freebsd32/freebsd32_proto.h>
   59 #endif
   60 
/*
 * Internal umutex request modifiers: _UMUTEX_TRY selects a non-blocking
 * attempt and _UMUTEX_WAIT a blocking wait.
 * NOTE(review): the consumers of these flags are outside this chunk —
 * confirm against the umutex lock/wait paths further down the file.
 */
#define _UMUTEX_TRY		1
#define _UMUTEX_WAIT		2
   63 
/*
 * Priority inheritance mutex info.
 *
 * Tracks the owning thread and the queue of threads blocked on a
 * userland PI mutex, identified by pi_key, so that the owner's priority
 * can be adjusted while waiters are blocked.
 */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count; the entry is freed when it drops to zero. */
	int			pi_refcount;

	/* List entry to link umtx held by a thread (uq_pi_contested). */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in the chain-wide hash list (uc_pi_list). */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List of umtx_q waiters blocked on this PI mutex. */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify the userland lock object this PI record belongs to. */
	struct umtx_key		pi_key;
};
   84 
/*
 * A userland synchronous object user: one per thread, describing what
 * (if anything) the thread is currently waiting on.
 */
struct umtx_q {
	/* Linked list entry for the chain's wait queue. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Key identifying the userland object being waited on. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* set while on a chain wait queue */

	/* Back pointer to the thread this entry belongs to. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or umtx_lock, write must have both chain lock and
	 * umtx_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* Entry on a umtx_pi's pi_blocked list. */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes this thread owns that have waiters. */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Priority inherited from PP mutexes (PRI_MAX when none). */
	u_char			uq_inherited_pri;
};
  116 
/* Head type for queues of waiting umtx_q entries. */
TAILQ_HEAD(umtxq_head, umtx_q);

/*
 * Userland lock object's wait-queue chain: one hash bucket holding the
 * wait queues, busy state and PI records for all keys hashing here.
 */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues, indexed by the queue constants below. */
	struct umtxq_head	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Busy flag: set while an operation that may block is in flight. */
	char			uc_busy;

	/* Number of threads sleeping for the busy flag to clear. */
	int			uc_waiters;

	/* All umtx_pi records hashed to this chain. */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};
  138 
  139 #define UMTXQ_LOCKED_ASSERT(uc)         mtx_assert(&(uc)->uc_lock, MA_OWNED)
  140 #define UMTXQ_BUSY_ASSERT(uc)   KASSERT(&(uc)->uc_busy, ("umtx chain is not busy"))
  141 
/*
 * Don't propagate time-sharing priority, there is a security reason,
 * a user can simply introduce PI-mutex, let thread A lock the mutex,
 * and let another thread B block on the mutex, because B is
 * sleeping, its priority will be boosted, this causes A's priority to
 * be boosted via priority propagating too and will never be lowered even
 * if it is using 100%CPU, this is unfair to other processes.
 */

/*
 * Effective user priority for propagation: time-sharing priorities are
 * clamped to PRI_MAX_TIMESHARE (see the rationale above); all other
 * classes pass through unchanged.
 */
#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

/* Multiplicative hash constant and table geometry for umtxq_hash(). */
#define GOLDEN_RATIO_PRIME	2654404609U
#define UMTX_CHAINS		128
#define UMTX_SHIFTS		(__WORD_BIT - 7)

/* Map the USYNC_PROCESS_SHARED flag onto a key sharing mode. */
#define GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)

/* Spin iterations in umtxq_busy() before sleeping (SMP only). */
#define BUSY_SPINS		200
  163 
/* UMA zone for umtx_pi allocations. */
static uma_zone_t		umtx_pi_zone;
/* Two hash tables of chains: [1] for condvar-like types, [0] for the rest
 * (see umtxq_getchain()). */
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
/* Current number of live umtx_pi allocations (debug.umtx.umtx_pi_allocated). */
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

/* Forward declarations for the static helpers defined below. */
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

/* Convenience wrappers that operate on the shared queue. */
#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

/* Spin lock protecting PI bookkeeping (see struct umtx_q comments). */
static struct mtx umtx_lock;
  198 
  199 static void
  200 umtxq_sysinit(void *arg __unused)
  201 {
  202         int i, j;
  203 
  204         umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
  205                 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  206         for (i = 0; i < 2; ++i) {
  207                 for (j = 0; j < UMTX_CHAINS; ++j) {
  208                         mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
  209                                  MTX_DEF | MTX_DUPOK);
  210                         TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]);
  211                         TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]);
  212                         TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
  213                         umtxq_chains[i][j].uc_busy = 0;
  214                         umtxq_chains[i][j].uc_waiters = 0;
  215                 }
  216         }
  217         mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
  218         EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
  219             EVENTHANDLER_PRI_ANY);
  220 }
  221 
  222 struct umtx_q *
  223 umtxq_alloc(void)
  224 {
  225         struct umtx_q *uq;
  226 
  227         uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
  228         TAILQ_INIT(&uq->uq_pi_contested);
  229         uq->uq_inherited_pri = PRI_MAX;
  230         return (uq);
  231 }
  232 
/*
 * Release a umtx queue entry previously obtained from umtxq_alloc().
 */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}
  238 
  239 static inline void
  240 umtxq_hash(struct umtx_key *key)
  241 {
  242         unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
  243         key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
  244 }
  245 
  246 static inline struct umtxq_chain *
  247 umtxq_getchain(struct umtx_key *key)
  248 {
  249         if (key->type <= TYPE_CV)
  250                 return (&umtxq_chains[1][key->hash]);
  251         return (&umtxq_chains[0][key->hash]);
  252 }
  253 
  254 /*
  255  * Lock a chain.
  256  */
  257 static inline void
  258 umtxq_lock(struct umtx_key *key)
  259 {
  260         struct umtxq_chain *uc;
  261 
  262         uc = umtxq_getchain(key);
  263         mtx_lock(&uc->uc_lock);
  264 }
  265 
  266 /*
  267  * Unlock a chain.
  268  */
  269 static inline void
  270 umtxq_unlock(struct umtx_key *key)
  271 {
  272         struct umtxq_chain *uc;
  273 
  274         uc = umtxq_getchain(key);
  275         mtx_unlock(&uc->uc_lock);
  276 }
  277 
/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 *
 * Called with the chain lock held; returns with it still held and
 * uc_busy set.  If the chain is already busy we first spin briefly
 * (SMP only) with the lock dropped, hoping the current holder
 * finishes, then fall back to sleeping on the chain.  The unlocked
 * reads of uc_busy while spinning are racy on purpose; the final
 * while loop re-checks under the chain lock.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	if (uc->uc_busy) {
#ifdef SMP
		if (smp_cpus > 1) {
			int count = BUSY_SPINS;
			/* NOTE(review): count > 0 is always true here
			 * (BUSY_SPINS == 200); the check is redundant. */
			if (count > 0) {
				umtxq_unlock(key);
				while (uc->uc_busy && --count > 0)
					cpu_spinwait();
				umtxq_lock(key);
			}
		}
#endif
		/* Sleep until the current holder calls umtxq_unbusy(). */
		while (uc->uc_busy) {
			uc->uc_waiters++;
			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
			uc->uc_waiters--;
		}
	}
	uc->uc_busy = 1;
}
  309 
/*
 * Unbusy a chain.
 *
 * Called with the chain lock held.  Clears the busy flag and wakes a
 * single thread sleeping in umtxq_busy(), if any.
 */
static inline void
umtxq_unbusy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	KASSERT(uc->uc_busy != 0, ("not busy"));
	uc->uc_busy = 0;
	if (uc->uc_waiters)
		wakeup_one(uc);
}
  325 
  326 static inline void
  327 umtxq_insert_queue(struct umtx_q *uq, int q)
  328 {
  329         struct umtxq_chain *uc;
  330 
  331         uc = umtxq_getchain(&uq->uq_key);
  332         UMTXQ_LOCKED_ASSERT(uc);
  333         TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
  334         uq->uq_flags |= UQF_UMTXQ;
  335 }
  336 
  337 static inline void
  338 umtxq_remove_queue(struct umtx_q *uq, int q)
  339 {
  340         struct umtxq_chain *uc;
  341 
  342         uc = umtxq_getchain(&uq->uq_key);
  343         UMTXQ_LOCKED_ASSERT(uc);
  344         if (uq->uq_flags & UQF_UMTXQ) {
  345                 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
  346                 uq->uq_flags &= ~UQF_UMTXQ;
  347         }
  348 }
  349 
  350 /*
  351  * Check if there are multiple waiters
  352  */
  353 static int
  354 umtxq_count(struct umtx_key *key)
  355 {
  356         struct umtxq_chain *uc;
  357         struct umtx_q *uq;
  358         int count = 0;
  359 
  360         uc = umtxq_getchain(key);
  361         UMTXQ_LOCKED_ASSERT(uc);
  362         TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
  363                 if (umtx_key_match(&uq->uq_key, key)) {
  364                         if (++count > 1)
  365                                 break;
  366                 }
  367         }
  368         return (count);
  369 }
  370 
  371 /*
  372  * Check if there are multiple PI waiters and returns first
  373  * waiter.
  374  */
  375 static int
  376 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
  377 {
  378         struct umtxq_chain *uc;
  379         struct umtx_q *uq;
  380         int count = 0;
  381 
  382         *first = NULL;
  383         uc = umtxq_getchain(key);
  384         UMTXQ_LOCKED_ASSERT(uc);
  385         TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
  386                 if (umtx_key_match(&uq->uq_key, key)) {
  387                         if (++count > 1)
  388                                 break;
  389                         *first = uq;
  390                 }
  391         }
  392         return (count);
  393 }
  394 
  395 /*
  396  * Wake up threads waiting on an userland object.
  397  */
  398 
  399 static int
  400 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
  401 {
  402         struct umtxq_chain *uc;
  403         struct umtx_q *uq, *next;
  404         int ret;
  405 
  406         ret = 0;
  407         uc = umtxq_getchain(key);
  408         UMTXQ_LOCKED_ASSERT(uc);
  409         TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
  410                 if (umtx_key_match(&uq->uq_key, key)) {
  411                         umtxq_remove_queue(uq, q);
  412                         wakeup(uq);
  413                         if (++ret >= n_wake)
  414                                 break;
  415                 }
  416         }
  417         return (ret);
  418 }
  419 
  420 
  421 /*
  422  * Wake up specified thread.
  423  */
  424 static inline void
  425 umtxq_signal_thread(struct umtx_q *uq)
  426 {
  427         struct umtxq_chain *uc;
  428 
  429         uc = umtxq_getchain(&uq->uq_key);
  430         UMTXQ_LOCKED_ASSERT(uc);
  431         umtxq_remove(uq);
  432         wakeup(uq);
  433 }
  434 
  435 /*
  436  * Put thread into sleep state, before sleeping, check if
  437  * thread was removed from umtx queue.
  438  */
  439 static inline int
  440 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
  441 {
  442         struct umtxq_chain *uc;
  443         int error;
  444 
  445         uc = umtxq_getchain(&uq->uq_key);
  446         UMTXQ_LOCKED_ASSERT(uc);
  447         if (!(uq->uq_flags & UQF_UMTXQ))
  448                 return (0);
  449         error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
  450         if (error == EWOULDBLOCK)
  451                 error = ETIMEDOUT;
  452         return (error);
  453 }
  454 
/*
 * Convert userspace address into unique logical address.
 *
 * For THREAD_SHARE the key is simply (vmspace, address).  For process
 * shared (or AUTO_SHARE on a share-inherited mapping) the key is the
 * backing VM object plus the offset within it, so the same object
 * mapped at different addresses in different processes still matches;
 * a reference on the object is taken and must be dropped via
 * umtx_key_release().  Returns 0 on success, EFAULT if the address
 * does not resolve to a writable mapping.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/* Object-relative offset of addr within the entry. */
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			/* AUTO_SHARE on a private mapping: thread-local key. */
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
  500 
  501 /*
  502  * Release key.
  503  */
  504 void
  505 umtx_key_release(struct umtx_key *key)
  506 {
  507         if (key->shared)
  508                 vm_object_deallocate(key->info.shared.object);
  509 }
  510 
/*
 * Lock a umtx object.
 *
 * Implements the kernel side of the simple umtx protocol: loop on
 * compare-and-swap of the userspace owner word, and when the lock is
 * held, set UMTX_CONTESTED, enqueue ourselves and sleep until the
 * owner's unlock syscall wakes us.  "timo" is in ticks; 0 sleeps
 * forever.  Returns 0 on success, EFAULT if the userspace word is
 * unreadable/unwritable, or the error from umtxq_sleep() (e.g. EINTR,
 * ETIMEDOUT) after one more acquisition attempt has been made.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		/* Build the wait-queue key for this address. */
		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/*
		 * Enqueue before publishing the contested bit so an
		 * unlocking thread cannot miss us.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid; undo the enqueue. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
  607 
/*
 * Lock a umtx object, with optional timeout.
 *
 * Without a timeout the lock attempt blocks indefinitely and an
 * interrupting signal restarts the syscall (EINTR -> ERESTART).  With
 * a timeout the remaining time is recomputed from the uptime clock on
 * each ETIMEDOUT wakeup until the absolute deadline passes; timed
 * attempts are never restarted (ERESTART -> EINTR) so the caller's
 * timeout is not reset by a signal.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts = absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the time remaining until the deadline. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
  647 
/*
 * Unlock a umtx object.
 *
 * Verifies that "id" owns the word, handles the uncontested fast path
 * with a single CAS, and otherwise hands the lock off: the word is set
 * to UMTX_UNOWNED when at most one waiter remains or UMTX_CONTESTED
 * when more do, and one waiter is woken.  Returns 0 on success, EPERM
 * if the caller is not the owner, EFAULT on an unreadable/unwritable
 * word, or EINVAL if the word changed underneath the final CAS.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race; fall through with the updated value. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so the waiter count stays meaningful. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
  708 
  709 #ifdef COMPAT_FREEBSD32
  710 
/*
 * Lock a umtx object (32-bit compat variant).
 *
 * Mirrors _do_lock_umtx() but operates on a 32-bit owner word via
 * casuword32() for COMPAT_FREEBSD32 processes; see that function for
 * the protocol description.  Returns 0, EFAULT, or the umtxq_sleep()
 * error after one further acquisition attempt.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		/* Build the wait-queue key for this address. */
		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/*
		 * Enqueue before publishing the contested bit so an
		 * unlocking thread cannot miss us.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid; undo the enqueue. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
  806 
/*
 * Lock a umtx object (32-bit compat variant), with optional timeout.
 *
 * Same timeout and signal semantics as do_lock_umtx(): untimed
 * attempts restart on signal (EINTR -> ERESTART); timed attempts
 * recompute the remaining time against an absolute uptime deadline and
 * are never restarted (ERESTART -> EINTR).
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts = absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the time remaining until the deadline. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
  846 
/*
 * Unlock a umtx object (32-bit compat variant).
 *
 * The calling thread (tid "id") must own the umtx at *m.  The fast
 * path releases an uncontested lock with one compare-and-swap; the
 * contested path clears or keeps the contested bit depending on the
 * number of queued waiters and wakes one of them.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race with a locker setting the contested bit. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so waiters cannot race the hand-off below. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	/* Owner word changed underneath us: userland corrupted the lock. */
	if (old != owner)
		return (EINVAL);
	return (0);
}
  907 #endif
  908 
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * addr       - userspace address to read and queue on.
 * id         - expected value; sleep only while *addr == id.
 * timeout    - optional relative timeout, converted to an absolute
 *              deadline so the sleep can be resumed after wakeups.
 * compat32   - non-zero to read a 32-bit value at addr.
 * is_private - non-zero selects process-private (THREAD_SHARE) keying.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	/*
	 * Queue the thread before reading the user value so a wakeup
	 * arriving between the compare and the sleep cannot be lost.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		/* Value already changed; undo the queue insert, no sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* Convert the relative timeout into an absolute deadline. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			/* Removed from the queue means we were woken. */
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			/* Sleep again for the time remaining. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	/* Waits are not transparently restarted after a signal. */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
  974 
  975 /*
  976  * Wake up threads sleeping on the specified address.
  977  */
  978 int
  979 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
  980 {
  981         struct umtx_key key;
  982         int ret;
  983         
  984         if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
  985                 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
  986                 return (ret);
  987         umtxq_lock(&key);
  988         ret = umtxq_signal(&key, n_wake);
  989         umtxq_unlock(&key);
  990         umtx_key_release(&key);
  991         return (0);
  992 }
  993 
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * m     - userspace mutex; every access can fault.
 * flags - mutex flags; UMUTEX_ERROR_CHECK enables EDEADLK detection.
 * timo  - sleep timeout in ticks, 0 for none.
 * mode  - _UMUTEX_TRY to fail with EBUSY instead of sleeping,
 *         _UMUTEX_WAIT to wait until the mutex looks acquirable
 *         without actually taking it, or 0 for a normal blocking lock.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			/* Only waiting for availability, never acquiring. */
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/* Busy the chain and queue ourselves before the CAS. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	/* NOTREACHED: the loop above only exits via return. */
	return (0);
}
 1105 
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
/*
 * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex owned by the
 * calling thread.  Mirrors do_unlock_umtx32(): fast uncontested CAS
 * release, otherwise a contested hand-off that wakes one waiter.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* Uncontested release; this should be done in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race with a locker setting the contested bit. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Busy the chain so waiters cannot race the hand-off below. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	/* Owner word changed underneath us: userland corrupted the lock. */
	if (old != owner)
		return (EINVAL);
	return (0);
}
 1168 
/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	/* Nothing to do while some thread still owns the mutex. */
	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/* With at most one waiter left, try to clear the contested bit. */
	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);

	/* Wake one waiter if any are queued and the mutex is free. */
	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
 1212 
 1213 static inline struct umtx_pi *
 1214 umtx_pi_alloc(int flags)
 1215 {
 1216         struct umtx_pi *pi;
 1217 
 1218         pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
 1219         TAILQ_INIT(&pi->pi_blocked);
 1220         atomic_add_int(&umtx_pi_allocated, 1);
 1221         return (pi);
 1222 }
 1223 
/*
 * Return a PI mutex structure to the zone and keep the allocation
 * counter (exported via the umtx_pi_allocated sysctl) in balance
 * with umtx_pi_alloc().
 */
static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}
 1230 
/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.  The pi_blocked list is kept sorted by increasing priority
 * value (highest-priority waiter first).  Returns 1 if the thread was
 * on a blocked chain (whether or not it moved), 0 if pi is NULL.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Stop at the first strictly lower-priority waiter. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
 1275 
/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.  Walks the chain of lock owners, lending td's priority
 * to each owner of lower priority, stopping at an unowned lock, an
 * owner that is not itself blocked, or an owner already running at
 * least as high.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		/* Owner already runs at this priority or better; done. */
		if (UPRI(td) <= pri)
			return;

		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}
 1319 
/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.  Recomputes each
 * owner's lent priority from the best waiter across all of its
 * contested PI mutexes and continues up the blocking chain.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri, oldpri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/*
		 * The head of each pi_blocked list is its best waiter;
		 * take the best priority over all contested mutexes.
		 */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		oldpri = pi->pi_owner->td_user_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Re-sort the owner on whatever chain it is blocked on. */
		if (uq_owner->uq_pi_blocked != NULL)
			umtx_pi_adjust_locked(pi->pi_owner, oldpri);
		pi = uq_owner->uq_pi_blocked;
	}
}
 1356 
 1357 /*
 1358  * Insert a PI mutex into owned list.
 1359  */
 1360 static void
 1361 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
 1362 {
 1363         struct umtx_q *uq_owner;
 1364 
 1365         uq_owner = owner->td_umtxq;
 1366         mtx_assert(&umtx_lock, MA_OWNED);
 1367         if (pi->pi_owner != NULL)
 1368                 panic("pi_ower != NULL");
 1369         pi->pi_owner = owner;
 1370         TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
 1371 }
 1372 
/*
 * Claim ownership of a PI mutex.
 *
 * Returns 0 if the mutex was unowned (or already ours) and is now
 * owned by "owner"; EPERM if some other thread already owns it
 * (userland handed us an inconsistent mutex).  On success the new
 * owner immediately inherits the best waiter's priority.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	/* Lend the head waiter's priority to the new owner if higher. */
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}
 1409 
/*
 * Re-sort td on the blocked chain of the PI mutex it is sleeping on
 * after a priority change, and propagate the new priority up the
 * owner chain when it improved and td is now the head waiter.
 * Caller holds umtx_lock and td must be blocked on a PI mutex.
 */
static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}
 1434 
/*
 * Adjust a thread's order position in its blocked PI mutex,
 * this may result new priority propagating process.  Public entry
 * point: takes umtx_lock and is a no-op if td is not currently
 * blocked on a PI mutex.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL)
		umtx_pi_adjust_locked(td, oldpri);
	mtx_unlock_spin(&umtx_lock);
}
 1455 
/*
 * Sleep on a PI mutex.
 *
 * Called with the chain locked and busied.  Queues uq, records (or
 * discovers) the lock owner "owner" so priority can be lent to it,
 * inserts uq into the priority-sorted pi_blocked list, propagates
 * priority, then sleeps.  On return the thread is off both queues and
 * any lent priority has been withdrawn; the chain lock is released.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Currently, we only support process-private PI mutexes;
		 * non-contended PI mutexes are locked in userland.
		 * Process-shared PI mutexes should always be initialized
		 * by the kernel and be registered in the kernel; locking
		 * should always be done by the kernel to avoid security
		 * problems.  For a process-private PI mutex, we can find
		 * the owner thread and boost its priority safely.
		 */
		mtx_unlock_spin(&umtx_lock);
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&umtx_lock);
		/* Recheck: the owner may have been set while unlocked. */
		if (td1 != NULL && pi->pi_owner == NULL) {
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	}

	/* Insert in priority order (highest-priority waiter first). */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	/* Skip the sleep if a wakeup already removed us from the queue. */
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_remove(uq);
		}
	}
	/* Undo the blocked state and withdraw any lent priority. */
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
 1536 
/*
 * Add reference count for a PI mutex.
 * The chain lock protects pi_refcount; caller must hold it.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}
 1549 
/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.  The final
 * release also disowns the mutex and unhooks it from the chain's
 * hash list; caller must hold the chain lock.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			/* Drop it from the owner's contested list. */
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}
 1576 
 1577 /*
 1578  * Find a PI mutex in hash table.
 1579  */
 1580 static struct umtx_pi *
 1581 umtx_pi_lookup(struct umtx_key *key)
 1582 {
 1583         struct umtxq_chain *uc;
 1584         struct umtx_pi *pi;
 1585 
 1586         uc = umtxq_getchain(key);
 1587         UMTXQ_LOCKED_ASSERT(uc);
 1588 
 1589         TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
 1590                 if (umtx_key_match(&pi->pi_key, key)) {
 1591                         return (pi);
 1592                 }
 1593         }
 1594         return (NULL);
 1595 }
 1596 
/*
 * Insert a PI mutex into hash table.
 * pi_key must already be set; caller holds the chain lock.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
 1609 
/*
 * Lock a PI mutex.
 *
 * m     - userspace mutex; every access can fault.
 * flags - mutex flags; UMUTEX_ERROR_CHECK enables EDEADLK detection.
 * timo  - sleep timeout in ticks, 0 for none.
 * try   - non-zero to fail with EBUSY instead of sleeping.
 *
 * Finds or allocates the kernel-side umtx_pi for the mutex, then runs
 * the same CAS acquisition protocol as _do_lock_normal(), except that
 * a successful contested acquire claims PI ownership and sleeping goes
 * through umtxq_sleep_pi() so priority is lent to the owner.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		/*
		 * Try a non-sleeping allocation first; if that fails,
		 * drop the chain lock for a M_WAITOK allocation and
		 * re-check for a concurrent insert.
		 */
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				/* Somebody beat us to it; discard ours. */
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	/* Hold a reference so the pi survives while we use it. */
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/* Acquired: record PI ownership. */
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}
	}

	/* Drop our pi reference; this may free it. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
 1758 
 1759 /*
 1760  * Unlock a PI mutex.
 1761  */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		/*
		 * No contested bit set: try the fast path and store an
		 * unowned word.  If the CAS loses because a waiter set
		 * the contested bit meanwhile, fall through to the slow
		 * path with the refreshed owner value.
		 */
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Busy the queue so the hand-off below is not raced by lockers. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		/* Disown the PI record before waking a successor. */
		uq_me = curthread->td_umtxq;
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL && 
		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute the priority we may still be lent from the
		 * top waiter of every PI mutex we continue to hold
		 * contested, then drop our lent priority back to that.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_unlend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
 1858 
 1859 /*
 1860  * Lock a PP mutex.
 1861  */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/* Only threads with rtprio privilege get their priority boosted. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Map the userland ceiling onto a kernel priority; the
		 * unsigned subtraction makes any out-of-range input wrap
		 * above RTP_PRIO_MAX and fail the check below.
		 */
		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		/* A thread running above the ceiling may not take the lock. */
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		/* Raise our inherited priority up to the ceiling. */
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		/*
		 * A PP mutex is always kept in CONTESTED state when free
		 * (see do_unlock_pp()), so acquisition is the transition
		 * CONTESTED -> id | CONTESTED.
		 */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * We did not get the lock this round: drop back to the
		 * priority still justified by our previous inherited
		 * priority and any PI mutexes we hold contested.
		 */
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	if (error != 0) {
		/* Failed for good: undo the boost taken in the last round. */
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
 1988 
 1989 /*
 1990  * Unlock a PP mutex.
 1991  */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/*
	 * m_ceilings[1] supplies the ceiling to fall back to after the
	 * unlock; -1 means no ceiling remains in effect.
	 */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		/* Unsigned wrap rejects out-of-range ceilings (EINVAL). */
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	/* Wake one waiter now that the word has been released. */
	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/*
		 * Recompute our lent priority from the new inherited
		 * ceiling and the top waiters of any PI mutexes we
		 * still hold contested, then unlend down to it.
		 */
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
 2074 
/*
 * Change the priority ceiling of a PP mutex, returning the previous
 * ceiling through *old_ceiling when requested.  The mutex is briefly
 * acquired so the ceiling never changes under a holder.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	/* Ceilings only exist on priority protected mutexes. */
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* Remember the old ceiling so it can be handed back. */
		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to take the free (CONTESTED) mutex for the update. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Got it: store the ceiling, then release again. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already own it; update the ceiling in place. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	/* Wake all waiters so they re-evaluate against the new ceiling. */
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
 2154 
 2155 static int
 2156 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
 2157         int mode)
 2158 {
 2159         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2160         case 0:
 2161                 return (_do_lock_normal(td, m, flags, timo, mode));
 2162         case UMUTEX_PRIO_INHERIT:
 2163                 return (_do_lock_pi(td, m, flags, timo, mode));
 2164         case UMUTEX_PRIO_PROTECT:
 2165                 return (_do_lock_pp(td, m, flags, timo, mode));
 2166         }
 2167         return (EINVAL);
 2168 }
 2169 
 2170 /*
 2171  * Lock a userland POSIX mutex.
 2172  */
 2173 static int
 2174 do_lock_umutex(struct thread *td, struct umutex *m,
 2175         struct timespec *timeout, int mode)
 2176 {
 2177         struct timespec ts, ts2, ts3;
 2178         struct timeval tv;
 2179         uint32_t flags;
 2180         int error;
 2181 
 2182         flags = fuword32(&m->m_flags);
 2183         if (flags == -1)
 2184                 return (EFAULT);
 2185 
 2186         if (timeout == NULL) {
 2187                 error = _do_lock_umutex(td, m, flags, 0, mode);
 2188                 /* Mutex locking is restarted if it is interrupted. */
 2189                 if (error == EINTR && mode != _UMUTEX_WAIT)
 2190                         error = ERESTART;
 2191         } else {
 2192                 getnanouptime(&ts);
 2193                 timespecadd(&ts, timeout);
 2194                 TIMESPEC_TO_TIMEVAL(&tv, timeout);
 2195                 for (;;) {
 2196                         error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
 2197                         if (error != ETIMEDOUT)
 2198                                 break;
 2199                         getnanouptime(&ts2);
 2200                         if (timespeccmp(&ts2, &ts, >=)) {
 2201                                 error = ETIMEDOUT;
 2202                                 break;
 2203                         }
 2204                         ts3 = ts;
 2205                         timespecsub(&ts3, &ts2);
 2206                         TIMESPEC_TO_TIMEVAL(&tv, &ts3);
 2207                 }
 2208                 /* Timed-locking is not restarted. */
 2209                 if (error == ERESTART)
 2210                         error = EINTR;
 2211         }
 2212         return (error);
 2213 }
 2214 
 2215 /*
 2216  * Unlock a userland POSIX mutex.
 2217  */
 2218 static int
 2219 do_unlock_umutex(struct thread *td, struct umutex *m)
 2220 {
 2221         uint32_t flags;
 2222 
 2223         flags = fuword32(&m->m_flags);
 2224         if (flags == -1)
 2225                 return (EFAULT);
 2226 
 2227         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2228         case 0:
 2229                 return (do_unlock_normal(td, m, flags));
 2230         case UMUTEX_PRIO_INHERIT:
 2231                 return (do_unlock_pi(td, m, flags));
 2232         case UMUTEX_PRIO_PROTECT:
 2233                 return (do_unlock_pp(td, m, flags));
 2234         }
 2235 
 2236         return (EINVAL);
 2237 }
 2238 
/*
 * Wait on a userland condition variable, releasing the associated
 * umutex while queued.  "wflags" may request UMTX_CHECK_UNPARKING;
 * "timeout" is relative, NULL for an untimed wait.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	/* Queue ourselves before dropping the mutex to avoid lost wakeups. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			/* A pending unpark consumes the wait immediately. */
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* Timed wait: re-sleep until the deadline passes. */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause supurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows supurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */ 
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
 2322 
 2323 /*
 2324  * Signal a userland condition variable.
 2325  */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	/* Busy the queue so waiter accounting stays consistent. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/*
		 * That was the last queued waiter: clear the userland
		 * c_has_waiters hint.  Drop the queue lock around the
		 * user-space store (which may fault/sleep); the busy
		 * state is kept across it.
		 */
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
 2351 
/*
 * Broadcast a userland condition variable: wake every queued waiter
 * and clear its c_has_waiters hint.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	/* Wake everyone; busy stays held across the userland store. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/* All waiters are woken; clear the userland waiters hint. */
	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}
 2377 
/*
 * Acquire a userland rwlock for reading, sleeping in the kernel while
 * excluded.  "timo" is the per-sleep timeout in ticks, 0 for none.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&rwlock->rw_flags);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	/*
	 * Unless reader preference was requested (per-call or per-lock),
	 * waiting writers also exclude new readers.
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			/* Bump the reader count by one via CAS. */
			oldstate = casuword32(&rwlock->rw_state, state, state + 1);
			if (oldstate == state) {
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
		}

		/* A signal/timeout from the previous sleep ends the loop. */
		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

		/* set read contention bit */
		while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
			oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
			if (oldstate == state)
				goto sleep;
			state = oldstate;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			continue;
		}

sleep:
		/* contention bit is set, before sleeping, increase read waiter count */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		/* Sleep until the excluding state clears or an error occurs. */
		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
		}

		/* decrease read waiter count, and may clear read contention bit */
		blocked_readers = fuword32(&rwlock->rw_blocked_readers);
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader; clear the bit. */
			state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
			for (;;) {
				oldstate = casuword32(&rwlock->rw_state, state,
					 state & ~URWLOCK_READ_WAITERS);
				if (oldstate == state)
					break;
				state = oldstate;
			}
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	return (error);
}
 2484 
/*
 * Acquire a read lock on a userland rwlock with a relative timeout.
 * The timeout is converted to an absolute deadline on the uptime
 * clock; each time do_rw_rdlock() reports ETIMEDOUT we recompute the
 * remaining time and retry until the deadline has passed.
 */
static int
do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
{
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        int error;

        getnanouptime(&ts);
        timespecadd(&ts, timeout);              /* ts = absolute deadline */
        TIMESPEC_TO_TIMEVAL(&tv, timeout);
        for (;;) {
                error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
                if (error != ETIMEDOUT)
                        break;
                getnanouptime(&ts2);
                if (timespeccmp(&ts2, &ts, >=)) {
                        error = ETIMEDOUT;
                        break;
                }
                /* Sleep again for whatever time remains until the deadline. */
                ts3 = ts;
                timespecsub(&ts3, &ts2);
                TIMESPEC_TO_TIMEVAL(&tv, &ts3);
        }
        /* Timed waits are not restartable; report interruption instead. */
        if (error == ERESTART)
                error = EINTR;
        return (error);
}
 2512 
/*
 * Lock a userland rwlock for writing, sleeping at most 'timo' ticks
 * per wait (0 = forever).
 *
 * The lock word lives in user memory, so every access goes through
 * the fuword32/suword32/casuword32 user-access primitives and may
 * race with other threads; all state transitions are therefore
 * retried CAS loops.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
        struct umtx_q *uq;
        uint32_t flags;
        int32_t state, oldstate;
        int32_t blocked_writers;
        int32_t blocked_readers;
        int error;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        blocked_readers = 0;
        for (;;) {
                /* Fast path: set WRITE_OWNER while no owner and no readers. */
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
                        if (oldstate == state) {
                                umtx_key_release(&uq->uq_key);
                                return (0);
                        }
                        state = oldstate;
                }

                if (error) {
                        /*
                         * A previous sleep failed (timeout/signal).  If no
                         * writer owns or waits on the lock but we had seen
                         * blocked readers, wake them so they are not left
                         * stranded behind us.
                         */
                        if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
                            blocked_readers != 0) {
                                umtxq_lock(&uq->uq_key);
                                umtxq_busy(&uq->uq_key);
                                umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
                                umtxq_unbusy(&uq->uq_key);
                                umtxq_unlock(&uq->uq_key);
                        }

                        break;
                }

                /* grab monitor lock */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * re-read the state, in case it changed between the try-lock above
                 * and the check below
                 */
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

                /* Advertise a waiting writer before sleeping. */
                while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
                       (state & URWLOCK_WRITE_WAITERS) == 0) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
                        if (oldstate == state)
                                goto sleep;
                        state = oldstate;
                }

                /* Lock became free while setting the waiters bit; retry. */
                if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        continue;
                }
sleep:
                blocked_writers = fuword32(&rwlock->rw_blocked_writers);
                suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

                /* Sleep until the lock looks acquirable or the sleep fails. */
                while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
                        umtxq_unbusy(&uq->uq_key);

                        error = umtxq_sleep(uq, "uwrlck", timo);

                        umtxq_busy(&uq->uq_key);
                        umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
                        umtxq_unlock(&uq->uq_key);
                        if (error)
                                break;
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                }

                /*
                 * If we were the last blocked writer, clear WRITE_WAITERS
                 * and sample the blocked-reader count for the error path
                 * above.
                 */
                blocked_writers = fuword32(&rwlock->rw_blocked_writers);
                suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
                if (blocked_writers == 1) {
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                        for (;;) {
                                oldstate = casuword32(&rwlock->rw_state, state,
                                         state & ~URWLOCK_WRITE_WAITERS);
                                if (oldstate == state)
                                        break;
                                state = oldstate;
                        }
                        blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                } else
                        blocked_readers = 0;

                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
        }

        umtx_key_release(&uq->uq_key);
        return (error);
}
 2621 
/*
 * Acquire a write lock on a userland rwlock with a relative timeout.
 * Same deadline/retry structure as do_rw_rdlock2().
 */
static int
do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
{
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        int error;

        getnanouptime(&ts);
        timespecadd(&ts, timeout);              /* ts = absolute deadline */
        TIMESPEC_TO_TIMEVAL(&tv, timeout);
        for (;;) {
                error = do_rw_wrlock(td, obj, tvtohz(&tv));
                if (error != ETIMEDOUT)
                        break;
                getnanouptime(&ts2);
                if (timespeccmp(&ts2, &ts, >=)) {
                        error = ETIMEDOUT;
                        break;
                }
                /* Sleep again for whatever time remains until the deadline. */
                ts3 = ts;
                timespecsub(&ts3, &ts2);
                TIMESPEC_TO_TIMEVAL(&tv, &ts3);
        }
        /* Timed waits are not restartable; report interruption instead. */
        if (error == ERESTART)
                error = EINTR;
        return (error);
}
 2649 
/*
 * Release a userland rwlock held by the caller: clear the write-owner
 * bit or decrement the reader count, then wake queued waiters
 * according to the lock's reader/writer preference flag.  Returns
 * EPERM if the lock word shows the lock is not actually held.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
        struct umtx_q *uq;
        uint32_t flags;
        int32_t state, oldstate;
        int error, q, count;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
        if (state & URWLOCK_WRITE_OWNER) {
                /* Clear the write-owner bit with a CAS retry loop. */
                for (;;) {
                        oldstate = casuword32(&rwlock->rw_state, state, 
                                state & ~URWLOCK_WRITE_OWNER);
                        if (oldstate != state) {
                                state = oldstate;
                                /* Bit already gone: we were not the owner. */
                                if (!(oldstate & URWLOCK_WRITE_OWNER)) {
                                        error = EPERM;
                                        goto out;
                                }
                        } else
                                break;
                }
        } else if (URWLOCK_READER_COUNT(state) != 0) {
                /* Drop one reader reference. */
                for (;;) {
                        oldstate = casuword32(&rwlock->rw_state, state,
                                state - 1);
                        if (oldstate != state) {
                                state = oldstate;
                                /* Count already zero: nothing to release. */
                                if (URWLOCK_READER_COUNT(oldstate) == 0) {
                                        error = EPERM;
                                        goto out;
                                }
                        }
                        else
                                break;
                }
        } else {
                /* Neither write-owned nor read-held. */
                error = EPERM;
                goto out;
        }

        count = 0;

        /*
         * Decide whom to wake: by default one writer is preferred over
         * all readers; URWLOCK_PREFER_READER inverts the order.
         */
        if (!(flags & URWLOCK_PREFER_READER)) {
                if (state & URWLOCK_WRITE_WAITERS) {
                        count = 1;
                        q = UMTX_EXCLUSIVE_QUEUE;
                } else if (state & URWLOCK_READ_WAITERS) {
                        count = INT_MAX;
                        q = UMTX_SHARED_QUEUE;
                }
        } else {
                if (state & URWLOCK_READ_WAITERS) {
                        count = INT_MAX;
                        q = UMTX_SHARED_QUEUE;
                } else if (state & URWLOCK_WRITE_WAITERS) {
                        count = 1;
                        q = UMTX_EXCLUSIVE_QUEUE;
                }
        }

        if (count) {
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_signal_queue(&uq->uq_key, count, q);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
        }
out:
        umtx_key_release(&uq->uq_key);
        return (error);
}
 2728 
 2729 int
 2730 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
 2731     /* struct umtx *umtx */
 2732 {
 2733         return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
 2734 }
 2735 
 2736 int
 2737 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
 2738     /* struct umtx *umtx */
 2739 {
 2740         return do_unlock_umtx(td, uap->umtx, td->td_tid);
 2741 }
 2742 
 2743 inline int
 2744 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
 2745 {
 2746         int error;
 2747 
 2748         error = copyin(addr, tsp, sizeof(struct timespec));
 2749         if (error == 0) {
 2750                 if (tsp->tv_sec < 0 ||
 2751                     tsp->tv_nsec >= 1000000000 ||
 2752                     tsp->tv_nsec < 0)
 2753                         error = EINVAL;
 2754         }
 2755         return (error);
 2756 }
 2757 
 2758 static int
 2759 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
 2760 {
 2761         struct timespec *ts, timeout;
 2762         int error;
 2763 
 2764         /* Allow a null timespec (wait forever). */
 2765         if (uap->uaddr2 == NULL)
 2766                 ts = NULL;
 2767         else {
 2768                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2769                 if (error != 0)
 2770                         return (error);
 2771                 ts = &timeout;
 2772         }
 2773         return (do_lock_umtx(td, uap->obj, uap->val, ts));
 2774 }
 2775 
/* UMTX_OP_UNLOCK handler: val is the owner thread id to release. */
static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
        return (do_unlock_umtx(td, uap->obj, uap->val));
}
 2781 
 2782 static int
 2783 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
 2784 {
 2785         struct timespec *ts, timeout;
 2786         int error;
 2787 
 2788         if (uap->uaddr2 == NULL)
 2789                 ts = NULL;
 2790         else {
 2791                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2792                 if (error != 0)
 2793                         return (error);
 2794                 ts = &timeout;
 2795         }
 2796         return do_wait(td, uap->obj, uap->val, ts, 0, 0);
 2797 }
 2798 
 2799 static int
 2800 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
 2801 {
 2802         struct timespec *ts, timeout;
 2803         int error;
 2804 
 2805         if (uap->uaddr2 == NULL)
 2806                 ts = NULL;
 2807         else {
 2808                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2809                 if (error != 0)
 2810                         return (error);
 2811                 ts = &timeout;
 2812         }
 2813         return do_wait(td, uap->obj, uap->val, ts, 1, 0);
 2814 }
 2815 
 2816 static int
 2817 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
 2818 {
 2819         struct timespec *ts, timeout;
 2820         int error;
 2821 
 2822         if (uap->uaddr2 == NULL)
 2823                 ts = NULL;
 2824         else {
 2825                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2826                 if (error != 0)
 2827                         return (error);
 2828                 ts = &timeout;
 2829         }
 2830         return do_wait(td, uap->obj, uap->val, ts, 1, 1);
 2831 }
 2832 
/* UMTX_OP_WAKE handler: wake up to 'val' waiters (shared key). */
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
        return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}
 2838 
/* UMTX_OP_WAKE_PRIVATE handler: as above, process-private key. */
static int
__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
{
        return (kern_umtx_wake(td, uap->obj, uap->val, 1));
}
 2844 
 2845 static int
 2846 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
 2847 {
 2848         struct timespec *ts, timeout;
 2849         int error;
 2850 
 2851         /* Allow a null timespec (wait forever). */
 2852         if (uap->uaddr2 == NULL)
 2853                 ts = NULL;
 2854         else {
 2855                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2856                 if (error != 0)
 2857                         return (error);
 2858                 ts = &timeout;
 2859         }
 2860         return do_lock_umutex(td, uap->obj, ts, 0);
 2861 }
 2862 
 2863 static int
 2864 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
 2865 {
 2866         return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
 2867 }
 2868 
 2869 static int
 2870 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
 2871 {
 2872         struct timespec *ts, timeout;
 2873         int error;
 2874 
 2875         /* Allow a null timespec (wait forever). */
 2876         if (uap->uaddr2 == NULL)
 2877                 ts = NULL;
 2878         else {
 2879                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2880                 if (error != 0)
 2881                         return (error);
 2882                 ts = &timeout;
 2883         }
 2884         return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
 2885 }
 2886 
 2887 static int
 2888 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
 2889 {
 2890         return do_wake_umutex(td, uap->obj);
 2891 }
 2892 
 2893 static int
 2894 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
 2895 {
 2896         return do_unlock_umutex(td, uap->obj);
 2897 }
 2898 
 2899 static int
 2900 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
 2901 {
 2902         return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
 2903 }
 2904 
 2905 static int
 2906 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
 2907 {
 2908         struct timespec *ts, timeout;
 2909         int error;
 2910 
 2911         /* Allow a null timespec (wait forever). */
 2912         if (uap->uaddr2 == NULL)
 2913                 ts = NULL;
 2914         else {
 2915                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2916                 if (error != 0)
 2917                         return (error);
 2918                 ts = &timeout;
 2919         }
 2920         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 2921 }
 2922 
 2923 static int
 2924 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
 2925 {
 2926         return do_cv_signal(td, uap->obj);
 2927 }
 2928 
 2929 static int
 2930 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
 2931 {
 2932         return do_cv_broadcast(td, uap->obj);
 2933 }
 2934 
 2935 static int
 2936 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
 2937 {
 2938         struct timespec timeout;
 2939         int error;
 2940 
 2941         /* Allow a null timespec (wait forever). */
 2942         if (uap->uaddr2 == NULL) {
 2943                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 2944         } else {
 2945                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2946                 if (error != 0)
 2947                         return (error);
 2948                 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
 2949         }
 2950         return (error);
 2951 }
 2952 
 2953 static int
 2954 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
 2955 {
 2956         struct timespec timeout;
 2957         int error;
 2958 
 2959         /* Allow a null timespec (wait forever). */
 2960         if (uap->uaddr2 == NULL) {
 2961                 error = do_rw_wrlock(td, uap->obj, 0);
 2962         } else {
 2963                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2964                 if (error != 0)
 2965                         return (error);
 2966 
 2967                 error = do_rw_wrlock2(td, uap->obj, &timeout);
 2968         }
 2969         return (error);
 2970 }
 2971 
 2972 static int
 2973 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
 2974 {
 2975         return do_rw_unlock(td, uap->obj);
 2976 }
 2977 
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Dispatch table for _umtx_op(), indexed by the UMTX_OP_* request
 * code; entry order must match the UMTX_OP_* definitions exactly.
 */
static _umtx_op_func op_table[] = {
        __umtx_op_lock_umtx,            /* UMTX_OP_LOCK */
        __umtx_op_unlock_umtx,          /* UMTX_OP_UNLOCK */
        __umtx_op_wait,                 /* UMTX_OP_WAIT */
        __umtx_op_wake,                 /* UMTX_OP_WAKE */
        __umtx_op_trylock_umutex,       /* UMTX_OP_MUTEX_TRYLOCK */
        __umtx_op_lock_umutex,          /* UMTX_OP_MUTEX_LOCK */
        __umtx_op_unlock_umutex,        /* UMTX_OP_MUTEX_UNLOCK */
        __umtx_op_set_ceiling,          /* UMTX_OP_SET_CEILING */
        __umtx_op_cv_wait,              /* UMTX_OP_CV_WAIT*/
        __umtx_op_cv_signal,            /* UMTX_OP_CV_SIGNAL */
        __umtx_op_cv_broadcast,         /* UMTX_OP_CV_BROADCAST */
        __umtx_op_wait_uint,            /* UMTX_OP_WAIT_UINT */
        __umtx_op_rw_rdlock,            /* UMTX_OP_RW_RDLOCK */
        __umtx_op_rw_wrlock,            /* UMTX_OP_RW_WRLOCK */
        __umtx_op_rw_unlock,            /* UMTX_OP_RW_UNLOCK */
        __umtx_op_wait_uint_private,    /* UMTX_OP_WAIT_UINT_PRIVATE */
        __umtx_op_wake_private,         /* UMTX_OP_WAKE_PRIVATE */
        __umtx_op_wait_umutex,          /* UMTX_OP_UMUTEX_WAIT */
        __umtx_op_wake_umutex           /* UMTX_OP_UMUTEX_WAKE */
};
 3001 
 3002 int
 3003 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
 3004 {
 3005         if ((unsigned)uap->op < UMTX_OP_MAX)
 3006                 return (*op_table[uap->op])(td, uap);
 3007         return (EINVAL);
 3008 }
 3009 
 3010 #ifdef COMPAT_FREEBSD32
/* 32-bit compat: lock an old-style umtx with no timeout. */
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
        return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}
 3017 
/* 32-bit compat: unlock an old-style umtx owned by this thread. */
int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
        return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
 3024 
 3025 struct timespec32 {
 3026         u_int32_t tv_sec;
 3027         u_int32_t tv_nsec;
 3028 };
 3029 
static inline int
umtx_copyin_timeout32(void *addr, struct timespec *tsp)
{
        /*
         * Copy a 32-bit process's timespec32 in from user space,
         * validate it (non-negative seconds, nanoseconds in
         * [0, 1e9)), and widen it into the native struct timespec.
         * NOTE(review): the "< 0" checks are only meaningful if
         * struct timespec32's fields are signed — verify the struct
         * definition.
         */
        struct timespec32 ts32;
        int error;

        error = copyin(addr, &ts32, sizeof(struct timespec32));
        if (error == 0) {
                if (ts32.tv_sec < 0 ||
                    ts32.tv_nsec >= 1000000000 ||
                    ts32.tv_nsec < 0)
                        error = EINVAL;
                else {
                        tsp->tv_sec = ts32.tv_sec;
                        tsp->tv_nsec = ts32.tv_nsec;
                }
        }
        return (error);
}
 3049 
 3050 static int
 3051 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3052 {
 3053         struct timespec *ts, timeout;
 3054         int error;
 3055 
 3056         /* Allow a null timespec (wait forever). */
 3057         if (uap->uaddr2 == NULL)
 3058                 ts = NULL;
 3059         else {
 3060                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3061                 if (error != 0)
 3062                         return (error);
 3063                 ts = &timeout;
 3064         }
 3065         return (do_lock_umtx32(td, uap->obj, uap->val, ts));
 3066 }
 3067 
/* 32-bit compat UMTX_OP_UNLOCK handler: val is the owner thread id. */
static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
        return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}
 3073 
 3074 static int
 3075 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3076 {
 3077         struct timespec *ts, timeout;
 3078         int error;
 3079 
 3080         if (uap->uaddr2 == NULL)
 3081                 ts = NULL;
 3082         else {
 3083                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3084                 if (error != 0)
 3085                         return (error);
 3086                 ts = &timeout;
 3087         }
 3088         return do_wait(td, uap->obj, uap->val, ts, 1, 0);
 3089 }
 3090 
 3091 static int
 3092 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3093 {
 3094         struct timespec *ts, timeout;
 3095         int error;
 3096 
 3097         /* Allow a null timespec (wait forever). */
 3098         if (uap->uaddr2 == NULL)
 3099                 ts = NULL;
 3100         else {
 3101                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3102                 if (error != 0)
 3103                         return (error);
 3104                 ts = &timeout;
 3105         }
 3106         return do_lock_umutex(td, uap->obj, ts, 0);
 3107 }
 3108 
 3109 static int
 3110 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3111 {
 3112         struct timespec *ts, timeout;
 3113         int error;
 3114 
 3115         /* Allow a null timespec (wait forever). */
 3116         if (uap->uaddr2 == NULL)
 3117                 ts = NULL;
 3118         else {
 3119                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3120                 if (error != 0)
 3121                         return (error);
 3122                 ts = &timeout;
 3123         }
 3124         return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
 3125 }
 3126 
 3127 static int
 3128 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3129 {
 3130         struct timespec *ts, timeout;
 3131         int error;
 3132 
 3133         /* Allow a null timespec (wait forever). */
 3134         if (uap->uaddr2 == NULL)
 3135                 ts = NULL;
 3136         else {
 3137                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3138                 if (error != 0)
 3139                         return (error);
 3140                 ts = &timeout;
 3141         }
 3142         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 3143 }
 3144 
 3145 static int
 3146 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3147 {
 3148         struct timespec timeout;
 3149         int error;
 3150 
 3151         /* Allow a null timespec (wait forever). */
 3152         if (uap->uaddr2 == NULL) {
 3153                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3154         } else {
 3155                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3156                 if (error != 0)
 3157                         return (error);
 3158                 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
 3159         }
 3160         return (error);
 3161 }
 3162 
 3163 static int
 3164 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3165 {
 3166         struct timespec timeout;
 3167         int error;
 3168 
 3169         /* Allow a null timespec (wait forever). */
 3170         if (uap->uaddr2 == NULL) {
 3171                 error = do_rw_wrlock(td, uap->obj, 0);
 3172         } else {
 3173                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3174                 if (error != 0)
 3175                         return (error);
 3176 
 3177                 error = do_rw_wrlock2(td, uap->obj, &timeout);
 3178         }
 3179         return (error);
 3180 }
 3181 
 3182 static int
 3183 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
 3184 {
 3185         struct timespec *ts, timeout;
 3186         int error;
 3187 
 3188         if (uap->uaddr2 == NULL)
 3189                 ts = NULL;
 3190         else {
 3191                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3192                 if (error != 0)
 3193                         return (error);
 3194                 ts = &timeout;
 3195         }
 3196         return do_wait(td, uap->obj, uap->val, ts, 1, 1);
 3197 }
 3198 
/*
 * 32-bit compat dispatch table; entry order must match the UMTX_OP_*
 * definitions (and the native op_table) exactly.  The comments at
 * slots 4/5 previously had TRYLOCK and LOCK swapped relative to the
 * native table; the function pointers were always in the right slots.
 */
static _umtx_op_func op_table_compat32[] = {
        __umtx_op_lock_umtx_compat32,   /* UMTX_OP_LOCK */
        __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */
        __umtx_op_wait_compat32,        /* UMTX_OP_WAIT */
        __umtx_op_wake,                 /* UMTX_OP_WAKE */
        __umtx_op_trylock_umutex,       /* UMTX_OP_MUTEX_TRYLOCK */
        __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */
        __umtx_op_unlock_umutex,        /* UMTX_OP_MUTEX_UNLOCK */
        __umtx_op_set_ceiling,          /* UMTX_OP_SET_CEILING */
        __umtx_op_cv_wait_compat32,     /* UMTX_OP_CV_WAIT*/
        __umtx_op_cv_signal,            /* UMTX_OP_CV_SIGNAL */
        __umtx_op_cv_broadcast,         /* UMTX_OP_CV_BROADCAST */
        __umtx_op_wait_compat32,        /* UMTX_OP_WAIT_UINT */
        __umtx_op_rw_rdlock_compat32,   /* UMTX_OP_RW_RDLOCK */
        __umtx_op_rw_wrlock_compat32,   /* UMTX_OP_RW_WRLOCK */
        __umtx_op_rw_unlock,            /* UMTX_OP_RW_UNLOCK */
        __umtx_op_wait_uint_private_compat32,   /* UMTX_OP_WAIT_UINT_PRIVATE */
        __umtx_op_wake_private,         /* UMTX_OP_WAKE_PRIVATE */
        __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
        __umtx_op_wake_umutex           /* UMTX_OP_UMUTEX_WAKE */
};
 3220 
 3221 int
 3222 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
 3223 {
 3224         if ((unsigned)uap->op < UMTX_OP_MAX)
 3225                 return (*op_table_compat32[uap->op])(td,
 3226                         (struct _umtx_op_args *)uap);
 3227         return (EINVAL);
 3228 }
 3229 #endif
 3230 
 3231 void
 3232 umtx_thread_init(struct thread *td)
 3233 {
 3234         td->td_umtxq = umtxq_alloc();
 3235         td->td_umtxq->uq_thread = td;
 3236 }
 3237 
/* Release the per-thread umtx queue structure at thread teardown. */
void
umtx_thread_fini(struct thread *td)
{
        umtxq_free(td->td_umtxq);
}
 3243 
/*
 * Reset per-thread umtx state when a new thread is created (e.g. by
 * fork()); the umtx_q itself was already allocated in
 * umtx_thread_init().
 */
void
umtx_thread_alloc(struct thread *td)
{
        struct umtx_q *uq;

        uq = td->td_umtxq;
        uq->uq_inherited_pri = PRI_MAX; /* no inherited priority yet */

        /* The structure must be quiescent: not blocked, owning nothing. */
        KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
        KASSERT(uq->uq_thread == td, ("uq_thread != td"));
        KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
        KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
 3260 
/*
 * exec() hook: drop the executing thread's umtx state, since the
 * process image (and all its userland locks) is being replaced.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
        struct image_params *imgp __unused)
{
        umtx_thread_cleanup(curthread);
}
 3270 
/*
 * thread_exit() hook: release the exiting thread's umtx state.
 */
void
umtx_thread_exit(struct thread *td)
{
        umtx_thread_cleanup(td);
}
 3279 
/*
 * Clean up a thread's umtx state: reset its inherited priority,
 * disown any priority-inheritance mutexes it still holds, and clear
 * the priority-borrowing flag.  All of it is done under the umtx
 * spin lock, with the thread lock taken inside for the flag update.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
        struct umtx_q *uq;
        struct umtx_pi *pi;

        if ((uq = td->td_umtxq) == NULL)
                return;

        mtx_lock_spin(&umtx_lock);
        uq->uq_inherited_pri = PRI_MAX;
        /* Orphan every contested PI mutex this thread still owns. */
        while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
                pi->pi_owner = NULL;
                TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
        }
        thread_lock(td);
        td->td_flags &= ~TDF_UBORROWING;
        thread_unlock(td);
        mtx_unlock_spin(&umtx_lock);
}

Cache object: 7cfceea34d54a60b93c6f24b09ee21e4


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.