The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
    3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice unmodified, this list of conditions, and the following
   11  *    disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/7.3/sys/kern/kern_umtx.c 203694 2010-02-09 01:19:10Z davidxu $");
   30 
   31 #include "opt_compat.h"
   32 #include <sys/param.h>
   33 #include <sys/kernel.h>
   34 #include <sys/limits.h>
   35 #include <sys/lock.h>
   36 #include <sys/malloc.h>
   37 #include <sys/mutex.h>
   38 #include <sys/priv.h>
   39 #include <sys/proc.h>
   40 #include <sys/sched.h>
   41 #include <sys/smp.h>
   42 #include <sys/sysctl.h>
   43 #include <sys/sysent.h>
   44 #include <sys/systm.h>
   45 #include <sys/sysproto.h>
   46 #include <sys/eventhandler.h>
   47 #include <sys/umtx.h>
   48 
   49 #include <vm/vm.h>
   50 #include <vm/vm_param.h>
   51 #include <vm/pmap.h>
   52 #include <vm/vm_map.h>
   53 #include <vm/vm_object.h>
   54 
   55 #include <machine/cpu.h>
   56 
   57 #ifdef COMPAT_IA32
   58 #include <compat/freebsd32/freebsd32_proto.h>
   59 #endif
   60 
   61 #define TYPE_SIMPLE_WAIT        0
   62 #define TYPE_CV                 1
   63 #define TYPE_SIMPLE_LOCK        2
   64 #define TYPE_NORMAL_UMUTEX      3
   65 #define TYPE_PI_UMUTEX          4
   66 #define TYPE_PP_UMUTEX          5
   67 #define TYPE_RWLOCK             6
   68 
   69 #define _UMUTEX_TRY             1
   70 #define _UMUTEX_WAIT            2
   71 
   72 /* Key to represent a unique userland synchronous object */
/*
 * Key to represent a unique userland synchronous object.
 *
 * A process-shared object is identified by its backing (vm_object,
 * offset) pair; a process-private object by its (vmspace, address)
 * pair.  The "both" view aliases whichever member is active so that
 * hashing and comparison can treat the key as an opaque (pointer,
 * integer) pair regardless of sharing mode.
 */
struct umtx_key {
	int	hash;			/* Chain index, set by umtxq_hash(). */
	int	type;			/* TYPE_* class of the object. */
	int	shared;			/* Non-zero: keyed by shared mapping
					   (holds a vm_object reference). */
	union {
		struct {
			vm_object_t	object;	/* Backing VM object. */
			uintptr_t	offset;	/* Offset within the object. */
		} shared;
		struct {
			struct vmspace	*vs;	/* Owning address space. */
			uintptr_t	addr;	/* User virtual address. */
		} private;
		struct {
			void		*a;	/* Aliases object / vs. */
			uintptr_t	b;	/* Aliases offset / addr. */
		} both;
	} info;
};
   92 
   93 /* Priority inheritance mutex info. */
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread (the thread currently holding the user mutex). */
	struct thread		*pi_owner;

	/* Reference count. */
	int			pi_refcount;

	/* List entry to link umtx held by a thread (uq_pi_contested). */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in the chain's hash list (uc_pi_list). */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List of threads blocked on this PI mutex. */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object. */
	struct umtx_key		pi_key;
};
  113 
  114 /* A userland synchronous object user. */
/*
 * A userland synchronous object user: per-thread wait-queue entry,
 * allocated once per thread by umtxq_alloc().
 */
struct umtx_q {
	/* Linked list for the hash (chain sleep queue). */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key identifying the object this entry waits on. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001		/* Entry is on a chain queue. */

	/* The thread that waits on this entry. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or umtx_lock, write must have both chain lock and
	 * umtx_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On a umtx_pi's pi_blocked list. */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes this thread owns that other threads contend for. */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex (PRI_MAX when none). */
	u_char			uq_inherited_pri;
};
  145 
  146 TAILQ_HEAD(umtxq_head, umtx_q);
  147 
  148 /* Userland lock object's wait-queue chain */
/* Userland lock object's wait-queue chain (one hash bucket). */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues (shared and exclusive waiters). */
	struct umtxq_head	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Busy flag: set while a holder may block with the lock dropped. */
	char			uc_busy;

	/* Number of threads sleeping for the busy flag to clear. */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain. */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};
  167 
  168 #define UMTXQ_LOCKED_ASSERT(uc)         mtx_assert(&(uc)->uc_lock, MA_OWNED)
  169 #define UMTXQ_BUSY_ASSERT(uc)   KASSERT(&(uc)->uc_busy, ("umtx chain is not busy"))
  170 
  171 /*
  172  * Don't propagate time-sharing priority, there is a security reason,
  173  * a user can simply introduce PI-mutex, let thread A lock the mutex,
  174  * and let another thread B block on the mutex, because B is
  175  * sleeping, its priority will be boosted, this causes A's priority to
  176  * be boosted via priority propagating too and will never be lowered even
  177  * if it is using 100%CPU, this is unfair to other processes.
  178  */
  179 
  180 #define UPRI(td)        (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
  181                           (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
  182                          PRI_MAX_TIMESHARE : (td)->td_user_pri)
  183 
  184 #define GOLDEN_RATIO_PRIME      2654404609U
  185 #define UMTX_CHAINS             128
  186 #define UMTX_SHIFTS             (__WORD_BIT - 7)
  187 
  188 #define THREAD_SHARE            0
  189 #define PROCESS_SHARE           1
  190 #define AUTO_SHARE              2
  191 
  192 #define GET_SHARE(flags)        \
  193     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
  194 
  195 #define BUSY_SPINS              200
  196 
  197 static uma_zone_t               umtx_pi_zone;
  198 static struct umtxq_chain       umtxq_chains[2][UMTX_CHAINS];
  199 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
  200 static int                      umtx_pi_allocated;
  201 
  202 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
  203 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
  204     &umtx_pi_allocated, 0, "Allocated umtx_pi");
  205 
  206 static void umtxq_sysinit(void *);
  207 static void umtxq_hash(struct umtx_key *key);
  208 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
  209 static void umtxq_lock(struct umtx_key *key);
  210 static void umtxq_unlock(struct umtx_key *key);
  211 static void umtxq_busy(struct umtx_key *key);
  212 static void umtxq_unbusy(struct umtx_key *key);
  213 static void umtxq_insert_queue(struct umtx_q *uq, int q);
  214 static void umtxq_remove_queue(struct umtx_q *uq, int q);
  215 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
  216 static int umtxq_count(struct umtx_key *key);
  217 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
  218 static int umtx_key_get(void *addr, int type, int share,
  219         struct umtx_key *key);
  220 static void umtx_key_release(struct umtx_key *key);
  221 static struct umtx_pi *umtx_pi_alloc(int);
  222 static void umtx_pi_free(struct umtx_pi *pi);
  223 static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
  224 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
  225 static void umtx_thread_cleanup(struct thread *td);
  226 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
  227         struct image_params *imgp __unused);
  228 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
  229 
  230 #define umtxq_signal(key, nwake)        umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
  231 #define umtxq_insert(uq)        umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
  232 #define umtxq_remove(uq)        umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
  233 
  234 static struct mtx umtx_lock;
  235 
  236 static void
  237 umtxq_sysinit(void *arg __unused)
  238 {
  239         int i, j;
  240 
  241         umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
  242                 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  243         for (i = 0; i < 2; ++i) {
  244                 for (j = 0; j < UMTX_CHAINS; ++j) {
  245                         mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
  246                                  MTX_DEF | MTX_DUPOK);
  247                         TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]);
  248                         TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]);
  249                         TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
  250                         umtxq_chains[i][j].uc_busy = 0;
  251                         umtxq_chains[i][j].uc_waiters = 0;
  252                 }
  253         }
  254         mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
  255         EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
  256             EVENTHANDLER_PRI_ANY);
  257 }
  258 
  259 struct umtx_q *
  260 umtxq_alloc(void)
  261 {
  262         struct umtx_q *uq;
  263 
  264         uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
  265         TAILQ_INIT(&uq->uq_pi_contested);
  266         uq->uq_inherited_pri = PRI_MAX;
  267         return (uq);
  268 }
  269 
  270 void
  271 umtxq_free(struct umtx_q *uq)
  272 {
  273         free(uq, M_UMTX);
  274 }
  275 
  276 static inline void
  277 umtxq_hash(struct umtx_key *key)
  278 {
  279         unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
  280         key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
  281 }
  282 
  283 static inline int
  284 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
  285 {
  286         return (k1->type == k2->type &&
  287                 k1->info.both.a == k2->info.both.a &&
  288                 k1->info.both.b == k2->info.both.b);
  289 }
  290 
  291 static inline struct umtxq_chain *
  292 umtxq_getchain(struct umtx_key *key)
  293 {
  294         if (key->type <= TYPE_CV)
  295                 return (&umtxq_chains[1][key->hash]);
  296         return (&umtxq_chains[0][key->hash]);
  297 }
  298 
  299 /*
  300  * Lock a chain.
  301  */
  302 static inline void
  303 umtxq_lock(struct umtx_key *key)
  304 {
  305         struct umtxq_chain *uc;
  306 
  307         uc = umtxq_getchain(key);
  308         mtx_lock(&uc->uc_lock);
  309 }
  310 
  311 /*
  312  * Unlock a chain.
  313  */
  314 static inline void
  315 umtxq_unlock(struct umtx_key *key)
  316 {
  317         struct umtxq_chain *uc;
  318 
  319         uc = umtxq_getchain(key);
  320         mtx_unlock(&uc->uc_lock);
  321 }
  322 
/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 * Called and returns with the chain lock held; may drop and
 * re-acquire it internally while spinning or sleeping.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(key);
        mtx_assert(&uc->uc_lock, MA_OWNED);
        if (uc->uc_busy) {
#ifdef SMP
                /*
                 * On MP, first spin briefly with the chain lock dropped
                 * in the hope the current holder unbusies the chain soon.
                 * uc_busy is read without the lock here; that is fine
                 * since it is re-checked under the lock below.
                 */
                if (smp_cpus > 1) {
                        int count = BUSY_SPINS;
                        if (count > 0) {
                                umtxq_unlock(key);
                                while (uc->uc_busy && --count > 0)
                                        cpu_spinwait();
                                umtxq_lock(key);
                        }
                }
#endif
                /* Spinning did not help (or UP system): sleep instead. */
                while (uc->uc_busy) {
                        uc->uc_waiters++;
                        msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
                        uc->uc_waiters--;
                }
        }
        uc->uc_busy = 1;
}
  354 
  355 /*
  356  * Unbusy a chain.
  357  */
  358 static inline void
  359 umtxq_unbusy(struct umtx_key *key)
  360 {
  361         struct umtxq_chain *uc;
  362 
  363         uc = umtxq_getchain(key);
  364         mtx_assert(&uc->uc_lock, MA_OWNED);
  365         KASSERT(uc->uc_busy != 0, ("not busy"));
  366         uc->uc_busy = 0;
  367         if (uc->uc_waiters)
  368                 wakeup_one(uc);
  369 }
  370 
  371 static inline void
  372 umtxq_insert_queue(struct umtx_q *uq, int q)
  373 {
  374         struct umtxq_chain *uc;
  375 
  376         uc = umtxq_getchain(&uq->uq_key);
  377         UMTXQ_LOCKED_ASSERT(uc);
  378         TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
  379         uq->uq_flags |= UQF_UMTXQ;
  380 }
  381 
  382 static inline void
  383 umtxq_remove_queue(struct umtx_q *uq, int q)
  384 {
  385         struct umtxq_chain *uc;
  386 
  387         uc = umtxq_getchain(&uq->uq_key);
  388         UMTXQ_LOCKED_ASSERT(uc);
  389         if (uq->uq_flags & UQF_UMTXQ) {
  390                 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
  391                 uq->uq_flags &= ~UQF_UMTXQ;
  392         }
  393 }
  394 
  395 /*
  396  * Check if there are multiple waiters
  397  */
  398 static int
  399 umtxq_count(struct umtx_key *key)
  400 {
  401         struct umtxq_chain *uc;
  402         struct umtx_q *uq;
  403         int count = 0;
  404 
  405         uc = umtxq_getchain(key);
  406         UMTXQ_LOCKED_ASSERT(uc);
  407         TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
  408                 if (umtx_key_match(&uq->uq_key, key)) {
  409                         if (++count > 1)
  410                                 break;
  411                 }
  412         }
  413         return (count);
  414 }
  415 
  416 /*
  417  * Check if there are multiple PI waiters and returns first
  418  * waiter.
  419  */
  420 static int
  421 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
  422 {
  423         struct umtxq_chain *uc;
  424         struct umtx_q *uq;
  425         int count = 0;
  426 
  427         *first = NULL;
  428         uc = umtxq_getchain(key);
  429         UMTXQ_LOCKED_ASSERT(uc);
  430         TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
  431                 if (umtx_key_match(&uq->uq_key, key)) {
  432                         if (++count > 1)
  433                                 break;
  434                         *first = uq;
  435                 }
  436         }
  437         return (count);
  438 }
  439 
  440 /*
  441  * Wake up threads waiting on an userland object.
  442  */
  443 
  444 static int
  445 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
  446 {
  447         struct umtxq_chain *uc;
  448         struct umtx_q *uq, *next;
  449         int ret;
  450 
  451         ret = 0;
  452         uc = umtxq_getchain(key);
  453         UMTXQ_LOCKED_ASSERT(uc);
  454         TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
  455                 if (umtx_key_match(&uq->uq_key, key)) {
  456                         umtxq_remove_queue(uq, q);
  457                         wakeup(uq);
  458                         if (++ret >= n_wake)
  459                                 break;
  460                 }
  461         }
  462         return (ret);
  463 }
  464 
  465 
  466 /*
  467  * Wake up specified thread.
  468  */
  469 static inline void
  470 umtxq_signal_thread(struct umtx_q *uq)
  471 {
  472         struct umtxq_chain *uc;
  473 
  474         uc = umtxq_getchain(&uq->uq_key);
  475         UMTXQ_LOCKED_ASSERT(uc);
  476         umtxq_remove(uq);
  477         wakeup(uq);
  478 }
  479 
  480 /*
  481  * Put thread into sleep state, before sleeping, check if
  482  * thread was removed from umtx queue.
  483  */
  484 static inline int
  485 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
  486 {
  487         struct umtxq_chain *uc;
  488         int error;
  489 
  490         uc = umtxq_getchain(&uq->uq_key);
  491         UMTXQ_LOCKED_ASSERT(uc);
  492         if (!(uq->uq_flags & UQF_UMTXQ))
  493                 return (0);
  494         error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
  495         if (error == EWOULDBLOCK)
  496                 error = ETIMEDOUT;
  497         return (error);
  498 }
  499 
/*
 * Convert userspace address into unique logical address.
 *
 * For THREAD_SHARE the key is simply (vmspace, address).  Otherwise
 * the address is looked up in the process map: if the object is
 * PROCESS_SHARE, or AUTO_SHARE on a VM_INHERIT_SHARE mapping, the key
 * becomes (vm_object, offset) — valid across processes — and a
 * reference is taken on the object (released by umtx_key_release()).
 * Returns 0 on success or EFAULT if the address is unmapped/unwritable.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
        struct thread *td = curthread;
        vm_map_t map;
        vm_map_entry_t entry;
        vm_pindex_t pindex;
        vm_prot_t prot;
        boolean_t wired;

        key->type = type;
        if (share == THREAD_SHARE) {
                key->shared = 0;
                key->info.private.vs = td->td_proc->p_vmspace;
                key->info.private.addr = (uintptr_t)addr;
        } else {
                MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
                map = &td->td_proc->p_vmspace->vm_map;
                if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
                    &entry, &key->info.shared.object, &pindex, &prot,
                    &wired) != KERN_SUCCESS) {
                        return EFAULT;
                }

                if ((share == PROCESS_SHARE) ||
                    (share == AUTO_SHARE &&
                     VM_INHERIT_SHARE == entry->inheritance)) {
                        key->shared = 1;
                        /* Object-relative offset of addr within the mapping. */
                        key->info.shared.offset = entry->offset + entry->start -
                                (vm_offset_t)addr;
                        /* Keep the object alive; dropped in umtx_key_release(). */
                        vm_object_reference(key->info.shared.object);
                } else {
                        key->shared = 0;
                        key->info.private.vs = td->td_proc->p_vmspace;
                        key->info.private.addr = (uintptr_t)addr;
                }
                vm_map_lookup_done(map, entry);
        }

        umtxq_hash(key);
        return (0);
}
  545 
  546 /*
  547  * Release key.
  548  */
  549 static inline void
  550 umtx_key_release(struct umtx_key *key)
  551 {
  552         if (key->shared)
  553                 vm_object_deallocate(key->info.shared.object);
  554 }
  555 
/*
 * Lock a umtx object.
 *
 * Retry loop: try an uncontested CAS; if the lock is held, set the
 * contested bit and sleep on the key's wait queue until woken or the
 * timeout (timo, in ticks; 0 = forever) expires.  Returns 0 on
 * success, EFAULT if the userspace word faults, or the sleep error
 * (ETIMEDOUT / EINTR) after one additional acquisition attempt.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
        struct umtx_q *uq;
        u_long owner;
        u_long old;
        int error = 0;

        uq = td->td_umtxq;

        /*
         * Care must be exercised when dealing with umtx structure. It
         * can fault on any access.
         */
        for (;;) {
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMTX_UNOWNED)
                        return (0);

                /* The address was invalid. */
                if (owner == -1)
                        return (EFAULT);

                /* If no one owns it but it is contested try to acquire it. */
                if (owner == UMTX_CONTESTED) {
                        owner = casuword(&umtx->u_owner,
                            UMTX_CONTESTED, id | UMTX_CONTESTED);

                        if (owner == UMTX_CONTESTED)
                                return (0);

                        /* The address was invalid. */
                        if (owner == -1)
                                return (EFAULT);

                        /* If this failed the lock has changed, restart. */
                        continue;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        return (error);

                if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
                        AUTO_SHARE, &uq->uq_key)) != 0)
                        return (error);

                /* Queue ourselves BEFORE setting the contested bit so a
                 * concurrent unlock cannot miss us. */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either some one else has acquired the lock or it has been
                 * released.
                 */
                old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
                        return (EFAULT);
                }

                /*
                 * We set the contested bit, sleep. Otherwise the lock changed
                 * and we need to retry or we lost a race to the thread
                 * unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                if (old == owner)
                        error = umtxq_sleep(uq, "umtx", timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);
        }

        /* NOTREACHED: the loop only exits via return. */
        return (0);
}
  652 
/*
 * Lock a umtx object, optionally with a relative timeout.
 *
 * Without a timeout the wait is restartable after a signal
 * (EINTR -> ERESTART).  With a timeout, an absolute deadline on the
 * uptime clock is computed once, and after each ETIMEDOUT round the
 * remaining time is recomputed; timed waits are not restarted
 * (ERESTART -> EINTR).
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
        struct timespec *timeout)
{
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        int error;

        if (timeout == NULL) {
                error = _do_lock_umtx(td, umtx, id, 0);
                /* Mutex locking is restarted if it is interrupted. */
                if (error == EINTR)
                        error = ERESTART;
        } else {
                /* ts becomes the absolute deadline. */
                getnanouptime(&ts);
                timespecadd(&ts, timeout);
                TIMESPEC_TO_TIMEVAL(&tv, timeout);
                for (;;) {
                        error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
                        if (error != ETIMEDOUT)
                                break;
                        getnanouptime(&ts2);
                        if (timespeccmp(&ts2, &ts, >=)) {
                                error = ETIMEDOUT;
                                break;
                        }
                        /* Retry with the remaining time. */
                        ts3 = ts;
                        timespecsub(&ts3, &ts2);
                        TIMESPEC_TO_TIMEVAL(&tv, &ts3);
                }
                /* Timed-locking is not restarted. */
                if (error == ERESTART)
                        error = EINTR;
        }
        return (error);
}
  692 
/*
 * Unlock a umtx object.
 *
 * Verifies the caller owns the word (EPERM otherwise), fast-paths the
 * uncontested release with a CAS, and for contested locks marks the
 * word UMTX_UNOWNED (<= 1 waiter) or UMTX_CONTESTED (several), then
 * wakes one waiter.  Returns 0, EFAULT on a faulting access, or
 * EINVAL if the word changed underneath us.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
        struct umtx_key key;
        u_long owner;
        u_long old;
        int error;
        int count;

        /*
         * Make sure we own this mtx.
         */
        owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMTX_CONTESTED) != id)
                return (EPERM);

        /* This should be done in userland */
        if ((owner & UMTX_CONTESTED) == 0) {
                old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
                if (old == -1)
                        return (EFAULT);
                if (old == owner)
                        return (0);
                /* Lost the race: re-read showed the contested bit got set. */
                owner = old;
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
                &key)) != 0)
                return (error);

        /* Busy the chain so the waiter count stays meaningful while we
         * update the userspace word with the lock dropped. */
        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * there is zero or one thread only waiting for it.
         * Otherwise, it must be marked as contested.
         */
        old = casuword(&umtx->u_owner, owner,
                count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
        umtxq_lock(&key);
        umtxq_signal(&key,1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (old == -1)
                return (EFAULT);
        if (old != owner)
                return (EINVAL);
        return (0);
}
  753 
  754 #ifdef COMPAT_IA32
  755 
/*
 * Lock a umtx object (32-bit compat variant).
 *
 * Identical in structure to _do_lock_umtx() but operates on a 32-bit
 * word with casuword32() and the UMUTEX_* constants, for 32-bit
 * processes running on a 64-bit kernel (COMPAT_IA32).
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
        struct umtx_q *uq;
        uint32_t owner;
        uint32_t old;
        int error = 0;

        uq = td->td_umtxq;

        /*
         * Care must be exercised when dealing with umtx structure. It
         * can fault on any access.
         */
        for (;;) {
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword32(m, UMUTEX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMUTEX_UNOWNED)
                        return (0);

                /* The address was invalid. */
                if (owner == -1)
                        return (EFAULT);

                /* If no one owns it but it is contested try to acquire it. */
                if (owner == UMUTEX_CONTESTED) {
                        owner = casuword32(m,
                            UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
                        if (owner == UMUTEX_CONTESTED)
                                return (0);

                        /* The address was invalid. */
                        if (owner == -1)
                                return (EFAULT);

                        /* If this failed the lock has changed, restart. */
                        continue;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        return (error);

                if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
                        AUTO_SHARE, &uq->uq_key)) != 0)
                        return (error);

                /* Queue ourselves before publishing the contested bit. */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either some one else has acquired the lock or it has been
                 * released.
                 */
                old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
                        return (EFAULT);
                }

                /*
                 * We set the contested bit, sleep. Otherwise the lock changed
                 * and we need to retry or we lost a race to the thread
                 * unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                if (old == owner)
                        error = umtxq_sleep(uq, "umtx", timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);
        }

        /* NOTREACHED: the loop only exits via return. */
        return (0);
}
  851 
/*
 * Lock a umtx object (32-bit compat entry), optionally with a timeout.
 *
 * timeout == NULL blocks indefinitely; an EINTR from the sleep is then
 * converted to ERESTART so the syscall is transparently restarted after
 * the signal handler runs.  With a timeout, the remaining time is
 * recomputed against an absolute uptime deadline after every wakeup,
 * and ERESTART is converted to EINTR because a restart would re-arm
 * the full timeout.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* ts becomes the absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			/* Timed out: give up, or retry with the time left. */
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
  891 
/*
 * Unlock a umtx object (32-bit compat entry).
 *
 * Returns EFAULT if the userland word cannot be accessed, EPERM if the
 * calling thread does not own the lock, and EINVAL if the owner word
 * changed underneath the final hand-off CAS.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		/* Fast path: no waiters recorded, just clear the word. */
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/*
		 * The CAS failed: the contested bit was set concurrently.
		 * Fall through to the slow path with the refreshed value.
		 */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/*
	 * Busy the chain so the waiter count stays meaningful across
	 * the CAS below.
	 */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
  952 #endif
  953 
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * Implements the futex-style wait: the thread is queued on the key
 * derived from addr, then the word is re-read; if it still equals id
 * the thread sleeps until woken, interrupted, or timed out.  compat32
 * selects a 32-bit fetch; is_private restricts the key to this process.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	/*
	 * Queue ourselves BEFORE re-reading the word, so a waker that
	 * changes the value after our read cannot miss us.
	 */
	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = (unsigned int)fuword32(addr);
	if (tmp != id) {
		/* Value already changed; do not sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* ts is the absolute deadline on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			/* No longer queued: we were woken up. */
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			/* Recompute the remaining time and sleep again. */
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	/* Plain waits are not restarted; report EINTR to the caller. */
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
 1019 
 1020 /*
 1021  * Wake up threads sleeping on the specified address.
 1022  */
 1023 int
 1024 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
 1025 {
 1026         struct umtx_key key;
 1027         int ret;
 1028         
 1029         if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
 1030                 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
 1031                 return (ret);
 1032         umtxq_lock(&key);
 1033         ret = umtxq_signal(&key, n_wake);
 1034         umtxq_unlock(&key);
 1035         umtx_key_release(&key);
 1036         return (0);
 1037 }
 1038 
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * mode selects the flavor: _UMUTEX_TRY fails with EBUSY instead of
 * sleeping; _UMUTEX_WAIT only waits until the mutex looks acquirable
 * and never actually takes it.  timo is the per-sleep tick timeout
 * (0 = forever).  Returns 0 on success or an errno value.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int mode)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
		if (mode == _UMUTEX_WAIT) {
			/* Caller only wants to wait until it is lockable. */
			if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
				return (0);
		} else {
			/*
			 * Try the uncontested case.  This should be done in userland.
			 */
			owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

			/* The acquire succeeded. */
			if (owner == UMUTEX_UNOWNED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If no one owns it but it is contested try to acquire it. */
			if (owner == UMUTEX_CONTESTED) {
				owner = casuword32(&m->m_owner,
				    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

				/* Acquired; keep the contested bit set. */
				if (owner == UMUTEX_CONTESTED)
					return (0);

				/* The address was invalid. */
				if (owner == -1)
					return (EFAULT);

				/* If this failed the lock has changed, restart. */
				continue;
			}
		}

		/* Error-checking mutexes detect self-deadlock. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
 1150 
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * Returns EFAULT if the owner word is unreadable, EPERM if the caller
 * is not the owner, and EINVAL if the word changed underneath the
 * final hand-off CAS.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* Fast path: no waiters recorded, just clear the owner word. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/*
		 * The CAS failed: the contested bit was set concurrently.
		 * Fall through to the slow path with the refreshed value.
		 */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/*
	 * Busy the chain so the waiter count stays meaningful across
	 * the CAS below.
	 */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
 1213 
 1214 /*
 1215  * Check if the mutex is available and wake up a waiter,
 1216  * only for simple mutex.
 1217  */
 1218 static int
 1219 do_wake_umutex(struct thread *td, struct umutex *m)
 1220 {
 1221         struct umtx_key key;
 1222         uint32_t owner;
 1223         uint32_t flags;
 1224         int error;
 1225         int count;
 1226 
 1227         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1228         if (owner == -1)
 1229                 return (EFAULT);
 1230 
 1231         if ((owner & ~UMUTEX_CONTESTED) != 0)
 1232                 return (0);
 1233 
 1234         flags = fuword32(&m->m_flags);
 1235 
 1236         /* We should only ever be in here for contested locks */
 1237         if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
 1238             &key)) != 0)
 1239                 return (error);
 1240 
 1241         umtxq_lock(&key);
 1242         umtxq_busy(&key);
 1243         count = umtxq_count(&key);
 1244         umtxq_unlock(&key);
 1245 
 1246         if (count <= 1)
 1247                 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
 1248 
 1249         umtxq_lock(&key);
 1250         if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
 1251                 umtxq_signal(&key, 1);
 1252         umtxq_unbusy(&key);
 1253         umtxq_unlock(&key);
 1254         umtx_key_release(&key);
 1255         return (0);
 1256 }
 1257 
 1258 static inline struct umtx_pi *
 1259 umtx_pi_alloc(int flags)
 1260 {
 1261         struct umtx_pi *pi;
 1262 
 1263         pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
 1264         TAILQ_INIT(&pi->pi_blocked);
 1265         atomic_add_int(&umtx_pi_allocated, 1);
 1266         return (pi);
 1267 }
 1268 
 1269 static inline void
 1270 umtx_pi_free(struct umtx_pi *pi)
 1271 {
 1272         uma_zfree(umtx_pi_zone, pi);
 1273         atomic_add_int(&umtx_pi_allocated, -1);
 1274 }
 1275 
/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 *
 * pi_blocked is kept sorted by user priority (numerically lowest UPRI,
 * i.e. highest priority, first).  Returns 0 only when pi is NULL,
 * otherwise 1.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			/* Stop at the first strictly lower-priority waiter. */
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
 1320 
/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.
 *
 * Walks the chain: the PI mutex td is blocked on -> that mutex's
 * owner -> the mutex *that* owner is blocked on, and so on, lending
 * td's priority to every owner with a worse (numerically higher)
 * priority along the way.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		/* Owner not yet known (e.g. not registered); stop here. */
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		/* Owner already runs at this priority or better; done. */
		if (UPRI(td) <= pri)
			return;

		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}
 1364 
/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
 *
 * Recomputes each owner's lent priority from the best remaining waiter
 * across all PI mutexes the owner still holds, then walks up the chain
 * of owners the same way umtx_propagate_priority() walked down it.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri, oldpri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/*
		 * Find the best priority among the head waiters of every
		 * PI mutex this owner still has contested.
		 */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		/* Never drop below the owner's own inherited priority. */
		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		oldpri = pi->pi_owner->td_user_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* The owner may itself be blocked; resort and continue up. */
		if (uq_owner->uq_pi_blocked != NULL)
			umtx_pi_adjust_locked(pi->pi_owner, oldpri);
		pi = uq_owner->uq_pi_blocked;
	}
}
 1401 
 1402 /*
 1403  * Insert a PI mutex into owned list.
 1404  */
 1405 static void
 1406 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
 1407 {
 1408         struct umtx_q *uq_owner;
 1409 
 1410         uq_owner = owner->td_umtxq;
 1411         mtx_assert(&umtx_lock, MA_OWNED);
 1412         if (pi->pi_owner != NULL)
 1413                 panic("pi_ower != NULL");
 1414         pi->pi_owner = owner;
 1415         TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
 1416 }
 1417 
/*
 * Claim ownership of a PI mutex.
 *
 * Returns 0 on success (including when owner already holds pi) or
 * EPERM if another thread is already recorded as the owner.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		/* Already ours; nothing to do. */
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	/* Inherit the priority of the highest-priority waiter, if any. */
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}
 1454 
/*
 * Re-sort td on the blocked list of the PI mutex it sleeps on after a
 * priority change, and continue propagation if its priority improved.
 * Caller holds umtx_lock, and td must be blocked on a PI mutex.
 */
static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}
 1479 
 1480 /*
 1481  * Adjust a thread's order position in its blocked PI mutex,
 1482  * this may result new priority propagating process.
 1483  */
 1484 void
 1485 umtx_pi_adjust(struct thread *td, u_char oldpri)
 1486 {
 1487         struct umtx_q *uq;
 1488         struct umtx_pi *pi;
 1489 
 1490         uq = td->td_umtxq;
 1491         mtx_lock_spin(&umtx_lock);
 1492         /*
 1493          * Pick up the lock that td is blocked on.
 1494          */
 1495         pi = uq->uq_pi_blocked;
 1496         if (pi != NULL)
 1497                 umtx_pi_adjust_locked(td, oldpri);
 1498         mtx_unlock_spin(&umtx_lock);
 1499 }
 1500 
/*
 * Sleep on a PI mutex.
 *
 * Queues uq on both the hash chain and pi's priority-sorted blocked
 * list, propagates the caller's priority to the owner chain, then
 * sleeps.  On return all of that is undone and priorities are
 * unpropagated.  Caller holds the chain lock with the chain busied;
 * the chain lock is released before returning.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Current, We only support process private PI-mutex,
		 * non-contended PI-mutexes are locked in userland.
		 * Process shared PI-mutex should always be initialized
		 * by kernel and be registered in kernel, locking should
		 * always be done by kernel to avoid security problems.
		 * For process private PI-mutex, we can find owner
		 * thread and boost its priority safely.
		 */
		mtx_unlock_spin(&umtx_lock);
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&umtx_lock);
		/* Re-check: the owner may have been set while unlocked. */
		if (td1 != NULL && pi->pi_owner == NULL) {
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	}

	/* Insert into pi_blocked keeping it sorted by user priority. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	/* Skip the sleep entirely if we have already been woken. */
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_remove(uq);
		}
	}
	/* Undo the blocked state and give back any lent priority. */
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
 1581 
 1582 /*
 1583  * Add reference count for a PI mutex.
 1584  */
 1585 static void
 1586 umtx_pi_ref(struct umtx_pi *pi)
 1587 {
 1588         struct umtxq_chain *uc;
 1589 
 1590         uc = umtxq_getchain(&pi->pi_key);
 1591         UMTXQ_LOCKED_ASSERT(uc);
 1592         pi->pi_refcount++;
 1593 }
 1594 
/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 *
 * Caller holds the chain lock.  On the final unref the record is
 * detached from its owner's contested list, unhashed, and freed.
 */ 
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		/* umtx_lock protects the owner linkage. */
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}
 1621 
 1622 /*
 1623  * Find a PI mutex in hash table.
 1624  */
 1625 static struct umtx_pi *
 1626 umtx_pi_lookup(struct umtx_key *key)
 1627 {
 1628         struct umtxq_chain *uc;
 1629         struct umtx_pi *pi;
 1630 
 1631         uc = umtxq_getchain(key);
 1632         UMTXQ_LOCKED_ASSERT(uc);
 1633 
 1634         TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
 1635                 if (umtx_key_match(&pi->pi_key, key)) {
 1636                         return (pi);
 1637                 }
 1638         }
 1639         return (NULL);
 1640 }
 1641 
 1642 /*
 1643  * Insert a PI mutex into hash table.
 1644  */
 1645 static inline void
 1646 umtx_pi_insert(struct umtx_pi *pi)
 1647 {
 1648         struct umtxq_chain *uc;
 1649 
 1650         uc = umtxq_getchain(&pi->pi_key);
 1651         UMTXQ_LOCKED_ASSERT(uc);
 1652         TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
 1653 }
 1654 
 1655 /*
 1656  * Lock a PI mutex.
 1657  */
 1658 static int
 1659 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
 1660         int try)
 1661 {
 1662         struct umtx_q *uq;
 1663         struct umtx_pi *pi, *new_pi;
 1664         uint32_t id, owner, old;
 1665         int error;
 1666 
 1667         id = td->td_tid;
 1668         uq = td->td_umtxq;
 1669 
 1670         if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
 1671             &uq->uq_key)) != 0)
 1672                 return (error);
 1673         umtxq_lock(&uq->uq_key);
 1674         pi = umtx_pi_lookup(&uq->uq_key);
 1675         if (pi == NULL) {
 1676                 new_pi = umtx_pi_alloc(M_NOWAIT);
 1677                 if (new_pi == NULL) {
 1678                         umtxq_unlock(&uq->uq_key);
 1679                         new_pi = umtx_pi_alloc(M_WAITOK);
 1680                         umtxq_lock(&uq->uq_key);
 1681                         pi = umtx_pi_lookup(&uq->uq_key);
 1682                         if (pi != NULL) {
 1683                                 umtx_pi_free(new_pi);
 1684                                 new_pi = NULL;
 1685                         }
 1686                 }
 1687                 if (new_pi != NULL) {
 1688                         new_pi->pi_key = uq->uq_key;
 1689                         umtx_pi_insert(new_pi);
 1690                         pi = new_pi;
 1691                 }
 1692         }
 1693         umtx_pi_ref(pi);
 1694         umtxq_unlock(&uq->uq_key);
 1695 
 1696         /*
 1697          * Care must be exercised when dealing with umtx structure.  It
 1698          * can fault on any access.
 1699          */
 1700         for (;;) {
 1701                 /*
 1702                  * Try the uncontested case.  This should be done in userland.
 1703                  */
 1704                 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
 1705 
 1706                 /* The acquire succeeded. */
 1707                 if (owner == UMUTEX_UNOWNED) {
 1708                         error = 0;
 1709                         break;
 1710                 }
 1711 
 1712                 /* The address was invalid. */
 1713                 if (owner == -1) {
 1714                         error = EFAULT;
 1715                         break;
 1716                 }
 1717 
 1718                 /* If no one owns it but it is contested try to acquire it. */
 1719                 if (owner == UMUTEX_CONTESTED) {
 1720                         owner = casuword32(&m->m_owner,
 1721                             UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 1722 
 1723                         if (owner == UMUTEX_CONTESTED) {
 1724                                 umtxq_lock(&uq->uq_key);
 1725                                 umtxq_busy(&uq->uq_key);
 1726                                 error = umtx_pi_claim(pi, td);
 1727                                 umtxq_unbusy(&uq->uq_key);
 1728                                 umtxq_unlock(&uq->uq_key);
 1729                                 break;
 1730                         }
 1731 
 1732                         /* The address was invalid. */
 1733                         if (owner == -1) {
 1734                                 error = EFAULT;
 1735                                 break;
 1736                         }
 1737 
 1738                         /* If this failed the lock has changed, restart. */
 1739                         continue;
 1740                 }
 1741 
 1742                 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
 1743                     (owner & ~UMUTEX_CONTESTED) == id) {
 1744                         error = EDEADLK;
 1745                         break;
 1746                 }
 1747 
 1748                 if (try != 0) {
 1749                         error = EBUSY;
 1750                         break;
 1751                 }
 1752 
 1753                 /*
 1754                  * If we caught a signal, we have retried and now
 1755                  * exit immediately.
 1756                  */
 1757                 if (error != 0)
 1758                         break;
 1759                         
 1760                 umtxq_lock(&uq->uq_key);
 1761                 umtxq_busy(&uq->uq_key);
 1762                 umtxq_unlock(&uq->uq_key);
 1763 
 1764                 /*
 1765                  * Set the contested bit so that a release in user space
 1766                  * knows to use the system call for unlock.  If this fails
 1767                  * either some one else has acquired the lock or it has been
 1768                  * released.
 1769                  */
 1770                 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
 1771 
 1772                 /* The address was invalid. */
 1773                 if (old == -1) {
 1774                         umtxq_lock(&uq->uq_key);
 1775                         umtxq_unbusy(&uq->uq_key);
 1776                         umtxq_unlock(&uq->uq_key);
 1777                         error = EFAULT;
 1778                         break;
 1779                 }
 1780 
 1781                 umtxq_lock(&uq->uq_key);
 1782                 /*
 1783                  * We set the contested bit, sleep. Otherwise the lock changed
 1784                  * and we need to retry or we lost a race to the thread
 1785                  * unlocking the umtx.
 1786                  */
 1787                 if (old == owner)
 1788                         error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
 1789                                  "umtxpi", timo);
 1790                 else {
 1791                         umtxq_unbusy(&uq->uq_key);
 1792                         umtxq_unlock(&uq->uq_key);
 1793                 }
 1794         }
 1795 
 1796         umtxq_lock(&uq->uq_key);
 1797         umtx_pi_unref(pi);
 1798         umtxq_unlock(&uq->uq_key);
 1799 
 1800         umtx_key_release(&uq->uq_key);
 1801         return (error);
 1802 }
 1803 
 1804 /*
 1805  * Unlock a PI mutex.
 1806  */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
        struct umtx_key key;
        struct umtx_q *uq_first, *uq_first2, *uq_me;
        struct umtx_pi *pi, *pi2;
        uint32_t owner, old, id;
        int error;
        int count;
        int pri;

        id = td->td_tid;
        /*
         * Make sure we own this mtx.
         */
        owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMUTEX_CONTESTED) != id)
                return (EPERM);

        /* This should be done in userland */
        if ((owner & UMUTEX_CONTESTED) == 0) {
                /* Uncontested: try to release with a single CAS. */
                old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
                if (old == -1)
                        return (EFAULT);
                if (old == owner)
                        return (0);
                /*
                 * CAS failed: the owner word changed under us (the
                 * contested bit presumably got set); continue with the
                 * freshly observed value.
                 */
                owner = old;
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
            &key)) != 0)
                return (error);

        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count_pi(&key, &uq_first);
        if (uq_first != NULL) {
                mtx_lock_spin(&umtx_lock);
                pi = uq_first->uq_pi_blocked;
                KASSERT(pi != NULL, ("pi == NULL?"));
                if (pi->pi_owner != curthread) {
                        mtx_unlock_spin(&umtx_lock);
                        umtxq_unbusy(&key);
                        umtxq_unlock(&key);
                        umtx_key_release(&key);
                        /* userland messed the mutex */
                        return (EPERM);
                }
                uq_me = curthread->td_umtxq;
                /* Disown the PI state before choosing a thread to wake. */
                pi->pi_owner = NULL;
                TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
                /* get highest priority thread which is still sleeping. */
                uq_first = TAILQ_FIRST(&pi->pi_blocked);
                while (uq_first != NULL && 
                       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
                        uq_first = TAILQ_NEXT(uq_first, uq_lockq);
                }
                /*
                 * Recompute the user priority lent to us by the PI
                 * mutexes we still hold, now that this one no longer
                 * contributes.
                 */
                pri = PRI_MAX;
                TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
                        uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
                        if (uq_first2 != NULL) {
                                if (pri > UPRI(uq_first2->uq_thread))
                                        pri = UPRI(uq_first2->uq_thread);
                        }
                }
                thread_lock(curthread);
                sched_unlend_user_prio(curthread, pri);
                thread_unlock(curthread);
                mtx_unlock_spin(&umtx_lock);
                if (uq_first)
                        umtxq_signal_thread(uq_first);
        }
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * there is zero or one thread only waiting for it.
         * Otherwise, it must be marked as contested.
         */
        old = casuword32(&m->m_owner, owner,
                count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

        umtxq_lock(&key);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (old == -1)
                return (EFAULT);
        /* Owner word changed unexpectedly: userland corrupted the mutex. */
        if (old != owner)
                return (EINVAL);
        return (0);
}
 1903 
 1904 /*
 1905  * Lock a PP mutex.
 1906  */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
        int try)
{
        struct umtx_q *uq, *uq2;
        struct umtx_pi *pi;
        uint32_t ceiling;
        uint32_t owner, id;
        int error, pri, old_inherited_pri, su;

        id = td->td_tid;
        uq = td->td_umtxq;
        if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
            &uq->uq_key)) != 0)
                return (error);
        /* su: may this thread assume a real-time ceiling priority? */
        su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
        for (;;) {
                old_inherited_pri = uq->uq_inherited_pri;
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Map the userland ceiling onto a kernel priority.  An
                 * out-of-range ceiling, or a faulting fuword32 (which
                 * returns -1), wraps past RTP_PRIO_MAX and is rejected.
                 */
                ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
                if (ceiling > RTP_PRIO_MAX) {
                        error = EINVAL;
                        goto out;
                }

                mtx_lock_spin(&umtx_lock);
                /* Our priority must not exceed the mutex's ceiling. */
                if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
                        mtx_unlock_spin(&umtx_lock);
                        error = EINVAL;
                        goto out;
                }
                /* Boost ourselves to the ceiling if privileged. */
                if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
                        uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
                        thread_lock(td);
                        if (uq->uq_inherited_pri < UPRI(td))
                                sched_lend_user_prio(td, uq->uq_inherited_pri);
                        thread_unlock(td);
                }
                mtx_unlock_spin(&umtx_lock);

                /*
                 * A PP mutex idles as UMUTEX_CONTESTED, so acquisition
                 * is the transition CONTESTED -> id | CONTESTED.
                 */
                owner = casuword32(&m->m_owner,
                    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

                if (owner == UMUTEX_CONTESTED) {
                        error = 0;
                        break;
                }

                /* The address was invalid. */
                if (owner == -1) {
                        error = EFAULT;
                        break;
                }

                if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
                    (owner & ~UMUTEX_CONTESTED) == id) {
                        error = EDEADLK;
                        break;
                }

                if (try != 0) {
                        error = EBUSY;
                        break;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        break;

                umtxq_lock(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                error = umtxq_sleep(uq, "umtxpp", timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);

                /*
                 * We did not get the lock this round: undo the ceiling
                 * boost and recompute our lent user priority from the
                 * contested mutexes we still own.
                 */
                mtx_lock_spin(&umtx_lock);
                uq->uq_inherited_pri = old_inherited_pri;
                pri = PRI_MAX;
                TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
                        uq2 = TAILQ_FIRST(&pi->pi_blocked);
                        if (uq2 != NULL) {
                                if (pri > UPRI(uq2->uq_thread))
                                        pri = UPRI(uq2->uq_thread);
                        }
                }
                if (pri > uq->uq_inherited_pri)
                        pri = uq->uq_inherited_pri;
                thread_lock(td);
                sched_unlend_user_prio(td, pri);
                thread_unlock(td);
                mtx_unlock_spin(&umtx_lock);
        }

        /* On failure, drop any priority boost we may still carry. */
        if (error != 0) {
                mtx_lock_spin(&umtx_lock);
                uq->uq_inherited_pri = old_inherited_pri;
                pri = PRI_MAX;
                TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
                        uq2 = TAILQ_FIRST(&pi->pi_blocked);
                        if (uq2 != NULL) {
                                if (pri > UPRI(uq2->uq_thread))
                                        pri = UPRI(uq2->uq_thread);
                        }
                }
                if (pri > uq->uq_inherited_pri)
                        pri = uq->uq_inherited_pri;
                thread_lock(td);
                sched_unlend_user_prio(td, pri);
                thread_unlock(td);
                mtx_unlock_spin(&umtx_lock);
        }

out:
        umtxq_lock(&uq->uq_key);
        umtxq_unbusy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        return (error);
}
 2033 
 2034 /*
 2035  * Unlock a PP mutex.
 2036  */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
        struct umtx_key key;
        struct umtx_q *uq, *uq2;
        struct umtx_pi *pi;
        uint32_t owner, id;
        uint32_t rceiling;
        int error, pri, new_inherited_pri, su;

        id = td->td_tid;
        uq = td->td_umtxq;
        /* su: may this thread carry a real-time ceiling priority? */
        su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

        /*
         * Make sure we own this mtx.
         */
        owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMUTEX_CONTESTED) != id)
                return (EPERM);

        /* m_ceilings[1] holds the priority to restore on unlock. */
        error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
        if (error != 0)
                return (error);

        if (rceiling == -1)
                new_inherited_pri = PRI_MAX;
        else {
                /* Map the userland ceiling onto a kernel priority. */
                rceiling = RTP_PRIO_MAX - rceiling;
                if (rceiling > RTP_PRIO_MAX)
                        return (EINVAL);
                new_inherited_pri = PRI_MIN_REALTIME + rceiling;
        }

        if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
            &key)) != 0)
                return (error);
        umtxq_lock(&key);
        umtxq_busy(&key);
        umtxq_unlock(&key);
        /*
         * For priority protected mutex, always set unlocked state
         * to UMUTEX_CONTESTED, so that userland always enters kernel
         * to lock the mutex, it is necessary because thread priority
         * has to be adjusted for such mutex.
         */
        error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
                UMUTEX_CONTESTED);

        umtxq_lock(&key);
        if (error == 0)
                umtxq_signal(&key, 1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);

        if (error == -1)
                error = EFAULT;
        else {
                /*
                 * Restore the pre-ceiling inherited priority and
                 * recompute the user priority lent to us by the
                 * contested mutexes we still own.
                 */
                mtx_lock_spin(&umtx_lock);
                if (su != 0)
                        uq->uq_inherited_pri = new_inherited_pri;
                pri = PRI_MAX;
                TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
                        uq2 = TAILQ_FIRST(&pi->pi_blocked);
                        if (uq2 != NULL) {
                                if (pri > UPRI(uq2->uq_thread))
                                        pri = UPRI(uq2->uq_thread);
                        }
                }
                if (pri > uq->uq_inherited_pri)
                        pri = uq->uq_inherited_pri;
                thread_lock(td);
                sched_unlend_user_prio(td, pri);
                thread_unlock(td);
                mtx_unlock_spin(&umtx_lock);
        }
        umtx_key_release(&key);
        return (error);
}
 2119 
/*
 * Change the priority ceiling of a PP mutex, returning the previous
 * ceiling through *old_ceiling.  The mutex is transiently acquired so
 * the update cannot race with other lockers.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
        uint32_t *old_ceiling)
{
        struct umtx_q *uq;
        uint32_t save_ceiling;
        uint32_t owner, id;
        uint32_t flags;
        int error;

        flags = fuword32(&m->m_flags);
        /* Ceilings only apply to priority protected mutexes. */
        if ((flags & UMUTEX_PRIO_PROTECT) == 0)
                return (EINVAL);
        if (ceiling > RTP_PRIO_MAX)
                return (EINVAL);
        id = td->td_tid;
        uq = td->td_umtxq;
        if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
           &uq->uq_key)) != 0)
                return (error);
        for (;;) {
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /* Remember the old ceiling to report back on success. */
                save_ceiling = fuword32(&m->m_ceilings[0]);

                /* Try to grab the mutex while it is unowned. */
                owner = casuword32(&m->m_owner,
                    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

                if (owner == UMUTEX_CONTESTED) {
                        suword32(&m->m_ceilings[0], ceiling);
                        /* Release again; PP mutexes idle as CONTESTED. */
                        suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
                                UMUTEX_CONTESTED);
                        error = 0;
                        break;
                }

                /* The address was invalid. */
                if (owner == -1) {
                        error = EFAULT;
                        break;
                }

                /* We already own it; safe to update in place. */
                if ((owner & ~UMUTEX_CONTESTED) == id) {
                        suword32(&m->m_ceilings[0], ceiling);
                        error = 0;
                        break;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        break;

                /*
                 * We set the contested bit, sleep. Otherwise the lock changed
                 * and we need to retry or we lost a race to the thread
                 * unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                error = umtxq_sleep(uq, "umtxpp", 0);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
        }
        umtxq_lock(&uq->uq_key);
        /* Wake everyone so they re-evaluate against the new ceiling. */
        if (error == 0)
                umtxq_signal(&uq->uq_key, INT_MAX);
        umtxq_unbusy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        if (error == 0 && old_ceiling != NULL)
                suword32(old_ceiling, save_ceiling);
        return (error);
}
 2199 
 2200 static int
 2201 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
 2202         int mode)
 2203 {
 2204         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2205         case 0:
 2206                 return (_do_lock_normal(td, m, flags, timo, mode));
 2207         case UMUTEX_PRIO_INHERIT:
 2208                 return (_do_lock_pi(td, m, flags, timo, mode));
 2209         case UMUTEX_PRIO_PROTECT:
 2210                 return (_do_lock_pp(td, m, flags, timo, mode));
 2211         }
 2212         return (EINVAL);
 2213 }
 2214 
 2215 /*
 2216  * Lock a userland POSIX mutex.
 2217  */
 2218 static int
 2219 do_lock_umutex(struct thread *td, struct umutex *m,
 2220         struct timespec *timeout, int mode)
 2221 {
 2222         struct timespec ts, ts2, ts3;
 2223         struct timeval tv;
 2224         uint32_t flags;
 2225         int error;
 2226 
 2227         flags = fuword32(&m->m_flags);
 2228         if (flags == -1)
 2229                 return (EFAULT);
 2230 
 2231         if (timeout == NULL) {
 2232                 error = _do_lock_umutex(td, m, flags, 0, mode);
 2233                 /* Mutex locking is restarted if it is interrupted. */
 2234                 if (error == EINTR && mode != _UMUTEX_WAIT)
 2235                         error = ERESTART;
 2236         } else {
 2237                 getnanouptime(&ts);
 2238                 timespecadd(&ts, timeout);
 2239                 TIMESPEC_TO_TIMEVAL(&tv, timeout);
 2240                 for (;;) {
 2241                         error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
 2242                         if (error != ETIMEDOUT)
 2243                                 break;
 2244                         getnanouptime(&ts2);
 2245                         if (timespeccmp(&ts2, &ts, >=)) {
 2246                                 error = ETIMEDOUT;
 2247                                 break;
 2248                         }
 2249                         ts3 = ts;
 2250                         timespecsub(&ts3, &ts2);
 2251                         TIMESPEC_TO_TIMEVAL(&tv, &ts3);
 2252                 }
 2253                 /* Timed-locking is not restarted. */
 2254                 if (error == ERESTART)
 2255                         error = EINTR;
 2256         }
 2257         return (error);
 2258 }
 2259 
 2260 /*
 2261  * Unlock a userland POSIX mutex.
 2262  */
 2263 static int
 2264 do_unlock_umutex(struct thread *td, struct umutex *m)
 2265 {
 2266         uint32_t flags;
 2267 
 2268         flags = fuword32(&m->m_flags);
 2269         if (flags == -1)
 2270                 return (EFAULT);
 2271 
 2272         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2273         case 0:
 2274                 return (do_unlock_normal(td, m, flags));
 2275         case UMUTEX_PRIO_INHERIT:
 2276                 return (do_unlock_pi(td, m, flags));
 2277         case UMUTEX_PRIO_PROTECT:
 2278                 return (do_unlock_pp(td, m, flags));
 2279         }
 2280 
 2281         return (EINVAL);
 2282 }
 2283 
/*
 * Wait on a userland condition variable, atomically releasing the
 * associated userland mutex before sleeping.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
        struct timespec *timeout, u_long wflags)
{
        struct umtx_q *uq;
        struct timeval tv;
        struct timespec cts, ets, tts;
        uint32_t flags;
        int error;

        uq = td->td_umtxq;
        flags = fuword32(&cv->c_flags);
        error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);
        /* Queue ourselves before dropping the mutex so no signal
         * delivered in between can be lost. */
        umtxq_lock(&uq->uq_key);
        umtxq_busy(&uq->uq_key);
        umtxq_insert(uq);
        umtxq_unlock(&uq->uq_key);

        /*
         * The magic thing is we should set c_has_waiters to 1 before
         * releasing user mutex.
         */
        suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

        umtxq_lock(&uq->uq_key);
        umtxq_unbusy(&uq->uq_key);
        umtxq_unlock(&uq->uq_key);

        error = do_unlock_umutex(td, m);
        
        umtxq_lock(&uq->uq_key);
        if (error == 0) {
                if ((wflags & UMTX_CHECK_UNPARKING) &&
                    (td->td_pflags & TDP_WAKEUP)) {
                        /* A pending unpark request: consume it and
                         * return as if interrupted. */
                        td->td_pflags &= ~TDP_WAKEUP;
                        error = EINTR;
                } else if (timeout == NULL) {
                        error = umtxq_sleep(uq, "ucond", 0);
                } else {
                        /* Timed wait: sleep in bounded slices until the
                         * absolute uptime deadline (ets) passes. */
                        getnanouptime(&ets);
                        timespecadd(&ets, timeout);
                        TIMESPEC_TO_TIMEVAL(&tv, timeout);
                        for (;;) {
                                error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
                                if (error != ETIMEDOUT)
                                        break;
                                getnanouptime(&cts);
                                if (timespeccmp(&cts, &ets, >=)) {
                                        error = ETIMEDOUT;
                                        break;
                                }
                                tts = ets;
                                timespecsub(&tts, &cts);
                                TIMESPEC_TO_TIMEVAL(&tv, &tts);
                        }
                }
        }

        if (error != 0) {
                if ((uq->uq_flags & UQF_UMTXQ) == 0) {
                        /*
                         * If we concurrently got do_cv_signal()d
                         * and we got an error or UNIX signals or a timeout,
                         * then, perform another umtxq_signal to avoid
                         * consuming the wakeup. This may cause spurious
                         * wakeup for another thread which was just queued,
                         * but SUSV3 explicitly allows spurious wakeup to
                         * occur, and indeed a kernel based implementation
                         * can not avoid it.
                         */
                        if (!umtxq_signal(&uq->uq_key, 1))
                                error = 0;
                }
                if (error == ERESTART)
                        error = EINTR;
        }
        umtxq_remove(uq);
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        return (error);
}
 2367 
 2368 /*
 2369  * Signal a userland condition variable.
 2370  */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
        struct umtx_key key;
        int error, cnt, nwake;
        uint32_t flags;

        flags = fuword32(&cv->c_flags);
        if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
                return (error);
        umtxq_lock(&key);
        umtxq_busy(&key);
        cnt = umtxq_count(&key);
        nwake = umtxq_signal(&key, 1);
        /*
         * If everyone who was queued has been woken, clear the userland
         * c_has_waiters hint.  The queue stays busy across the drop of
         * the queue lock, so a new waiter cannot race the store.
         */
        if (cnt <= nwake) {
                umtxq_unlock(&key);
                error = suword32(
                    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
                umtxq_lock(&key);
        }
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        return (error);
}
 2396 
 2397 static int
 2398 do_cv_broadcast(struct thread *td, struct ucond *cv)
 2399 {
 2400         struct umtx_key key;
 2401         int error;
 2402         uint32_t flags;
 2403 
 2404         flags = fuword32(&cv->c_flags);
 2405         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 2406                 return (error); 
 2407 
 2408         umtxq_lock(&key);
 2409         umtxq_busy(&key);
 2410         umtxq_signal(&key, INT_MAX);
 2411         umtxq_unlock(&key);
 2412 
 2413         error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
 2414 
 2415         umtxq_lock(&key);
 2416         umtxq_unbusy(&key);
 2417         umtxq_unlock(&key);
 2418 
 2419         umtx_key_release(&key);
 2420         return (error);
 2421 }
 2422 
/*
 * Acquire a userland rwlock for reading, sleeping in the kernel when
 * the fast userland path failed.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
        struct umtx_q *uq;
        uint32_t flags, wrflags;
        int32_t state, oldstate;
        int32_t blocked_readers;
        int error;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        /*
         * Unless reader preference is requested, pending writers also
         * block new readers so writers are not starved.
         */
        wrflags = URWLOCK_WRITE_OWNER;
        if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
                wrflags |= URWLOCK_WRITE_WAITERS;

        for (;;) {
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                /* try to lock it */
                while (!(state & wrflags)) {
                        if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
                                umtx_key_release(&uq->uq_key);
                                return (EAGAIN);
                        }
                        /* CAS in one more reader. */
                        oldstate = casuword32(&rwlock->rw_state, state, state + 1);
                        if (oldstate == state) {
                                umtx_key_release(&uq->uq_key);
                                return (0);
                        }
                        state = oldstate;
                }

                /* A previous sleep was interrupted or timed out. */
                if (error)
                        break;

                /* grab monitor lock */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * re-read the state, in case it changed between the try-lock above
                 * and the check below
                 */
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

                /* set read contention bit */
                while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
                        if (oldstate == state)
                                goto sleep;
                        state = oldstate;
                }

                /* state is changed while setting flags, restart */
                if (!(state & wrflags)) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        continue;
                }

sleep:
                /* contention bit is set, before sleeping, increase read waiter count */
                blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

                /* Sleep until the lock is no longer write-held. */
                while (state & wrflags) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_insert(uq);
                        umtxq_unbusy(&uq->uq_key);

                        error = umtxq_sleep(uq, "urdlck", timo);

                        umtxq_busy(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        if (error)
                                break;
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                }

                /* decrease read waiter count, and may clear read contention bit */
                blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
                /* We were the last blocked reader: clear the waiters bit. */
                if (blocked_readers == 1) {
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                        for (;;) {
                                oldstate = casuword32(&rwlock->rw_state, state,
                                         state & ~URWLOCK_READ_WAITERS);
                                if (oldstate == state)
                                        break;
                                state = oldstate;
                        }
                }

                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
        }
        umtx_key_release(&uq->uq_key);
        return (error);
}
 2529 
/*
 * Timed variant of do_rw_rdlock(): convert the relative timeout into an
 * absolute deadline on the uptime clock and retry the lock attempt until
 * it succeeds, fails with a real error, or the deadline passes.
 */
static int
do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
{
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        int error;

        /* Compute the absolute deadline. */
        getnanouptime(&ts);
        timespecadd(&ts, timeout);
        TIMESPEC_TO_TIMEVAL(&tv, timeout);
        for (;;) {
                /* tvtohz() converts the remaining time into a tick count. */
                error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
                if (error != ETIMEDOUT)
                        break;
                /* The sleep timed out; check the deadline before retrying. */
                getnanouptime(&ts2);
                if (timespeccmp(&ts2, &ts, >=)) {
                        error = ETIMEDOUT;
                        break;
                }
                /* Retry with the time still remaining until the deadline. */
                ts3 = ts;
                timespecsub(&ts3, &ts2);
                TIMESPEC_TO_TIMEVAL(&tv, &ts3);
        }
        /* Map a syscall restart to EINTR for the timed case. */
        if (error == ERESTART)
                error = EINTR;
        return (error);
}
 2557 
/*
 * Acquire the user-space rwlock for writing, sleeping at most "timo"
 * ticks per attempt (0 means sleep forever).  The lock word lives in
 * user memory and is manipulated with casuword32(); the kernel sleep
 * queue keyed by uq_key is only used to block and wake waiters.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
        struct umtx_q *uq;
        uint32_t flags;
        int32_t state, oldstate;
        int32_t blocked_writers;
        int32_t blocked_readers;
        int error;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        blocked_readers = 0;
        for (;;) {
                /* Fast path: CAS in the write-owner bit while uncontended. */
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
                        if (oldstate == state) {
                                umtx_key_release(&uq->uq_key);
                                return (0);
                        }
                        state = oldstate;
                }

                if (error) {
                        /*
                         * A previous sleep failed (timeout or signal).  If
                         * the lock is now uncontended by writers but readers
                         * are still blocked, wake them so they can retry;
                         * otherwise they could sleep on a free lock.
                         */
                        if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
                            blocked_readers != 0) {
                                umtxq_lock(&uq->uq_key);
                                umtxq_busy(&uq->uq_key);
                                umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
                                umtxq_unbusy(&uq->uq_key);
                                umtxq_unlock(&uq->uq_key);
                        }

                        break;
                }

                /* grab monitor lock */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * re-read the state, in case it changed between the try-lock above
                 * and the check below
                 */
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

                /* Advertise a blocked writer by setting WRITE_WAITERS. */
                while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
                       (state & URWLOCK_WRITE_WAITERS) == 0) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
                        if (oldstate == state)
                                goto sleep;
                        state = oldstate;
                }

                /* The lock was released while setting the flag; restart. */
                if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        continue;
                }
sleep:
                /* Count ourselves as a blocked writer before sleeping. */
                blocked_writers = fuword32(&rwlock->rw_blocked_writers);
                suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

                while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
                        umtxq_unbusy(&uq->uq_key);

                        error = umtxq_sleep(uq, "uwrlck", timo);

                        umtxq_busy(&uq->uq_key);
                        umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
                        umtxq_unlock(&uq->uq_key);
                        if (error)
                                break;
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                }

                /*
                 * Drop our blocked-writer count.  The last writer out also
                 * clears WRITE_WAITERS and samples the blocked-reader count
                 * for the error path above.
                 */
                blocked_writers = fuword32(&rwlock->rw_blocked_writers);
                suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
                if (blocked_writers == 1) {
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                        for (;;) {
                                oldstate = casuword32(&rwlock->rw_state, state,
                                         state & ~URWLOCK_WRITE_WAITERS);
                                if (oldstate == state)
                                        break;
                                state = oldstate;
                        }
                        blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                } else
                        blocked_readers = 0;

                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
        }

        umtx_key_release(&uq->uq_key);
        return (error);
}
 2666 
/*
 * Timed variant of do_rw_wrlock(): convert the relative timeout into an
 * absolute deadline on the uptime clock and retry the lock attempt until
 * it succeeds, fails with a real error, or the deadline passes.
 */
static int
do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
{
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        int error;

        /* Compute the absolute deadline. */
        getnanouptime(&ts);
        timespecadd(&ts, timeout);
        TIMESPEC_TO_TIMEVAL(&tv, timeout);
        for (;;) {
                /* tvtohz() converts the remaining time into a tick count. */
                error = do_rw_wrlock(td, obj, tvtohz(&tv));
                if (error != ETIMEDOUT)
                        break;
                /* The sleep timed out; check the deadline before retrying. */
                getnanouptime(&ts2);
                if (timespeccmp(&ts2, &ts, >=)) {
                        error = ETIMEDOUT;
                        break;
                }
                /* Retry with the time still remaining until the deadline. */
                ts3 = ts;
                timespecsub(&ts3, &ts2);
                TIMESPEC_TO_TIMEVAL(&tv, &ts3);
        }
        /* Map a syscall restart to EINTR for the timed case. */
        if (error == ERESTART)
                error = EINTR;
        return (error);
}
 2694 
/*
 * Release the user-space rwlock: clear the write-owner bit or decrement
 * the reader count, then wake waiters according to the lock's
 * reader/writer preference.  Returns EPERM if the lock is not held.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
        struct umtx_q *uq;
        uint32_t flags;
        int32_t state, oldstate;
        int error, q, count;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
        if (state & URWLOCK_WRITE_OWNER) {
                /* Write-locked: CAS the owner bit away. */
                for (;;) {
                        oldstate = casuword32(&rwlock->rw_state, state, 
                                state & ~URWLOCK_WRITE_OWNER);
                        if (oldstate != state) {
                                state = oldstate;
                                /* The owner bit vanished: not ours to drop. */
                                if (!(oldstate & URWLOCK_WRITE_OWNER)) {
                                        error = EPERM;
                                        goto out;
                                }
                        } else
                                break;
                }
        } else if (URWLOCK_READER_COUNT(state) != 0) {
                /* Read-locked: decrement the reader count. */
                for (;;) {
                        oldstate = casuword32(&rwlock->rw_state, state,
                                state - 1);
                        if (oldstate != state) {
                                state = oldstate;
                                /* No readers left: unbalanced unlock. */
                                if (URWLOCK_READER_COUNT(oldstate) == 0) {
                                        error = EPERM;
                                        goto out;
                                }
                        }
                        else
                                break;
                }
        } else {
                /* Neither read- nor write-locked. */
                error = EPERM;
                goto out;
        }

        count = 0;

        /*
         * Choose whom to wake: a single writer, or all blocked readers,
         * in the order dictated by URWLOCK_PREFER_READER.
         */
        if (!(flags & URWLOCK_PREFER_READER)) {
                if (state & URWLOCK_WRITE_WAITERS) {
                        count = 1;
                        q = UMTX_EXCLUSIVE_QUEUE;
                } else if (state & URWLOCK_READ_WAITERS) {
                        count = INT_MAX;
                        q = UMTX_SHARED_QUEUE;
                }
        } else {
                if (state & URWLOCK_READ_WAITERS) {
                        count = INT_MAX;
                        q = UMTX_SHARED_QUEUE;
                } else if (state & URWLOCK_WRITE_WAITERS) {
                        count = 1;
                        q = UMTX_EXCLUSIVE_QUEUE;
                }
        }

        if (count) {
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_signal_queue(&uq->uq_key, count, q);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
        }
out:
        umtx_key_release(&uq->uq_key);
        return (error);
}
 2773 
 2774 int
 2775 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
 2776     /* struct umtx *umtx */
 2777 {
 2778         return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
 2779 }
 2780 
 2781 int
 2782 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
 2783     /* struct umtx *umtx */
 2784 {
 2785         return do_unlock_umtx(td, uap->umtx, td->td_tid);
 2786 }
 2787 
 2788 static int
 2789 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
 2790 {
 2791         struct timespec *ts, timeout;
 2792         int error;
 2793 
 2794         /* Allow a null timespec (wait forever). */
 2795         if (uap->uaddr2 == NULL)
 2796                 ts = NULL;
 2797         else {
 2798                 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
 2799                 if (error != 0)
 2800                         return (error);
 2801                 if (timeout.tv_nsec >= 1000000000 ||
 2802                     timeout.tv_nsec < 0) {
 2803                         return (EINVAL);
 2804                 }
 2805                 ts = &timeout;
 2806         }
 2807         return (do_lock_umtx(td, uap->obj, uap->val, ts));
 2808 }
 2809 
 2810 static int
 2811 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
 2812 {
 2813         return (do_unlock_umtx(td, uap->obj, uap->val));
 2814 }
 2815 
 2816 static int
 2817 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
 2818 {
 2819         struct timespec *ts, timeout;
 2820         int error;
 2821 
 2822         if (uap->uaddr2 == NULL)
 2823                 ts = NULL;
 2824         else {
 2825                 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
 2826                 if (error != 0)
 2827                         return (error);
 2828                 if (timeout.tv_nsec >= 1000000000 ||
 2829                     timeout.tv_nsec < 0)
 2830                         return (EINVAL);
 2831                 ts = &timeout;
 2832         }
 2833         return do_wait(td, uap->obj, uap->val, ts, 0, 0);
 2834 }
 2835 
 2836 static int
 2837 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
 2838 {
 2839         struct timespec *ts, timeout;
 2840         int error;
 2841 
 2842         if (uap->uaddr2 == NULL)
 2843                 ts = NULL;
 2844         else {
 2845                 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
 2846                 if (error != 0)
 2847                         return (error);
 2848                 if (timeout.tv_nsec >= 1000000000 ||
 2849                     timeout.tv_nsec < 0)
 2850                         return (EINVAL);
 2851                 ts = &timeout;
 2852         }
 2853         return do_wait(td, uap->obj, uap->val, ts, 1, 0);
 2854 }
 2855 
 2856 static int
 2857 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
 2858 {
 2859         struct timespec *ts, timeout;
 2860         int error;
 2861 
 2862         if (uap->uaddr2 == NULL)
 2863                 ts = NULL;
 2864         else {
 2865                 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
 2866                 if (error != 0)
 2867                         return (error);
 2868                 if (timeout.tv_nsec >= 1000000000 ||
 2869                     timeout.tv_nsec < 0)
 2870                         return (EINVAL);
 2871                 ts = &timeout;
 2872         }
 2873         return do_wait(td, uap->obj, uap->val, ts, 1, 1);
 2874 }
 2875 
 2876 static int
 2877 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
 2878 {
 2879         return (kern_umtx_wake(td, uap->obj, uap->val, 0));
 2880 }
 2881 
 2882 static int
 2883 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
 2884 {
 2885         return (kern_umtx_wake(td, uap->obj, uap->val, 1));
 2886 }
 2887 
 2888 static int
 2889 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
 2890 {
 2891         struct timespec *ts, timeout;
 2892         int error;
 2893 
 2894         /* Allow a null timespec (wait forever). */
 2895         if (uap->uaddr2 == NULL)
 2896                 ts = NULL;
 2897         else {
 2898                 error = copyin(uap->uaddr2, &timeout,
 2899                     sizeof(timeout));
 2900                 if (error != 0)
 2901                         return (error);
 2902                 if (timeout.tv_nsec >= 1000000000 ||
 2903                     timeout.tv_nsec < 0) {
 2904                         return (EINVAL);
 2905                 }
 2906                 ts = &timeout;
 2907         }
 2908         return do_lock_umutex(td, uap->obj, ts, 0);
 2909 }
 2910 
 2911 static int
 2912 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
 2913 {
 2914         return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
 2915 }
 2916 
 2917 static int
 2918 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
 2919 {
 2920         struct timespec *ts, timeout;
 2921         int error;
 2922 
 2923         /* Allow a null timespec (wait forever). */
 2924         if (uap->uaddr2 == NULL)
 2925                 ts = NULL;
 2926         else {
 2927                 error = copyin(uap->uaddr2, &timeout,
 2928                     sizeof(timeout));
 2929                 if (error != 0)
 2930                         return (error);
 2931                 if (timeout.tv_nsec >= 1000000000 ||
 2932                     timeout.tv_nsec < 0) {
 2933                         return (EINVAL);
 2934                 }
 2935                 ts = &timeout;
 2936         }
 2937         return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
 2938 }
 2939 
 2940 static int
 2941 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
 2942 {
 2943         return do_wake_umutex(td, uap->obj);
 2944 }
 2945 
 2946 static int
 2947 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
 2948 {
 2949         return do_unlock_umutex(td, uap->obj);
 2950 }
 2951 
 2952 static int
 2953 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
 2954 {
 2955         return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
 2956 }
 2957 
 2958 static int
 2959 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
 2960 {
 2961         struct timespec *ts, timeout;
 2962         int error;
 2963 
 2964         /* Allow a null timespec (wait forever). */
 2965         if (uap->uaddr2 == NULL)
 2966                 ts = NULL;
 2967         else {
 2968                 error = copyin(uap->uaddr2, &timeout,
 2969                     sizeof(timeout));
 2970                 if (error != 0)
 2971                         return (error);
 2972                 if (timeout.tv_nsec >= 1000000000 ||
 2973                     timeout.tv_nsec < 0) {
 2974                         return (EINVAL);
 2975                 }
 2976                 ts = &timeout;
 2977         }
 2978         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 2979 }
 2980 
 2981 static int
 2982 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
 2983 {
 2984         return do_cv_signal(td, uap->obj);
 2985 }
 2986 
 2987 static int
 2988 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
 2989 {
 2990         return do_cv_broadcast(td, uap->obj);
 2991 }
 2992 
 2993 static int
 2994 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
 2995 {
 2996         struct timespec timeout;
 2997         int error;
 2998 
 2999         /* Allow a null timespec (wait forever). */
 3000         if (uap->uaddr2 == NULL) {
 3001                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3002         } else {
 3003                 error = copyin(uap->uaddr2, &timeout,
 3004                     sizeof(timeout));
 3005                 if (error != 0)
 3006                         return (error);
 3007                 if (timeout.tv_nsec >= 1000000000 ||
 3008                     timeout.tv_nsec < 0) {
 3009                         return (EINVAL);
 3010                 }
 3011                 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
 3012         }
 3013         return (error);
 3014 }
 3015 
 3016 static int
 3017 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
 3018 {
 3019         struct timespec timeout;
 3020         int error;
 3021 
 3022         /* Allow a null timespec (wait forever). */
 3023         if (uap->uaddr2 == NULL) {
 3024                 error = do_rw_wrlock(td, uap->obj, 0);
 3025         } else {
 3026                 error = copyin(uap->uaddr2, &timeout,
 3027                     sizeof(timeout));
 3028                 if (error != 0)
 3029                         return (error);
 3030                 if (timeout.tv_nsec >= 1000000000 ||
 3031                     timeout.tv_nsec < 0) {
 3032                         return (EINVAL);
 3033                 }
 3034 
 3035                 error = do_rw_wrlock2(td, uap->obj, &timeout);
 3036         }
 3037         return (error);
 3038 }
 3039 
 3040 static int
 3041 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
 3042 {
 3043         return do_rw_unlock(td, uap->obj);
 3044 }
 3045 
/* Signature shared by every _umtx_op() operation handler. */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
 3047 
/*
 * Dispatch table for _umtx_op(); entries are indexed by the UMTX_OP_*
 * code and must stay in that exact order.
 */
static _umtx_op_func op_table[] = {
        __umtx_op_lock_umtx,            /* UMTX_OP_LOCK */
        __umtx_op_unlock_umtx,          /* UMTX_OP_UNLOCK */
        __umtx_op_wait,                 /* UMTX_OP_WAIT */
        __umtx_op_wake,                 /* UMTX_OP_WAKE */
        __umtx_op_trylock_umutex,       /* UMTX_OP_MUTEX_TRYLOCK */
        __umtx_op_lock_umutex,          /* UMTX_OP_MUTEX_LOCK */
        __umtx_op_unlock_umutex,        /* UMTX_OP_MUTEX_UNLOCK */
        __umtx_op_set_ceiling,          /* UMTX_OP_SET_CEILING */
        __umtx_op_cv_wait,              /* UMTX_OP_CV_WAIT */
        __umtx_op_cv_signal,            /* UMTX_OP_CV_SIGNAL */
        __umtx_op_cv_broadcast,         /* UMTX_OP_CV_BROADCAST */
        __umtx_op_wait_uint,            /* UMTX_OP_WAIT_UINT */
        __umtx_op_rw_rdlock,            /* UMTX_OP_RW_RDLOCK */
        __umtx_op_rw_wrlock,            /* UMTX_OP_RW_WRLOCK */
        __umtx_op_rw_unlock,            /* UMTX_OP_RW_UNLOCK */
        __umtx_op_wait_uint_private,    /* UMTX_OP_WAIT_UINT_PRIVATE */
        __umtx_op_wake_private,         /* UMTX_OP_WAKE_PRIVATE */
        __umtx_op_wait_umutex,          /* UMTX_OP_UMUTEX_WAIT */
        __umtx_op_wake_umutex           /* UMTX_OP_UMUTEX_WAKE */
};
 3069 
 3070 int
 3071 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
 3072 {
 3073         if ((unsigned)uap->op < UMTX_OP_MAX)
 3074                 return (*op_table[uap->op])(td, uap);
 3075         return (EINVAL);
 3076 }
 3077 
 3078 #ifdef COMPAT_IA32
 3079 int
 3080 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
 3081     /* struct umtx *umtx */
 3082 {
 3083         return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
 3084 }
 3085 
 3086 int
 3087 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
 3088     /* struct umtx *umtx */
 3089 {
 3090         return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
 3091 }
 3092 
/*
 * Layout of a timespec as passed by 32-bit (COMPAT_IA32) callers.
 * NOTE(review): both fields are u_int32_t, so a negative tv_nsec from
 * userland shows up here as a large positive value; the callers'
 * tv_nsec range check still rejects it, but a signed type would be more
 * faithful to the userland definition — confirm against 32-bit libc.
 */
struct timespec32 {
        u_int32_t tv_sec;
        u_int32_t tv_nsec;
};
 3097 
 3098 static inline int
 3099 copyin_timeout32(void *addr, struct timespec *tsp)
 3100 {
 3101         struct timespec32 ts32;
 3102         int error;
 3103 
 3104         error = copyin(addr, &ts32, sizeof(struct timespec32));
 3105         if (error == 0) {
 3106                 tsp->tv_sec = ts32.tv_sec;
 3107                 tsp->tv_nsec = ts32.tv_nsec;
 3108         }
 3109         return (error);
 3110 }
 3111 
 3112 static int
 3113 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3114 {
 3115         struct timespec *ts, timeout;
 3116         int error;
 3117 
 3118         /* Allow a null timespec (wait forever). */
 3119         if (uap->uaddr2 == NULL)
 3120                 ts = NULL;
 3121         else {
 3122                 error = copyin_timeout32(uap->uaddr2, &timeout);
 3123                 if (error != 0)
 3124                         return (error);
 3125                 if (timeout.tv_nsec >= 1000000000 ||
 3126                     timeout.tv_nsec < 0) {
 3127                         return (EINVAL);
 3128                 }
 3129                 ts = &timeout;
 3130         }
 3131         return (do_lock_umtx32(td, uap->obj, uap->val, ts));
 3132 }
 3133 
 3134 static int
 3135 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3136 {
 3137         return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
 3138 }
 3139 
 3140 static int
 3141 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3142 {
 3143         struct timespec *ts, timeout;
 3144         int error;
 3145 
 3146         if (uap->uaddr2 == NULL)
 3147                 ts = NULL;
 3148         else {
 3149                 error = copyin_timeout32(uap->uaddr2, &timeout);
 3150                 if (error != 0)
 3151                         return (error);
 3152                 if (timeout.tv_nsec >= 1000000000 ||
 3153                     timeout.tv_nsec < 0)
 3154                         return (EINVAL);
 3155                 ts = &timeout;
 3156         }
 3157         return do_wait(td, uap->obj, uap->val, ts, 1, 0);
 3158 }
 3159 
 3160 static int
 3161 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3162 {
 3163         struct timespec *ts, timeout;
 3164         int error;
 3165 
 3166         /* Allow a null timespec (wait forever). */
 3167         if (uap->uaddr2 == NULL)
 3168                 ts = NULL;
 3169         else {
 3170                 error = copyin_timeout32(uap->uaddr2, &timeout);
 3171                 if (error != 0)
 3172                         return (error);
 3173                 if (timeout.tv_nsec >= 1000000000 ||
 3174                     timeout.tv_nsec < 0)
 3175                         return (EINVAL);
 3176                 ts = &timeout;
 3177         }
 3178         return do_lock_umutex(td, uap->obj, ts, 0);
 3179 }
 3180 
 3181 static int
 3182 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3183 {
 3184         struct timespec *ts, timeout;
 3185         int error;
 3186 
 3187         /* Allow a null timespec (wait forever). */
 3188         if (uap->uaddr2 == NULL)
 3189                 ts = NULL;
 3190         else {
 3191                 error = copyin_timeout32(uap->uaddr2, &timeout);
 3192                 if (error != 0)
 3193                         return (error);
 3194                 if (timeout.tv_nsec >= 1000000000 ||
 3195                     timeout.tv_nsec < 0)
 3196                         return (EINVAL);
 3197                 ts = &timeout;
 3198         }
 3199         return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
 3200 }
 3201 
 3202 static int
 3203 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3204 {
 3205         struct timespec *ts, timeout;
 3206         int error;
 3207 
 3208         /* Allow a null timespec (wait forever). */
 3209         if (uap->uaddr2 == NULL)
 3210                 ts = NULL;
 3211         else {
 3212                 error = copyin_timeout32(uap->uaddr2, &timeout);
 3213                 if (error != 0)
 3214                         return (error);
 3215                 if (timeout.tv_nsec >= 1000000000 ||
 3216                     timeout.tv_nsec < 0)
 3217                         return (EINVAL);
 3218                 ts = &timeout;
 3219         }
 3220         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 3221 }
 3222 
 3223 static int
 3224 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3225 {
 3226         struct timespec timeout;
 3227         int error;
 3228 
 3229         /* Allow a null timespec (wait forever). */
 3230         if (uap->uaddr2 == NULL) {
 3231                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3232         } else {
 3233                 error = copyin(uap->uaddr2, &timeout,
 3234                     sizeof(timeout));
 3235                 if (error != 0)
 3236                         return (error);
 3237                 if (timeout.tv_nsec >= 1000000000 ||
 3238                     timeout.tv_nsec < 0) {
 3239                         return (EINVAL);
 3240                 }
 3241                 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
 3242         }
 3243         return (error);
 3244 }
 3245 
 3246 static int
 3247 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3248 {
 3249         struct timespec timeout;
 3250         int error;
 3251 
 3252         /* Allow a null timespec (wait forever). */
 3253         if (uap->uaddr2 == NULL) {
 3254                 error = do_rw_wrlock(td, uap->obj, 0);
 3255         } else {
 3256                 error = copyin_timeout32(uap->uaddr2, &timeout);
 3257                 if (error != 0)
 3258                         return (error);
 3259                 if (timeout.tv_nsec >= 1000000000 ||
 3260                     timeout.tv_nsec < 0) {
 3261                         return (EINVAL);
 3262                 }
 3263 
 3264                 error = do_rw_wrlock2(td, uap->obj, &timeout);
 3265         }
 3266         return (error);
 3267 }
 3268 
 3269 static int
 3270 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
 3271 {
 3272         struct timespec *ts, timeout;
 3273         int error;
 3274 
 3275         if (uap->uaddr2 == NULL)
 3276                 ts = NULL;
 3277         else {
 3278                 error = copyin_timeout32(uap->uaddr2, &timeout);
 3279                 if (error != 0)
 3280                         return (error);
 3281                 if (timeout.tv_nsec >= 1000000000 ||
 3282                     timeout.tv_nsec < 0)
 3283                         return (EINVAL);
 3284                 ts = &timeout;
 3285         }
 3286         return do_wait(td, uap->obj, uap->val, ts, 1, 1);
 3287 }
 3288 
/*
 * 32-bit compat dispatch table; entries are indexed by the UMTX_OP_*
 * code and must mirror op_table[] exactly.
 */
static _umtx_op_func op_table_compat32[] = {
        __umtx_op_lock_umtx_compat32,   /* UMTX_OP_LOCK */
        __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */
        __umtx_op_wait_compat32,        /* UMTX_OP_WAIT */
        __umtx_op_wake,                 /* UMTX_OP_WAKE */
        __umtx_op_trylock_umutex,       /* UMTX_OP_MUTEX_TRYLOCK */
        __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */
        __umtx_op_unlock_umutex,        /* UMTX_OP_MUTEX_UNLOCK */
        __umtx_op_set_ceiling,          /* UMTX_OP_SET_CEILING */
        __umtx_op_cv_wait_compat32,     /* UMTX_OP_CV_WAIT */
        __umtx_op_cv_signal,            /* UMTX_OP_CV_SIGNAL */
        __umtx_op_cv_broadcast,         /* UMTX_OP_CV_BROADCAST */
        __umtx_op_wait_compat32,        /* UMTX_OP_WAIT_UINT */
        __umtx_op_rw_rdlock_compat32,   /* UMTX_OP_RW_RDLOCK */
        __umtx_op_rw_wrlock_compat32,   /* UMTX_OP_RW_WRLOCK */
        __umtx_op_rw_unlock,            /* UMTX_OP_RW_UNLOCK */
        __umtx_op_wait_uint_private_compat32,   /* UMTX_OP_WAIT_UINT_PRIVATE */
        __umtx_op_wake_private,         /* UMTX_OP_WAKE_PRIVATE */
        __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
        __umtx_op_wake_umutex           /* UMTX_OP_UMUTEX_WAKE */
};
 3310 
 3311 int
 3312 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
 3313 {
 3314         if ((unsigned)uap->op < UMTX_OP_MAX)
 3315                 return (*op_table_compat32[uap->op])(td,
 3316                         (struct _umtx_op_args *)uap);
 3317         return (EINVAL);
 3318 }
 3319 #endif
 3320 
 3321 void
 3322 umtx_thread_init(struct thread *td)
 3323 {
 3324         td->td_umtxq = umtxq_alloc();
 3325         td->td_umtxq->uq_thread = td;
 3326 }
 3327 
 3328 void
 3329 umtx_thread_fini(struct thread *td)
 3330 {
 3331         umtxq_free(td->td_umtxq);
 3332 }
 3333 
 3334 /*
 3335  * It will be called when new thread is created, e.g fork().
 3336  */
 3337 void
 3338 umtx_thread_alloc(struct thread *td)
 3339 {
 3340         struct umtx_q *uq;
 3341 
 3342         uq = td->td_umtxq;
 3343         uq->uq_inherited_pri = PRI_MAX;
 3344 
 3345         KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
 3346         KASSERT(uq->uq_thread == td, ("uq_thread != td"));
 3347         KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
 3348         KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
 3349 }
 3350 
 3351 /*
 3352  * exec() hook.
 3353  */
 3354 static void
 3355 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
 3356         struct image_params *imgp __unused)
 3357 {
 3358         umtx_thread_cleanup(curthread);
 3359 }
 3360 
 3361 /*
 3362  * thread_exit() hook.
 3363  */
 3364 void
 3365 umtx_thread_exit(struct thread *td)
 3366 {
 3367         umtx_thread_cleanup(td);
 3368 }
 3369 
 3370 /*
 3371  * clean up umtx data.
 3372  */
 3373 static void
 3374 umtx_thread_cleanup(struct thread *td)
 3375 {
 3376         struct umtx_q *uq;
 3377         struct umtx_pi *pi;
 3378 
 3379         if ((uq = td->td_umtxq) == NULL)
 3380                 return;
 3381 
 3382         mtx_lock_spin(&umtx_lock);
 3383         uq->uq_inherited_pri = PRI_MAX;
 3384         while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
 3385                 pi->pi_owner = NULL;
 3386                 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
 3387         }
 3388         thread_lock(td);
 3389         td->td_flags &= ~TDF_UBORROWING;
 3390         thread_unlock(td);
 3391         mtx_unlock_spin(&umtx_lock);
 3392 }

Cache object: dab09b1b7ec6d9baec0f1598473a97ea


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.