The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
    3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice unmodified, this list of conditions, and the following
   11  *    disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 
   31 #include "opt_compat.h"
   32 #include "opt_umtx_profiling.h"
   33 
   34 #include <sys/param.h>
   35 #include <sys/kernel.h>
   36 #include <sys/limits.h>
   37 #include <sys/lock.h>
   38 #include <sys/malloc.h>
   39 #include <sys/mutex.h>
   40 #include <sys/priv.h>
   41 #include <sys/proc.h>
   42 #include <sys/sched.h>
   43 #include <sys/smp.h>
   44 #include <sys/sysctl.h>
   45 #include <sys/sysent.h>
   46 #include <sys/systm.h>
   47 #include <sys/sysproto.h>
   48 #include <sys/eventhandler.h>
   49 #include <sys/umtx.h>
   50 
   51 #include <vm/vm.h>
   52 #include <vm/vm_param.h>
   53 #include <vm/pmap.h>
   54 #include <vm/vm_map.h>
   55 #include <vm/vm_object.h>
   56 
   57 #include <machine/cpu.h>
   58 
   59 #ifdef COMPAT_FREEBSD32
   60 #include <compat/freebsd32/freebsd32_proto.h>
   61 #endif
   62 
   63 #define _UMUTEX_TRY             1
   64 #define _UMUTEX_WAIT            2
   65 
/*
 * Priority inheritance (PI) mutex info.  One of these tracks a contested
 * PI userland mutex: the current owner thread and the queue of threads
 * blocked on it, so that priority can be propagated to the owner.
 */
struct umtx_pi {
        /* Owner thread */
        struct thread           *pi_owner;

        /* Reference count (object presumably freed when it drops to 0
         * by umtx_pi_free(); allocation/release paths not in view). */
        int                     pi_refcount;

        /* List entry to link umtx holding by thread */
        TAILQ_ENTRY(umtx_pi)    pi_link;

        /* List entry in the owning chain's uc_pi_list hash bucket. */
        TAILQ_ENTRY(umtx_pi)    pi_hashlink;

        /* List of umtx_q waiters blocked on this PI mutex. */
        TAILQ_HEAD(,umtx_q)     pi_blocked;

        /* Identify a userland lock object */
        struct umtx_key         pi_key;
};
   86 
/*
 * A userland synchronous object user: the per-thread record that is
 * queued on a umtxq_chain while the thread sleeps on a userland object.
 */
struct umtx_q {
        /* Linked list entry for the chain's sleep queue (uc_queue[]). */
        TAILQ_ENTRY(umtx_q)     uq_link;

        /* Umtx key identifying the userland object being waited on. */
        struct umtx_key         uq_key;

        /* Umtx flags. */
        int                     uq_flags;
#define UQF_UMTXQ       0x0001          /* currently on a chain queue */

        /* Back-pointer to the thread this queue entry belongs to. */
        struct thread           *uq_thread;

        /*
         * Blocked on PI mutex. read can use chain lock
         * or umtx_lock, write must have both chain lock and
         * umtx_lock being hold.
         */
        struct umtx_pi          *uq_pi_blocked;

        /* List entry on a umtx_pi's pi_blocked list. */
        TAILQ_ENTRY(umtx_q)     uq_lockq;

        /* PI mutexes owned by this thread that other threads contend on. */
        TAILQ_HEAD(,umtx_pi)    uq_pi_contested;

        /* Inherited priority from PP mutex; PRI_MAX when none inherited
         * (see umtxq_alloc()). */
        u_char                  uq_inherited_pri;
};
  118 
  119 TAILQ_HEAD(umtxq_head, umtx_q);
  120 
/* Userland lock object's wait-queue chain: one hash bucket holding all
 * sleeping waiters whose keys hash to this slot. */
struct umtxq_chain {
        /* Lock for this chain. */
        struct mtx              uc_lock;

        /* List of sleep queues: shared and exclusive waiters. */
        struct umtxq_head       uc_queue[2];
#define UMTX_SHARED_QUEUE       0
#define UMTX_EXCLUSIVE_QUEUE    1

        /* Busy flag: set while an operation that may block runs without
         * the chain mutex held (see umtxq_busy()/umtxq_unbusy()). */
        char                    uc_busy;

        /* Number of threads sleeping in umtxq_busy() for this chain. */
        int                     uc_waiters;

        /* All PI mutex records hashed to this chain. */
        TAILQ_HEAD(,umtx_pi)    uc_pi_list;
#ifdef UMTX_PROFILING
        /* Current and high-water waiter counts (profiling only). */
        int                     length;
        int                     max_length;
#endif
};
  144 
  145 #define UMTXQ_LOCKED_ASSERT(uc)         mtx_assert(&(uc)->uc_lock, MA_OWNED)
  146 #define UMTXQ_BUSY_ASSERT(uc)   KASSERT(&(uc)->uc_busy, ("umtx chain is not busy"))
  147 
  148 /*
  149  * Don't propagate time-sharing priority, there is a security reason,
  150  * a user can simply introduce PI-mutex, let thread A lock the mutex,
  151  * and let another thread B block on the mutex, because B is
  152  * sleeping, its priority will be boosted, this causes A's priority to
  153  * be boosted via priority propagating too and will never be lowered even
  154  * if it is using 100%CPU, this is unfair to other processes.
  155  */
  156 
  157 #define UPRI(td)        (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
  158                           (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
  159                          PRI_MAX_TIMESHARE : (td)->td_user_pri)
  160 
  161 #define GOLDEN_RATIO_PRIME      2654404609U
  162 #define UMTX_CHAINS             128
  163 #define UMTX_SHIFTS             (__WORD_BIT - 7)
  164 
  165 #define GET_SHARE(flags)        \
  166     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
  167 
  168 #define BUSY_SPINS              200
  169 
  170 static uma_zone_t               umtx_pi_zone;
  171 static struct umtxq_chain       umtxq_chains[2][UMTX_CHAINS];
  172 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
  173 static int                      umtx_pi_allocated;
  174 
  175 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
  176 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
  177     &umtx_pi_allocated, 0, "Allocated umtx_pi");
  178 
  179 #ifdef UMTX_PROFILING
  180 static long max_length;
  181 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
  182 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
  183 #endif
  184 
  185 static void umtxq_sysinit(void *);
  186 static void umtxq_hash(struct umtx_key *key);
  187 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
  188 static void umtxq_lock(struct umtx_key *key);
  189 static void umtxq_unlock(struct umtx_key *key);
  190 static void umtxq_busy(struct umtx_key *key);
  191 static void umtxq_unbusy(struct umtx_key *key);
  192 static void umtxq_insert_queue(struct umtx_q *uq, int q);
  193 static void umtxq_remove_queue(struct umtx_q *uq, int q);
  194 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
  195 static int umtxq_count(struct umtx_key *key);
  196 static struct umtx_pi *umtx_pi_alloc(int);
  197 static void umtx_pi_free(struct umtx_pi *pi);
  198 static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
  199 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
  200 static void umtx_thread_cleanup(struct thread *td);
  201 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
  202         struct image_params *imgp __unused);
  203 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
  204 
  205 #define umtxq_signal(key, nwake)        umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
  206 #define umtxq_insert(uq)        umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
  207 #define umtxq_remove(uq)        umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
  208 
  209 static struct mtx umtx_lock;
  210 
  211 #ifdef UMTX_PROFILING
  212 static void
  213 umtx_init_profiling(void) 
  214 {
  215         struct sysctl_oid *chain_oid;
  216         char chain_name[10];
  217         int i;
  218 
  219         for (i = 0; i < UMTX_CHAINS; ++i) {
  220                 snprintf(chain_name, sizeof(chain_name), "%d", i);
  221                 chain_oid = SYSCTL_ADD_NODE(NULL, 
  222                     SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 
  223                     chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
  224                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  225                     "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
  226                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  227                     "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
  228         }
  229 }
  230 #endif
  231 
  232 static void
  233 umtxq_sysinit(void *arg __unused)
  234 {
  235         int i, j;
  236 
  237         umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
  238                 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  239         for (i = 0; i < 2; ++i) {
  240                 for (j = 0; j < UMTX_CHAINS; ++j) {
  241                         mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
  242                                  MTX_DEF | MTX_DUPOK);
  243                         TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]);
  244                         TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]);
  245                         TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
  246                         umtxq_chains[i][j].uc_busy = 0;
  247                         umtxq_chains[i][j].uc_waiters = 0;
  248 #ifdef UMTX_PROFILING
  249                         umtxq_chains[i][j].length = 0;
  250                         umtxq_chains[i][j].max_length = 0;      
  251 #endif
  252                 }
  253         }
  254 #ifdef UMTX_PROFILING
  255         umtx_init_profiling();
  256 #endif
  257         mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
  258         EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
  259             EVENTHANDLER_PRI_ANY);
  260 }
  261 
  262 struct umtx_q *
  263 umtxq_alloc(void)
  264 {
  265         struct umtx_q *uq;
  266 
  267         uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
  268         TAILQ_INIT(&uq->uq_pi_contested);
  269         uq->uq_inherited_pri = PRI_MAX;
  270         return (uq);
  271 }
  272 
/*
 * Release a umtx queue entry previously obtained from umtxq_alloc().
 */
void
umtxq_free(struct umtx_q *uq)
{
        free(uq, M_UMTX);
}
  278 
  279 static inline void
  280 umtxq_hash(struct umtx_key *key)
  281 {
  282         unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
  283         key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
  284 }
  285 
  286 static inline struct umtxq_chain *
  287 umtxq_getchain(struct umtx_key *key)
  288 {
  289         if (key->type <= TYPE_CV)
  290                 return (&umtxq_chains[1][key->hash]);
  291         return (&umtxq_chains[0][key->hash]);
  292 }
  293 
  294 /*
  295  * Lock a chain.
  296  */
  297 static inline void
  298 umtxq_lock(struct umtx_key *key)
  299 {
  300         struct umtxq_chain *uc;
  301 
  302         uc = umtxq_getchain(key);
  303         mtx_lock(&uc->uc_lock);
  304 }
  305 
  306 /*
  307  * Unlock a chain.
  308  */
  309 static inline void
  310 umtxq_unlock(struct umtx_key *key)
  311 {
  312         struct umtxq_chain *uc;
  313 
  314         uc = umtxq_getchain(key);
  315         mtx_unlock(&uc->uc_lock);
  316 }
  317 
  318 /*
  319  * Set chain to busy state when following operation
  320  * may be blocked (kernel mutex can not be used).
  321  */
  322 static inline void
  323 umtxq_busy(struct umtx_key *key)
  324 {
  325         struct umtxq_chain *uc;
  326 
  327         uc = umtxq_getchain(key);
  328         mtx_assert(&uc->uc_lock, MA_OWNED);
  329         if (uc->uc_busy) {
  330 #ifdef SMP
  331                 if (smp_cpus > 1) {
  332                         int count = BUSY_SPINS;
  333                         if (count > 0) {
  334                                 umtxq_unlock(key);
  335                                 while (uc->uc_busy && --count > 0)
  336                                         cpu_spinwait();
  337                                 umtxq_lock(key);
  338                         }
  339                 }
  340 #endif
  341                 while (uc->uc_busy) {
  342                         uc->uc_waiters++;
  343                         msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
  344                         uc->uc_waiters--;
  345                 }
  346         }
  347         uc->uc_busy = 1;
  348 }
  349 
  350 /*
  351  * Unbusy a chain.
  352  */
  353 static inline void
  354 umtxq_unbusy(struct umtx_key *key)
  355 {
  356         struct umtxq_chain *uc;
  357 
  358         uc = umtxq_getchain(key);
  359         mtx_assert(&uc->uc_lock, MA_OWNED);
  360         KASSERT(uc->uc_busy != 0, ("not busy"));
  361         uc->uc_busy = 0;
  362         if (uc->uc_waiters)
  363                 wakeup_one(uc);
  364 }
  365 
/*
 * Append a waiter to queue q (shared or exclusive) of its key's chain
 * and mark it as enqueued.  Chain lock must be held.
 */
static inline void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
#ifdef UMTX_PROFILING
        /* Track the per-chain and global high-water queue lengths. */
        uc->length++;
        if (uc->length > uc->max_length) {
                uc->max_length = uc->length;
                if (uc->max_length > max_length)
                        max_length = uc->max_length;    
        }
#endif
        /* UQF_UMTXQ tells umtxq_sleep()/remove that we are queued. */
        uq->uq_flags |= UQF_UMTXQ;
}
  384 
  385 static inline void
  386 umtxq_remove_queue(struct umtx_q *uq, int q)
  387 {
  388         struct umtxq_chain *uc;
  389 
  390         uc = umtxq_getchain(&uq->uq_key);
  391         UMTXQ_LOCKED_ASSERT(uc);
  392         if (uq->uq_flags & UQF_UMTXQ) {
  393                 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
  394 #ifdef UMTX_PROFILING
  395                 uc->length--;
  396 #endif
  397                 uq->uq_flags &= ~UQF_UMTXQ;
  398         }
  399 }
  400 
  401 /*
  402  * Check if there are multiple waiters
  403  */
  404 static int
  405 umtxq_count(struct umtx_key *key)
  406 {
  407         struct umtxq_chain *uc;
  408         struct umtx_q *uq;
  409         int count = 0;
  410 
  411         uc = umtxq_getchain(key);
  412         UMTXQ_LOCKED_ASSERT(uc);
  413         TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
  414                 if (umtx_key_match(&uq->uq_key, key)) {
  415                         if (++count > 1)
  416                                 break;
  417                 }
  418         }
  419         return (count);
  420 }
  421 
/*
 * Check if there are multiple PI waiters and returns first
 * waiter via *first (NULL when there are no matching waiters).
 * As with umtxq_count(), the count saturates at 2.
 * Chain lock must be held.
 */
static int
umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
{
        struct umtxq_chain *uc;
        struct umtx_q *uq;
        int count = 0;

        *first = NULL;
        uc = umtxq_getchain(key);
        UMTXQ_LOCKED_ASSERT(uc);
        TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
                if (umtx_key_match(&uq->uq_key, key)) {
                        /* Break BEFORE the store so *first keeps the
                         * first match only (set when count becomes 1). */
                        if (++count > 1)
                                break;
                        *first = uq;
                }
        }
        return (count);
}
  445 
  446 /*
  447  * Wake up threads waiting on an userland object.
  448  */
  449 
  450 static int
  451 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
  452 {
  453         struct umtxq_chain *uc;
  454         struct umtx_q *uq, *next;
  455         int ret;
  456 
  457         ret = 0;
  458         uc = umtxq_getchain(key);
  459         UMTXQ_LOCKED_ASSERT(uc);
  460         TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
  461                 if (umtx_key_match(&uq->uq_key, key)) {
  462                         umtxq_remove_queue(uq, q);
  463                         wakeup(uq);
  464                         if (++ret >= n_wake)
  465                                 break;
  466                 }
  467         }
  468         return (ret);
  469 }
  470 
  471 
/*
 * Wake up one specific waiter: dequeue it from the shared queue and
 * issue the wakeup.  Chain lock must be held.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        umtxq_remove(uq);
        wakeup(uq);
}
  485 
  486 /*
  487  * Put thread into sleep state, before sleeping, check if
  488  * thread was removed from umtx queue.
  489  */
  490 static inline int
  491 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
  492 {
  493         struct umtxq_chain *uc;
  494         int error;
  495 
  496         uc = umtxq_getchain(&uq->uq_key);
  497         UMTXQ_LOCKED_ASSERT(uc);
  498         if (!(uq->uq_flags & UQF_UMTXQ))
  499                 return (0);
  500         error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
  501         if (error == EWOULDBLOCK)
  502                 error = ETIMEDOUT;
  503         return (error);
  504 }
  505 
/*
 * Convert userspace address into unique logical address.
 * For thread-private objects the key is (vmspace, address); for
 * process-shared (or AUTO_SHARE on a shared mapping) it is the backing
 * VM object plus offset, with a reference taken on the object that the
 * caller must drop via umtx_key_release().  Returns 0 or EFAULT.
 */
int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
        struct thread *td = curthread;
        vm_map_t map;
        vm_map_entry_t entry;
        vm_pindex_t pindex;
        vm_prot_t prot;
        boolean_t wired;

        key->type = type;
        if (share == THREAD_SHARE) {
                key->shared = 0;
                key->info.private.vs = td->td_proc->p_vmspace;
                key->info.private.addr = (uintptr_t)addr;
        } else {
                MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
                map = &td->td_proc->p_vmspace->vm_map;
                /* Lookup may fault/fail if the address is unmapped. */
                if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
                    &entry, &key->info.shared.object, &pindex, &prot,
                    &wired) != KERN_SUCCESS) {
                        return EFAULT;
                }

                /* AUTO_SHARE becomes shared only on VM_INHERIT_SHARE
                 * mappings; otherwise fall back to a private key. */
                if ((share == PROCESS_SHARE) ||
                    (share == AUTO_SHARE &&
                     VM_INHERIT_SHARE == entry->inheritance)) {
                        key->shared = 1;
                        /* Object-relative offset of addr in the mapping. */
                        key->info.shared.offset = entry->offset + entry->start -
                                (vm_offset_t)addr;
                        vm_object_reference(key->info.shared.object);
                } else {
                        key->shared = 0;
                        key->info.private.vs = td->td_proc->p_vmspace;
                        key->info.private.addr = (uintptr_t)addr;
                }
                vm_map_lookup_done(map, entry);
        }

        umtxq_hash(key);
        return (0);
}
  551 
/*
 * Release a key obtained from umtx_key_get(): drop the VM object
 * reference held for shared keys; private keys hold no resources.
 */
void
umtx_key_release(struct umtx_key *key)
{
        if (key->shared)
                vm_object_deallocate(key->info.shared.object);
}
  561 
/*
 * Lock a umtx object, sleeping up to "timo" ticks (0 = no timeout) when
 * it is contested.  Returns 0 on success, EFAULT if the userland word
 * cannot be accessed, ETIMEDOUT on timeout, or the signal error from a
 * previous interrupted sleep.  Note: casuword() returns (u_long)-1 on
 * fault, hence the owner == -1 checks.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
        struct umtx_q *uq;
        u_long owner;
        u_long old;
        int error = 0;

        uq = td->td_umtxq;

        /*
         * Care must be exercised when dealing with umtx structure. It
         * can fault on any access.
         */
        for (;;) {
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMTX_UNOWNED)
                        return (0);

                /* The address was invalid. */
                if (owner == -1)
                        return (EFAULT);

                /* If no one owns it but it is contested try to acquire it. */
                if (owner == UMTX_CONTESTED) {
                        owner = casuword(&umtx->u_owner,
                            UMTX_CONTESTED, id | UMTX_CONTESTED);

                        if (owner == UMTX_CONTESTED)
                                return (0);

                        /* The address was invalid. */
                        if (owner == -1)
                                return (EFAULT);

                        /* If this failed the lock has changed, restart. */
                        continue;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        return (error);

                if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
                        AUTO_SHARE, &uq->uq_key)) != 0)
                        return (error);

                /* Enqueue ourselves before publishing the contested bit
                 * so a concurrent unlock cannot miss us. */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either some one else has acquired the lock or it has been
                 * released.
                 */
                old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
                        return (EFAULT);
                }

                /*
                 * We set the contested bit, sleep. Otherwise the lock changed
                 * and we need to retry or we lost a race to the thread
                 * unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                if (old == owner)
                        error = umtxq_sleep(uq, "umtx", timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);
        }

        /* NOTREACHED: the loop only exits via return statements above. */
        return (0);
}
  658 
/*
 * Lock a umtx object, with an optional relative timeout.
 * Without a timeout, an interrupted lock is transparently restarted
 * (EINTR -> ERESTART).  With a timeout, the remaining time is
 * recomputed after every early wakeup and the syscall is NOT restarted
 * (ERESTART -> EINTR), so the deadline is honored.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
        struct timespec *timeout)
{
        struct timespec ts, ts2, ts3;
        struct timeval tv;
        int error;

        if (timeout == NULL) {
                error = _do_lock_umtx(td, umtx, id, 0);
                /* Mutex locking is restarted if it is interrupted. */
                if (error == EINTR)
                        error = ERESTART;
        } else {
                /* ts = absolute deadline on the uptime clock. */
                getnanouptime(&ts);
                timespecadd(&ts, timeout);
                TIMESPEC_TO_TIMEVAL(&tv, timeout);
                for (;;) {
                        error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
                        if (error != ETIMEDOUT)
                                break;
                        /* Timed out at tick granularity; re-check the
                         * deadline and retry with the remaining time. */
                        getnanouptime(&ts2);
                        if (timespeccmp(&ts2, &ts, >=)) {
                                error = ETIMEDOUT;
                                break;
                        }
                        ts3 = ts;
                        timespecsub(&ts3, &ts2);
                        TIMESPEC_TO_TIMEVAL(&tv, &ts3);
                }
                /* Timed-locking is not restarted. */
                if (error == ERESTART)
                        error = EINTR;
        }
        return (error);
}
  698 
/*
 * Unlock a umtx object.  Verifies ownership, fast-paths the
 * uncontested case, and otherwise hands the lock off: the word is set
 * to UMTX_UNOWNED (<= 1 waiter) or UMTX_CONTESTED (> 1) and one waiter
 * is woken.  Returns 0, EFAULT, EPERM (not owner), or EINVAL if the
 * word changed underneath us.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
        struct umtx_key key;
        u_long owner;
        u_long old;
        int error;
        int count;

        /*
         * Make sure we own this mtx.
         */
        owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMTX_CONTESTED) != id)
                return (EPERM);

        /* This should be done in userland */
        if ((owner & UMTX_CONTESTED) == 0) {
                old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
                if (old == -1)
                        return (EFAULT);
                if (old == owner)
                        return (0);
                /* The contested bit appeared after fuword(); fall
                 * through to the contested path with the new value. */
                owner = old;
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
                &key)) != 0)
                return (error);

        /* Busy the chain so no new waiter can block between our count
         * and the CAS below. */
        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * there is zero or one thread only waiting for it.
         * Otherwise, it must be marked as contested.
         */
        old = casuword(&umtx->u_owner, owner,
                count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
        umtxq_lock(&key);
        umtxq_signal(&key,1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (old == -1)
                return (EFAULT);
        if (old != owner)
                return (EINVAL);
        return (0);
}
  759 
  760 #ifdef COMPAT_FREEBSD32
  761 
/*
 * Lock a 32-bit umtx object (COMPAT_FREEBSD32 path).  Mirrors
 * _do_lock_umtx() but operates on a 32-bit lock word via casuword32();
 * the UMUTEX_* constants are the 32-bit equivalents of UMTX_*.
 * Returns 0, EFAULT, ETIMEDOUT, or a signal error.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
        struct umtx_q *uq;
        uint32_t owner;
        uint32_t old;
        int error = 0;

        uq = td->td_umtxq;

        /*
         * Care must be exercised when dealing with umtx structure. It
         * can fault on any access.
         */
        for (;;) {
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword32(m, UMUTEX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMUTEX_UNOWNED)
                        return (0);

                /* The address was invalid (casuword32 faulted). */
                if (owner == -1)
                        return (EFAULT);

                /* If no one owns it but it is contested try to acquire it. */
                if (owner == UMUTEX_CONTESTED) {
                        owner = casuword32(m,
                            UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
                        if (owner == UMUTEX_CONTESTED)
                                return (0);

                        /* The address was invalid. */
                        if (owner == -1)
                                return (EFAULT);

                        /* If this failed the lock has changed, restart. */
                        continue;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        return (error);

                if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
                        AUTO_SHARE, &uq->uq_key)) != 0)
                        return (error);

                /* Enqueue before publishing the contested bit so a
                 * concurrent unlock cannot miss us. */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either some one else has acquired the lock or it has been
                 * released.
                 */
                old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
                        return (EFAULT);
                }

                /*
                 * We set the contested bit, sleep. Otherwise the lock changed
                 * and we need to retry or we lost a race to the thread
                 * unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                if (old == owner)
                        error = umtxq_sleep(uq, "umtx", timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);
        }

        /* NOTREACHED: the loop only exits via return statements above. */
        return (0);
}
  857 
  858 /*
  859  * Lock a umtx object.
  860  */
  861 static int
  862 do_lock_umtx32(struct thread *td, void *m, uint32_t id,
  863         struct timespec *timeout)
  864 {
  865         struct timespec ts, ts2, ts3;
  866         struct timeval tv;
  867         int error;
  868 
  869         if (timeout == NULL) {
  870                 error = _do_lock_umtx32(td, m, id, 0);
  871                 /* Mutex locking is restarted if it is interrupted. */
  872                 if (error == EINTR)
  873                         error = ERESTART;
  874         } else {
  875                 getnanouptime(&ts);
  876                 timespecadd(&ts, timeout);
  877                 TIMESPEC_TO_TIMEVAL(&tv, timeout);
  878                 for (;;) {
  879                         error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
  880                         if (error != ETIMEDOUT)
  881                                 break;
  882                         getnanouptime(&ts2);
  883                         if (timespeccmp(&ts2, &ts, >=)) {
  884                                 error = ETIMEDOUT;
  885                                 break;
  886                         }
  887                         ts3 = ts;
  888                         timespecsub(&ts3, &ts2);
  889                         TIMESPEC_TO_TIMEVAL(&tv, &ts3);
  890                 }
  891                 /* Timed-locking is not restarted. */
  892                 if (error == ERESTART)
  893                         error = EINTR;
  894         }
  895         return (error);
  896 }
  897 
  898 /*
  899  * Unlock a umtx object.
  900  */
  901 static int
  902 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
  903 {
  904         struct umtx_key key;
  905         uint32_t owner;
  906         uint32_t old;
  907         int error;
  908         int count;
  909 
  910         /*
  911          * Make sure we own this mtx.
  912          */
  913         owner = fuword32(m);
  914         if (owner == -1)
  915                 return (EFAULT);
  916 
  917         if ((owner & ~UMUTEX_CONTESTED) != id)
  918                 return (EPERM);
  919 
  920         /* This should be done in userland */
  921         if ((owner & UMUTEX_CONTESTED) == 0) {
  922                 old = casuword32(m, owner, UMUTEX_UNOWNED);
  923                 if (old == -1)
  924                         return (EFAULT);
  925                 if (old == owner)
  926                         return (0);
  927                 owner = old;
  928         }
  929 
  930         /* We should only ever be in here for contested locks */
  931         if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
  932                 &key)) != 0)
  933                 return (error);
  934 
  935         umtxq_lock(&key);
  936         umtxq_busy(&key);
  937         count = umtxq_count(&key);
  938         umtxq_unlock(&key);
  939 
  940         /*
  941          * When unlocking the umtx, it must be marked as unowned if
  942          * there is zero or one thread only waiting for it.
  943          * Otherwise, it must be marked as contested.
  944          */
  945         old = casuword32(m, owner,
  946                 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
  947         umtxq_lock(&key);
  948         umtxq_signal(&key,1);
  949         umtxq_unbusy(&key);
  950         umtxq_unlock(&key);
  951         umtx_key_release(&key);
  952         if (old == -1)
  953                 return (EFAULT);
  954         if (old != owner)
  955                 return (EINVAL);
  956         return (0);
  957 }
  958 #endif
  959 
  960 /*
  961  * Fetch and compare value, sleep on the address if value is not changed.
  962  */
  963 static int
  964 do_wait(struct thread *td, void *addr, u_long id,
  965         struct timespec *timeout, int compat32, int is_private)
  966 {
  967         struct umtx_q *uq;
  968         struct timespec ts, ts2, ts3;
  969         struct timeval tv;
  970         u_long tmp;
  971         int error = 0;
  972 
  973         uq = td->td_umtxq;
  974         if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
  975                 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
  976                 return (error);
  977 
  978         umtxq_lock(&uq->uq_key);
  979         umtxq_insert(uq);
  980         umtxq_unlock(&uq->uq_key);
  981         if (compat32 == 0)
  982                 tmp = fuword(addr);
  983         else
  984                 tmp = (unsigned int)fuword32(addr);
  985         if (tmp != id) {
  986                 umtxq_lock(&uq->uq_key);
  987                 umtxq_remove(uq);
  988                 umtxq_unlock(&uq->uq_key);
  989         } else if (timeout == NULL) {
  990                 umtxq_lock(&uq->uq_key);
  991                 error = umtxq_sleep(uq, "uwait", 0);
  992                 umtxq_remove(uq);
  993                 umtxq_unlock(&uq->uq_key);
  994         } else {
  995                 getnanouptime(&ts);
  996                 timespecadd(&ts, timeout);
  997                 TIMESPEC_TO_TIMEVAL(&tv, timeout);
  998                 umtxq_lock(&uq->uq_key);
  999                 for (;;) {
 1000                         error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
 1001                         if (!(uq->uq_flags & UQF_UMTXQ))
 1002                                 break;
 1003                         if (error != ETIMEDOUT)
 1004                                 break;
 1005                         umtxq_unlock(&uq->uq_key);
 1006                         getnanouptime(&ts2);
 1007                         if (timespeccmp(&ts2, &ts, >=)) {
 1008                                 error = ETIMEDOUT;
 1009                                 umtxq_lock(&uq->uq_key);
 1010                                 break;
 1011                         }
 1012                         ts3 = ts;
 1013                         timespecsub(&ts3, &ts2);
 1014                         TIMESPEC_TO_TIMEVAL(&tv, &ts3);
 1015                         umtxq_lock(&uq->uq_key);
 1016                 }
 1017                 umtxq_remove(uq);
 1018                 umtxq_unlock(&uq->uq_key);
 1019         }
 1020         umtx_key_release(&uq->uq_key);
 1021         if (error == ERESTART)
 1022                 error = EINTR;
 1023         return (error);
 1024 }
 1025 
 1026 /*
 1027  * Wake up threads sleeping on the specified address.
 1028  */
 1029 int
 1030 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
 1031 {
 1032         struct umtx_key key;
 1033         int ret;
 1034         
 1035         if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
 1036                 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
 1037                 return (ret);
 1038         umtxq_lock(&key);
 1039         ret = umtxq_signal(&key, n_wake);
 1040         umtxq_unlock(&key);
 1041         umtx_key_release(&key);
 1042         return (0);
 1043 }
 1044 
 1045 /*
 1046  * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 1047  */
 1048 static int
 1049 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
 1050         int mode)
 1051 {
 1052         struct umtx_q *uq;
 1053         uint32_t owner, old, id;
 1054         int error = 0;
 1055 
 1056         id = td->td_tid;
 1057         uq = td->td_umtxq;
 1058 
 1059         /*
 1060          * Care must be exercised when dealing with umtx structure. It
 1061          * can fault on any access.
 1062          */
 1063         for (;;) {
 1064                 owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
 1065                 if (mode == _UMUTEX_WAIT) {
 1066                         if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
 1067                                 return (0);
 1068                 } else {
 1069                         /*
 1070                          * Try the uncontested case.  This should be done in userland.
 1071                          */
 1072                         owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
 1073 
 1074                         /* The acquire succeeded. */
 1075                         if (owner == UMUTEX_UNOWNED)
 1076                                 return (0);
 1077 
 1078                         /* The address was invalid. */
 1079                         if (owner == -1)
 1080                                 return (EFAULT);
 1081 
 1082                         /* If no one owns it but it is contested try to acquire it. */
 1083                         if (owner == UMUTEX_CONTESTED) {
 1084                                 owner = casuword32(&m->m_owner,
 1085                                     UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 1086 
 1087                                 if (owner == UMUTEX_CONTESTED)
 1088                                         return (0);
 1089 
 1090                                 /* The address was invalid. */
 1091                                 if (owner == -1)
 1092                                         return (EFAULT);
 1093 
 1094                                 /* If this failed the lock has changed, restart. */
 1095                                 continue;
 1096                         }
 1097                 }
 1098 
 1099                 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
 1100                     (owner & ~UMUTEX_CONTESTED) == id)
 1101                         return (EDEADLK);
 1102 
 1103                 if (mode == _UMUTEX_TRY)
 1104                         return (EBUSY);
 1105 
 1106                 /*
 1107                  * If we caught a signal, we have retried and now
 1108                  * exit immediately.
 1109                  */
 1110                 if (error != 0)
 1111                         return (error);
 1112 
 1113                 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
 1114                     GET_SHARE(flags), &uq->uq_key)) != 0)
 1115                         return (error);
 1116 
 1117                 umtxq_lock(&uq->uq_key);
 1118                 umtxq_busy(&uq->uq_key);
 1119                 umtxq_insert(uq);
 1120                 umtxq_unlock(&uq->uq_key);
 1121 
 1122                 /*
 1123                  * Set the contested bit so that a release in user space
 1124                  * knows to use the system call for unlock.  If this fails
 1125                  * either some one else has acquired the lock or it has been
 1126                  * released.
 1127                  */
 1128                 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
 1129 
 1130                 /* The address was invalid. */
 1131                 if (old == -1) {
 1132                         umtxq_lock(&uq->uq_key);
 1133                         umtxq_remove(uq);
 1134                         umtxq_unbusy(&uq->uq_key);
 1135                         umtxq_unlock(&uq->uq_key);
 1136                         umtx_key_release(&uq->uq_key);
 1137                         return (EFAULT);
 1138                 }
 1139 
 1140                 /*
 1141                  * We set the contested bit, sleep. Otherwise the lock changed
 1142                  * and we need to retry or we lost a race to the thread
 1143                  * unlocking the umtx.
 1144                  */
 1145                 umtxq_lock(&uq->uq_key);
 1146                 umtxq_unbusy(&uq->uq_key);
 1147                 if (old == owner)
 1148                         error = umtxq_sleep(uq, "umtxn", timo);
 1149                 umtxq_remove(uq);
 1150                 umtxq_unlock(&uq->uq_key);
 1151                 umtx_key_release(&uq->uq_key);
 1152         }
 1153 
 1154         return (0);
 1155 }
 1156 
 1157 /*
 1158  * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 1159  */
 1160 /*
 1161  * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 1162  */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* Uncontested fast path; normally handled in userland. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost the race; continue with the updated word. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Busy the chain so the waiter count is stable across the CAS. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
 1219 
 1220 /*
 1221  * Check if the mutex is available and wake up a waiter,
 1222  * only for simple mutex.
 1223  */
 1224 static int
 1225 do_wake_umutex(struct thread *td, struct umutex *m)
 1226 {
 1227         struct umtx_key key;
 1228         uint32_t owner;
 1229         uint32_t flags;
 1230         int error;
 1231         int count;
 1232 
 1233         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1234         if (owner == -1)
 1235                 return (EFAULT);
 1236 
 1237         if ((owner & ~UMUTEX_CONTESTED) != 0)
 1238                 return (0);
 1239 
 1240         flags = fuword32(&m->m_flags);
 1241 
 1242         /* We should only ever be in here for contested locks */
 1243         if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
 1244             &key)) != 0)
 1245                 return (error);
 1246 
 1247         umtxq_lock(&key);
 1248         umtxq_busy(&key);
 1249         count = umtxq_count(&key);
 1250         umtxq_unlock(&key);
 1251 
 1252         if (count <= 1)
 1253                 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
 1254 
 1255         umtxq_lock(&key);
 1256         if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
 1257                 umtxq_signal(&key, 1);
 1258         umtxq_unbusy(&key);
 1259         umtxq_unlock(&key);
 1260         umtx_key_release(&key);
 1261         return (0);
 1262 }
 1263 
 1264 /*
 1265  * Check if the mutex has waiters and tries to fix contention bit.
 1266  */
 1267 static int
 1268 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
 1269 {
 1270         struct umtx_key key;
 1271         uint32_t owner, old;
 1272         int type;
 1273         int error;
 1274         int count;
 1275 
 1276         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 1277         case 0:
 1278                 type = TYPE_NORMAL_UMUTEX;
 1279                 break;
 1280         case UMUTEX_PRIO_INHERIT:
 1281                 type = TYPE_PI_UMUTEX;
 1282                 break;
 1283         case UMUTEX_PRIO_PROTECT:
 1284                 type = TYPE_PP_UMUTEX;
 1285                 break;
 1286         default:
 1287                 return (EINVAL);
 1288         }
 1289         if ((error = umtx_key_get(m, type, GET_SHARE(flags),
 1290             &key)) != 0)
 1291                 return (error);
 1292 
 1293         owner = 0;
 1294         umtxq_lock(&key);
 1295         umtxq_busy(&key);
 1296         count = umtxq_count(&key);
 1297         umtxq_unlock(&key);
 1298         /*
 1299          * Only repair contention bit if there is a waiter, this means the mutex
 1300          * is still being referenced by userland code, otherwise don't update
 1301          * any memory.
 1302          */
 1303         if (count > 1) {
 1304                 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1305                 while ((owner & UMUTEX_CONTESTED) ==0) {
 1306                         old = casuword32(&m->m_owner, owner,
 1307                             owner|UMUTEX_CONTESTED);
 1308                         if (old == owner)
 1309                                 break;
 1310                         owner = old;
 1311                 }
 1312         } else if (count == 1) {
 1313                 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1314                 while ((owner & ~UMUTEX_CONTESTED) != 0 &&
 1315                        (owner & UMUTEX_CONTESTED) == 0) {
 1316                         old = casuword32(&m->m_owner, owner,
 1317                             owner|UMUTEX_CONTESTED);
 1318                         if (old == owner)
 1319                                 break;
 1320                         owner = old;
 1321                 }
 1322         }
 1323         umtxq_lock(&key);
 1324         if (owner == -1) {
 1325                 error = EFAULT;
 1326                 umtxq_signal(&key, INT_MAX);
 1327         }
 1328         else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
 1329                 umtxq_signal(&key, 1);
 1330         umtxq_unbusy(&key);
 1331         umtxq_unlock(&key);
 1332         umtx_key_release(&key);
 1333         return (error);
 1334 }
 1335 
 1336 static inline struct umtx_pi *
 1337 umtx_pi_alloc(int flags)
 1338 {
 1339         struct umtx_pi *pi;
 1340 
 1341         pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
 1342         TAILQ_INIT(&pi->pi_blocked);
 1343         atomic_add_int(&umtx_pi_allocated, 1);
 1344         return (pi);
 1345 }
 1346 
 1347 static inline void
 1348 umtx_pi_free(struct umtx_pi *pi)
 1349 {
 1350         uma_zfree(umtx_pi_zone, pi);
 1351         atomic_add_int(&umtx_pi_allocated, -1);
 1352 }
 1353 
 1354 /*
 1355  * Adjust the thread's position on a pi_state after its priority has been
 1356  * changed.
 1357  */
 1358 static int
 1359 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
 1360 {
 1361         struct umtx_q *uq, *uq1, *uq2;
 1362         struct thread *td1;
 1363 
 1364         mtx_assert(&umtx_lock, MA_OWNED);
 1365         if (pi == NULL)
 1366                 return (0);
 1367 
 1368         uq = td->td_umtxq;
 1369 
 1370         /*
 1371          * Check if the thread needs to be moved on the blocked chain.
 1372          * It needs to be moved if either its priority is lower than
 1373          * the previous thread or higher than the next thread.
 1374          */
 1375         uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
 1376         uq2 = TAILQ_NEXT(uq, uq_lockq);
 1377         if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
 1378             (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
 1379                 /*
 1380                  * Remove thread from blocked chain and determine where
 1381                  * it should be moved to.
 1382                  */
 1383                 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
 1384                 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
 1385                         td1 = uq1->uq_thread;
 1386                         MPASS(td1->td_proc->p_magic == P_MAGIC);
 1387                         if (UPRI(td1) > UPRI(td))
 1388                                 break;
 1389                 }
 1390 
 1391                 if (uq1 == NULL)
 1392                         TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
 1393                 else
 1394                         TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
 1395         }
 1396         return (1);
 1397 }
 1398 
 1399 /*
 1400  * Propagate priority when a thread is blocked on POSIX
 1401  * PI mutex.
 1402  */ 
 1403 static void
 1404 umtx_propagate_priority(struct thread *td)
 1405 {
 1406         struct umtx_q *uq;
 1407         struct umtx_pi *pi;
 1408         int pri;
 1409 
 1410         mtx_assert(&umtx_lock, MA_OWNED);
 1411         pri = UPRI(td);
 1412         uq = td->td_umtxq;
 1413         pi = uq->uq_pi_blocked;
 1414         if (pi == NULL)
 1415                 return;
 1416 
 1417         for (;;) {
 1418                 td = pi->pi_owner;
 1419                 if (td == NULL)
 1420                         return;
 1421 
 1422                 MPASS(td->td_proc != NULL);
 1423                 MPASS(td->td_proc->p_magic == P_MAGIC);
 1424 
 1425                 if (UPRI(td) <= pri)
 1426                         return;
 1427 
 1428                 thread_lock(td);
 1429                 sched_lend_user_prio(td, pri);
 1430                 thread_unlock(td);
 1431 
 1432                 /*
 1433                  * Pick up the lock that td is blocked on.
 1434                  */
 1435                 uq = td->td_umtxq;
 1436                 pi = uq->uq_pi_blocked;
 1437                 /* Resort td on the list if needed. */
 1438                 if (!umtx_pi_adjust_thread(pi, td))
 1439                         break;
 1440         }
 1441 }
 1442 
 1443 /*
 1444  * Unpropagate priority for a PI mutex when a thread blocked on
 1445  * it is interrupted by signal or resumed by others.
 1446  */
 1447 static void
 1448 umtx_unpropagate_priority(struct umtx_pi *pi)
 1449 {
 1450         struct umtx_q *uq, *uq_owner;
 1451         struct umtx_pi *pi2;
 1452         int pri, oldpri;
 1453 
 1454         mtx_assert(&umtx_lock, MA_OWNED);
 1455 
 1456         while (pi != NULL && pi->pi_owner != NULL) {
 1457                 pri = PRI_MAX;
 1458                 uq_owner = pi->pi_owner->td_umtxq;
 1459 
 1460                 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
 1461                         uq = TAILQ_FIRST(&pi2->pi_blocked);
 1462                         if (uq != NULL) {
 1463                                 if (pri > UPRI(uq->uq_thread))
 1464                                         pri = UPRI(uq->uq_thread);
 1465                         }
 1466                 }
 1467 
 1468                 if (pri > uq_owner->uq_inherited_pri)
 1469                         pri = uq_owner->uq_inherited_pri;
 1470                 thread_lock(pi->pi_owner);
 1471                 oldpri = pi->pi_owner->td_user_pri;
 1472                 sched_unlend_user_prio(pi->pi_owner, pri);
 1473                 thread_unlock(pi->pi_owner);
 1474                 if (uq_owner->uq_pi_blocked != NULL)
 1475                         umtx_pi_adjust_locked(pi->pi_owner, oldpri);
 1476                 pi = uq_owner->uq_pi_blocked;
 1477         }
 1478 }
 1479 
 1480 /*
 1481  * Insert a PI mutex into owned list.
 1482  */
 1483 static void
 1484 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
 1485 {
 1486         struct umtx_q *uq_owner;
 1487 
 1488         uq_owner = owner->td_umtxq;
 1489         mtx_assert(&umtx_lock, MA_OWNED);
 1490         if (pi->pi_owner != NULL)
 1491                 panic("pi_ower != NULL");
 1492         pi->pi_owner = owner;
 1493         TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
 1494 }
 1495 
 1496 /*
 1497  * Claim ownership of a PI mutex.
 1498  */
 1499 static int
 1500 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
 1501 {
 1502         struct umtx_q *uq, *uq_owner;
 1503 
 1504         uq_owner = owner->td_umtxq;
 1505         mtx_lock_spin(&umtx_lock);
 1506         if (pi->pi_owner == owner) {
 1507                 mtx_unlock_spin(&umtx_lock);
 1508                 return (0);
 1509         }
 1510 
 1511         if (pi->pi_owner != NULL) {
 1512                 /*
 1513                  * userland may have already messed the mutex, sigh.
 1514                  */
 1515                 mtx_unlock_spin(&umtx_lock);
 1516                 return (EPERM);
 1517         }
 1518         umtx_pi_setowner(pi, owner);
 1519         uq = TAILQ_FIRST(&pi->pi_blocked);
 1520         if (uq != NULL) {
 1521                 int pri;
 1522 
 1523                 pri = UPRI(uq->uq_thread);
 1524                 thread_lock(owner);
 1525                 if (pri < UPRI(owner))
 1526                         sched_lend_user_prio(owner, pri);
 1527                 thread_unlock(owner);
 1528         }
 1529         mtx_unlock_spin(&umtx_lock);
 1530         return (0);
 1531 }
 1532 
/*
 * Re-sort td on its blocked PI mutex after a priority change and, if
 * td now heads the queue with a better priority, propagate it up the
 * owner chain.  Caller holds umtx_lock and guarantees td is blocked
 * on a PI mutex.
 */
static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}
 1557 
 1558 /*
 1559  * Adjust a thread's order position in its blocked PI mutex,
 1560  * this may result new priority propagating process.
 1561  */
 1562 void
 1563 umtx_pi_adjust(struct thread *td, u_char oldpri)
 1564 {
 1565         struct umtx_q *uq;
 1566         struct umtx_pi *pi;
 1567 
 1568         uq = td->td_umtxq;
 1569         mtx_lock_spin(&umtx_lock);
 1570         /*
 1571          * Pick up the lock that td is blocked on.
 1572          */
 1573         pi = uq->uq_pi_blocked;
 1574         if (pi != NULL)
 1575                 umtx_pi_adjust_locked(td, oldpri);
 1576         mtx_unlock_spin(&umtx_lock);
 1577 }
 1578 
 1579 /*
 1580  * Sleep on a PI mutex.
 1581  */
 1582 static int
 1583 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
 1584         uint32_t owner, const char *wmesg, int timo)
 1585 {
 1586         struct umtxq_chain *uc;
 1587         struct thread *td, *td1;
 1588         struct umtx_q *uq1;
 1589         int pri;
 1590         int error = 0;
 1591 
 1592         td = uq->uq_thread;
 1593         KASSERT(td == curthread, ("inconsistent uq_thread"));
 1594         uc = umtxq_getchain(&uq->uq_key);
 1595         UMTXQ_LOCKED_ASSERT(uc);
 1596         UMTXQ_BUSY_ASSERT(uc);
 1597         umtxq_insert(uq);
 1598         mtx_lock_spin(&umtx_lock);
 1599         if (pi->pi_owner == NULL) {
 1600                 /* XXX
 1601                  * Current, We only support process private PI-mutex,
 1602                  * non-contended PI-mutexes are locked in userland.
 1603                  * Process shared PI-mutex should always be initialized
 1604                  * by kernel and be registered in kernel, locking should
 1605                  * always be done by kernel to avoid security problems.
 1606                  * For process private PI-mutex, we can find owner
 1607                  * thread and boost its priority safely.
 1608                  */
 1609                 mtx_unlock_spin(&umtx_lock);
 1610                 PROC_LOCK(curproc);
 1611                 td1 = thread_find(curproc, owner);
 1612                 mtx_lock_spin(&umtx_lock);
 1613                 if (td1 != NULL && pi->pi_owner == NULL) {
 1614                         uq1 = td1->td_umtxq;
 1615                         umtx_pi_setowner(pi, td1);
 1616                 }
 1617                 PROC_UNLOCK(curproc);
 1618         }
 1619 
 1620         TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
 1621                 pri = UPRI(uq1->uq_thread);
 1622                 if (pri > UPRI(td))
 1623                         break;
 1624         }
 1625 
 1626         if (uq1 != NULL)
 1627                 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
 1628         else
 1629                 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
 1630 
 1631         uq->uq_pi_blocked = pi;
 1632         thread_lock(td);
 1633         td->td_flags |= TDF_UPIBLOCKED;
 1634         thread_unlock(td);
 1635         umtx_propagate_priority(td);
 1636         mtx_unlock_spin(&umtx_lock);
 1637         umtxq_unbusy(&uq->uq_key);
 1638 
 1639         if (uq->uq_flags & UQF_UMTXQ) {
 1640                 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
 1641                 if (error == EWOULDBLOCK)
 1642                         error = ETIMEDOUT;
 1643                 if (uq->uq_flags & UQF_UMTXQ) {
 1644                         umtxq_remove(uq);
 1645                 }
 1646         }
 1647         mtx_lock_spin(&umtx_lock);
 1648         uq->uq_pi_blocked = NULL;
 1649         thread_lock(td);
 1650         td->td_flags &= ~TDF_UPIBLOCKED;
 1651         thread_unlock(td);
 1652         TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
 1653         umtx_unpropagate_priority(pi);
 1654         mtx_unlock_spin(&umtx_lock);
 1655         umtxq_unlock(&uq->uq_key);
 1656 
 1657         return (error);
 1658 }
 1659 
 1660 /*
 1661  * Add reference count for a PI mutex.
 1662  */
 1663 static void
 1664 umtx_pi_ref(struct umtx_pi *pi)
 1665 {
 1666         struct umtxq_chain *uc;
 1667 
 1668         uc = umtxq_getchain(&pi->pi_key);
 1669         UMTXQ_LOCKED_ASSERT(uc);
 1670         pi->pi_refcount++;
 1671 }
 1672 
 1673 /*
 1674  * Decrease reference count for a PI mutex, if the counter
 1675  * is decreased to zero, its memory space is freed.
 1676  */ 
 1677 static void
 1678 umtx_pi_unref(struct umtx_pi *pi)
 1679 {
 1680         struct umtxq_chain *uc;
 1681 
 1682         uc = umtxq_getchain(&pi->pi_key);
 1683         UMTXQ_LOCKED_ASSERT(uc);
 1684         KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
 1685         if (--pi->pi_refcount == 0) {
 1686                 mtx_lock_spin(&umtx_lock);
 1687                 if (pi->pi_owner != NULL) {
 1688                         TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
 1689                                 pi, pi_link);
 1690                         pi->pi_owner = NULL;
 1691                 }
 1692                 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
 1693                         ("blocked queue not empty"));
 1694                 mtx_unlock_spin(&umtx_lock);
 1695                 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
 1696                 umtx_pi_free(pi);
 1697         }
 1698 }
 1699 
 1700 /*
 1701  * Find a PI mutex in hash table.
 1702  */
 1703 static struct umtx_pi *
 1704 umtx_pi_lookup(struct umtx_key *key)
 1705 {
 1706         struct umtxq_chain *uc;
 1707         struct umtx_pi *pi;
 1708 
 1709         uc = umtxq_getchain(key);
 1710         UMTXQ_LOCKED_ASSERT(uc);
 1711 
 1712         TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
 1713                 if (umtx_key_match(&pi->pi_key, key)) {
 1714                         return (pi);
 1715                 }
 1716         }
 1717         return (NULL);
 1718 }
 1719 
 1720 /*
 1721  * Insert a PI mutex into hash table.
 1722  */
 1723 static inline void
 1724 umtx_pi_insert(struct umtx_pi *pi)
 1725 {
 1726         struct umtxq_chain *uc;
 1727 
 1728         uc = umtxq_getchain(&pi->pi_key);
 1729         UMTXQ_LOCKED_ASSERT(uc);
 1730         TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
 1731 }
 1732 
/*
 * Lock a PI (priority-inheritance) mutex.
 *
 * 'timo' is a sleep timeout in ticks (0 means no timeout); a non-zero
 * 'try' makes the attempt non-blocking (EBUSY instead of sleeping).
 * Returns 0 on success or an errno value.
 */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		/*
		 * No kernel PI state exists for this mutex yet.  Try a
		 * no-sleep allocation first; if that fails, drop the
		 * chain lock, allocate with M_WAITOK, and re-check for
		 * a racing insert after relocking.
		 */
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				/* Lost the race; discard our copy. */
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	/* Hold a reference on the PI state across the access loop below. */
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/*
				 * We got the lock; record ourselves as
				 * the PI owner so waiters can lend us
				 * their priority.
				 */
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/* Error-check mutexes report self-deadlock. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		else {
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
		}
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
 1881 
/*
 * Unlock a PI mutex.  The caller must own the mutex.  Kernel PI
 * ownership is dropped, the caller's lent priority is recomputed from
 * the mutexes it still holds contested, and the highest-priority
 * still-queued waiter is woken.
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Contention appeared after the fuword32; fall through. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock_spin(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != curthread) {
			mtx_unlock_spin(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		/* Disown the kernel PI state before waking anyone. */
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		       (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute our lent priority from the highest-priority
		 * waiter on any PI mutex we still hold contested.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_unlend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
 1981 
 1982 /*
 1983  * Lock a PP mutex.
 1984  */
 1985 static int
 1986 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
 1987         int try)
 1988 {
 1989         struct umtx_q *uq, *uq2;
 1990         struct umtx_pi *pi;
 1991         uint32_t ceiling;
 1992         uint32_t owner, id;
 1993         int error, pri, old_inherited_pri, su;
 1994 
 1995         id = td->td_tid;
 1996         uq = td->td_umtxq;
 1997         if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
 1998             &uq->uq_key)) != 0)
 1999                 return (error);
 2000         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
 2001         for (;;) {
 2002                 old_inherited_pri = uq->uq_inherited_pri;
 2003                 umtxq_lock(&uq->uq_key);
 2004                 umtxq_busy(&uq->uq_key);
 2005                 umtxq_unlock(&uq->uq_key);
 2006 
 2007                 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
 2008                 if (ceiling > RTP_PRIO_MAX) {
 2009                         error = EINVAL;
 2010                         goto out;
 2011                 }
 2012 
 2013                 mtx_lock_spin(&umtx_lock);
 2014                 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
 2015                         mtx_unlock_spin(&umtx_lock);
 2016                         error = EINVAL;
 2017                         goto out;
 2018                 }
 2019                 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
 2020                         uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
 2021                         thread_lock(td);
 2022                         if (uq->uq_inherited_pri < UPRI(td))
 2023                                 sched_lend_user_prio(td, uq->uq_inherited_pri);
 2024                         thread_unlock(td);
 2025                 }
 2026                 mtx_unlock_spin(&umtx_lock);
 2027 
 2028                 owner = casuword32(&m->m_owner,
 2029                     UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 2030 
 2031                 if (owner == UMUTEX_CONTESTED) {
 2032                         error = 0;
 2033                         break;
 2034                 }
 2035 
 2036                 /* The address was invalid. */
 2037                 if (owner == -1) {
 2038                         error = EFAULT;
 2039                         break;
 2040                 }
 2041 
 2042                 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
 2043                     (owner & ~UMUTEX_CONTESTED) == id) {
 2044                         error = EDEADLK;
 2045                         break;
 2046                 }
 2047 
 2048                 if (try != 0) {
 2049                         error = EBUSY;
 2050                         break;
 2051                 }
 2052 
 2053                 /*
 2054                  * If we caught a signal, we have retried and now
 2055                  * exit immediately.
 2056                  */
 2057                 if (error != 0)
 2058                         break;
 2059 
 2060                 umtxq_lock(&uq->uq_key);
 2061                 umtxq_insert(uq);
 2062                 umtxq_unbusy(&uq->uq_key);
 2063                 error = umtxq_sleep(uq, "umtxpp", timo);
 2064                 umtxq_remove(uq);
 2065                 umtxq_unlock(&uq->uq_key);
 2066 
 2067                 mtx_lock_spin(&umtx_lock);
 2068                 uq->uq_inherited_pri = old_inherited_pri;
 2069                 pri = PRI_MAX;
 2070                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2071                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2072                         if (uq2 != NULL) {
 2073                                 if (pri > UPRI(uq2->uq_thread))
 2074                                         pri = UPRI(uq2->uq_thread);
 2075                         }
 2076                 }
 2077                 if (pri > uq->uq_inherited_pri)
 2078                         pri = uq->uq_inherited_pri;
 2079                 thread_lock(td);
 2080                 sched_unlend_user_prio(td, pri);
 2081                 thread_unlock(td);
 2082                 mtx_unlock_spin(&umtx_lock);
 2083         }
 2084 
 2085         if (error != 0) {
 2086                 mtx_lock_spin(&umtx_lock);
 2087                 uq->uq_inherited_pri = old_inherited_pri;
 2088                 pri = PRI_MAX;
 2089                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2090                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2091                         if (uq2 != NULL) {
 2092                                 if (pri > UPRI(uq2->uq_thread))
 2093                                         pri = UPRI(uq2->uq_thread);
 2094                         }
 2095                 }
 2096                 if (pri > uq->uq_inherited_pri)
 2097                         pri = uq->uq_inherited_pri;
 2098                 thread_lock(td);
 2099                 sched_unlend_user_prio(td, pri);
 2100                 thread_unlock(td);
 2101                 mtx_unlock_spin(&umtx_lock);
 2102         }
 2103 
 2104 out:
 2105         umtxq_lock(&uq->uq_key);
 2106         umtxq_unbusy(&uq->uq_key);
 2107         umtxq_unlock(&uq->uq_key);
 2108         umtx_key_release(&uq->uq_key);
 2109         return (error);
 2110 }
 2111 
/*
 * Unlock a PP (priority-protect) mutex.  The caller must own the
 * mutex.  The owner word is reset to UMUTEX_CONTESTED (never
 * UMUTEX_UNOWNED; see comment below), one waiter is woken, and the
 * caller's lent priority is recomputed from the relinquish ceiling
 * stored in m_ceilings[1].
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	/* Only privileged threads may adjust inherited priority. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	/* A relinquish ceiling of -1 means "no priority to restore". */
	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		/* Map the POSIX ceiling onto a kernel realtime priority. */
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		/*
		 * Recompute the lent priority from the highest-priority
		 * waiter on any mutex this thread still holds contested.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
 2197 
/*
 * Set the priority ceiling of a PP mutex (backs
 * pthread_mutex_setprioceiling()).  The mutex is briefly acquired
 * (or, when the caller already owns it, updated in place) so the
 * ceiling change cannot race with other lockers.  On success the
 * previous ceiling is copied out through 'old_ceiling' when non-NULL.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	/* Only PP mutexes carry a ceiling. */
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to take the mutex from its unlocked (contested) state. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Got it: store the new ceiling, then release. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already own it: update the ceiling in place. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	/* Wake all waiters so they re-evaluate against the new ceiling. */
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
 2277 
 2278 static int
 2279 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
 2280         int mode)
 2281 {
 2282         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2283         case 0:
 2284                 return (_do_lock_normal(td, m, flags, timo, mode));
 2285         case UMUTEX_PRIO_INHERIT:
 2286                 return (_do_lock_pi(td, m, flags, timo, mode));
 2287         case UMUTEX_PRIO_PROTECT:
 2288                 return (_do_lock_pp(td, m, flags, timo, mode));
 2289         }
 2290         return (EINVAL);
 2291 }
 2292 
/*
 * Lock a userland POSIX mutex.
 * 'timeout', when non-NULL, is a relative interval; it is converted
 * into an absolute uptime deadline and the lock attempt is retried
 * with the remaining time after each ETIMEDOUT from the primitive.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int mode)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, mode);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* 'ts' becomes the absolute deadline (uptime clock). */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
			if (error != ETIMEDOUT)
				break;
			/* Sleep expired: recompute the remaining time. */
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
 2337 
 2338 /*
 2339  * Unlock a userland POSIX mutex.
 2340  */
 2341 static int
 2342 do_unlock_umutex(struct thread *td, struct umutex *m)
 2343 {
 2344         uint32_t flags;
 2345 
 2346         flags = fuword32(&m->m_flags);
 2347         if (flags == -1)
 2348                 return (EFAULT);
 2349 
 2350         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2351         case 0:
 2352                 return (do_unlock_normal(td, m, flags));
 2353         case UMUTEX_PRIO_INHERIT:
 2354                 return (do_unlock_pi(td, m, flags));
 2355         case UMUTEX_PRIO_PROTECT:
 2356                 return (do_unlock_pp(td, m, flags));
 2357         }
 2358 
 2359         return (EINVAL);
 2360 }
 2361 
/*
 * Wait on a userland condition variable.
 * The thread queues itself and sets c_has_waiters *before* releasing
 * the protecting mutex 'm', so a signal issued between the unlock and
 * the sleep cannot be lost.  'timeout', when non-NULL, is a relative
 * interval measured against the uptime clock.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	/* Only now is it safe to drop the associated mutex. */
	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			/* A pending unpark consumes the wait. */
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* Convert the interval to an absolute deadline. */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause supurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows supurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
 2445 
/*
 * Signal a userland condition variable: wake at most one waiter, and
 * clear the userland c_has_waiters flag once the last queued waiter
 * has been woken.  The queue is kept busy across the user-space store
 * so a concurrent do_cv_wait() cannot interleave.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/*
		 * No waiters remain; drop the queue lock (the store may
		 * fault/sleep) and clear the userland waiters flag.
		 */
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
 2474 
/*
 * Broadcast a userland condition variable: wake every queued waiter
 * and clear the userland c_has_waiters flag.  The queue stays busy
 * across the user-space store so a concurrent waiter cannot set the
 * flag between the wakeup and the clear.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/* Queue lock dropped: the store into userland may fault/sleep. */
	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}
 2500 
/*
 * Acquire a userland rwlock for reading, sleeping in the kernel while a
 * writer owns it (or, unless reader preference is in effect, while
 * writers are queued).  timo is in ticks; 0 means no timeout.  Returns
 * 0 on success, EAGAIN if the reader count would overflow, or the error
 * from an interrupted/timed-out sleep.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
{
        struct umtx_q *uq;
        uint32_t flags, wrflags;
        int32_t state, oldstate;
        int32_t blocked_readers;
        int error;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        /*
         * Unless the caller or the lock prefers readers, treat queued
         * writers like a write owner so that readers do not starve them.
         */
        wrflags = URWLOCK_WRITE_OWNER;
        if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
                wrflags |= URWLOCK_WRITE_WAITERS;

        for (;;) {
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                /* try to lock it */
                while (!(state & wrflags)) {
                        if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
                                umtx_key_release(&uq->uq_key);
                                return (EAGAIN);
                        }
                        /* The low bits of rw_state hold the reader count. */
                        oldstate = casuword32(&rwlock->rw_state, state, state + 1);
                        if (oldstate == state) {
                                umtx_key_release(&uq->uq_key);
                                return (0);
                        }
                        state = oldstate;
                }

                /* A previous sleep failed (EINTR/ETIMEDOUT); give up. */
                if (error)
                        break;

                /* grab monitor lock */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * re-read the state, in case it changed between the try-lock above
                 * and the check below
                 */
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

                /* set read contention bit */
                while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
                        if (oldstate == state)
                                goto sleep;
                        state = oldstate;
                }

                /* state is changed while setting flags, restart */
                if (!(state & wrflags)) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        continue;
                }

sleep:
                /* contention bit is set, before sleeping, increase read waiter count */
                blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

                while (state & wrflags) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_insert(uq);
                        /* Only unbusy the chain once we are on the queue. */
                        umtxq_unbusy(&uq->uq_key);

                        error = umtxq_sleep(uq, "urdlck", timo);

                        umtxq_busy(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        if (error)
                                break;
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                }

                /* decrease read waiter count, and may clear read contention bit */
                blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
                if (blocked_readers == 1) {
                        /* We were the last blocked reader; clear the bit. */
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                        for (;;) {
                                oldstate = casuword32(&rwlock->rw_state, state,
                                         state & ~URWLOCK_READ_WAITERS);
                                if (oldstate == state)
                                        break;
                                state = oldstate;
                        }
                }

                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
        }
        umtx_key_release(&uq->uq_key);
        return (error);
}
 2607 
 2608 static int
 2609 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
 2610 {
 2611         struct timespec ts, ts2, ts3;
 2612         struct timeval tv;
 2613         int error;
 2614 
 2615         getnanouptime(&ts);
 2616         timespecadd(&ts, timeout);
 2617         TIMESPEC_TO_TIMEVAL(&tv, timeout);
 2618         for (;;) {
 2619                 error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
 2620                 if (error != ETIMEDOUT)
 2621                         break;
 2622                 getnanouptime(&ts2);
 2623                 if (timespeccmp(&ts2, &ts, >=)) {
 2624                         error = ETIMEDOUT;
 2625                         break;
 2626                 }
 2627                 ts3 = ts;
 2628                 timespecsub(&ts3, &ts2);
 2629                 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
 2630         }
 2631         if (error == ERESTART)
 2632                 error = EINTR;
 2633         return (error);
 2634 }
 2635 
/*
 * Acquire a userland rwlock for writing, sleeping in the kernel while
 * it is owned by a writer or held by readers.  timo is in ticks; 0
 * means no timeout.  Returns 0 on success or the sleep error.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
{
        struct umtx_q *uq;
        uint32_t flags;
        int32_t state, oldstate;
        int32_t blocked_writers;
        int32_t blocked_readers;
        int error;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        blocked_readers = 0;
        for (;;) {
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                /* Try to grab the lock while it is completely free. */
                while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
                        if (oldstate == state) {
                                umtx_key_release(&uq->uq_key);
                                return (0);
                        }
                        state = oldstate;
                }

                if (error) {
                        /*
                         * The sleep below failed (EINTR/ETIMEDOUT).  If no
                         * writer is left to take over and readers were
                         * blocked behind us, wake them all so they can
                         * retry the lock.
                         */
                        if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
                            blocked_readers != 0) {
                                umtxq_lock(&uq->uq_key);
                                umtxq_busy(&uq->uq_key);
                                umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
                                umtxq_unbusy(&uq->uq_key);
                                umtxq_unlock(&uq->uq_key);
                        }

                        break;
                }

                /* grab monitor lock */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * re-read the state, in case it changed between the try-lock above
                 * and the check below
                 */
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

                /* Advertise a waiting writer while the lock is still held. */
                while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
                       (state & URWLOCK_WRITE_WAITERS) == 0) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
                        if (oldstate == state)
                                goto sleep;
                        state = oldstate;
                }

                /* The lock was released while setting the flag; restart. */
                if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        continue;
                }
sleep:
                /* Count ourselves as a blocked writer before sleeping. */
                blocked_writers = fuword32(&rwlock->rw_blocked_writers);
                suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

                while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
                        /* Only unbusy the chain once we are on the queue. */
                        umtxq_unbusy(&uq->uq_key);

                        error = umtxq_sleep(uq, "uwrlck", timo);

                        umtxq_busy(&uq->uq_key);
                        umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
                        umtxq_unlock(&uq->uq_key);
                        if (error)
                                break;
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                }

                blocked_writers = fuword32(&rwlock->rw_blocked_writers);
                suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
                if (blocked_writers == 1) {
                        /* Last blocked writer: clear the waiters bit ... */
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                        for (;;) {
                                oldstate = casuword32(&rwlock->rw_state, state,
                                         state & ~URWLOCK_WRITE_WAITERS);
                                if (oldstate == state)
                                        break;
                                state = oldstate;
                        }
                        /*
                         * ... and remember whether readers were blocked
                         * behind it, for the error hand-off above.
                         */
                        blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                } else
                        blocked_readers = 0;

                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
        }

        umtx_key_release(&uq->uq_key);
        return (error);
}
 2744 
 2745 static int
 2746 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
 2747 {
 2748         struct timespec ts, ts2, ts3;
 2749         struct timeval tv;
 2750         int error;
 2751 
 2752         getnanouptime(&ts);
 2753         timespecadd(&ts, timeout);
 2754         TIMESPEC_TO_TIMEVAL(&tv, timeout);
 2755         for (;;) {
 2756                 error = do_rw_wrlock(td, obj, tvtohz(&tv));
 2757                 if (error != ETIMEDOUT)
 2758                         break;
 2759                 getnanouptime(&ts2);
 2760                 if (timespeccmp(&ts2, &ts, >=)) {
 2761                         error = ETIMEDOUT;
 2762                         break;
 2763                 }
 2764                 ts3 = ts;
 2765                 timespecsub(&ts3, &ts2);
 2766                 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
 2767         }
 2768         if (error == ERESTART)
 2769                 error = EINTR;
 2770         return (error);
 2771 }
 2772 
/*
 * Release a userland rwlock held by the caller, either dropping write
 * ownership or decrementing the reader count, then wake the waiters
 * appropriate for the lock's preference policy.  Returns EPERM when the
 * lock is not actually held in the mode being released.
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
        struct umtx_q *uq;
        uint32_t flags;
        int32_t state, oldstate;
        int error, q, count;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
        if (state & URWLOCK_WRITE_OWNER) {
                /* CAS loop: clear write ownership. */
                for (;;) {
                        oldstate = casuword32(&rwlock->rw_state, state,
                                state & ~URWLOCK_WRITE_OWNER);
                        if (oldstate != state) {
                                state = oldstate;
                                /* Someone else already released it. */
                                if (!(oldstate & URWLOCK_WRITE_OWNER)) {
                                        error = EPERM;
                                        goto out;
                                }
                        } else
                                break;
                }
        } else if (URWLOCK_READER_COUNT(state) != 0) {
                /* CAS loop: drop one reader from the count. */
                for (;;) {
                        oldstate = casuword32(&rwlock->rw_state, state,
                                state - 1);
                        if (oldstate != state) {
                                state = oldstate;
                                /* Reader count already went to zero. */
                                if (URWLOCK_READER_COUNT(oldstate) == 0) {
                                        error = EPERM;
                                        goto out;
                                }
                        }
                        else
                                break;
                }
        } else {
                /* Neither write-owned nor read-held: nothing to unlock. */
                error = EPERM;
                goto out;
        }

        count = 0;

        /*
         * Pick which queue to wake: by default a single writer is
         * preferred over the readers; with URWLOCK_PREFER_READER all
         * readers are woken first.  'q' is only used when count != 0.
         */
        if (!(flags & URWLOCK_PREFER_READER)) {
                if (state & URWLOCK_WRITE_WAITERS) {
                        count = 1;
                        q = UMTX_EXCLUSIVE_QUEUE;
                } else if (state & URWLOCK_READ_WAITERS) {
                        count = INT_MAX;
                        q = UMTX_SHARED_QUEUE;
                }
        } else {
                if (state & URWLOCK_READ_WAITERS) {
                        count = INT_MAX;
                        q = UMTX_SHARED_QUEUE;
                } else if (state & URWLOCK_WRITE_WAITERS) {
                        count = 1;
                        q = UMTX_EXCLUSIVE_QUEUE;
                }
        }

        if (count) {
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_signal_queue(&uq->uq_key, count, q);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
        }
out:
        umtx_key_release(&uq->uq_key);
        return (error);
}
 2851 
 2852 int
 2853 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
 2854     /* struct umtx *umtx */
 2855 {
 2856         return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
 2857 }
 2858 
 2859 int
 2860 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
 2861     /* struct umtx *umtx */
 2862 {
 2863         return do_unlock_umtx(td, uap->umtx, td->td_tid);
 2864 }
 2865 
 2866 inline int
 2867 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
 2868 {
 2869         int error;
 2870 
 2871         error = copyin(addr, tsp, sizeof(struct timespec));
 2872         if (error == 0) {
 2873                 if (tsp->tv_sec < 0 ||
 2874                     tsp->tv_nsec >= 1000000000 ||
 2875                     tsp->tv_nsec < 0)
 2876                         error = EINVAL;
 2877         }
 2878         return (error);
 2879 }
 2880 
 2881 static int
 2882 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
 2883 {
 2884         struct timespec *ts, timeout;
 2885         int error;
 2886 
 2887         /* Allow a null timespec (wait forever). */
 2888         if (uap->uaddr2 == NULL)
 2889                 ts = NULL;
 2890         else {
 2891                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2892                 if (error != 0)
 2893                         return (error);
 2894                 ts = &timeout;
 2895         }
 2896         return (do_lock_umtx(td, uap->obj, uap->val, ts));
 2897 }
 2898 
 2899 static int
 2900 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
 2901 {
 2902         return (do_unlock_umtx(td, uap->obj, uap->val));
 2903 }
 2904 
 2905 static int
 2906 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
 2907 {
 2908         struct timespec *ts, timeout;
 2909         int error;
 2910 
 2911         if (uap->uaddr2 == NULL)
 2912                 ts = NULL;
 2913         else {
 2914                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2915                 if (error != 0)
 2916                         return (error);
 2917                 ts = &timeout;
 2918         }
 2919         return do_wait(td, uap->obj, uap->val, ts, 0, 0);
 2920 }
 2921 
 2922 static int
 2923 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
 2924 {
 2925         struct timespec *ts, timeout;
 2926         int error;
 2927 
 2928         if (uap->uaddr2 == NULL)
 2929                 ts = NULL;
 2930         else {
 2931                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2932                 if (error != 0)
 2933                         return (error);
 2934                 ts = &timeout;
 2935         }
 2936         return do_wait(td, uap->obj, uap->val, ts, 1, 0);
 2937 }
 2938 
 2939 static int
 2940 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
 2941 {
 2942         struct timespec *ts, timeout;
 2943         int error;
 2944 
 2945         if (uap->uaddr2 == NULL)
 2946                 ts = NULL;
 2947         else {
 2948                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2949                 if (error != 0)
 2950                         return (error);
 2951                 ts = &timeout;
 2952         }
 2953         return do_wait(td, uap->obj, uap->val, ts, 1, 1);
 2954 }
 2955 
 2956 static int
 2957 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
 2958 {
 2959         return (kern_umtx_wake(td, uap->obj, uap->val, 0));
 2960 }
 2961 
 2962 static int
 2963 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
 2964 {
 2965         return (kern_umtx_wake(td, uap->obj, uap->val, 1));
 2966 }
 2967 
 2968 static int
 2969 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
 2970 {
 2971         struct timespec *ts, timeout;
 2972         int error;
 2973 
 2974         /* Allow a null timespec (wait forever). */
 2975         if (uap->uaddr2 == NULL)
 2976                 ts = NULL;
 2977         else {
 2978                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 2979                 if (error != 0)
 2980                         return (error);
 2981                 ts = &timeout;
 2982         }
 2983         return do_lock_umutex(td, uap->obj, ts, 0);
 2984 }
 2985 
 2986 static int
 2987 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
 2988 {
 2989         return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
 2990 }
 2991 
 2992 static int
 2993 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
 2994 {
 2995         struct timespec *ts, timeout;
 2996         int error;
 2997 
 2998         /* Allow a null timespec (wait forever). */
 2999         if (uap->uaddr2 == NULL)
 3000                 ts = NULL;
 3001         else {
 3002                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3003                 if (error != 0)
 3004                         return (error);
 3005                 ts = &timeout;
 3006         }
 3007         return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
 3008 }
 3009 
 3010 static int
 3011 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
 3012 {
 3013         return do_wake_umutex(td, uap->obj);
 3014 }
 3015 
 3016 static int
 3017 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
 3018 {
 3019         return do_unlock_umutex(td, uap->obj);
 3020 }
 3021 
 3022 static int
 3023 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
 3024 {
 3025         return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
 3026 }
 3027 
 3028 static int
 3029 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
 3030 {
 3031         struct timespec *ts, timeout;
 3032         int error;
 3033 
 3034         /* Allow a null timespec (wait forever). */
 3035         if (uap->uaddr2 == NULL)
 3036                 ts = NULL;
 3037         else {
 3038                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3039                 if (error != 0)
 3040                         return (error);
 3041                 ts = &timeout;
 3042         }
 3043         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 3044 }
 3045 
 3046 static int
 3047 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
 3048 {
 3049         return do_cv_signal(td, uap->obj);
 3050 }
 3051 
 3052 static int
 3053 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
 3054 {
 3055         return do_cv_broadcast(td, uap->obj);
 3056 }
 3057 
 3058 static int
 3059 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
 3060 {
 3061         struct timespec timeout;
 3062         int error;
 3063 
 3064         /* Allow a null timespec (wait forever). */
 3065         if (uap->uaddr2 == NULL) {
 3066                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3067         } else {
 3068                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3069                 if (error != 0)
 3070                         return (error);
 3071                 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
 3072         }
 3073         return (error);
 3074 }
 3075 
 3076 static int
 3077 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
 3078 {
 3079         struct timespec timeout;
 3080         int error;
 3081 
 3082         /* Allow a null timespec (wait forever). */
 3083         if (uap->uaddr2 == NULL) {
 3084                 error = do_rw_wrlock(td, uap->obj, 0);
 3085         } else {
 3086                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3087                 if (error != 0)
 3088                         return (error);
 3089 
 3090                 error = do_rw_wrlock2(td, uap->obj, &timeout);
 3091         }
 3092         return (error);
 3093 }
 3094 
 3095 static int
 3096 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
 3097 {
 3098         return do_rw_unlock(td, uap->obj);
 3099 }
 3100 
 3101 static int
 3102 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
 3103 {
 3104         return do_wake2_umutex(td, uap->obj, uap->val);
 3105 }
 3106 
 3107 static int
 3108 __umtx_op_not_sup(struct thread *td __unused, struct _umtx_op_args *uap __unused)
 3109 {
 3110         return ENOTSUP;
 3111 }
 3112 
/* Signature shared by every UMTX_OP_* handler. */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Dispatch table for _umtx_op(2).  Entry order must match the
 * UMTX_OP_* constants, which index directly into this array.
 */
static _umtx_op_func op_table[] = {
        __umtx_op_lock_umtx,            /* UMTX_OP_LOCK */
        __umtx_op_unlock_umtx,          /* UMTX_OP_UNLOCK */
        __umtx_op_wait,                 /* UMTX_OP_WAIT */
        __umtx_op_wake,                 /* UMTX_OP_WAKE */
        __umtx_op_trylock_umutex,       /* UMTX_OP_MUTEX_TRYLOCK */
        __umtx_op_lock_umutex,          /* UMTX_OP_MUTEX_LOCK */
        __umtx_op_unlock_umutex,        /* UMTX_OP_MUTEX_UNLOCK */
        __umtx_op_set_ceiling,          /* UMTX_OP_SET_CEILING */
        __umtx_op_cv_wait,              /* UMTX_OP_CV_WAIT*/
        __umtx_op_cv_signal,            /* UMTX_OP_CV_SIGNAL */
        __umtx_op_cv_broadcast,         /* UMTX_OP_CV_BROADCAST */
        __umtx_op_wait_uint,            /* UMTX_OP_WAIT_UINT */
        __umtx_op_rw_rdlock,            /* UMTX_OP_RW_RDLOCK */
        __umtx_op_rw_wrlock,            /* UMTX_OP_RW_WRLOCK */
        __umtx_op_rw_unlock,            /* UMTX_OP_RW_UNLOCK */
        __umtx_op_wait_uint_private,    /* UMTX_OP_WAIT_UINT_PRIVATE */
        __umtx_op_wake_private,         /* UMTX_OP_WAKE_PRIVATE */
        __umtx_op_wait_umutex,          /* UMTX_OP_UMUTEX_WAIT */
        __umtx_op_wake_umutex,          /* UMTX_OP_UMUTEX_WAKE */
        __umtx_op_not_sup,              /* UMTX_OP_SEM_WAIT */
        __umtx_op_not_sup,              /* UMTX_OP_SEM_WAKE */
        __umtx_op_not_sup,              /* UMTX_OP_NWAKE_PRIVATE */
        __umtx_op_wake2_umutex          /* UMTX_OP_UMUTEX_WAKE2 */
};
 3140 
 3141 int
 3142 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
 3143 {
 3144         if ((unsigned)uap->op < UMTX_OP_MAX)
 3145                 return (*op_table[uap->op])(td, uap);
 3146         return (EINVAL);
 3147 }
 3148 
 3149 #ifdef COMPAT_FREEBSD32
 3150 int
 3151 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
 3152     /* struct umtx *umtx */
 3153 {
 3154         return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
 3155 }
 3156 
 3157 int
 3158 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
 3159     /* struct umtx *umtx */
 3160 {
 3161         return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
 3162 }
 3163 
 3164 struct timespec32 {
 3165         u_int32_t tv_sec;
 3166         u_int32_t tv_nsec;
 3167 };
 3168 
 3169 static inline int
 3170 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
 3171 {
 3172         struct timespec32 ts32;
 3173         int error;
 3174 
 3175         error = copyin(addr, &ts32, sizeof(struct timespec32));
 3176         if (error == 0) {
 3177                 if (ts32.tv_sec < 0 ||
 3178                     ts32.tv_nsec >= 1000000000 ||
 3179                     ts32.tv_nsec < 0)
 3180                         error = EINVAL;
 3181                 else {
 3182                         tsp->tv_sec = ts32.tv_sec;
 3183                         tsp->tv_nsec = ts32.tv_nsec;
 3184                 }
 3185         }
 3186         return (error);
 3187 }
 3188 
 3189 static int
 3190 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3191 {
 3192         struct timespec *ts, timeout;
 3193         int error;
 3194 
 3195         /* Allow a null timespec (wait forever). */
 3196         if (uap->uaddr2 == NULL)
 3197                 ts = NULL;
 3198         else {
 3199                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3200                 if (error != 0)
 3201                         return (error);
 3202                 ts = &timeout;
 3203         }
 3204         return (do_lock_umtx32(td, uap->obj, uap->val, ts));
 3205 }
 3206 
 3207 static int
 3208 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3209 {
 3210         return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
 3211 }
 3212 
 3213 static int
 3214 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3215 {
 3216         struct timespec *ts, timeout;
 3217         int error;
 3218 
 3219         if (uap->uaddr2 == NULL)
 3220                 ts = NULL;
 3221         else {
 3222                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3223                 if (error != 0)
 3224                         return (error);
 3225                 ts = &timeout;
 3226         }
 3227         return do_wait(td, uap->obj, uap->val, ts, 1, 0);
 3228 }
 3229 
 3230 static int
 3231 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3232 {
 3233         struct timespec *ts, timeout;
 3234         int error;
 3235 
 3236         /* Allow a null timespec (wait forever). */
 3237         if (uap->uaddr2 == NULL)
 3238                 ts = NULL;
 3239         else {
 3240                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3241                 if (error != 0)
 3242                         return (error);
 3243                 ts = &timeout;
 3244         }
 3245         return do_lock_umutex(td, uap->obj, ts, 0);
 3246 }
 3247 
 3248 static int
 3249 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3250 {
 3251         struct timespec *ts, timeout;
 3252         int error;
 3253 
 3254         /* Allow a null timespec (wait forever). */
 3255         if (uap->uaddr2 == NULL)
 3256                 ts = NULL;
 3257         else {
 3258                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3259                 if (error != 0)
 3260                         return (error);
 3261                 ts = &timeout;
 3262         }
 3263         return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
 3264 }
 3265 
 3266 static int
 3267 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3268 {
 3269         struct timespec *ts, timeout;
 3270         int error;
 3271 
 3272         /* Allow a null timespec (wait forever). */
 3273         if (uap->uaddr2 == NULL)
 3274                 ts = NULL;
 3275         else {
 3276                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3277                 if (error != 0)
 3278                         return (error);
 3279                 ts = &timeout;
 3280         }
 3281         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 3282 }
 3283 
 3284 static int
 3285 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3286 {
 3287         struct timespec timeout;
 3288         int error;
 3289 
 3290         /* Allow a null timespec (wait forever). */
 3291         if (uap->uaddr2 == NULL) {
 3292                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3293         } else {
 3294                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3295                 if (error != 0)
 3296                         return (error);
 3297                 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
 3298         }
 3299         return (error);
 3300 }
 3301 
 3302 static int
 3303 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3304 {
 3305         struct timespec timeout;
 3306         int error;
 3307 
 3308         /* Allow a null timespec (wait forever). */
 3309         if (uap->uaddr2 == NULL) {
 3310                 error = do_rw_wrlock(td, uap->obj, 0);
 3311         } else {
 3312                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3313                 if (error != 0)
 3314                         return (error);
 3315 
 3316                 error = do_rw_wrlock2(td, uap->obj, &timeout);
 3317         }
 3318         return (error);
 3319 }
 3320 
 3321 static int
 3322 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
 3323 {
 3324         struct timespec *ts, timeout;
 3325         int error;
 3326 
 3327         if (uap->uaddr2 == NULL)
 3328                 ts = NULL;
 3329         else {
 3330                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3331                 if (error != 0)
 3332                         return (error);
 3333                 ts = &timeout;
 3334         }
 3335         return do_wait(td, uap->obj, uap->val, ts, 1, 1);
 3336 }
 3337 
/*
 * Dispatch table for freebsd32_umtx_op(); entry order must match the
 * UMTX_OP_* constants and mirror op_table above.  Note the entries for
 * UMTX_OP_MUTEX_TRYLOCK (index 4) and UMTX_OP_MUTEX_LOCK (index 5)
 * match the native table; earlier revisions carried swapped comments
 * on these two slots.
 */
static _umtx_op_func op_table_compat32[] = {
        __umtx_op_lock_umtx_compat32,   /* UMTX_OP_LOCK */
        __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */
        __umtx_op_wait_compat32,        /* UMTX_OP_WAIT */
        __umtx_op_wake,                 /* UMTX_OP_WAKE */
        __umtx_op_trylock_umutex,       /* UMTX_OP_MUTEX_TRYLOCK */
        __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */
        __umtx_op_unlock_umutex,        /* UMTX_OP_MUTEX_UNLOCK */
        __umtx_op_set_ceiling,          /* UMTX_OP_SET_CEILING */
        __umtx_op_cv_wait_compat32,     /* UMTX_OP_CV_WAIT*/
        __umtx_op_cv_signal,            /* UMTX_OP_CV_SIGNAL */
        __umtx_op_cv_broadcast,         /* UMTX_OP_CV_BROADCAST */
        __umtx_op_wait_compat32,        /* UMTX_OP_WAIT_UINT */
        __umtx_op_rw_rdlock_compat32,   /* UMTX_OP_RW_RDLOCK */
        __umtx_op_rw_wrlock_compat32,   /* UMTX_OP_RW_WRLOCK */
        __umtx_op_rw_unlock,            /* UMTX_OP_RW_UNLOCK */
        __umtx_op_wait_uint_private_compat32,   /* UMTX_OP_WAIT_UINT_PRIVATE */
        __umtx_op_wake_private,         /* UMTX_OP_WAKE_PRIVATE */
        __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
        __umtx_op_wake_umutex,          /* UMTX_OP_UMUTEX_WAKE */
        __umtx_op_not_sup,              /* UMTX_OP_SEM_WAIT */
        __umtx_op_not_sup,              /* UMTX_OP_SEM_WAKE */
        __umtx_op_not_sup,              /* UMTX_OP_NWAKE_PRIVATE */
        __umtx_op_wake2_umutex          /* UMTX_OP_UMUTEX_WAKE2 */
};
 3363 
 3364 int
 3365 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
 3366 {
 3367         if ((unsigned)uap->op < UMTX_OP_MAX)
 3368                 return (*op_table_compat32[uap->op])(td,
 3369                         (struct _umtx_op_args *)uap);
 3370         return (EINVAL);
 3371 }
 3372 #endif
 3373 
 3374 void
 3375 umtx_thread_init(struct thread *td)
 3376 {
 3377         td->td_umtxq = umtxq_alloc();
 3378         td->td_umtxq->uq_thread = td;
 3379 }
 3380 
/*
 * Release the per-thread umtx queue structure allocated by
 * umtx_thread_init(); called when the thread structure is destroyed.
 */
void
umtx_thread_fini(struct thread *td)
{
        umtxq_free(td->td_umtxq);
}
 3386 
 3387 /*
 3388  * It will be called when new thread is created, e.g fork().
 3389  */
 3390 void
 3391 umtx_thread_alloc(struct thread *td)
 3392 {
 3393         struct umtx_q *uq;
 3394 
 3395         uq = td->td_umtxq;
 3396         uq->uq_inherited_pri = PRI_MAX;
 3397 
 3398         KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
 3399         KASSERT(uq->uq_thread == td, ("uq_thread != td"));
 3400         KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
 3401         KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
 3402 }
 3403 
 3404 /*
 3405  * exec() hook.
 3406  */
 3407 static void
 3408 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
 3409         struct image_params *imgp __unused)
 3410 {
 3411         umtx_thread_cleanup(curthread);
 3412 }
 3413 
 3414 /*
 3415  * thread_exit() hook.
 3416  */
 3417 void
 3418 umtx_thread_exit(struct thread *td)
 3419 {
 3420         umtx_thread_cleanup(td);
 3421 }
 3422 
 3423 /*
 3424  * clean up umtx data.
 3425  */
 3426 static void
 3427 umtx_thread_cleanup(struct thread *td)
 3428 {
 3429         struct umtx_q *uq;
 3430         struct umtx_pi *pi;
 3431 
 3432         if ((uq = td->td_umtxq) == NULL)
 3433                 return;
 3434 
 3435         mtx_lock_spin(&umtx_lock);
 3436         uq->uq_inherited_pri = PRI_MAX;
 3437         while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
 3438                 pi->pi_owner = NULL;
 3439                 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
 3440         }
 3441         thread_lock(td);
 3442         td->td_flags &= ~TDF_UBORROWING;
 3443         thread_unlock(td);
 3444         mtx_unlock_spin(&umtx_lock);
 3445 }

Cache object: c8d2042947f917bb34cb62a9743f24bf


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.