The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
    3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice unmodified, this list of conditions, and the following
   11  *    disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/10.1/sys/kern/kern_umtx.c 270789 2014-08-29 08:42:20Z kib $");
   30 
   31 #include "opt_compat.h"
   32 #include "opt_umtx_profiling.h"
   33 
   34 #include <sys/param.h>
   35 #include <sys/kernel.h>
   36 #include <sys/limits.h>
   37 #include <sys/lock.h>
   38 #include <sys/malloc.h>
   39 #include <sys/mutex.h>
   40 #include <sys/priv.h>
   41 #include <sys/proc.h>
   42 #include <sys/sbuf.h>
   43 #include <sys/sched.h>
   44 #include <sys/smp.h>
   45 #include <sys/sysctl.h>
   46 #include <sys/sysent.h>
   47 #include <sys/systm.h>
   48 #include <sys/sysproto.h>
   49 #include <sys/syscallsubr.h>
   50 #include <sys/eventhandler.h>
   51 #include <sys/umtx.h>
   52 
   53 #include <vm/vm.h>
   54 #include <vm/vm_param.h>
   55 #include <vm/pmap.h>
   56 #include <vm/vm_map.h>
   57 #include <vm/vm_object.h>
   58 
   59 #include <machine/cpu.h>
   60 
   61 #ifdef COMPAT_FREEBSD32
   62 #include <compat/freebsd32/freebsd32_proto.h>
   63 #endif
   64 
   65 #define _UMUTEX_TRY             1
   66 #define _UMUTEX_WAIT            2
   67 
   68 #ifdef UMTX_PROFILING
   69 #define UPROF_PERC_BIGGER(w, f, sw, sf)                                 \
   70         (((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
   71 #endif
   72 
   73 /* Priority inheritance mutex info. */
   74 struct umtx_pi {
   75         /* Owner thread */
   76         struct thread           *pi_owner;
   77 
   78         /* Reference count */
   79         int                     pi_refcount;
   80 
   81         /* List entry to link umtx holding by thread */
   82         TAILQ_ENTRY(umtx_pi)    pi_link;
   83 
   84         /* List entry in hash */
   85         TAILQ_ENTRY(umtx_pi)    pi_hashlink;
   86 
   87         /* List for waiters */
   88         TAILQ_HEAD(,umtx_q)     pi_blocked;
   89 
   90         /* Identify a userland lock object */
   91         struct umtx_key         pi_key;
   92 };
   93 
   94 /* A userland synchronous object user. */
   95 struct umtx_q {
   96         /* Linked list for the hash. */
   97         TAILQ_ENTRY(umtx_q)     uq_link;
   98 
   99         /* Umtx key. */
  100         struct umtx_key         uq_key;
  101 
  102         /* Umtx flags. */
  103         int                     uq_flags;
  104 #define UQF_UMTXQ       0x0001
  105 
  106         /* The thread waits on. */
  107         struct thread           *uq_thread;
  108 
  109         /*
  110          * Blocked on PI mutex. read can use chain lock
  111          * or umtx_lock, write must have both chain lock and
  112          * umtx_lock being hold.
  113          */
  114         struct umtx_pi          *uq_pi_blocked;
  115 
  116         /* On blocked list */
  117         TAILQ_ENTRY(umtx_q)     uq_lockq;
  118 
  119         /* Thread contending with us */
  120         TAILQ_HEAD(,umtx_pi)    uq_pi_contested;
  121 
  122         /* Inherited priority from PP mutex */
  123         u_char                  uq_inherited_pri;
  124         
  125         /* Spare queue ready to be reused */
  126         struct umtxq_queue      *uq_spare_queue;
  127 
  128         /* The queue we on */
  129         struct umtxq_queue      *uq_cur_queue;
  130 };
  131 
  132 TAILQ_HEAD(umtxq_head, umtx_q);
  133 
  134 /* Per-key wait-queue */
  135 struct umtxq_queue {
  136         struct umtxq_head       head;
  137         struct umtx_key         key;
  138         LIST_ENTRY(umtxq_queue) link;
  139         int                     length;
  140 };
  141 
  142 LIST_HEAD(umtxq_list, umtxq_queue);
  143 
  144 /* Userland lock object's wait-queue chain */
  145 struct umtxq_chain {
  146         /* Lock for this chain. */
  147         struct mtx              uc_lock;
  148 
  149         /* List of sleep queues. */
  150         struct umtxq_list       uc_queue[2];
  151 #define UMTX_SHARED_QUEUE       0
  152 #define UMTX_EXCLUSIVE_QUEUE    1
  153 
  154         LIST_HEAD(, umtxq_queue) uc_spare_queue;
  155 
  156         /* Busy flag */
  157         char                    uc_busy;
  158 
  159         /* Chain lock waiters */
  160         int                     uc_waiters;
  161 
  162         /* All PI in the list */
  163         TAILQ_HEAD(,umtx_pi)    uc_pi_list;
  164 
  165 #ifdef UMTX_PROFILING
  166         u_int                   length;
  167         u_int                   max_length;
  168 #endif
  169 };
  170 
  171 #define UMTXQ_LOCKED_ASSERT(uc)         mtx_assert(&(uc)->uc_lock, MA_OWNED)
  172 #define UMTXQ_BUSY_ASSERT(uc)   KASSERT(&(uc)->uc_busy, ("umtx chain is not busy"))
  173 
  174 /*
  175  * Don't propagate time-sharing priority, there is a security reason,
  176  * a user can simply introduce PI-mutex, let thread A lock the mutex,
  177  * and let another thread B block on the mutex, because B is
  178  * sleeping, its priority will be boosted, this causes A's priority to
  179  * be boosted via priority propagating too and will never be lowered even
  180  * if it is using 100%CPU, this is unfair to other processes.
  181  */
  182 
  183 #define UPRI(td)        (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
  184                           (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
  185                          PRI_MAX_TIMESHARE : (td)->td_user_pri)
  186 
  187 #define GOLDEN_RATIO_PRIME      2654404609U
  188 #define UMTX_CHAINS             512
  189 #define UMTX_SHIFTS             (__WORD_BIT - 9)
  190 
  191 #define GET_SHARE(flags)        \
  192     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
  193 
  194 #define BUSY_SPINS              200
  195 
  196 struct abs_timeout {
  197         int clockid;
  198         struct timespec cur;
  199         struct timespec end;
  200 };
  201 
  202 static uma_zone_t               umtx_pi_zone;
  203 static struct umtxq_chain       umtxq_chains[2][UMTX_CHAINS];
  204 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
  205 static int                      umtx_pi_allocated;
  206 
  207 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
  208 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
  209     &umtx_pi_allocated, 0, "Allocated umtx_pi");
  210 
  211 #ifdef UMTX_PROFILING
  212 static long max_length;
  213 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
  214 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
  215 #endif
  216 
  217 static void umtxq_sysinit(void *);
  218 static void umtxq_hash(struct umtx_key *key);
  219 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
  220 static void umtxq_lock(struct umtx_key *key);
  221 static void umtxq_unlock(struct umtx_key *key);
  222 static void umtxq_busy(struct umtx_key *key);
  223 static void umtxq_unbusy(struct umtx_key *key);
  224 static void umtxq_insert_queue(struct umtx_q *uq, int q);
  225 static void umtxq_remove_queue(struct umtx_q *uq, int q);
  226 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
  227 static int umtxq_count(struct umtx_key *key);
  228 static struct umtx_pi *umtx_pi_alloc(int);
  229 static void umtx_pi_free(struct umtx_pi *pi);
  230 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
  231 static void umtx_thread_cleanup(struct thread *td);
  232 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
  233         struct image_params *imgp __unused);
  234 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
  235 
  236 #define umtxq_signal(key, nwake)        umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
  237 #define umtxq_insert(uq)        umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
  238 #define umtxq_remove(uq)        umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
  239 
  240 static struct mtx umtx_lock;
  241 
  242 #ifdef UMTX_PROFILING
  243 static void
  244 umtx_init_profiling(void) 
  245 {
  246         struct sysctl_oid *chain_oid;
  247         char chain_name[10];
  248         int i;
  249 
  250         for (i = 0; i < UMTX_CHAINS; ++i) {
  251                 snprintf(chain_name, sizeof(chain_name), "%d", i);
  252                 chain_oid = SYSCTL_ADD_NODE(NULL, 
  253                     SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 
  254                     chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
  255                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  256                     "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
  257                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  258                     "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
  259         }
  260 }
  261 
  262 static int
  263 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
  264 {
  265         char buf[512];
  266         struct sbuf sb;
  267         struct umtxq_chain *uc;
  268         u_int fract, i, j, tot, whole;
  269         u_int sf0, sf1, sf2, sf3, sf4;
  270         u_int si0, si1, si2, si3, si4;
  271         u_int sw0, sw1, sw2, sw3, sw4;
  272 
  273         sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
  274         for (i = 0; i < 2; i++) {
  275                 tot = 0;
  276                 for (j = 0; j < UMTX_CHAINS; ++j) {
  277                         uc = &umtxq_chains[i][j];
  278                         mtx_lock(&uc->uc_lock);
  279                         tot += uc->max_length;
  280                         mtx_unlock(&uc->uc_lock);
  281                 }
  282                 if (tot == 0)
  283                         sbuf_printf(&sb, "%u) Empty ", i);
  284                 else {
  285                         sf0 = sf1 = sf2 = sf3 = sf4 = 0;
  286                         si0 = si1 = si2 = si3 = si4 = 0;
  287                         sw0 = sw1 = sw2 = sw3 = sw4 = 0;
  288                         for (j = 0; j < UMTX_CHAINS; j++) {
  289                                 uc = &umtxq_chains[i][j];
  290                                 mtx_lock(&uc->uc_lock);
  291                                 whole = uc->max_length * 100;
  292                                 mtx_unlock(&uc->uc_lock);
  293                                 fract = (whole % tot) * 100;
  294                                 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
  295                                         sf0 = fract;
  296                                         si0 = j;
  297                                         sw0 = whole;
  298                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw1,
  299                                     sf1)) {
  300                                         sf1 = fract;
  301                                         si1 = j;
  302                                         sw1 = whole;
  303                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw2,
  304                                     sf2)) {
  305                                         sf2 = fract;
  306                                         si2 = j;
  307                                         sw2 = whole;
  308                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw3,
  309                                     sf3)) {
  310                                         sf3 = fract;
  311                                         si3 = j;
  312                                         sw3 = whole;
  313                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw4,
  314                                     sf4)) {
  315                                         sf4 = fract;
  316                                         si4 = j;
  317                                         sw4 = whole;
  318                                 }
  319                         }
  320                         sbuf_printf(&sb, "queue %u:\n", i);
  321                         sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
  322                             sf0 / tot, si0);
  323                         sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
  324                             sf1 / tot, si1);
  325                         sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
  326                             sf2 / tot, si2);
  327                         sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
  328                             sf3 / tot, si3);
  329                         sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
  330                             sf4 / tot, si4);
  331                 }
  332         }
  333         sbuf_trim(&sb);
  334         sbuf_finish(&sb);
  335         sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
  336         sbuf_delete(&sb);
  337         return (0);
  338 }
  339 
  340 static int
  341 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
  342 {
  343         struct umtxq_chain *uc;
  344         u_int i, j;
  345         int clear, error;
  346 
  347         clear = 0;
  348         error = sysctl_handle_int(oidp, &clear, 0, req);
  349         if (error != 0 || req->newptr == NULL)
  350                 return (error);
  351 
  352         if (clear != 0) {
  353                 for (i = 0; i < 2; ++i) {
  354                         for (j = 0; j < UMTX_CHAINS; ++j) {
  355                                 uc = &umtxq_chains[i][j];
  356                                 mtx_lock(&uc->uc_lock);
  357                                 uc->length = 0;
  358                                 uc->max_length = 0;     
  359                                 mtx_unlock(&uc->uc_lock);
  360                         }
  361                 }
  362         }
  363         return (0);
  364 }
  365 
  366 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
  367     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
  368     sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
  369 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
  370     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
  371     sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
  372 #endif
  373 
  374 static void
  375 umtxq_sysinit(void *arg __unused)
  376 {
  377         int i, j;
  378 
  379         umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
  380                 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  381         for (i = 0; i < 2; ++i) {
  382                 for (j = 0; j < UMTX_CHAINS; ++j) {
  383                         mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
  384                                  MTX_DEF | MTX_DUPOK);
  385                         LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
  386                         LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
  387                         LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
  388                         TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
  389                         umtxq_chains[i][j].uc_busy = 0;
  390                         umtxq_chains[i][j].uc_waiters = 0;
  391 #ifdef UMTX_PROFILING
  392                         umtxq_chains[i][j].length = 0;
  393                         umtxq_chains[i][j].max_length = 0;      
  394 #endif
  395                 }
  396         }
  397 #ifdef UMTX_PROFILING
  398         umtx_init_profiling();
  399 #endif
  400         mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
  401         EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
  402             EVENTHANDLER_PRI_ANY);
  403 }
  404 
  405 struct umtx_q *
  406 umtxq_alloc(void)
  407 {
  408         struct umtx_q *uq;
  409 
  410         uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
  411         uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
  412         TAILQ_INIT(&uq->uq_spare_queue->head);
  413         TAILQ_INIT(&uq->uq_pi_contested);
  414         uq->uq_inherited_pri = PRI_MAX;
  415         return (uq);
  416 }
  417 
  418 void
  419 umtxq_free(struct umtx_q *uq)
  420 {
  421         MPASS(uq->uq_spare_queue != NULL);
  422         free(uq->uq_spare_queue, M_UMTX);
  423         free(uq, M_UMTX);
  424 }
  425 
  426 static inline void
  427 umtxq_hash(struct umtx_key *key)
  428 {
  429         unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
  430         key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
  431 }
  432 
  433 static inline struct umtxq_chain *
  434 umtxq_getchain(struct umtx_key *key)
  435 {
  436         if (key->type <= TYPE_SEM)
  437                 return (&umtxq_chains[1][key->hash]);
  438         return (&umtxq_chains[0][key->hash]);
  439 }
  440 
  441 /*
  442  * Lock a chain.
  443  */
  444 static inline void
  445 umtxq_lock(struct umtx_key *key)
  446 {
  447         struct umtxq_chain *uc;
  448 
  449         uc = umtxq_getchain(key);
  450         mtx_lock(&uc->uc_lock);
  451 }
  452 
  453 /*
  454  * Unlock a chain.
  455  */
  456 static inline void
  457 umtxq_unlock(struct umtx_key *key)
  458 {
  459         struct umtxq_chain *uc;
  460 
  461         uc = umtxq_getchain(key);
  462         mtx_unlock(&uc->uc_lock);
  463 }
  464 
  465 /*
  466  * Set chain to busy state when following operation
  467  * may be blocked (kernel mutex can not be used).
  468  */
  469 static inline void
  470 umtxq_busy(struct umtx_key *key)
  471 {
  472         struct umtxq_chain *uc;
  473 
  474         uc = umtxq_getchain(key);
  475         mtx_assert(&uc->uc_lock, MA_OWNED);
  476         if (uc->uc_busy) {
  477 #ifdef SMP
  478                 if (smp_cpus > 1) {
  479                         int count = BUSY_SPINS;
  480                         if (count > 0) {
  481                                 umtxq_unlock(key);
  482                                 while (uc->uc_busy && --count > 0)
  483                                         cpu_spinwait();
  484                                 umtxq_lock(key);
  485                         }
  486                 }
  487 #endif
  488                 while (uc->uc_busy) {
  489                         uc->uc_waiters++;
  490                         msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
  491                         uc->uc_waiters--;
  492                 }
  493         }
  494         uc->uc_busy = 1;
  495 }
  496 
  497 /*
  498  * Unbusy a chain.
  499  */
  500 static inline void
  501 umtxq_unbusy(struct umtx_key *key)
  502 {
  503         struct umtxq_chain *uc;
  504 
  505         uc = umtxq_getchain(key);
  506         mtx_assert(&uc->uc_lock, MA_OWNED);
  507         KASSERT(uc->uc_busy != 0, ("not busy"));
  508         uc->uc_busy = 0;
  509         if (uc->uc_waiters)
  510                 wakeup_one(uc);
  511 }
  512 
  513 static struct umtxq_queue *
  514 umtxq_queue_lookup(struct umtx_key *key, int q)
  515 {
  516         struct umtxq_queue *uh;
  517         struct umtxq_chain *uc;
  518 
  519         uc = umtxq_getchain(key);
  520         UMTXQ_LOCKED_ASSERT(uc);
  521         LIST_FOREACH(uh, &uc->uc_queue[q], link) {
  522                 if (umtx_key_match(&uh->key, key))
  523                         return (uh);
  524         }
  525 
  526         return (NULL);
  527 }
  528 
  529 static inline void
  530 umtxq_insert_queue(struct umtx_q *uq, int q)
  531 {
  532         struct umtxq_queue *uh;
  533         struct umtxq_chain *uc;
  534 
  535         uc = umtxq_getchain(&uq->uq_key);
  536         UMTXQ_LOCKED_ASSERT(uc);
  537         KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
  538         uh = umtxq_queue_lookup(&uq->uq_key, q);
  539         if (uh != NULL) {
  540                 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
  541         } else {
  542                 uh = uq->uq_spare_queue;
  543                 uh->key = uq->uq_key;
  544                 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
  545 #ifdef UMTX_PROFILING
  546                 uc->length++;
  547                 if (uc->length > uc->max_length) {
  548                         uc->max_length = uc->length;
  549                         if (uc->max_length > max_length)
  550                                 max_length = uc->max_length;    
  551                 }
  552 #endif
  553         }
  554         uq->uq_spare_queue = NULL;
  555 
  556         TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
  557         uh->length++;
  558         uq->uq_flags |= UQF_UMTXQ;
  559         uq->uq_cur_queue = uh;
  560         return;
  561 }
  562 
  563 static inline void
  564 umtxq_remove_queue(struct umtx_q *uq, int q)
  565 {
  566         struct umtxq_chain *uc;
  567         struct umtxq_queue *uh;
  568 
  569         uc = umtxq_getchain(&uq->uq_key);
  570         UMTXQ_LOCKED_ASSERT(uc);
  571         if (uq->uq_flags & UQF_UMTXQ) {
  572                 uh = uq->uq_cur_queue;
  573                 TAILQ_REMOVE(&uh->head, uq, uq_link);
  574                 uh->length--;
  575                 uq->uq_flags &= ~UQF_UMTXQ;
  576                 if (TAILQ_EMPTY(&uh->head)) {
  577                         KASSERT(uh->length == 0,
  578                             ("inconsistent umtxq_queue length"));
  579 #ifdef UMTX_PROFILING
  580                         uc->length--;
  581 #endif
  582                         LIST_REMOVE(uh, link);
  583                 } else {
  584                         uh = LIST_FIRST(&uc->uc_spare_queue);
  585                         KASSERT(uh != NULL, ("uc_spare_queue is empty"));
  586                         LIST_REMOVE(uh, link);
  587                 }
  588                 uq->uq_spare_queue = uh;
  589                 uq->uq_cur_queue = NULL;
  590         }
  591 }
  592 
  593 /*
  594  * Check if there are multiple waiters
  595  */
  596 static int
  597 umtxq_count(struct umtx_key *key)
  598 {
  599         struct umtxq_chain *uc;
  600         struct umtxq_queue *uh;
  601 
  602         uc = umtxq_getchain(key);
  603         UMTXQ_LOCKED_ASSERT(uc);
  604         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  605         if (uh != NULL)
  606                 return (uh->length);
  607         return (0);
  608 }
  609 
  610 /*
  611  * Check if there are multiple PI waiters and returns first
  612  * waiter.
  613  */
  614 static int
  615 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
  616 {
  617         struct umtxq_chain *uc;
  618         struct umtxq_queue *uh;
  619 
  620         *first = NULL;
  621         uc = umtxq_getchain(key);
  622         UMTXQ_LOCKED_ASSERT(uc);
  623         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  624         if (uh != NULL) {
  625                 *first = TAILQ_FIRST(&uh->head);
  626                 return (uh->length);
  627         }
  628         return (0);
  629 }
  630 
  631 static int
  632 umtxq_check_susp(struct thread *td)
  633 {
  634         struct proc *p;
  635         int error;
  636 
  637         /*
  638          * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
  639          * eventually break the lockstep loop.
  640          */
  641         if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
  642                 return (0);
  643         error = 0;
  644         p = td->td_proc;
  645         PROC_LOCK(p);
  646         if (P_SHOULDSTOP(p) ||
  647             ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
  648                 if (p->p_flag & P_SINGLE_EXIT)
  649                         error = EINTR;
  650                 else
  651                         error = ERESTART;
  652         }
  653         PROC_UNLOCK(p);
  654         return (error);
  655 }
  656 
  657 /*
  658  * Wake up threads waiting on an userland object.
  659  */
  660 
  661 static int
  662 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
  663 {
  664         struct umtxq_chain *uc;
  665         struct umtxq_queue *uh;
  666         struct umtx_q *uq;
  667         int ret;
  668 
  669         ret = 0;
  670         uc = umtxq_getchain(key);
  671         UMTXQ_LOCKED_ASSERT(uc);
  672         uh = umtxq_queue_lookup(key, q);
  673         if (uh != NULL) {
  674                 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
  675                         umtxq_remove_queue(uq, q);
  676                         wakeup(uq);
  677                         if (++ret >= n_wake)
  678                                 return (ret);
  679                 }
  680         }
  681         return (ret);
  682 }
  683 
  684 
  685 /*
  686  * Wake up specified thread.
  687  */
  688 static inline void
  689 umtxq_signal_thread(struct umtx_q *uq)
  690 {
  691         struct umtxq_chain *uc;
  692 
  693         uc = umtxq_getchain(&uq->uq_key);
  694         UMTXQ_LOCKED_ASSERT(uc);
  695         umtxq_remove(uq);
  696         wakeup(uq);
  697 }
  698 
  699 static inline int 
  700 tstohz(const struct timespec *tsp)
  701 {
  702         struct timeval tv;
  703 
  704         TIMESPEC_TO_TIMEVAL(&tv, tsp);
  705         return tvtohz(&tv);
  706 }
  707 
  708 static void
  709 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
  710         const struct timespec *timeout)
  711 {
  712 
  713         timo->clockid = clockid;
  714         if (!absolute) {
  715                 kern_clock_gettime(curthread, clockid, &timo->end);
  716                 timo->cur = timo->end;
  717                 timespecadd(&timo->end, timeout);
  718         } else {
  719                 timo->end = *timeout;
  720                 kern_clock_gettime(curthread, clockid, &timo->cur);
  721         }
  722 }
  723 
  724 static void
  725 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
  726 {
  727 
  728         abs_timeout_init(timo, umtxtime->_clockid,
  729                 (umtxtime->_flags & UMTX_ABSTIME) != 0,
  730                 &umtxtime->_timeout);
  731 }
  732 
  733 static inline void
  734 abs_timeout_update(struct abs_timeout *timo)
  735 {
  736         kern_clock_gettime(curthread, timo->clockid, &timo->cur);
  737 }
  738 
  739 static int
  740 abs_timeout_gethz(struct abs_timeout *timo)
  741 {
  742         struct timespec tts;
  743 
  744         if (timespeccmp(&timo->end, &timo->cur, <=))
  745                 return (-1); 
  746         tts = timo->end;
  747         timespecsub(&tts, &timo->cur);
  748         return (tstohz(&tts));
  749 }
  750 
  751 /*
  752  * Put thread into sleep state, before sleeping, check if
  753  * thread was removed from umtx queue.
  754  */
  755 static inline int
  756 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
  757 {
  758         struct umtxq_chain *uc;
  759         int error, timo;
  760 
  761         uc = umtxq_getchain(&uq->uq_key);
  762         UMTXQ_LOCKED_ASSERT(uc);
  763         for (;;) {
  764                 if (!(uq->uq_flags & UQF_UMTXQ))
  765                         return (0);
  766                 if (abstime != NULL) {
  767                         timo = abs_timeout_gethz(abstime);
  768                         if (timo < 0)
  769                                 return (ETIMEDOUT);
  770                 } else
  771                         timo = 0;
  772                 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
  773                 if (error != EWOULDBLOCK) {
  774                         umtxq_lock(&uq->uq_key);
  775                         break;
  776                 }
  777                 if (abstime != NULL)
  778                         abs_timeout_update(abstime);
  779                 umtxq_lock(&uq->uq_key);
  780         }
  781         return (error);
  782 }
  783 
  784 /*
  785  * Convert userspace address into unique logical address.
  786  */
  787 int
  788 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
  789 {
  790         struct thread *td = curthread;
  791         vm_map_t map;
  792         vm_map_entry_t entry;
  793         vm_pindex_t pindex;
  794         vm_prot_t prot;
  795         boolean_t wired;
  796 
  797         key->type = type;
  798         if (share == THREAD_SHARE) {
  799                 key->shared = 0;
  800                 key->info.private.vs = td->td_proc->p_vmspace;
  801                 key->info.private.addr = (uintptr_t)addr;
  802         } else {
  803                 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
  804                 map = &td->td_proc->p_vmspace->vm_map;
  805                 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
  806                     &entry, &key->info.shared.object, &pindex, &prot,
  807                     &wired) != KERN_SUCCESS) {
  808                         return EFAULT;
  809                 }
  810 
  811                 if ((share == PROCESS_SHARE) ||
  812                     (share == AUTO_SHARE &&
  813                      VM_INHERIT_SHARE == entry->inheritance)) {
  814                         key->shared = 1;
  815                         key->info.shared.offset = entry->offset + entry->start -
  816                                 (vm_offset_t)addr;
  817                         vm_object_reference(key->info.shared.object);
  818                 } else {
  819                         key->shared = 0;
  820                         key->info.private.vs = td->td_proc->p_vmspace;
  821                         key->info.private.addr = (uintptr_t)addr;
  822                 }
  823                 vm_map_lookup_done(map, entry);
  824         }
  825 
  826         umtxq_hash(key);
  827         return (0);
  828 }
  829 
  830 /*
  831  * Release key.
  832  */
  833 void
  834 umtx_key_release(struct umtx_key *key)
  835 {
  836         if (key->shared)
  837                 vm_object_deallocate(key->info.shared.object);
  838 }
  839 
  840 /*
  841  * Lock a umtx object.
  842  */
  843 static int
  844 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
  845         const struct timespec *timeout)
  846 {
  847         struct abs_timeout timo;
  848         struct umtx_q *uq;
  849         u_long owner;
  850         u_long old;
  851         int error = 0;
  852 
  853         uq = td->td_umtxq;
  854         if (timeout != NULL)
  855                 abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
  856 
  857         /*
  858          * Care must be exercised when dealing with umtx structure. It
  859          * can fault on any access.
  860          */
  861         for (;;) {
  862                 /*
  863                  * Try the uncontested case.  This should be done in userland.
  864                  */
  865                 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
  866 
  867                 /* The acquire succeeded. */
  868                 if (owner == UMTX_UNOWNED)
  869                         return (0);
  870 
  871                 /* The address was invalid. */
  872                 if (owner == -1)
  873                         return (EFAULT);
  874 
  875                 /* If no one owns it but it is contested try to acquire it. */
  876                 if (owner == UMTX_CONTESTED) {
  877                         owner = casuword(&umtx->u_owner,
  878                             UMTX_CONTESTED, id | UMTX_CONTESTED);
  879 
  880                         if (owner == UMTX_CONTESTED)
  881                                 return (0);
  882 
  883                         /* The address was invalid. */
  884                         if (owner == -1)
  885                                 return (EFAULT);
  886 
  887                         error = umtxq_check_susp(td);
  888                         if (error != 0)
  889                                 break;
  890 
  891                         /* If this failed the lock has changed, restart. */
  892                         continue;
  893                 }
  894 
  895                 /*
  896                  * If we caught a signal, we have retried and now
  897                  * exit immediately.
  898                  */
  899                 if (error != 0)
  900                         break;
  901 
  902                 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
  903                         AUTO_SHARE, &uq->uq_key)) != 0)
  904                         return (error);
  905 
  906                 umtxq_lock(&uq->uq_key);
  907                 umtxq_busy(&uq->uq_key);
  908                 umtxq_insert(uq);
  909                 umtxq_unbusy(&uq->uq_key);
  910                 umtxq_unlock(&uq->uq_key);
  911 
  912                 /*
  913                  * Set the contested bit so that a release in user space
  914                  * knows to use the system call for unlock.  If this fails
  915                  * either some one else has acquired the lock or it has been
  916                  * released.
  917                  */
  918                 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
  919 
  920                 /* The address was invalid. */
  921                 if (old == -1) {
  922                         umtxq_lock(&uq->uq_key);
  923                         umtxq_remove(uq);
  924                         umtxq_unlock(&uq->uq_key);
  925                         umtx_key_release(&uq->uq_key);
  926                         return (EFAULT);
  927                 }
  928 
  929                 /*
  930                  * We set the contested bit, sleep. Otherwise the lock changed
  931                  * and we need to retry or we lost a race to the thread
  932                  * unlocking the umtx.
  933                  */
  934                 umtxq_lock(&uq->uq_key);
  935                 if (old == owner)
  936                         error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
  937                             &timo);
  938                 umtxq_remove(uq);
  939                 umtxq_unlock(&uq->uq_key);
  940                 umtx_key_release(&uq->uq_key);
  941 
  942                 if (error == 0)
  943                         error = umtxq_check_susp(td);
  944         }
  945 
  946         if (timeout == NULL) {
  947                 /* Mutex locking is restarted if it is interrupted. */
  948                 if (error == EINTR)
  949                         error = ERESTART;
  950         } else {
  951                 /* Timed-locking is not restarted. */
  952                 if (error == ERESTART)
  953                         error = EINTR;
  954         }
  955         return (error);
  956 }
  957 
  958 /*
  959  * Unlock a umtx object.
  960  */
  961 static int
  962 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
  963 {
  964         struct umtx_key key;
  965         u_long owner;
  966         u_long old;
  967         int error;
  968         int count;
  969 
  970         /*
  971          * Make sure we own this mtx.
  972          */
  973         owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
  974         if (owner == -1)
  975                 return (EFAULT);
  976 
  977         if ((owner & ~UMTX_CONTESTED) != id)
  978                 return (EPERM);
  979 
  980         /* This should be done in userland */
  981         if ((owner & UMTX_CONTESTED) == 0) {
  982                 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
  983                 if (old == -1)
  984                         return (EFAULT);
  985                 if (old == owner)
  986                         return (0);
  987                 owner = old;
  988         }
  989 
  990         /* We should only ever be in here for contested locks */
  991         if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
  992                 &key)) != 0)
  993                 return (error);
  994 
  995         umtxq_lock(&key);
  996         umtxq_busy(&key);
  997         count = umtxq_count(&key);
  998         umtxq_unlock(&key);
  999 
 1000         /*
 1001          * When unlocking the umtx, it must be marked as unowned if
 1002          * there is zero or one thread only waiting for it.
 1003          * Otherwise, it must be marked as contested.
 1004          */
 1005         old = casuword(&umtx->u_owner, owner,
 1006                 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
 1007         umtxq_lock(&key);
 1008         umtxq_signal(&key,1);
 1009         umtxq_unbusy(&key);
 1010         umtxq_unlock(&key);
 1011         umtx_key_release(&key);
 1012         if (old == -1)
 1013                 return (EFAULT);
 1014         if (old != owner)
 1015                 return (EINVAL);
 1016         return (0);
 1017 }
 1018 
 1019 #ifdef COMPAT_FREEBSD32
 1020 
 1021 /*
 1022  * Lock a umtx object.
 1023  */
 1024 static int
 1025 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
 1026         const struct timespec *timeout)
 1027 {
 1028         struct abs_timeout timo;
 1029         struct umtx_q *uq;
 1030         uint32_t owner;
 1031         uint32_t old;
 1032         int error = 0;
 1033 
 1034         uq = td->td_umtxq;
 1035 
 1036         if (timeout != NULL)
 1037                 abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
 1038 
 1039         /*
 1040          * Care must be exercised when dealing with umtx structure. It
 1041          * can fault on any access.
 1042          */
 1043         for (;;) {
 1044                 /*
 1045                  * Try the uncontested case.  This should be done in userland.
 1046                  */
 1047                 owner = casuword32(m, UMUTEX_UNOWNED, id);
 1048 
 1049                 /* The acquire succeeded. */
 1050                 if (owner == UMUTEX_UNOWNED)
 1051                         return (0);
 1052 
 1053                 /* The address was invalid. */
 1054                 if (owner == -1)
 1055                         return (EFAULT);
 1056 
 1057                 /* If no one owns it but it is contested try to acquire it. */
 1058                 if (owner == UMUTEX_CONTESTED) {
 1059                         owner = casuword32(m,
 1060                             UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 1061                         if (owner == UMUTEX_CONTESTED)
 1062                                 return (0);
 1063 
 1064                         /* The address was invalid. */
 1065                         if (owner == -1)
 1066                                 return (EFAULT);
 1067 
 1068                         error = umtxq_check_susp(td);
 1069                         if (error != 0)
 1070                                 break;
 1071 
 1072                         /* If this failed the lock has changed, restart. */
 1073                         continue;
 1074                 }
 1075 
 1076                 /*
 1077                  * If we caught a signal, we have retried and now
 1078                  * exit immediately.
 1079                  */
 1080                 if (error != 0)
 1081                         return (error);
 1082 
 1083                 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
 1084                         AUTO_SHARE, &uq->uq_key)) != 0)
 1085                         return (error);
 1086 
 1087                 umtxq_lock(&uq->uq_key);
 1088                 umtxq_busy(&uq->uq_key);
 1089                 umtxq_insert(uq);
 1090                 umtxq_unbusy(&uq->uq_key);
 1091                 umtxq_unlock(&uq->uq_key);
 1092 
 1093                 /*
 1094                  * Set the contested bit so that a release in user space
 1095                  * knows to use the system call for unlock.  If this fails
 1096                  * either some one else has acquired the lock or it has been
 1097                  * released.
 1098                  */
 1099                 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
 1100 
 1101                 /* The address was invalid. */
 1102                 if (old == -1) {
 1103                         umtxq_lock(&uq->uq_key);
 1104                         umtxq_remove(uq);
 1105                         umtxq_unlock(&uq->uq_key);
 1106                         umtx_key_release(&uq->uq_key);
 1107                         return (EFAULT);
 1108                 }
 1109 
 1110                 /*
 1111                  * We set the contested bit, sleep. Otherwise the lock changed
 1112                  * and we need to retry or we lost a race to the thread
 1113                  * unlocking the umtx.
 1114                  */
 1115                 umtxq_lock(&uq->uq_key);
 1116                 if (old == owner)
 1117                         error = umtxq_sleep(uq, "umtx", timeout == NULL ?
 1118                             NULL : &timo);
 1119                 umtxq_remove(uq);
 1120                 umtxq_unlock(&uq->uq_key);
 1121                 umtx_key_release(&uq->uq_key);
 1122 
 1123                 if (error == 0)
 1124                         error = umtxq_check_susp(td);
 1125         }
 1126 
 1127         if (timeout == NULL) {
 1128                 /* Mutex locking is restarted if it is interrupted. */
 1129                 if (error == EINTR)
 1130                         error = ERESTART;
 1131         } else {
 1132                 /* Timed-locking is not restarted. */
 1133                 if (error == ERESTART)
 1134                         error = EINTR;
 1135         }
 1136         return (error);
 1137 }
 1138 
 1139 /*
 1140  * Unlock a umtx object.
 1141  */
 1142 static int
 1143 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
 1144 {
 1145         struct umtx_key key;
 1146         uint32_t owner;
 1147         uint32_t old;
 1148         int error;
 1149         int count;
 1150 
 1151         /*
 1152          * Make sure we own this mtx.
 1153          */
 1154         owner = fuword32(m);
 1155         if (owner == -1)
 1156                 return (EFAULT);
 1157 
 1158         if ((owner & ~UMUTEX_CONTESTED) != id)
 1159                 return (EPERM);
 1160 
 1161         /* This should be done in userland */
 1162         if ((owner & UMUTEX_CONTESTED) == 0) {
 1163                 old = casuword32(m, owner, UMUTEX_UNOWNED);
 1164                 if (old == -1)
 1165                         return (EFAULT);
 1166                 if (old == owner)
 1167                         return (0);
 1168                 owner = old;
 1169         }
 1170 
 1171         /* We should only ever be in here for contested locks */
 1172         if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
 1173                 &key)) != 0)
 1174                 return (error);
 1175 
 1176         umtxq_lock(&key);
 1177         umtxq_busy(&key);
 1178         count = umtxq_count(&key);
 1179         umtxq_unlock(&key);
 1180 
 1181         /*
 1182          * When unlocking the umtx, it must be marked as unowned if
 1183          * there is zero or one thread only waiting for it.
 1184          * Otherwise, it must be marked as contested.
 1185          */
 1186         old = casuword32(m, owner,
 1187                 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
 1188         umtxq_lock(&key);
 1189         umtxq_signal(&key,1);
 1190         umtxq_unbusy(&key);
 1191         umtxq_unlock(&key);
 1192         umtx_key_release(&key);
 1193         if (old == -1)
 1194                 return (EFAULT);
 1195         if (old != owner)
 1196                 return (EINVAL);
 1197         return (0);
 1198 }
 1199 #endif
 1200 
 1201 /*
 1202  * Fetch and compare value, sleep on the address if value is not changed.
 1203  */
 1204 static int
 1205 do_wait(struct thread *td, void *addr, u_long id,
 1206         struct _umtx_time *timeout, int compat32, int is_private)
 1207 {
 1208         struct abs_timeout timo;
 1209         struct umtx_q *uq;
 1210         u_long tmp;
 1211         int error = 0;
 1212 
 1213         uq = td->td_umtxq;
 1214         if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
 1215                 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
 1216                 return (error);
 1217 
 1218         if (timeout != NULL)
 1219                 abs_timeout_init2(&timo, timeout);
 1220 
 1221         umtxq_lock(&uq->uq_key);
 1222         umtxq_insert(uq);
 1223         umtxq_unlock(&uq->uq_key);
 1224         if (compat32 == 0)
 1225                 tmp = fuword(addr);
 1226         else
 1227                 tmp = (unsigned int)fuword32(addr);
 1228         umtxq_lock(&uq->uq_key);
 1229         if (tmp == id)
 1230                 error = umtxq_sleep(uq, "uwait", timeout == NULL ?
 1231                     NULL : &timo);
 1232         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 1233                 error = 0;
 1234         else
 1235                 umtxq_remove(uq);
 1236         umtxq_unlock(&uq->uq_key);
 1237         umtx_key_release(&uq->uq_key);
 1238         if (error == ERESTART)
 1239                 error = EINTR;
 1240         return (error);
 1241 }
 1242 
 1243 /*
 1244  * Wake up threads sleeping on the specified address.
 1245  */
 1246 int
 1247 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
 1248 {
 1249         struct umtx_key key;
 1250         int ret;
 1251         
 1252         if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
 1253                 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
 1254                 return (ret);
 1255         umtxq_lock(&key);
 1256         ret = umtxq_signal(&key, n_wake);
 1257         umtxq_unlock(&key);
 1258         umtx_key_release(&key);
 1259         return (0);
 1260 }
 1261 
 1262 /*
 1263  * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 1264  */
 1265 static int
 1266 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
 1267         struct _umtx_time *timeout, int mode)
 1268 {
 1269         struct abs_timeout timo;
 1270         struct umtx_q *uq;
 1271         uint32_t owner, old, id;
 1272         int error = 0;
 1273 
 1274         id = td->td_tid;
 1275         uq = td->td_umtxq;
 1276 
 1277         if (timeout != NULL)
 1278                 abs_timeout_init2(&timo, timeout);
 1279 
 1280         /*
 1281          * Care must be exercised when dealing with umtx structure. It
 1282          * can fault on any access.
 1283          */
 1284         for (;;) {
 1285                 owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
 1286                 if (mode == _UMUTEX_WAIT) {
 1287                         if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
 1288                                 return (0);
 1289                 } else {
 1290                         /*
 1291                          * Try the uncontested case.  This should be done in userland.
 1292                          */
 1293                         owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
 1294 
 1295                         /* The acquire succeeded. */
 1296                         if (owner == UMUTEX_UNOWNED)
 1297                                 return (0);
 1298 
 1299                         /* The address was invalid. */
 1300                         if (owner == -1)
 1301                                 return (EFAULT);
 1302 
 1303                         /* If no one owns it but it is contested try to acquire it. */
 1304                         if (owner == UMUTEX_CONTESTED) {
 1305                                 owner = casuword32(&m->m_owner,
 1306                                     UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 1307 
 1308                                 if (owner == UMUTEX_CONTESTED)
 1309                                         return (0);
 1310 
 1311                                 /* The address was invalid. */
 1312                                 if (owner == -1)
 1313                                         return (EFAULT);
 1314 
 1315                                 error = umtxq_check_susp(td);
 1316                                 if (error != 0)
 1317                                         return (error);
 1318 
 1319                                 /* If this failed the lock has changed, restart. */
 1320                                 continue;
 1321                         }
 1322                 }
 1323 
 1324                 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
 1325                     (owner & ~UMUTEX_CONTESTED) == id)
 1326                         return (EDEADLK);
 1327 
 1328                 if (mode == _UMUTEX_TRY)
 1329                         return (EBUSY);
 1330 
 1331                 /*
 1332                  * If we caught a signal, we have retried and now
 1333                  * exit immediately.
 1334                  */
 1335                 if (error != 0)
 1336                         return (error);
 1337 
 1338                 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
 1339                     GET_SHARE(flags), &uq->uq_key)) != 0)
 1340                         return (error);
 1341 
 1342                 umtxq_lock(&uq->uq_key);
 1343                 umtxq_busy(&uq->uq_key);
 1344                 umtxq_insert(uq);
 1345                 umtxq_unlock(&uq->uq_key);
 1346 
 1347                 /*
 1348                  * Set the contested bit so that a release in user space
 1349                  * knows to use the system call for unlock.  If this fails
 1350                  * either some one else has acquired the lock or it has been
 1351                  * released.
 1352                  */
 1353                 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
 1354 
 1355                 /* The address was invalid. */
 1356                 if (old == -1) {
 1357                         umtxq_lock(&uq->uq_key);
 1358                         umtxq_remove(uq);
 1359                         umtxq_unbusy(&uq->uq_key);
 1360                         umtxq_unlock(&uq->uq_key);
 1361                         umtx_key_release(&uq->uq_key);
 1362                         return (EFAULT);
 1363                 }
 1364 
 1365                 /*
 1366                  * We set the contested bit, sleep. Otherwise the lock changed
 1367                  * and we need to retry or we lost a race to the thread
 1368                  * unlocking the umtx.
 1369                  */
 1370                 umtxq_lock(&uq->uq_key);
 1371                 umtxq_unbusy(&uq->uq_key);
 1372                 if (old == owner)
 1373                         error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
 1374                             NULL : &timo);
 1375                 umtxq_remove(uq);
 1376                 umtxq_unlock(&uq->uq_key);
 1377                 umtx_key_release(&uq->uq_key);
 1378 
 1379                 if (error == 0)
 1380                         error = umtxq_check_susp(td);
 1381         }
 1382 
 1383         return (0);
 1384 }
 1385 
 1386 /*
 1387  * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 1388  */
 1389 static int
 1390 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
 1391 {
 1392         struct umtx_key key;
 1393         uint32_t owner, old, id;
 1394         int error;
 1395         int count;
 1396 
 1397         id = td->td_tid;
 1398         /*
 1399          * Make sure we own this mtx.
 1400          */
 1401         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1402         if (owner == -1)
 1403                 return (EFAULT);
 1404 
 1405         if ((owner & ~UMUTEX_CONTESTED) != id)
 1406                 return (EPERM);
 1407 
 1408         if ((owner & UMUTEX_CONTESTED) == 0) {
 1409                 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
 1410                 if (old == -1)
 1411                         return (EFAULT);
 1412                 if (old == owner)
 1413                         return (0);
 1414                 owner = old;
 1415         }
 1416 
 1417         /* We should only ever be in here for contested locks */
 1418         if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
 1419             &key)) != 0)
 1420                 return (error);
 1421 
 1422         umtxq_lock(&key);
 1423         umtxq_busy(&key);
 1424         count = umtxq_count(&key);
 1425         umtxq_unlock(&key);
 1426 
 1427         /*
 1428          * When unlocking the umtx, it must be marked as unowned if
 1429          * there is zero or one thread only waiting for it.
 1430          * Otherwise, it must be marked as contested.
 1431          */
 1432         old = casuword32(&m->m_owner, owner,
 1433                 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
 1434         umtxq_lock(&key);
 1435         umtxq_signal(&key,1);
 1436         umtxq_unbusy(&key);
 1437         umtxq_unlock(&key);
 1438         umtx_key_release(&key);
 1439         if (old == -1)
 1440                 return (EFAULT);
 1441         if (old != owner)
 1442                 return (EINVAL);
 1443         return (0);
 1444 }
 1445 
 1446 /*
 1447  * Check if the mutex is available and wake up a waiter,
 1448  * only for simple mutex.
 1449  */
 1450 static int
 1451 do_wake_umutex(struct thread *td, struct umutex *m)
 1452 {
 1453         struct umtx_key key;
 1454         uint32_t owner;
 1455         uint32_t flags;
 1456         int error;
 1457         int count;
 1458 
 1459         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1460         if (owner == -1)
 1461                 return (EFAULT);
 1462 
 1463         if ((owner & ~UMUTEX_CONTESTED) != 0)
 1464                 return (0);
 1465 
 1466         flags = fuword32(&m->m_flags);
 1467 
 1468         /* We should only ever be in here for contested locks */
 1469         if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
 1470             &key)) != 0)
 1471                 return (error);
 1472 
 1473         umtxq_lock(&key);
 1474         umtxq_busy(&key);
 1475         count = umtxq_count(&key);
 1476         umtxq_unlock(&key);
 1477 
 1478         if (count <= 1)
 1479                 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
 1480 
 1481         umtxq_lock(&key);
 1482         if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
 1483                 umtxq_signal(&key, 1);
 1484         umtxq_unbusy(&key);
 1485         umtxq_unlock(&key);
 1486         umtx_key_release(&key);
 1487         return (0);
 1488 }
 1489 
 1490 /*
 1491  * Check if the mutex has waiters and tries to fix contention bit.
 1492  */
 1493 static int
 1494 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
 1495 {
 1496         struct umtx_key key;
 1497         uint32_t owner, old;
 1498         int type;
 1499         int error;
 1500         int count;
 1501 
 1502         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 1503         case 0:
 1504                 type = TYPE_NORMAL_UMUTEX;
 1505                 break;
 1506         case UMUTEX_PRIO_INHERIT:
 1507                 type = TYPE_PI_UMUTEX;
 1508                 break;
 1509         case UMUTEX_PRIO_PROTECT:
 1510                 type = TYPE_PP_UMUTEX;
 1511                 break;
 1512         default:
 1513                 return (EINVAL);
 1514         }
 1515         if ((error = umtx_key_get(m, type, GET_SHARE(flags),
 1516             &key)) != 0)
 1517                 return (error);
 1518 
 1519         owner = 0;
 1520         umtxq_lock(&key);
 1521         umtxq_busy(&key);
 1522         count = umtxq_count(&key);
 1523         umtxq_unlock(&key);
 1524         /*
 1525          * Only repair contention bit if there is a waiter, this means the mutex
 1526          * is still being referenced by userland code, otherwise don't update
 1527          * any memory.
 1528          */
 1529         if (count > 1) {
 1530                 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1531                 while ((owner & UMUTEX_CONTESTED) ==0) {
 1532                         old = casuword32(&m->m_owner, owner,
 1533                             owner|UMUTEX_CONTESTED);
 1534                         if (old == owner)
 1535                                 break;
 1536                         owner = old;
 1537                         if (old == -1)
 1538                                 break;
 1539                         error = umtxq_check_susp(td);
 1540                         if (error != 0)
 1541                                 break;
 1542                 }
 1543         } else if (count == 1) {
 1544                 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1545                 while ((owner & ~UMUTEX_CONTESTED) != 0 &&
 1546                        (owner & UMUTEX_CONTESTED) == 0) {
 1547                         old = casuword32(&m->m_owner, owner,
 1548                             owner|UMUTEX_CONTESTED);
 1549                         if (old == owner)
 1550                                 break;
 1551                         owner = old;
 1552                         if (old == -1)
 1553                                 break;
 1554                         error = umtxq_check_susp(td);
 1555                         if (error != 0)
 1556                                 break;
 1557                 }
 1558         }
 1559         umtxq_lock(&key);
 1560         if (owner == -1) {
 1561                 error = EFAULT;
 1562                 umtxq_signal(&key, INT_MAX);
 1563         }
 1564         else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
 1565                 umtxq_signal(&key, 1);
 1566         umtxq_unbusy(&key);
 1567         umtxq_unlock(&key);
 1568         umtx_key_release(&key);
 1569         return (error);
 1570 }
 1571 
 1572 static inline struct umtx_pi *
 1573 umtx_pi_alloc(int flags)
 1574 {
 1575         struct umtx_pi *pi;
 1576 
 1577         pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
 1578         TAILQ_INIT(&pi->pi_blocked);
 1579         atomic_add_int(&umtx_pi_allocated, 1);
 1580         return (pi);
 1581 }
 1582 
 1583 static inline void
 1584 umtx_pi_free(struct umtx_pi *pi)
 1585 {
 1586         uma_zfree(umtx_pi_zone, pi);
 1587         atomic_add_int(&umtx_pi_allocated, -1);
 1588 }
 1589 
 1590 /*
 1591  * Adjust the thread's position on a pi_state after its priority has been
 1592  * changed.
 1593  */
 1594 static int
 1595 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
 1596 {
 1597         struct umtx_q *uq, *uq1, *uq2;
 1598         struct thread *td1;
 1599 
 1600         mtx_assert(&umtx_lock, MA_OWNED);
 1601         if (pi == NULL)
 1602                 return (0);
 1603 
 1604         uq = td->td_umtxq;
 1605 
 1606         /*
 1607          * Check if the thread needs to be moved on the blocked chain.
 1608          * It needs to be moved if either its priority is lower than
 1609          * the previous thread or higher than the next thread.
 1610          */
 1611         uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
 1612         uq2 = TAILQ_NEXT(uq, uq_lockq);
 1613         if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
 1614             (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
 1615                 /*
 1616                  * Remove thread from blocked chain and determine where
 1617                  * it should be moved to.
 1618                  */
 1619                 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
 1620                 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
 1621                         td1 = uq1->uq_thread;
 1622                         MPASS(td1->td_proc->p_magic == P_MAGIC);
 1623                         if (UPRI(td1) > UPRI(td))
 1624                                 break;
 1625                 }
 1626 
 1627                 if (uq1 == NULL)
 1628                         TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
 1629                 else
 1630                         TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
 1631         }
 1632         return (1);
 1633 }
 1634 
 1635 /*
 1636  * Propagate priority when a thread is blocked on POSIX
 1637  * PI mutex.
 1638  */ 
 1639 static void
 1640 umtx_propagate_priority(struct thread *td)
 1641 {
 1642         struct umtx_q *uq;
 1643         struct umtx_pi *pi;
 1644         int pri;
 1645 
 1646         mtx_assert(&umtx_lock, MA_OWNED);
 1647         pri = UPRI(td);
 1648         uq = td->td_umtxq;
 1649         pi = uq->uq_pi_blocked;
 1650         if (pi == NULL)
 1651                 return;
 1652 
 1653         for (;;) {
 1654                 td = pi->pi_owner;
 1655                 if (td == NULL || td == curthread)
 1656                         return;
 1657 
 1658                 MPASS(td->td_proc != NULL);
 1659                 MPASS(td->td_proc->p_magic == P_MAGIC);
 1660 
 1661                 thread_lock(td);
 1662                 if (td->td_lend_user_pri > pri)
 1663                         sched_lend_user_prio(td, pri);
 1664                 else {
 1665                         thread_unlock(td);
 1666                         break;
 1667                 }
 1668                 thread_unlock(td);
 1669 
 1670                 /*
 1671                  * Pick up the lock that td is blocked on.
 1672                  */
 1673                 uq = td->td_umtxq;
 1674                 pi = uq->uq_pi_blocked;
 1675                 if (pi == NULL)
 1676                         break;
 1677                 /* Resort td on the list if needed. */
 1678                 umtx_pi_adjust_thread(pi, td);
 1679         }
 1680 }
 1681 
 1682 /*
 1683  * Unpropagate priority for a PI mutex when a thread blocked on
 1684  * it is interrupted by signal or resumed by others.
 1685  */
 1686 static void
 1687 umtx_repropagate_priority(struct umtx_pi *pi)
 1688 {
 1689         struct umtx_q *uq, *uq_owner;
 1690         struct umtx_pi *pi2;
 1691         int pri;
 1692 
 1693         mtx_assert(&umtx_lock, MA_OWNED);
 1694 
 1695         while (pi != NULL && pi->pi_owner != NULL) {
 1696                 pri = PRI_MAX;
 1697                 uq_owner = pi->pi_owner->td_umtxq;
 1698 
 1699                 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
 1700                         uq = TAILQ_FIRST(&pi2->pi_blocked);
 1701                         if (uq != NULL) {
 1702                                 if (pri > UPRI(uq->uq_thread))
 1703                                         pri = UPRI(uq->uq_thread);
 1704                         }
 1705                 }
 1706 
 1707                 if (pri > uq_owner->uq_inherited_pri)
 1708                         pri = uq_owner->uq_inherited_pri;
 1709                 thread_lock(pi->pi_owner);
 1710                 sched_lend_user_prio(pi->pi_owner, pri);
 1711                 thread_unlock(pi->pi_owner);
 1712                 if ((pi = uq_owner->uq_pi_blocked) != NULL)
 1713                         umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
 1714         }
 1715 }
 1716 
 1717 /*
 1718  * Insert a PI mutex into owned list.
 1719  */
 1720 static void
 1721 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
 1722 {
 1723         struct umtx_q *uq_owner;
 1724 
 1725         uq_owner = owner->td_umtxq;
 1726         mtx_assert(&umtx_lock, MA_OWNED);
 1727         if (pi->pi_owner != NULL)
 1728                 panic("pi_ower != NULL");
 1729         pi->pi_owner = owner;
 1730         TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
 1731 }
 1732 
 1733 /*
 1734  * Claim ownership of a PI mutex.
 1735  */
 1736 static int
 1737 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
 1738 {
 1739         struct umtx_q *uq, *uq_owner;
 1740 
 1741         uq_owner = owner->td_umtxq;
 1742         mtx_lock_spin(&umtx_lock);
 1743         if (pi->pi_owner == owner) {
 1744                 mtx_unlock_spin(&umtx_lock);
 1745                 return (0);
 1746         }
 1747 
 1748         if (pi->pi_owner != NULL) {
 1749                 /*
 1750                  * userland may have already messed the mutex, sigh.
 1751                  */
 1752                 mtx_unlock_spin(&umtx_lock);
 1753                 return (EPERM);
 1754         }
 1755         umtx_pi_setowner(pi, owner);
 1756         uq = TAILQ_FIRST(&pi->pi_blocked);
 1757         if (uq != NULL) {
 1758                 int pri;
 1759 
 1760                 pri = UPRI(uq->uq_thread);
 1761                 thread_lock(owner);
 1762                 if (pri < UPRI(owner))
 1763                         sched_lend_user_prio(owner, pri);
 1764                 thread_unlock(owner);
 1765         }
 1766         mtx_unlock_spin(&umtx_lock);
 1767         return (0);
 1768 }
 1769 
 1770 /*
 1771  * Adjust a thread's order position in its blocked PI mutex,
 1772  * this may result new priority propagating process.
 1773  */
 1774 void
 1775 umtx_pi_adjust(struct thread *td, u_char oldpri)
 1776 {
 1777         struct umtx_q *uq;
 1778         struct umtx_pi *pi;
 1779 
 1780         uq = td->td_umtxq;
 1781         mtx_lock_spin(&umtx_lock);
 1782         /*
 1783          * Pick up the lock that td is blocked on.
 1784          */
 1785         pi = uq->uq_pi_blocked;
 1786         if (pi != NULL) {
 1787                 umtx_pi_adjust_thread(pi, td);
 1788                 umtx_repropagate_priority(pi);
 1789         }
 1790         mtx_unlock_spin(&umtx_lock);
 1791 }
 1792 
 1793 /*
 1794  * Sleep on a PI mutex.
       *
       * Queues uq (which must belong to curthread) on its umtx chain and on
       * pi's blocked list in priority order, marks the thread
       * TDF_UPIBLOCKED, propagates its priority along the owner chain,
       * drops the chain's busy state and sleeps.  After the sleep the steps
       * are undone: uq is removed from both queues, the flag is cleared and
       * the lent priorities are recomputed.
       *
       * uq:    wait-queue entry of the current thread.
       * pi:    the PI mutex to block on.
       * owner: thread id of the presumed owner; it is only looked up (in
       *        the current process) when pi has no owner recorded.
       * wmesg: wait message handed to umtxq_sleep().
       * timo:  timeout handed to umtxq_sleep(); callers pass NULL when
       *        there is no timeout.
       *
       * Must be entered with the umtx chain locked and busy (both are
       * asserted); the chain is unbusied before sleeping and unlocked
       * before returning.  Returns the result of umtxq_sleep().
 1795  */
 1796 static int
 1797 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
 1798         uint32_t owner, const char *wmesg, struct abs_timeout *timo)
 1799 {
 1800         struct umtxq_chain *uc;
 1801         struct thread *td, *td1;
 1802         struct umtx_q *uq1;
 1803         int pri;
 1804         int error = 0;
 1805 
 1806         td = uq->uq_thread;
 1807         KASSERT(td == curthread, ("inconsistent uq_thread"));
 1808         uc = umtxq_getchain(&uq->uq_key);
 1809         UMTXQ_LOCKED_ASSERT(uc);
 1810         UMTXQ_BUSY_ASSERT(uc);
 1811         umtxq_insert(uq);
 1812         mtx_lock_spin(&umtx_lock);
 1813         if (pi->pi_owner == NULL) {
                      /*
                       * No owner recorded yet: drop the spin lock around
                       * the thread lookup, then re-check pi_owner because
                       * it may have been set in the meantime.
                       */
 1814                 mtx_unlock_spin(&umtx_lock);
 1815                 /* XXX Only look up thread in current process. */
 1816                 td1 = tdfind(owner, curproc->p_pid);
 1817                 mtx_lock_spin(&umtx_lock);
 1818                 if (td1 != NULL) {
 1819                         if (pi->pi_owner == NULL)
 1820                                 umtx_pi_setowner(pi, td1);
                              /*
                               * Presumably pairs with a process lock taken
                               * by tdfind() -- confirm against tdfind().
                               */
 1821                         PROC_UNLOCK(td1->td_proc);
 1822                 }
 1823         }
 1824 
              /*
               * Keep pi_blocked sorted: insert before the first waiter whose
               * UPRI is numerically greater than ours, else at the tail.
               */
 1825         TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
 1826                 pri = UPRI(uq1->uq_thread);
 1827                 if (pri > UPRI(td))
 1828                         break;
 1829         }
 1830 
 1831         if (uq1 != NULL)
 1832                 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
 1833         else
 1834                 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
 1835 
 1836         uq->uq_pi_blocked = pi;
 1837         thread_lock(td);
 1838         td->td_flags |= TDF_UPIBLOCKED;
 1839         thread_unlock(td);
 1840         umtx_propagate_priority(td);
 1841         mtx_unlock_spin(&umtx_lock);
 1842         umtxq_unbusy(&uq->uq_key);
 1843 
 1844         error = umtxq_sleep(uq, wmesg, timo);
 1845         umtxq_remove(uq);
 1846 
              /* Woken up or interrupted: undo the blocked state. */
 1847         mtx_lock_spin(&umtx_lock);
 1848         uq->uq_pi_blocked = NULL;
 1849         thread_lock(td);
 1850         td->td_flags &= ~TDF_UPIBLOCKED;
 1851         thread_unlock(td);
 1852         TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
 1853         umtx_repropagate_priority(pi);
 1854         mtx_unlock_spin(&umtx_lock);
 1855         umtxq_unlock(&uq->uq_key);
 1856 
 1857         return (error);
 1858 }
 1859 
 1860 /*
 1861  * Add reference count for a PI mutex.
 1862  */
 1863 static void
 1864 umtx_pi_ref(struct umtx_pi *pi)
 1865 {
 1866         struct umtxq_chain *uc;
 1867 
 1868         uc = umtxq_getchain(&pi->pi_key);
 1869         UMTXQ_LOCKED_ASSERT(uc);
 1870         pi->pi_refcount++;
 1871 }
 1872 
 1873 /*
 1874  * Decrease reference count for a PI mutex, if the counter
 1875  * is decreased to zero, its memory space is freed.
 1876  */ 
 1877 static void
 1878 umtx_pi_unref(struct umtx_pi *pi)
 1879 {
 1880         struct umtxq_chain *uc;
 1881 
 1882         uc = umtxq_getchain(&pi->pi_key);
 1883         UMTXQ_LOCKED_ASSERT(uc);
 1884         KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
 1885         if (--pi->pi_refcount == 0) {
 1886                 mtx_lock_spin(&umtx_lock);
 1887                 if (pi->pi_owner != NULL) {
 1888                         TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
 1889                                 pi, pi_link);
 1890                         pi->pi_owner = NULL;
 1891                 }
 1892                 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
 1893                         ("blocked queue not empty"));
 1894                 mtx_unlock_spin(&umtx_lock);
 1895                 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
 1896                 umtx_pi_free(pi);
 1897         }
 1898 }
 1899 
 1900 /*
 1901  * Find a PI mutex in hash table.
 1902  */
 1903 static struct umtx_pi *
 1904 umtx_pi_lookup(struct umtx_key *key)
 1905 {
 1906         struct umtxq_chain *uc;
 1907         struct umtx_pi *pi;
 1908 
 1909         uc = umtxq_getchain(key);
 1910         UMTXQ_LOCKED_ASSERT(uc);
 1911 
 1912         TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
 1913                 if (umtx_key_match(&pi->pi_key, key)) {
 1914                         return (pi);
 1915                 }
 1916         }
 1917         return (NULL);
 1918 }
 1919 
 1920 /*
 1921  * Insert a PI mutex into hash table.
 1922  */
 1923 static inline void
 1924 umtx_pi_insert(struct umtx_pi *pi)
 1925 {
 1926         struct umtxq_chain *uc;
 1927 
 1928         uc = umtxq_getchain(&pi->pi_key);
 1929         UMTXQ_LOCKED_ASSERT(uc);
 1930         TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
 1931 }
 1932 
 1933 /*
 1934  * Lock a PI mutex.
       *
       * td:      the locking thread; its tid is the value stored in m_owner.
       * m:       userland mutex; every access to it may fault.
       * flags:   mutex flags (sharing mode via GET_SHARE(), and
       *          UMUTEX_ERROR_CHECK).
       * timeout: optional timeout, or NULL to wait without one.
       * try:     non-zero to fail with EBUSY instead of sleeping.
       *
       * Returns 0 on success; EFAULT if the mutex word cannot be accessed;
       * EDEADLK if UMUTEX_ERROR_CHECK is set and td already owns the mutex;
       * EBUSY for a failed try-lock; EPERM from umtx_pi_claim(); or an
       * error from umtx_key_get(), umtxq_check_susp() or umtxq_sleep_pi().
 1935  */
 1936 static int
 1937 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
 1938     struct _umtx_time *timeout, int try)
 1939 {
 1940         struct abs_timeout timo;
 1941         struct umtx_q *uq;
 1942         struct umtx_pi *pi, *new_pi;
 1943         uint32_t id, owner, old;
 1944         int error;
 1945 
 1946         id = td->td_tid;
 1947         uq = td->td_umtxq;
 1948 
 1949         if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
 1950             &uq->uq_key)) != 0)
 1951                 return (error);
 1952 
 1953         if (timeout != NULL)
 1954                 abs_timeout_init2(&timo, timeout);
 1955 
              /*
               * Find or create the kernel-side umtx_pi for this key.  The
               * first allocation uses M_NOWAIT because the chain is locked;
               * if it fails, the chain is unlocked for an M_WAITOK
               * allocation and the lookup is repeated, since another thread
               * may have inserted an entry in the meantime.
               */
 1956         umtxq_lock(&uq->uq_key);
 1957         pi = umtx_pi_lookup(&uq->uq_key);
 1958         if (pi == NULL) {
 1959                 new_pi = umtx_pi_alloc(M_NOWAIT);
 1960                 if (new_pi == NULL) {
 1961                         umtxq_unlock(&uq->uq_key);
 1962                         new_pi = umtx_pi_alloc(M_WAITOK);
 1963                         umtxq_lock(&uq->uq_key);
 1964                         pi = umtx_pi_lookup(&uq->uq_key);
 1965                         if (pi != NULL) {
 1966                                 umtx_pi_free(new_pi);
 1967                                 new_pi = NULL;
 1968                         }
 1969                 }
 1970                 if (new_pi != NULL) {
 1971                         new_pi->pi_key = uq->uq_key;
 1972                         umtx_pi_insert(new_pi);
 1973                         pi = new_pi;
 1974                 }
 1975         }
              /* Hold a reference for the duration of this call. */
 1976         umtx_pi_ref(pi);
 1977         umtxq_unlock(&uq->uq_key);
 1978 
 1979         /*
 1980          * Care must be exercised when dealing with umtx structure.  It
 1981          * can fault on any access.
 1982          */
 1983         for (;;) {
 1984                 /*
 1985                  * Try the uncontested case.  This should be done in userland.
 1986                  */
 1987                 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
 1988 
 1989                 /* The acquire succeeded. */
 1990                 if (owner == UMUTEX_UNOWNED) {
 1991                         error = 0;
 1992                         break;
 1993                 }
 1994 
 1995                 /* The address was invalid. */
 1996                 if (owner == -1) {
 1997                         error = EFAULT;
 1998                         break;
 1999                 }
 2000 
 2001                 /* If no one owns it but it is contested try to acquire it. */
 2002                 if (owner == UMUTEX_CONTESTED) {
 2003                         owner = casuword32(&m->m_owner,
 2004                             UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 2005 
 2006                         if (owner == UMUTEX_CONTESTED) {
                                      /*
                                       * We now own the userland word; record
                                       * the ownership on the kernel side.
                                       */
 2007                                 umtxq_lock(&uq->uq_key);
 2008                                 umtxq_busy(&uq->uq_key);
 2009                                 error = umtx_pi_claim(pi, td);
 2010                                 umtxq_unbusy(&uq->uq_key);
 2011                                 umtxq_unlock(&uq->uq_key);
 2012                                 break;
 2013                         }
 2014 
 2015                         /* The address was invalid. */
 2016                         if (owner == -1) {
 2017                                 error = EFAULT;
 2018                                 break;
 2019                         }
 2020 
 2021                         error = umtxq_check_susp(td);
 2022                         if (error != 0)
 2023                                 break;
 2024 
 2025                         /* If this failed the lock has changed, restart. */
 2026                         continue;
 2027                 }
 2028 
 2029                 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
 2030                     (owner & ~UMUTEX_CONTESTED) == id) {
 2031                         error = EDEADLK;
 2032                         break;
 2033                 }
 2034 
 2035                 if (try != 0) {
 2036                         error = EBUSY;
 2037                         break;
 2038                 }
 2039 
 2040                 /*
 2041                  * If we caught a signal, we have retried and now
 2042                  * exit immediately.
                       * (error is non-zero here only when the sleep below
                       * failed on the previous iteration and jumped back to
                       * the top of the loop.)
 2043                  */
 2044                 if (error != 0)
 2045                         break;
 2046                         
                      /*
                       * Mark the chain busy, then drop the chain lock while
                       * touching the userland word.
                       */
 2047                 umtxq_lock(&uq->uq_key);
 2048                 umtxq_busy(&uq->uq_key);
 2049                 umtxq_unlock(&uq->uq_key);
 2050 
 2051                 /*
 2052                  * Set the contested bit so that a release in user space
 2053                  * knows to use the system call for unlock.  If this fails
 2054                  * either someone else has acquired the lock or it has been
 2055                  * released.
 2056                  */
 2057                 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
 2058 
 2059                 /* The address was invalid. */
 2060                 if (old == -1) {
 2061                         umtxq_lock(&uq->uq_key);
 2062                         umtxq_unbusy(&uq->uq_key);
 2063                         umtxq_unlock(&uq->uq_key);
 2064                         error = EFAULT;
 2065                         break;
 2066                 }
 2067 
 2068                 umtxq_lock(&uq->uq_key);
 2069                 /*
 2070                  * We set the contested bit, sleep. Otherwise the lock changed
 2071                  * and we need to retry or we lost a race to the thread
 2072                  * unlocking the umtx.
 2073                  */
 2074                 if (old == owner) {
                              /*
                               * umtxq_sleep_pi() unbusies and unlocks the
                               * chain itself.  On error, go around once more
                               * to retry the acquisition before giving up.
                               */
 2075                         error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
 2076                             "umtxpi", timeout == NULL ? NULL : &timo);
 2077                         if (error != 0)
 2078                                 continue;
 2079                 } else {
 2080                         umtxq_unbusy(&uq->uq_key);
 2081                         umtxq_unlock(&uq->uq_key);
 2082                 }
 2083 
 2084                 error = umtxq_check_susp(td);
 2085                 if (error != 0)
 2086                         break;
 2087         }
 2088 
              /* Drop the reference taken above; this may free pi. */
 2089         umtxq_lock(&uq->uq_key);
 2090         umtx_pi_unref(pi);
 2091         umtxq_unlock(&uq->uq_key);
 2092 
 2093         umtx_key_release(&uq->uq_key);
 2094         return (error);
 2095 }
 2096 
 2097 /*
 2098  * Unlock a PI mutex.
       *
       * td:    the unlocking thread; it must be the owner stored in m_owner.
       * m:     userland mutex; every access to it may fault.
       * flags: mutex flags (sharing mode via GET_SHARE()).
       *
       * Returns 0 on success; EFAULT if the mutex word cannot be accessed;
       * EPERM if td does not own the mutex, either according to m_owner or
       * according to the kernel-side owner record; EINVAL if m_owner
       * changed before the final compare-and-swap; or an error from
       * umtx_key_get().
 2099  */
 2100 static int
 2101 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
 2102 {
 2103         struct umtx_key key;
 2104         struct umtx_q *uq_first, *uq_first2, *uq_me;
 2105         struct umtx_pi *pi, *pi2;
 2106         uint32_t owner, old, id;
 2107         int error;
 2108         int count;
 2109         int pri;
 2110 
 2111         id = td->td_tid;
 2112         /*
 2113          * Make sure we own this mtx.
 2114          */
 2115         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 2116         if (owner == -1)
 2117                 return (EFAULT);
 2118 
 2119         if ((owner & ~UMUTEX_CONTESTED) != id)
 2120                 return (EPERM);
 2121 
 2122         /* This should be done in userland */
 2123         if ((owner & UMUTEX_CONTESTED) == 0) {
 2124                 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
 2125                 if (old == -1)
 2126                         return (EFAULT);
 2127                 if (old == owner)
 2128                         return (0);
                      /* The word changed under us; continue with its new value. */
 2129                 owner = old;
 2130         }
 2131 
 2132         /* We should only ever be in here for contested locks */
 2133         if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
 2134             &key)) != 0)
 2135                 return (error);
 2136 
 2137         umtxq_lock(&key);
 2138         umtxq_busy(&key);
              /*
               * count is presumably the number of waiters queued on the key
               * and uq_first the first of them -- confirm against
               * umtxq_count_pi().
               */
 2139         count = umtxq_count_pi(&key, &uq_first);
 2140         if (uq_first != NULL) {
 2141                 mtx_lock_spin(&umtx_lock);
 2142                 pi = uq_first->uq_pi_blocked;
 2143                 KASSERT(pi != NULL, ("pi == NULL?"));
 2144                 if (pi->pi_owner != curthread) {
 2145                         mtx_unlock_spin(&umtx_lock);
 2146                         umtxq_unbusy(&key);
 2147                         umtxq_unlock(&key);
 2148                         umtx_key_release(&key);
 2149                         /* userland messed the mutex */
 2150                         return (EPERM);
 2151                 }
                      /* Give up kernel-side ownership of the mutex. */
 2152                 uq_me = curthread->td_umtxq;
 2153                 pi->pi_owner = NULL;
 2154                 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
 2155                 /* get highest priority thread which is still sleeping. */
 2156                 uq_first = TAILQ_FIRST(&pi->pi_blocked);
 2157                 while (uq_first != NULL && 
 2158                        (uq_first->uq_flags & UQF_UMTXQ) == 0) {
 2159                         uq_first = TAILQ_NEXT(uq_first, uq_lockq);
 2160                 }
                      /*
                       * Recompute our lent priority from the first waiters
                       * of the PI mutexes we still own.
                       * NOTE(review): unlike umtx_repropagate_priority(),
                       * the result is not clamped to uq_inherited_pri --
                       * confirm whether that is intended.
                       */
 2161                 pri = PRI_MAX;
 2162                 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
 2163                         uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
 2164                         if (uq_first2 != NULL) {
 2165                                 if (pri > UPRI(uq_first2->uq_thread))
 2166                                         pri = UPRI(uq_first2->uq_thread);
 2167                         }
 2168                 }
 2169                 thread_lock(curthread);
 2170                 sched_lend_user_prio(curthread, pri);
 2171                 thread_unlock(curthread);
 2172                 mtx_unlock_spin(&umtx_lock);
 2173                 if (uq_first)
 2174                         umtxq_signal_thread(uq_first);
 2175         }
              /* The chain stays busy while the userland word is updated. */
 2176         umtxq_unlock(&key);
 2177 
 2178         /*
 2179          * When unlocking the umtx, it must be marked as unowned if
 2180          * there is zero or one thread only waiting for it.
 2181          * Otherwise, it must be marked as contested.
 2182          */
 2183         old = casuword32(&m->m_owner, owner,
 2184                 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
 2185 
 2186         umtxq_lock(&key);
 2187         umtxq_unbusy(&key);
 2188         umtxq_unlock(&key);
 2189         umtx_key_release(&key);
 2190         if (old == -1)
 2191                 return (EFAULT);
 2192         if (old != owner)
 2193                 return (EINVAL);
 2194         return (0);
 2195 }
 2196 
 2197 /*
 2198  * Lock a PP mutex.
       *
       * td:      the locking thread; its tid is the value stored in m_owner.
       * m:       userland mutex; m_ceilings[0] holds its priority ceiling.
       * flags:   mutex flags (sharing mode via GET_SHARE(), and
       *          UMUTEX_ERROR_CHECK).
       * timeout: optional timeout, or NULL to wait without one.
       * try:     non-zero to fail with EBUSY instead of sleeping.
       *
       * Returns 0 on success; EINVAL if the ceiling is out of range or the
       * thread's priority is numerically below the ceiling priority;
       * EFAULT if the mutex word cannot be accessed; EDEADLK if
       * UMUTEX_ERROR_CHECK is set and td already owns the mutex; EBUSY for
       * a failed try-lock; or an error from umtx_key_get() or
       * umtxq_sleep().
 2199  */
 2200 static int
 2201 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
 2202     struct _umtx_time *timeout, int try)
 2203 {
 2204         struct abs_timeout timo;
 2205         struct umtx_q *uq, *uq2;
 2206         struct umtx_pi *pi;
 2207         uint32_t ceiling;
 2208         uint32_t owner, id;
 2209         int error, pri, old_inherited_pri, su;
 2210 
 2211         id = td->td_tid;
 2212         uq = td->td_umtxq;
 2213         if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
 2214             &uq->uq_key)) != 0)
 2215                 return (error);
 2216 
 2217         if (timeout != NULL)
 2218                 abs_timeout_init2(&timo, timeout);
 2219 
              /* The priority is only raised when this privilege check passes. */
 2220         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
 2221         for (;;) {
 2222                 old_inherited_pri = uq->uq_inherited_pri;
 2223                 umtxq_lock(&uq->uq_key);
 2224                 umtxq_busy(&uq->uq_key);
 2225                 umtxq_unlock(&uq->uq_key);
 2226 
                      /*
                       * Convert the userland ceiling to an offset from
                       * PRI_MIN_REALTIME.  Out-of-range values wrap around
                       * in the unsigned subtraction and are rejected; a
                       * faulting read, which presumably yields -1, is
                       * rejected the same way.
                       */
 2227                 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
 2228                 if (ceiling > RTP_PRIO_MAX) {
 2229                         error = EINVAL;
 2230                         goto out;
 2231                 }
 2232 
 2233                 mtx_lock_spin(&umtx_lock);
                      /* Refuse if the thread's priority is numerically below the ceiling. */
 2234                 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
 2235                         mtx_unlock_spin(&umtx_lock);
 2236                         error = EINVAL;
 2237                         goto out;
 2238                 }
 2239                 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
 2240                         uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
 2241                         thread_lock(td);
 2242                         if (uq->uq_inherited_pri < UPRI(td))
 2243                                 sched_lend_user_prio(td, uq->uq_inherited_pri);
 2244                         thread_unlock(td);
 2245                 }
 2246                 mtx_unlock_spin(&umtx_lock);
 2247 
                      /*
                       * The unlocked state of a PP mutex is
                       * UMUTEX_CONTESTED, so that is the value to swap from.
                       */
 2248                 owner = casuword32(&m->m_owner,
 2249                     UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 2250 
 2251                 if (owner == UMUTEX_CONTESTED) {
 2252                         error = 0;
 2253                         break;
 2254                 }
 2255 
 2256                 /* The address was invalid. */
 2257                 if (owner == -1) {
 2258                         error = EFAULT;
 2259                         break;
 2260                 }
 2261 
 2262                 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
 2263                     (owner & ~UMUTEX_CONTESTED) == id) {
 2264                         error = EDEADLK;
 2265                         break;
 2266                 }
 2267 
 2268                 if (try != 0) {
 2269                         error = EBUSY;
 2270                         break;
 2271                 }
 2272 
 2273                 /*
 2274                  * If we caught a signal, we have retried and now
 2275                  * exit immediately.
 2276                  */
 2277                 if (error != 0)
 2278                         break;
 2279 
 2280                 umtxq_lock(&uq->uq_key);
 2281                 umtxq_insert(uq);
 2282                 umtxq_unbusy(&uq->uq_key);
 2283                 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
 2284                     NULL : &timo);
 2285                 umtxq_remove(uq);
 2286                 umtxq_unlock(&uq->uq_key);
 2287 
                      /*
                       * We did not get the mutex: undo the ceiling boost and
                       * recompute the lent priority from the PI mutexes we
                       * own before trying again.
                       */
 2288                 mtx_lock_spin(&umtx_lock);
 2289                 uq->uq_inherited_pri = old_inherited_pri;
 2290                 pri = PRI_MAX;
 2291                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2292                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2293                         if (uq2 != NULL) {
 2294                                 if (pri > UPRI(uq2->uq_thread))
 2295                                         pri = UPRI(uq2->uq_thread);
 2296                         }
 2297                 }
 2298                 if (pri > uq->uq_inherited_pri)
 2299                         pri = uq->uq_inherited_pri;
 2300                 thread_lock(td);
 2301                 sched_lend_user_prio(td, pri);
 2302                 thread_unlock(td);
 2303                 mtx_unlock_spin(&umtx_lock);
 2304         }
 2305 
              /*
               * On failure, undo any ceiling boost applied in the last
               * iteration, using the same recomputation as above.
               */
 2306         if (error != 0) {
 2307                 mtx_lock_spin(&umtx_lock);
 2308                 uq->uq_inherited_pri = old_inherited_pri;
 2309                 pri = PRI_MAX;
 2310                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2311                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2312                         if (uq2 != NULL) {
 2313                                 if (pri > UPRI(uq2->uq_thread))
 2314                                         pri = UPRI(uq2->uq_thread);
 2315                         }
 2316                 }
 2317                 if (pri > uq->uq_inherited_pri)
 2318                         pri = uq->uq_inherited_pri;
 2319                 thread_lock(td);
 2320                 sched_lend_user_prio(td, pri);
 2321                 thread_unlock(td);
 2322                 mtx_unlock_spin(&umtx_lock);
 2323         }
 2324 
              /* Every path that reaches this point still holds the chain busy. */
 2325 out:
 2326         umtxq_lock(&uq->uq_key);
 2327         umtxq_unbusy(&uq->uq_key);
 2328         umtxq_unlock(&uq->uq_key);
 2329         umtx_key_release(&uq->uq_key);
 2330         return (error);
 2331 }
 2332 
 2333 /*
 2334  * Unlock a PP mutex.
       *
       * td:    the unlocking thread; it must be the owner stored in m_owner.
       * m:     userland mutex; m_ceilings[1] supplies the ceiling from which
       *        the thread's inherited priority is restored.
       * flags: mutex flags (sharing mode via GET_SHARE()).
       *
       * Returns 0 on success; EFAULT if the mutex cannot be read or
       * written; EPERM if td does not own the mutex; EINVAL if
       * m_ceilings[1] is out of range; or an error from copyin() or
       * umtx_key_get().
 2335  */
 2336 static int
 2337 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
 2338 {
 2339         struct umtx_key key;
 2340         struct umtx_q *uq, *uq2;
 2341         struct umtx_pi *pi;
 2342         uint32_t owner, id;
 2343         uint32_t rceiling;
 2344         int error, pri, new_inherited_pri, su;
 2345 
 2346         id = td->td_tid;
 2347         uq = td->td_umtxq;
 2348         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
 2349 
 2350         /*
 2351          * Make sure we own this mtx.
 2352          */
 2353         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 2354         if (owner == -1)
 2355                 return (EFAULT);
 2356 
 2357         if ((owner & ~UMUTEX_CONTESTED) != id)
 2358                 return (EPERM);
 2359 
 2360         error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
 2361         if (error != 0)
 2362                 return (error);
 2363 
              /*
               * A stored value of -1 selects PRI_MAX; any other value is
               * converted to an offset from PRI_MIN_REALTIME and
               * range-checked.
               */
 2364         if (rceiling == -1)
 2365                 new_inherited_pri = PRI_MAX;
 2366         else {
 2367                 rceiling = RTP_PRIO_MAX - rceiling;
 2368                 if (rceiling > RTP_PRIO_MAX)
 2369                         return (EINVAL);
 2370                 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
 2371         }
 2372 
 2373         if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
 2374             &key)) != 0)
 2375                 return (error);
 2376         umtxq_lock(&key);
 2377         umtxq_busy(&key);
 2378         umtxq_unlock(&key);
 2379         /*
 2380          * For priority protected mutex, always set unlocked state
 2381          * to UMUTEX_CONTESTED, so that userland always enters kernel
 2382          * to lock the mutex, it is necessary because thread priority
 2383          * has to be adjusted for such mutex.
 2384          */
 2385         error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
 2386                 UMUTEX_CONTESTED);
 2387 
 2388         umtxq_lock(&key);
              /* Signal waiters only if the store to m_owner succeeded. */
 2389         if (error == 0)
 2390                 umtxq_signal(&key, 1);
 2391         umtxq_unbusy(&key);
 2392         umtxq_unlock(&key);
 2393 
 2394         if (error == -1)
 2395                 error = EFAULT;
 2396         else {
                      /*
                       * Install the new inherited priority (privileged
                       * threads only) and recompute the lent priority from
                       * the first waiters of the PI mutexes we still own.
                       */
 2397                 mtx_lock_spin(&umtx_lock);
 2398                 if (su != 0)
 2399                         uq->uq_inherited_pri = new_inherited_pri;
 2400                 pri = PRI_MAX;
 2401                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2402                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2403                         if (uq2 != NULL) {
 2404                                 if (pri > UPRI(uq2->uq_thread))
 2405                                         pri = UPRI(uq2->uq_thread);
 2406                         }
 2407                 }
 2408                 if (pri > uq->uq_inherited_pri)
 2409                         pri = uq->uq_inherited_pri;
 2410                 thread_lock(td);
 2411                 sched_lend_user_prio(td, pri);
 2412                 thread_unlock(td);
 2413                 mtx_unlock_spin(&umtx_lock);
 2414         }
 2415         umtx_key_release(&key);
 2416         return (error);
 2417 }
 2418 
 2419 static int
 2420 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
 2421         uint32_t *old_ceiling)
 2422 {
 2423         struct umtx_q *uq;
 2424         uint32_t save_ceiling;
 2425         uint32_t owner, id;
 2426         uint32_t flags;
 2427         int error;
 2428 
 2429         flags = fuword32(&m->m_flags);
 2430         if ((flags & UMUTEX_PRIO_PROTECT) == 0)
 2431                 return (EINVAL);
 2432         if (ceiling > RTP_PRIO_MAX)
 2433                 return (EINVAL);
 2434         id = td->td_tid;
 2435         uq = td->td_umtxq;
 2436         if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
 2437            &uq->uq_key)) != 0)
 2438                 return (error);
 2439         for (;;) {
 2440                 umtxq_lock(&uq->uq_key);
 2441                 umtxq_busy(&uq->uq_key);
 2442                 umtxq_unlock(&uq->uq_key);
 2443 
 2444                 save_ceiling = fuword32(&m->m_ceilings[0]);
 2445 
 2446                 owner = casuword32(&m->m_owner,
 2447                     UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 2448 
 2449                 if (owner == UMUTEX_CONTESTED) {
 2450                         suword32(&m->m_ceilings[0], ceiling);
 2451                         suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
 2452                                 UMUTEX_CONTESTED);
 2453                         error = 0;
 2454                         break;
 2455                 }
 2456 
 2457                 /* The address was invalid. */
 2458                 if (owner == -1) {
 2459                         error = EFAULT;
 2460                         break;
 2461                 }
 2462 
 2463                 if ((owner & ~UMUTEX_CONTESTED) == id) {
 2464                         suword32(&m->m_ceilings[0], ceiling);
 2465                         error = 0;
 2466                         break;
 2467                 }
 2468 
 2469                 /*
 2470                  * If we caught a signal, we have retried and now
 2471                  * exit immediately.
 2472                  */
 2473                 if (error != 0)
 2474                         break;
 2475 
 2476                 /*
 2477                  * We set the contested bit, sleep. Otherwise the lock changed
 2478                  * and we need to retry or we lost a race to the thread
 2479                  * unlocking the umtx.
 2480                  */
 2481                 umtxq_lock(&uq->uq_key);
 2482                 umtxq_insert(uq);
 2483                 umtxq_unbusy(&uq->uq_key);
 2484                 error = umtxq_sleep(uq, "umtxpp", NULL);
 2485                 umtxq_remove(uq);
 2486                 umtxq_unlock(&uq->uq_key);
 2487         }
 2488         umtxq_lock(&uq->uq_key);
 2489         if (error == 0)
 2490                 umtxq_signal(&uq->uq_key, INT_MAX);
 2491         umtxq_unbusy(&uq->uq_key);
 2492         umtxq_unlock(&uq->uq_key);
 2493         umtx_key_release(&uq->uq_key);
 2494         if (error == 0 && old_ceiling != NULL)
 2495                 suword32(old_ceiling, save_ceiling);
 2496         return (error);
 2497 }
 2498 
 2499 /*
 2500  * Lock a userland POSIX mutex.
 2501  */
 2502 static int
 2503 do_lock_umutex(struct thread *td, struct umutex *m,
 2504     struct _umtx_time *timeout, int mode)
 2505 {
 2506         uint32_t flags;
 2507         int error;
 2508 
 2509         flags = fuword32(&m->m_flags);
 2510         if (flags == -1)
 2511                 return (EFAULT);
 2512 
 2513         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2514         case 0:
 2515                 error = do_lock_normal(td, m, flags, timeout, mode);
 2516                 break;
 2517         case UMUTEX_PRIO_INHERIT:
 2518                 error = do_lock_pi(td, m, flags, timeout, mode);
 2519                 break;
 2520         case UMUTEX_PRIO_PROTECT:
 2521                 error = do_lock_pp(td, m, flags, timeout, mode);
 2522                 break;
 2523         default:
 2524                 return (EINVAL);
 2525         }
 2526         if (timeout == NULL) {
 2527                 if (error == EINTR && mode != _UMUTEX_WAIT)
 2528                         error = ERESTART;
 2529         } else {
 2530                 /* Timed-locking is not restarted. */
 2531                 if (error == ERESTART)
 2532                         error = EINTR;
 2533         }
 2534         return (error);
 2535 }
 2536 
 2537 /*
 2538  * Unlock a userland POSIX mutex.
 2539  */
 2540 static int
 2541 do_unlock_umutex(struct thread *td, struct umutex *m)
 2542 {
 2543         uint32_t flags;
 2544 
 2545         flags = fuword32(&m->m_flags);
 2546         if (flags == -1)
 2547                 return (EFAULT);
 2548 
 2549         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2550         case 0:
 2551                 return (do_unlock_normal(td, m, flags));
 2552         case UMUTEX_PRIO_INHERIT:
 2553                 return (do_unlock_pi(td, m, flags));
 2554         case UMUTEX_PRIO_PROTECT:
 2555                 return (do_unlock_pp(td, m, flags));
 2556         }
 2557 
 2558         return (EINVAL);
 2559 }
 2560 
 2561 static int
 2562 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
 2563         struct timespec *timeout, u_long wflags)
 2564 {
 2565         struct abs_timeout timo;
 2566         struct umtx_q *uq;
 2567         uint32_t flags;
 2568         uint32_t clockid;
 2569         int error;
 2570 
 2571         uq = td->td_umtxq;
 2572         flags = fuword32(&cv->c_flags);
 2573         error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
 2574         if (error != 0)
 2575                 return (error);
 2576 
 2577         if ((wflags & CVWAIT_CLOCKID) != 0) {
 2578                 clockid = fuword32(&cv->c_clockid);
 2579                 if (clockid < CLOCK_REALTIME ||
 2580                     clockid >= CLOCK_THREAD_CPUTIME_ID) {
 2581                         /* hmm, only HW clock id will work. */
 2582                         return (EINVAL);
 2583                 }
 2584         } else {
 2585                 clockid = CLOCK_REALTIME;
 2586         }
 2587 
 2588         umtxq_lock(&uq->uq_key);
 2589         umtxq_busy(&uq->uq_key);
 2590         umtxq_insert(uq);
 2591         umtxq_unlock(&uq->uq_key);
 2592 
 2593         /*
 2594          * Set c_has_waiters to 1 before releasing user mutex, also
 2595          * don't modify cache line when unnecessary.
 2596          */
 2597         if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
 2598                 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
 2599 
 2600         umtxq_lock(&uq->uq_key);
 2601         umtxq_unbusy(&uq->uq_key);
 2602         umtxq_unlock(&uq->uq_key);
 2603 
 2604         error = do_unlock_umutex(td, m);
 2605 
 2606         if (timeout != NULL)
 2607                 abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0),
 2608                         timeout);
 2609         
 2610         umtxq_lock(&uq->uq_key);
 2611         if (error == 0) {
 2612                 error = umtxq_sleep(uq, "ucond", timeout == NULL ?
 2613                     NULL : &timo);
 2614         }
 2615 
 2616         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 2617                 error = 0;
 2618         else {
 2619                 /*
 2620                  * This must be timeout,interrupted by signal or
 2621                  * surprious wakeup, clear c_has_waiter flag when
 2622                  * necessary.
 2623                  */
 2624                 umtxq_busy(&uq->uq_key);
 2625                 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
 2626                         int oldlen = uq->uq_cur_queue->length;
 2627                         umtxq_remove(uq);
 2628                         if (oldlen == 1) {
 2629                                 umtxq_unlock(&uq->uq_key);
 2630                                 suword32(
 2631                                     __DEVOLATILE(uint32_t *,
 2632                                          &cv->c_has_waiters), 0);
 2633                                 umtxq_lock(&uq->uq_key);
 2634                         }
 2635                 }
 2636                 umtxq_unbusy(&uq->uq_key);
 2637                 if (error == ERESTART)
 2638                         error = EINTR;
 2639         }
 2640 
 2641         umtxq_unlock(&uq->uq_key);
 2642         umtx_key_release(&uq->uq_key);
 2643         return (error);
 2644 }
 2645 
 2646 /*
 2647  * Signal a userland condition variable.
 2648  */
 2649 static int
 2650 do_cv_signal(struct thread *td, struct ucond *cv)
 2651 {
 2652         struct umtx_key key;
 2653         int error, cnt, nwake;
 2654         uint32_t flags;
 2655 
 2656         flags = fuword32(&cv->c_flags);
 2657         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 2658                 return (error); 
 2659         umtxq_lock(&key);
 2660         umtxq_busy(&key);
 2661         cnt = umtxq_count(&key);
 2662         nwake = umtxq_signal(&key, 1);
 2663         if (cnt <= nwake) {
 2664                 umtxq_unlock(&key);
 2665                 error = suword32(
 2666                     __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
 2667                 umtxq_lock(&key);
 2668         }
 2669         umtxq_unbusy(&key);
 2670         umtxq_unlock(&key);
 2671         umtx_key_release(&key);
 2672         return (error);
 2673 }
 2674 
 2675 static int
 2676 do_cv_broadcast(struct thread *td, struct ucond *cv)
 2677 {
 2678         struct umtx_key key;
 2679         int error;
 2680         uint32_t flags;
 2681 
 2682         flags = fuword32(&cv->c_flags);
 2683         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 2684                 return (error); 
 2685 
 2686         umtxq_lock(&key);
 2687         umtxq_busy(&key);
 2688         umtxq_signal(&key, INT_MAX);
 2689         umtxq_unlock(&key);
 2690 
 2691         error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
 2692 
 2693         umtxq_lock(&key);
 2694         umtxq_unbusy(&key);
 2695         umtxq_unlock(&key);
 2696 
 2697         umtx_key_release(&key);
 2698         return (error);
 2699 }
 2700 
      /*
       * Acquire a userland rwlock for reading.
       *
       * Fast path: while none of the bits in wrflags are set in rw_state, try
       * to increment the reader count with casuword32().  wrflags always
       * contains URWLOCK_WRITE_OWNER and, unless URWLOCK_PREFER_READER is set
       * in fflag or in the lock's rw_flags, URWLOCK_WRITE_WAITERS as well.
       *
       * Slow path: after umtxq_busy(), set URWLOCK_READ_WAITERS in rw_state,
       * increment rw_blocked_readers and sleep until the wrflags bits are
       * clear.  The thread that takes rw_blocked_readers down from 1 clears
       * URWLOCK_READ_WAITERS again.  The outer loop then retries the fast path.
       *
       * Returns 0 on success, EAGAIN when the reader count already equals
       * URWLOCK_MAX_READERS, EFAULT when casuword32() returns -1, or the error
       * from umtx_key_get(), umtxq_check_susp() or umtxq_sleep(); ERESTART is
       * converted to EINTR before returning.
       */
 2701 static int
 2702 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout)
 2703 {
 2704         struct abs_timeout timo;
 2705         struct umtx_q *uq;
 2706         uint32_t flags, wrflags;
 2707         int32_t state, oldstate;
 2708         int32_t blocked_readers;
 2709         int error;
 2710 
 2711         uq = td->td_umtxq;
 2712         flags = fuword32(&rwlock->rw_flags);
 2713         error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
 2714         if (error != 0)
 2715                 return (error);
 2716 
 2717         if (timeout != NULL)
 2718                 abs_timeout_init2(&timo, timeout);
 2719 
              /* wrflags: the rw_state bits that make a reader wait. */
 2720         wrflags = URWLOCK_WRITE_OWNER;
 2721         if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
 2722                 wrflags |= URWLOCK_WRITE_WAITERS;
 2723 
 2724         for (;;) {
 2725                 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
 2726                 /* try to lock it */
 2727                 while (!(state & wrflags)) {
 2728                         if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
 2729                                 umtx_key_release(&uq->uq_key);
 2730                                 return (EAGAIN);
 2731                         }
 2732                         oldstate = casuword32(&rwlock->rw_state, state, state + 1);
 2733                         if (oldstate == -1) {
 2734                                 umtx_key_release(&uq->uq_key);
 2735                                 return (EFAULT);
 2736                         }
                              /* The swap went through: read lock acquired. */
 2737                         if (oldstate == state) {
 2738                                 umtx_key_release(&uq->uq_key);
 2739                                 return (0);
 2740                         }
 2741                         error = umtxq_check_susp(td);
 2742                         if (error != 0)
 2743                                 break;
 2744                         state = oldstate;
 2745                 }
 2746 
                      /* umtxq_check_susp() failed in the try-lock loop above. */
 2747                 if (error)
 2748                         break;
 2749 
 2750                 /* grab monitor lock */
 2751                 umtxq_lock(&uq->uq_key);
 2752                 umtxq_busy(&uq->uq_key);
 2753                 umtxq_unlock(&uq->uq_key);
 2754 
 2755                 /*
 2756                  * re-read the state, in case it changed between the try-lock above
 2757                  * and the check below
 2758                  */
 2759                 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
 2760 
 2761                 /* set read contention bit */
 2762                 while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
 2763                         oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
 2764                         if (oldstate == -1) {
 2765                                 error = EFAULT;
 2766                                 break;
 2767                         }
 2768                         if (oldstate == state)
 2769                                 goto sleep;
 2770                         state = oldstate;
 2771                         error = umtxq_check_susp(td);
 2772                         if (error != 0)
 2773                                 break;
 2774                 }
                      /* Every exit below this point must undo umtxq_busy(). */
 2775                 if (error != 0) {
 2776                         umtxq_lock(&uq->uq_key);
 2777                         umtxq_unbusy(&uq->uq_key);
 2778                         umtxq_unlock(&uq->uq_key);
 2779                         break;
 2780                 }
 2781 
 2782                 /* state is changed while setting flags, restart */
 2783                 if (!(state & wrflags)) {
 2784                         umtxq_lock(&uq->uq_key);
 2785                         umtxq_unbusy(&uq->uq_key);
 2786                         umtxq_unlock(&uq->uq_key);
 2787                         error = umtxq_check_susp(td);
 2788                         if (error != 0)
 2789                                 break;
 2790                         continue;
 2791                 }
 2792 
 2793 sleep:
 2794                 /* contention bit is set, before sleeping, increase read waiter count */
                      /* NOTE: the fuword32()/suword32() results are not checked here. */
 2795                 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
 2796                 suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
 2797 
 2798                 while (state & wrflags) {
 2799                         umtxq_lock(&uq->uq_key);
 2800                         umtxq_insert(uq);
 2801                         umtxq_unbusy(&uq->uq_key);
 2802 
 2803                         error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
 2804                             NULL : &timo);
 2805 
 2806                         umtxq_busy(&uq->uq_key);
 2807                         umtxq_remove(uq);
 2808                         umtxq_unlock(&uq->uq_key);
 2809                         if (error)
 2810                                 break;
 2811                         state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
 2812                 }
 2813 
 2814                 /* decrease read waiter count, and may clear read contention bit */
 2815                 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
 2816                 suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
                      /* blocked_readers holds the value read before the decrement. */
 2817                 if (blocked_readers == 1) {
 2818                         state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
 2819                         for (;;) {
 2820                                 oldstate = casuword32(&rwlock->rw_state, state,
 2821                                          state & ~URWLOCK_READ_WAITERS);
 2822                                 if (oldstate == -1) {
 2823                                         error = EFAULT;
 2824                                         break;
 2825                                 }
 2826                                 if (oldstate == state)
 2827                                         break;
 2828                                 state = oldstate;
 2829                                 error = umtxq_check_susp(td);
 2830                                 if (error != 0)
 2831                                         break;
 2832                         }
 2833                 }
 2834 
 2835                 umtxq_lock(&uq->uq_key);
 2836                 umtxq_unbusy(&uq->uq_key);
 2837                 umtxq_unlock(&uq->uq_key);
                      /* On success fall through and retry the try-lock at the top. */
 2838                 if (error != 0)
 2839                         break;
 2840         }
 2841         umtx_key_release(&uq->uq_key);
 2842         if (error == ERESTART)
 2843                 error = EINTR;
 2844         return (error);
 2845 }
 2846 
      /*
       * Acquire a userland rwlock for writing.
       *
       * Fast path: while URWLOCK_WRITE_OWNER is clear and the reader count is
       * zero, try to set URWLOCK_WRITE_OWNER in rw_state with casuword32().
       *
       * Slow path: after umtxq_busy(), set URWLOCK_WRITE_WAITERS, increment
       * rw_blocked_writers and sleep on UMTX_EXCLUSIVE_QUEUE until the lock has
       * neither a write owner nor readers.  The thread that takes
       * rw_blocked_writers down from 1 clears URWLOCK_WRITE_WAITERS and samples
       * rw_blocked_readers.  If it then leaves with an error while rw_state
       * shows neither a write owner nor write waiters, it wakes the threads on
       * UMTX_SHARED_QUEUE.
       *
       * Returns 0 on success, EFAULT when casuword32() returns -1, or the error
       * from umtx_key_get(), umtxq_check_susp() or umtxq_sleep(); ERESTART is
       * converted to EINTR before returning.
       */
 2847 static int
 2848 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
 2849 {
 2850         struct abs_timeout timo;
 2851         struct umtx_q *uq;
 2852         uint32_t flags;
 2853         int32_t state, oldstate;
 2854         int32_t blocked_writers;
 2855         int32_t blocked_readers;
 2856         int error;
 2857 
 2858         uq = td->td_umtxq;
 2859         flags = fuword32(&rwlock->rw_flags);
 2860         error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
 2861         if (error != 0)
 2862                 return (error);
 2863 
 2864         if (timeout != NULL)
 2865                 abs_timeout_init2(&timo, timeout);
 2866 
 2867         blocked_readers = 0;
 2868         for (;;) {
 2869                 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                      /* try to lock it */
 2870                 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
 2871                         oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
 2872                         if (oldstate == -1) {
 2873                                 umtx_key_release(&uq->uq_key);
 2874                                 return (EFAULT);
 2875                         }
                              /* The swap went through: write lock acquired. */
 2876                         if (oldstate == state) {
 2877                                 umtx_key_release(&uq->uq_key);
 2878                                 return (0);
 2879                         }
 2880                         state = oldstate;
 2881                         error = umtxq_check_susp(td);
 2882                         if (error != 0)
 2883                                 break;
 2884                 }
 2885 
                      /*
                       * error is either fresh from the try-lock loop above or
                       * carried over from the previous iteration's sleep, which
                       * does not break out of the loop by itself.  Before giving
                       * up, wake the blocked readers sampled on the previous
                       * pass if no writer owns or waits for the lock.
                       */
 2886                 if (error) {
 2887                         if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
 2888                             blocked_readers != 0) {
 2889                                 umtxq_lock(&uq->uq_key);
 2890                                 umtxq_busy(&uq->uq_key);
 2891                                 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
 2892                                 umtxq_unbusy(&uq->uq_key);
 2893                                 umtxq_unlock(&uq->uq_key);
 2894                         }
 2895 
 2896                         break;
 2897                 }
 2898 
 2899                 /* grab monitor lock */
 2900                 umtxq_lock(&uq->uq_key);
 2901                 umtxq_busy(&uq->uq_key);
 2902                 umtxq_unlock(&uq->uq_key);
 2903 
 2904                 /*
 2905                  * re-read the state, in case it changed between the try-lock above
 2906                  * and the check below
 2907                  */
 2908                 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
 2909 
                      /* set write contention bit */
 2910                 while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
 2911                        (state & URWLOCK_WRITE_WAITERS) == 0) {
 2912                         oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
 2913                         if (oldstate == -1) {
 2914                                 error = EFAULT;
 2915                                 break;
 2916                         }
 2917                         if (oldstate == state)
 2918                                 goto sleep;
 2919                         state = oldstate;
 2920                         error = umtxq_check_susp(td);
 2921                         if (error != 0)
 2922                                 break;
 2923                 }
                      /* Every exit below this point must undo umtxq_busy(). */
 2924                 if (error != 0) {
 2925                         umtxq_lock(&uq->uq_key);
 2926                         umtxq_unbusy(&uq->uq_key);
 2927                         umtxq_unlock(&uq->uq_key);
 2928                         break;
 2929                 }
 2930 
                      /* state is changed while setting flags, restart */
 2931                 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
 2932                         umtxq_lock(&uq->uq_key);
 2933                         umtxq_unbusy(&uq->uq_key);
 2934                         umtxq_unlock(&uq->uq_key);
 2935                         error = umtxq_check_susp(td);
 2936                         if (error != 0)
 2937                                 break;
 2938                         continue;
 2939                 }
 2940 sleep:
                      /* contention bit is set, before sleeping, increase write waiter count */
                      /* NOTE: the fuword32()/suword32() results are not checked here. */
 2941                 blocked_writers = fuword32(&rwlock->rw_blocked_writers);
 2942                 suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
 2943 
 2944                 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
 2945                         umtxq_lock(&uq->uq_key);
 2946                         umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
 2947                         umtxq_unbusy(&uq->uq_key);
 2948 
 2949                         error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
 2950                             NULL : &timo);
 2951 
 2952                         umtxq_busy(&uq->uq_key);
 2953                         umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
 2954                         umtxq_unlock(&uq->uq_key);
 2955                         if (error)
 2956                                 break;
 2957                         state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
 2958                 }
 2959 
                      /* decrease write waiter count, and may clear write contention bit */
 2960                 blocked_writers = fuword32(&rwlock->rw_blocked_writers);
 2961                 suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
                      /* blocked_writers holds the value read before the decrement. */
 2962                 if (blocked_writers == 1) {
 2963                         state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
 2964                         for (;;) {
 2965                                 oldstate = casuword32(&rwlock->rw_state, state,
 2966                                          state & ~URWLOCK_WRITE_WAITERS);
 2967                                 if (oldstate == -1) {
 2968                                         error = EFAULT;
 2969                                         break;
 2970                                 }
 2971                                 if (oldstate == state)
 2972                                         break;
 2973                                 state = oldstate;
 2974                                 error = umtxq_check_susp(td);
 2975                                 /*
 2976                                  * We are leaving the URWLOCK_WRITE_WAITERS
 2977                                  * behind, but this should not harm the
 2978                                  * correctness.
 2979                                  */
 2980                                 if (error != 0)
 2981                                         break;
 2982                         }
                              /* Sampled for the reader wakeup in the error path at the loop top. */
 2983                         blocked_readers = fuword32(&rwlock->rw_blocked_readers);
 2984                 } else
 2985                         blocked_readers = 0;
 2986 
 2987                 umtxq_lock(&uq->uq_key);
 2988                 umtxq_unbusy(&uq->uq_key);
 2989                 umtxq_unlock(&uq->uq_key);
                      /*
                       * No error check here: control returns to the top of the
                       * loop, where the try-lock is attempted once more and a
                       * still-pending error is handled by the "if (error)" block.
                       */
 2990         }
 2991 
 2992         umtx_key_release(&uq->uq_key);
 2993         if (error == ERESTART)
 2994                 error = EINTR;
 2995         return (error);
 2996 }
 2997 
 2998 static int
 2999 do_rw_unlock(struct thread *td, struct urwlock *rwlock)
 3000 {
 3001         struct umtx_q *uq;
 3002         uint32_t flags;
 3003         int32_t state, oldstate;
 3004         int error, q, count;
 3005 
 3006         uq = td->td_umtxq;
 3007         flags = fuword32(&rwlock->rw_flags);
 3008         error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
 3009         if (error != 0)
 3010                 return (error);
 3011 
 3012         state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
 3013         if (state & URWLOCK_WRITE_OWNER) {
 3014                 for (;;) {
 3015                         oldstate = casuword32(&rwlock->rw_state, state, 
 3016                                 state & ~URWLOCK_WRITE_OWNER);
 3017                         if (oldstate == -1) {
 3018                                 error = EFAULT;
 3019                                 goto out;
 3020                         }
 3021                         if (oldstate != state) {
 3022                                 state = oldstate;
 3023                                 if (!(oldstate & URWLOCK_WRITE_OWNER)) {
 3024                                         error = EPERM;
 3025                                         goto out;
 3026                                 }
 3027                                 error = umtxq_check_susp(td);
 3028                                 if (error != 0)
 3029                                         goto out;
 3030                         } else
 3031                                 break;
 3032                 }
 3033         } else if (URWLOCK_READER_COUNT(state) != 0) {
 3034                 for (;;) {
 3035                         oldstate = casuword32(&rwlock->rw_state, state,
 3036                                 state - 1);
 3037                         if (oldstate == -1) {
 3038                                 error = EFAULT;
 3039                                 goto out;
 3040                         }
 3041                         if (oldstate != state) {
 3042                                 state = oldstate;
 3043                                 if (URWLOCK_READER_COUNT(oldstate) == 0) {
 3044                                         error = EPERM;
 3045                                         goto out;
 3046                                 }
 3047                                 error = umtxq_check_susp(td);
 3048                                 if (error != 0)
 3049                                         goto out;
 3050                         } else
 3051                                 break;
 3052                 }
 3053         } else {
 3054                 error = EPERM;
 3055                 goto out;
 3056         }
 3057 
 3058         count = 0;
 3059 
 3060         if (!(flags & URWLOCK_PREFER_READER)) {
 3061                 if (state & URWLOCK_WRITE_WAITERS) {
 3062                         count = 1;
 3063                         q = UMTX_EXCLUSIVE_QUEUE;
 3064                 } else if (state & URWLOCK_READ_WAITERS) {
 3065                         count = INT_MAX;
 3066                         q = UMTX_SHARED_QUEUE;
 3067                 }
 3068         } else {
 3069                 if (state & URWLOCK_READ_WAITERS) {
 3070                         count = INT_MAX;
 3071                         q = UMTX_SHARED_QUEUE;
 3072                 } else if (state & URWLOCK_WRITE_WAITERS) {
 3073                         count = 1;
 3074                         q = UMTX_EXCLUSIVE_QUEUE;
 3075                 }
 3076         }
 3077 
 3078         if (count) {
 3079                 umtxq_lock(&uq->uq_key);
 3080                 umtxq_busy(&uq->uq_key);
 3081                 umtxq_signal_queue(&uq->uq_key, count, q);
 3082                 umtxq_unbusy(&uq->uq_key);
 3083                 umtxq_unlock(&uq->uq_key);
 3084         }
 3085 out:
 3086         umtx_key_release(&uq->uq_key);
 3087         return (error);
 3088 }
 3089 
 3090 static int
 3091 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
 3092 {
 3093         struct abs_timeout timo;
 3094         struct umtx_q *uq;
 3095         uint32_t flags, count;
 3096         int error;
 3097 
 3098         uq = td->td_umtxq;
 3099         flags = fuword32(&sem->_flags);
 3100         error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
 3101         if (error != 0)
 3102                 return (error);
 3103 
 3104         if (timeout != NULL)
 3105                 abs_timeout_init2(&timo, timeout);
 3106 
 3107         umtxq_lock(&uq->uq_key);
 3108         umtxq_busy(&uq->uq_key);
 3109         umtxq_insert(uq);
 3110         umtxq_unlock(&uq->uq_key);
 3111         casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
 3112         count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
 3113         if (count != 0) {
 3114                 umtxq_lock(&uq->uq_key);
 3115                 umtxq_unbusy(&uq->uq_key);
 3116                 umtxq_remove(uq);
 3117                 umtxq_unlock(&uq->uq_key);
 3118                 umtx_key_release(&uq->uq_key);
 3119                 return (0);
 3120         }
 3121         umtxq_lock(&uq->uq_key);
 3122         umtxq_unbusy(&uq->uq_key);
 3123 
 3124         error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
 3125 
 3126         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 3127                 error = 0;
 3128         else {
 3129                 umtxq_remove(uq);
 3130                 /* A relative timeout cannot be restarted. */
 3131                 if (error == ERESTART && timeout != NULL &&
 3132                     (timeout->_flags & UMTX_ABSTIME) == 0)
 3133                         error = EINTR;
 3134         }
 3135         umtxq_unlock(&uq->uq_key);
 3136         umtx_key_release(&uq->uq_key);
 3137         return (error);
 3138 }
 3139 
 3140 /*
 3141  * Signal a userland condition variable.
 3142  */
 3143 static int
 3144 do_sem_wake(struct thread *td, struct _usem *sem)
 3145 {
 3146         struct umtx_key key;
 3147         int error, cnt;
 3148         uint32_t flags;
 3149 
 3150         flags = fuword32(&sem->_flags);
 3151         if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
 3152                 return (error); 
 3153         umtxq_lock(&key);
 3154         umtxq_busy(&key);
 3155         cnt = umtxq_count(&key);
 3156         if (cnt > 0) {
 3157                 umtxq_signal(&key, 1);
 3158                 /*
 3159                  * Check if count is greater than 0, this means the memory is
 3160                  * still being referenced by user code, so we can safely
 3161                  * update _has_waiters flag.
 3162                  */
 3163                 if (cnt == 1) {
 3164                         umtxq_unlock(&key);
 3165                         error = suword32(
 3166                             __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
 3167                         umtxq_lock(&key);
 3168                 }
 3169         }
 3170         umtxq_unbusy(&key);
 3171         umtxq_unlock(&key);
 3172         umtx_key_release(&key);
 3173         return (error);
 3174 }
 3175 
 3176 int
 3177 sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
 3178     /* struct umtx *umtx */
 3179 {
 3180         return do_lock_umtx(td, uap->umtx, td->td_tid, 0);
 3181 }
 3182 
 3183 int
 3184 sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
 3185     /* struct umtx *umtx */
 3186 {
 3187         return do_unlock_umtx(td, uap->umtx, td->td_tid);
 3188 }
 3189 
 3190 inline int
 3191 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
 3192 {
 3193         int error;
 3194 
 3195         error = copyin(addr, tsp, sizeof(struct timespec));
 3196         if (error == 0) {
 3197                 if (tsp->tv_sec < 0 ||
 3198                     tsp->tv_nsec >= 1000000000 ||
 3199                     tsp->tv_nsec < 0)
 3200                         error = EINVAL;
 3201         }
 3202         return (error);
 3203 }
 3204 
 3205 static inline int
 3206 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
 3207 {
 3208         int error;
 3209         
 3210         if (size <= sizeof(struct timespec)) {
 3211                 tp->_clockid = CLOCK_REALTIME;
 3212                 tp->_flags = 0;
 3213                 error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
 3214         } else 
 3215                 error = copyin(addr, tp, sizeof(struct _umtx_time));
 3216         if (error != 0)
 3217                 return (error);
 3218         if (tp->_timeout.tv_sec < 0 ||
 3219             tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
 3220                 return (EINVAL);
 3221         return (0);
 3222 }
 3223 
 3224 static int
 3225 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
 3226 {
 3227         struct timespec *ts, timeout;
 3228         int error;
 3229 
 3230         /* Allow a null timespec (wait forever). */
 3231         if (uap->uaddr2 == NULL)
 3232                 ts = NULL;
 3233         else {
 3234                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3235                 if (error != 0)
 3236                         return (error);
 3237                 ts = &timeout;
 3238         }
 3239         return (do_lock_umtx(td, uap->obj, uap->val, ts));
 3240 }
 3241 
 3242 static int
 3243 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
 3244 {
 3245         return (do_unlock_umtx(td, uap->obj, uap->val));
 3246 }
 3247 
 3248 static int
 3249 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
 3250 {
 3251         struct _umtx_time timeout, *tm_p;
 3252         int error;
 3253 
 3254         if (uap->uaddr2 == NULL)
 3255                 tm_p = NULL;
 3256         else {
 3257                 error = umtx_copyin_umtx_time(
 3258                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3259                 if (error != 0)
 3260                         return (error);
 3261                 tm_p = &timeout;
 3262         }
 3263         return do_wait(td, uap->obj, uap->val, tm_p, 0, 0);
 3264 }
 3265 
 3266 static int
 3267 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
 3268 {
 3269         struct _umtx_time timeout, *tm_p;
 3270         int error;
 3271 
 3272         if (uap->uaddr2 == NULL)
 3273                 tm_p = NULL;
 3274         else {
 3275                 error = umtx_copyin_umtx_time(
 3276                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3277                 if (error != 0)
 3278                         return (error);
 3279                 tm_p = &timeout;
 3280         }
 3281         return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
 3282 }
 3283 
 3284 static int
 3285 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
 3286 {
 3287         struct _umtx_time *tm_p, timeout;
 3288         int error;
 3289 
 3290         if (uap->uaddr2 == NULL)
 3291                 tm_p = NULL;
 3292         else {
 3293                 error = umtx_copyin_umtx_time(
 3294                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3295                 if (error != 0)
 3296                         return (error);
 3297                 tm_p = &timeout;
 3298         }
 3299         return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
 3300 }
 3301 
 3302 static int
 3303 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
 3304 {
 3305         return (kern_umtx_wake(td, uap->obj, uap->val, 0));
 3306 }
 3307 
 3308 #define BATCH_SIZE      128
 3309 static int
 3310 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
 3311 {
 3312         int count = uap->val;
 3313         void *uaddrs[BATCH_SIZE];
 3314         char **upp = (char **)uap->obj;
 3315         int tocopy;
 3316         int error = 0;
 3317         int i, pos = 0;
 3318 
 3319         while (count > 0) {
 3320                 tocopy = count;
 3321                 if (tocopy > BATCH_SIZE)
 3322                         tocopy = BATCH_SIZE;
 3323                 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
 3324                 if (error != 0)
 3325                         break;
 3326                 for (i = 0; i < tocopy; ++i)
 3327                         kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
 3328                 count -= tocopy;
 3329                 pos += tocopy;
 3330         }
 3331         return (error);
 3332 }
 3333 
 3334 static int
 3335 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
 3336 {
 3337         return (kern_umtx_wake(td, uap->obj, uap->val, 1));
 3338 }
 3339 
 3340 static int
 3341 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
 3342 {
 3343         struct _umtx_time *tm_p, timeout;
 3344         int error;
 3345 
 3346         /* Allow a null timespec (wait forever). */
 3347         if (uap->uaddr2 == NULL)
 3348                 tm_p = NULL;
 3349         else {
 3350                 error = umtx_copyin_umtx_time(
 3351                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3352                 if (error != 0)
 3353                         return (error);
 3354                 tm_p = &timeout;
 3355         }
 3356         return do_lock_umutex(td, uap->obj, tm_p, 0);
 3357 }
 3358 
 3359 static int
 3360 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
 3361 {
 3362         return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
 3363 }
 3364 
 3365 static int
 3366 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
 3367 {
 3368         struct _umtx_time *tm_p, timeout;
 3369         int error;
 3370 
 3371         /* Allow a null timespec (wait forever). */
 3372         if (uap->uaddr2 == NULL)
 3373                 tm_p = NULL;
 3374         else {
 3375                 error = umtx_copyin_umtx_time(
 3376                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3377                 if (error != 0)
 3378                         return (error);
 3379                 tm_p = &timeout;
 3380         }
 3381         return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
 3382 }
 3383 
 3384 static int
 3385 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
 3386 {
 3387         return do_wake_umutex(td, uap->obj);
 3388 }
 3389 
 3390 static int
 3391 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
 3392 {
 3393         return do_unlock_umutex(td, uap->obj);
 3394 }
 3395 
 3396 static int
 3397 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
 3398 {
 3399         return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
 3400 }
 3401 
 3402 static int
 3403 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
 3404 {
 3405         struct timespec *ts, timeout;
 3406         int error;
 3407 
 3408         /* Allow a null timespec (wait forever). */
 3409         if (uap->uaddr2 == NULL)
 3410                 ts = NULL;
 3411         else {
 3412                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3413                 if (error != 0)
 3414                         return (error);
 3415                 ts = &timeout;
 3416         }
 3417         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 3418 }
 3419 
 3420 static int
 3421 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
 3422 {
 3423         return do_cv_signal(td, uap->obj);
 3424 }
 3425 
 3426 static int
 3427 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
 3428 {
 3429         return do_cv_broadcast(td, uap->obj);
 3430 }
 3431 
 3432 static int
 3433 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
 3434 {
 3435         struct _umtx_time timeout;
 3436         int error;
 3437 
 3438         /* Allow a null timespec (wait forever). */
 3439         if (uap->uaddr2 == NULL) {
 3440                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3441         } else {
 3442                 error = umtx_copyin_umtx_time(uap->uaddr2,
 3443                    (size_t)uap->uaddr1, &timeout);
 3444                 if (error != 0)
 3445                         return (error);
 3446                 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
 3447         }
 3448         return (error);
 3449 }
 3450 
 3451 static int
 3452 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
 3453 {
 3454         struct _umtx_time timeout;
 3455         int error;
 3456 
 3457         /* Allow a null timespec (wait forever). */
 3458         if (uap->uaddr2 == NULL) {
 3459                 error = do_rw_wrlock(td, uap->obj, 0);
 3460         } else {
 3461                 error = umtx_copyin_umtx_time(uap->uaddr2, 
 3462                    (size_t)uap->uaddr1, &timeout);
 3463                 if (error != 0)
 3464                         return (error);
 3465 
 3466                 error = do_rw_wrlock(td, uap->obj, &timeout);
 3467         }
 3468         return (error);
 3469 }
 3470 
 3471 static int
 3472 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
 3473 {
 3474         return do_rw_unlock(td, uap->obj);
 3475 }
 3476 
 3477 static int
 3478 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
 3479 {
 3480         struct _umtx_time *tm_p, timeout;
 3481         int error;
 3482 
 3483         /* Allow a null timespec (wait forever). */
 3484         if (uap->uaddr2 == NULL)
 3485                 tm_p = NULL;
 3486         else {
 3487                 error = umtx_copyin_umtx_time(
 3488                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3489                 if (error != 0)
 3490                         return (error);
 3491                 tm_p = &timeout;
 3492         }
 3493         return (do_sem_wait(td, uap->obj, tm_p));
 3494 }
 3495 
 3496 static int
 3497 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
 3498 {
 3499         return do_sem_wake(td, uap->obj);
 3500 }
 3501 
 3502 static int
 3503 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
 3504 {
 3505         return do_wake2_umutex(td, uap->obj, uap->val);
 3506 }
 3507 
/* Signature shared by every _umtx_op() operation handler. */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Native dispatch table used by sys__umtx_op(): the handler is selected by
 * indexing with uap->op, so the order of the entries must follow the
 * numeric order of the operation codes named in the trailing comments.
 * sys__umtx_op() only bounds-checks the index against UMTX_OP_MAX, so the
 * table is expected to hold exactly that many entries.
 */
static _umtx_op_func op_table[] = {
        __umtx_op_lock_umtx,            /* UMTX_OP_LOCK */
        __umtx_op_unlock_umtx,          /* UMTX_OP_UNLOCK */
        __umtx_op_wait,                 /* UMTX_OP_WAIT */
        __umtx_op_wake,                 /* UMTX_OP_WAKE */
        __umtx_op_trylock_umutex,       /* UMTX_OP_MUTEX_TRYLOCK */
        __umtx_op_lock_umutex,          /* UMTX_OP_MUTEX_LOCK */
        __umtx_op_unlock_umutex,        /* UMTX_OP_MUTEX_UNLOCK */
        __umtx_op_set_ceiling,          /* UMTX_OP_SET_CEILING */
        __umtx_op_cv_wait,              /* UMTX_OP_CV_WAIT*/
        __umtx_op_cv_signal,            /* UMTX_OP_CV_SIGNAL */
        __umtx_op_cv_broadcast,         /* UMTX_OP_CV_BROADCAST */
        __umtx_op_wait_uint,            /* UMTX_OP_WAIT_UINT */
        __umtx_op_rw_rdlock,            /* UMTX_OP_RW_RDLOCK */
        __umtx_op_rw_wrlock,            /* UMTX_OP_RW_WRLOCK */
        __umtx_op_rw_unlock,            /* UMTX_OP_RW_UNLOCK */
        __umtx_op_wait_uint_private,    /* UMTX_OP_WAIT_UINT_PRIVATE */
        __umtx_op_wake_private,         /* UMTX_OP_WAKE_PRIVATE */
        __umtx_op_wait_umutex,          /* UMTX_OP_UMUTEX_WAIT */
        __umtx_op_wake_umutex,          /* UMTX_OP_UMUTEX_WAKE */
        __umtx_op_sem_wait,             /* UMTX_OP_SEM_WAIT */
        __umtx_op_sem_wake,             /* UMTX_OP_SEM_WAKE */
        __umtx_op_nwake_private,        /* UMTX_OP_NWAKE_PRIVATE */
        __umtx_op_wake2_umutex          /* UMTX_OP_UMUTEX_WAKE2 */
};
 3535 
 3536 int
 3537 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
 3538 {
 3539         if ((unsigned)uap->op < UMTX_OP_MAX)
 3540                 return (*op_table[uap->op])(td, uap);
 3541         return (EINVAL);
 3542 }
 3543 
 3544 #ifdef COMPAT_FREEBSD32
 3545 int
 3546 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
 3547     /* struct umtx *umtx */
 3548 {
 3549         return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
 3550 }
 3551 
 3552 int
 3553 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
 3554     /* struct umtx *umtx */
 3555 {
 3556         return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
 3557 }
 3558 
/*
 * Timespec with 32-bit fields, used as the copyin() target when reading a
 * timeout supplied by a 32-bit process (see umtx_copyin_timeout32()).
 */
struct timespec32 {
        int32_t tv_sec;         /* seconds; rejected by the copy-in helpers if negative */
        int32_t tv_nsec;        /* nanoseconds; must be in [0, 1000000000) */
};
 3563 
/*
 * 32-bit counterpart of struct _umtx_time; umtx_copyin_umtx_time32() copies
 * it in and converts it field by field.  The timeout must stay the first
 * member because the short form of the copy-in reads only a timespec32.
 */
struct umtx_time32 {
        struct  timespec32      timeout;        /* copied to _timeout */
        uint32_t                flags;          /* copied to _flags */
        uint32_t                clockid;        /* copied to _clockid */
};
 3569 
 3570 static inline int
 3571 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
 3572 {
 3573         struct timespec32 ts32;
 3574         int error;
 3575 
 3576         error = copyin(addr, &ts32, sizeof(struct timespec32));
 3577         if (error == 0) {
 3578                 if (ts32.tv_sec < 0 ||
 3579                     ts32.tv_nsec >= 1000000000 ||
 3580                     ts32.tv_nsec < 0)
 3581                         error = EINVAL;
 3582                 else {
 3583                         tsp->tv_sec = ts32.tv_sec;
 3584                         tsp->tv_nsec = ts32.tv_nsec;
 3585                 }
 3586         }
 3587         return (error);
 3588 }
 3589 
 3590 static inline int
 3591 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
 3592 {
 3593         struct umtx_time32 t32;
 3594         int error;
 3595         
 3596         t32.clockid = CLOCK_REALTIME;
 3597         t32.flags   = 0;
 3598         if (size <= sizeof(struct timespec32))
 3599                 error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
 3600         else 
 3601                 error = copyin(addr, &t32, sizeof(struct umtx_time32));
 3602         if (error != 0)
 3603                 return (error);
 3604         if (t32.timeout.tv_sec < 0 ||
 3605             t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
 3606                 return (EINVAL);
 3607         tp->_timeout.tv_sec = t32.timeout.tv_sec;
 3608         tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
 3609         tp->_flags = t32.flags;
 3610         tp->_clockid = t32.clockid;
 3611         return (0);
 3612 }
 3613 
 3614 static int
 3615 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3616 {
 3617         struct timespec *ts, timeout;
 3618         int error;
 3619 
 3620         /* Allow a null timespec (wait forever). */
 3621         if (uap->uaddr2 == NULL)
 3622                 ts = NULL;
 3623         else {
 3624                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3625                 if (error != 0)
 3626                         return (error);
 3627                 ts = &timeout;
 3628         }
 3629         return (do_lock_umtx32(td, uap->obj, uap->val, ts));
 3630 }
 3631 
 3632 static int
 3633 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3634 {
 3635         return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
 3636 }
 3637 
 3638 static int
 3639 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3640 {
 3641         struct _umtx_time *tm_p, timeout;
 3642         int error;
 3643 
 3644         if (uap->uaddr2 == NULL)
 3645                 tm_p = NULL;
 3646         else {
 3647                 error = umtx_copyin_umtx_time32(uap->uaddr2,
 3648                         (size_t)uap->uaddr1, &timeout);
 3649                 if (error != 0)
 3650                         return (error);
 3651                 tm_p = &timeout;
 3652         }
 3653         return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
 3654 }
 3655 
 3656 static int
 3657 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3658 {
 3659         struct _umtx_time *tm_p, timeout;
 3660         int error;
 3661 
 3662         /* Allow a null timespec (wait forever). */
 3663         if (uap->uaddr2 == NULL)
 3664                 tm_p = NULL;
 3665         else {
 3666                 error = umtx_copyin_umtx_time(uap->uaddr2,
 3667                             (size_t)uap->uaddr1, &timeout);
 3668                 if (error != 0)
 3669                         return (error);
 3670                 tm_p = &timeout;
 3671         }
 3672         return do_lock_umutex(td, uap->obj, tm_p, 0);
 3673 }
 3674 
 3675 static int
 3676 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3677 {
 3678         struct _umtx_time *tm_p, timeout;
 3679         int error;
 3680 
 3681         /* Allow a null timespec (wait forever). */
 3682         if (uap->uaddr2 == NULL)
 3683                 tm_p = NULL;
 3684         else {
 3685                 error = umtx_copyin_umtx_time32(uap->uaddr2, 
 3686                     (size_t)uap->uaddr1, &timeout);
 3687                 if (error != 0)
 3688                         return (error);
 3689                 tm_p = &timeout;
 3690         }
 3691         return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
 3692 }
 3693 
 3694 static int
 3695 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3696 {
 3697         struct timespec *ts, timeout;
 3698         int error;
 3699 
 3700         /* Allow a null timespec (wait forever). */
 3701         if (uap->uaddr2 == NULL)
 3702                 ts = NULL;
 3703         else {
 3704                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3705                 if (error != 0)
 3706                         return (error);
 3707                 ts = &timeout;
 3708         }
 3709         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 3710 }
 3711 
 3712 static int
 3713 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3714 {
 3715         struct _umtx_time timeout;
 3716         int error;
 3717 
 3718         /* Allow a null timespec (wait forever). */
 3719         if (uap->uaddr2 == NULL) {
 3720                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3721         } else {
 3722                 error = umtx_copyin_umtx_time32(uap->uaddr2,
 3723                     (size_t)uap->uaddr1, &timeout);
 3724                 if (error != 0)
 3725                         return (error);
 3726                 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
 3727         }
 3728         return (error);
 3729 }
 3730 
 3731 static int
 3732 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3733 {
 3734         struct _umtx_time timeout;
 3735         int error;
 3736 
 3737         /* Allow a null timespec (wait forever). */
 3738         if (uap->uaddr2 == NULL) {
 3739                 error = do_rw_wrlock(td, uap->obj, 0);
 3740         } else {
 3741                 error = umtx_copyin_umtx_time32(uap->uaddr2,
 3742                     (size_t)uap->uaddr1, &timeout);
 3743                 if (error != 0)
 3744                         return (error);
 3745                 error = do_rw_wrlock(td, uap->obj, &timeout);
 3746         }
 3747         return (error);
 3748 }
 3749 
 3750 static int
 3751 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
 3752 {
 3753         struct _umtx_time *tm_p, timeout;
 3754         int error;
 3755 
 3756         if (uap->uaddr2 == NULL)
 3757                 tm_p = NULL;
 3758         else {
 3759                 error = umtx_copyin_umtx_time32(
 3760                     uap->uaddr2, (size_t)uap->uaddr1,&timeout);
 3761                 if (error != 0)
 3762                         return (error);
 3763                 tm_p = &timeout;
 3764         }
 3765         return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
 3766 }
 3767 
 3768 static int
 3769 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3770 {
 3771         struct _umtx_time *tm_p, timeout;
 3772         int error;
 3773 
 3774         /* Allow a null timespec (wait forever). */
 3775         if (uap->uaddr2 == NULL)
 3776                 tm_p = NULL;
 3777         else {
 3778                 error = umtx_copyin_umtx_time32(uap->uaddr2,
 3779                     (size_t)uap->uaddr1, &timeout);
 3780                 if (error != 0)
 3781                         return (error);
 3782                 tm_p = &timeout;
 3783         }
 3784         return (do_sem_wait(td, uap->obj, tm_p));
 3785 }
 3786 
 3787 static int
 3788 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
 3789 {
 3790         int count = uap->val;
 3791         uint32_t uaddrs[BATCH_SIZE];
 3792         uint32_t **upp = (uint32_t **)uap->obj;
 3793         int tocopy;
 3794         int error = 0;
 3795         int i, pos = 0;
 3796 
 3797         while (count > 0) {
 3798                 tocopy = count;
 3799                 if (tocopy > BATCH_SIZE)
 3800                         tocopy = BATCH_SIZE;
 3801                 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
 3802                 if (error != 0)
 3803                         break;
 3804                 for (i = 0; i < tocopy; ++i)
 3805                         kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
 3806                                 INT_MAX, 1);
 3807                 count -= tocopy;
 3808                 pos += tocopy;
 3809         }
 3810         return (error);
 3811 }
 3812 
/*
 * Dispatch table used by freebsd32_umtx_op() for 32-bit processes.  It is
 * indexed by uap->op in the same order as the native op_table; operations
 * that read a timeout or an address array from user space use the *32
 * variants, the rest share the native handlers.
 */
static _umtx_op_func op_table_compat32[] = {
        __umtx_op_lock_umtx_compat32,   /* UMTX_OP_LOCK */
        __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */
        __umtx_op_wait_compat32,        /* UMTX_OP_WAIT */
        __umtx_op_wake,                 /* UMTX_OP_WAKE */
        __umtx_op_trylock_umutex,       /* UMTX_OP_MUTEX_TRYLOCK */
        __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */
        __umtx_op_unlock_umutex,        /* UMTX_OP_MUTEX_UNLOCK */
        __umtx_op_set_ceiling,          /* UMTX_OP_SET_CEILING */
        __umtx_op_cv_wait_compat32,     /* UMTX_OP_CV_WAIT*/
        __umtx_op_cv_signal,            /* UMTX_OP_CV_SIGNAL */
        __umtx_op_cv_broadcast,         /* UMTX_OP_CV_BROADCAST */
        __umtx_op_wait_compat32,        /* UMTX_OP_WAIT_UINT */
        __umtx_op_rw_rdlock_compat32,   /* UMTX_OP_RW_RDLOCK */
        __umtx_op_rw_wrlock_compat32,   /* UMTX_OP_RW_WRLOCK */
        __umtx_op_rw_unlock,            /* UMTX_OP_RW_UNLOCK */
        __umtx_op_wait_uint_private_compat32,   /* UMTX_OP_WAIT_UINT_PRIVATE */
        __umtx_op_wake_private,         /* UMTX_OP_WAKE_PRIVATE */
        __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
        __umtx_op_wake_umutex,          /* UMTX_OP_UMUTEX_WAKE */
        __umtx_op_sem_wait_compat32,    /* UMTX_OP_SEM_WAIT */
        __umtx_op_sem_wake,             /* UMTX_OP_SEM_WAKE */
        __umtx_op_nwake_private32,      /* UMTX_OP_NWAKE_PRIVATE */
        __umtx_op_wake2_umutex          /* UMTX_OP_UMUTEX_WAKE2 */
};
 3838 
 3839 int
 3840 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
 3841 {
 3842         if ((unsigned)uap->op < UMTX_OP_MAX)
 3843                 return (*op_table_compat32[uap->op])(td,
 3844                         (struct _umtx_op_args *)uap);
 3845         return (EINVAL);
 3846 }
 3847 #endif
 3848 
 3849 void
 3850 umtx_thread_init(struct thread *td)
 3851 {
 3852         td->td_umtxq = umtxq_alloc();
 3853         td->td_umtxq->uq_thread = td;
 3854 }
 3855 
 3856 void
 3857 umtx_thread_fini(struct thread *td)
 3858 {
 3859         umtxq_free(td->td_umtxq);
 3860 }
 3861 
 3862 /*
 3863  * It will be called when new thread is created, e.g fork().
 3864  */
 3865 void
 3866 umtx_thread_alloc(struct thread *td)
 3867 {
 3868         struct umtx_q *uq;
 3869 
 3870         uq = td->td_umtxq;
 3871         uq->uq_inherited_pri = PRI_MAX;
 3872 
 3873         KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
 3874         KASSERT(uq->uq_thread == td, ("uq_thread != td"));
 3875         KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
 3876         KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
 3877 }
 3878 
 3879 /*
 3880  * exec() hook.
 3881  */
 3882 static void
 3883 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
 3884         struct image_params *imgp __unused)
 3885 {
 3886         umtx_thread_cleanup(curthread);
 3887 }
 3888 
 3889 /*
 3890  * thread_exit() hook.
 3891  */
 3892 void
 3893 umtx_thread_exit(struct thread *td)
 3894 {
 3895         umtx_thread_cleanup(td);
 3896 }
 3897 
 3898 /*
 3899  * clean up umtx data.
 3900  */
 3901 static void
 3902 umtx_thread_cleanup(struct thread *td)
 3903 {
 3904         struct umtx_q *uq;
 3905         struct umtx_pi *pi;
 3906 
 3907         if ((uq = td->td_umtxq) == NULL)
 3908                 return;
 3909 
 3910         mtx_lock_spin(&umtx_lock);
 3911         uq->uq_inherited_pri = PRI_MAX;
 3912         while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
 3913                 pi->pi_owner = NULL;
 3914                 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
 3915         }
 3916         mtx_unlock_spin(&umtx_lock);
 3917         thread_lock(td);
 3918         sched_lend_user_prio(td, PRI_MAX);
 3919         thread_unlock(td);
 3920 }

Cache object: 5fafa022eabf77d6d0a525b73a3b5797


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.