The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
    3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice unmodified, this list of conditions, and the following
   11  *    disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/10.0/sys/kern/kern_umtx.c 251684 2013-06-13 09:33:22Z kib $");
   30 
   31 #include "opt_compat.h"
   32 #include "opt_umtx_profiling.h"
   33 
   34 #include <sys/param.h>
   35 #include <sys/kernel.h>
   36 #include <sys/limits.h>
   37 #include <sys/lock.h>
   38 #include <sys/malloc.h>
   39 #include <sys/mutex.h>
   40 #include <sys/priv.h>
   41 #include <sys/proc.h>
   42 #include <sys/sbuf.h>
   43 #include <sys/sched.h>
   44 #include <sys/smp.h>
   45 #include <sys/sysctl.h>
   46 #include <sys/sysent.h>
   47 #include <sys/systm.h>
   48 #include <sys/sysproto.h>
   49 #include <sys/syscallsubr.h>
   50 #include <sys/eventhandler.h>
   51 #include <sys/umtx.h>
   52 
   53 #include <vm/vm.h>
   54 #include <vm/vm_param.h>
   55 #include <vm/pmap.h>
   56 #include <vm/vm_map.h>
   57 #include <vm/vm_object.h>
   58 
   59 #include <machine/cpu.h>
   60 
   61 #ifdef COMPAT_FREEBSD32
   62 #include <compat/freebsd32/freebsd32_proto.h>
   63 #endif
   64 
   65 #define _UMUTEX_TRY             1
   66 #define _UMUTEX_WAIT            2
   67 
   68 #ifdef UMTX_PROFILING
   69 #define UPROF_PERC_BIGGER(w, f, sw, sf)                                 \
   70         (((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
   71 #endif
   72 
/* Priority-inheritance (PI) mutex information. */
struct umtx_pi {
        /* Thread that owns the mutex. */
        struct thread           *pi_owner;

        /* Reference count. */
        int                     pi_refcount;

        /* List entry linking the PI mutexes held by a thread. */
        TAILQ_ENTRY(umtx_pi)    pi_link;

        /* List entry in the hash. */
        TAILQ_ENTRY(umtx_pi)    pi_hashlink;

        /* List of waiters (struct umtx_q) blocked on this mutex. */
        TAILQ_HEAD(,umtx_q)     pi_blocked;

        /* Key identifying the userland lock object. */
        struct umtx_key         pi_key;
};
   93 
/* A user (waiter) of a userland synchronization object. */
struct umtx_q {
        /* Linkage on the waiter list of a per-key queue (umtxq_queue.head). */
        TAILQ_ENTRY(umtx_q)     uq_link;

        /* Umtx key. */
        struct umtx_key         uq_key;

        /* Umtx flags. */
        int                     uq_flags;
        /* UQF_UMTXQ is set while this entry is linked on a wait queue. */
#define UQF_UMTXQ       0x0001

        /* The thread that waits. */
        struct thread           *uq_thread;

        /*
         * PI mutex this waiter is blocked on.  Reading requires either
         * the chain lock or umtx_lock; writing requires that both the
         * chain lock and umtx_lock are held.
         */
        struct umtx_pi          *uq_pi_blocked;

        /* Linkage on a blocked list. */
        TAILQ_ENTRY(umtx_q)     uq_lockq;

        /*
         * List of umtx_pi records (original note: "Thread contending
         * with us").  NOTE(review): presumably the PI mutexes other
         * threads contend with us for -- confirm against the users.
         */
        TAILQ_HEAD(,umtx_pi)    uq_pi_contested;

        /* Priority inherited from a PP mutex; PRI_MAX after umtxq_alloc(). */
        u_char                  uq_inherited_pri;
        
        /* Spare queue header, ready to be reused; NULL while we are queued. */
        struct umtxq_queue      *uq_spare_queue;

        /* The queue we are currently on; NULL when not queued. */
        struct umtxq_queue      *uq_cur_queue;
};
  131 
  132 TAILQ_HEAD(umtxq_head, umtx_q);
  133 
/* Per-key wait queue. */
struct umtxq_queue {
        /* Waiters on this key, linked through umtx_q.uq_link. */
        struct umtxq_head       head;
        /* Key shared by every waiter on this queue. */
        struct umtx_key         key;
        /* Linkage on a chain's uc_queue[] list or on its spare list. */
        LIST_ENTRY(umtxq_queue) link;
        /* Number of waiters currently linked on head. */
        int                     length;
};
  141 
  142 LIST_HEAD(umtxq_list, umtxq_queue);
  143 
/* Wait-queue chain (hash bucket) for userland lock objects. */
struct umtxq_chain {
        /* Lock for this chain. */
        struct mtx              uc_lock;

        /* Lists of per-key sleep queues, indexed by the constants below. */
        struct umtxq_list       uc_queue[2];
#define UMTX_SHARED_QUEUE       0
#define UMTX_EXCLUSIVE_QUEUE    1

        /* Queue headers donated by waiters whose key already had a queue. */
        LIST_HEAD(, umtxq_queue) uc_spare_queue;

        /* Busy flag; set by umtxq_busy() and cleared by umtxq_unbusy(). */
        char                    uc_busy;

        /* Number of threads sleeping in umtxq_busy() for uc_busy to clear. */
        int                     uc_waiters;

        /* All PI mutexes (umtx_pi) on this chain. */
        TAILQ_HEAD(,umtx_pi)    uc_pi_list;

#ifdef UMTX_PROFILING
        /* Current number of per-key queues on the chain, and its peak. */
        u_int                   length;
        u_int                   max_length;
#endif
};
  170 
  171 #define UMTXQ_LOCKED_ASSERT(uc)         mtx_assert(&(uc)->uc_lock, MA_OWNED)
  172 #define UMTXQ_BUSY_ASSERT(uc)   KASSERT(&(uc)->uc_busy, ("umtx chain is not busy"))
  173 
/*
 * Do not propagate time-sharing priorities.  There is a security reason
 * for this: a user could create a PI mutex, have thread A lock it, and
 * have another thread B block on it.  Because B is sleeping, its priority
 * is boosted; priority propagation would then boost A's priority as well,
 * and it would never be lowered again even if A used 100% of the CPU,
 * which is unfair to other processes.
 */
  182 
  183 #define UPRI(td)        (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
  184                           (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
  185                          PRI_MAX_TIMESHARE : (td)->td_user_pri)
  186 
  187 #define GOLDEN_RATIO_PRIME      2654404609U
  188 #define UMTX_CHAINS             512
  189 #define UMTX_SHIFTS             (__WORD_BIT - 9)
  190 
  191 #define GET_SHARE(flags)        \
  192     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
  193 
  194 #define BUSY_SPINS              200
  195 
/* Absolute deadline together with the clock it is measured against. */
struct abs_timeout {
        /* Clock id handed to kern_clock_gettime(). */
        int clockid;
        /* Most recently sampled time on that clock. */
        struct timespec cur;
        /* Time at which the timeout expires. */
        struct timespec end;
};
  201 
  202 static uma_zone_t               umtx_pi_zone;
  203 static struct umtxq_chain       umtxq_chains[2][UMTX_CHAINS];
  204 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
  205 static int                      umtx_pi_allocated;
  206 
  207 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
  208 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
  209     &umtx_pi_allocated, 0, "Allocated umtx_pi");
  210 
  211 #ifdef UMTX_PROFILING
  212 static long max_length;
  213 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
  214 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
  215 #endif
  216 
  217 static void umtxq_sysinit(void *);
  218 static void umtxq_hash(struct umtx_key *key);
  219 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
  220 static void umtxq_lock(struct umtx_key *key);
  221 static void umtxq_unlock(struct umtx_key *key);
  222 static void umtxq_busy(struct umtx_key *key);
  223 static void umtxq_unbusy(struct umtx_key *key);
  224 static void umtxq_insert_queue(struct umtx_q *uq, int q);
  225 static void umtxq_remove_queue(struct umtx_q *uq, int q);
  226 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
  227 static int umtxq_count(struct umtx_key *key);
  228 static struct umtx_pi *umtx_pi_alloc(int);
  229 static void umtx_pi_free(struct umtx_pi *pi);
  230 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
  231 static void umtx_thread_cleanup(struct thread *td);
  232 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
  233         struct image_params *imgp __unused);
  234 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
  235 
  236 #define umtxq_signal(key, nwake)        umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
  237 #define umtxq_insert(uq)        umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
  238 #define umtxq_remove(uq)        umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
  239 
  240 static struct mtx umtx_lock;
  241 
  242 #ifdef UMTX_PROFILING
  243 static void
  244 umtx_init_profiling(void) 
  245 {
  246         struct sysctl_oid *chain_oid;
  247         char chain_name[10];
  248         int i;
  249 
  250         for (i = 0; i < UMTX_CHAINS; ++i) {
  251                 snprintf(chain_name, sizeof(chain_name), "%d", i);
  252                 chain_oid = SYSCTL_ADD_NODE(NULL, 
  253                     SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, 
  254                     chain_name, CTLFLAG_RD, NULL, "umtx hash stats");
  255                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  256                     "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
  257                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  258                     "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
  259         }
  260 }
  261 
  262 static int
  263 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
  264 {
  265         char buf[512];
  266         struct sbuf sb;
  267         struct umtxq_chain *uc;
  268         u_int fract, i, j, tot, whole;
  269         u_int sf0, sf1, sf2, sf3, sf4;
  270         u_int si0, si1, si2, si3, si4;
  271         u_int sw0, sw1, sw2, sw3, sw4;
  272 
  273         sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
  274         for (i = 0; i < 2; i++) {
  275                 tot = 0;
  276                 for (j = 0; j < UMTX_CHAINS; ++j) {
  277                         uc = &umtxq_chains[i][j];
  278                         mtx_lock(&uc->uc_lock);
  279                         tot += uc->max_length;
  280                         mtx_unlock(&uc->uc_lock);
  281                 }
  282                 if (tot == 0)
  283                         sbuf_printf(&sb, "%u) Empty ", i);
  284                 else {
  285                         sf0 = sf1 = sf2 = sf3 = sf4 = 0;
  286                         si0 = si1 = si2 = si3 = si4 = 0;
  287                         sw0 = sw1 = sw2 = sw3 = sw4 = 0;
  288                         for (j = 0; j < UMTX_CHAINS; j++) {
  289                                 uc = &umtxq_chains[i][j];
  290                                 mtx_lock(&uc->uc_lock);
  291                                 whole = uc->max_length * 100;
  292                                 mtx_unlock(&uc->uc_lock);
  293                                 fract = (whole % tot) * 100;
  294                                 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
  295                                         sf0 = fract;
  296                                         si0 = j;
  297                                         sw0 = whole;
  298                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw1,
  299                                     sf1)) {
  300                                         sf1 = fract;
  301                                         si1 = j;
  302                                         sw1 = whole;
  303                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw2,
  304                                     sf2)) {
  305                                         sf2 = fract;
  306                                         si2 = j;
  307                                         sw2 = whole;
  308                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw3,
  309                                     sf3)) {
  310                                         sf3 = fract;
  311                                         si3 = j;
  312                                         sw3 = whole;
  313                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw4,
  314                                     sf4)) {
  315                                         sf4 = fract;
  316                                         si4 = j;
  317                                         sw4 = whole;
  318                                 }
  319                         }
  320                         sbuf_printf(&sb, "queue %u:\n", i);
  321                         sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
  322                             sf0 / tot, si0);
  323                         sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
  324                             sf1 / tot, si1);
  325                         sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
  326                             sf2 / tot, si2);
  327                         sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
  328                             sf3 / tot, si3);
  329                         sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
  330                             sf4 / tot, si4);
  331                 }
  332         }
  333         sbuf_trim(&sb);
  334         sbuf_finish(&sb);
  335         sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
  336         sbuf_delete(&sb);
  337         return (0);
  338 }
  339 
  340 static int
  341 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
  342 {
  343         struct umtxq_chain *uc;
  344         u_int i, j;
  345         int clear, error;
  346 
  347         clear = 0;
  348         error = sysctl_handle_int(oidp, &clear, 0, req);
  349         if (error != 0 || req->newptr == NULL)
  350                 return (error);
  351 
  352         if (clear != 0) {
  353                 for (i = 0; i < 2; ++i) {
  354                         for (j = 0; j < UMTX_CHAINS; ++j) {
  355                                 uc = &umtxq_chains[i][j];
  356                                 mtx_lock(&uc->uc_lock);
  357                                 uc->length = 0;
  358                                 uc->max_length = 0;     
  359                                 mtx_unlock(&uc->uc_lock);
  360                         }
  361                 }
  362         }
  363         return (0);
  364 }
  365 
  366 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
  367     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
  368     sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics");
  369 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
  370     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
  371     sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length");
  372 #endif
  373 
  374 static void
  375 umtxq_sysinit(void *arg __unused)
  376 {
  377         int i, j;
  378 
  379         umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
  380                 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  381         for (i = 0; i < 2; ++i) {
  382                 for (j = 0; j < UMTX_CHAINS; ++j) {
  383                         mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
  384                                  MTX_DEF | MTX_DUPOK);
  385                         LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
  386                         LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
  387                         LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
  388                         TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
  389                         umtxq_chains[i][j].uc_busy = 0;
  390                         umtxq_chains[i][j].uc_waiters = 0;
  391 #ifdef UMTX_PROFILING
  392                         umtxq_chains[i][j].length = 0;
  393                         umtxq_chains[i][j].max_length = 0;      
  394 #endif
  395                 }
  396         }
  397 #ifdef UMTX_PROFILING
  398         umtx_init_profiling();
  399 #endif
  400         mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
  401         EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
  402             EVENTHANDLER_PRI_ANY);
  403 }
  404 
  405 struct umtx_q *
  406 umtxq_alloc(void)
  407 {
  408         struct umtx_q *uq;
  409 
  410         uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
  411         uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO);
  412         TAILQ_INIT(&uq->uq_spare_queue->head);
  413         TAILQ_INIT(&uq->uq_pi_contested);
  414         uq->uq_inherited_pri = PRI_MAX;
  415         return (uq);
  416 }
  417 
  418 void
  419 umtxq_free(struct umtx_q *uq)
  420 {
  421         MPASS(uq->uq_spare_queue != NULL);
  422         free(uq->uq_spare_queue, M_UMTX);
  423         free(uq, M_UMTX);
  424 }
  425 
  426 static inline void
  427 umtxq_hash(struct umtx_key *key)
  428 {
  429         unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
  430         key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
  431 }
  432 
  433 static inline struct umtxq_chain *
  434 umtxq_getchain(struct umtx_key *key)
  435 {
  436         if (key->type <= TYPE_SEM)
  437                 return (&umtxq_chains[1][key->hash]);
  438         return (&umtxq_chains[0][key->hash]);
  439 }
  440 
  441 /*
  442  * Lock a chain.
  443  */
  444 static inline void
  445 umtxq_lock(struct umtx_key *key)
  446 {
  447         struct umtxq_chain *uc;
  448 
  449         uc = umtxq_getchain(key);
  450         mtx_lock(&uc->uc_lock);
  451 }
  452 
  453 /*
  454  * Unlock a chain.
  455  */
  456 static inline void
  457 umtxq_unlock(struct umtx_key *key)
  458 {
  459         struct umtxq_chain *uc;
  460 
  461         uc = umtxq_getchain(key);
  462         mtx_unlock(&uc->uc_lock);
  463 }
  464 
/*
 * Put the chain into the busy state.  This is used when the following
 * operation may block, so the chain's kernel mutex cannot be held
 * across it.
 *
 * The chain lock must be held on entry and is held on return, but it may
 * be dropped and re-acquired while waiting for another holder of the
 * busy state to finish.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(key);
        mtx_assert(&uc->uc_lock, MA_OWNED);
        if (uc->uc_busy) {
#ifdef SMP
                /*
                 * With more than one CPU, first spin for at most
                 * BUSY_SPINS iterations with the chain lock released,
                 * polling the busy flag without holding the lock.
                 */
                if (smp_cpus > 1) {
                        int count = BUSY_SPINS;
                        if (count > 0) {
                                umtxq_unlock(key);
                                while (uc->uc_busy && --count > 0)
                                        cpu_spinwait();
                                umtxq_lock(key);
                        }
                }
#endif
                /*
                 * Still busy: sleep on the chain until umtxq_unbusy()
                 * wakes us.  uc_waiters tells umtxq_unbusy() that a
                 * wakeup is needed.  The flag is re-checked after every
                 * wakeup, with the chain lock held again.
                 */
                while (uc->uc_busy) {
                        uc->uc_waiters++;
                        msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
                        uc->uc_waiters--;
                }
        }
        uc->uc_busy = 1;
}
  496 
  497 /*
  498  * Unbusy a chain.
  499  */
  500 static inline void
  501 umtxq_unbusy(struct umtx_key *key)
  502 {
  503         struct umtxq_chain *uc;
  504 
  505         uc = umtxq_getchain(key);
  506         mtx_assert(&uc->uc_lock, MA_OWNED);
  507         KASSERT(uc->uc_busy != 0, ("not busy"));
  508         uc->uc_busy = 0;
  509         if (uc->uc_waiters)
  510                 wakeup_one(uc);
  511 }
  512 
  513 static struct umtxq_queue *
  514 umtxq_queue_lookup(struct umtx_key *key, int q)
  515 {
  516         struct umtxq_queue *uh;
  517         struct umtxq_chain *uc;
  518 
  519         uc = umtxq_getchain(key);
  520         UMTXQ_LOCKED_ASSERT(uc);
  521         LIST_FOREACH(uh, &uc->uc_queue[q], link) {
  522                 if (umtx_key_match(&uh->key, key))
  523                         return (uh);
  524         }
  525 
  526         return (NULL);
  527 }
  528 
  529 static inline void
  530 umtxq_insert_queue(struct umtx_q *uq, int q)
  531 {
  532         struct umtxq_queue *uh;
  533         struct umtxq_chain *uc;
  534 
  535         uc = umtxq_getchain(&uq->uq_key);
  536         UMTXQ_LOCKED_ASSERT(uc);
  537         KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
  538         uh = umtxq_queue_lookup(&uq->uq_key, q);
  539         if (uh != NULL) {
  540                 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
  541         } else {
  542                 uh = uq->uq_spare_queue;
  543                 uh->key = uq->uq_key;
  544                 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
  545 #ifdef UMTX_PROFILING
  546                 uc->length++;
  547                 if (uc->length > uc->max_length) {
  548                         uc->max_length = uc->length;
  549                         if (uc->max_length > max_length)
  550                                 max_length = uc->max_length;    
  551                 }
  552 #endif
  553         }
  554         uq->uq_spare_queue = NULL;
  555 
  556         TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
  557         uh->length++;
  558         uq->uq_flags |= UQF_UMTXQ;
  559         uq->uq_cur_queue = uh;
  560         return;
  561 }
  562 
  563 static inline void
  564 umtxq_remove_queue(struct umtx_q *uq, int q)
  565 {
  566         struct umtxq_chain *uc;
  567         struct umtxq_queue *uh;
  568 
  569         uc = umtxq_getchain(&uq->uq_key);
  570         UMTXQ_LOCKED_ASSERT(uc);
  571         if (uq->uq_flags & UQF_UMTXQ) {
  572                 uh = uq->uq_cur_queue;
  573                 TAILQ_REMOVE(&uh->head, uq, uq_link);
  574                 uh->length--;
  575                 uq->uq_flags &= ~UQF_UMTXQ;
  576                 if (TAILQ_EMPTY(&uh->head)) {
  577                         KASSERT(uh->length == 0,
  578                             ("inconsistent umtxq_queue length"));
  579 #ifdef UMTX_PROFILING
  580                         uc->length--;
  581 #endif
  582                         LIST_REMOVE(uh, link);
  583                 } else {
  584                         uh = LIST_FIRST(&uc->uc_spare_queue);
  585                         KASSERT(uh != NULL, ("uc_spare_queue is empty"));
  586                         LIST_REMOVE(uh, link);
  587                 }
  588                 uq->uq_spare_queue = uh;
  589                 uq->uq_cur_queue = NULL;
  590         }
  591 }
  592 
  593 /*
  594  * Check if there are multiple waiters
  595  */
  596 static int
  597 umtxq_count(struct umtx_key *key)
  598 {
  599         struct umtxq_chain *uc;
  600         struct umtxq_queue *uh;
  601 
  602         uc = umtxq_getchain(key);
  603         UMTXQ_LOCKED_ASSERT(uc);
  604         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  605         if (uh != NULL)
  606                 return (uh->length);
  607         return (0);
  608 }
  609 
  610 /*
  611  * Check if there are multiple PI waiters and returns first
  612  * waiter.
  613  */
  614 static int
  615 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
  616 {
  617         struct umtxq_chain *uc;
  618         struct umtxq_queue *uh;
  619 
  620         *first = NULL;
  621         uc = umtxq_getchain(key);
  622         UMTXQ_LOCKED_ASSERT(uc);
  623         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  624         if (uh != NULL) {
  625                 *first = TAILQ_FIRST(&uh->head);
  626                 return (uh->length);
  627         }
  628         return (0);
  629 }
  630 
  631 static int
  632 umtxq_check_susp(struct thread *td)
  633 {
  634         struct proc *p;
  635         int error;
  636 
  637         /*
  638          * The check for TDF_NEEDSUSPCHK is racy, but it is enough to
  639          * eventually break the lockstep loop.
  640          */
  641         if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
  642                 return (0);
  643         error = 0;
  644         p = td->td_proc;
  645         PROC_LOCK(p);
  646         if (P_SHOULDSTOP(p) ||
  647             ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
  648                 if (p->p_flag & P_SINGLE_EXIT)
  649                         error = EINTR;
  650                 else
  651                         error = ERESTART;
  652         }
  653         PROC_UNLOCK(p);
  654         return (error);
  655 }
  656 
  657 /*
  658  * Wake up threads waiting on an userland object.
  659  */
  660 
  661 static int
  662 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
  663 {
  664         struct umtxq_chain *uc;
  665         struct umtxq_queue *uh;
  666         struct umtx_q *uq;
  667         int ret;
  668 
  669         ret = 0;
  670         uc = umtxq_getchain(key);
  671         UMTXQ_LOCKED_ASSERT(uc);
  672         uh = umtxq_queue_lookup(key, q);
  673         if (uh != NULL) {
  674                 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
  675                         umtxq_remove_queue(uq, q);
  676                         wakeup(uq);
  677                         if (++ret >= n_wake)
  678                                 return (ret);
  679                 }
  680         }
  681         return (ret);
  682 }
  683 
  684 
  685 /*
  686  * Wake up specified thread.
  687  */
  688 static inline void
  689 umtxq_signal_thread(struct umtx_q *uq)
  690 {
  691         struct umtxq_chain *uc;
  692 
  693         uc = umtxq_getchain(&uq->uq_key);
  694         UMTXQ_LOCKED_ASSERT(uc);
  695         umtxq_remove(uq);
  696         wakeup(uq);
  697 }
  698 
  699 static inline int 
  700 tstohz(const struct timespec *tsp)
  701 {
  702         struct timeval tv;
  703 
  704         TIMESPEC_TO_TIMEVAL(&tv, tsp);
  705         return tvtohz(&tv);
  706 }
  707 
  708 static void
  709 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
  710         const struct timespec *timeout)
  711 {
  712 
  713         timo->clockid = clockid;
  714         if (!absolute) {
  715                 kern_clock_gettime(curthread, clockid, &timo->end);
  716                 timo->cur = timo->end;
  717                 timespecadd(&timo->end, timeout);
  718         } else {
  719                 timo->end = *timeout;
  720                 kern_clock_gettime(curthread, clockid, &timo->cur);
  721         }
  722 }
  723 
  724 static void
  725 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
  726 {
  727 
  728         abs_timeout_init(timo, umtxtime->_clockid,
  729                 (umtxtime->_flags & UMTX_ABSTIME) != 0,
  730                 &umtxtime->_timeout);
  731 }
  732 
/*
 * Refresh timo->cur with the current time of the timeout's clock.
 */
static inline void
abs_timeout_update(struct abs_timeout *timo)
{
        kern_clock_gettime(curthread, timo->clockid, &timo->cur);
}
  738 
  739 static int
  740 abs_timeout_gethz(struct abs_timeout *timo)
  741 {
  742         struct timespec tts;
  743 
  744         if (timespeccmp(&timo->end, &timo->cur, <=))
  745                 return (-1); 
  746         tts = timo->end;
  747         timespecsub(&tts, &timo->cur);
  748         return (tstohz(&tts));
  749 }
  750 
  751 /*
  752  * Put thread into sleep state, before sleeping, check if
  753  * thread was removed from umtx queue.
  754  */
  755 static inline int
  756 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
  757 {
  758         struct umtxq_chain *uc;
  759         int error, timo;
  760 
  761         uc = umtxq_getchain(&uq->uq_key);
  762         UMTXQ_LOCKED_ASSERT(uc);
  763         for (;;) {
  764                 if (!(uq->uq_flags & UQF_UMTXQ))
  765                         return (0);
  766                 if (abstime != NULL) {
  767                         timo = abs_timeout_gethz(abstime);
  768                         if (timo < 0)
  769                                 return (ETIMEDOUT);
  770                 } else
  771                         timo = 0;
  772                 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
  773                 if (error != EWOULDBLOCK) {
  774                         umtxq_lock(&uq->uq_key);
  775                         break;
  776                 }
  777                 if (abstime != NULL)
  778                         abs_timeout_update(abstime);
  779                 umtxq_lock(&uq->uq_key);
  780         }
  781         return (error);
  782 }
  783 
  784 /*
  785  * Convert userspace address into unique logical address.
  786  */
  787 int
  788 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
  789 {
  790         struct thread *td = curthread;
  791         vm_map_t map;
  792         vm_map_entry_t entry;
  793         vm_pindex_t pindex;
  794         vm_prot_t prot;
  795         boolean_t wired;
  796 
  797         key->type = type;
  798         if (share == THREAD_SHARE) {
  799                 key->shared = 0;
  800                 key->info.private.vs = td->td_proc->p_vmspace;
  801                 key->info.private.addr = (uintptr_t)addr;
  802         } else {
  803                 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
  804                 map = &td->td_proc->p_vmspace->vm_map;
  805                 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
  806                     &entry, &key->info.shared.object, &pindex, &prot,
  807                     &wired) != KERN_SUCCESS) {
  808                         return EFAULT;
  809                 }
  810 
  811                 if ((share == PROCESS_SHARE) ||
  812                     (share == AUTO_SHARE &&
  813                      VM_INHERIT_SHARE == entry->inheritance)) {
  814                         key->shared = 1;
  815                         key->info.shared.offset = entry->offset + entry->start -
  816                                 (vm_offset_t)addr;
  817                         vm_object_reference(key->info.shared.object);
  818                 } else {
  819                         key->shared = 0;
  820                         key->info.private.vs = td->td_proc->p_vmspace;
  821                         key->info.private.addr = (uintptr_t)addr;
  822                 }
  823                 vm_map_lookup_done(map, entry);
  824         }
  825 
  826         umtxq_hash(key);
  827         return (0);
  828 }
  829 
  830 /*
  831  * Release key.
  832  */
  833 void
  834 umtx_key_release(struct umtx_key *key)
  835 {
  836         if (key->shared)
  837                 vm_object_deallocate(key->info.shared.object);
  838 }
  839 
  840 /*
  841  * Lock a umtx object.
  842  */
  843 static int
  844 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
  845         const struct timespec *timeout)
  846 {
  847         struct abs_timeout timo;
  848         struct umtx_q *uq;
  849         u_long owner;
  850         u_long old;
  851         int error = 0;
  852 
  853         uq = td->td_umtxq;
  854         if (timeout != NULL)
  855                 abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
  856 
  857         /*
  858          * Care must be exercised when dealing with umtx structure. It
  859          * can fault on any access.
  860          */
  861         for (;;) {
  862                 /*
  863                  * Try the uncontested case.  This should be done in userland.
  864                  */
  865                 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
  866 
  867                 /* The acquire succeeded. */
  868                 if (owner == UMTX_UNOWNED)
  869                         return (0);
  870 
  871                 /* The address was invalid. */
  872                 if (owner == -1)
  873                         return (EFAULT);
  874 
  875                 /* If no one owns it but it is contested try to acquire it. */
  876                 if (owner == UMTX_CONTESTED) {
  877                         owner = casuword(&umtx->u_owner,
  878                             UMTX_CONTESTED, id | UMTX_CONTESTED);
  879 
  880                         if (owner == UMTX_CONTESTED)
  881                                 return (0);
  882 
  883                         /* The address was invalid. */
  884                         if (owner == -1)
  885                                 return (EFAULT);
  886 
  887                         error = umtxq_check_susp(td);
  888                         if (error != 0)
  889                                 break;
  890 
  891                         /* If this failed the lock has changed, restart. */
  892                         continue;
  893                 }
  894 
  895                 /*
  896                  * If we caught a signal, we have retried and now
  897                  * exit immediately.
  898                  */
  899                 if (error != 0)
  900                         break;
  901 
  902                 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
  903                         AUTO_SHARE, &uq->uq_key)) != 0)
  904                         return (error);
  905 
  906                 umtxq_lock(&uq->uq_key);
  907                 umtxq_busy(&uq->uq_key);
  908                 umtxq_insert(uq);
  909                 umtxq_unbusy(&uq->uq_key);
  910                 umtxq_unlock(&uq->uq_key);
  911 
  912                 /*
  913                  * Set the contested bit so that a release in user space
  914                  * knows to use the system call for unlock.  If this fails
  915                  * either some one else has acquired the lock or it has been
  916                  * released.
  917                  */
  918                 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
  919 
  920                 /* The address was invalid. */
  921                 if (old == -1) {
  922                         umtxq_lock(&uq->uq_key);
  923                         umtxq_remove(uq);
  924                         umtxq_unlock(&uq->uq_key);
  925                         umtx_key_release(&uq->uq_key);
  926                         return (EFAULT);
  927                 }
  928 
  929                 /*
  930                  * We set the contested bit, sleep. Otherwise the lock changed
  931                  * and we need to retry or we lost a race to the thread
  932                  * unlocking the umtx.
  933                  */
  934                 umtxq_lock(&uq->uq_key);
  935                 if (old == owner)
  936                         error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
  937                             &timo);
  938                 umtxq_remove(uq);
  939                 umtxq_unlock(&uq->uq_key);
  940                 umtx_key_release(&uq->uq_key);
  941 
  942                 if (error == 0)
  943                         error = umtxq_check_susp(td);
  944         }
  945 
  946         if (timeout == NULL) {
  947                 /* Mutex locking is restarted if it is interrupted. */
  948                 if (error == EINTR)
  949                         error = ERESTART;
  950         } else {
  951                 /* Timed-locking is not restarted. */
  952                 if (error == ERESTART)
  953                         error = EINTR;
  954         }
  955         return (error);
  956 }
  957 
  958 /*
  959  * Unlock a umtx object.
  960  */
  961 static int
  962 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
  963 {
  964         struct umtx_key key;
  965         u_long owner;
  966         u_long old;
  967         int error;
  968         int count;
  969 
  970         /*
  971          * Make sure we own this mtx.
  972          */
  973         owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
  974         if (owner == -1)
  975                 return (EFAULT);
  976 
  977         if ((owner & ~UMTX_CONTESTED) != id)
  978                 return (EPERM);
  979 
  980         /* This should be done in userland */
  981         if ((owner & UMTX_CONTESTED) == 0) {
  982                 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
  983                 if (old == -1)
  984                         return (EFAULT);
  985                 if (old == owner)
  986                         return (0);
  987                 owner = old;
  988         }
  989 
  990         /* We should only ever be in here for contested locks */
  991         if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
  992                 &key)) != 0)
  993                 return (error);
  994 
  995         umtxq_lock(&key);
  996         umtxq_busy(&key);
  997         count = umtxq_count(&key);
  998         umtxq_unlock(&key);
  999 
 1000         /*
 1001          * When unlocking the umtx, it must be marked as unowned if
 1002          * there is zero or one thread only waiting for it.
 1003          * Otherwise, it must be marked as contested.
 1004          */
 1005         old = casuword(&umtx->u_owner, owner,
 1006                 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
 1007         umtxq_lock(&key);
 1008         umtxq_signal(&key,1);
 1009         umtxq_unbusy(&key);
 1010         umtxq_unlock(&key);
 1011         umtx_key_release(&key);
 1012         if (old == -1)
 1013                 return (EFAULT);
 1014         if (old != owner)
 1015                 return (EINVAL);
 1016         return (0);
 1017 }
 1018 
 1019 #ifdef COMPAT_FREEBSD32
 1020 
 1021 /*
 1022  * Lock a umtx object.
 1023  */
 1024 static int
 1025 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
 1026         const struct timespec *timeout)
 1027 {
 1028         struct abs_timeout timo;
 1029         struct umtx_q *uq;
 1030         uint32_t owner;
 1031         uint32_t old;
 1032         int error = 0;
 1033 
 1034         uq = td->td_umtxq;
 1035 
 1036         if (timeout != NULL)
 1037                 abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
 1038 
 1039         /*
 1040          * Care must be exercised when dealing with umtx structure. It
 1041          * can fault on any access.
 1042          */
 1043         for (;;) {
 1044                 /*
 1045                  * Try the uncontested case.  This should be done in userland.
 1046                  */
 1047                 owner = casuword32(m, UMUTEX_UNOWNED, id);
 1048 
 1049                 /* The acquire succeeded. */
 1050                 if (owner == UMUTEX_UNOWNED)
 1051                         return (0);
 1052 
 1053                 /* The address was invalid. */
 1054                 if (owner == -1)
 1055                         return (EFAULT);
 1056 
 1057                 /* If no one owns it but it is contested try to acquire it. */
 1058                 if (owner == UMUTEX_CONTESTED) {
 1059                         owner = casuword32(m,
 1060                             UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 1061                         if (owner == UMUTEX_CONTESTED)
 1062                                 return (0);
 1063 
 1064                         /* The address was invalid. */
 1065                         if (owner == -1)
 1066                                 return (EFAULT);
 1067 
 1068                         error = umtxq_check_susp(td);
 1069                         if (error != 0)
 1070                                 break;
 1071 
 1072                         /* If this failed the lock has changed, restart. */
 1073                         continue;
 1074                 }
 1075 
 1076                 /*
 1077                  * If we caught a signal, we have retried and now
 1078                  * exit immediately.
 1079                  */
 1080                 if (error != 0)
 1081                         return (error);
 1082 
 1083                 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
 1084                         AUTO_SHARE, &uq->uq_key)) != 0)
 1085                         return (error);
 1086 
 1087                 umtxq_lock(&uq->uq_key);
 1088                 umtxq_busy(&uq->uq_key);
 1089                 umtxq_insert(uq);
 1090                 umtxq_unbusy(&uq->uq_key);
 1091                 umtxq_unlock(&uq->uq_key);
 1092 
 1093                 /*
 1094                  * Set the contested bit so that a release in user space
 1095                  * knows to use the system call for unlock.  If this fails
 1096                  * either some one else has acquired the lock or it has been
 1097                  * released.
 1098                  */
 1099                 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
 1100 
 1101                 /* The address was invalid. */
 1102                 if (old == -1) {
 1103                         umtxq_lock(&uq->uq_key);
 1104                         umtxq_remove(uq);
 1105                         umtxq_unlock(&uq->uq_key);
 1106                         umtx_key_release(&uq->uq_key);
 1107                         return (EFAULT);
 1108                 }
 1109 
 1110                 /*
 1111                  * We set the contested bit, sleep. Otherwise the lock changed
 1112                  * and we need to retry or we lost a race to the thread
 1113                  * unlocking the umtx.
 1114                  */
 1115                 umtxq_lock(&uq->uq_key);
 1116                 if (old == owner)
 1117                         error = umtxq_sleep(uq, "umtx", timeout == NULL ?
 1118                             NULL : &timo);
 1119                 umtxq_remove(uq);
 1120                 umtxq_unlock(&uq->uq_key);
 1121                 umtx_key_release(&uq->uq_key);
 1122 
 1123                 if (error == 0)
 1124                         error = umtxq_check_susp(td);
 1125         }
 1126 
 1127         if (timeout == NULL) {
 1128                 /* Mutex locking is restarted if it is interrupted. */
 1129                 if (error == EINTR)
 1130                         error = ERESTART;
 1131         } else {
 1132                 /* Timed-locking is not restarted. */
 1133                 if (error == ERESTART)
 1134                         error = EINTR;
 1135         }
 1136         return (error);
 1137 }
 1138 
 1139 /*
 1140  * Unlock a umtx object.
 1141  */
 1142 static int
 1143 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
 1144 {
 1145         struct umtx_key key;
 1146         uint32_t owner;
 1147         uint32_t old;
 1148         int error;
 1149         int count;
 1150 
 1151         /*
 1152          * Make sure we own this mtx.
 1153          */
 1154         owner = fuword32(m);
 1155         if (owner == -1)
 1156                 return (EFAULT);
 1157 
 1158         if ((owner & ~UMUTEX_CONTESTED) != id)
 1159                 return (EPERM);
 1160 
 1161         /* This should be done in userland */
 1162         if ((owner & UMUTEX_CONTESTED) == 0) {
 1163                 old = casuword32(m, owner, UMUTEX_UNOWNED);
 1164                 if (old == -1)
 1165                         return (EFAULT);
 1166                 if (old == owner)
 1167                         return (0);
 1168                 owner = old;
 1169         }
 1170 
 1171         /* We should only ever be in here for contested locks */
 1172         if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
 1173                 &key)) != 0)
 1174                 return (error);
 1175 
 1176         umtxq_lock(&key);
 1177         umtxq_busy(&key);
 1178         count = umtxq_count(&key);
 1179         umtxq_unlock(&key);
 1180 
 1181         /*
 1182          * When unlocking the umtx, it must be marked as unowned if
 1183          * there is zero or one thread only waiting for it.
 1184          * Otherwise, it must be marked as contested.
 1185          */
 1186         old = casuword32(m, owner,
 1187                 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
 1188         umtxq_lock(&key);
 1189         umtxq_signal(&key,1);
 1190         umtxq_unbusy(&key);
 1191         umtxq_unlock(&key);
 1192         umtx_key_release(&key);
 1193         if (old == -1)
 1194                 return (EFAULT);
 1195         if (old != owner)
 1196                 return (EINVAL);
 1197         return (0);
 1198 }
 1199 #endif
 1200 
 1201 /*
 1202  * Fetch and compare value, sleep on the address if value is not changed.
 1203  */
 1204 static int
 1205 do_wait(struct thread *td, void *addr, u_long id,
 1206         struct _umtx_time *timeout, int compat32, int is_private)
 1207 {
 1208         struct abs_timeout timo;
 1209         struct umtx_q *uq;
 1210         u_long tmp;
 1211         int error = 0;
 1212 
 1213         uq = td->td_umtxq;
 1214         if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
 1215                 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
 1216                 return (error);
 1217 
 1218         if (timeout != NULL)
 1219                 abs_timeout_init2(&timo, timeout);
 1220 
 1221         umtxq_lock(&uq->uq_key);
 1222         umtxq_insert(uq);
 1223         umtxq_unlock(&uq->uq_key);
 1224         if (compat32 == 0)
 1225                 tmp = fuword(addr);
 1226         else
 1227                 tmp = (unsigned int)fuword32(addr);
 1228         umtxq_lock(&uq->uq_key);
 1229         if (tmp == id)
 1230                 error = umtxq_sleep(uq, "uwait", timeout == NULL ?
 1231                     NULL : &timo);
 1232         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 1233                 error = 0;
 1234         else
 1235                 umtxq_remove(uq);
 1236         umtxq_unlock(&uq->uq_key);
 1237         umtx_key_release(&uq->uq_key);
 1238         if (error == ERESTART)
 1239                 error = EINTR;
 1240         return (error);
 1241 }
 1242 
 1243 /*
 1244  * Wake up threads sleeping on the specified address.
 1245  */
 1246 int
 1247 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
 1248 {
 1249         struct umtx_key key;
 1250         int ret;
 1251         
 1252         if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
 1253                 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
 1254                 return (ret);
 1255         umtxq_lock(&key);
 1256         ret = umtxq_signal(&key, n_wake);
 1257         umtxq_unlock(&key);
 1258         umtx_key_release(&key);
 1259         return (0);
 1260 }
 1261 
 1262 /*
 1263  * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 1264  */
 1265 static int
 1266 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
 1267         struct _umtx_time *timeout, int mode)
 1268 {
 1269         struct abs_timeout timo;
 1270         struct umtx_q *uq;
 1271         uint32_t owner, old, id;
 1272         int error = 0;
 1273 
 1274         id = td->td_tid;
 1275         uq = td->td_umtxq;
 1276 
 1277         if (timeout != NULL)
 1278                 abs_timeout_init2(&timo, timeout);
 1279 
 1280         /*
 1281          * Care must be exercised when dealing with umtx structure. It
 1282          * can fault on any access.
 1283          */
 1284         for (;;) {
 1285                 owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
 1286                 if (mode == _UMUTEX_WAIT) {
 1287                         if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
 1288                                 return (0);
 1289                 } else {
 1290                         /*
 1291                          * Try the uncontested case.  This should be done in userland.
 1292                          */
 1293                         owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
 1294 
 1295                         /* The acquire succeeded. */
 1296                         if (owner == UMUTEX_UNOWNED)
 1297                                 return (0);
 1298 
 1299                         /* The address was invalid. */
 1300                         if (owner == -1)
 1301                                 return (EFAULT);
 1302 
 1303                         /* If no one owns it but it is contested try to acquire it. */
 1304                         if (owner == UMUTEX_CONTESTED) {
 1305                                 owner = casuword32(&m->m_owner,
 1306                                     UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 1307 
 1308                                 if (owner == UMUTEX_CONTESTED)
 1309                                         return (0);
 1310 
 1311                                 /* The address was invalid. */
 1312                                 if (owner == -1)
 1313                                         return (EFAULT);
 1314 
 1315                                 error = umtxq_check_susp(td);
 1316                                 if (error != 0)
 1317                                         return (error);
 1318 
 1319                                 /* If this failed the lock has changed, restart. */
 1320                                 continue;
 1321                         }
 1322                 }
 1323 
 1324                 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
 1325                     (owner & ~UMUTEX_CONTESTED) == id)
 1326                         return (EDEADLK);
 1327 
 1328                 if (mode == _UMUTEX_TRY)
 1329                         return (EBUSY);
 1330 
 1331                 /*
 1332                  * If we caught a signal, we have retried and now
 1333                  * exit immediately.
 1334                  */
 1335                 if (error != 0)
 1336                         return (error);
 1337 
 1338                 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
 1339                     GET_SHARE(flags), &uq->uq_key)) != 0)
 1340                         return (error);
 1341 
 1342                 umtxq_lock(&uq->uq_key);
 1343                 umtxq_busy(&uq->uq_key);
 1344                 umtxq_insert(uq);
 1345                 umtxq_unlock(&uq->uq_key);
 1346 
 1347                 /*
 1348                  * Set the contested bit so that a release in user space
 1349                  * knows to use the system call for unlock.  If this fails
 1350                  * either some one else has acquired the lock or it has been
 1351                  * released.
 1352                  */
 1353                 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
 1354 
 1355                 /* The address was invalid. */
 1356                 if (old == -1) {
 1357                         umtxq_lock(&uq->uq_key);
 1358                         umtxq_remove(uq);
 1359                         umtxq_unbusy(&uq->uq_key);
 1360                         umtxq_unlock(&uq->uq_key);
 1361                         umtx_key_release(&uq->uq_key);
 1362                         return (EFAULT);
 1363                 }
 1364 
 1365                 /*
 1366                  * We set the contested bit, sleep. Otherwise the lock changed
 1367                  * and we need to retry or we lost a race to the thread
 1368                  * unlocking the umtx.
 1369                  */
 1370                 umtxq_lock(&uq->uq_key);
 1371                 umtxq_unbusy(&uq->uq_key);
 1372                 if (old == owner)
 1373                         error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
 1374                             NULL : &timo);
 1375                 umtxq_remove(uq);
 1376                 umtxq_unlock(&uq->uq_key);
 1377                 umtx_key_release(&uq->uq_key);
 1378 
 1379                 if (error == 0)
 1380                         error = umtxq_check_susp(td);
 1381         }
 1382 
 1383         return (0);
 1384 }
 1385 
 1386 /*
 1387  * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 1388  */
 1389 static int
 1390 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
 1391 {
 1392         struct umtx_key key;
 1393         uint32_t owner, old, id;
 1394         int error;
 1395         int count;
 1396 
 1397         id = td->td_tid;
 1398         /*
 1399          * Make sure we own this mtx.
 1400          */
 1401         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1402         if (owner == -1)
 1403                 return (EFAULT);
 1404 
 1405         if ((owner & ~UMUTEX_CONTESTED) != id)
 1406                 return (EPERM);
 1407 
 1408         if ((owner & UMUTEX_CONTESTED) == 0) {
 1409                 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
 1410                 if (old == -1)
 1411                         return (EFAULT);
 1412                 if (old == owner)
 1413                         return (0);
 1414                 owner = old;
 1415         }
 1416 
 1417         /* We should only ever be in here for contested locks */
 1418         if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
 1419             &key)) != 0)
 1420                 return (error);
 1421 
 1422         umtxq_lock(&key);
 1423         umtxq_busy(&key);
 1424         count = umtxq_count(&key);
 1425         umtxq_unlock(&key);
 1426 
 1427         /*
 1428          * When unlocking the umtx, it must be marked as unowned if
 1429          * there is zero or one thread only waiting for it.
 1430          * Otherwise, it must be marked as contested.
 1431          */
 1432         old = casuword32(&m->m_owner, owner,
 1433                 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
 1434         umtxq_lock(&key);
 1435         umtxq_signal(&key,1);
 1436         umtxq_unbusy(&key);
 1437         umtxq_unlock(&key);
 1438         umtx_key_release(&key);
 1439         if (old == -1)
 1440                 return (EFAULT);
 1441         if (old != owner)
 1442                 return (EINVAL);
 1443         return (0);
 1444 }
 1445 
 1446 /*
 1447  * Check if the mutex is available and wake up a waiter,
 1448  * only for simple mutex.
 1449  */
 1450 static int
 1451 do_wake_umutex(struct thread *td, struct umutex *m)
 1452 {
 1453         struct umtx_key key;
 1454         uint32_t owner;
 1455         uint32_t flags;
 1456         int error;
 1457         int count;
 1458 
 1459         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1460         if (owner == -1)
 1461                 return (EFAULT);
 1462 
 1463         if ((owner & ~UMUTEX_CONTESTED) != 0)
 1464                 return (0);
 1465 
 1466         flags = fuword32(&m->m_flags);
 1467 
 1468         /* We should only ever be in here for contested locks */
 1469         if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
 1470             &key)) != 0)
 1471                 return (error);
 1472 
 1473         umtxq_lock(&key);
 1474         umtxq_busy(&key);
 1475         count = umtxq_count(&key);
 1476         umtxq_unlock(&key);
 1477 
 1478         if (count <= 1)
 1479                 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
 1480 
 1481         umtxq_lock(&key);
 1482         if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
 1483                 umtxq_signal(&key, 1);
 1484         umtxq_unbusy(&key);
 1485         umtxq_unlock(&key);
 1486         umtx_key_release(&key);
 1487         return (0);
 1488 }
 1489 
 1490 /*
 1491  * Check if the mutex has waiters and tries to fix contention bit.
 1492  */
 1493 static int
 1494 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
 1495 {
 1496         struct umtx_key key;
 1497         uint32_t owner, old;
 1498         int type;
 1499         int error;
 1500         int count;
 1501 
 1502         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 1503         case 0:
 1504                 type = TYPE_NORMAL_UMUTEX;
 1505                 break;
 1506         case UMUTEX_PRIO_INHERIT:
 1507                 type = TYPE_PI_UMUTEX;
 1508                 break;
 1509         case UMUTEX_PRIO_PROTECT:
 1510                 type = TYPE_PP_UMUTEX;
 1511                 break;
 1512         default:
 1513                 return (EINVAL);
 1514         }
 1515         if ((error = umtx_key_get(m, type, GET_SHARE(flags),
 1516             &key)) != 0)
 1517                 return (error);
 1518 
 1519         owner = 0;
 1520         umtxq_lock(&key);
 1521         umtxq_busy(&key);
 1522         count = umtxq_count(&key);
 1523         umtxq_unlock(&key);
 1524         /*
 1525          * Only repair contention bit if there is a waiter, this means the mutex
 1526          * is still being referenced by userland code, otherwise don't update
 1527          * any memory.
 1528          */
 1529         if (count > 1) {
 1530                 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1531                 while ((owner & UMUTEX_CONTESTED) ==0) {
 1532                         old = casuword32(&m->m_owner, owner,
 1533                             owner|UMUTEX_CONTESTED);
 1534                         if (old == owner)
 1535                                 break;
 1536                         owner = old;
 1537                         if (old == -1)
 1538                                 break;
 1539                         error = umtxq_check_susp(td);
 1540                         if (error != 0)
 1541                                 break;
 1542                 }
 1543         } else if (count == 1) {
 1544                 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 1545                 while ((owner & ~UMUTEX_CONTESTED) != 0 &&
 1546                        (owner & UMUTEX_CONTESTED) == 0) {
 1547                         old = casuword32(&m->m_owner, owner,
 1548                             owner|UMUTEX_CONTESTED);
 1549                         if (old == owner)
 1550                                 break;
 1551                         owner = old;
 1552                         if (old == -1)
 1553                                 break;
 1554                         error = umtxq_check_susp(td);
 1555                         if (error != 0)
 1556                                 break;
 1557                 }
 1558         }
 1559         umtxq_lock(&key);
 1560         if (owner == -1) {
 1561                 error = EFAULT;
 1562                 umtxq_signal(&key, INT_MAX);
 1563         }
 1564         else if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
 1565                 umtxq_signal(&key, 1);
 1566         umtxq_unbusy(&key);
 1567         umtxq_unlock(&key);
 1568         umtx_key_release(&key);
 1569         return (error);
 1570 }
 1571 
 1572 static inline struct umtx_pi *
 1573 umtx_pi_alloc(int flags)
 1574 {
 1575         struct umtx_pi *pi;
 1576 
 1577         pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
 1578         TAILQ_INIT(&pi->pi_blocked);
 1579         atomic_add_int(&umtx_pi_allocated, 1);
 1580         return (pi);
 1581 }
 1582 
 1583 static inline void
 1584 umtx_pi_free(struct umtx_pi *pi)
 1585 {
 1586         uma_zfree(umtx_pi_zone, pi);
 1587         atomic_add_int(&umtx_pi_allocated, -1);
 1588 }
 1589 
 1590 /*
 1591  * Adjust the thread's position on a pi_state after its priority has been
 1592  * changed.
 1593  */
 1594 static int
 1595 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
 1596 {
 1597         struct umtx_q *uq, *uq1, *uq2;
 1598         struct thread *td1;
 1599 
 1600         mtx_assert(&umtx_lock, MA_OWNED);
 1601         if (pi == NULL)
 1602                 return (0);
 1603 
 1604         uq = td->td_umtxq;
 1605 
 1606         /*
 1607          * Check if the thread needs to be moved on the blocked chain.
 1608          * It needs to be moved if either its priority is lower than
 1609          * the previous thread or higher than the next thread.
 1610          */
 1611         uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
 1612         uq2 = TAILQ_NEXT(uq, uq_lockq);
 1613         if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
 1614             (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
 1615                 /*
 1616                  * Remove thread from blocked chain and determine where
 1617                  * it should be moved to.
 1618                  */
 1619                 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
 1620                 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
 1621                         td1 = uq1->uq_thread;
 1622                         MPASS(td1->td_proc->p_magic == P_MAGIC);
 1623                         if (UPRI(td1) > UPRI(td))
 1624                                 break;
 1625                 }
 1626 
 1627                 if (uq1 == NULL)
 1628                         TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
 1629                 else
 1630                         TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
 1631         }
 1632         return (1);
 1633 }
 1634 
 1635 /*
 1636  * Propagate the user priority of td, which is blocked on a POSIX PI mutex,
 1637  * along the chain of mutex owners.  The caller must hold umtx_lock.
 1638  */ 
 1639 static void
 1640 umtx_propagate_priority(struct thread *td)
 1641 {
 1642         struct umtx_q *uq;
 1643         struct umtx_pi *pi;
 1644         int pri;
 1645 
 1646         mtx_assert(&umtx_lock, MA_OWNED);
 1647         pri = UPRI(td);         /* the value lent to every owner visited */
 1648         uq = td->td_umtxq;
 1649         pi = uq->uq_pi_blocked;
 1650         if (pi == NULL)         /* td is not blocked on a PI mutex */
 1651                 return;
 1652 
 1653         for (;;) {
 1654                 td = pi->pi_owner;      /* from here on td is an owner */
 1655                 if (td == NULL || td == curthread)
 1656                         return;
 1657 
 1658                 MPASS(td->td_proc != NULL);
 1659                 MPASS(td->td_proc->p_magic == P_MAGIC);
 1660 
 1661                 thread_lock(td);
 1662                 if (td->td_lend_user_pri > pri)
 1663                         sched_lend_user_prio(td, pri);
 1664                 else {  /* owner's lent value is already <= pri: stop */
 1665                         thread_unlock(td);
 1666                         break;
 1667                 }
 1668                 thread_unlock(td);
 1669 
 1670                 /*
 1671                  * Pick up the lock that the owner itself is blocked on.
 1672                  */
 1673                 uq = td->td_umtxq;
 1674                 pi = uq->uq_pi_blocked;
 1675                 if (pi == NULL) /* owner is not blocked: chain ends here */
 1676                         break;
 1677                 /* Resort the owner on that mutex's wait list if needed. */
 1678                 umtx_pi_adjust_thread(pi, td);
 1679         }
 1680 }
 1681 
 1682 /*
 1683  * Recompute the priority lent to the owner of pi, and onward along the
 1684  * chain it is blocked on, after a waiter is interrupted or resumed.
 1685  */
 1686 static void
 1687 umtx_repropagate_priority(struct umtx_pi *pi)
 1688 {
 1689         struct umtx_q *uq, *uq_owner;
 1690         struct umtx_pi *pi2;
 1691         int pri;
 1692 
 1693         mtx_assert(&umtx_lock, MA_OWNED);       /* caller holds umtx_lock */
 1694 
 1695         while (pi != NULL && pi->pi_owner != NULL) {
 1696                 pri = PRI_MAX;  /* reduced to the minimum found below */
 1697                 uq_owner = pi->pi_owner->td_umtxq;
 1698 
 1699                 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
 1700                         uq = TAILQ_FIRST(&pi2->pi_blocked); /* head waiter */
 1701                         if (uq != NULL) {
 1702                                 if (pri > UPRI(uq->uq_thread))
 1703                                         pri = UPRI(uq->uq_thread);
 1704                         }
 1705                 }
 1706 
 1707                 if (pri > uq_owner->uq_inherited_pri) /* also bounded by it */
 1708                         pri = uq_owner->uq_inherited_pri;
 1709                 thread_lock(pi->pi_owner);
 1710                 sched_lend_user_prio(pi->pi_owner, pri);
 1711                 thread_unlock(pi->pi_owner);
 1712                 if ((pi = uq_owner->uq_pi_blocked) != NULL) /* owner blocked too */
 1713                         umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
 1714         }
 1715 }
 1716 
 1717 /*
 1718  * Record owner as the owner of a PI mutex and append the mutex to the
 1719  * owner's list of contested PI mutexes.  The caller must hold umtx_lock.
 1720  */
 1721 static void
 1722 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
 1723 {
 1724         mtx_assert(&umtx_lock, MA_OWNED);
 1725         /* Setting an owner on an already-owned PI mutex is a kernel bug. */
 1726         if (pi->pi_owner != NULL)
 1727                 panic("pi_owner != NULL");
 1728         pi->pi_owner = owner;
 1729         /* The owner's uq_pi_contested list is what priority recomputation walks. */
 1730         TAILQ_INSERT_TAIL(&owner->td_umtxq->uq_pi_contested, pi, pi_link);
 1731 }
 1732 
 1733 /*
 1734  * Make owner the owner of a PI mutex.
 1735  *
 1736  * Returns 0 when owner holds the mutex on return (it may already have
 1737  * done so) and EPERM when another thread is recorded as owner.  If
 1738  * threads are blocked on the mutex, the first one's priority is lent to
 1739  * owner when it is numerically lower than owner's.
 1740  */
 1741 static int
 1742 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
 1743 {
 1744         struct umtx_q *waiter;
 1745         int rv, wpri;
 1746 
 1747         rv = 0;
 1748         mtx_lock_spin(&umtx_lock);
 1749         if (pi->pi_owner == owner)
 1750                 goto done;
 1751         if (pi->pi_owner != NULL) {
 1752                 /* userland may have already messed the mutex, sigh. */
 1753                 rv = EPERM;
 1754                 goto done;
 1755         }
 1756         umtx_pi_setowner(pi, owner);
 1757         waiter = TAILQ_FIRST(&pi->pi_blocked);
 1758         if (waiter != NULL) {
 1759                 wpri = UPRI(waiter->uq_thread);
 1760                 thread_lock(owner);
 1761                 if (UPRI(owner) > wpri)
 1762                         sched_lend_user_prio(owner, wpri);
 1763                 thread_unlock(owner);
 1764         }
 1765 done:
 1766         mtx_unlock_spin(&umtx_lock);
 1767         return (rv);
 1768 }
 1769 
 1770 /*
 1771  * Re-sort td on the wait list of the PI mutex it is blocked on, if any,
 1772  * and recompute the priority lent to that mutex's owner.  oldpri is unused.
 1773  */
 1774 void
 1775 umtx_pi_adjust(struct thread *td, u_char oldpri)
 1776 {
 1777         struct umtx_q *self;
 1778         struct umtx_pi *blocked_on;
 1779 
 1780         self = td->td_umtxq;
 1781         mtx_lock_spin(&umtx_lock);
 1782         /* The PI mutex td is blocked on, if any. */
 1783         blocked_on = self->uq_pi_blocked;
 1784         if (blocked_on == NULL) {
 1785                 mtx_unlock_spin(&umtx_lock);
 1786                 return;
 1787         }
 1788         umtx_pi_adjust_thread(blocked_on, td);
 1789         umtx_repropagate_priority(blocked_on);
 1790         mtx_unlock_spin(&umtx_lock);
 1791 }
 1792 
 1793 /*
 1794  * Sleep on a PI mutex: queue uq on pi by priority, lend it, sleep, undo.
 1795  */
 1796 static int
 1797 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
 1798         uint32_t owner, const char *wmesg, struct abs_timeout *timo)
 1799 {
 1800         struct umtxq_chain *uc;
 1801         struct thread *td, *td1;
 1802         struct umtx_q *uq1;
 1803         int pri;
 1804         int error = 0;
 1805 
 1806         td = uq->uq_thread;
 1807         KASSERT(td == curthread, ("inconsistent uq_thread"));
 1808         uc = umtxq_getchain(&uq->uq_key);
 1809         UMTXQ_LOCKED_ASSERT(uc);        /* entered with the chain locked ... */
 1810         UMTXQ_BUSY_ASSERT(uc);          /* ... and marked busy */
 1811         umtxq_insert(uq);
 1812         mtx_lock_spin(&umtx_lock);
 1813         if (pi->pi_owner == NULL) {     /* owner not recorded yet: use the tid */
 1814                 mtx_unlock_spin(&umtx_lock);    /* dropped around tdfind() */
 1815                 /* XXX Only look up thread in current process. */
 1816                 td1 = tdfind(owner, curproc->p_pid);
 1817                 mtx_lock_spin(&umtx_lock);
 1818                 if (td1 != NULL) {
 1819                         if (pi->pi_owner == NULL) /* recheck after relocking */
 1820                                 umtx_pi_setowner(pi, td1);
 1821                         PROC_UNLOCK(td1->td_proc); /* NOTE(review): presumably locked by tdfind() - confirm */
 1822                 }
 1823         }
 1824 
 1825         TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { /* list is kept sorted by UPRI */
 1826                 pri = UPRI(uq1->uq_thread);
 1827                 if (pri > UPRI(td))     /* first waiter numerically above td */
 1828                         break;
 1829         }
 1830 
 1831         if (uq1 != NULL)
 1832                 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
 1833         else                            /* loop ran off the end of the list */
 1834                 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
 1835 
 1836         uq->uq_pi_blocked = pi;
 1837         thread_lock(td);
 1838         td->td_flags |= TDF_UPIBLOCKED;
 1839         thread_unlock(td);
 1840         umtx_propagate_priority(td);    /* lend td's priority down the owner chain */
 1841         mtx_unlock_spin(&umtx_lock);
 1842         umtxq_unbusy(&uq->uq_key);      /* busy state is released before sleeping */
 1843 
 1844         error = umtxq_sleep(uq, wmesg, timo);
 1845         umtxq_remove(uq);
 1846 
 1847         mtx_lock_spin(&umtx_lock);
 1848         uq->uq_pi_blocked = NULL;       /* undo the blocked state set above */
 1849         thread_lock(td);
 1850         td->td_flags &= ~TDF_UPIBLOCKED;
 1851         thread_unlock(td);
 1852         TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
 1853         umtx_repropagate_priority(pi);  /* recompute the owner's loan without us */
 1854         mtx_unlock_spin(&umtx_lock);
 1855         umtxq_unlock(&uq->uq_key);      /* returns with the chain lock dropped */
 1856 
 1857         return (error);
 1858 }
 1859 
 1860 /*
 1861  * Take one more reference on a PI mutex (chain lock state is asserted).
 1862  */
 1863 static void
 1864 umtx_pi_ref(struct umtx_pi *pi)
 1865 {
 1866         struct umtxq_chain *chain;
 1867 
 1868         chain = umtxq_getchain(&pi->pi_key);
 1869         UMTXQ_LOCKED_ASSERT(chain);
 1870         ++pi->pi_refcount;
 1871 }
 1872 
 1873 /*
 1874  * Drop a reference on a PI mutex.  When the last reference goes away the
 1875  * mutex is unlinked from its owner and from the hash chain and is freed.
 1876  */
 1877 static void
 1878 umtx_pi_unref(struct umtx_pi *pi)
 1879 {
 1880         struct umtxq_chain *chain;
 1881         struct thread *holder;
 1882 
 1883         chain = umtxq_getchain(&pi->pi_key);
 1884         UMTXQ_LOCKED_ASSERT(chain);
 1885         KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
 1886         if (--pi->pi_refcount != 0)
 1887                 return;
 1888 
 1889         /* Last reference: detach from the owner under umtx_lock. */
 1890         mtx_lock_spin(&umtx_lock);
 1891         holder = pi->pi_owner;
 1892         if (holder != NULL) {
 1893                 TAILQ_REMOVE(&holder->td_umtxq->uq_pi_contested, pi, pi_link);
 1894                 pi->pi_owner = NULL;
 1895         }
 1896         KASSERT(TAILQ_EMPTY(&pi->pi_blocked), ("blocked queue not empty"));
 1897         mtx_unlock_spin(&umtx_lock);
 1898         TAILQ_REMOVE(&chain->uc_pi_list, pi, pi_hashlink);
 1899         umtx_pi_free(pi);
 1900 }
 1899 
 1900 /*
 1901  * Look up the PI mutex for key on the key's hash chain.  Returns NULL
 1902  * when there is none.
 1903  */
 1904 static struct umtx_pi *
 1905 umtx_pi_lookup(struct umtx_key *key)
 1906 {
 1907         struct umtxq_chain *chain;
 1908         struct umtx_pi *cand;
 1909 
 1910         chain = umtxq_getchain(key);
 1911         UMTXQ_LOCKED_ASSERT(chain);
 1912         /* TAILQ_FOREACH leaves cand NULL once the list is exhausted. */
 1913         TAILQ_FOREACH(cand, &chain->uc_pi_list, pi_hashlink) {
 1914                 if (umtx_key_match(&cand->pi_key, key))
 1915                         break;
 1916         }
 1917         return (cand);
 1918 }
 1919 
 1920 /*
 1921  * Append a PI mutex to the hash chain selected by its key.
 1922  */
 1923 static inline void
 1924 umtx_pi_insert(struct umtx_pi *pi)
 1925 {
 1926         struct umtxq_chain *chain;
 1927 
 1928         chain = umtxq_getchain(&pi->pi_key);
 1929         UMTXQ_LOCKED_ASSERT(chain);
 1930         TAILQ_INSERT_TAIL(&chain->uc_pi_list, pi, pi_hashlink);
 1931 }
 1932 
 1933 /*
 1934  * Lock a PI mutex.  m lives in user memory, so any access to it may fault.
 1935  * A non-NULL timeout bounds the wait; a non-zero try fails with EBUSY rather
 1936  * than sleeping.  Returns 0 once td owns the mutex, EFAULT for a bad address,
 1937  * EDEADLK for a self-deadlock under UMUTEX_ERROR_CHECK, or the error from
 1938  * umtx_key_get(), umtx_pi_claim(), umtxq_sleep_pi() or umtxq_check_susp().
 1939  */
 1940 static int
 1941 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
 1942     struct _umtx_time *timeout, int try)
 1943 {
 1944         struct abs_timeout timo;
 1945         struct umtx_q *uq;
 1946         struct umtx_pi *pi, *new_pi;
 1947         uint32_t id, owner, old;
 1948         int error;
 1949 
 1950         id = td->td_tid;
 1951         uq = td->td_umtxq;
 1952 
 1953         if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
 1954             &uq->uq_key)) != 0)
 1955                 return (error);
 1956 
 1957         if (timeout != NULL)
 1958                 abs_timeout_init2(&timo, timeout);
 1959 
 1960         umtxq_lock(&uq->uq_key);
 1961         pi = umtx_pi_lookup(&uq->uq_key);
 1962         if (pi == NULL) {
 1963                 new_pi = umtx_pi_alloc(M_NOWAIT);
 1964                 if (new_pi == NULL) {
 1965                         umtxq_unlock(&uq->uq_key);
 1966                         new_pi = umtx_pi_alloc(M_WAITOK);
 1967                         umtxq_lock(&uq->uq_key);
 1968                         pi = umtx_pi_lookup(&uq->uq_key);
 1969                         if (pi != NULL) {
 1970                                 umtx_pi_free(new_pi);
 1971                                 new_pi = NULL;
 1972                         }
 1973                 }
 1974                 if (new_pi != NULL) {
 1975                         new_pi->pi_key = uq->uq_key;
 1976                         umtx_pi_insert(new_pi);
 1977                         pi = new_pi;
 1978                 }
 1979         }
 1980         umtx_pi_ref(pi);
 1981         umtxq_unlock(&uq->uq_key);
 1982 
 1983         /* The umutex is in user memory; any access to it below can fault. */
 1984         for (;;) {
 1985                 /* Uncontested case; this should be done in userland. */
 1986                 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
 1987 
 1988                 /* The acquire succeeded. */
 1989                 if (owner == UMUTEX_UNOWNED) {
 1990                         error = 0;
 1991                         break;
 1992                 }
 1993 
 1994                 /* The address was invalid. */
 1995                 if (owner == -1) {
 1996                         error = EFAULT;
 1997                         break;
 1998                 }
 1999 
 2000                 /* If no one owns it but it is contested try to acquire it. */
 2001                 if (owner == UMUTEX_CONTESTED) {
 2002                         owner = casuword32(&m->m_owner,
 2003                             UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 2004                         if (owner == UMUTEX_CONTESTED) {
 2005                                 umtxq_lock(&uq->uq_key);
 2006                                 umtxq_busy(&uq->uq_key);
 2007                                 error = umtx_pi_claim(pi, td);
 2008                                 umtxq_unbusy(&uq->uq_key);
 2009                                 umtxq_unlock(&uq->uq_key);
 2010                                 break;
 2011                         }
 2012 
 2013                         /* The address was invalid. */
 2014                         if (owner == -1) {
 2015                                 error = EFAULT;
 2016                                 break;
 2017                         }
 2018 
 2019                         error = umtxq_check_susp(td);
 2020                         if (error != 0)
 2021                                 break;
 2022                         /* If this failed the lock has changed, restart. */
 2023                         continue;
 2024                 }
 2025 
 2026                 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
 2027                     (owner & ~UMUTEX_CONTESTED) == id) {
 2028                         error = EDEADLK;
 2029                         break;
 2030                 }
 2031 
 2032                 if (try != 0) {
 2033                         error = EBUSY;
 2034                         break;
 2035                 }
 2036 
 2037                 /*
 2038                  * If an earlier sleep failed (signal, timeout), we have
 2039                  * retried above and now exit with that error.
 2040                  */
 2041                 if (error != 0)
 2042                         break;
 2043 
 2044                 umtxq_lock(&uq->uq_key);
 2045                 umtxq_busy(&uq->uq_key);
 2046                 umtxq_unlock(&uq->uq_key);
 2047 
 2048                 /*
 2049                  * Set the contested bit so that a release in user space knows
 2050                  * to use the system call for unlock.  If this fails either
 2051                  * some one else has acquired the lock or it has been released.
 2052                  */
 2053                 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
 2054 
 2055                 /* The address was invalid. */
 2056                 if (old == -1) {
 2057                         umtxq_lock(&uq->uq_key);
 2058                         umtxq_unbusy(&uq->uq_key);
 2059                         umtxq_unlock(&uq->uq_key);
 2060                         error = EFAULT;
 2061                         break;
 2062                 }
 2063 
 2064                 umtxq_lock(&uq->uq_key);
 2065                 /*
 2066                  * We set the contested bit, sleep. Otherwise the lock changed
 2067                  * and we need to retry or we lost a race to the thread
 2068                  * unlocking the umtx.  A failed sleep skips the suspension
 2069                  * check below so that its error is not overwritten.
 2070                  */
 2071                 if (old == owner) {
 2072                         error = umtxq_sleep_pi(uq, pi,
 2073                             owner & ~UMUTEX_CONTESTED, "umtxpi",
 2074                             timeout == NULL ? NULL : &timo);
 2075                         if (error != 0)
 2076                                 continue;
 2077                 } else {
 2078                         umtxq_unbusy(&uq->uq_key);
 2079                         umtxq_unlock(&uq->uq_key);
 2080                 }
 2081 
 2082                 error = umtxq_check_susp(td);
 2083                 if (error != 0)
 2084                         break;
 2085         }
 2086 
 2087         umtxq_lock(&uq->uq_key);
 2088         umtx_pi_unref(pi);
 2089         umtxq_unlock(&uq->uq_key);
 2090 
 2091         umtx_key_release(&uq->uq_key);
 2092         return (error);
 2093 }
 2094 
 2095 /*
 2096  * Unlock a PI mutex held by td, drop lent priority, wake the top sleeper.
 2097  */
 2098 static int
 2099 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
 2100 {
 2101         struct umtx_key key;
 2102         struct umtx_q *uq_first, *uq_first2, *uq_me;
 2103         struct umtx_pi *pi, *pi2;
 2104         uint32_t owner, old, id;
 2105         int error;
 2106         int count;
 2107         int pri;
 2108 
 2109         id = td->td_tid;
 2110         /*
 2111          * Make sure we own this mtx.
 2112          */
 2113         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 2114         if (owner == -1)        /* the owner word could not be read */
 2115                 return (EFAULT);
 2116 
 2117         if ((owner & ~UMUTEX_CONTESTED) != id)  /* held by someone else */
 2118                 return (EPERM);
 2119 
 2120         /* This should be done in userland */
 2121         if ((owner & UMUTEX_CONTESTED) == 0) {
 2122                 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
 2123                 if (old == -1)
 2124                         return (EFAULT);
 2125                 if (old == owner)       /* released without contention */
 2126                         return (0);
 2127                 owner = old;            /* word changed: use the value just seen */
 2128         }
 2129 
 2130         /* We should only ever be in here for contested locks */
 2131         if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
 2132             &key)) != 0)
 2133                 return (error);
 2134 
 2135         umtxq_lock(&key);
 2136         umtxq_busy(&key);
 2137         count = umtxq_count_pi(&key, &uq_first); /* NOTE(review): presumably waiter count plus first waiter - confirm */
 2138         if (uq_first != NULL) {
 2139                 mtx_lock_spin(&umtx_lock);
 2140                 pi = uq_first->uq_pi_blocked;
 2141                 KASSERT(pi != NULL, ("pi == NULL?"));
 2142                 if (pi->pi_owner != curthread) {
 2143                         mtx_unlock_spin(&umtx_lock);
 2144                         umtxq_unbusy(&key);
 2145                         umtxq_unlock(&key);
 2146                         umtx_key_release(&key);
 2147                         /* userland messed the mutex */
 2148                         return (EPERM);
 2149                 }
 2150                 uq_me = curthread->td_umtxq;
 2151                 pi->pi_owner = NULL;    /* give up kernel-side ownership */
 2152                 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
 2153                 /* get highest priority thread which is still sleeping. */
 2154                 uq_first = TAILQ_FIRST(&pi->pi_blocked);
 2155                 while (uq_first != NULL && 
 2156                        (uq_first->uq_flags & UQF_UMTXQ) == 0) {
 2157                         uq_first = TAILQ_NEXT(uq_first, uq_lockq);
 2158                 }
 2159                 pri = PRI_MAX;  /* recomputed from the PI mutexes still owned */
 2160                 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
 2161                         uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
 2162                         if (uq_first2 != NULL) {
 2163                                 if (pri > UPRI(uq_first2->uq_thread))
 2164                                         pri = UPRI(uq_first2->uq_thread);
 2165                         }
 2166                 }
 2167                 thread_lock(curthread);
 2168                 sched_lend_user_prio(curthread, pri);
 2169                 thread_unlock(curthread);
 2170                 mtx_unlock_spin(&umtx_lock);
 2171                 if (uq_first)
 2172                         umtxq_signal_thread(uq_first);
 2173         }
 2174         umtxq_unlock(&key);     /* chain stays busy across the user-memory access */
 2175 
 2176         /*
 2177          * When unlocking the umtx, it must be marked as unowned if
 2178          * there is zero or one thread only waiting for it.
 2179          * Otherwise, it must be marked as contested.
 2180          */
 2181         old = casuword32(&m->m_owner, owner,
 2182                 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
 2183 
 2184         umtxq_lock(&key);
 2185         umtxq_unbusy(&key);
 2186         umtxq_unlock(&key);
 2187         umtx_key_release(&key);
 2188         if (old == -1)
 2189                 return (EFAULT);
 2190         if (old != owner)       /* the owner word changed underneath us */
 2191                 return (EINVAL);
 2192         return (0);
 2193 }
 2194 
 2195 /*
 2196  * Lock a PP (UMUTEX_PRIO_PROTECT) mutex, applying its ceiling to td.
 2197  */
 2198 static int
 2199 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
 2200     struct _umtx_time *timeout, int try)
 2201 {
 2202         struct abs_timeout timo;
 2203         struct umtx_q *uq, *uq2;
 2204         struct umtx_pi *pi;
 2205         uint32_t ceiling;
 2206         uint32_t owner, id;
 2207         int error, pri, old_inherited_pri, su;
 2208 
 2209         id = td->td_tid;
 2210         uq = td->td_umtxq;
 2211         if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
 2212             &uq->uq_key)) != 0)
 2213                 return (error);
 2214 
 2215         if (timeout != NULL)
 2216                 abs_timeout_init2(&timo, timeout);
 2217 
 2218         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); /* 1 if the privilege check passes */
 2219         for (;;) {
 2220                 old_inherited_pri = uq->uq_inherited_pri; /* restored on failure */
 2221                 umtxq_lock(&uq->uq_key);
 2222                 umtxq_busy(&uq->uq_key);
 2223                 umtxq_unlock(&uq->uq_key);
 2224 
 2225                 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]); /* a -1 read gives RTP_PRIO_MAX + 1 */
 2226                 if (ceiling > RTP_PRIO_MAX) {
 2227                         error = EINVAL;
 2228                         goto out;
 2229                 }
 2230 
 2231                 mtx_lock_spin(&umtx_lock);
 2232                 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { /* td is numerically below the ceiling */
 2233                         mtx_unlock_spin(&umtx_lock);
 2234                         error = EINVAL;
 2235                         goto out;
 2236                 }
 2237                 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
 2238                         uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
 2239                         thread_lock(td);
 2240                         if (uq->uq_inherited_pri < UPRI(td))
 2241                                 sched_lend_user_prio(td, uq->uq_inherited_pri);
 2242                         thread_unlock(td);
 2243                 }
 2244                 mtx_unlock_spin(&umtx_lock);
 2245 
 2246                 owner = casuword32(&m->m_owner, /* expects UMUTEX_CONTESTED when free */
 2247                     UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 2248 
 2249                 if (owner == UMUTEX_CONTESTED) { /* the swap succeeded: we own it */
 2250                         error = 0;
 2251                         break;
 2252                 }
 2253 
 2254                 /* The address was invalid. */
 2255                 if (owner == -1) {
 2256                         error = EFAULT;
 2257                         break;
 2258                 }
 2259 
 2260                 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
 2261                     (owner & ~UMUTEX_CONTESTED) == id) {
 2262                         error = EDEADLK;
 2263                         break;
 2264                 }
 2265 
 2266                 if (try != 0) {
 2267                         error = EBUSY;
 2268                         break;
 2269                 }
 2270 
 2271                 /*
 2272                  * If we caught a signal, we have retried and now
 2273                  * exit immediately.
 2274                  */
 2275                 if (error != 0)
 2276                         break;
 2277 
 2278                 umtxq_lock(&uq->uq_key);
 2279                 umtxq_insert(uq);
 2280                 umtxq_unbusy(&uq->uq_key);
 2281                 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
 2282                     NULL : &timo);
 2283                 umtxq_remove(uq);
 2284                 umtxq_unlock(&uq->uq_key);
 2285 
 2286                 mtx_lock_spin(&umtx_lock);
 2287                 uq->uq_inherited_pri = old_inherited_pri; /* undo the ceiling before retrying */
 2288                 pri = PRI_MAX;
 2289                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2290                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2291                         if (uq2 != NULL) {
 2292                                 if (pri > UPRI(uq2->uq_thread))
 2293                                         pri = UPRI(uq2->uq_thread);
 2294                         }
 2295                 }
 2296                 if (pri > uq->uq_inherited_pri)
 2297                         pri = uq->uq_inherited_pri;
 2298                 thread_lock(td);
 2299                 sched_lend_user_prio(td, pri);
 2300                 thread_unlock(td);
 2301                 mtx_unlock_spin(&umtx_lock);
 2302         }
 2303 
 2304         if (error != 0) {       /* failed: undo the ceiling and recompute the loan */
 2305                 mtx_lock_spin(&umtx_lock);
 2306                 uq->uq_inherited_pri = old_inherited_pri;
 2307                 pri = PRI_MAX;
 2308                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2309                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2310                         if (uq2 != NULL) {
 2311                                 if (pri > UPRI(uq2->uq_thread))
 2312                                         pri = UPRI(uq2->uq_thread);
 2313                         }
 2314                 }
 2315                 if (pri > uq->uq_inherited_pri)
 2316                         pri = uq->uq_inherited_pri;
 2317                 thread_lock(td);
 2318                 sched_lend_user_prio(td, pri);
 2319                 thread_unlock(td);
 2320                 mtx_unlock_spin(&umtx_lock);
 2321         }
 2322 
 2323 out:    /* every path reaches here with the chain still marked busy */
 2324         umtxq_lock(&uq->uq_key);
 2325         umtxq_unbusy(&uq->uq_key);
 2326         umtxq_unlock(&uq->uq_key);
 2327         umtx_key_release(&uq->uq_key);
 2328         return (error);
 2329 }
 2330 
 2331 /*
 2332  * Unlock a PP mutex owned by td and re-derive td's lent priority.
 2333  */
 2334 static int
 2335 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
 2336 {
 2337         struct umtx_key key;
 2338         struct umtx_q *uq, *uq2;
 2339         struct umtx_pi *pi;
 2340         uint32_t owner, id;
 2341         uint32_t rceiling;
 2342         int error, pri, new_inherited_pri, su;
 2343 
 2344         id = td->td_tid;
 2345         uq = td->td_umtxq;
 2346         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); /* 1 if the privilege check passes */
 2347 
 2348         /*
 2349          * Make sure we own this mtx.
 2350          */
 2351         owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
 2352         if (owner == -1)
 2353                 return (EFAULT);
 2354 
 2355         if ((owner & ~UMUTEX_CONTESTED) != id)
 2356                 return (EPERM);
 2357 
 2358         error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
 2359         if (error != 0)
 2360                 return (error);
 2361 
 2362         if (rceiling == -1)     /* -1 in m_ceilings[1] selects PRI_MAX */
 2363                 new_inherited_pri = PRI_MAX;
 2364         else {
 2365                 rceiling = RTP_PRIO_MAX - rceiling;
 2366                 if (rceiling > RTP_PRIO_MAX)    /* unsigned wrap: value was too large */
 2367                         return (EINVAL);
 2368                 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
 2369         }
 2370 
 2371         if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
 2372             &key)) != 0)
 2373                 return (error);
 2374         umtxq_lock(&key);
 2375         umtxq_busy(&key);
 2376         umtxq_unlock(&key);
 2377         /*
 2378          * For priority protected mutex, always set unlocked state
 2379          * to UMUTEX_CONTESTED, so that userland always enters kernel
 2380          * to lock the mutex, it is necessary because thread priority
 2381          * has to be adjusted for such mutex.
 2382          */
 2383         error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
 2384                 UMUTEX_CONTESTED);
 2385 
 2386         umtxq_lock(&key);
 2387         if (error == 0)         /* wake one waiter only if the store succeeded */
 2388                 umtxq_signal(&key, 1);
 2389         umtxq_unbusy(&key);
 2390         umtxq_unlock(&key);
 2391 
 2392         if (error == -1)        /* the suword32() above failed */
 2393                 error = EFAULT;
 2394         else {
 2395                 mtx_lock_spin(&umtx_lock);
 2396                 if (su != 0)    /* only updated when the privilege check passed */
 2397                         uq->uq_inherited_pri = new_inherited_pri;
 2398                 pri = PRI_MAX;  /* reduced to the minimum found below */
 2399                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2400                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2401                         if (uq2 != NULL) {
 2402                                 if (pri > UPRI(uq2->uq_thread))
 2403                                         pri = UPRI(uq2->uq_thread);
 2404                         }
 2405                 }
 2406                 if (pri > uq->uq_inherited_pri)
 2407                         pri = uq->uq_inherited_pri;
 2408                 thread_lock(td);
 2409                 sched_lend_user_prio(td, pri);
 2410                 thread_unlock(td);
 2411                 mtx_unlock_spin(&umtx_lock);
 2412         }
 2413         umtx_key_release(&key);
 2414         return (error);
 2415 }
 2416 
 2417 /*
 2418  * Set the ceiling of a PP mutex to ceiling and, if old_ceiling is not NULL,
 2419  * copy the previous ceiling out to it.  Sleeps while another thread owns the
 2420  * mutex.  Returns 0, EINVAL, EFAULT, or a umtx_key_get()/umtxq_sleep() error.
 2421  */
 2422 static int
 2423 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
 2424         uint32_t *old_ceiling)
 2425 {
 2426         struct umtx_q *uq;
 2427         uint32_t save_ceiling;
 2428         uint32_t owner, id;
 2429         uint32_t flags;
 2430         int error, rv, rv1;
 2431 
 2432         flags = fuword32(&m->m_flags);
 2433         if (flags == -1)
 2434                 return (EFAULT);
 2435         if ((flags & UMUTEX_PRIO_PROTECT) == 0)
 2436                 return (EINVAL);
 2437         if (ceiling > RTP_PRIO_MAX)
 2438                 return (EINVAL);
 2439         id = td->td_tid;
 2440         uq = td->td_umtxq;
 2441         if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
 2442            &uq->uq_key)) != 0)
 2443                 return (error);
 2444         for (;;) {
 2445                 umtxq_lock(&uq->uq_key);
 2446                 umtxq_busy(&uq->uq_key);
 2447                 umtxq_unlock(&uq->uq_key);
 2448 
 2449                 save_ceiling = fuword32(&m->m_ceilings[0]);
 2450                 owner = casuword32(&m->m_owner,
 2451                     UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 2452                 if (owner == UMUTEX_CONTESTED) {
 2453                         /* Unowned and now held by us: store, then release. */
 2454                         rv = suword32(&m->m_ceilings[0], ceiling);
 2455                         rv1 = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
 2456                             UMUTEX_CONTESTED);
 2457                         error = (rv == 0 && rv1 == 0) ? 0 : EFAULT;
 2458                         break;
 2459                 }
 2460 
 2461                 /* The address was invalid. */
 2462                 if (owner == -1) {
 2463                         error = EFAULT;
 2464                         break;
 2465                 }
 2466 
 2467                 if ((owner & ~UMUTEX_CONTESTED) == id) {
 2468                         rv = suword32(&m->m_ceilings[0], ceiling);
 2469                         error = rv == 0 ? 0 : EFAULT;
 2470                         break;
 2471                 }
 2472 
 2473                 /* A failed sleep has had its one retry; return its error. */
 2474                 if (error != 0)
 2475                         break;
 2476 
 2477                 /* Owned by another thread: sleep until woken, then retry. */
 2478                 umtxq_lock(&uq->uq_key);
 2479                 umtxq_insert(uq);
 2480                 umtxq_unbusy(&uq->uq_key);
 2481                 error = umtxq_sleep(uq, "umtxpp", NULL);
 2482                 umtxq_remove(uq);
 2483                 umtxq_unlock(&uq->uq_key);
 2484         }
 2485         umtxq_lock(&uq->uq_key);
 2486         if (error == 0)
 2487                 umtxq_signal(&uq->uq_key, INT_MAX);
 2488         umtxq_unbusy(&uq->uq_key);
 2489         umtxq_unlock(&uq->uq_key);
 2490         umtx_key_release(&uq->uq_key);
 2491         if (error == 0 && old_ceiling != NULL &&
 2492             suword32(old_ceiling, save_ceiling) != 0)
 2493                 error = EFAULT;
 2494         return (error);
 2495 }
 2496 
 2497 /*
 2498  * Lock a userland POSIX mutex, dispatching on the protocol bits of its
 2499  * flags word.  Returns EFAULT if the flags cannot be read and EINVAL if
 2500  * both protocol bits are set.
 2501  */
 2502 static int
 2503 do_lock_umutex(struct thread *td, struct umutex *m,
 2504     struct _umtx_time *timeout, int mode)
 2505 {
 2506         uint32_t flags, proto;
 2507         int error;
 2508 
 2509         flags = fuword32(&m->m_flags);
 2510         if (flags == -1)
 2511                 return (EFAULT);
 2512 
 2513         proto = flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT);
 2514         if (proto == 0)
 2515                 error = do_lock_normal(td, m, flags, timeout, mode);
 2516         else if (proto == UMUTEX_PRIO_INHERIT)
 2517                 error = do_lock_pi(td, m, flags, timeout, mode);
 2518         else if (proto == UMUTEX_PRIO_PROTECT)
 2519                 error = do_lock_pp(td, m, flags, timeout, mode);
 2520         else
 2521                 return (EINVAL);
 2522 
 2523         if (timeout != NULL) {
 2524                 /* Timed-locking is not restarted. */
 2525                 if (error == ERESTART)
 2526                         error = EINTR;
 2527         } else if (error == EINTR && mode != _UMUTEX_WAIT) {
 2528                 /* Untimed: EINTR becomes ERESTART except in wait mode. */
 2529                 error = ERESTART;
 2530         }
 2531 
 2532         return (error);
 2533 }
 2534 
 2535 /*
 2536  * Unlock a userland POSIX mutex.
 2537  */
 2538 static int
 2539 do_unlock_umutex(struct thread *td, struct umutex *m)
 2540 {
 2541         uint32_t flags;
 2542 
 2543         flags = fuword32(&m->m_flags);
 2544         if (flags == -1)
 2545                 return (EFAULT);
 2546 
 2547         switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2548         case 0:
 2549                 return (do_unlock_normal(td, m, flags));
 2550         case UMUTEX_PRIO_INHERIT:
 2551                 return (do_unlock_pi(td, m, flags));
 2552         case UMUTEX_PRIO_PROTECT:
 2553                 return (do_unlock_pp(td, m, flags));
 2554         }
 2555 
 2556         return (EINVAL);
 2557 }
 2558 
 2559 static int
 2560 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
 2561         struct timespec *timeout, u_long wflags)
 2562 {
 2563         struct abs_timeout timo;
 2564         struct umtx_q *uq;
 2565         uint32_t flags;
 2566         uint32_t clockid;
 2567         int error;
 2568 
 2569         uq = td->td_umtxq;
 2570         flags = fuword32(&cv->c_flags);
 2571         error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
 2572         if (error != 0)
 2573                 return (error);
 2574 
 2575         if ((wflags & CVWAIT_CLOCKID) != 0) {
 2576                 clockid = fuword32(&cv->c_clockid);
 2577                 if (clockid < CLOCK_REALTIME ||
 2578                     clockid >= CLOCK_THREAD_CPUTIME_ID) {
 2579                         /* hmm, only HW clock id will work. */
 2580                         return (EINVAL);
 2581                 }
 2582         } else {
 2583                 clockid = CLOCK_REALTIME;
 2584         }
 2585 
 2586         umtxq_lock(&uq->uq_key);
 2587         umtxq_busy(&uq->uq_key);
 2588         umtxq_insert(uq);
 2589         umtxq_unlock(&uq->uq_key);
 2590 
 2591         /*
 2592          * Set c_has_waiters to 1 before releasing user mutex, also
 2593          * don't modify cache line when unnecessary.
 2594          */
 2595         if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
 2596                 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
 2597 
 2598         umtxq_lock(&uq->uq_key);
 2599         umtxq_unbusy(&uq->uq_key);
 2600         umtxq_unlock(&uq->uq_key);
 2601 
 2602         error = do_unlock_umutex(td, m);
 2603 
 2604         if (timeout != NULL)
 2605                 abs_timeout_init(&timo, clockid, ((wflags & CVWAIT_ABSTIME) != 0),
 2606                         timeout);
 2607         
 2608         umtxq_lock(&uq->uq_key);
 2609         if (error == 0) {
 2610                 error = umtxq_sleep(uq, "ucond", timeout == NULL ?
 2611                     NULL : &timo);
 2612         }
 2613 
 2614         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 2615                 error = 0;
 2616         else {
 2617                 /*
 2618                  * This must be timeout,interrupted by signal or
 2619                  * surprious wakeup, clear c_has_waiter flag when
 2620                  * necessary.
 2621                  */
 2622                 umtxq_busy(&uq->uq_key);
 2623                 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
 2624                         int oldlen = uq->uq_cur_queue->length;
 2625                         umtxq_remove(uq);
 2626                         if (oldlen == 1) {
 2627                                 umtxq_unlock(&uq->uq_key);
 2628                                 suword32(
 2629                                     __DEVOLATILE(uint32_t *,
 2630                                          &cv->c_has_waiters), 0);
 2631                                 umtxq_lock(&uq->uq_key);
 2632                         }
 2633                 }
 2634                 umtxq_unbusy(&uq->uq_key);
 2635                 if (error == ERESTART)
 2636                         error = EINTR;
 2637         }
 2638 
 2639         umtxq_unlock(&uq->uq_key);
 2640         umtx_key_release(&uq->uq_key);
 2641         return (error);
 2642 }
 2643 
 2644 /*
 2645  * Signal a userland condition variable.
 2646  */
 2647 static int
 2648 do_cv_signal(struct thread *td, struct ucond *cv)
 2649 {
 2650         struct umtx_key key;
 2651         int error, cnt, nwake;
 2652         uint32_t flags;
 2653 
 2654         flags = fuword32(&cv->c_flags);
 2655         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 2656                 return (error); 
 2657         umtxq_lock(&key);
 2658         umtxq_busy(&key);
 2659         cnt = umtxq_count(&key);
 2660         nwake = umtxq_signal(&key, 1);
 2661         if (cnt <= nwake) {
 2662                 umtxq_unlock(&key);
 2663                 error = suword32(
 2664                     __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
 2665                 umtxq_lock(&key);
 2666         }
 2667         umtxq_unbusy(&key);
 2668         umtxq_unlock(&key);
 2669         umtx_key_release(&key);
 2670         return (error);
 2671 }
 2672 
 2673 static int
 2674 do_cv_broadcast(struct thread *td, struct ucond *cv)
 2675 {
 2676         struct umtx_key key;
 2677         int error;
 2678         uint32_t flags;
 2679 
 2680         flags = fuword32(&cv->c_flags);
 2681         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 2682                 return (error); 
 2683 
 2684         umtxq_lock(&key);
 2685         umtxq_busy(&key);
 2686         umtxq_signal(&key, INT_MAX);
 2687         umtxq_unlock(&key);
 2688 
 2689         error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
 2690 
 2691         umtxq_lock(&key);
 2692         umtxq_unbusy(&key);
 2693         umtxq_unlock(&key);
 2694 
 2695         umtx_key_release(&key);
 2696         return (error);
 2697 }
 2698 
/*
 * Acquire a userland read/write lock for reading.
 *
 * fflag and the lock's own rw_flags are both checked for
 * URWLOCK_PREFER_READER.  Unless one of them requests reader preference,
 * a reader also backs off when writers are waiting (URWLOCK_WRITE_WAITERS),
 * not only when a writer owns the lock (URWLOCK_WRITE_OWNER).
 *
 * Returns 0 once the reader count in rw_state has been incremented,
 * EAGAIN when the reader count already equals URWLOCK_MAX_READERS, EFAULT
 * when casuword32() reports -1, the error from umtx_key_get() or
 * umtxq_check_susp(), or the sleep error with ERESTART mapped to EINTR.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, struct _umtx_time *timeout)
{
        struct abs_timeout timo;
        struct umtx_q *uq;
        uint32_t flags, wrflags;
        int32_t state, oldstate;
        int32_t blocked_readers;
        int error;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        if (timeout != NULL)
                abs_timeout_init2(&timo, timeout);

        /* Bits in rw_state that force a reader to wait. */
        wrflags = URWLOCK_WRITE_OWNER;
        if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
                wrflags |= URWLOCK_WRITE_WAITERS;

        for (;;) {
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                /* try to lock it */
                while (!(state & wrflags)) {
                        if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
                                umtx_key_release(&uq->uq_key);
                                return (EAGAIN);
                        }
                        /* Adding 1 to the state word bumps the reader count. */
                        oldstate = casuword32(&rwlock->rw_state, state, state + 1);
                        if (oldstate == -1) {
                                umtx_key_release(&uq->uq_key);
                                return (EFAULT);
                        }
                        if (oldstate == state) {
                                umtx_key_release(&uq->uq_key);
                                return (0);
                        }
                        /* CAS lost a race; retry with the value we observed. */
                        error = umtxq_check_susp(td);
                        if (error != 0)
                                break;
                        state = oldstate;
                }

                if (error)
                        break;

                /* grab monitor lock */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * re-read the state, in case it changed between the try-lock above
                 * and the check below
                 */
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

                /* set read contention bit */
                while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
                        if (oldstate == -1) {
                                error = EFAULT;
                                break;
                        }
                        if (oldstate == state)
                                goto sleep;
                        state = oldstate;
                        error = umtxq_check_susp(td);
                        if (error != 0)
                                break;
                }
                /* On failure, drop the busy state taken above before leaving. */
                if (error != 0) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        break;
                }

                /* state is changed while setting flags, restart */
                if (!(state & wrflags)) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        error = umtxq_check_susp(td);
                        if (error != 0)
                                break;
                        continue;
                }

sleep:
                /* contention bit is set, before sleeping, increase read waiter count */
                blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

                /* Sleep until the blocking bits clear or the sleep fails. */
                while (state & wrflags) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_insert(uq);
                        umtxq_unbusy(&uq->uq_key);

                        error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
                            NULL : &timo);

                        /* Re-take the busy state before touching user memory again. */
                        umtxq_busy(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        if (error)
                                break;
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                }

                /* decrease read waiter count, and may clear read contention bit */
                blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
                /* We were the last blocked reader: clear URWLOCK_READ_WAITERS. */
                if (blocked_readers == 1) {
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                        for (;;) {
                                oldstate = casuword32(&rwlock->rw_state, state,
                                         state & ~URWLOCK_READ_WAITERS);
                                if (oldstate == -1) {
                                        error = EFAULT;
                                        break;
                                }
                                if (oldstate == state)
                                        break;
                                state = oldstate;
                                error = umtxq_check_susp(td);
                                if (error != 0)
                                        break;
                        }
                }

                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
                /* Otherwise loop back and retry the lock attempt from the top. */
                if (error != 0)
                        break;
        }
        umtx_key_release(&uq->uq_key);
        /* This operation is reported as interrupted rather than restarted. */
        if (error == ERESTART)
                error = EINTR;
        return (error);
}
 2844 
/*
 * Acquire a userland read/write lock for writing.
 *
 * The lock can be taken only while rw_state shows neither a write owner
 * nor any readers.  Otherwise URWLOCK_WRITE_WAITERS is set, the blocked
 * writer count is bumped, and the thread sleeps on the exclusive queue.
 *
 * Returns 0 once URWLOCK_WRITE_OWNER has been set by this thread, EFAULT
 * when casuword32() reports -1, the error from umtx_key_get() or
 * umtxq_check_susp(), or the sleep error with ERESTART mapped to EINTR.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
        struct abs_timeout timo;
        struct umtx_q *uq;
        uint32_t flags;
        int32_t state, oldstate;
        int32_t blocked_writers;
        int32_t blocked_readers;
        int error;

        uq = td->td_umtxq;
        flags = fuword32(&rwlock->rw_flags);
        error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
        if (error != 0)
                return (error);

        if (timeout != NULL)
                abs_timeout_init2(&timo, timeout);

        blocked_readers = 0;
        for (;;) {
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                /* Try to take the lock while it has no owner and no readers. */
                while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
                        if (oldstate == -1) {
                                umtx_key_release(&uq->uq_key);
                                return (EFAULT);
                        }
                        if (oldstate == state) {
                                umtx_key_release(&uq->uq_key);
                                return (0);
                        }
                        state = oldstate;
                        error = umtxq_check_susp(td);
                        if (error != 0)
                                break;
                }

                /*
                 * An error is pending, either from the loop above or
                 * carried over from the previous pass.  Before giving up,
                 * wake all waiters on the shared queue if no writer owns
                 * or waits for the lock and blocked readers were recorded
                 * on the previous pass.
                 */
                if (error) {
                        if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
                            blocked_readers != 0) {
                                umtxq_lock(&uq->uq_key);
                                umtxq_busy(&uq->uq_key);
                                umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
                                umtxq_unbusy(&uq->uq_key);
                                umtxq_unlock(&uq->uq_key);
                        }

                        break;
                }

                /* grab monitor lock */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * re-read the state, in case it changed between the try-lock above
                 * and the check below
                 */
                state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));

                /* Set the write contention bit while the lock is still held. */
                while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
                       (state & URWLOCK_WRITE_WAITERS) == 0) {
                        oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
                        if (oldstate == -1) {
                                error = EFAULT;
                                break;
                        }
                        if (oldstate == state)
                                goto sleep;
                        state = oldstate;
                        error = umtxq_check_susp(td);
                        if (error != 0)
                                break;
                }
                /* On failure, drop the busy state taken above before leaving. */
                if (error != 0) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        break;
                }

                /* The lock became free while setting the flag; restart. */
                if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        error = umtxq_check_susp(td);
                        if (error != 0)
                                break;
                        continue;
                }
sleep:
                /* Contention bit is set; count ourselves as a blocked writer. */
                blocked_writers = fuword32(&rwlock->rw_blocked_writers);
                suword32(&rwlock->rw_blocked_writers, blocked_writers+1);

                /* Sleep on the exclusive queue until the lock looks free. */
                while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
                        umtxq_unbusy(&uq->uq_key);

                        error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
                            NULL : &timo);

                        /* Re-take the busy state before touching user memory again. */
                        umtxq_busy(&uq->uq_key);
                        umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
                        umtxq_unlock(&uq->uq_key);
                        if (error)
                                break;
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                }

                /* Drop the blocked writer count; the last one clears the bit. */
                blocked_writers = fuword32(&rwlock->rw_blocked_writers);
                suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
                if (blocked_writers == 1) {
                        state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
                        for (;;) {
                                oldstate = casuword32(&rwlock->rw_state, state,
                                         state & ~URWLOCK_WRITE_WAITERS);
                                if (oldstate == -1) {
                                        error = EFAULT;
                                        break;
                                }
                                if (oldstate == state)
                                        break;
                                state = oldstate;
                                error = umtxq_check_susp(td);
                                /*
                                 * We are leaving the URWLOCK_WRITE_WAITERS
                                 * behind, but this should not harm the
                                 * correctness.
                                 */
                                if (error != 0)
                                        break;
                        }
                        /* Remembered for the error path at the top of the loop. */
                        blocked_readers = fuword32(&rwlock->rw_blocked_readers);
                } else
                        blocked_readers = 0;

                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);
                /*
                 * No break here: a pending error is handled at the top of
                 * the next pass, after one more attempt to take the lock.
                 */
        }

        umtx_key_release(&uq->uq_key);
        /* This operation is reported as interrupted rather than restarted. */
        if (error == ERESTART)
                error = EINTR;
        return (error);
}
 2995 
 2996 static int
 2997 do_rw_unlock(struct thread *td, struct urwlock *rwlock)
 2998 {
 2999         struct umtx_q *uq;
 3000         uint32_t flags;
 3001         int32_t state, oldstate;
 3002         int error, q, count;
 3003 
 3004         uq = td->td_umtxq;
 3005         flags = fuword32(&rwlock->rw_flags);
 3006         error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
 3007         if (error != 0)
 3008                 return (error);
 3009 
 3010         state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
 3011         if (state & URWLOCK_WRITE_OWNER) {
 3012                 for (;;) {
 3013                         oldstate = casuword32(&rwlock->rw_state, state, 
 3014                                 state & ~URWLOCK_WRITE_OWNER);
 3015                         if (oldstate == -1) {
 3016                                 error = EFAULT;
 3017                                 goto out;
 3018                         }
 3019                         if (oldstate != state) {
 3020                                 state = oldstate;
 3021                                 if (!(oldstate & URWLOCK_WRITE_OWNER)) {
 3022                                         error = EPERM;
 3023                                         goto out;
 3024                                 }
 3025                                 error = umtxq_check_susp(td);
 3026                                 if (error != 0)
 3027                                         goto out;
 3028                         } else
 3029                                 break;
 3030                 }
 3031         } else if (URWLOCK_READER_COUNT(state) != 0) {
 3032                 for (;;) {
 3033                         oldstate = casuword32(&rwlock->rw_state, state,
 3034                                 state - 1);
 3035                         if (oldstate == -1) {
 3036                                 error = EFAULT;
 3037                                 goto out;
 3038                         }
 3039                         if (oldstate != state) {
 3040                                 state = oldstate;
 3041                                 if (URWLOCK_READER_COUNT(oldstate) == 0) {
 3042                                         error = EPERM;
 3043                                         goto out;
 3044                                 }
 3045                                 error = umtxq_check_susp(td);
 3046                                 if (error != 0)
 3047                                         goto out;
 3048                         } else
 3049                                 break;
 3050                 }
 3051         } else {
 3052                 error = EPERM;
 3053                 goto out;
 3054         }
 3055 
 3056         count = 0;
 3057 
 3058         if (!(flags & URWLOCK_PREFER_READER)) {
 3059                 if (state & URWLOCK_WRITE_WAITERS) {
 3060                         count = 1;
 3061                         q = UMTX_EXCLUSIVE_QUEUE;
 3062                 } else if (state & URWLOCK_READ_WAITERS) {
 3063                         count = INT_MAX;
 3064                         q = UMTX_SHARED_QUEUE;
 3065                 }
 3066         } else {
 3067                 if (state & URWLOCK_READ_WAITERS) {
 3068                         count = INT_MAX;
 3069                         q = UMTX_SHARED_QUEUE;
 3070                 } else if (state & URWLOCK_WRITE_WAITERS) {
 3071                         count = 1;
 3072                         q = UMTX_EXCLUSIVE_QUEUE;
 3073                 }
 3074         }
 3075 
 3076         if (count) {
 3077                 umtxq_lock(&uq->uq_key);
 3078                 umtxq_busy(&uq->uq_key);
 3079                 umtxq_signal_queue(&uq->uq_key, count, q);
 3080                 umtxq_unbusy(&uq->uq_key);
 3081                 umtxq_unlock(&uq->uq_key);
 3082         }
 3083 out:
 3084         umtx_key_release(&uq->uq_key);
 3085         return (error);
 3086 }
 3087 
 3088 static int
 3089 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
 3090 {
 3091         struct abs_timeout timo;
 3092         struct umtx_q *uq;
 3093         uint32_t flags, count;
 3094         int error;
 3095 
 3096         uq = td->td_umtxq;
 3097         flags = fuword32(&sem->_flags);
 3098         error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
 3099         if (error != 0)
 3100                 return (error);
 3101 
 3102         if (timeout != NULL)
 3103                 abs_timeout_init2(&timo, timeout);
 3104 
 3105         umtxq_lock(&uq->uq_key);
 3106         umtxq_busy(&uq->uq_key);
 3107         umtxq_insert(uq);
 3108         umtxq_unlock(&uq->uq_key);
 3109         casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
 3110         count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
 3111         if (count != 0) {
 3112                 umtxq_lock(&uq->uq_key);
 3113                 umtxq_unbusy(&uq->uq_key);
 3114                 umtxq_remove(uq);
 3115                 umtxq_unlock(&uq->uq_key);
 3116                 umtx_key_release(&uq->uq_key);
 3117                 return (0);
 3118         }
 3119         umtxq_lock(&uq->uq_key);
 3120         umtxq_unbusy(&uq->uq_key);
 3121 
 3122         error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
 3123 
 3124         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 3125                 error = 0;
 3126         else {
 3127                 umtxq_remove(uq);
 3128                 /* A relative timeout cannot be restarted. */
 3129                 if (error == ERESTART && timeout != NULL &&
 3130                     (timeout->_flags & UMTX_ABSTIME) == 0)
 3131                         error = EINTR;
 3132         }
 3133         umtxq_unlock(&uq->uq_key);
 3134         umtx_key_release(&uq->uq_key);
 3135         return (error);
 3136 }
 3137 
 3138 /*
 3139  * Signal a userland condition variable.
 3140  */
 3141 static int
 3142 do_sem_wake(struct thread *td, struct _usem *sem)
 3143 {
 3144         struct umtx_key key;
 3145         int error, cnt;
 3146         uint32_t flags;
 3147 
 3148         flags = fuword32(&sem->_flags);
 3149         if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
 3150                 return (error); 
 3151         umtxq_lock(&key);
 3152         umtxq_busy(&key);
 3153         cnt = umtxq_count(&key);
 3154         if (cnt > 0) {
 3155                 umtxq_signal(&key, 1);
 3156                 /*
 3157                  * Check if count is greater than 0, this means the memory is
 3158                  * still being referenced by user code, so we can safely
 3159                  * update _has_waiters flag.
 3160                  */
 3161                 if (cnt == 1) {
 3162                         umtxq_unlock(&key);
 3163                         error = suword32(
 3164                             __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
 3165                         umtxq_lock(&key);
 3166                 }
 3167         }
 3168         umtxq_unbusy(&key);
 3169         umtxq_unlock(&key);
 3170         umtx_key_release(&key);
 3171         return (error);
 3172 }
 3173 
 3174 int
 3175 sys__umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
 3176     /* struct umtx *umtx */
 3177 {
 3178         return do_lock_umtx(td, uap->umtx, td->td_tid, 0);
 3179 }
 3180 
 3181 int
 3182 sys__umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
 3183     /* struct umtx *umtx */
 3184 {
 3185         return do_unlock_umtx(td, uap->umtx, td->td_tid);
 3186 }
 3187 
 3188 inline int
 3189 umtx_copyin_timeout(const void *addr, struct timespec *tsp)
 3190 {
 3191         int error;
 3192 
 3193         error = copyin(addr, tsp, sizeof(struct timespec));
 3194         if (error == 0) {
 3195                 if (tsp->tv_sec < 0 ||
 3196                     tsp->tv_nsec >= 1000000000 ||
 3197                     tsp->tv_nsec < 0)
 3198                         error = EINVAL;
 3199         }
 3200         return (error);
 3201 }
 3202 
 3203 static inline int
 3204 umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
 3205 {
 3206         int error;
 3207         
 3208         if (size <= sizeof(struct timespec)) {
 3209                 tp->_clockid = CLOCK_REALTIME;
 3210                 tp->_flags = 0;
 3211                 error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
 3212         } else 
 3213                 error = copyin(addr, tp, sizeof(struct _umtx_time));
 3214         if (error != 0)
 3215                 return (error);
 3216         if (tp->_timeout.tv_sec < 0 ||
 3217             tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
 3218                 return (EINVAL);
 3219         return (0);
 3220 }
 3221 
 3222 static int
 3223 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
 3224 {
 3225         struct timespec *ts, timeout;
 3226         int error;
 3227 
 3228         /* Allow a null timespec (wait forever). */
 3229         if (uap->uaddr2 == NULL)
 3230                 ts = NULL;
 3231         else {
 3232                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3233                 if (error != 0)
 3234                         return (error);
 3235                 ts = &timeout;
 3236         }
 3237         return (do_lock_umtx(td, uap->obj, uap->val, ts));
 3238 }
 3239 
 3240 static int
 3241 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
 3242 {
 3243         return (do_unlock_umtx(td, uap->obj, uap->val));
 3244 }
 3245 
 3246 static int
 3247 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
 3248 {
 3249         struct _umtx_time timeout, *tm_p;
 3250         int error;
 3251 
 3252         if (uap->uaddr2 == NULL)
 3253                 tm_p = NULL;
 3254         else {
 3255                 error = umtx_copyin_umtx_time(
 3256                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3257                 if (error != 0)
 3258                         return (error);
 3259                 tm_p = &timeout;
 3260         }
 3261         return do_wait(td, uap->obj, uap->val, tm_p, 0, 0);
 3262 }
 3263 
 3264 static int
 3265 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
 3266 {
 3267         struct _umtx_time timeout, *tm_p;
 3268         int error;
 3269 
 3270         if (uap->uaddr2 == NULL)
 3271                 tm_p = NULL;
 3272         else {
 3273                 error = umtx_copyin_umtx_time(
 3274                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3275                 if (error != 0)
 3276                         return (error);
 3277                 tm_p = &timeout;
 3278         }
 3279         return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
 3280 }
 3281 
 3282 static int
 3283 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
 3284 {
 3285         struct _umtx_time *tm_p, timeout;
 3286         int error;
 3287 
 3288         if (uap->uaddr2 == NULL)
 3289                 tm_p = NULL;
 3290         else {
 3291                 error = umtx_copyin_umtx_time(
 3292                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3293                 if (error != 0)
 3294                         return (error);
 3295                 tm_p = &timeout;
 3296         }
 3297         return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
 3298 }
 3299 
 3300 static int
 3301 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
 3302 {
 3303         return (kern_umtx_wake(td, uap->obj, uap->val, 0));
 3304 }
 3305 
 3306 #define BATCH_SIZE      128
 3307 static int
 3308 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
 3309 {
 3310         int count = uap->val;
 3311         void *uaddrs[BATCH_SIZE];
 3312         char **upp = (char **)uap->obj;
 3313         int tocopy;
 3314         int error = 0;
 3315         int i, pos = 0;
 3316 
 3317         while (count > 0) {
 3318                 tocopy = count;
 3319                 if (tocopy > BATCH_SIZE)
 3320                         tocopy = BATCH_SIZE;
 3321                 error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
 3322                 if (error != 0)
 3323                         break;
 3324                 for (i = 0; i < tocopy; ++i)
 3325                         kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
 3326                 count -= tocopy;
 3327                 pos += tocopy;
 3328         }
 3329         return (error);
 3330 }
 3331 
 3332 static int
 3333 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
 3334 {
 3335         return (kern_umtx_wake(td, uap->obj, uap->val, 1));
 3336 }
 3337 
 3338 static int
 3339 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
 3340 {
 3341         struct _umtx_time *tm_p, timeout;
 3342         int error;
 3343 
 3344         /* Allow a null timespec (wait forever). */
 3345         if (uap->uaddr2 == NULL)
 3346                 tm_p = NULL;
 3347         else {
 3348                 error = umtx_copyin_umtx_time(
 3349                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3350                 if (error != 0)
 3351                         return (error);
 3352                 tm_p = &timeout;
 3353         }
 3354         return do_lock_umutex(td, uap->obj, tm_p, 0);
 3355 }
 3356 
 3357 static int
 3358 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
 3359 {
 3360         return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
 3361 }
 3362 
 3363 static int
 3364 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
 3365 {
 3366         struct _umtx_time *tm_p, timeout;
 3367         int error;
 3368 
 3369         /* Allow a null timespec (wait forever). */
 3370         if (uap->uaddr2 == NULL)
 3371                 tm_p = NULL;
 3372         else {
 3373                 error = umtx_copyin_umtx_time(
 3374                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3375                 if (error != 0)
 3376                         return (error);
 3377                 tm_p = &timeout;
 3378         }
 3379         return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
 3380 }
 3381 
 3382 static int
 3383 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
 3384 {
 3385         return do_wake_umutex(td, uap->obj);
 3386 }
 3387 
 3388 static int
 3389 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
 3390 {
 3391         return do_unlock_umutex(td, uap->obj);
 3392 }
 3393 
 3394 static int
 3395 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
 3396 {
 3397         return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
 3398 }
 3399 
 3400 static int
 3401 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
 3402 {
 3403         struct timespec *ts, timeout;
 3404         int error;
 3405 
 3406         /* Allow a null timespec (wait forever). */
 3407         if (uap->uaddr2 == NULL)
 3408                 ts = NULL;
 3409         else {
 3410                 error = umtx_copyin_timeout(uap->uaddr2, &timeout);
 3411                 if (error != 0)
 3412                         return (error);
 3413                 ts = &timeout;
 3414         }
 3415         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 3416 }
 3417 
 3418 static int
 3419 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
 3420 {
 3421         return do_cv_signal(td, uap->obj);
 3422 }
 3423 
 3424 static int
 3425 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
 3426 {
 3427         return do_cv_broadcast(td, uap->obj);
 3428 }
 3429 
 3430 static int
 3431 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
 3432 {
 3433         struct _umtx_time timeout;
 3434         int error;
 3435 
 3436         /* Allow a null timespec (wait forever). */
 3437         if (uap->uaddr2 == NULL) {
 3438                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3439         } else {
 3440                 error = umtx_copyin_umtx_time(uap->uaddr2,
 3441                    (size_t)uap->uaddr1, &timeout);
 3442                 if (error != 0)
 3443                         return (error);
 3444                 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
 3445         }
 3446         return (error);
 3447 }
 3448 
 3449 static int
 3450 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
 3451 {
 3452         struct _umtx_time timeout;
 3453         int error;
 3454 
 3455         /* Allow a null timespec (wait forever). */
 3456         if (uap->uaddr2 == NULL) {
 3457                 error = do_rw_wrlock(td, uap->obj, 0);
 3458         } else {
 3459                 error = umtx_copyin_umtx_time(uap->uaddr2, 
 3460                    (size_t)uap->uaddr1, &timeout);
 3461                 if (error != 0)
 3462                         return (error);
 3463 
 3464                 error = do_rw_wrlock(td, uap->obj, &timeout);
 3465         }
 3466         return (error);
 3467 }
 3468 
 3469 static int
 3470 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
 3471 {
 3472         return do_rw_unlock(td, uap->obj);
 3473 }
 3474 
 3475 static int
 3476 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
 3477 {
 3478         struct _umtx_time *tm_p, timeout;
 3479         int error;
 3480 
 3481         /* Allow a null timespec (wait forever). */
 3482         if (uap->uaddr2 == NULL)
 3483                 tm_p = NULL;
 3484         else {
 3485                 error = umtx_copyin_umtx_time(
 3486                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3487                 if (error != 0)
 3488                         return (error);
 3489                 tm_p = &timeout;
 3490         }
 3491         return (do_sem_wait(td, uap->obj, tm_p));
 3492 }
 3493 
 3494 static int
 3495 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
 3496 {
 3497         return do_sem_wake(td, uap->obj);
 3498 }
 3499 
 3500 static int
 3501 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
 3502 {
 3503         return do_wake2_umutex(td, uap->obj, uap->val);
 3504 }
 3505 
/* Signature shared by every _umtx_op handler. */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Native dispatch table for sys__umtx_op(), indexed directly by uap->op.
 * The table is positional: each entry must sit at the index equal to the
 * operation code named in its trailing comment, and sys__umtx_op() only
 * bounds-checks the index against UMTX_OP_MAX.
 */
static _umtx_op_func op_table[] = {
        __umtx_op_lock_umtx,            /* UMTX_OP_LOCK */
        __umtx_op_unlock_umtx,          /* UMTX_OP_UNLOCK */
        __umtx_op_wait,                 /* UMTX_OP_WAIT */
        __umtx_op_wake,                 /* UMTX_OP_WAKE */
        __umtx_op_trylock_umutex,       /* UMTX_OP_MUTEX_TRYLOCK */
        __umtx_op_lock_umutex,          /* UMTX_OP_MUTEX_LOCK */
        __umtx_op_unlock_umutex,        /* UMTX_OP_MUTEX_UNLOCK */
        __umtx_op_set_ceiling,          /* UMTX_OP_SET_CEILING */
        __umtx_op_cv_wait,              /* UMTX_OP_CV_WAIT */
        __umtx_op_cv_signal,            /* UMTX_OP_CV_SIGNAL */
        __umtx_op_cv_broadcast,         /* UMTX_OP_CV_BROADCAST */
        __umtx_op_wait_uint,            /* UMTX_OP_WAIT_UINT */
        __umtx_op_rw_rdlock,            /* UMTX_OP_RW_RDLOCK */
        __umtx_op_rw_wrlock,            /* UMTX_OP_RW_WRLOCK */
        __umtx_op_rw_unlock,            /* UMTX_OP_RW_UNLOCK */
        __umtx_op_wait_uint_private,    /* UMTX_OP_WAIT_UINT_PRIVATE */
        __umtx_op_wake_private,         /* UMTX_OP_WAKE_PRIVATE */
        __umtx_op_wait_umutex,          /* UMTX_OP_UMUTEX_WAIT */
        __umtx_op_wake_umutex,          /* UMTX_OP_UMUTEX_WAKE */
        __umtx_op_sem_wait,             /* UMTX_OP_SEM_WAIT */
        __umtx_op_sem_wake,             /* UMTX_OP_SEM_WAKE */
        __umtx_op_nwake_private,        /* UMTX_OP_NWAKE_PRIVATE */
        __umtx_op_wake2_umutex          /* UMTX_OP_UMUTEX_WAKE2 */
};
 3533 
 3534 int
 3535 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
 3536 {
 3537         if ((unsigned)uap->op < UMTX_OP_MAX)
 3538                 return (*op_table[uap->op])(td, uap);
 3539         return (EINVAL);
 3540 }
 3541 
 3542 #ifdef COMPAT_FREEBSD32
 3543 int
 3544 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
 3545     /* struct umtx *umtx */
 3546 {
 3547         return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
 3548 }
 3549 
 3550 int
 3551 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
 3552     /* struct umtx *umtx */
 3553 {
 3554         return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
 3555 }
 3556 
/*
 * Timespec as laid out by 32-bit callers: both fields are 32 bits wide.
 * It is copied in from user space and widened into a native struct
 * timespec by umtx_copyin_timeout32() and umtx_copyin_umtx_time32().
 */
struct timespec32 {
        int32_t tv_sec;         /* seconds; rejected by the copyin helpers if negative */
        int32_t tv_nsec;        /* nanoseconds; must be in [0, 1000000000) */
};
 3561 
/*
 * 32-bit layout of the extended timeout object.  It is converted to a
 * native struct _umtx_time (_timeout, _flags, _clockid) by
 * umtx_copyin_umtx_time32().
 */
struct umtx_time32 {
        struct  timespec32      timeout;        /* copied to _timeout */
        uint32_t                flags;          /* copied to _flags */
        uint32_t                clockid;        /* copied to _clockid */
};
 3567 
 3568 static inline int
 3569 umtx_copyin_timeout32(void *addr, struct timespec *tsp)
 3570 {
 3571         struct timespec32 ts32;
 3572         int error;
 3573 
 3574         error = copyin(addr, &ts32, sizeof(struct timespec32));
 3575         if (error == 0) {
 3576                 if (ts32.tv_sec < 0 ||
 3577                     ts32.tv_nsec >= 1000000000 ||
 3578                     ts32.tv_nsec < 0)
 3579                         error = EINVAL;
 3580                 else {
 3581                         tsp->tv_sec = ts32.tv_sec;
 3582                         tsp->tv_nsec = ts32.tv_nsec;
 3583                 }
 3584         }
 3585         return (error);
 3586 }
 3587 
 3588 static inline int
 3589 umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp)
 3590 {
 3591         struct umtx_time32 t32;
 3592         int error;
 3593         
 3594         t32.clockid = CLOCK_REALTIME;
 3595         t32.flags   = 0;
 3596         if (size <= sizeof(struct timespec32))
 3597                 error = copyin(addr, &t32.timeout, sizeof(struct timespec32));
 3598         else 
 3599                 error = copyin(addr, &t32, sizeof(struct umtx_time32));
 3600         if (error != 0)
 3601                 return (error);
 3602         if (t32.timeout.tv_sec < 0 ||
 3603             t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0)
 3604                 return (EINVAL);
 3605         tp->_timeout.tv_sec = t32.timeout.tv_sec;
 3606         tp->_timeout.tv_nsec = t32.timeout.tv_nsec;
 3607         tp->_flags = t32.flags;
 3608         tp->_clockid = t32.clockid;
 3609         return (0);
 3610 }
 3611 
 3612 static int
 3613 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3614 {
 3615         struct timespec *ts, timeout;
 3616         int error;
 3617 
 3618         /* Allow a null timespec (wait forever). */
 3619         if (uap->uaddr2 == NULL)
 3620                 ts = NULL;
 3621         else {
 3622                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3623                 if (error != 0)
 3624                         return (error);
 3625                 ts = &timeout;
 3626         }
 3627         return (do_lock_umtx32(td, uap->obj, uap->val, ts));
 3628 }
 3629 
 3630 static int
 3631 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
 3632 {
 3633         return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
 3634 }
 3635 
 3636 static int
 3637 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3638 {
 3639         struct _umtx_time *tm_p, timeout;
 3640         int error;
 3641 
 3642         if (uap->uaddr2 == NULL)
 3643                 tm_p = NULL;
 3644         else {
 3645                 error = umtx_copyin_umtx_time32(uap->uaddr2,
 3646                         (size_t)uap->uaddr1, &timeout);
 3647                 if (error != 0)
 3648                         return (error);
 3649                 tm_p = &timeout;
 3650         }
 3651         return do_wait(td, uap->obj, uap->val, tm_p, 1, 0);
 3652 }
 3653 
 3654 static int
 3655 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3656 {
 3657         struct _umtx_time *tm_p, timeout;
 3658         int error;
 3659 
 3660         /* Allow a null timespec (wait forever). */
 3661         if (uap->uaddr2 == NULL)
 3662                 tm_p = NULL;
 3663         else {
 3664                 error = umtx_copyin_umtx_time(uap->uaddr2,
 3665                             (size_t)uap->uaddr1, &timeout);
 3666                 if (error != 0)
 3667                         return (error);
 3668                 tm_p = &timeout;
 3669         }
 3670         return do_lock_umutex(td, uap->obj, tm_p, 0);
 3671 }
 3672 
 3673 static int
 3674 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
 3675 {
 3676         struct _umtx_time *tm_p, timeout;
 3677         int error;
 3678 
 3679         /* Allow a null timespec (wait forever). */
 3680         if (uap->uaddr2 == NULL)
 3681                 tm_p = NULL;
 3682         else {
 3683                 error = umtx_copyin_umtx_time32(uap->uaddr2, 
 3684                     (size_t)uap->uaddr1, &timeout);
 3685                 if (error != 0)
 3686                         return (error);
 3687                 tm_p = &timeout;
 3688         }
 3689         return do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT);
 3690 }
 3691 
 3692 static int
 3693 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3694 {
 3695         struct timespec *ts, timeout;
 3696         int error;
 3697 
 3698         /* Allow a null timespec (wait forever). */
 3699         if (uap->uaddr2 == NULL)
 3700                 ts = NULL;
 3701         else {
 3702                 error = umtx_copyin_timeout32(uap->uaddr2, &timeout);
 3703                 if (error != 0)
 3704                         return (error);
 3705                 ts = &timeout;
 3706         }
 3707         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 3708 }
 3709 
 3710 static int
 3711 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3712 {
 3713         struct _umtx_time timeout;
 3714         int error;
 3715 
 3716         /* Allow a null timespec (wait forever). */
 3717         if (uap->uaddr2 == NULL) {
 3718                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 3719         } else {
 3720                 error = umtx_copyin_umtx_time32(uap->uaddr2,
 3721                     (size_t)uap->uaddr1, &timeout);
 3722                 if (error != 0)
 3723                         return (error);
 3724                 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
 3725         }
 3726         return (error);
 3727 }
 3728 
 3729 static int
 3730 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
 3731 {
 3732         struct _umtx_time timeout;
 3733         int error;
 3734 
 3735         /* Allow a null timespec (wait forever). */
 3736         if (uap->uaddr2 == NULL) {
 3737                 error = do_rw_wrlock(td, uap->obj, 0);
 3738         } else {
 3739                 error = umtx_copyin_umtx_time32(uap->uaddr2,
 3740                     (size_t)uap->uaddr1, &timeout);
 3741                 if (error != 0)
 3742                         return (error);
 3743                 error = do_rw_wrlock(td, uap->obj, &timeout);
 3744         }
 3745         return (error);
 3746 }
 3747 
 3748 static int
 3749 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
 3750 {
 3751         struct _umtx_time *tm_p, timeout;
 3752         int error;
 3753 
 3754         if (uap->uaddr2 == NULL)
 3755                 tm_p = NULL;
 3756         else {
 3757                 error = umtx_copyin_umtx_time32(
 3758                     uap->uaddr2, (size_t)uap->uaddr1,&timeout);
 3759                 if (error != 0)
 3760                         return (error);
 3761                 tm_p = &timeout;
 3762         }
 3763         return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
 3764 }
 3765 
 3766 static int
 3767 __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
 3768 {
 3769         struct _umtx_time *tm_p, timeout;
 3770         int error;
 3771 
 3772         /* Allow a null timespec (wait forever). */
 3773         if (uap->uaddr2 == NULL)
 3774                 tm_p = NULL;
 3775         else {
 3776                 error = umtx_copyin_umtx_time32(uap->uaddr2,
 3777                     (size_t)uap->uaddr1, &timeout);
 3778                 if (error != 0)
 3779                         return (error);
 3780                 tm_p = &timeout;
 3781         }
 3782         return (do_sem_wait(td, uap->obj, tm_p));
 3783 }
 3784 
 3785 static int
 3786 __umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
 3787 {
 3788         int count = uap->val;
 3789         uint32_t uaddrs[BATCH_SIZE];
 3790         uint32_t **upp = (uint32_t **)uap->obj;
 3791         int tocopy;
 3792         int error = 0;
 3793         int i, pos = 0;
 3794 
 3795         while (count > 0) {
 3796                 tocopy = count;
 3797                 if (tocopy > BATCH_SIZE)
 3798                         tocopy = BATCH_SIZE;
 3799                 error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t));
 3800                 if (error != 0)
 3801                         break;
 3802                 for (i = 0; i < tocopy; ++i)
 3803                         kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i],
 3804                                 INT_MAX, 1);
 3805                 count -= tocopy;
 3806                 pos += tocopy;
 3807         }
 3808         return (error);
 3809 }
 3810 
/*
 * Dispatch table for freebsd32_umtx_op(), indexed directly by uap->op.
 * It mirrors op_table slot for slot; operations that read user data with
 * a 32-bit layout are routed to their *_compat32 / *32 variants, and the
 * rest share the native handlers.  The table is positional: each entry
 * must sit at the index equal to the operation code named in its
 * trailing comment.
 */
static _umtx_op_func op_table_compat32[] = {
        __umtx_op_lock_umtx_compat32,   /* UMTX_OP_LOCK */
        __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */
        __umtx_op_wait_compat32,        /* UMTX_OP_WAIT */
        __umtx_op_wake,                 /* UMTX_OP_WAKE */
        __umtx_op_trylock_umutex,       /* UMTX_OP_MUTEX_TRYLOCK */
        __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */
        __umtx_op_unlock_umutex,        /* UMTX_OP_MUTEX_UNLOCK */
        __umtx_op_set_ceiling,          /* UMTX_OP_SET_CEILING */
        __umtx_op_cv_wait_compat32,     /* UMTX_OP_CV_WAIT */
        __umtx_op_cv_signal,            /* UMTX_OP_CV_SIGNAL */
        __umtx_op_cv_broadcast,         /* UMTX_OP_CV_BROADCAST */
        __umtx_op_wait_compat32,        /* UMTX_OP_WAIT_UINT */
        __umtx_op_rw_rdlock_compat32,   /* UMTX_OP_RW_RDLOCK */
        __umtx_op_rw_wrlock_compat32,   /* UMTX_OP_RW_WRLOCK */
        __umtx_op_rw_unlock,            /* UMTX_OP_RW_UNLOCK */
        __umtx_op_wait_uint_private_compat32,   /* UMTX_OP_WAIT_UINT_PRIVATE */
        __umtx_op_wake_private,         /* UMTX_OP_WAKE_PRIVATE */
        __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
        __umtx_op_wake_umutex,          /* UMTX_OP_UMUTEX_WAKE */
        __umtx_op_sem_wait_compat32,    /* UMTX_OP_SEM_WAIT */
        __umtx_op_sem_wake,             /* UMTX_OP_SEM_WAKE */
        __umtx_op_nwake_private32,      /* UMTX_OP_NWAKE_PRIVATE */
        __umtx_op_wake2_umutex          /* UMTX_OP_UMUTEX_WAKE2 */
};
 3836 
 3837 int
 3838 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
 3839 {
 3840         if ((unsigned)uap->op < UMTX_OP_MAX)
 3841                 return (*op_table_compat32[uap->op])(td,
 3842                         (struct _umtx_op_args *)uap);
 3843         return (EINVAL);
 3844 }
 3845 #endif
 3846 
 3847 void
 3848 umtx_thread_init(struct thread *td)
 3849 {
 3850         td->td_umtxq = umtxq_alloc();
 3851         td->td_umtxq->uq_thread = td;
 3852 }
 3853 
 3854 void
 3855 umtx_thread_fini(struct thread *td)
 3856 {
 3857         umtxq_free(td->td_umtxq);
 3858 }
 3859 
 3860 /*
 3861  * It will be called when new thread is created, e.g fork().
 3862  */
 3863 void
 3864 umtx_thread_alloc(struct thread *td)
 3865 {
 3866         struct umtx_q *uq;
 3867 
 3868         uq = td->td_umtxq;
 3869         uq->uq_inherited_pri = PRI_MAX;
 3870 
 3871         KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
 3872         KASSERT(uq->uq_thread == td, ("uq_thread != td"));
 3873         KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
 3874         KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
 3875 }
 3876 
 3877 /*
 3878  * exec() hook.
 3879  */
 3880 static void
 3881 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
 3882         struct image_params *imgp __unused)
 3883 {
 3884         umtx_thread_cleanup(curthread);
 3885 }
 3886 
 3887 /*
 3888  * thread_exit() hook.
 3889  */
 3890 void
 3891 umtx_thread_exit(struct thread *td)
 3892 {
 3893         umtx_thread_cleanup(td);
 3894 }
 3895 
 3896 /*
 3897  * clean up umtx data.
 3898  */
 3899 static void
 3900 umtx_thread_cleanup(struct thread *td)
 3901 {
 3902         struct umtx_q *uq;
 3903         struct umtx_pi *pi;
 3904 
 3905         if ((uq = td->td_umtxq) == NULL)
 3906                 return;
 3907 
 3908         mtx_lock_spin(&umtx_lock);
 3909         uq->uq_inherited_pri = PRI_MAX;
 3910         while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
 3911                 pi->pi_owner = NULL;
 3912                 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
 3913         }
 3914         mtx_unlock_spin(&umtx_lock);
 3915         thread_lock(td);
 3916         sched_lend_user_prio(td, PRI_MAX);
 3917         thread_unlock(td);
 3918 }

Cache object: 72b3ce646fa5b6e88f0f48ea7bf1e076


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.