FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2015, 2016 The FreeBSD Foundation
    5  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
    6  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    7  * All rights reserved.
    8  *
    9  * Portions of this software were developed by Konstantin Belousov
   10  * under sponsorship from the FreeBSD Foundation.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice unmodified, this list of conditions, and the following
   17  *    disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include "opt_umtx_profiling.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/kernel.h>
   41 #include <sys/fcntl.h>
   42 #include <sys/file.h>
   43 #include <sys/filedesc.h>
   44 #include <sys/limits.h>
   45 #include <sys/lock.h>
   46 #include <sys/malloc.h>
   47 #include <sys/mman.h>
   48 #include <sys/mutex.h>
   49 #include <sys/priv.h>
   50 #include <sys/proc.h>
   51 #include <sys/resource.h>
   52 #include <sys/resourcevar.h>
   53 #include <sys/rwlock.h>
   54 #include <sys/sbuf.h>
   55 #include <sys/sched.h>
   56 #include <sys/smp.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/sysent.h>
   59 #include <sys/systm.h>
   60 #include <sys/sysproto.h>
   61 #include <sys/syscallsubr.h>
   62 #include <sys/taskqueue.h>
   63 #include <sys/time.h>
   64 #include <sys/eventhandler.h>
   65 #include <sys/umtx.h>
   66 
   67 #include <security/mac/mac_framework.h>
   68 
   69 #include <vm/vm.h>
   70 #include <vm/vm_param.h>
   71 #include <vm/pmap.h>
   72 #include <vm/vm_map.h>
   73 #include <vm/vm_object.h>
   74 
   75 #include <machine/atomic.h>
   76 #include <machine/cpu.h>
   77 
   78 #include <compat/freebsd32/freebsd32.h>
   79 #ifdef COMPAT_FREEBSD32
   80 #include <compat/freebsd32/freebsd32_proto.h>
   81 #endif
   82 
   83 #define _UMUTEX_TRY             1
   84 #define _UMUTEX_WAIT            2
   85 
   86 #ifdef UMTX_PROFILING
   87 #define UPROF_PERC_BIGGER(w, f, sw, sf)                                 \
   88         (((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
   89 #endif
   90 
   91 /* Priority inheritance mutex info. */
   92 struct umtx_pi {
   93         /* Owner thread */
   94         struct thread           *pi_owner;
   95 
   96         /* Reference count */
   97         int                     pi_refcount;
   98 
   99         /* List entry linking this umtx_pi into the holding thread's list */
  100         TAILQ_ENTRY(umtx_pi)    pi_link;
  101 
  102         /* List entry in hash */
  103         TAILQ_ENTRY(umtx_pi)    pi_hashlink;
  104 
  105         /* List for waiters */
  106         TAILQ_HEAD(,umtx_q)     pi_blocked;
  107 
  108         /* Identify a userland lock object */
  109         struct umtx_key         pi_key;
  110 };
  111 
  112 /* A waiter on a userland synchronization object. */
  113 struct umtx_q {
  114         /* Linked list for the hash. */
  115         TAILQ_ENTRY(umtx_q)     uq_link;
  116 
  117         /* Umtx key. */
  118         struct umtx_key         uq_key;
  119 
  120         /* Umtx flags. */
  121         int                     uq_flags;
  122 #define UQF_UMTXQ       0x0001
  123 
  124         /* The thread that is waiting. */
  125         struct thread           *uq_thread;
  126 
  127         /*
  128          * The PI mutex this thread is blocked on.  Readers may hold
  129          * either the chain lock or umtx_lock; writers must hold both
  130          * locks.
  131          */
  132         struct umtx_pi          *uq_pi_blocked;
  133 
  134         /* On blocked list */
  135         TAILQ_ENTRY(umtx_q)     uq_lockq;
  136 
  137         /* PI mutexes held by this thread that others contend for */
  138         TAILQ_HEAD(,umtx_pi)    uq_pi_contested;
  139 
  140         /* Inherited priority from PP mutex */
  141         u_char                  uq_inherited_pri;
  142 
  143         /* Spare queue ready to be reused */
  144         struct umtxq_queue      *uq_spare_queue;
  145 
  146         /* The queue we are on */
  147         struct umtxq_queue      *uq_cur_queue;
  148 };
  149 
  150 TAILQ_HEAD(umtxq_head, umtx_q);
  151 
  152 /* Per-key wait-queue */
  153 struct umtxq_queue {
  154         struct umtxq_head       head;
  155         struct umtx_key         key;
  156         LIST_ENTRY(umtxq_queue) link;
  157         int                     length;
  158 };
  159 
  160 LIST_HEAD(umtxq_list, umtxq_queue);
  161 
  162 /* Userland lock object's wait-queue chain */
  163 struct umtxq_chain {
  164         /* Lock for this chain. */
  165         struct mtx              uc_lock;
  166 
  167         /* List of sleep queues. */
  168         struct umtxq_list       uc_queue[2];
  169 #define UMTX_SHARED_QUEUE       0
  170 #define UMTX_EXCLUSIVE_QUEUE    1
  171 
  172         LIST_HEAD(, umtxq_queue) uc_spare_queue;
  173 
  174         /* Busy flag */
  175         char                    uc_busy;
  176 
  177         /* Chain lock waiters */
  178         int                     uc_waiters;
  179 
  180         /* All PI structures on this chain */
  181         TAILQ_HEAD(,umtx_pi)    uc_pi_list;
  182 
  183 #ifdef UMTX_PROFILING
  184         u_int                   length;
  185         u_int                   max_length;
  186 #endif
  187 };
  188 
  189 #define UMTXQ_LOCKED_ASSERT(uc)         mtx_assert(&(uc)->uc_lock, MA_OWNED)
  190 
  191 /*
  192  * Don't propagate time-sharing priority; there is a security reason.
  193  * A user could create a PI mutex, have thread A lock it, and have
  194  * another thread B block on it.  Because B is sleeping, its priority
  195  * would be boosted, and priority propagation would boost A's priority
  196  * too; A's priority would then never be lowered, even while using
  197  * 100% CPU, which is unfair to other processes.
  198  */
  199 
  200 #define UPRI(td)        (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
  201                           (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
  202                          PRI_MAX_TIMESHARE : (td)->td_user_pri)
  203 
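The UPRI() clamp above implements that policy: any thread whose user priority falls in the time-sharing range contributes only the weakest time-sharing priority (PRI_MAX_TIMESHARE) to priority propagation, so a boosted sleeper can never pin a time-sharing lock owner at an elevated level. A minimal standalone sketch of the clamp, with the priority constants hard-coded as assumptions (see <sys/priority.h> for the real values):

    #include <stdio.h>

    #define PRI_MIN_TIMESHARE 120   /* assumed value, check <sys/priority.h> */
    #define PRI_MAX_TIMESHARE 223   /* assumed value */

    static int
    upri_clamp(int user_pri)
    {
            if (user_pri >= PRI_MIN_TIMESHARE && user_pri <= PRI_MAX_TIMESHARE)
                    return (PRI_MAX_TIMESHARE);     /* weakest timeshare level */
            return (user_pri);                      /* e.g. real-time priorities */
    }

    int
    main(void)
    {
            printf("%d\n", upri_clamp(180));        /* 223: timeshare is clamped */
            printf("%d\n", upri_clamp(50));         /* 50: passes through unclamped */
            return (0);
    }
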
  204 #define GOLDEN_RATIO_PRIME      2654404609U
  205 #ifndef UMTX_CHAINS
  206 #define UMTX_CHAINS             512
  207 #endif
  208 #define UMTX_SHIFTS             (__WORD_BIT - 9)
  209 
  210 #define GET_SHARE(flags)        \
  211     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
  212 
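GET_SHARE() is where the process-shared attribute of a POSIX synchronization object takes effect: USYNC_PROCESS_SHARED selects keying by the backing VM object (PROCESS_SHARE) instead of by the owning process's vmspace (THREAD_SHARE). In application code the flag normally originates from standard pthread calls; a hedged example of the userland side, plain POSIX and nothing umtx-specific:

    #include <pthread.h>

    int
    make_shared_mutex(pthread_mutex_t *mp)
    {
            pthread_mutexattr_t attr;
            int error;

            if ((error = pthread_mutexattr_init(&attr)) != 0)
                    return (error);
            /* Request process sharing; the kernel will then key the lock by
             * the backing VM object rather than by the process's vmspace. */
            error = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
            if (error == 0)
                    error = pthread_mutex_init(mp, &attr);
            pthread_mutexattr_destroy(&attr);
            return (error);
    }
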
  213 #define BUSY_SPINS              200
  214 
  215 struct abs_timeout {
  216         int clockid;
  217         bool is_abs_real;       /* TIMER_ABSTIME && CLOCK_REALTIME* */
  218         struct timespec cur;
  219         struct timespec end;
  220 };
  221 
  222 struct umtx_copyops {
  223         int     (*copyin_timeout)(const void *uaddr, struct timespec *tsp);
  224         int     (*copyin_umtx_time)(const void *uaddr, size_t size,
  225             struct _umtx_time *tp);
  226         int     (*copyin_robust_lists)(const void *uaddr, size_t size,
  227             struct umtx_robust_lists_params *rbp);
  228         int     (*copyout_timeout)(void *uaddr, size_t size,
  229             struct timespec *tsp);
  230         const size_t    timespec_sz;
  231         const size_t    umtx_time_sz;
  232         const bool      compat32;
  233 };
  234 
  235 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
  236 _Static_assert(__offsetof(struct umutex, m_spare[0]) ==
  237     __offsetof(struct umutex32, m_spare[0]), "m_spare32");
  238 
  239 int umtx_shm_vnobj_persistent = 0;
  240 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
  241     &umtx_shm_vnobj_persistent, 0,
  242     "False forces destruction of umtx attached to file, on last close");
  243 static int umtx_max_rb = 1000;
  244 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
  245     &umtx_max_rb, 0,
  246     "Maximum number of robust mutexes allowed for each thread");
  247 
  248 static uma_zone_t               umtx_pi_zone;
  249 static struct umtxq_chain       umtxq_chains[2][UMTX_CHAINS];
  250 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
  251 static int                      umtx_pi_allocated;
  252 
  253 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  254     "umtx debug");
  255 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
  256     &umtx_pi_allocated, 0, "Allocated umtx_pi");
  257 static int umtx_verbose_rb = 1;
  258 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
  259     &umtx_verbose_rb, 0,
  260     "");
  261 
  262 #ifdef UMTX_PROFILING
  263 static long max_length;
  264 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
  265 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  266     "umtx chain stats");
  267 #endif
  268 
  269 static void abs_timeout_update(struct abs_timeout *timo);
  270 
  271 static void umtx_shm_init(void);
  272 static void umtxq_sysinit(void *);
  273 static void umtxq_hash(struct umtx_key *key);
  274 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
  275 static void umtxq_unlock(struct umtx_key *key);
  276 static void umtxq_busy(struct umtx_key *key);
  277 static void umtxq_unbusy(struct umtx_key *key);
  278 static void umtxq_insert_queue(struct umtx_q *uq, int q);
  279 static void umtxq_remove_queue(struct umtx_q *uq, int q);
  280 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
  281 static int umtxq_count(struct umtx_key *key);
  282 static struct umtx_pi *umtx_pi_alloc(int);
  283 static void umtx_pi_free(struct umtx_pi *pi);
  284 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
  285     bool rb);
  286 static void umtx_thread_cleanup(struct thread *td);
  287 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
  288 
  289 #define umtxq_signal(key, nwake)        umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
  290 #define umtxq_insert(uq)        umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
  291 #define umtxq_remove(uq)        umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
  292 
  293 static struct mtx umtx_lock;
  294 
  295 #ifdef UMTX_PROFILING
  296 static void
  297 umtx_init_profiling(void)
  298 {
  299         struct sysctl_oid *chain_oid;
  300         char chain_name[10];
  301         int i;
  302 
  303         for (i = 0; i < UMTX_CHAINS; ++i) {
  304                 snprintf(chain_name, sizeof(chain_name), "%d", i);
  305                 chain_oid = SYSCTL_ADD_NODE(NULL,
  306                     SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
  307                     chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
  308                     "umtx hash stats");
  309                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  310                     "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
  311                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  312                     "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
  313         }
  314 }
  315 
  316 static int
  317 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
  318 {
  319         char buf[512];
  320         struct sbuf sb;
  321         struct umtxq_chain *uc;
  322         u_int fract, i, j, tot, whole;
  323         u_int sf0, sf1, sf2, sf3, sf4;
  324         u_int si0, si1, si2, si3, si4;
  325         u_int sw0, sw1, sw2, sw3, sw4;
  326 
  327         sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
  328         for (i = 0; i < 2; i++) {
  329                 tot = 0;
  330                 for (j = 0; j < UMTX_CHAINS; ++j) {
  331                         uc = &umtxq_chains[i][j];
  332                         mtx_lock(&uc->uc_lock);
  333                         tot += uc->max_length;
  334                         mtx_unlock(&uc->uc_lock);
  335                 }
  336                 if (tot == 0)
  337                         sbuf_printf(&sb, "%u) Empty ", i);
  338                 else {
  339                         sf0 = sf1 = sf2 = sf3 = sf4 = 0;
  340                         si0 = si1 = si2 = si3 = si4 = 0;
  341                         sw0 = sw1 = sw2 = sw3 = sw4 = 0;
  342                         for (j = 0; j < UMTX_CHAINS; j++) {
  343                                 uc = &umtxq_chains[i][j];
  344                                 mtx_lock(&uc->uc_lock);
  345                                 whole = uc->max_length * 100;
  346                                 mtx_unlock(&uc->uc_lock);
  347                                 fract = (whole % tot) * 100;
  348                                 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
  349                                         sf0 = fract;
  350                                         si0 = j;
  351                                         sw0 = whole;
  352                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw1,
  353                                     sf1)) {
  354                                         sf1 = fract;
  355                                         si1 = j;
  356                                         sw1 = whole;
  357                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw2,
  358                                     sf2)) {
  359                                         sf2 = fract;
  360                                         si2 = j;
  361                                         sw2 = whole;
  362                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw3,
  363                                     sf3)) {
  364                                         sf3 = fract;
  365                                         si3 = j;
  366                                         sw3 = whole;
  367                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw4,
  368                                     sf4)) {
  369                                         sf4 = fract;
  370                                         si4 = j;
  371                                         sw4 = whole;
  372                                 }
  373                         }
  374                         sbuf_printf(&sb, "queue %u:\n", i);
  375                         sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
  376                             sf0 / tot, si0);
  377                         sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
  378                             sf1 / tot, si1);
  379                         sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
  380                             sf2 / tot, si2);
  381                         sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
  382                             sf3 / tot, si3);
  383                         sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
  384                             sf4 / tot, si4);
  385                 }
  386         }
  387         sbuf_trim(&sb);
  388         sbuf_finish(&sb);
  389         sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
  390         sbuf_delete(&sb);
  391         return (0);
  392 }
  393 
  394 static int
  395 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
  396 {
  397         struct umtxq_chain *uc;
  398         u_int i, j;
  399         int clear, error;
  400 
  401         clear = 0;
  402         error = sysctl_handle_int(oidp, &clear, 0, req);
  403         if (error != 0 || req->newptr == NULL)
  404                 return (error);
  405 
  406         if (clear != 0) {
  407                 for (i = 0; i < 2; ++i) {
  408                         for (j = 0; j < UMTX_CHAINS; ++j) {
  409                                 uc = &umtxq_chains[i][j];
  410                                 mtx_lock(&uc->uc_lock);
  411                                 uc->length = 0;
  412                                 uc->max_length = 0;
  413                                 mtx_unlock(&uc->uc_lock);
  414                         }
  415                 }
  416         }
  417         return (0);
  418 }
  419 
  420 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
  421     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
  422     sysctl_debug_umtx_chains_clear, "I",
  423     "Clear umtx chains statistics");
  424 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
  425     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
  426     sysctl_debug_umtx_chains_peaks, "A",
  427     "Highest peaks in chains max length");
  428 #endif
  429 
  430 static void
  431 umtxq_sysinit(void *arg __unused)
  432 {
  433         int i, j;
  434 
  435         umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
  436                 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  437         for (i = 0; i < 2; ++i) {
  438                 for (j = 0; j < UMTX_CHAINS; ++j) {
  439                         mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
  440                                  MTX_DEF | MTX_DUPOK);
  441                         LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
  442                         LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
  443                         LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
  444                         TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
  445                         umtxq_chains[i][j].uc_busy = 0;
  446                         umtxq_chains[i][j].uc_waiters = 0;
  447 #ifdef UMTX_PROFILING
  448                         umtxq_chains[i][j].length = 0;
  449                         umtxq_chains[i][j].max_length = 0;
  450 #endif
  451                 }
  452         }
  453 #ifdef UMTX_PROFILING
  454         umtx_init_profiling();
  455 #endif
  456         mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
  457         umtx_shm_init();
  458 }
  459 
  460 struct umtx_q *
  461 umtxq_alloc(void)
  462 {
  463         struct umtx_q *uq;
  464 
  465         uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
  466         uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
  467             M_WAITOK | M_ZERO);
  468         TAILQ_INIT(&uq->uq_spare_queue->head);
  469         TAILQ_INIT(&uq->uq_pi_contested);
  470         uq->uq_inherited_pri = PRI_MAX;
  471         return (uq);
  472 }
  473 
  474 void
  475 umtxq_free(struct umtx_q *uq)
  476 {
  477 
  478         MPASS(uq->uq_spare_queue != NULL);
  479         free(uq->uq_spare_queue, M_UMTX);
  480         free(uq, M_UMTX);
  481 }
  482 
  483 static inline void
  484 umtxq_hash(struct umtx_key *key)
  485 {
  486         unsigned n;
  487 
  488         n = (uintptr_t)key->info.both.a + key->info.both.b;
  489         key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
  490 }
  491 
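umtxq_hash() above is a multiplicative (Fibonacci) hash: GOLDEN_RATIO_PRIME is a prime near 2^32/phi, and the top 9 bits of the 32-bit product index the 512 chains (UMTX_SHIFTS is __WORD_BIT - 9, and __WORD_BIT is 32 on FreeBSD). A self-contained sketch with the constants copied from this file and invented sample keys:

    #include <stdint.h>
    #include <stdio.h>

    #define GOLDEN_RATIO_PRIME      2654404609U
    #define UMTX_CHAINS             512
    #define UMTX_SHIFTS             (32 - 9)        /* __WORD_BIT - 9 */

    static unsigned
    umtx_hash_demo(uintptr_t a, uintptr_t b)
    {
            unsigned n = (unsigned)(a + b);         /* truncation is intended */

            /* Keep the top 9 bits of the product; the % is only a safety net. */
            return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
    }

    int
    main(void)
    {
            uint64_t addr;

            /* Nearby addresses spread across chains thanks to the multiply. */
            for (addr = 0x1000; addr < 0x1200; addr += 0x80)
                    printf("%#llx -> chain %u\n", (unsigned long long)addr,
                        umtx_hash_demo((uintptr_t)addr, 0));
            return (0);
    }
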
  492 static inline struct umtxq_chain *
  493 umtxq_getchain(struct umtx_key *key)
  494 {
  495 
  496         if (key->type <= TYPE_SEM)
  497                 return (&umtxq_chains[1][key->hash]);
  498         return (&umtxq_chains[0][key->hash]);
  499 }
  500 
  501 /*
  502  * Lock a chain.
  503  *
  504  * The code is a macro so that file/line information is taken from the caller.
  505  */
  506 #define umtxq_lock(key) do {            \
  507         struct umtx_key *_key = (key);  \
  508         struct umtxq_chain *_uc;        \
  509                                         \
  510         _uc = umtxq_getchain(_key);     \
  511         mtx_lock(&_uc->uc_lock);        \
  512 } while (0)
  513 
  514 /*
  515  * Unlock a chain.
  516  */
  517 static inline void
  518 umtxq_unlock(struct umtx_key *key)
  519 {
  520         struct umtxq_chain *uc;
  521 
  522         uc = umtxq_getchain(key);
  523         mtx_unlock(&uc->uc_lock);
  524 }
  525 
  526 /*
  527  * Mark the chain busy when the following operation may block
  528  * (a kernel mutex cannot be held across a sleep).
  529  */
  530 static inline void
  531 umtxq_busy(struct umtx_key *key)
  532 {
  533         struct umtxq_chain *uc;
  534 
  535         uc = umtxq_getchain(key);
  536         mtx_assert(&uc->uc_lock, MA_OWNED);
  537         if (uc->uc_busy) {
  538 #ifdef SMP
  539                 if (smp_cpus > 1) {
  540                         int count = BUSY_SPINS;
  541                         if (count > 0) {
  542                                 umtxq_unlock(key);
  543                                 while (uc->uc_busy && --count > 0)
  544                                         cpu_spinwait();
  545                                 umtxq_lock(key);
  546                         }
  547                 }
  548 #endif
  549                 while (uc->uc_busy) {
  550                         uc->uc_waiters++;
  551                         msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
  552                         uc->uc_waiters--;
  553                 }
  554         }
  555         uc->uc_busy = 1;
  556 }
  557 
  558 /*
  559  * Unbusy a chain.
  560  */
  561 static inline void
  562 umtxq_unbusy(struct umtx_key *key)
  563 {
  564         struct umtxq_chain *uc;
  565 
  566         uc = umtxq_getchain(key);
  567         mtx_assert(&uc->uc_lock, MA_OWNED);
  568         KASSERT(uc->uc_busy != 0, ("not busy"));
  569         uc->uc_busy = 0;
  570         if (uc->uc_waiters)
  571                 wakeup_one(uc);
  572 }
  573 
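umtxq_busy()/umtxq_unbusy() form a spin-then-sleep gate: on SMP the caller first spins a bounded number of times hoping the holder clears the flag quickly, and only then sleeps on the chain. A userland analogue of the same pattern, assuming C11 atomics and pthreads (the names and the condvar substitution for msleep()/wakeup_one() are illustrative; initialization omitted):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    #define BUSY_SPINS 200

    struct busy_gate {
            pthread_mutex_t lock;
            pthread_cond_t  cv;
            atomic_bool     busy;
    };

    void
    gate_enter(struct busy_gate *g)
    {
            int i;

            /* Bounded lock-free spin, like the smp_cpus > 1 branch above. */
            for (i = 0; i < BUSY_SPINS; i++)
                    if (!atomic_load_explicit(&g->busy, memory_order_acquire))
                            break;
            pthread_mutex_lock(&g->lock);
            while (atomic_load(&g->busy))           /* then block, like msleep() */
                    pthread_cond_wait(&g->cv, &g->lock);
            atomic_store(&g->busy, true);
            pthread_mutex_unlock(&g->lock);
    }

    void
    gate_leave(struct busy_gate *g)
    {
            pthread_mutex_lock(&g->lock);
            atomic_store(&g->busy, false);
            pthread_cond_signal(&g->cv);            /* like wakeup_one(uc) */
            pthread_mutex_unlock(&g->lock);
    }
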
  574 static inline void
  575 umtxq_unbusy_unlocked(struct umtx_key *key)
  576 {
  577 
  578         umtxq_lock(key);
  579         umtxq_unbusy(key);
  580         umtxq_unlock(key);
  581 }
  582 
  583 static struct umtxq_queue *
  584 umtxq_queue_lookup(struct umtx_key *key, int q)
  585 {
  586         struct umtxq_queue *uh;
  587         struct umtxq_chain *uc;
  588 
  589         uc = umtxq_getchain(key);
  590         UMTXQ_LOCKED_ASSERT(uc);
  591         LIST_FOREACH(uh, &uc->uc_queue[q], link) {
  592                 if (umtx_key_match(&uh->key, key))
  593                         return (uh);
  594         }
  595 
  596         return (NULL);
  597 }
  598 
  599 static inline void
  600 umtxq_insert_queue(struct umtx_q *uq, int q)
  601 {
  602         struct umtxq_queue *uh;
  603         struct umtxq_chain *uc;
  604 
  605         uc = umtxq_getchain(&uq->uq_key);
  606         UMTXQ_LOCKED_ASSERT(uc);
  607         KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
  608         uh = umtxq_queue_lookup(&uq->uq_key, q);
  609         if (uh != NULL) {
  610                 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
  611         } else {
  612                 uh = uq->uq_spare_queue;
  613                 uh->key = uq->uq_key;
  614                 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
  615 #ifdef UMTX_PROFILING
  616                 uc->length++;
  617                 if (uc->length > uc->max_length) {
  618                         uc->max_length = uc->length;
  619                         if (uc->max_length > max_length)
  620                                 max_length = uc->max_length;
  621                 }
  622 #endif
  623         }
  624         uq->uq_spare_queue = NULL;
  625 
  626         TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
  627         uh->length++;
  628         uq->uq_flags |= UQF_UMTXQ;
  629         uq->uq_cur_queue = uh;
  630         return;
  631 }
  632 
  633 static inline void
  634 umtxq_remove_queue(struct umtx_q *uq, int q)
  635 {
  636         struct umtxq_chain *uc;
  637         struct umtxq_queue *uh;
  638 
  639         uc = umtxq_getchain(&uq->uq_key);
  640         UMTXQ_LOCKED_ASSERT(uc);
  641         if (uq->uq_flags & UQF_UMTXQ) {
  642                 uh = uq->uq_cur_queue;
  643                 TAILQ_REMOVE(&uh->head, uq, uq_link);
  644                 uh->length--;
  645                 uq->uq_flags &= ~UQF_UMTXQ;
  646                 if (TAILQ_EMPTY(&uh->head)) {
  647                         KASSERT(uh->length == 0,
  648                             ("inconsistent umtxq_queue length"));
  649 #ifdef UMTX_PROFILING
  650                         uc->length--;
  651 #endif
  652                         LIST_REMOVE(uh, link);
  653                 } else {
  654                         uh = LIST_FIRST(&uc->uc_spare_queue);
  655                         KASSERT(uh != NULL, ("uc_spare_queue is empty"));
  656                         LIST_REMOVE(uh, link);
  657                 }
  658                 uq->uq_spare_queue = uh;
  659                 uq->uq_cur_queue = NULL;
  660         }
  661 }
  662 
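The insert/remove pair above is a "bring your own container" scheme: every waiter arrives with a pre-allocated spare umtxq_queue, so enqueueing never allocates memory and dequeueing never frees any, which matters because both run under the chain mutex. A stripped-down sketch of the insertion half, using the userland-visible <sys/queue.h> macros (all names invented; removal is symmetric, handing a queue head back to the departing waiter as its new spare):

    #include <sys/queue.h>
    #include <stddef.h>

    struct waiter;
    struct bucket {
            TAILQ_HEAD(, waiter)    head;           /* waiters for this key */
            LIST_ENTRY(bucket)      link;
            long                    key;
    };
    struct waiter {
            TAILQ_ENTRY(waiter)     link;
            struct bucket           *spare;         /* brought by the waiter,
                                                       head TAILQ_INIT'd at
                                                       allocation time */
            struct bucket           *cur;
    };
    LIST_HEAD(bucket_list, bucket);

    void
    waiter_insert(struct bucket_list *active, struct bucket_list *spares,
        struct waiter *w, long key)
    {
            struct bucket *b;

            LIST_FOREACH(b, active, link)
                    if (b->key == key)
                            break;
            if (b != NULL) {
                    /* A bucket exists: donate our spare to the free list. */
                    LIST_INSERT_HEAD(spares, w->spare, link);
            } else {
                    /* No bucket for this key: our spare becomes the bucket. */
                    b = w->spare;
                    b->key = key;
                    LIST_INSERT_HEAD(active, b, link);
            }
            w->spare = NULL;
            TAILQ_INSERT_TAIL(&b->head, w, link);
            w->cur = b;
    }
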
  663 /*
  664  * Return the number of waiters on the shared queue.
  665  */
  666 static int
  667 umtxq_count(struct umtx_key *key)
  668 {
  669         struct umtxq_queue *uh;
  670 
  671         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  672         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  673         if (uh != NULL)
  674                 return (uh->length);
  675         return (0);
  676 }
  677 
  678 /*
  679  * Return the number of PI waiters and, in *first, the first
  680  * waiter.
  681  */
  682 static int
  683 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
  684 {
  685         struct umtxq_queue *uh;
  686 
  687         *first = NULL;
  688         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  689         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  690         if (uh != NULL) {
  691                 *first = TAILQ_FIRST(&uh->head);
  692                 return (uh->length);
  693         }
  694         return (0);
  695 }
  696 
  697 /*
  698  * Wake up threads waiting on a userland object.
  699  */
  700 
  701 static int
  702 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
  703 {
  704         struct umtxq_queue *uh;
  705         struct umtx_q *uq;
  706         int ret;
  707 
  708         ret = 0;
  709         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  710         uh = umtxq_queue_lookup(key, q);
  711         if (uh != NULL) {
  712                 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
  713                         umtxq_remove_queue(uq, q);
  714                         wakeup(uq);
  715                         if (++ret >= n_wake)
  716                                 return (ret);
  717                 }
  718         }
  719         return (ret);
  720 }
  721 
  722 /*
  723  * Wake up the specified thread.
  724  */
  725 static inline void
  726 umtxq_signal_thread(struct umtx_q *uq)
  727 {
  728 
  729         UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
  730         umtxq_remove(uq);
  731         wakeup(uq);
  732 }
  733 
  734 static inline int
  735 tstohz(const struct timespec *tsp)
  736 {
  737         struct timeval tv;
  738 
  739         TIMESPEC_TO_TIMEVAL(&tv, tsp);
  740         return tvtohz(&tv);
  741 }
  742 
  743 static void
  744 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
  745         const struct timespec *timeout)
  746 {
  747 
  748         timo->clockid = clockid;
  749         if (!absolute) {
  750                 timo->is_abs_real = false;
  751                 abs_timeout_update(timo);
  752                 timespecadd(&timo->cur, timeout, &timo->end);
  753         } else {
  754                 timo->end = *timeout;
  755                 timo->is_abs_real = clockid == CLOCK_REALTIME ||
  756                     clockid == CLOCK_REALTIME_FAST ||
  757                     clockid == CLOCK_REALTIME_PRECISE;
  758                 /*
  759                  * If is_abs_real, umtxq_sleep will read the clock
  760                  * after setting td_rtcgen; otherwise, read it here.
  761                  */
  762                 if (!timo->is_abs_real) {
  763                         abs_timeout_update(timo);
  764                 }
  765         }
  766 }
  767 
  768 static void
  769 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
  770 {
  771 
  772         abs_timeout_init(timo, umtxtime->_clockid,
  773             (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
  774 }
  775 
  776 static inline void
  777 abs_timeout_update(struct abs_timeout *timo)
  778 {
  779 
  780         kern_clock_gettime(curthread, timo->clockid, &timo->cur);
  781 }
  782 
  783 static int
  784 abs_timeout_gethz(struct abs_timeout *timo)
  785 {
  786         struct timespec tts;
  787 
  788         if (timespeccmp(&timo->end, &timo->cur, <=))
  789                 return (-1);
  790         timespecsub(&timo->end, &timo->cur, &tts);
  791         return (tstohz(&tts));
  792 }
  793 
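The abs_timeout_* helpers implement standard absolute-deadline bookkeeping: a relative timeout is converted to an end time once, and the remaining interval is recomputed after every wakeup so that repeated sleeps do not stretch the total wait. A userland sketch of the same arithmetic, assuming only POSIX clock_gettime() (names invented; timespecadd/timespecsub written out by hand):

    #include <stdbool.h>
    #include <time.h>

    struct deadline {
            clockid_t clockid;
            struct timespec end;
    };

    void
    deadline_init(struct deadline *d, clockid_t clockid,
        const struct timespec *rel)
    {
            struct timespec now;

            d->clockid = clockid;
            clock_gettime(clockid, &now);
            d->end.tv_sec = now.tv_sec + rel->tv_sec;
            d->end.tv_nsec = now.tv_nsec + rel->tv_nsec;
            if (d->end.tv_nsec >= 1000000000L) {
                    d->end.tv_sec++;
                    d->end.tv_nsec -= 1000000000L;
            }
    }

    /* Returns false once the deadline has passed (cf. abs_timeout_gethz()
     * returning -1, which umtxq_sleep() maps to ETIMEDOUT). */
    bool
    deadline_remaining(const struct deadline *d, struct timespec *rem)
    {
            struct timespec now;

            clock_gettime(d->clockid, &now);
            if (now.tv_sec > d->end.tv_sec ||
                (now.tv_sec == d->end.tv_sec && now.tv_nsec >= d->end.tv_nsec))
                    return (false);
            rem->tv_sec = d->end.tv_sec - now.tv_sec;
            rem->tv_nsec = d->end.tv_nsec - now.tv_nsec;
            if (rem->tv_nsec < 0) {
                    rem->tv_sec--;
                    rem->tv_nsec += 1000000000L;
            }
            return (true);
    }
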
  794 static uint32_t
  795 umtx_unlock_val(uint32_t flags, bool rb)
  796 {
  797 
  798         if (rb)
  799                 return (UMUTEX_RB_OWNERDEAD);
  800         else if ((flags & UMUTEX_NONCONSISTENT) != 0)
  801                 return (UMUTEX_RB_NOTRECOV);
  802         else
  803                 return (UMUTEX_UNOWNED);
  804 
  805 }
  806 
  807 /*
  808  * Put the thread into a sleep state; before sleeping, check
  809  * whether the thread was removed from the umtx queue.
  810  */
  811 static inline int
  812 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
  813 {
  814         struct umtxq_chain *uc;
  815         int error, timo;
  816 
  817         if (abstime != NULL && abstime->is_abs_real) {
  818                 curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
  819                 abs_timeout_update(abstime);
  820         }
  821 
  822         uc = umtxq_getchain(&uq->uq_key);
  823         UMTXQ_LOCKED_ASSERT(uc);
  824         for (;;) {
  825                 if (!(uq->uq_flags & UQF_UMTXQ)) {
  826                         error = 0;
  827                         break;
  828                 }
  829                 if (abstime != NULL) {
  830                         timo = abs_timeout_gethz(abstime);
  831                         if (timo < 0) {
  832                                 error = ETIMEDOUT;
  833                                 break;
  834                         }
  835                 } else
  836                         timo = 0;
  837                 error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
  838                 if (error == EINTR || error == ERESTART) {
  839                         umtxq_lock(&uq->uq_key);
  840                         break;
  841                 }
  842                 if (abstime != NULL) {
  843                         if (abstime->is_abs_real)
  844                                 curthread->td_rtcgen =
  845                                     atomic_load_acq_int(&rtc_generation);
  846                         abs_timeout_update(abstime);
  847                 }
  848                 umtxq_lock(&uq->uq_key);
  849         }
  850 
  851         curthread->td_rtcgen = 0;
  852         return (error);
  853 }
  854 
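The is_abs_real handling above exists because an absolute CLOCK_REALTIME sleep must notice wall-clock steps: td_rtcgen is set before the clock is read, and a clock step bumps the generation so such sleepers are re-examined. The userland counterpart of that guarantee is clock_nanosleep() with TIMER_ABSTIME; a hedged sketch:

    #include <errno.h>
    #include <time.h>

    /* Sleep until an absolute CLOCK_REALTIME deadline, tracking clock steps. */
    int
    sleep_until(const struct timespec *deadline)
    {
            int error;

            /* Retry on EINTR so signal delivery does not shorten the wait. */
            do {
                    error = clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME,
                        deadline, NULL);
            } while (error == EINTR);
            return (error);
    }
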
  855 /*
  856  * Convert a userspace address into a unique logical key.
  857  */
  858 int
  859 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
  860 {
  861         struct thread *td = curthread;
  862         vm_map_t map;
  863         vm_map_entry_t entry;
  864         vm_pindex_t pindex;
  865         vm_prot_t prot;
  866         boolean_t wired;
  867 
  868         key->type = type;
  869         if (share == THREAD_SHARE) {
  870                 key->shared = 0;
  871                 key->info.private.vs = td->td_proc->p_vmspace;
  872                 key->info.private.addr = (uintptr_t)addr;
  873         } else {
  874                 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
  875                 map = &td->td_proc->p_vmspace->vm_map;
  876                 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
  877                     &entry, &key->info.shared.object, &pindex, &prot,
  878                     &wired) != KERN_SUCCESS) {
  879                         return (EFAULT);
  880                 }
  881 
  882                 if ((share == PROCESS_SHARE) ||
  883                     (share == AUTO_SHARE &&
  884                      VM_INHERIT_SHARE == entry->inheritance)) {
  885                         key->shared = 1;
  886                         key->info.shared.offset = (vm_offset_t)addr -
  887                             entry->start + entry->offset;
  888                         vm_object_reference(key->info.shared.object);
  889                 } else {
  890                         key->shared = 0;
  891                         key->info.private.vs = td->td_proc->p_vmspace;
  892                         key->info.private.addr = (uintptr_t)addr;
  893                 }
  894                 vm_map_lookup_done(map, entry);
  895         }
  896 
  897         umtxq_hash(key);
  898         return (0);
  899 }
  900 
  901 /*
  902  * Release key.
  903  */
  904 void
  905 umtx_key_release(struct umtx_key *key)
  906 {
  907         if (key->shared)
  908                 vm_object_deallocate(key->info.shared.object);
  909 }
  910 
  911 #ifdef COMPAT_FREEBSD10
  912 /*
  913  * Lock a umtx object.
  914  */
  915 static int
  916 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
  917     const struct timespec *timeout)
  918 {
  919         struct abs_timeout timo;
  920         struct umtx_q *uq;
  921         u_long owner;
  922         u_long old;
  923         int error = 0;
  924 
  925         uq = td->td_umtxq;
  926         if (timeout != NULL)
  927                 abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
  928 
  929         /*
  930          * Care must be exercised when dealing with the umtx
  931          * structure: any access can fault.
  932          */
  933         for (;;) {
  934                 /*
  935                  * Try the uncontested case.  This should be done in userland.
  936                  */
  937                 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
  938 
  939                 /* The acquire succeeded. */
  940                 if (owner == UMTX_UNOWNED)
  941                         return (0);
  942 
  943                 /* The address was invalid. */
  944                 if (owner == -1)
  945                         return (EFAULT);
  946 
  947                 /* If no one owns it but it is contested try to acquire it. */
  948                 if (owner == UMTX_CONTESTED) {
  949                         owner = casuword(&umtx->u_owner,
  950                             UMTX_CONTESTED, id | UMTX_CONTESTED);
  951 
  952                         if (owner == UMTX_CONTESTED)
  953                                 return (0);
  954 
  955                         /* The address was invalid. */
  956                         if (owner == -1)
  957                                 return (EFAULT);
  958 
  959                         error = thread_check_susp(td, false);
  960                         if (error != 0)
  961                                 break;
  962 
  963                         /* If this failed the lock has changed, restart. */
  964                         continue;
  965                 }
  966 
  967                 /*
  968                  * If we caught a signal, we have retried and now
  969                  * exit immediately.
  970                  */
  971                 if (error != 0)
  972                         break;
  973 
  974                 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
  975                         AUTO_SHARE, &uq->uq_key)) != 0)
  976                         return (error);
  977 
  978                 umtxq_lock(&uq->uq_key);
  979                 umtxq_busy(&uq->uq_key);
  980                 umtxq_insert(uq);
  981                 umtxq_unbusy(&uq->uq_key);
  982                 umtxq_unlock(&uq->uq_key);
  983 
  984                 /*
  985                  * Set the contested bit so that a release in user space
  986                  * knows to use the system call for unlock.  If this fails
  987                  * either someone else has acquired the lock or it has been
  988                  * released.
  989                  */
  990                 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
  991 
  992                 /* The address was invalid. */
  993                 if (old == -1) {
  994                         umtxq_lock(&uq->uq_key);
  995                         umtxq_remove(uq);
  996                         umtxq_unlock(&uq->uq_key);
  997                         umtx_key_release(&uq->uq_key);
  998                         return (EFAULT);
  999                 }
 1000 
 1001                 /*
 1002                  * If we set the contested bit, sleep; otherwise the lock
 1003                  * changed and we must retry, or we lost a race to the
 1004                  * thread unlocking the umtx.
 1005                  */
 1006                 umtxq_lock(&uq->uq_key);
 1007                 if (old == owner)
 1008                         error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
 1009                             &timo);
 1010                 umtxq_remove(uq);
 1011                 umtxq_unlock(&uq->uq_key);
 1012                 umtx_key_release(&uq->uq_key);
 1013 
 1014                 if (error == 0)
 1015                         error = thread_check_susp(td, false);
 1016         }
 1017 
 1018         if (timeout == NULL) {
 1019                 /* Mutex locking is restarted if it is interrupted. */
 1020                 if (error == EINTR)
 1021                         error = ERESTART;
 1022         } else {
 1023                 /* Timed-locking is not restarted. */
 1024                 if (error == ERESTART)
 1025                         error = EINTR;
 1026         }
 1027         return (error);
 1028 }
 1029 
 1030 /*
 1031  * Unlock a umtx object.
 1032  */
 1033 static int
 1034 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
 1035 {
 1036         struct umtx_key key;
 1037         u_long owner;
 1038         u_long old;
 1039         int error;
 1040         int count;
 1041 
 1042         /*
 1043          * Make sure we own this mtx.
 1044          */
 1045         owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
 1046         if (owner == -1)
 1047                 return (EFAULT);
 1048 
 1049         if ((owner & ~UMTX_CONTESTED) != id)
 1050                 return (EPERM);
 1051 
 1052         /* This should be done in userland */
 1053         if ((owner & UMTX_CONTESTED) == 0) {
 1054                 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
 1055                 if (old == -1)
 1056                         return (EFAULT);
 1057                 if (old == owner)
 1058                         return (0);
 1059                 owner = old;
 1060         }
 1061 
 1062         /* We should only ever be in here for contested locks */
 1063         if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
 1064             &key)) != 0)
 1065                 return (error);
 1066 
 1067         umtxq_lock(&key);
 1068         umtxq_busy(&key);
 1069         count = umtxq_count(&key);
 1070         umtxq_unlock(&key);
 1071 
 1072         /*
 1073          * When unlocking the umtx, it must be marked as unowned if
 1074          * at most one thread is waiting for it.  Otherwise, it must
 1075          * be marked as contested.
 1076          */
 1077         old = casuword(&umtx->u_owner, owner,
 1078             count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
 1079         umtxq_lock(&key);
 1080         umtxq_signal(&key, 1);
 1081         umtxq_unbusy(&key);
 1082         umtxq_unlock(&key);
 1083         umtx_key_release(&key);
 1084         if (old == -1)
 1085                 return (EFAULT);
 1086         if (old != owner)
 1087                 return (EINVAL);
 1088         return (0);
 1089 }
 1090 
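The repeated "this should be done in userland" remarks refer to the compare-and-swap fast path that libthr performs before entering the kernel at all. A hedged sketch of that division of labor, written against the modern struct umutex protocol handled later in this file; UMUTEX_UNOWNED, UMUTEX_CONTESTED, UMTX_OP_MUTEX_LOCK/UMTX_OP_MUTEX_UNLOCK and _umtx_op() are real <sys/umtx.h> names, while the wrappers are invented and omit error handling:

    #include <sys/types.h>
    #include <sys/umtx.h>
    #include <machine/atomic.h>
    #include <pthread.h>
    #include <pthread_np.h>

    static void
    umutex_lock_fast(struct umutex *m)
    {
            uint32_t tid = (uint32_t)pthread_getthreadid_np();

            /* Uncontested acquire: one CAS, no kernel entry. */
            if (atomic_cmpset_acq_32((volatile uint32_t *)&m->m_owner,
                UMUTEX_UNOWNED, tid))
                    return;
            /* Contended: the kernel sets UMUTEX_CONTESTED and sleeps us. */
            (void)_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);
    }

    static void
    umutex_unlock_fast(struct umutex *m)
    {
            uint32_t tid = (uint32_t)pthread_getthreadid_np();

            /* No contested bit advertised: release with one CAS. */
            if (atomic_cmpset_rel_32((volatile uint32_t *)&m->m_owner,
                tid, UMUTEX_UNOWNED))
                    return;
            /* Waiters advertised: let the kernel pick one to wake. */
            (void)_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL);
    }
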
 1091 #ifdef COMPAT_FREEBSD32
 1092 
 1093 /*
 1094  * Lock a umtx object.
 1095  */
 1096 static int
 1097 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
 1098         const struct timespec *timeout)
 1099 {
 1100         struct abs_timeout timo;
 1101         struct umtx_q *uq;
 1102         uint32_t owner;
 1103         uint32_t old;
 1104         int error = 0;
 1105 
 1106         uq = td->td_umtxq;
 1107 
 1108         if (timeout != NULL)
 1109                 abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
 1110 
 1111         /*
 1112          * Care must be exercised when dealing with the umtx
 1113          * structure: any access can fault.
 1114          */
 1115         for (;;) {
 1116                 /*
 1117                  * Try the uncontested case.  This should be done in userland.
 1118                  */
 1119                 owner = casuword32(m, UMUTEX_UNOWNED, id);
 1120 
 1121                 /* The acquire succeeded. */
 1122                 if (owner == UMUTEX_UNOWNED)
 1123                         return (0);
 1124 
 1125                 /* The address was invalid. */
 1126                 if (owner == -1)
 1127                         return (EFAULT);
 1128 
 1129                 /* If no one owns it but it is contested try to acquire it. */
 1130                 if (owner == UMUTEX_CONTESTED) {
 1131                         owner = casuword32(m,
 1132                             UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 1133                         if (owner == UMUTEX_CONTESTED)
 1134                                 return (0);
 1135 
 1136                         /* The address was invalid. */
 1137                         if (owner == -1)
 1138                                 return (EFAULT);
 1139 
 1140                         error = thread_check_susp(td, false);
 1141                         if (error != 0)
 1142                                 break;
 1143 
 1144                         /* If this failed the lock has changed, restart. */
 1145                         continue;
 1146                 }
 1147 
 1148                 /*
 1149                  * If we caught a signal, we have retried and now
 1150                  * exit immediately.
 1151                  */
 1152                 if (error != 0)
 1153                         return (error);
 1154 
 1155                 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
 1156                         AUTO_SHARE, &uq->uq_key)) != 0)
 1157                         return (error);
 1158 
 1159                 umtxq_lock(&uq->uq_key);
 1160                 umtxq_busy(&uq->uq_key);
 1161                 umtxq_insert(uq);
 1162                 umtxq_unbusy(&uq->uq_key);
 1163                 umtxq_unlock(&uq->uq_key);
 1164 
 1165                 /*
 1166                  * Set the contested bit so that a release in user space
 1167                  * knows to use the system call for unlock.  If this fails
 1168                  * either someone else has acquired the lock or it has been
 1169                  * released.
 1170                  */
 1171                 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
 1172 
 1173                 /* The address was invalid. */
 1174                 if (old == -1) {
 1175                         umtxq_lock(&uq->uq_key);
 1176                         umtxq_remove(uq);
 1177                         umtxq_unlock(&uq->uq_key);
 1178                         umtx_key_release(&uq->uq_key);
 1179                         return (EFAULT);
 1180                 }
 1181 
 1182                 /*
 1183                  * If we set the contested bit, sleep; otherwise the lock
 1184                  * changed and we must retry, or we lost a race to the
 1185                  * thread unlocking the umtx.
 1186                  */
 1187                 umtxq_lock(&uq->uq_key);
 1188                 if (old == owner)
 1189                         error = umtxq_sleep(uq, "umtx", timeout == NULL ?
 1190                             NULL : &timo);
 1191                 umtxq_remove(uq);
 1192                 umtxq_unlock(&uq->uq_key);
 1193                 umtx_key_release(&uq->uq_key);
 1194 
 1195                 if (error == 0)
 1196                         error = thread_check_susp(td, false);
 1197         }
 1198 
 1199         if (timeout == NULL) {
 1200                 /* Mutex locking is restarted if it is interrupted. */
 1201                 if (error == EINTR)
 1202                         error = ERESTART;
 1203         } else {
 1204                 /* Timed-locking is not restarted. */
 1205                 if (error == ERESTART)
 1206                         error = EINTR;
 1207         }
 1208         return (error);
 1209 }
 1210 
 1211 /*
 1212  * Unlock a umtx object.
 1213  */
 1214 static int
 1215 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
 1216 {
 1217         struct umtx_key key;
 1218         uint32_t owner;
 1219         uint32_t old;
 1220         int error;
 1221         int count;
 1222 
 1223         /*
 1224          * Make sure we own this mtx.
 1225          */
 1226         owner = fuword32(m);
 1227         if (owner == -1)
 1228                 return (EFAULT);
 1229 
 1230         if ((owner & ~UMUTEX_CONTESTED) != id)
 1231                 return (EPERM);
 1232 
 1233         /* This should be done in userland */
 1234         if ((owner & UMUTEX_CONTESTED) == 0) {
 1235                 old = casuword32(m, owner, UMUTEX_UNOWNED);
 1236                 if (old == -1)
 1237                         return (EFAULT);
 1238                 if (old == owner)
 1239                         return (0);
 1240                 owner = old;
 1241         }
 1242 
 1243         /* We should only ever be in here for contested locks */
 1244         if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
 1245                 &key)) != 0)
 1246                 return (error);
 1247 
 1248         umtxq_lock(&key);
 1249         umtxq_busy(&key);
 1250         count = umtxq_count(&key);
 1251         umtxq_unlock(&key);
 1252 
 1253         /*
 1254          * When unlocking the umtx, it must be marked as unowned if
 1255          * at most one thread is waiting for it.  Otherwise, it must
 1256          * be marked as contested.
 1257          */
 1258         old = casuword32(m, owner,
 1259                 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
 1260         umtxq_lock(&key);
 1261         umtxq_signal(&key, 1);
 1262         umtxq_unbusy(&key);
 1263         umtxq_unlock(&key);
 1264         umtx_key_release(&key);
 1265         if (old == -1)
 1266                 return (EFAULT);
 1267         if (old != owner)
 1268                 return (EINVAL);
 1269         return (0);
 1270 }
 1271 #endif  /* COMPAT_FREEBSD32 */
 1272 #endif  /* COMPAT_FREEBSD10 */
 1273 
 1274 /*
 1275  * Fetch and compare the value; sleep on the address if it is unchanged.
 1276  */
 1277 static int
 1278 do_wait(struct thread *td, void *addr, u_long id,
 1279     struct _umtx_time *timeout, int compat32, int is_private)
 1280 {
 1281         struct abs_timeout timo;
 1282         struct umtx_q *uq;
 1283         u_long tmp;
 1284         uint32_t tmp32;
 1285         int error = 0;
 1286 
 1287         uq = td->td_umtxq;
 1288         if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
 1289                 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
 1290                 return (error);
 1291 
 1292         if (timeout != NULL)
 1293                 abs_timeout_init2(&timo, timeout);
 1294 
 1295         umtxq_lock(&uq->uq_key);
 1296         umtxq_insert(uq);
 1297         umtxq_unlock(&uq->uq_key);
 1298         if (compat32 == 0) {
 1299                 error = fueword(addr, &tmp);
 1300                 if (error != 0)
 1301                         error = EFAULT;
 1302         } else {
 1303                 error = fueword32(addr, &tmp32);
 1304                 if (error == 0)
 1305                         tmp = tmp32;
 1306                 else
 1307                         error = EFAULT;
 1308         }
 1309         umtxq_lock(&uq->uq_key);
 1310         if (error == 0) {
 1311                 if (tmp == id)
 1312                         error = umtxq_sleep(uq, "uwait", timeout == NULL ?
 1313                             NULL : &timo);
 1314                 if ((uq->uq_flags & UQF_UMTXQ) == 0)
 1315                         error = 0;
 1316                 else
 1317                         umtxq_remove(uq);
 1318         } else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
 1319                 umtxq_remove(uq);
 1320         }
 1321         umtxq_unlock(&uq->uq_key);
 1322         umtx_key_release(&uq->uq_key);
 1323         if (error == ERESTART)
 1324                 error = EINTR;
 1325         return (error);
 1326 }
 1327 
 1328 /*
 1329  * Wake up threads sleeping on the specified address.
 1330  */
 1331 int
 1332 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
 1333 {
 1334         struct umtx_key key;
 1335         int ret;
 1336 
 1337         if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
 1338             is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
 1339                 return (ret);
 1340         umtxq_lock(&key);
 1341         umtxq_signal(&key, n_wake);
 1342         umtxq_unlock(&key);
 1343         umtx_key_release(&key);
 1344         return (0);
 1345 }
 1346 
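do_wait() and kern_umtx_wake() are the futex-style primitives behind _umtx_op(2)'s UMTX_OP_WAIT*/UMTX_OP_WAKE* operations: the kernel queues the thread first and only then re-reads the word, so a store plus a wake from another thread can never be lost. A hedged userland illustration, a one-shot event (the operation names are real <sys/umtx.h> values; the event wrapper is invented):

    #include <sys/types.h>
    #include <sys/umtx.h>
    #include <machine/atomic.h>
    #include <limits.h>

    struct event {
            u_int   fired;          /* 0 = not yet, 1 = fired */
    };

    static void
    event_wait(struct event *ev)
    {
            /* The kernel sleeps us only while ev->fired is still 0, so
             * there is no window for a lost wakeup. */
            while (atomic_load_acq_int(&ev->fired) == 0)
                    (void)_umtx_op(&ev->fired, UMTX_OP_WAIT_UINT_PRIVATE,
                        0, NULL, NULL);
    }

    static void
    event_fire(struct event *ev)
    {
            atomic_store_rel_int(&ev->fired, 1);
            /* INT_MAX wakes every waiter, cf. n_wake in kern_umtx_wake(). */
            (void)_umtx_op(&ev->fired, UMTX_OP_WAKE_PRIVATE, INT_MAX,
                NULL, NULL);
    }
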
 1347 /*
 1348  * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 1349  */
 1350 static int
 1351 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
 1352     struct _umtx_time *timeout, int mode)
 1353 {
 1354         struct abs_timeout timo;
 1355         struct umtx_q *uq;
 1356         uint32_t owner, old, id;
 1357         int error, rv;
 1358 
 1359         id = td->td_tid;
 1360         uq = td->td_umtxq;
 1361         error = 0;
 1362         if (timeout != NULL)
 1363                 abs_timeout_init2(&timo, timeout);
 1364 
 1365         /*
 1366          * Care must be exercised when dealing with the umtx
 1367          * structure: any access can fault.
 1368          */
 1369         for (;;) {
 1370                 rv = fueword32(&m->m_owner, &owner);
 1371                 if (rv == -1)
 1372                         return (EFAULT);
 1373                 if (mode == _UMUTEX_WAIT) {
 1374                         if (owner == UMUTEX_UNOWNED ||
 1375                             owner == UMUTEX_CONTESTED ||
 1376                             owner == UMUTEX_RB_OWNERDEAD ||
 1377                             owner == UMUTEX_RB_NOTRECOV)
 1378                                 return (0);
 1379                 } else {
 1380                         /*
 1381                          * The robust mutex's owner terminated.  The
 1382                          * kernel's duty is to return EOWNERDEAD to
 1383                          * userspace.  The UMUTEX_NONCONSISTENT flag in
 1384                          * umutex.m_flags is set by common userspace code.
 1385                          */
 1386                         if (owner == UMUTEX_RB_OWNERDEAD) {
 1387                                 rv = casueword32(&m->m_owner,
 1388                                     UMUTEX_RB_OWNERDEAD, &owner,
 1389                                     id | UMUTEX_CONTESTED);
 1390                                 if (rv == -1)
 1391                                         return (EFAULT);
 1392                                 if (rv == 0) {
 1393                                         MPASS(owner == UMUTEX_RB_OWNERDEAD);
 1394                                         return (EOWNERDEAD); /* success */
 1395                                 }
 1396                                 MPASS(rv == 1);
 1397                                 rv = thread_check_susp(td, false);
 1398                                 if (rv != 0)
 1399                                         return (rv);
 1400                                 continue;
 1401                         }
 1402                         if (owner == UMUTEX_RB_NOTRECOV)
 1403                                 return (ENOTRECOVERABLE);
 1404 
 1405                         /*
 1406                          * Try the uncontested case.  This should be
 1407                          * done in userland.
 1408                          */
 1409                         rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
 1410                             &owner, id);
 1411                         /* The address was invalid. */
 1412                         if (rv == -1)
 1413                                 return (EFAULT);
 1414 
 1415                         /* The acquire succeeded. */
 1416                         if (rv == 0) {
 1417                                 MPASS(owner == UMUTEX_UNOWNED);
 1418                                 return (0);
 1419                         }
 1420 
 1421                         /*
 1422                          * If no one owns it but it is contested try
 1423                          * to acquire it.
 1424                          */
 1425                         MPASS(rv == 1);
 1426                         if (owner == UMUTEX_CONTESTED) {
 1427                                 rv = casueword32(&m->m_owner,
 1428                                     UMUTEX_CONTESTED, &owner,
 1429                                     id | UMUTEX_CONTESTED);
 1430                                 /* The address was invalid. */
 1431                                 if (rv == -1)
 1432                                         return (EFAULT);
 1433                                 if (rv == 0) {
 1434                                         MPASS(owner == UMUTEX_CONTESTED);
 1435                                         return (0);
 1436                                 }
 1437                                 if (rv == 1) {
 1438                                         rv = thread_check_susp(td, false);
 1439                                         if (rv != 0)
 1440                                                 return (rv);
 1441                                 }
 1442 
 1443                                 /*
 1444                                  * If this failed the lock has
 1445                                  * changed, restart.
 1446                                  */
 1447                                 continue;
 1448                         }
 1449 
 1450                         /* rv == 1 but not contested, likely store failure */
 1451                         rv = thread_check_susp(td, false);
 1452                         if (rv != 0)
 1453                                 return (rv);
 1454                 }
 1455 
 1456                 if (mode == _UMUTEX_TRY)
 1457                         return (EBUSY);
 1458 
 1459                 /*
 1460                  * If we caught a signal, we have retried and now
 1461                  * exit immediately.
 1462                  */
 1463                 if (error != 0)
 1464                         return (error);
 1465 
 1466                 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
 1467                     GET_SHARE(flags), &uq->uq_key)) != 0)
 1468                         return (error);
 1469 
 1470                 umtxq_lock(&uq->uq_key);
 1471                 umtxq_busy(&uq->uq_key);
 1472                 umtxq_insert(uq);
 1473                 umtxq_unlock(&uq->uq_key);
 1474 
 1475                 /*
 1476                  * Set the contested bit so that a release in user space
 1477                  * knows to use the system call for unlock.  If this fails,
 1478                  * either someone else has acquired the lock or it has been
 1479                  * released.
 1480                  */
 1481                 rv = casueword32(&m->m_owner, owner, &old,
 1482                     owner | UMUTEX_CONTESTED);
 1483 
 1484                 /* The address was invalid or casueword failed to store. */
 1485                 if (rv == -1 || rv == 1) {
 1486                         umtxq_lock(&uq->uq_key);
 1487                         umtxq_remove(uq);
 1488                         umtxq_unbusy(&uq->uq_key);
 1489                         umtxq_unlock(&uq->uq_key);
 1490                         umtx_key_release(&uq->uq_key);
 1491                         if (rv == -1)
 1492                                 return (EFAULT);
 1493                         if (rv == 1) {
 1494                                 rv = thread_check_susp(td, false);
 1495                                 if (rv != 0)
 1496                                         return (rv);
 1497                         }
 1498                         continue;
 1499                 }
 1500 
 1501                 /*
 1502          * We set the contested bit; sleep.  Otherwise the lock changed
 1503          * and we need to retry, or we lost a race to the thread
 1504                  * unlocking the umtx.
 1505                  */
 1506                 umtxq_lock(&uq->uq_key);
 1507                 umtxq_unbusy(&uq->uq_key);
 1508                 MPASS(old == owner);
 1509                 error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
 1510                     NULL : &timo);
 1511                 umtxq_remove(uq);
 1512                 umtxq_unlock(&uq->uq_key);
 1513                 umtx_key_release(&uq->uq_key);
 1514 
 1515                 if (error == 0)
 1516                         error = thread_check_susp(td, false);
 1517         }
 1518 
 1519         return (0);
 1520 }
 1521 
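      /*
       * Reference for the m_owner word states used throughout the
       * routines below (symbolic; see sys/umtx.h for the values):
       *   UMUTEX_UNOWNED          free, lockable from userland
       *   UMUTEX_CONTESTED        free, but lockers must enter the kernel
       *   tid                     owned, uncontested
       *   tid | UMUTEX_CONTESTED  owned, with kernel-side waiters
       *   UMUTEX_RB_OWNERDEAD     robust mutex whose owner died
       *   UMUTEX_RB_NOTRECOV      robust mutex marked unrecoverable
       */
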
 1522 /*
 1523  * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
 1524  */
 1525 static int
 1526 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
 1527 {
 1528         struct umtx_key key;
 1529         uint32_t owner, old, id, newlock;
 1530         int error, count;
 1531 
 1532         id = td->td_tid;
 1533 
 1534 again:
 1535         /*
 1536          * Make sure we own this mtx.
 1537          */
 1538         error = fueword32(&m->m_owner, &owner);
 1539         if (error == -1)
 1540                 return (EFAULT);
 1541 
 1542         if ((owner & ~UMUTEX_CONTESTED) != id)
 1543                 return (EPERM);
 1544 
 1545         newlock = umtx_unlock_val(flags, rb);
 1546         if ((owner & UMUTEX_CONTESTED) == 0) {
 1547                 error = casueword32(&m->m_owner, owner, &old, newlock);
 1548                 if (error == -1)
 1549                         return (EFAULT);
 1550                 if (error == 1) {
 1551                         error = thread_check_susp(td, false);
 1552                         if (error != 0)
 1553                                 return (error);
 1554                         goto again;
 1555                 }
 1556                 MPASS(old == owner);
 1557                 return (0);
 1558         }
 1559 
 1560         /* We should only ever be in here for contested locks */
 1561         if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
 1562             &key)) != 0)
 1563                 return (error);
 1564 
 1565         umtxq_lock(&key);
 1566         umtxq_busy(&key);
 1567         count = umtxq_count(&key);
 1568         umtxq_unlock(&key);
 1569 
 1570         /*
 1571          * When unlocking the umtx, it must be marked as unowned if
 1572          * zero or one thread is waiting for it.
 1573          * Otherwise, it must be marked as contested.
 1574          */
 1575         if (count > 1)
 1576                 newlock |= UMUTEX_CONTESTED;
 1577         error = casueword32(&m->m_owner, owner, &old, newlock);
 1578         umtxq_lock(&key);
 1579         umtxq_signal(&key, 1);
 1580         umtxq_unbusy(&key);
 1581         umtxq_unlock(&key);
 1582         umtx_key_release(&key);
 1583         if (error == -1)
 1584                 return (EFAULT);
 1585         if (error == 1) {
 1586                 if (old != owner)
 1587                         return (EINVAL);
 1588                 error = thread_check_susp(td, false);
 1589                 if (error != 0)
 1590                         return (error);
 1591                 goto again;
 1592         }
 1593         return (0);
 1594 }
 1595 
 1596 /*
 1597  * Check if the mutex is available and wake up a waiter;
 1598  * used only for simple (PTHREAD_PRIO_NONE) mutexes.
 1599  */
 1600 static int
 1601 do_wake_umutex(struct thread *td, struct umutex *m)
 1602 {
 1603         struct umtx_key key;
 1604         uint32_t owner;
 1605         uint32_t flags;
 1606         int error;
 1607         int count;
 1608 
 1609 again:
 1610         error = fueword32(&m->m_owner, &owner);
 1611         if (error == -1)
 1612                 return (EFAULT);
 1613 
 1614         if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
 1615             owner != UMUTEX_RB_NOTRECOV)
 1616                 return (0);
 1617 
 1618         error = fueword32(&m->m_flags, &flags);
 1619         if (error == -1)
 1620                 return (EFAULT);
 1621 
 1622         /* We should only ever be in here for contested locks */
 1623         if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
 1624             &key)) != 0)
 1625                 return (error);
 1626 
 1627         umtxq_lock(&key);
 1628         umtxq_busy(&key);
 1629         count = umtxq_count(&key);
 1630         umtxq_unlock(&key);
 1631 
 1632         if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
 1633             owner != UMUTEX_RB_NOTRECOV) {
 1634                 error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
 1635                     UMUTEX_UNOWNED);
 1636                 if (error == -1) {
 1637                         error = EFAULT;
 1638                 } else if (error == 1) {
 1639                         umtxq_lock(&key);
 1640                         umtxq_unbusy(&key);
 1641                         umtxq_unlock(&key);
 1642                         umtx_key_release(&key);
 1643                         error = thread_check_susp(td, false);
 1644                         if (error != 0)
 1645                                 return (error);
 1646                         goto again;
 1647                 }
 1648         }
 1649 
 1650         umtxq_lock(&key);
 1651         if (error == 0 && count != 0) {
 1652                 MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
 1653                     owner == UMUTEX_RB_OWNERDEAD ||
 1654                     owner == UMUTEX_RB_NOTRECOV);
 1655                 umtxq_signal(&key, 1);
 1656         }
 1657         umtxq_unbusy(&key);
 1658         umtxq_unlock(&key);
 1659         umtx_key_release(&key);
 1660         return (error);
 1661 }
 1662 
 1663 /*
 1664  * Check if the mutex has waiters and try to fix the contention bit.
 1665  */
 1666 static int
 1667 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
 1668 {
 1669         struct umtx_key key;
 1670         uint32_t owner, old;
 1671         int type;
 1672         int error;
 1673         int count;
 1674 
 1675         switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
 1676             UMUTEX_ROBUST)) {
 1677         case 0:
 1678         case UMUTEX_ROBUST:
 1679                 type = TYPE_NORMAL_UMUTEX;
 1680                 break;
 1681         case UMUTEX_PRIO_INHERIT:
 1682                 type = TYPE_PI_UMUTEX;
 1683                 break;
 1684         case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
 1685                 type = TYPE_PI_ROBUST_UMUTEX;
 1686                 break;
 1687         case UMUTEX_PRIO_PROTECT:
 1688                 type = TYPE_PP_UMUTEX;
 1689                 break;
 1690         case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
 1691                 type = TYPE_PP_ROBUST_UMUTEX;
 1692                 break;
 1693         default:
 1694                 return (EINVAL);
 1695         }
 1696         if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
 1697                 return (error);
 1698 
 1699         owner = 0;
 1700         umtxq_lock(&key);
 1701         umtxq_busy(&key);
 1702         count = umtxq_count(&key);
 1703         umtxq_unlock(&key);
 1704 
 1705         error = fueword32(&m->m_owner, &owner);
 1706         if (error == -1)
 1707                 error = EFAULT;
 1708 
 1709         /*
 1710          * Only repair the contention bit if there is a waiter; this means
 1711          * the mutex is still being referenced by userland code.
 1712          * Otherwise, don't update any memory.
 1713          */
 1714         while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
 1715             (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
 1716                 error = casueword32(&m->m_owner, owner, &old,
 1717                     owner | UMUTEX_CONTESTED);
 1718                 if (error == -1) {
 1719                         error = EFAULT;
 1720                         break;
 1721                 }
 1722                 if (error == 0) {
 1723                         MPASS(old == owner);
 1724                         break;
 1725                 }
 1726                 owner = old;
 1727                 error = thread_check_susp(td, false);
 1728         }
 1729 
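              /*
               * On a fault, wake all waiters so that none of them sleeps
               * forever on a mutex whose memory is gone; otherwise wake
               * one waiter when the mutex is observably available.
               */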
 1730         umtxq_lock(&key);
 1731         if (error == EFAULT) {
 1732                 umtxq_signal(&key, INT_MAX);
 1733         } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
 1734             owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
 1735                 umtxq_signal(&key, 1);
 1736         umtxq_unbusy(&key);
 1737         umtxq_unlock(&key);
 1738         umtx_key_release(&key);
 1739         return (error);
 1740 }
 1741 
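      /*
       * Allocate a zeroed umtx_pi.  With M_NOWAIT the allocation may
       * fail and return NULL; callers such as do_lock_pi() then retry
       * with M_WAITOK after dropping the chain lock.
       */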
 1742 static inline struct umtx_pi *
 1743 umtx_pi_alloc(int flags)
 1744 {
 1745         struct umtx_pi *pi;
 1746 
 1747         pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
              /* With M_NOWAIT the allocation can fail; let the caller retry. */
              if (pi == NULL)
                      return (NULL);
 1748         TAILQ_INIT(&pi->pi_blocked);
 1749         atomic_add_int(&umtx_pi_allocated, 1);
 1750         return (pi);
 1751 }
 1752 
 1753 static inline void
 1754 umtx_pi_free(struct umtx_pi *pi)
 1755 {
 1756         uma_zfree(umtx_pi_zone, pi);
 1757         atomic_add_int(&umtx_pi_allocated, -1);
 1758 }
 1759 
 1760 /*
 1761  * Adjust the thread's position on a pi_state after its priority has been
 1762  * changed.
 1763  */
 1764 static int
 1765 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
 1766 {
 1767         struct umtx_q *uq, *uq1, *uq2;
 1768         struct thread *td1;
 1769 
 1770         mtx_assert(&umtx_lock, MA_OWNED);
 1771         if (pi == NULL)
 1772                 return (0);
 1773 
 1774         uq = td->td_umtxq;
 1775 
 1776         /*
 1777          * Check if the thread needs to be moved on the blocked chain.
 1778          * It needs to be moved if either its priority is lower than
 1779          * the previous thread's or higher than the next thread's.
 1780          */
 1781         uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
 1782         uq2 = TAILQ_NEXT(uq, uq_lockq);
 1783         if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
 1784             (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
 1785                 /*
 1786                  * Remove thread from blocked chain and determine where
 1787                  * it should be moved to.
 1788                  */
 1789                 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
 1790                 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
 1791                         td1 = uq1->uq_thread;
 1792                         MPASS(td1->td_proc->p_magic == P_MAGIC);
 1793                         if (UPRI(td1) > UPRI(td))
 1794                                 break;
 1795                 }
 1796 
 1797                 if (uq1 == NULL)
 1798                         TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
 1799                 else
 1800                         TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
 1801         }
 1802         return (1);
 1803 }
 1804 
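      /*
       * Follow the blocking chain one step: return the PI mutex that
       * the owner of the given PI mutex is itself blocked on, if any.
       */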
 1805 static struct umtx_pi *
 1806 umtx_pi_next(struct umtx_pi *pi)
 1807 {
 1808         struct umtx_q *uq_owner;
 1809 
 1810         if (pi->pi_owner == NULL)
 1811                 return (NULL);
 1812         uq_owner = pi->pi_owner->td_umtxq;
 1813         if (uq_owner == NULL)
 1814                 return (NULL);
 1815         return (uq_owner->uq_pi_blocked);
 1816 }
 1817 
 1818 /*
 1819  * Floyd's Cycle-Finding Algorithm.
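       *
       * The slow iterator (pi) advances one link per loop while the
       * fast iterator (pi1) advances two; if the two ever meet, the
       * chain of owners blocked on further PI mutexes forms a cycle
       * (a deadlock), and the callers must not follow it forever.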
 1820  */
 1821 static bool
 1822 umtx_pi_check_loop(struct umtx_pi *pi)
 1823 {
 1824         struct umtx_pi *pi1;    /* fast iterator */
 1825 
 1826         mtx_assert(&umtx_lock, MA_OWNED);
 1827         if (pi == NULL)
 1828                 return (false);
 1829         pi1 = pi;
 1830         for (;;) {
 1831                 pi = umtx_pi_next(pi);
 1832                 if (pi == NULL)
 1833                         break;
 1834                 pi1 = umtx_pi_next(pi1);
 1835                 if (pi1 == NULL)
 1836                         break;
 1837                 pi1 = umtx_pi_next(pi1);
 1838                 if (pi1 == NULL)
 1839                         break;
 1840                 if (pi == pi1)
 1841                         return (true);
 1842         }
 1843         return (false);
 1844 }
 1845 
 1846 /*
 1847  * Propagate priority when a thread is blocked on a POSIX
 1848  * PI mutex.
 1849  */
 1850 static void
 1851 umtx_propagate_priority(struct thread *td)
 1852 {
 1853         struct umtx_q *uq;
 1854         struct umtx_pi *pi;
 1855         int pri;
 1856 
 1857         mtx_assert(&umtx_lock, MA_OWNED);
 1858         pri = UPRI(td);
 1859         uq = td->td_umtxq;
 1860         pi = uq->uq_pi_blocked;
 1861         if (pi == NULL)
 1862                 return;
 1863         if (umtx_pi_check_loop(pi))
 1864                 return;
 1865 
 1866         for (;;) {
 1867                 td = pi->pi_owner;
 1868                 if (td == NULL || td == curthread)
 1869                         return;
 1870 
 1871                 MPASS(td->td_proc != NULL);
 1872                 MPASS(td->td_proc->p_magic == P_MAGIC);
 1873 
 1874                 thread_lock(td);
 1875                 if (td->td_lend_user_pri > pri)
 1876                         sched_lend_user_prio(td, pri);
 1877                 else {
 1878                         thread_unlock(td);
 1879                         break;
 1880                 }
 1881                 thread_unlock(td);
 1882 
 1883                 /*
 1884                  * Pick up the lock that td is blocked on.
 1885                  */
 1886                 uq = td->td_umtxq;
 1887                 pi = uq->uq_pi_blocked;
 1888                 if (pi == NULL)
 1889                         break;
 1890                 /* Resort td on the list if needed. */
 1891                 umtx_pi_adjust_thread(pi, td);
 1892         }
 1893 }
 1894 
 1895 /*
 1896  * Unpropagate priority for a PI mutex when a thread blocked on
 1897  * it is interrupted by a signal or resumed by others.
 1898  */
 1899 static void
 1900 umtx_repropagate_priority(struct umtx_pi *pi)
 1901 {
 1902         struct umtx_q *uq, *uq_owner;
 1903         struct umtx_pi *pi2;
 1904         int pri;
 1905 
 1906         mtx_assert(&umtx_lock, MA_OWNED);
 1907 
 1908         if (umtx_pi_check_loop(pi))
 1909                 return;
 1910         while (pi != NULL && pi->pi_owner != NULL) {
 1911                 pri = PRI_MAX;
 1912                 uq_owner = pi->pi_owner->td_umtxq;
 1913 
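                      /*
                       * Recompute the priority to lend: numerically lower
                       * values mean higher priority, so take the minimum
                       * over the top waiter of every PI mutex this owner
                       * holds, bounded by the owner's inherited priority.
                       */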
 1914                 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
 1915                         uq = TAILQ_FIRST(&pi2->pi_blocked);
 1916                         if (uq != NULL) {
 1917                                 if (pri > UPRI(uq->uq_thread))
 1918                                         pri = UPRI(uq->uq_thread);
 1919                         }
 1920                 }
 1921 
 1922                 if (pri > uq_owner->uq_inherited_pri)
 1923                         pri = uq_owner->uq_inherited_pri;
 1924                 thread_lock(pi->pi_owner);
 1925                 sched_lend_user_prio(pi->pi_owner, pri);
 1926                 thread_unlock(pi->pi_owner);
 1927                 if ((pi = uq_owner->uq_pi_blocked) != NULL)
 1928                         umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
 1929         }
 1930 }
 1931 
 1932 /*
 1933  * Insert a PI mutex into the owning thread's list.
 1934  */
 1935 static void
 1936 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
 1937 {
 1938         struct umtx_q *uq_owner;
 1939 
 1940         uq_owner = owner->td_umtxq;
 1941         mtx_assert(&umtx_lock, MA_OWNED);
 1942         MPASS(pi->pi_owner == NULL);
 1943         pi->pi_owner = owner;
 1944         TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
 1945 }
 1946 
 1947 /*
 1948  * Disown a PI mutex and remove it from the owner's list.
 1949  */
 1950 static void
 1951 umtx_pi_disown(struct umtx_pi *pi)
 1952 {
 1953 
 1954         mtx_assert(&umtx_lock, MA_OWNED);
 1955         TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
 1956         pi->pi_owner = NULL;
 1957 }
 1958 
 1959 /*
 1960  * Claim ownership of a PI mutex.
 1961  */
 1962 static int
 1963 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
 1964 {
 1965         struct umtx_q *uq;
 1966         int pri;
 1967 
 1968         mtx_lock(&umtx_lock);
 1969         if (pi->pi_owner == owner) {
 1970                 mtx_unlock(&umtx_lock);
 1971                 return (0);
 1972         }
 1973 
 1974         if (pi->pi_owner != NULL) {
 1975                 /*
 1976                  * Userland may have already messed up the mutex, sigh.
 1977                  */
 1978                 mtx_unlock(&umtx_lock);
 1979                 return (EPERM);
 1980         }
 1981         umtx_pi_setowner(pi, owner);
 1982         uq = TAILQ_FIRST(&pi->pi_blocked);
 1983         if (uq != NULL) {
 1984                 pri = UPRI(uq->uq_thread);
 1985                 thread_lock(owner);
 1986                 if (pri < UPRI(owner))
 1987                         sched_lend_user_prio(owner, pri);
 1988                 thread_unlock(owner);
 1989         }
 1990         mtx_unlock(&umtx_lock);
 1991         return (0);
 1992 }
 1993 
 1994 /*
 1995  * Adjust a thread's position in the queue of the PI mutex it is
 1996  * blocked on; this may trigger a new round of priority propagation.
 1997  */
 1998 void
 1999 umtx_pi_adjust(struct thread *td, u_char oldpri)
 2000 {
 2001         struct umtx_q *uq;
 2002         struct umtx_pi *pi;
 2003 
 2004         uq = td->td_umtxq;
 2005         mtx_lock(&umtx_lock);
 2006         /*
 2007          * Pick up the lock that td is blocked on.
 2008          */
 2009         pi = uq->uq_pi_blocked;
 2010         if (pi != NULL) {
 2011                 umtx_pi_adjust_thread(pi, td);
 2012                 umtx_repropagate_priority(pi);
 2013         }
 2014         mtx_unlock(&umtx_lock);
 2015 }
 2016 
 2017 /*
 2018  * Sleep on a PI mutex.
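       *
       * The waiter is queued on the pi_blocked list in priority order
       * and its priority is propagated along the chain of owners
       * before it sleeps; the propagation is undone after wakeup.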
 2019  */
 2020 static int
 2021 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
 2022     const char *wmesg, struct abs_timeout *timo, bool shared)
 2023 {
 2024         struct thread *td, *td1;
 2025         struct umtx_q *uq1;
 2026         int error, pri;
 2027 #ifdef INVARIANTS
 2028         struct umtxq_chain *uc;
 2029 
 2030         uc = umtxq_getchain(&pi->pi_key);
 2031 #endif
 2032         error = 0;
 2033         td = uq->uq_thread;
 2034         KASSERT(td == curthread, ("inconsistent uq_thread"));
 2035         UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
 2036         KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
 2037         umtxq_insert(uq);
 2038         mtx_lock(&umtx_lock);
 2039         if (pi->pi_owner == NULL) {
 2040                 mtx_unlock(&umtx_lock);
 2041                 td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
 2042                 mtx_lock(&umtx_lock);
 2043                 if (td1 != NULL) {
 2044                         if (pi->pi_owner == NULL)
 2045                                 umtx_pi_setowner(pi, td1);
 2046                         PROC_UNLOCK(td1->td_proc);
 2047                 }
 2048         }
 2049 
 2050         TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
 2051                 pri = UPRI(uq1->uq_thread);
 2052                 if (pri > UPRI(td))
 2053                         break;
 2054         }
 2055 
 2056         if (uq1 != NULL)
 2057                 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
 2058         else
 2059                 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
 2060 
 2061         uq->uq_pi_blocked = pi;
 2062         thread_lock(td);
 2063         td->td_flags |= TDF_UPIBLOCKED;
 2064         thread_unlock(td);
 2065         umtx_propagate_priority(td);
 2066         mtx_unlock(&umtx_lock);
 2067         umtxq_unbusy(&uq->uq_key);
 2068 
 2069         error = umtxq_sleep(uq, wmesg, timo);
 2070         umtxq_remove(uq);
 2071 
 2072         mtx_lock(&umtx_lock);
 2073         uq->uq_pi_blocked = NULL;
 2074         thread_lock(td);
 2075         td->td_flags &= ~TDF_UPIBLOCKED;
 2076         thread_unlock(td);
 2077         TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
 2078         umtx_repropagate_priority(pi);
 2079         mtx_unlock(&umtx_lock);
 2080         umtxq_unlock(&uq->uq_key);
 2081 
 2082         return (error);
 2083 }
 2084 
 2085 /*
 2086  * Add a reference to a PI mutex.
 2087  */
 2088 static void
 2089 umtx_pi_ref(struct umtx_pi *pi)
 2090 {
 2091 
 2092         UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
 2093         pi->pi_refcount++;
 2094 }
 2095 
 2096 /*
 2097  * Drop a reference to a PI mutex; when the reference count
 2098  * reaches zero, its memory is freed.
 2099  */
 2100 static void
 2101 umtx_pi_unref(struct umtx_pi *pi)
 2102 {
 2103         struct umtxq_chain *uc;
 2104 
 2105         uc = umtxq_getchain(&pi->pi_key);
 2106         UMTXQ_LOCKED_ASSERT(uc);
 2107         KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
 2108         if (--pi->pi_refcount == 0) {
 2109                 mtx_lock(&umtx_lock);
 2110                 if (pi->pi_owner != NULL)
 2111                         umtx_pi_disown(pi);
 2112                 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
 2113                         ("blocked queue not empty"));
 2114                 mtx_unlock(&umtx_lock);
 2115                 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
 2116                 umtx_pi_free(pi);
 2117         }
 2118 }
 2119 
 2120 /*
 2121  * Find a PI mutex in the hash table.
 2122  */
 2123 static struct umtx_pi *
 2124 umtx_pi_lookup(struct umtx_key *key)
 2125 {
 2126         struct umtxq_chain *uc;
 2127         struct umtx_pi *pi;
 2128 
 2129         uc = umtxq_getchain(key);
 2130         UMTXQ_LOCKED_ASSERT(uc);
 2131 
 2132         TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
 2133                 if (umtx_key_match(&pi->pi_key, key)) {
 2134                         return (pi);
 2135                 }
 2136         }
 2137         return (NULL);
 2138 }
 2139 
 2140 /*
 2141  * Insert a PI mutex into the hash table.
 2142  */
 2143 static inline void
 2144 umtx_pi_insert(struct umtx_pi *pi)
 2145 {
 2146         struct umtxq_chain *uc;
 2147 
 2148         uc = umtxq_getchain(&pi->pi_key);
 2149         UMTXQ_LOCKED_ASSERT(uc);
 2150         TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
 2151 }
 2152 
 2153 /*
 2154  * Lock a PI mutex.
 2155  */
 2156 static int
 2157 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
 2158     struct _umtx_time *timeout, int try)
 2159 {
 2160         struct abs_timeout timo;
 2161         struct umtx_q *uq;
 2162         struct umtx_pi *pi, *new_pi;
 2163         uint32_t id, old_owner, owner, old;
 2164         int error, rv;
 2165 
 2166         id = td->td_tid;
 2167         uq = td->td_umtxq;
 2168 
 2169         if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
 2170             TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
 2171             &uq->uq_key)) != 0)
 2172                 return (error);
 2173 
 2174         if (timeout != NULL)
 2175                 abs_timeout_init2(&timo, timeout);
 2176 
 2177         umtxq_lock(&uq->uq_key);
 2178         pi = umtx_pi_lookup(&uq->uq_key);
 2179         if (pi == NULL) {
 2180                 new_pi = umtx_pi_alloc(M_NOWAIT);
 2181                 if (new_pi == NULL) {
 2182                         umtxq_unlock(&uq->uq_key);
 2183                         new_pi = umtx_pi_alloc(M_WAITOK);
 2184                         umtxq_lock(&uq->uq_key);
 2185                         pi = umtx_pi_lookup(&uq->uq_key);
 2186                         if (pi != NULL) {
 2187                                 umtx_pi_free(new_pi);
 2188                                 new_pi = NULL;
 2189                         }
 2190                 }
 2191                 if (new_pi != NULL) {
 2192                         new_pi->pi_key = uq->uq_key;
 2193                         umtx_pi_insert(new_pi);
 2194                         pi = new_pi;
 2195                 }
 2196         }
 2197         umtx_pi_ref(pi);
 2198         umtxq_unlock(&uq->uq_key);
 2199 
 2200         /*
 2201          * Care must be exercised when dealing with the umtx structure.  It
 2202          * can fault on any access.
 2203          */
 2204         for (;;) {
 2205                 /*
 2206                  * Try the uncontested case.  This should be done in userland.
 2207                  */
 2208                 rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
 2209                 /* The address was invalid. */
 2210                 if (rv == -1) {
 2211                         error = EFAULT;
 2212                         break;
 2213                 }
 2214                 /* The acquire succeeded. */
 2215                 if (rv == 0) {
 2216                         MPASS(owner == UMUTEX_UNOWNED);
 2217                         error = 0;
 2218                         break;
 2219                 }
 2220 
 2221                 if (owner == UMUTEX_RB_NOTRECOV) {
 2222                         error = ENOTRECOVERABLE;
 2223                         break;
 2224                 }
 2225 
 2226                 /*
 2227                  * Avoid overwriting a possible error from sleep (due
 2228                  * to a pending signal) with the suspension-check result.
 2229                  */
 2230                 if (error == 0) {
 2231                         error = thread_check_susp(td, true);
 2232                         if (error != 0)
 2233                                 break;
 2234                 }
 2235 
 2236                 /* If no one owns it but it is contested try to acquire it. */
 2237                 if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
 2238                         old_owner = owner;
 2239                         rv = casueword32(&m->m_owner, owner, &owner,
 2240                             id | UMUTEX_CONTESTED);
 2241                         /* The address was invalid. */
 2242                         if (rv == -1) {
 2243                                 error = EFAULT;
 2244                                 break;
 2245                         }
 2246                         if (rv == 1) {
 2247                                 if (error == 0) {
 2248                                         error = thread_check_susp(td, true);
 2249                                         if (error != 0)
 2250                                                 break;
 2251                                 }
 2252 
 2253                                 /*
 2254                                  * If this failed, the lock could have
 2255                                  * changed; restart.
 2256                                  */
 2257                                 continue;
 2258                         }
 2259 
 2260                         MPASS(rv == 0);
 2261                         MPASS(owner == old_owner);
 2262                         umtxq_lock(&uq->uq_key);
 2263                         umtxq_busy(&uq->uq_key);
 2264                         error = umtx_pi_claim(pi, td);
 2265                         umtxq_unbusy(&uq->uq_key);
 2266                         umtxq_unlock(&uq->uq_key);
 2267                         if (error != 0) {
 2268                                 /*
 2269                                  * Since we're going to return an
 2270                                  * error, restore the m_owner to its
 2271                                  * previous, unowned state to avoid
 2272                                  * compounding the problem.
 2273                                  */
 2274                                 (void)casuword32(&m->m_owner,
 2275                                     id | UMUTEX_CONTESTED, old_owner);
 2276                         }
 2277                         if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
 2278                                 error = EOWNERDEAD;
 2279                         break;
 2280                 }
 2281 
 2282                 if ((owner & ~UMUTEX_CONTESTED) == id) {
 2283                         error = EDEADLK;
 2284                         break;
 2285                 }
 2286 
 2287                 if (try != 0) {
 2288                         error = EBUSY;
 2289                         break;
 2290                 }
 2291 
 2292                 /*
 2293                  * If we caught a signal, we have retried and now
 2294                  * exit immediately.
 2295                  */
 2296                 if (error != 0)
 2297                         break;
 2298 
 2299                 umtxq_lock(&uq->uq_key);
 2300                 umtxq_busy(&uq->uq_key);
 2301                 umtxq_unlock(&uq->uq_key);
 2302 
 2303                 /*
 2304                  * Set the contested bit so that a release in user space
 2305          * knows to use the system call for unlock.  If this fails,
 2306          * either someone else has acquired the lock or it has been
 2307                  * released.
 2308                  */
 2309                 rv = casueword32(&m->m_owner, owner, &old, owner |
 2310                     UMUTEX_CONTESTED);
 2311 
 2312                 /* The address was invalid. */
 2313                 if (rv == -1) {
 2314                         umtxq_unbusy_unlocked(&uq->uq_key);
 2315                         error = EFAULT;
 2316                         break;
 2317                 }
 2318                 if (rv == 1) {
 2319                         umtxq_unbusy_unlocked(&uq->uq_key);
 2320                         error = thread_check_susp(td, true);
 2321                         if (error != 0)
 2322                                 break;
 2323 
 2324                         /*
 2325                          * The lock changed and we need to retry or we
 2326                          * lost a race to the thread unlocking the
 2327                          * umtx.  Note that the UMUTEX_RB_OWNERDEAD
 2328                          * value for owner is impossible here.
 2329                          */
 2330                         continue;
 2331                 }
 2332 
 2333                 umtxq_lock(&uq->uq_key);
 2334 
 2335                 /* We set the contested bit, sleep. */
 2336                 MPASS(old == owner);
 2337                 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
 2338                     "umtxpi", timeout == NULL ? NULL : &timo,
 2339                     (flags & USYNC_PROCESS_SHARED) != 0);
 2340                 if (error != 0)
 2341                         continue;
 2342 
 2343                 error = thread_check_susp(td, false);
 2344                 if (error != 0)
 2345                         break;
 2346         }
 2347 
 2348         umtxq_lock(&uq->uq_key);
 2349         umtx_pi_unref(pi);
 2350         umtxq_unlock(&uq->uq_key);
 2351 
 2352         umtx_key_release(&uq->uq_key);
 2353         return (error);
 2354 }
 2355 
 2356 /*
 2357  * Unlock a PI mutex.
 2358  */
 2359 static int
 2360 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
 2361 {
 2362         struct umtx_key key;
 2363         struct umtx_q *uq_first, *uq_first2, *uq_me;
 2364         struct umtx_pi *pi, *pi2;
 2365         uint32_t id, new_owner, old, owner;
 2366         int count, error, pri;
 2367 
 2368         id = td->td_tid;
 2369 
 2370 usrloop:
 2371         /*
 2372          * Make sure we own this mtx.
 2373          */
 2374         error = fueword32(&m->m_owner, &owner);
 2375         if (error == -1)
 2376                 return (EFAULT);
 2377 
 2378         if ((owner & ~UMUTEX_CONTESTED) != id)
 2379                 return (EPERM);
 2380 
 2381         new_owner = umtx_unlock_val(flags, rb);
 2382 
 2383         /* This should be done in userland */
 2384         if ((owner & UMUTEX_CONTESTED) == 0) {
 2385                 error = casueword32(&m->m_owner, owner, &old, new_owner);
 2386                 if (error == -1)
 2387                         return (EFAULT);
 2388                 if (error == 1) {
 2389                         error = thread_check_susp(td, true);
 2390                         if (error != 0)
 2391                                 return (error);
 2392                         goto usrloop;
 2393                 }
 2394                 if (old == owner)
 2395                         return (0);
 2396                 owner = old;
 2397         }
 2398 
 2399         /* We should only ever be in here for contested locks */
 2400         if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
 2401             TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
 2402             &key)) != 0)
 2403                 return (error);
 2404 
 2405         umtxq_lock(&key);
 2406         umtxq_busy(&key);
 2407         count = umtxq_count_pi(&key, &uq_first);
 2408         if (uq_first != NULL) {
 2409                 mtx_lock(&umtx_lock);
 2410                 pi = uq_first->uq_pi_blocked;
 2411                 KASSERT(pi != NULL, ("pi == NULL?"));
 2412                 if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
 2413                         mtx_unlock(&umtx_lock);
 2414                         umtxq_unbusy(&key);
 2415                         umtxq_unlock(&key);
 2416                         umtx_key_release(&key);
 2417                         /* userland messed up the mutex */
 2418                         return (EPERM);
 2419                 }
 2420                 uq_me = td->td_umtxq;
 2421                 if (pi->pi_owner == td)
 2422                         umtx_pi_disown(pi);
 2423                 /* Get the highest-priority thread that is still sleeping. */
 2424                 uq_first = TAILQ_FIRST(&pi->pi_blocked);
 2425                 while (uq_first != NULL &&
 2426                     (uq_first->uq_flags & UQF_UMTXQ) == 0) {
 2427                         uq_first = TAILQ_NEXT(uq_first, uq_lockq);
 2428                 }
 2429                 pri = PRI_MAX;
 2430                 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
 2431                         uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
 2432                         if (uq_first2 != NULL) {
 2433                                 if (pri > UPRI(uq_first2->uq_thread))
 2434                                         pri = UPRI(uq_first2->uq_thread);
 2435                         }
 2436                 }
 2437                 thread_lock(td);
 2438                 sched_lend_user_prio(td, pri);
 2439                 thread_unlock(td);
 2440                 mtx_unlock(&umtx_lock);
 2441                 if (uq_first)
 2442                         umtxq_signal_thread(uq_first);
 2443         } else {
 2444                 pi = umtx_pi_lookup(&key);
 2445                 /*
 2446                  * A umtx_pi can exist if a signal or timeout removed the
 2447                  * last waiter from the umtxq, but there is still
 2448                  * a thread in do_lock_pi() holding the umtx_pi.
 2449                  */
 2450                 if (pi != NULL) {
 2451                         /*
 2452                          * The umtx_pi can be unowned, such as when a thread
 2453                          * has just entered do_lock_pi(), allocated the
 2454                          * umtx_pi, and unlocked the umtxq.
 2455                          * If the current thread owns it, it must disown it.
 2456                          */
 2457                         mtx_lock(&umtx_lock);
 2458                         if (pi->pi_owner == td)
 2459                                 umtx_pi_disown(pi);
 2460                         mtx_unlock(&umtx_lock);
 2461                 }
 2462         }
 2463         umtxq_unlock(&key);
 2464 
 2465         /*
 2466          * When unlocking the umtx, it must be marked as unowned if
 2467          * zero or one thread is waiting for it.
 2468          * Otherwise, it must be marked as contested.
 2469          */
 2470 
 2471         if (count > 1)
 2472                 new_owner |= UMUTEX_CONTESTED;
 2473 again:
 2474         error = casueword32(&m->m_owner, owner, &old, new_owner);
 2475         if (error == 1) {
 2476                 error = thread_check_susp(td, false);
 2477                 if (error == 0)
 2478                         goto again;
 2479         }
 2480         umtxq_unbusy_unlocked(&key);
 2481         umtx_key_release(&key);
 2482         if (error == -1)
 2483                 return (EFAULT);
 2484         if (error == 0 && old != owner)
 2485                 return (EINVAL);
 2486         return (error);
 2487 }
 2488 
 2489 /*
 2490  * Lock a PP mutex.
 2491  */
 2492 static int
 2493 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
 2494     struct _umtx_time *timeout, int try)
 2495 {
 2496         struct abs_timeout timo;
 2497         struct umtx_q *uq, *uq2;
 2498         struct umtx_pi *pi;
 2499         uint32_t ceiling;
 2500         uint32_t owner, id;
 2501         int error, pri, old_inherited_pri, su, rv;
 2502 
 2503         id = td->td_tid;
 2504         uq = td->td_umtxq;
 2505         if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
 2506             TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
 2507             &uq->uq_key)) != 0)
 2508                 return (error);
 2509 
 2510         if (timeout != NULL)
 2511                 abs_timeout_init2(&timo, timeout);
 2512 
 2513         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
 2514         for (;;) {
 2515                 old_inherited_pri = uq->uq_inherited_pri;
 2516                 umtxq_lock(&uq->uq_key);
 2517                 umtxq_busy(&uq->uq_key);
 2518                 umtxq_unlock(&uq->uq_key);
 2519 
 2520                 rv = fueword32(&m->m_ceilings[0], &ceiling);
 2521                 if (rv == -1) {
 2522                         error = EFAULT;
 2523                         goto out;
 2524                 }
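                      /*
                       * Convert the user-supplied rtprio ceiling onto the
                       * kernel priority scale.  ceiling is unsigned, so a
                       * user value larger than RTP_PRIO_MAX wraps around
                       * and is rejected by the range check below.
                       */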
 2525                 ceiling = RTP_PRIO_MAX - ceiling;
 2526                 if (ceiling > RTP_PRIO_MAX) {
 2527                         error = EINVAL;
 2528                         goto out;
 2529                 }
 2530 
 2531                 mtx_lock(&umtx_lock);
 2532                 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
 2533                         mtx_unlock(&umtx_lock);
 2534                         error = EINVAL;
 2535                         goto out;
 2536                 }
 2537                 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
 2538                         uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
 2539                         thread_lock(td);
 2540                         if (uq->uq_inherited_pri < UPRI(td))
 2541                                 sched_lend_user_prio(td, uq->uq_inherited_pri);
 2542                         thread_unlock(td);
 2543                 }
 2544                 mtx_unlock(&umtx_lock);
 2545 
 2546                 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
 2547                     id | UMUTEX_CONTESTED);
 2548                 /* The address was invalid. */
 2549                 if (rv == -1) {
 2550                         error = EFAULT;
 2551                         break;
 2552                 }
 2553                 if (rv == 0) {
 2554                         MPASS(owner == UMUTEX_CONTESTED);
 2555                         error = 0;
 2556                         break;
 2557                 }
 2558                 /* rv == 1 */
 2559                 if (owner == UMUTEX_RB_OWNERDEAD) {
 2560                         rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
 2561                             &owner, id | UMUTEX_CONTESTED);
 2562                         if (rv == -1) {
 2563                                 error = EFAULT;
 2564                                 break;
 2565                         }
 2566                         if (rv == 0) {
 2567                                 MPASS(owner == UMUTEX_RB_OWNERDEAD);
 2568                                 error = EOWNERDEAD; /* success */
 2569                                 break;
 2570                         }
 2571 
 2572                         /*
 2573                          *  rv == 1; only check for suspension if we
 2574                          *  did not already catch a signal.  If we
 2575                          *  get an error from the check, the same
 2576                          *  condition is checked by the umtxq_sleep()
 2577                          *  call below, so we should clear the
 2578                          *  error so as not to skip the last loop iteration.
 2579                          */
 2580                         if (error == 0) {
 2581                                 error = thread_check_susp(td, false);
 2582                                 if (error == 0) {
 2583                                         if (try != 0)
 2584                                                 error = EBUSY;
 2585                                         else
 2586                                                 continue;
 2587                                 }
 2588                                 error = 0;
 2589                         }
 2590                 } else if (owner == UMUTEX_RB_NOTRECOV) {
 2591                         error = ENOTRECOVERABLE;
 2592                 }
 2593 
 2594                 if (try != 0)
 2595                         error = EBUSY;
 2596 
 2597                 /*
 2598                  * If we caught a signal, we have retried and now
 2599                  * exit immediately.
 2600                  */
 2601                 if (error != 0)
 2602                         break;
 2603 
 2604                 umtxq_lock(&uq->uq_key);
 2605                 umtxq_insert(uq);
 2606                 umtxq_unbusy(&uq->uq_key);
 2607                 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
 2608                     NULL : &timo);
 2609                 umtxq_remove(uq);
 2610                 umtxq_unlock(&uq->uq_key);
 2611 
 2612                 mtx_lock(&umtx_lock);
 2613                 uq->uq_inherited_pri = old_inherited_pri;
 2614                 pri = PRI_MAX;
 2615                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2616                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2617                         if (uq2 != NULL) {
 2618                                 if (pri > UPRI(uq2->uq_thread))
 2619                                         pri = UPRI(uq2->uq_thread);
 2620                         }
 2621                 }
 2622                 if (pri > uq->uq_inherited_pri)
 2623                         pri = uq->uq_inherited_pri;
 2624                 thread_lock(td);
 2625                 sched_lend_user_prio(td, pri);
 2626                 thread_unlock(td);
 2627                 mtx_unlock(&umtx_lock);
 2628         }
 2629 
 2630         if (error != 0 && error != EOWNERDEAD) {
 2631                 mtx_lock(&umtx_lock);
 2632                 uq->uq_inherited_pri = old_inherited_pri;
 2633                 pri = PRI_MAX;
 2634                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2635                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2636                         if (uq2 != NULL) {
 2637                                 if (pri > UPRI(uq2->uq_thread))
 2638                                         pri = UPRI(uq2->uq_thread);
 2639                         }
 2640                 }
 2641                 if (pri > uq->uq_inherited_pri)
 2642                         pri = uq->uq_inherited_pri;
 2643                 thread_lock(td);
 2644                 sched_lend_user_prio(td, pri);
 2645                 thread_unlock(td);
 2646                 mtx_unlock(&umtx_lock);
 2647         }
 2648 
 2649 out:
 2650         umtxq_unbusy_unlocked(&uq->uq_key);
 2651         umtx_key_release(&uq->uq_key);
 2652         return (error);
 2653 }
 2654 
 2655 /*
 2656  * Unlock a PP mutex.
 2657  */
 2658 static int
 2659 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
 2660 {
 2661         struct umtx_key key;
 2662         struct umtx_q *uq, *uq2;
 2663         struct umtx_pi *pi;
 2664         uint32_t id, owner, rceiling;
 2665         int error, pri, new_inherited_pri, su;
 2666 
 2667         id = td->td_tid;
 2668         uq = td->td_umtxq;
 2669         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
 2670 
 2671         /*
 2672          * Make sure we own this mtx.
 2673          */
 2674         error = fueword32(&m->m_owner, &owner);
 2675         if (error == -1)
 2676                 return (EFAULT);
 2677 
 2678         if ((owner & ~UMUTEX_CONTESTED) != id)
 2679                 return (EPERM);
 2680 
 2681         error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
 2682         if (error != 0)
 2683                 return (error);
 2684 
 2685         if (rceiling == -1)
 2686                 new_inherited_pri = PRI_MAX;
 2687         else {
 2688                 rceiling = RTP_PRIO_MAX - rceiling;
 2689                 if (rceiling > RTP_PRIO_MAX)
 2690                         return (EINVAL);
 2691                 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
 2692         }
 2693 
 2694         if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
 2695             TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
 2696             &key)) != 0)
 2697                 return (error);
 2698         umtxq_lock(&key);
 2699         umtxq_busy(&key);
 2700         umtxq_unlock(&key);
 2701         /*
 2702          * For a priority-protected mutex, always set the unlocked state
 2703          * to UMUTEX_CONTESTED, so that userland always enters the kernel
 2704          * to lock the mutex.  This is necessary because thread priority
 2705          * has to be adjusted for such mutexes.
 2706          */
 2707         error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
 2708             UMUTEX_CONTESTED);
 2709 
 2710         umtxq_lock(&key);
 2711         if (error == 0)
 2712                 umtxq_signal(&key, 1);
 2713         umtxq_unbusy(&key);
 2714         umtxq_unlock(&key);
 2715 
 2716         if (error == -1)
 2717                 error = EFAULT;
 2718         else {
 2719                 mtx_lock(&umtx_lock);
 2720                 if (su != 0)
 2721                         uq->uq_inherited_pri = new_inherited_pri;
 2722                 pri = PRI_MAX;
 2723                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2724                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2725                         if (uq2 != NULL) {
 2726                                 if (pri > UPRI(uq2->uq_thread))
 2727                                         pri = UPRI(uq2->uq_thread);
 2728                         }
 2729                 }
 2730                 if (pri > uq->uq_inherited_pri)
 2731                         pri = uq->uq_inherited_pri;
 2732                 thread_lock(td);
 2733                 sched_lend_user_prio(td, pri);
 2734                 thread_unlock(td);
 2735                 mtx_unlock(&umtx_lock);
 2736         }
 2737         umtx_key_release(&key);
 2738         return (error);
 2739 }
 2740 
 2741 static int
 2742 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
 2743     uint32_t *old_ceiling)
 2744 {
 2745         struct umtx_q *uq;
 2746         uint32_t flags, id, owner, save_ceiling;
 2747         int error, rv, rv1;
 2748 
 2749         error = fueword32(&m->m_flags, &flags);
 2750         if (error == -1)
 2751                 return (EFAULT);
 2752         if ((flags & UMUTEX_PRIO_PROTECT) == 0)
 2753                 return (EINVAL);
 2754         if (ceiling > RTP_PRIO_MAX)
 2755                 return (EINVAL);
 2756         id = td->td_tid;
 2757         uq = td->td_umtxq;
 2758         if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
 2759             TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
 2760             &uq->uq_key)) != 0)
 2761                 return (error);
 2762         for (;;) {
 2763                 umtxq_lock(&uq->uq_key);
 2764                 umtxq_busy(&uq->uq_key);
 2765                 umtxq_unlock(&uq->uq_key);
 2766 
 2767                 rv = fueword32(&m->m_ceilings[0], &save_ceiling);
 2768                 if (rv == -1) {
 2769                         error = EFAULT;
 2770                         break;
 2771                 }
 2772 
 2773                 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
 2774                     id | UMUTEX_CONTESTED);
 2775                 if (rv == -1) {
 2776                         error = EFAULT;
 2777                         break;
 2778                 }
 2779 
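                      /*
                       * On success we hold the mutex as (id |
                       * UMUTEX_CONTESTED): store the new ceiling, then
                       * release the mutex back to the UMUTEX_CONTESTED
                       * state so lockers keep entering the kernel.
                       */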
 2780                 if (rv == 0) {
 2781                         MPASS(owner == UMUTEX_CONTESTED);
 2782                         rv = suword32(&m->m_ceilings[0], ceiling);
 2783                         rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
 2784                         error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
 2785                         break;
 2786                 }
 2787 
 2788                 if ((owner & ~UMUTEX_CONTESTED) == id) {
 2789                         rv = suword32(&m->m_ceilings[0], ceiling);
 2790                         error = rv == 0 ? 0 : EFAULT;
 2791                         break;
 2792                 }
 2793 
 2794                 if (owner == UMUTEX_RB_OWNERDEAD) {
 2795                         error = EOWNERDEAD;
 2796                         break;
 2797                 } else if (owner == UMUTEX_RB_NOTRECOV) {
 2798                         error = ENOTRECOVERABLE;
 2799                         break;
 2800                 }
 2801 
 2802                 /*
 2803                  * If we caught a signal, we have retried and now
 2804                  * exit immediately.
 2805                  */
 2806                 if (error != 0)
 2807                         break;
 2808 
 2809                 /*
 2810                  * We set the contested bit; sleep.  Otherwise the lock changed
 2811                  * and we need to retry, or we lost a race to the thread
 2812                  * unlocking the umtx.
 2813                  */
 2814                 umtxq_lock(&uq->uq_key);
 2815                 umtxq_insert(uq);
 2816                 umtxq_unbusy(&uq->uq_key);
 2817                 error = umtxq_sleep(uq, "umtxpp", NULL);
 2818                 umtxq_remove(uq);
 2819                 umtxq_unlock(&uq->uq_key);
 2820         }
 2821         umtxq_lock(&uq->uq_key);
 2822         if (error == 0)
 2823                 umtxq_signal(&uq->uq_key, INT_MAX);
 2824         umtxq_unbusy(&uq->uq_key);
 2825         umtxq_unlock(&uq->uq_key);
 2826         umtx_key_release(&uq->uq_key);
 2827         if (error == 0 && old_ceiling != NULL) {
 2828                 rv = suword32(old_ceiling, save_ceiling);
 2829                 error = rv == 0 ? 0 : EFAULT;
 2830         }
 2831         return (error);
 2832 }
 2833 
 2834 /*
 2835  * Lock a userland POSIX mutex.
 2836  */
 2837 static int
 2838 do_lock_umutex(struct thread *td, struct umutex *m,
 2839     struct _umtx_time *timeout, int mode)
 2840 {
 2841         uint32_t flags;
 2842         int error;
 2843 
 2844         error = fueword32(&m->m_flags, &flags);
 2845         if (error == -1)
 2846                 return (EFAULT);
 2847 
 2848         switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2849         case 0:
 2850                 error = do_lock_normal(td, m, flags, timeout, mode);
 2851                 break;
 2852         case UMUTEX_PRIO_INHERIT:
 2853                 error = do_lock_pi(td, m, flags, timeout, mode);
 2854                 break;
 2855         case UMUTEX_PRIO_PROTECT:
 2856                 error = do_lock_pp(td, m, flags, timeout, mode);
 2857                 break;
 2858         default:
 2859                 return (EINVAL);
 2860         }
 2861         if (timeout == NULL) {
 2862                 if (error == EINTR && mode != _UMUTEX_WAIT)
 2863                         error = ERESTART;
 2864         } else {
 2865                 /* Timed-locking is not restarted. */
 2866                 if (error == ERESTART)
 2867                         error = EINTR;
 2868         }
 2869         return (error);
 2870 }
 2871 
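      /*
       * Userland fast-path sketch (illustrative; not part of this
       * file).  A libthr-style lock would first try the uncontested
       * CAS itself and enter the kernel only when that fails:
       *
       *      if (atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, id))
       *              return (0);     (acquired without a syscall)
       *      return (_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL));
       *
       * This is what the "This should be done in userland" comments
       * in the lock routines above refer to.
       */
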
 2872 /*
 2873  * Unlock a userland POSIX mutex.
 2874  */
 2875 static int
 2876 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
 2877 {
 2878         uint32_t flags;
 2879         int error;
 2880 
 2881         error = fueword32(&m->m_flags, &flags);
 2882         if (error == -1)
 2883                 return (EFAULT);
 2884 
 2885         switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2886         case 0:
 2887                 return (do_unlock_normal(td, m, flags, rb));
 2888         case UMUTEX_PRIO_INHERIT:
 2889                 return (do_unlock_pi(td, m, flags, rb));
 2890         case UMUTEX_PRIO_PROTECT:
 2891                 return (do_unlock_pp(td, m, flags, rb));
 2892         }
 2893 
 2894         return (EINVAL);
 2895 }
 2896 
 2897 static int
 2898 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
 2899     struct timespec *timeout, u_long wflags)
 2900 {
 2901         struct abs_timeout timo;
 2902         struct umtx_q *uq;
 2903         uint32_t flags, clockid, hasw;
 2904         int error;
 2905 
 2906         uq = td->td_umtxq;
 2907         error = fueword32(&cv->c_flags, &flags);
 2908         if (error == -1)
 2909                 return (EFAULT);
 2910         error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
 2911         if (error != 0)
 2912                 return (error);
 2913 
 2914         if ((wflags & CVWAIT_CLOCKID) != 0) {
 2915                 error = fueword32(&cv->c_clockid, &clockid);
 2916                 if (error == -1) {
 2917                         umtx_key_release(&uq->uq_key);
 2918                         return (EFAULT);
 2919                 }
 2920                 if (clockid < CLOCK_REALTIME ||
 2921                     clockid >= CLOCK_THREAD_CPUTIME_ID) {
 2922                         /* Only hardware clock ids will work here. */
 2923                         umtx_key_release(&uq->uq_key);
 2924                         return (EINVAL);
 2925                 }
 2926         } else {
 2927                 clockid = CLOCK_REALTIME;
 2928         }
 2929 
 2930         umtxq_lock(&uq->uq_key);
 2931         umtxq_busy(&uq->uq_key);
 2932         umtxq_insert(uq);
 2933         umtxq_unlock(&uq->uq_key);
 2934 
 2935         /*
 2936          * Set c_has_waiters to 1 before releasing the user mutex, but
 2937          * avoid dirtying the cache line when the flag is already set.
 2938          */
 2939         error = fueword32(&cv->c_has_waiters, &hasw);
 2940         if (error == 0 && hasw == 0)
 2941                 suword32(&cv->c_has_waiters, 1);
 2942 
 2943         umtxq_unbusy_unlocked(&uq->uq_key);
 2944 
 2945         error = do_unlock_umutex(td, m, false);
 2946 
 2947         if (timeout != NULL)
 2948                 abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
 2949                     timeout);
 2950 
 2951         umtxq_lock(&uq->uq_key);
 2952         if (error == 0) {
 2953                 error = umtxq_sleep(uq, "ucond", timeout == NULL ?
 2954                     NULL : &timo);
 2955         }
 2956 
 2957         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 2958                 error = 0;
 2959         else {
 2960                 /*
 2961                  * This must be a timeout, an interruption by a
 2962                  * signal, or a spurious wakeup; clear the
 2963                  * c_has_waiters flag when necessary.
 2964                  */
 2965                 umtxq_busy(&uq->uq_key);
 2966                 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
 2967                         int oldlen = uq->uq_cur_queue->length;
 2968                         umtxq_remove(uq);
 2969                         if (oldlen == 1) {
 2970                                 umtxq_unlock(&uq->uq_key);
 2971                                 suword32(&cv->c_has_waiters, 0);
 2972                                 umtxq_lock(&uq->uq_key);
 2973                         }
 2974                 }
 2975                 umtxq_unbusy(&uq->uq_key);
 2976                 if (error == ERESTART)
 2977                         error = EINTR;
 2978         }
 2979 
 2980         umtxq_unlock(&uq->uq_key);
 2981         umtx_key_release(&uq->uq_key);
 2982         return (error);
 2983 }
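
/*
 * A minimal userland sketch of the matching wait side (an illustrative
 * assumption, not code from this file; cv, m and tid are the condvar,
 * the owned mutex and the caller's thread id): both objects go down in
 * one call, the kernel unlocks the mutex on the caller's behalf via
 * do_unlock_umutex() above, and the caller re-locks it after waking,
 * exactly as pthread_cond_wait() requires.
 *
 *	error = _umtx_op(cv, UMTX_OP_CV_WAIT, 0, m, NULL);
 *	sketch_mutex_lock(m, tid);
 */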
 2984 
 2985 /*
 2986  * Signal a userland condition variable.
 2987  */
 2988 static int
 2989 do_cv_signal(struct thread *td, struct ucond *cv)
 2990 {
 2991         struct umtx_key key;
 2992         int error, cnt, nwake;
 2993         uint32_t flags;
 2994 
 2995         error = fueword32(&cv->c_flags, &flags);
 2996         if (error == -1)
 2997                 return (EFAULT);
 2998         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 2999                 return (error);
 3000         umtxq_lock(&key);
 3001         umtxq_busy(&key);
 3002         cnt = umtxq_count(&key);
 3003         nwake = umtxq_signal(&key, 1);
 3004         if (cnt <= nwake) {
 3005                 umtxq_unlock(&key);
 3006                 error = suword32(&cv->c_has_waiters, 0);
 3007                 if (error == -1)
 3008                         error = EFAULT;
 3009                 umtxq_lock(&key);
 3010         }
 3011         umtxq_unbusy(&key);
 3012         umtxq_unlock(&key);
 3013         umtx_key_release(&key);
 3014         return (error);
 3015 }
 3016 
 3017 static int
 3018 do_cv_broadcast(struct thread *td, struct ucond *cv)
 3019 {
 3020         struct umtx_key key;
 3021         int error;
 3022         uint32_t flags;
 3023 
 3024         error = fueword32(&cv->c_flags, &flags);
 3025         if (error == -1)
 3026                 return (EFAULT);
 3027         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 3028                 return (error);
 3029 
 3030         umtxq_lock(&key);
 3031         umtxq_busy(&key);
 3032         umtxq_signal(&key, INT_MAX);
 3033         umtxq_unlock(&key);
 3034 
 3035         error = suword32(&cv->c_has_waiters, 0);
 3036         if (error == -1)
 3037                 error = EFAULT;
 3038 
 3039         umtxq_unbusy_unlocked(&key);
 3040 
 3041         umtx_key_release(&key);
 3042         return (error);
 3043 }
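
/*
 * The c_has_waiters word exists so the signalling side can skip the
 * syscall entirely when nobody waits: do_cv_wait() sets it before
 * sleeping and the two functions above clear it once the queue drains.
 * A minimal userland sketch (an illustrative assumption, not code from
 * this file):
 *
 *	if (cv->c_has_waiters)
 *		(void)_umtx_op(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL);
 */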
 3044 
 3045 static int
 3046 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
 3047     struct _umtx_time *timeout)
 3048 {
 3049         struct abs_timeout timo;
 3050         struct umtx_q *uq;
 3051         uint32_t flags, wrflags;
 3052         int32_t state, oldstate;
 3053         int32_t blocked_readers;
 3054         int error, error1, rv;
 3055 
 3056         uq = td->td_umtxq;
 3057         error = fueword32(&rwlock->rw_flags, &flags);
 3058         if (error == -1)
 3059                 return (EFAULT);
 3060         error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
 3061         if (error != 0)
 3062                 return (error);
 3063 
 3064         if (timeout != NULL)
 3065                 abs_timeout_init2(&timo, timeout);
 3066 
 3067         wrflags = URWLOCK_WRITE_OWNER;
 3068         if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
 3069                 wrflags |= URWLOCK_WRITE_WAITERS;
 3070 
 3071         for (;;) {
 3072                 rv = fueword32(&rwlock->rw_state, &state);
 3073                 if (rv == -1) {
 3074                         umtx_key_release(&uq->uq_key);
 3075                         return (EFAULT);
 3076                 }
 3077 
 3078                 /* try to lock it */
 3079                 while (!(state & wrflags)) {
 3080                         if (__predict_false(URWLOCK_READER_COUNT(state) ==
 3081                             URWLOCK_MAX_READERS)) {
 3082                                 umtx_key_release(&uq->uq_key);
 3083                                 return (EAGAIN);
 3084                         }
 3085                         rv = casueword32(&rwlock->rw_state, state,
 3086                             &oldstate, state + 1);
 3087                         if (rv == -1) {
 3088                                 umtx_key_release(&uq->uq_key);
 3089                                 return (EFAULT);
 3090                         }
 3091                         if (rv == 0) {
 3092                                 MPASS(oldstate == state);
 3093                                 umtx_key_release(&uq->uq_key);
 3094                                 return (0);
 3095                         }
 3096                         error = thread_check_susp(td, true);
 3097                         if (error != 0)
 3098                                 break;
 3099                         state = oldstate;
 3100                 }
 3101 
 3102                 if (error)
 3103                         break;
 3104 
 3105                 /* grab monitor lock */
 3106                 umtxq_lock(&uq->uq_key);
 3107                 umtxq_busy(&uq->uq_key);
 3108                 umtxq_unlock(&uq->uq_key);
 3109 
 3110                 /*
 3111                  * Re-read the state, in case it changed between the
 3112                  * try-lock above and the check below.
 3113                  */
 3114                 rv = fueword32(&rwlock->rw_state, &state);
 3115                 if (rv == -1)
 3116                         error = EFAULT;
 3117 
 3118                 /* set read contention bit */
 3119                 while (error == 0 && (state & wrflags) &&
 3120                     !(state & URWLOCK_READ_WAITERS)) {
 3121                         rv = casueword32(&rwlock->rw_state, state,
 3122                             &oldstate, state | URWLOCK_READ_WAITERS);
 3123                         if (rv == -1) {
 3124                                 error = EFAULT;
 3125                                 break;
 3126                         }
 3127                         if (rv == 0) {
 3128                                 MPASS(oldstate == state);
 3129                                 goto sleep;
 3130                         }
 3131                         state = oldstate;
 3132                         error = thread_check_susp(td, false);
 3133                         if (error != 0)
 3134                                 break;
 3135                 }
 3136                 if (error != 0) {
 3137                         umtxq_unbusy_unlocked(&uq->uq_key);
 3138                         break;
 3139                 }
 3140 
 3141                 /* The state changed while setting the flag; restart. */
 3142                 if (!(state & wrflags)) {
 3143                         umtxq_unbusy_unlocked(&uq->uq_key);
 3144                         error = thread_check_susp(td, true);
 3145                         if (error != 0)
 3146                                 break;
 3147                         continue;
 3148                 }
 3149 
 3150 sleep:
 3151                 /*
 3152                  * The contention bit is set; before sleeping,
 3153                  * increase the read waiter count.
 3154                  */
 3155                 rv = fueword32(&rwlock->rw_blocked_readers,
 3156                     &blocked_readers);
 3157                 if (rv == -1) {
 3158                         umtxq_unbusy_unlocked(&uq->uq_key);
 3159                         error = EFAULT;
 3160                         break;
 3161                 }
 3162                 suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
 3163 
 3164                 while (state & wrflags) {
 3165                         umtxq_lock(&uq->uq_key);
 3166                         umtxq_insert(uq);
 3167                         umtxq_unbusy(&uq->uq_key);
 3168 
 3169                         error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
 3170                             NULL : &timo);
 3171 
 3172                         umtxq_busy(&uq->uq_key);
 3173                         umtxq_remove(uq);
 3174                         umtxq_unlock(&uq->uq_key);
 3175                         if (error)
 3176                                 break;
 3177                         rv = fueword32(&rwlock->rw_state, &state);
 3178                         if (rv == -1) {
 3179                                 error = EFAULT;
 3180                                 break;
 3181                         }
 3182                 }
 3183 
 3184                 /* Decrease read waiter count; clear contention bit if last. */
 3185                 rv = fueword32(&rwlock->rw_blocked_readers,
 3186                     &blocked_readers);
 3187                 if (rv == -1) {
 3188                         umtxq_unbusy_unlocked(&uq->uq_key);
 3189                         error = EFAULT;
 3190                         break;
 3191                 }
 3192                 suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
 3193                 if (blocked_readers == 1) {
 3194                         rv = fueword32(&rwlock->rw_state, &state);
 3195                         if (rv == -1) {
 3196                                 umtxq_unbusy_unlocked(&uq->uq_key);
 3197                                 error = EFAULT;
 3198                                 break;
 3199                         }
 3200                         for (;;) {
 3201                                 rv = casueword32(&rwlock->rw_state, state,
 3202                                     &oldstate, state & ~URWLOCK_READ_WAITERS);
 3203                                 if (rv == -1) {
 3204                                         error = EFAULT;
 3205                                         break;
 3206                                 }
 3207                                 if (rv == 0) {
 3208                                         MPASS(oldstate == state);
 3209                                         break;
 3210                                 }
 3211                                 state = oldstate;
 3212                                 error1 = thread_check_susp(td, false);
 3213                                 if (error1 != 0) {
 3214                                         if (error == 0)
 3215                                                 error = error1;
 3216                                         break;
 3217                                 }
 3218                         }
 3219                 }
 3220 
 3221                 umtxq_unbusy_unlocked(&uq->uq_key);
 3222                 if (error != 0)
 3223                         break;
 3224         }
 3225         umtx_key_release(&uq->uq_key);
 3226         if (error == ERESTART)
 3227                 error = EINTR;
 3228         return (error);
 3229 }
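
/*
 * The whole read-lock protocol hangs off the single rw_state word: the
 * high bits carry URWLOCK_WRITE_OWNER, URWLOCK_WRITE_WAITERS and
 * URWLOCK_READ_WAITERS, while URWLOCK_READER_COUNT() masks out the
 * reader count held in the low bits.  A minimal userland fast-path
 * sketch (an illustrative assumption, not code from this file); when it
 * fails, the slow path is UMTX_OP_RW_RDLOCK, which lands in the
 * function above:
 *
 *	static int
 *	sketch_rw_tryrdlock(struct urwlock *rw)
 *	{
 *		int32_t state;
 *
 *		state = rw->rw_state;
 *		while ((state & URWLOCK_WRITE_OWNER) == 0 &&
 *		    URWLOCK_READER_COUNT(state) < URWLOCK_MAX_READERS) {
 *			if (atomic_cmpset_acq_32(
 *			    (volatile uint32_t *)&rw->rw_state, state,
 *			    state + 1))
 *				return (0);
 *			state = rw->rw_state;
 *		}
 *		return (EBUSY);
 *	}
 */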
 3230 
 3231 static int
 3232 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
 3233 {
 3234         struct abs_timeout timo;
 3235         struct umtx_q *uq;
 3236         uint32_t flags;
 3237         int32_t state, oldstate;
 3238         int32_t blocked_writers;
 3239         int32_t blocked_readers;
 3240         int error, error1, rv;
 3241 
 3242         uq = td->td_umtxq;
 3243         error = fueword32(&rwlock->rw_flags, &flags);
 3244         if (error == -1)
 3245                 return (EFAULT);
 3246         error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
 3247         if (error != 0)
 3248                 return (error);
 3249 
 3250         if (timeout != NULL)
 3251                 abs_timeout_init2(&timo, timeout);
 3252 
 3253         blocked_readers = 0;
 3254         for (;;) {
 3255                 rv = fueword32(&rwlock->rw_state, &state);
 3256                 if (rv == -1) {
 3257                         umtx_key_release(&uq->uq_key);
 3258                         return (EFAULT);
 3259                 }
 3260                 while ((state & URWLOCK_WRITE_OWNER) == 0 &&
 3261                     URWLOCK_READER_COUNT(state) == 0) {
 3262                         rv = casueword32(&rwlock->rw_state, state,
 3263                             &oldstate, state | URWLOCK_WRITE_OWNER);
 3264                         if (rv == -1) {
 3265                                 umtx_key_release(&uq->uq_key);
 3266                                 return (EFAULT);
 3267                         }
 3268                         if (rv == 0) {
 3269                                 MPASS(oldstate == state);
 3270                                 umtx_key_release(&uq->uq_key);
 3271                                 return (0);
 3272                         }
 3273                         state = oldstate;
 3274                         error = thread_check_susp(td, true);
 3275                         if (error != 0)
 3276                                 break;
 3277                 }
 3278 
 3279                 if (error) {
 3280                         if ((state & (URWLOCK_WRITE_OWNER |
 3281                             URWLOCK_WRITE_WAITERS)) == 0 &&
 3282                             blocked_readers != 0) {
 3283                                 umtxq_lock(&uq->uq_key);
 3284                                 umtxq_busy(&uq->uq_key);
 3285                                 umtxq_signal_queue(&uq->uq_key, INT_MAX,
 3286                                     UMTX_SHARED_QUEUE);
 3287                                 umtxq_unbusy(&uq->uq_key);
 3288                                 umtxq_unlock(&uq->uq_key);
 3289                         }
 3290 
 3291                         break;
 3292                 }
 3293 
 3294                 /* grab monitor lock */
 3295                 umtxq_lock(&uq->uq_key);
 3296                 umtxq_busy(&uq->uq_key);
 3297                 umtxq_unlock(&uq->uq_key);
 3298 
 3299                 /*
 3300                  * Re-read the state, in case it changed between the
 3301                  * try-lock above and the check below.
 3302                  */
 3303                 rv = fueword32(&rwlock->rw_state, &state);
 3304                 if (rv == -1)
 3305                         error = EFAULT;
 3306 
 3307                 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
 3308                     URWLOCK_READER_COUNT(state) != 0) &&
 3309                     (state & URWLOCK_WRITE_WAITERS) == 0) {
 3310                         rv = casueword32(&rwlock->rw_state, state,
 3311                             &oldstate, state | URWLOCK_WRITE_WAITERS);
 3312                         if (rv == -1) {
 3313                                 error = EFAULT;
 3314                                 break;
 3315                         }
 3316                         if (rv == 0) {
 3317                                 MPASS(oldstate == state);
 3318                                 goto sleep;
 3319                         }
 3320                         state = oldstate;
 3321                         error = thread_check_susp(td, false);
 3322                         if (error != 0)
 3323                                 break;
 3324                 }
 3325                 if (error != 0) {
 3326                         umtxq_unbusy_unlocked(&uq->uq_key);
 3327                         break;
 3328                 }
 3329 
 3330                 if ((state & URWLOCK_WRITE_OWNER) == 0 &&
 3331                     URWLOCK_READER_COUNT(state) == 0) {
 3332                         umtxq_unbusy_unlocked(&uq->uq_key);
 3333                         error = thread_check_susp(td, false);
 3334                         if (error != 0)
 3335                                 break;
 3336                         continue;
 3337                 }
 3338 sleep:
 3339                 rv = fueword32(&rwlock->rw_blocked_writers,
 3340                     &blocked_writers);
 3341                 if (rv == -1) {
 3342                         umtxq_unbusy_unlocked(&uq->uq_key);
 3343                         error = EFAULT;
 3344                         break;
 3345                 }
 3346                 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);
 3347 
 3348                 while ((state & URWLOCK_WRITE_OWNER) ||
 3349                     URWLOCK_READER_COUNT(state) != 0) {
 3350                         umtxq_lock(&uq->uq_key);
 3351                         umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
 3352                         umtxq_unbusy(&uq->uq_key);
 3353 
 3354                         error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
 3355                             NULL : &timo);
 3356 
 3357                         umtxq_busy(&uq->uq_key);
 3358                         umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
 3359                         umtxq_unlock(&uq->uq_key);
 3360                         if (error)
 3361                                 break;
 3362                         rv = fueword32(&rwlock->rw_state, &state);
 3363                         if (rv == -1) {
 3364                                 error = EFAULT;
 3365                                 break;
 3366                         }
 3367                 }
 3368 
 3369                 rv = fueword32(&rwlock->rw_blocked_writers,
 3370                     &blocked_writers);
 3371                 if (rv == -1) {
 3372                         umtxq_unbusy_unlocked(&uq->uq_key);
 3373                         error = EFAULT;
 3374                         break;
 3375                 }
 3376                 suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
 3377                 if (blocked_writers == 1) {
 3378                         rv = fueword32(&rwlock->rw_state, &state);
 3379                         if (rv == -1) {
 3380                                 umtxq_unbusy_unlocked(&uq->uq_key);
 3381                                 error = EFAULT;
 3382                                 break;
 3383                         }
 3384                         for (;;) {
 3385                                 rv = casueword32(&rwlock->rw_state, state,
 3386                                     &oldstate, state & ~URWLOCK_WRITE_WAITERS);
 3387                                 if (rv == -1) {
 3388                                         error = EFAULT;
 3389                                         break;
 3390                                 }
 3391                                 if (rv == 0) {
 3392                                         MPASS(oldstate == state);
 3393                                         break;
 3394                                 }
 3395                                 state = oldstate;
 3396                                 error1 = thread_check_susp(td, false);
 3397                                 /*
 3398                                  * We are leaving the URWLOCK_WRITE_WAITERS
 3399                                  * flag behind, but this does not harm
 3400                                  * correctness.
 3401                                  */
 3402                                 if (error1 != 0) {
 3403                                         if (error == 0)
 3404                                                 error = error1;
 3405                                         break;
 3406                                 }
 3407                         }
 3408                         rv = fueword32(&rwlock->rw_blocked_readers,
 3409                             &blocked_readers);
 3410                         if (rv == -1) {
 3411                                 umtxq_unbusy_unlocked(&uq->uq_key);
 3412                                 error = EFAULT;
 3413                                 break;
 3414                         }
 3415                 } else
 3416                         blocked_readers = 0;
 3417 
 3418                 umtxq_unbusy_unlocked(&uq->uq_key);
 3419         }
 3420 
 3421         umtx_key_release(&uq->uq_key);
 3422         if (error == ERESTART)
 3423                 error = EINTR;
 3424         return (error);
 3425 }
 3426 
 3427 static int
 3428 do_rw_unlock(struct thread *td, struct urwlock *rwlock)
 3429 {
 3430         struct umtx_q *uq;
 3431         uint32_t flags;
 3432         int32_t state, oldstate;
 3433         int error, rv, q, count;
 3434 
 3435         uq = td->td_umtxq;
 3436         error = fueword32(&rwlock->rw_flags, &flags);
 3437         if (error == -1)
 3438                 return (EFAULT);
 3439         error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
 3440         if (error != 0)
 3441                 return (error);
 3442 
 3443         error = fueword32(&rwlock->rw_state, &state);
 3444         if (error == -1) {
 3445                 error = EFAULT;
 3446                 goto out;
 3447         }
 3448         if (state & URWLOCK_WRITE_OWNER) {
 3449                 for (;;) {
 3450                         rv = casueword32(&rwlock->rw_state, state,
 3451                             &oldstate, state & ~URWLOCK_WRITE_OWNER);
 3452                         if (rv == -1) {
 3453                                 error = EFAULT;
 3454                                 goto out;
 3455                         }
 3456                         if (rv == 1) {
 3457                                 state = oldstate;
 3458                                 if (!(oldstate & URWLOCK_WRITE_OWNER)) {
 3459                                         error = EPERM;
 3460                                         goto out;
 3461                                 }
 3462                                 error = thread_check_susp(td, true);
 3463                                 if (error != 0)
 3464                                         goto out;
 3465                         } else
 3466                                 break;
 3467                 }
 3468         } else if (URWLOCK_READER_COUNT(state) != 0) {
 3469                 for (;;) {
 3470                         rv = casueword32(&rwlock->rw_state, state,
 3471                             &oldstate, state - 1);
 3472                         if (rv == -1) {
 3473                                 error = EFAULT;
 3474                                 goto out;
 3475                         }
 3476                         if (rv == 1) {
 3477                                 state = oldstate;
 3478                                 if (URWLOCK_READER_COUNT(oldstate) == 0) {
 3479                                         error = EPERM;
 3480                                         goto out;
 3481                                 }
 3482                                 error = thread_check_susp(td, true);
 3483                                 if (error != 0)
 3484                                         goto out;
 3485                         } else
 3486                                 break;
 3487                 }
 3488         } else {
 3489                 error = EPERM;
 3490                 goto out;
 3491         }
 3492 
 3493         count = 0;
 3494 
 3495         if (!(flags & URWLOCK_PREFER_READER)) {
 3496                 if (state & URWLOCK_WRITE_WAITERS) {
 3497                         count = 1;
 3498                         q = UMTX_EXCLUSIVE_QUEUE;
 3499                 } else if (state & URWLOCK_READ_WAITERS) {
 3500                         count = INT_MAX;
 3501                         q = UMTX_SHARED_QUEUE;
 3502                 }
 3503         } else {
 3504                 if (state & URWLOCK_READ_WAITERS) {
 3505                         count = INT_MAX;
 3506                         q = UMTX_SHARED_QUEUE;
 3507                 } else if (state & URWLOCK_WRITE_WAITERS) {
 3508                         count = 1;
 3509                         q = UMTX_EXCLUSIVE_QUEUE;
 3510                 }
 3511         }
 3512 
 3513         if (count) {
 3514                 umtxq_lock(&uq->uq_key);
 3515                 umtxq_busy(&uq->uq_key);
 3516                 umtxq_signal_queue(&uq->uq_key, count, q);
 3517                 umtxq_unbusy(&uq->uq_key);
 3518                 umtxq_unlock(&uq->uq_key);
 3519         }
 3520 out:
 3521         umtx_key_release(&uq->uq_key);
 3522         return (error);
 3523 }
 3524 
 3525 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
 3526 static int
 3527 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
 3528 {
 3529         struct abs_timeout timo;
 3530         struct umtx_q *uq;
 3531         uint32_t flags, count, count1;
 3532         int error, rv, rv1;
 3533 
 3534         uq = td->td_umtxq;
 3535         error = fueword32(&sem->_flags, &flags);
 3536         if (error == -1)
 3537                 return (EFAULT);
 3538         error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
 3539         if (error != 0)
 3540                 return (error);
 3541 
 3542         if (timeout != NULL)
 3543                 abs_timeout_init2(&timo, timeout);
 3544 
 3545 again:
 3546         umtxq_lock(&uq->uq_key);
 3547         umtxq_busy(&uq->uq_key);
 3548         umtxq_insert(uq);
 3549         umtxq_unlock(&uq->uq_key);
 3550         rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
 3551         if (rv == 0)
 3552                 rv1 = fueword32(&sem->_count, &count);
 3553         if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) ||
 3554             (rv == 1 && count1 == 0)) {
 3555                 umtxq_lock(&uq->uq_key);
 3556                 umtxq_unbusy(&uq->uq_key);
 3557                 umtxq_remove(uq);
 3558                 umtxq_unlock(&uq->uq_key);
 3559                 if (rv == 1) {
 3560                         rv = thread_check_susp(td, true);
 3561                         if (rv == 0)
 3562                                 goto again;
 3563                         error = rv;
 3564                         goto out;
 3565                 }
 3566                 if (rv == 0)
 3567                         rv = rv1;
 3568                 error = rv == -1 ? EFAULT : 0;
 3569                 goto out;
 3570         }
 3571         umtxq_lock(&uq->uq_key);
 3572         umtxq_unbusy(&uq->uq_key);
 3573 
 3574         error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
 3575 
 3576         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 3577                 error = 0;
 3578         else {
 3579                 umtxq_remove(uq);
 3580                 /* A relative timeout cannot be restarted. */
 3581                 if (error == ERESTART && timeout != NULL &&
 3582                     (timeout->_flags & UMTX_ABSTIME) == 0)
 3583                         error = EINTR;
 3584         }
 3585         umtxq_unlock(&uq->uq_key);
 3586 out:
 3587         umtx_key_release(&uq->uq_key);
 3588         return (error);
 3589 }
 3590 
 3591 /*
 3592  * Signal a userland semaphore.
 3593  */
 3594 static int
 3595 do_sem_wake(struct thread *td, struct _usem *sem)
 3596 {
 3597         struct umtx_key key;
 3598         int error, cnt;
 3599         uint32_t flags;
 3600 
 3601         error = fueword32(&sem->_flags, &flags);
 3602         if (error == -1)
 3603                 return (EFAULT);
 3604         if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
 3605                 return (error);
 3606         umtxq_lock(&key);
 3607         umtxq_busy(&key);
 3608         cnt = umtxq_count(&key);
 3609         if (cnt > 0) {
 3610                 /*
 3611                  * The count is greater than 0, which means the memory
 3612                  * is still referenced by user code, so we can safely
 3613                  * update the _has_waiters flag.
 3614                  */
 3615                 if (cnt == 1) {
 3616                         umtxq_unlock(&key);
 3617                         error = suword32(&sem->_has_waiters, 0);
 3618                         umtxq_lock(&key);
 3619                         if (error == -1)
 3620                                 error = EFAULT;
 3621                 }
 3622                 umtxq_signal(&key, 1);
 3623         }
 3624         umtxq_unbusy(&key);
 3625         umtxq_unlock(&key);
 3626         umtx_key_release(&key);
 3627         return (error);
 3628 }
 3629 #endif
 3630 
 3631 static int
 3632 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
 3633 {
 3634         struct abs_timeout timo;
 3635         struct umtx_q *uq;
 3636         uint32_t count, flags;
 3637         int error, rv;
 3638 
 3639         uq = td->td_umtxq;
 3640         flags = fuword32(&sem->_flags);
 3641         if (timeout != NULL)
 3642                 abs_timeout_init2(&timo, timeout);
 3643 
 3644 again:
 3645         error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
 3646         if (error != 0)
 3647                 return (error);
 3648         umtxq_lock(&uq->uq_key);
 3649         umtxq_busy(&uq->uq_key);
 3650         umtxq_insert(uq);
 3651         umtxq_unlock(&uq->uq_key);
 3652         rv = fueword32(&sem->_count, &count);
 3653         if (rv == -1) {
 3654                 umtxq_lock(&uq->uq_key);
 3655                 umtxq_unbusy(&uq->uq_key);
 3656                 umtxq_remove(uq);
 3657                 umtxq_unlock(&uq->uq_key);
 3658                 umtx_key_release(&uq->uq_key);
 3659                 return (EFAULT);
 3660         }
 3661         for (;;) {
 3662                 if (USEM_COUNT(count) != 0) {
 3663                         umtxq_lock(&uq->uq_key);
 3664                         umtxq_unbusy(&uq->uq_key);
 3665                         umtxq_remove(uq);
 3666                         umtxq_unlock(&uq->uq_key);
 3667                         umtx_key_release(&uq->uq_key);
 3668                         return (0);
 3669                 }
 3670                 if (count == USEM_HAS_WAITERS)
 3671                         break;
 3672                 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
 3673                 if (rv == 0)
 3674                         break;
 3675                 umtxq_lock(&uq->uq_key);
 3676                 umtxq_unbusy(&uq->uq_key);
 3677                 umtxq_remove(uq);
 3678                 umtxq_unlock(&uq->uq_key);
 3679                 umtx_key_release(&uq->uq_key);
 3680                 if (rv == -1)
 3681                         return (EFAULT);
 3682                 rv = thread_check_susp(td, true);
 3683                 if (rv != 0)
 3684                         return (rv);
 3685                 goto again;
 3686         }
 3687         umtxq_lock(&uq->uq_key);
 3688         umtxq_unbusy(&uq->uq_key);
 3689 
 3690         error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
 3691 
 3692         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 3693                 error = 0;
 3694         else {
 3695                 umtxq_remove(uq);
 3696                 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
 3697                         /* A relative timeout cannot be restarted. */
 3698                         if (error == ERESTART)
 3699                                 error = EINTR;
 3700                         if (error == EINTR) {
 3701                                 abs_timeout_update(&timo);
 3702                                 timespecsub(&timo.end, &timo.cur,
 3703                                     &timeout->_timeout);
 3704                         }
 3705                 }
 3706         }
 3707         umtxq_unlock(&uq->uq_key);
 3708         umtx_key_release(&uq->uq_key);
 3709         return (error);
 3710 }
 3711 
 3712 /*
 3713  * Signal a userland semaphore.
 3714  */
 3715 static int
 3716 do_sem2_wake(struct thread *td, struct _usem2 *sem)
 3717 {
 3718         struct umtx_key key;
 3719         int error, cnt, rv;
 3720         uint32_t count, flags;
 3721 
 3722         rv = fueword32(&sem->_flags, &flags);
 3723         if (rv == -1)
 3724                 return (EFAULT);
 3725         if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
 3726                 return (error);
 3727         umtxq_lock(&key);
 3728         umtxq_busy(&key);
 3729         cnt = umtxq_count(&key);
 3730         if (cnt > 0) {
 3731                 /*
 3732                  * If this was the last sleeping thread, clear the waiters
 3733                  * flag in _count.
 3734                  */
 3735                 if (cnt == 1) {
 3736                         umtxq_unlock(&key);
 3737                         rv = fueword32(&sem->_count, &count);
 3738                         while (rv != -1 && count & USEM_HAS_WAITERS) {
 3739                                 rv = casueword32(&sem->_count, count, &count,
 3740                                     count & ~USEM_HAS_WAITERS);
 3741                                 if (rv == 1) {
 3742                                         rv = thread_check_susp(td, true);
 3743                                         if (rv != 0)
 3744                                                 break;
 3745                                 }
 3746                         }
 3747                         if (rv == -1)
 3748                                 error = EFAULT;
 3749                         else if (rv > 0) {
 3750                                 error = rv;
 3751                         }
 3752                         umtxq_lock(&key);
 3753                 }
 3754 
 3755                 umtxq_signal(&key, 1);
 3756         }
 3757         umtxq_unbusy(&key);
 3758         umtxq_unlock(&key);
 3759         umtx_key_release(&key);
 3760         return (error);
 3761 }
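
/*
 * The _usem2 count word carries the semaphore value in its low 31 bits
 * (USEM_COUNT()) and the waiters flag in the sign bit
 * (USEM_HAS_WAITERS), which is why do_sem2_wake() above only has to
 * clear a single bit.  A minimal userland post sketch (an illustrative
 * assumption, not code from this file): bump the count and enter the
 * kernel only when the waiters bit was set.
 *
 *	count = atomic_fetchadd_32(&sem->_count, 1);
 *	if ((count & USEM_HAS_WAITERS) != 0)
 *		(void)_umtx_op(sem, UMTX_OP_SEM2_WAKE, 0, NULL, NULL);
 */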
 3762 
 3763 #ifdef COMPAT_FREEBSD10
 3764 int
 3765 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap)
 3766 {
 3767         return (do_lock_umtx(td, uap->umtx, td->td_tid, 0));
 3768 }
 3769 
 3770 int
 3771 freebsd10__umtx_unlock(struct thread *td,
 3772     struct freebsd10__umtx_unlock_args *uap)
 3773 {
 3774         return (do_unlock_umtx(td, uap->umtx, td->td_tid));
 3775 }
 3776 #endif
 3777 
 3778 inline int
 3779 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp)
 3780 {
 3781         int error;
 3782 
 3783         error = copyin(uaddr, tsp, sizeof(*tsp));
 3784         if (error == 0) {
 3785                 if (tsp->tv_sec < 0 ||
 3786                     tsp->tv_nsec >= 1000000000 ||
 3787                     tsp->tv_nsec < 0)
 3788                         error = EINVAL;
 3789         }
 3790         return (error);
 3791 }
 3792 
 3793 static inline int
 3794 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp)
 3795 {
 3796         int error;
 3797 
 3798         if (size <= sizeof(tp->_timeout)) {
 3799                 tp->_clockid = CLOCK_REALTIME;
 3800                 tp->_flags = 0;
 3801                 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout));
 3802         } else
 3803                 error = copyin(uaddr, tp, sizeof(*tp));
 3804         if (error != 0)
 3805                 return (error);
 3806         if (tp->_timeout.tv_sec < 0 ||
 3807             tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
 3808                 return (EINVAL);
 3809         return (0);
 3810 }
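
/*
 * Userland may therefore pass either a bare struct timespec (relative,
 * CLOCK_REALTIME) or a full struct _umtx_time, with the structure size
 * travelling in the otherwise unused uaddr1 argument.  A minimal
 * userland sketch of an absolute-timeout wait (an illustrative
 * assumption, not code from this file; word and expected are the
 * application's futex word and its observed value):
 *
 *	struct _umtx_time ut;
 *
 *	clock_gettime(CLOCK_MONOTONIC, &ut._timeout);
 *	ut._timeout.tv_sec += 5;
 *	ut._flags = UMTX_ABSTIME;
 *	ut._clockid = CLOCK_MONOTONIC;
 *	(void)_umtx_op(&word, UMTX_OP_WAIT_UINT, expected,
 *	    (void *)sizeof(ut), &ut);
 */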
 3811 
 3812 static int
 3813 umtx_copyin_robust_lists(const void *uaddr, size_t size,
 3814     struct umtx_robust_lists_params *rb)
 3815 {
 3816 
 3817         if (size > sizeof(*rb))
 3818                 return (EINVAL);
 3819         return (copyin(uaddr, rb, size));
 3820 }
 3821 
 3822 static int
 3823 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp)
 3824 {
 3825 
 3826         /*
 3827          * The caller guarantees sz == uaddr1 - sizeof(_umtx_time), and
 3828          * we are only called if sz >= sizeof(timespec), as supplied in
 3829          * the copyops.
 3830          */
 3831         KASSERT(sz >= sizeof(*tsp),
 3832             ("umtx_copyops specifies incorrect sizes"));
 3833 
 3834         return (copyout(tsp, uaddr, sizeof(*tsp)));
 3835 }
 3836 
 3837 #ifdef COMPAT_FREEBSD10
 3838 static int
 3839 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap,
 3840     const struct umtx_copyops *ops)
 3841 {
 3842         struct timespec *ts, timeout;
 3843         int error;
 3844 
 3845         /* Allow a null timespec (wait forever). */
 3846         if (uap->uaddr2 == NULL)
 3847                 ts = NULL;
 3848         else {
 3849                 error = ops->copyin_timeout(uap->uaddr2, &timeout);
 3850                 if (error != 0)
 3851                         return (error);
 3852                 ts = &timeout;
 3853         }
 3854 #ifdef COMPAT_FREEBSD32
 3855         if (ops->compat32)
 3856                 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
 3857 #endif
 3858         return (do_lock_umtx(td, uap->obj, uap->val, ts));
 3859 }
 3860 
 3861 static int
 3862 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap,
 3863     const struct umtx_copyops *ops)
 3864 {
 3865 #ifdef COMPAT_FREEBSD32
 3866         if (ops->compat32)
 3867                 return (do_unlock_umtx32(td, uap->obj, uap->val));
 3868 #endif
 3869         return (do_unlock_umtx(td, uap->obj, uap->val));
 3870 }
 3871 #endif  /* COMPAT_FREEBSD10 */
 3872 
 3873 #if !defined(COMPAT_FREEBSD10)
 3874 static int
 3875 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused,
 3876     const struct umtx_copyops *ops __unused)
 3877 {
 3878         return (EOPNOTSUPP);
 3879 }
 3880 #endif  /* COMPAT_FREEBSD10 */
 3881 
 3882 static int
 3883 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap,
 3884     const struct umtx_copyops *ops)
 3885 {
 3886         struct _umtx_time timeout, *tm_p;
 3887         int error;
 3888 
 3889         if (uap->uaddr2 == NULL)
 3890                 tm_p = NULL;
 3891         else {
 3892                 error = ops->copyin_umtx_time(
 3893                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3894                 if (error != 0)
 3895                         return (error);
 3896                 tm_p = &timeout;
 3897         }
 3898         return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0));
 3899 }
 3900 
 3901 static int
 3902 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap,
 3903     const struct umtx_copyops *ops)
 3904 {
 3905         struct _umtx_time timeout, *tm_p;
 3906         int error;
 3907 
 3908         if (uap->uaddr2 == NULL)
 3909                 tm_p = NULL;
 3910         else {
 3911                 error = ops->copyin_umtx_time(
 3912                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3913                 if (error != 0)
 3914                         return (error);
 3915                 tm_p = &timeout;
 3916         }
 3917         return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
 3918 }
 3919 
 3920 static int
 3921 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap,
 3922     const struct umtx_copyops *ops)
 3923 {
 3924         struct _umtx_time *tm_p, timeout;
 3925         int error;
 3926 
 3927         if (uap->uaddr2 == NULL)
 3928                 tm_p = NULL;
 3929         else {
 3930                 error = ops->copyin_umtx_time(
 3931                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3932                 if (error != 0)
 3933                         return (error);
 3934                 tm_p = &timeout;
 3935         }
 3936         return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
 3937 }
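
/*
 * All three wait variants implement the same futex-style contract: the
 * thread is put to sleep only while the word still holds the expected
 * value, so a waker that changes the word between the caller's test
 * and the sleep cannot be missed.  A minimal userland sketch (an
 * illustrative assumption; word is the application's 32-bit futex word
 * and WORD_BUSY a hypothetical constant):
 *
 *	while (atomic_load_acq_32(&word) == WORD_BUSY)
 *		(void)_umtx_op(&word, UMTX_OP_WAIT_UINT_PRIVATE,
 *		    WORD_BUSY, NULL, NULL);
 */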
 3938 
 3939 static int
 3940 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap,
 3941     const struct umtx_copyops *ops __unused)
 3942 {
 3943 
 3944         return (kern_umtx_wake(td, uap->obj, uap->val, 0));
 3945 }
 3946 
 3947 #define BATCH_SIZE      128
 3948 static int
 3949 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap)
 3950 {
 3951         char *uaddrs[BATCH_SIZE], **upp;
 3952         int count, error, i, pos, tocopy;
 3953 
 3954         upp = (char **)uap->obj;
 3955         error = 0;
 3956         for (count = uap->val, pos = 0; count > 0; count -= tocopy,
 3957             pos += tocopy) {
 3958                 tocopy = MIN(count, BATCH_SIZE);
 3959                 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
 3960                 if (error != 0)
 3961                         break;
 3962                 for (i = 0; i < tocopy; ++i) {
 3963                         kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
 3964                 }
 3965                 maybe_yield();
 3966         }
 3967         return (error);
 3968 }
 3969 
 3970 static int
 3971 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap)
 3972 {
 3973         uint32_t uaddrs[BATCH_SIZE], *upp;
 3974         int count, error, i, pos, tocopy;
 3975 
 3976         upp = (uint32_t *)uap->obj;
 3977         error = 0;
 3978         for (count = uap->val, pos = 0; count > 0; count -= tocopy,
 3979             pos += tocopy) {
 3980                 tocopy = MIN(count, BATCH_SIZE);
 3981                 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
 3982                 if (error != 0)
 3983                         break;
 3984                 for (i = 0; i < tocopy; ++i) {
 3985                         kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i],
 3986                             INT_MAX, 1);
 3987                 }
 3988                 maybe_yield();
 3989         }
 3990         return (error);
 3991 }
 3992 
 3993 static int
 3994 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap,
 3995     const struct umtx_copyops *ops)
 3996 {
 3997 
 3998         if (ops->compat32)
 3999                 return (__umtx_op_nwake_private_compat32(td, uap));
 4000         return (__umtx_op_nwake_private_native(td, uap));
 4001 }
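
/*
 * UMTX_OP_NWAKE_PRIVATE exists so userland can wake the waiters on many
 * words with a single syscall; the two helpers above copy the pointer
 * array in BATCH_SIZE chunks and call maybe_yield() between chunks so a
 * huge request cannot monopolize the CPU.  A minimal userland sketch
 * (an illustrative assumption, not code from this file):
 *
 *	void *addrs[3] = { &w0, &w1, &w2 };
 *
 *	(void)_umtx_op(addrs, UMTX_OP_NWAKE_PRIVATE, 3, NULL, NULL);
 */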
 4002 
 4003 static int
 4004 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap,
 4005     const struct umtx_copyops *ops __unused)
 4006 {
 4007 
 4008         return (kern_umtx_wake(td, uap->obj, uap->val, 1));
 4009 }
 4010 
 4011 static int
 4012 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap,
 4013    const struct umtx_copyops *ops)
 4014 {
 4015         struct _umtx_time *tm_p, timeout;
 4016         int error;
 4017 
 4018         /* Allow a null timespec (wait forever). */
 4019         if (uap->uaddr2 == NULL)
 4020                 tm_p = NULL;
 4021         else {
 4022                 error = ops->copyin_umtx_time(
 4023                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 4024                 if (error != 0)
 4025                         return (error);
 4026                 tm_p = &timeout;
 4027         }
 4028         return (do_lock_umutex(td, uap->obj, tm_p, 0));
 4029 }
 4030 
 4031 static int
 4032 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap,
 4033     const struct umtx_copyops *ops __unused)
 4034 {
 4035 
 4036         return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
 4037 }
 4038 
 4039 static int
 4040 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap,
 4041     const struct umtx_copyops *ops)
 4042 {
 4043         struct _umtx_time *tm_p, timeout;
 4044         int error;
 4045 
 4046         /* Allow a null timespec (wait forever). */
 4047         if (uap->uaddr2 == NULL)
 4048                 tm_p = NULL;
 4049         else {
 4050                 error = ops->copyin_umtx_time(
 4051                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 4052                 if (error != 0)
 4053                         return (error);
 4054                 tm_p = &timeout;
 4055         }
 4056         return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
 4057 }
 4058 
 4059 static int
 4060 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap,
 4061     const struct umtx_copyops *ops __unused)
 4062 {
 4063 
 4064         return (do_wake_umutex(td, uap->obj));
 4065 }
 4066 
 4067 static int
 4068 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap,
 4069     const struct umtx_copyops *ops __unused)
 4070 {
 4071 
 4072         return (do_unlock_umutex(td, uap->obj, false));
 4073 }
 4074 
 4075 static int
 4076 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap,
 4077     const struct umtx_copyops *ops __unused)
 4078 {
 4079 
 4080         return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
 4081 }
 4082 
 4083 static int
 4084 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap,
 4085     const struct umtx_copyops *ops)
 4086 {
 4087         struct timespec *ts, timeout;
 4088         int error;
 4089 
 4090         /* Allow a null timespec (wait forever). */
 4091         if (uap->uaddr2 == NULL)
 4092                 ts = NULL;
 4093         else {
 4094                 error = ops->copyin_timeout(uap->uaddr2, &timeout);
 4095                 if (error != 0)
 4096                         return (error);
 4097                 ts = &timeout;
 4098         }
 4099         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 4100 }
 4101 
 4102 static int
 4103 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap,
 4104     const struct umtx_copyops *ops __unused)
 4105 {
 4106 
 4107         return (do_cv_signal(td, uap->obj));
 4108 }
 4109 
 4110 static int
 4111 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap,
 4112     const struct umtx_copyops *ops __unused)
 4113 {
 4114 
 4115         return (do_cv_broadcast(td, uap->obj));
 4116 }
 4117 
 4118 static int
 4119 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap,
 4120     const struct umtx_copyops *ops)
 4121 {
 4122         struct _umtx_time timeout;
 4123         int error;
 4124 
 4125         /* Allow a null timespec (wait forever). */
 4126         if (uap->uaddr2 == NULL) {
 4127                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 4128         } else {
 4129                 error = ops->copyin_umtx_time(uap->uaddr2,
 4130                    (size_t)uap->uaddr1, &timeout);
 4131                 if (error != 0)
 4132                         return (error);
 4133                 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
 4134         }
 4135         return (error);
 4136 }
 4137 
 4138 static int
 4139 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap,
 4140     const struct umtx_copyops *ops)
 4141 {
 4142         struct _umtx_time timeout;
 4143         int error;
 4144 
 4145         /* Allow a null timespec (wait forever). */
 4146         if (uap->uaddr2 == NULL) {
 4147                 error = do_rw_wrlock(td, uap->obj, 0);
 4148         } else {
 4149                 error = ops->copyin_umtx_time(uap->uaddr2,
 4150                    (size_t)uap->uaddr1, &timeout);
 4151                 if (error != 0)
 4152                         return (error);
 4153 
 4154                 error = do_rw_wrlock(td, uap->obj, &timeout);
 4155         }
 4156         return (error);
 4157 }
 4158 
 4159 static int
 4160 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap,
 4161     const struct umtx_copyops *ops __unused)
 4162 {
 4163 
 4164         return (do_rw_unlock(td, uap->obj));
 4165 }
 4166 
 4167 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
 4168 static int
 4169 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap,
 4170     const struct umtx_copyops *ops)
 4171 {
 4172         struct _umtx_time *tm_p, timeout;
 4173         int error;
 4174 
 4175         /* Allow a null timespec (wait forever). */
 4176         if (uap->uaddr2 == NULL)
 4177                 tm_p = NULL;
 4178         else {
 4179                 error = ops->copyin_umtx_time(
 4180                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 4181                 if (error != 0)
 4182                         return (error);
 4183                 tm_p = &timeout;
 4184         }
 4185         return (do_sem_wait(td, uap->obj, tm_p));
 4186 }
 4187 
 4188 static int
 4189 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap,
 4190     const struct umtx_copyops *ops __unused)
 4191 {
 4192 
 4193         return (do_sem_wake(td, uap->obj));
 4194 }
 4195 #endif
 4196 
 4197 static int
 4198 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap,
 4199     const struct umtx_copyops *ops __unused)
 4200 {
 4201 
 4202         return (do_wake2_umutex(td, uap->obj, uap->val));
 4203 }
 4204 
 4205 static int
 4206 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap,
 4207     const struct umtx_copyops *ops)
 4208 {
 4209         struct _umtx_time *tm_p, timeout;
 4210         size_t uasize;
 4211         int error;
 4212 
 4213         /* Allow a null timespec (wait forever). */
 4214         if (uap->uaddr2 == NULL) {
 4215                 uasize = 0;
 4216                 tm_p = NULL;
 4217         } else {
 4218                 uasize = (size_t)uap->uaddr1;
 4219                 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout);
 4220                 if (error != 0)
 4221                         return (error);
 4222                 tm_p = &timeout;
 4223         }
 4224         error = do_sem2_wait(td, uap->obj, tm_p);
 4225         if (error == EINTR && uap->uaddr2 != NULL &&
 4226             (timeout._flags & UMTX_ABSTIME) == 0 &&
 4227             uasize >= ops->umtx_time_sz + ops->timespec_sz) {
 4228                 error = ops->copyout_timeout(
 4229                     (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz),
 4230                     uasize - ops->umtx_time_sz, &timeout._timeout);
 4231                 if (error == 0) {
 4232                         error = EINTR;
 4233                 }
 4234         }
 4235 
 4236         return (error);
 4237 }
 4238 
 4239 static int
 4240 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap,
 4241     const struct umtx_copyops *ops __unused)
 4242 {
 4243 
 4244         return (do_sem2_wake(td, uap->obj));
 4245 }
 4246 
 4247 #define USHM_OBJ_UMTX(o)                                                \
 4248     ((struct umtx_shm_obj_list *)(&(o)->umtx_data))
 4249 
 4250 #define USHMF_REG_LINKED        0x0001
 4251 #define USHMF_OBJ_LINKED        0x0002
 4252 struct umtx_shm_reg {
 4253         TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link;
 4254         LIST_ENTRY(umtx_shm_reg) ushm_obj_link;
 4255         struct umtx_key         ushm_key;
 4256         struct ucred            *ushm_cred;
 4257         struct shmfd            *ushm_obj;
 4258         u_int                   ushm_refcnt;
 4259         u_int                   ushm_flags;
 4260 };
 4261 
 4262 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
 4263 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);
 4264 
 4265 static uma_zone_t umtx_shm_reg_zone;
 4266 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
 4267 static struct mtx umtx_shm_lock;
 4268 static struct umtx_shm_reg_head umtx_shm_reg_delfree =
 4269     TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);
 4270 
 4271 static void umtx_shm_free_reg(struct umtx_shm_reg *reg);
 4272 
 4273 static void
 4274 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
 4275 {
 4276         struct umtx_shm_reg_head d;
 4277         struct umtx_shm_reg *reg, *reg1;
 4278 
 4279         TAILQ_INIT(&d);
 4280         mtx_lock(&umtx_shm_lock);
 4281         TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
 4282         mtx_unlock(&umtx_shm_lock);
 4283         TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
 4284                 TAILQ_REMOVE(&d, reg, ushm_reg_link);
 4285                 umtx_shm_free_reg(reg);
 4286         }
 4287 }
 4288 
 4289 static struct task umtx_shm_reg_delfree_task =
 4290     TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);
 4291 
 4292 static struct umtx_shm_reg *
 4293 umtx_shm_find_reg_locked(const struct umtx_key *key)
 4294 {
 4295         struct umtx_shm_reg *reg;
 4296         struct umtx_shm_reg_head *reg_head;
 4297 
 4298         KASSERT(key->shared, ("umtx_shm_find_reg_locked: private key"));
 4299         mtx_assert(&umtx_shm_lock, MA_OWNED);
 4300         reg_head = &umtx_shm_registry[key->hash];
 4301         TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
 4302                 KASSERT(reg->ushm_key.shared,
 4303                     ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
 4304                 if (reg->ushm_key.info.shared.object ==
 4305                     key->info.shared.object &&
 4306                     reg->ushm_key.info.shared.offset ==
 4307                     key->info.shared.offset) {
 4308                         KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
 4309                         KASSERT(reg->ushm_refcnt > 0,
 4310                             ("reg %p refcnt 0 onlist", reg));
 4311                         KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
 4312                             ("reg %p not linked", reg));
 4313                         reg->ushm_refcnt++;
 4314                         return (reg);
 4315                 }
 4316         }
 4317         return (NULL);
 4318 }
 4319 
 4320 static struct umtx_shm_reg *
 4321 umtx_shm_find_reg(const struct umtx_key *key)
 4322 {
 4323         struct umtx_shm_reg *reg;
 4324 
 4325         mtx_lock(&umtx_shm_lock);
 4326         reg = umtx_shm_find_reg_locked(key);
 4327         mtx_unlock(&umtx_shm_lock);
 4328         return (reg);
 4329 }
 4330 
 4331 static void
 4332 umtx_shm_free_reg(struct umtx_shm_reg *reg)
 4333 {
 4334 
 4335         chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
 4336         crfree(reg->ushm_cred);
 4337         shm_drop(reg->ushm_obj);
 4338         uma_zfree(umtx_shm_reg_zone, reg);
 4339 }
 4340 
 4341 static bool
 4342 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
 4343 {
 4344         bool res;
 4345 
 4346         mtx_assert(&umtx_shm_lock, MA_OWNED);
 4347         KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
 4348         reg->ushm_refcnt--;
 4349         res = reg->ushm_refcnt == 0;
 4350         if (res || force) {
 4351                 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
 4352                         TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
 4353                             reg, ushm_reg_link);
 4354                         reg->ushm_flags &= ~USHMF_REG_LINKED;
 4355                 }
 4356                 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
 4357                         LIST_REMOVE(reg, ushm_obj_link);
 4358                         reg->ushm_flags &= ~USHMF_OBJ_LINKED;
 4359                 }
 4360         }
 4361         return (res);
 4362 }
 4363 
 4364 static void
 4365 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
 4366 {
 4367         vm_object_t object;
 4368         bool dofree;
 4369 
 4370         if (force) {
 4371                 object = reg->ushm_obj->shm_object;
 4372                 VM_OBJECT_WLOCK(object);
 4373                 object->flags |= OBJ_UMTXDEAD;
 4374                 VM_OBJECT_WUNLOCK(object);
 4375         }
 4376         mtx_lock(&umtx_shm_lock);
 4377         dofree = umtx_shm_unref_reg_locked(reg, force);
 4378         mtx_unlock(&umtx_shm_lock);
 4379         if (dofree)
 4380                 umtx_shm_free_reg(reg);
 4381 }
 4382 
 4383 void
 4384 umtx_shm_object_init(vm_object_t object)
 4385 {
 4386 
 4387         LIST_INIT(USHM_OBJ_UMTX(object));
 4388 }
 4389 
 4390 void
 4391 umtx_shm_object_terminated(vm_object_t object)
 4392 {
 4393         struct umtx_shm_reg *reg, *reg1;
 4394         bool dofree;
 4395 
 4396         if (LIST_EMPTY(USHM_OBJ_UMTX(object)))
 4397                 return;
 4398 
 4399         dofree = false;
 4400         mtx_lock(&umtx_shm_lock);
 4401         LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
 4402                 if (umtx_shm_unref_reg_locked(reg, true)) {
 4403                         TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
 4404                             ushm_reg_link);
 4405                         dofree = true;
 4406                 }
 4407         }
 4408         mtx_unlock(&umtx_shm_lock);
 4409         if (dofree)
 4410                 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
 4411 }
 4412 
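/*
 * Find or create the registry entry for key.  The shm object is
 * allocated without umtx_shm_lock held, so the lookup is repeated
 * under the lock afterwards; a concurrently inserted entry wins and
 * the local allocation is freed.
 */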
 4413 static int
 4414 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
 4415     struct umtx_shm_reg **res)
 4416 {
 4417         struct umtx_shm_reg *reg, *reg1;
 4418         struct ucred *cred;
 4419         int error;
 4420 
 4421         reg = umtx_shm_find_reg(key);
 4422         if (reg != NULL) {
 4423                 *res = reg;
 4424                 return (0);
 4425         }
 4426         cred = td->td_ucred;
 4427         if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
 4428                 return (ENOMEM);
 4429         reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
 4430         reg->ushm_refcnt = 1;
 4431         bcopy(key, &reg->ushm_key, sizeof(*key));
 4432         reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false);
 4433         reg->ushm_cred = crhold(cred);
 4434         error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
 4435         if (error != 0) {
 4436                 umtx_shm_free_reg(reg);
 4437                 return (error);
 4438         }
 4439         mtx_lock(&umtx_shm_lock);
 4440         reg1 = umtx_shm_find_reg_locked(key);
 4441         if (reg1 != NULL) {
 4442                 mtx_unlock(&umtx_shm_lock);
 4443                 umtx_shm_free_reg(reg);
 4444                 *res = reg1;
 4445                 return (0);
 4446         }
 4447         reg->ushm_refcnt++;
 4448         TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
 4449         LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
 4450             ushm_obj_link);
 4451         reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
 4452         mtx_unlock(&umtx_shm_lock);
 4453         *res = reg;
 4454         return (0);
 4455 }
 4456 
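/*
 * UMTX_SHM_ALIVE helper: look up the mapping backing addr and report
 * ENOTTY once the backing object has been marked OBJ_UMTXDEAD by a
 * forced unref of its registration.
 */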
 4457 static int
 4458 umtx_shm_alive(struct thread *td, void *addr)
 4459 {
 4460         vm_map_t map;
 4461         vm_map_entry_t entry;
 4462         vm_object_t object;
 4463         vm_pindex_t pindex;
 4464         vm_prot_t prot;
 4465         int res, ret;
 4466         boolean_t wired;
 4467 
 4468         map = &td->td_proc->p_vmspace->vm_map;
 4469         res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
 4470             &object, &pindex, &prot, &wired);
 4471         if (res != KERN_SUCCESS)
 4472                 return (EFAULT);
 4473         if (object == NULL)
 4474                 ret = EINVAL;
 4475         else
 4476                 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
 4477         vm_map_lookup_done(map, entry);
 4478         return (ret);
 4479 }
 4480 
 4481 static void
 4482 umtx_shm_init(void)
 4483 {
 4484         int i;
 4485 
 4486         umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
 4487             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 4488         mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
 4489         for (i = 0; i < nitems(umtx_shm_registry); i++)
 4490                 TAILQ_INIT(&umtx_shm_registry[i]);
 4491 }
 4492 
 4493 static int
 4494 umtx_shm(struct thread *td, void *addr, u_int flags)
 4495 {
 4496         struct umtx_key key;
 4497         struct umtx_shm_reg *reg;
 4498         struct file *fp;
 4499         int error, fd;
 4500 
 4501         if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
 4502             UMTX_SHM_DESTROY | UMTX_SHM_ALIVE)) != 1)
 4503                 return (EINVAL);
 4504         if ((flags & UMTX_SHM_ALIVE) != 0)
 4505                 return (umtx_shm_alive(td, addr));
 4506         error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
 4507         if (error != 0)
 4508                 return (error);
 4509         KASSERT(key.shared == 1, ("non-shared key"));
 4510         if ((flags & UMTX_SHM_CREAT) != 0) {
 4511                 error = umtx_shm_create_reg(td, &key, &reg);
 4512         } else {
 4513                 reg = umtx_shm_find_reg(&key);
 4514                 if (reg == NULL)
 4515                         error = ESRCH;
 4516         }
 4517         umtx_key_release(&key);
 4518         if (error != 0)
 4519                 return (error);
 4520         KASSERT(reg != NULL, ("no reg"));
 4521         if ((flags & UMTX_SHM_DESTROY) != 0) {
 4522                 umtx_shm_unref_reg(reg, true);
 4523         } else {
 4524 #if 0
 4525 #ifdef MAC
 4526                 error = mac_posixshm_check_open(td->td_ucred,
 4527                     reg->ushm_obj, FFLAGS(O_RDWR));
 4528                 if (error == 0)
 4529 #endif
 4530                         error = shm_access(reg->ushm_obj, td->td_ucred,
 4531                             FFLAGS(O_RDWR));
 4532                 if (error == 0)
 4533 #endif
 4534                         error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
 4535                 if (error == 0) {
 4536                         shm_hold(reg->ushm_obj);
 4537                         finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
 4538                             &shm_ops);
 4539                         td->td_retval[0] = fd;
 4540                         fdrop(fp, td);
 4541                 }
 4542         }
 4543         umtx_shm_unref_reg(reg, false);
 4544         return (error);
 4545 }
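/*
 * An illustrative userspace sketch, not part of kern_umtx.c: obtaining
 * the per-key shm object through _umtx_op(2).  It assumes the
 * <sys/umtx.h> prototype int _umtx_op(void *obj, int op, u_long val,
 * void *uaddr, void *uaddr2); key_word is a hypothetical name and
 * should live in memory visible to every cooperating process.
 */
#if 0
static int
shared_fd_for(uint32_t *key_word)
{

	/*
	 * UMTX_SHM_CREAT finds or creates the one-page object keyed on
	 * key_word; on success the syscall return value is a new
	 * O_CLOEXEC file descriptor referencing it.
	 */
	return (_umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, key_word, NULL));
}
#endif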
 4546 
 4547 static int
 4548 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap,
 4549     const struct umtx_copyops *ops __unused)
 4550 {
 4551 
 4552         return (umtx_shm(td, uap->uaddr1, uap->val));
 4553 }
 4554 
 4555 static int
 4556 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
 4557     const struct umtx_copyops *ops)
 4558 {
 4559         struct umtx_robust_lists_params rb;
 4560         int error;
 4561 
 4562         if (ops->compat32) {
 4563                 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 &&
 4564                     (td->td_rb_list != 0 || td->td_rbp_list != 0 ||
 4565                     td->td_rb_inact != 0))
 4566                         return (EBUSY);
 4567         } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) {
 4568                 return (EBUSY);
 4569         }
 4570 
 4571         bzero(&rb, sizeof(rb));
 4572         error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
 4573         if (error != 0)
 4574                 return (error);
 4575 
 4576         if (ops->compat32)
 4577                 td->td_pflags2 |= TDP2_COMPAT32RB;
 4578 
 4579         td->td_rb_list = rb.robust_list_offset;
 4580         td->td_rbp_list = rb.robust_priv_list_offset;
 4581         td->td_rb_inact = rb.robust_inact_offset;
 4582         return (0);
 4583 }
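/*
 * An illustrative userspace sketch, not part of kern_umtx.c:
 * registering the robust list heads, roughly as a threading library
 * would.  struct umtx_robust_lists_params and UMTX_OP_ROBUST_LISTS
 * come from <sys/umtx.h>; note that val carries sizeof(rb) for the
 * copyin above.  Variable names are hypothetical.
 */
#if 0
static uintptr_t robust_head, robust_priv_head, robust_inact;

static int
register_robust_lists(void)
{
	struct umtx_robust_lists_params rb = {
		.robust_list_offset = (uintptr_t)&robust_head,
		.robust_priv_list_offset = (uintptr_t)&robust_priv_head,
		.robust_inact_offset = (uintptr_t)&robust_inact,
	};

	return (_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL));
}
#endif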
 4584 
 4585 #if defined(__i386__) || defined(__amd64__)
 4586 /*
 4587  * Provide the standard 32-bit definitions for x86, since native/compat32 use a
 4588  * 32-bit time_t there.  Other architectures just need the i386 definitions
 4589  * along with their standard compat32.
 4590  */
 4591 struct timespecx32 {
 4592         int64_t                 tv_sec;
 4593         int32_t                 tv_nsec;
 4594 };
 4595 
 4596 struct umtx_timex32 {
 4597         struct  timespecx32     _timeout;
 4598         uint32_t                _flags;
 4599         uint32_t                _clockid;
 4600 };
 4601 
 4602 #ifndef __i386__
 4603 #define timespeci386    timespec32
 4604 #define umtx_timei386   umtx_time32
 4605 #endif
 4606 #else /* !__i386__ && !__amd64__ */
 4607 /* 32-bit architectures can emulate i386, so define these almost everywhere. */
 4608 struct timespeci386 {
 4609         int32_t                 tv_sec;
 4610         int32_t                 tv_nsec;
 4611 };
 4612 
 4613 struct umtx_timei386 {
 4614         struct  timespeci386    _timeout;
 4615         uint32_t                _flags;
 4616         uint32_t                _clockid;
 4617 };
 4618 
 4619 #if defined(__LP64__)
 4620 #define timespecx32     timespec32
 4621 #define umtx_timex32    umtx_time32
 4622 #endif
 4623 #endif
 4624 
 4625 static int
 4626 umtx_copyin_robust_lists32(const void *uaddr, size_t size,
 4627     struct umtx_robust_lists_params *rbp)
 4628 {
 4629         struct umtx_robust_lists_params_compat32 rb32;
 4630         int error;
 4631 
 4632         if (size > sizeof(rb32))
 4633                 return (EINVAL);
 4634         bzero(&rb32, sizeof(rb32));
 4635         error = copyin(uaddr, &rb32, size);
 4636         if (error != 0)
 4637                 return (error);
 4638         CP(rb32, *rbp, robust_list_offset);
 4639         CP(rb32, *rbp, robust_priv_list_offset);
 4640         CP(rb32, *rbp, robust_inact_offset);
 4641         return (0);
 4642 }
 4643 
 4644 #ifndef __i386__
 4645 static inline int
 4646 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp)
 4647 {
 4648         struct timespeci386 ts32;
 4649         int error;
 4650 
 4651         error = copyin(uaddr, &ts32, sizeof(ts32));
 4652         if (error == 0) {
 4653                 if (ts32.tv_sec < 0 ||
 4654                     ts32.tv_nsec >= 1000000000 ||
 4655                     ts32.tv_nsec < 0)
 4656                         error = EINVAL;
 4657                 else {
 4658                         CP(ts32, *tsp, tv_sec);
 4659                         CP(ts32, *tsp, tv_nsec);
 4660                 }
 4661         }
 4662         return (error);
 4663 }
 4664 
 4665 static inline int
 4666 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp)
 4667 {
 4668         struct umtx_timei386 t32;
 4669         int error;
 4670 
 4671         t32._clockid = CLOCK_REALTIME;
 4672         t32._flags   = 0;
 4673         if (size <= sizeof(t32._timeout))
 4674                 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
 4675         else
 4676                 error = copyin(uaddr, &t32, sizeof(t32));
 4677         if (error != 0)
 4678                 return (error);
 4679         if (t32._timeout.tv_sec < 0 ||
 4680             t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0)
 4681                 return (EINVAL);
 4682         TS_CP(t32, *tp, _timeout);
 4683         CP(t32, *tp, _flags);
 4684         CP(t32, *tp, _clockid);
 4685         return (0);
 4686 }
 4687 
 4688 static int
 4689 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp)
 4690 {
 4691         struct timespeci386 remain32 = {
 4692                 .tv_sec = tsp->tv_sec,
 4693                 .tv_nsec = tsp->tv_nsec,
 4694         };
 4695 
 4696         /*
 4697          * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
 4698          * and we're only called if sz >= sizeof(timespec) as supplied in the
 4699          * copyops.
 4700          */
 4701         KASSERT(sz >= sizeof(remain32),
 4702             ("umtx_copyops specifies incorrect sizes"));
 4703 
 4704         return (copyout(&remain32, uaddr, sizeof(remain32)));
 4705 }
 4706 #endif /* !__i386__ */
 4707 
 4708 #if defined(__i386__) || defined(__LP64__)
 4709 static inline int
 4710 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp)
 4711 {
 4712         struct timespecx32 ts32;
 4713         int error;
 4714 
 4715         error = copyin(uaddr, &ts32, sizeof(ts32));
 4716         if (error == 0) {
 4717                 if (ts32.tv_sec < 0 ||
 4718                     ts32.tv_nsec >= 1000000000 ||
 4719                     ts32.tv_nsec < 0)
 4720                         error = EINVAL;
 4721                 else {
 4722                         CP(ts32, *tsp, tv_sec);
 4723                         CP(ts32, *tsp, tv_nsec);
 4724                 }
 4725         }
 4726         return (error);
 4727 }
 4728 
 4729 static inline int
 4730 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp)
 4731 {
 4732         struct umtx_timex32 t32;
 4733         int error;
 4734 
 4735         t32._clockid = CLOCK_REALTIME;
 4736         t32._flags   = 0;
 4737         if (size <= sizeof(t32._timeout))
 4738                 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
 4739         else
 4740                 error = copyin(uaddr, &t32, sizeof(t32));
 4741         if (error != 0)
 4742                 return (error);
 4743         if (t32._timeout.tv_sec < 0 ||
 4744             t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0)
 4745                 return (EINVAL);
 4746         TS_CP(t32, *tp, _timeout);
 4747         CP(t32, *tp, _flags);
 4748         CP(t32, *tp, _clockid);
 4749         return (0);
 4750 }
 4751 
 4752 static int
 4753 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp)
 4754 {
 4755         struct timespecx32 remain32 = {
 4756                 .tv_sec = tsp->tv_sec,
 4757                 .tv_nsec = tsp->tv_nsec,
 4758         };
 4759 
 4760         /*
 4761          * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
 4762          * and we're only called if sz >= sizeof(timespec) as supplied in the
 4763          * copyops.
 4764          */
 4765         KASSERT(sz >= sizeof(remain32),
 4766             ("umtx_copyops specifies incorrect sizes"));
 4767 
 4768         return (copyout(&remain32, uaddr, sizeof(remain32)));
 4769 }
 4770 #endif /* __i386__ || __LP64__ */
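/*
 * An illustrative userspace sketch, not part of kern_umtx.c, of the
 * timeout convention the copyin handlers above implement: callers may
 * pass either a bare timespec or a full struct _umtx_time, with the
 * structure size in uaddr1 and the pointer in uaddr2.  It assumes
 * <sys/umtx.h> and <time.h>; names other than the umtx ops and
 * structures are hypothetical.
 */
#if 0
static int
wait_on_word(u_int *word, u_int expect)
{
	struct _umtx_time to = {
		._timeout = { .tv_sec = 1, .tv_nsec = 0 },
		._flags = 0,			/* relative, not UMTX_ABSTIME */
		._clockid = CLOCK_MONOTONIC,
	};

	/* Sleeps if *word still equals expect; ETIMEDOUT on expiry. */
	return (_umtx_op(word, UMTX_OP_WAIT_UINT_PRIVATE, expect,
	    (void *)sizeof(to), &to));
}
#endif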
 4771 
 4772 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap,
 4773     const struct umtx_copyops *umtx_ops);
 4774 
 4775 static const _umtx_op_func op_table[] = {
 4776 #ifdef COMPAT_FREEBSD10
 4777         [UMTX_OP_LOCK]          = __umtx_op_lock_umtx,
 4778         [UMTX_OP_UNLOCK]        = __umtx_op_unlock_umtx,
 4779 #else
 4780         [UMTX_OP_LOCK]          = __umtx_op_unimpl,
 4781         [UMTX_OP_UNLOCK]        = __umtx_op_unimpl,
 4782 #endif
 4783         [UMTX_OP_WAIT]          = __umtx_op_wait,
 4784         [UMTX_OP_WAKE]          = __umtx_op_wake,
 4785         [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
 4786         [UMTX_OP_MUTEX_LOCK]    = __umtx_op_lock_umutex,
 4787         [UMTX_OP_MUTEX_UNLOCK]  = __umtx_op_unlock_umutex,
 4788         [UMTX_OP_SET_CEILING]   = __umtx_op_set_ceiling,
 4789         [UMTX_OP_CV_WAIT]       = __umtx_op_cv_wait,
 4790         [UMTX_OP_CV_SIGNAL]     = __umtx_op_cv_signal,
 4791         [UMTX_OP_CV_BROADCAST]  = __umtx_op_cv_broadcast,
 4792         [UMTX_OP_WAIT_UINT]     = __umtx_op_wait_uint,
 4793         [UMTX_OP_RW_RDLOCK]     = __umtx_op_rw_rdlock,
 4794         [UMTX_OP_RW_WRLOCK]     = __umtx_op_rw_wrlock,
 4795         [UMTX_OP_RW_UNLOCK]     = __umtx_op_rw_unlock,
 4796         [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
 4797         [UMTX_OP_WAKE_PRIVATE]  = __umtx_op_wake_private,
 4798         [UMTX_OP_MUTEX_WAIT]    = __umtx_op_wait_umutex,
 4799         [UMTX_OP_MUTEX_WAKE]    = __umtx_op_wake_umutex,
 4800 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
 4801         [UMTX_OP_SEM_WAIT]      = __umtx_op_sem_wait,
 4802         [UMTX_OP_SEM_WAKE]      = __umtx_op_sem_wake,
 4803 #else
 4804         [UMTX_OP_SEM_WAIT]      = __umtx_op_unimpl,
 4805         [UMTX_OP_SEM_WAKE]      = __umtx_op_unimpl,
 4806 #endif
 4807         [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private,
 4808         [UMTX_OP_MUTEX_WAKE2]   = __umtx_op_wake2_umutex,
 4809         [UMTX_OP_SEM2_WAIT]     = __umtx_op_sem2_wait,
 4810         [UMTX_OP_SEM2_WAKE]     = __umtx_op_sem2_wake,
 4811         [UMTX_OP_SHM]           = __umtx_op_shm,
 4812         [UMTX_OP_ROBUST_LISTS]  = __umtx_op_robust_lists,
 4813 };
 4814 
 4815 static const struct umtx_copyops umtx_native_ops = {
 4816         .copyin_timeout = umtx_copyin_timeout,
 4817         .copyin_umtx_time = umtx_copyin_umtx_time,
 4818         .copyin_robust_lists = umtx_copyin_robust_lists,
 4819         .copyout_timeout = umtx_copyout_timeout,
 4820         .timespec_sz = sizeof(struct timespec),
 4821         .umtx_time_sz = sizeof(struct _umtx_time),
 4822 };
 4823 
 4824 #ifndef __i386__
 4825 static const struct umtx_copyops umtx_native_opsi386 = {
 4826         .copyin_timeout = umtx_copyin_timeouti386,
 4827         .copyin_umtx_time = umtx_copyin_umtx_timei386,
 4828         .copyin_robust_lists = umtx_copyin_robust_lists32,
 4829         .copyout_timeout = umtx_copyout_timeouti386,
 4830         .timespec_sz = sizeof(struct timespeci386),
 4831         .umtx_time_sz = sizeof(struct umtx_timei386),
 4832         .compat32 = true,
 4833 };
 4834 #endif
 4835 
 4836 #if defined(__i386__) || defined(__LP64__)
 4837 /* i386 can emulate other 32-bit archs, too! */
 4838 static const struct umtx_copyops umtx_native_opsx32 = {
 4839         .copyin_timeout = umtx_copyin_timeoutx32,
 4840         .copyin_umtx_time = umtx_copyin_umtx_timex32,
 4841         .copyin_robust_lists = umtx_copyin_robust_lists32,
 4842         .copyout_timeout = umtx_copyout_timeoutx32,
 4843         .timespec_sz = sizeof(struct timespecx32),
 4844         .umtx_time_sz = sizeof(struct umtx_timex32),
 4845         .compat32 = true,
 4846 };
 4847 
 4848 #ifdef COMPAT_FREEBSD32
 4849 #ifdef __amd64__
 4850 #define umtx_native_ops32       umtx_native_opsi386
 4851 #else
 4852 #define umtx_native_ops32       umtx_native_opsx32
 4853 #endif
 4854 #endif /* COMPAT_FREEBSD32 */
 4855 #endif /* __i386__ || __LP64__ */
 4856 
 4857 #define UMTX_OP__FLAGS  (UMTX_OP__32BIT | UMTX_OP__I386)
 4858 
 4859 static int
 4860 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
 4861     void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
 4862 {
 4863         struct _umtx_op_args uap = {
 4864                 .obj = obj,
 4865                 .op = op & ~UMTX_OP__FLAGS,
 4866                 .val = val,
 4867                 .uaddr1 = uaddr1,
 4868                 .uaddr2 = uaddr2
 4869         };
 4870 
 4871         if (uap.op >= nitems(op_table))
 4872                 return (EINVAL);
 4873         return ((*op_table[uap.op])(td, &uap, ops));
 4874 }
 4875 
 4876 int
 4877 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
 4878 {
 4879         const struct umtx_copyops *umtx_ops;
 4880 
 4881         umtx_ops = &umtx_native_ops;
 4882 #ifdef __LP64__
 4883         if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) {
 4884                 if ((uap->op & UMTX_OP__I386) != 0)
 4885                         umtx_ops = &umtx_native_opsi386;
 4886                 else
 4887                         umtx_ops = &umtx_native_opsx32;
 4888         }
 4889 #elif !defined(__i386__)
 4890         /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */
 4891         if ((uap->op & UMTX_OP__I386) != 0)
 4892                 umtx_ops = &umtx_native_opsi386;
 4893 #else
 4894         /* Likewise, UMTX_OP__I386 is a nop on i386. */
 4895         if ((uap->op & UMTX_OP__32BIT) != 0)
 4896                 umtx_ops = &umtx_native_opsx32;
 4897 #endif
 4898         return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
 4899             uap->uaddr2, umtx_ops));
 4900 }
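/*
 * An illustrative sketch, not part of kern_umtx.c: a 64-bit process
 * manipulating the structures of a 32-bit guest (e.g. an emulator) can
 * OR UMTX_OP__32BIT or UMTX_OP__I386 into op so the kernel selects the
 * matching copyops above; all parameter names here are hypothetical.
 */
#if 0
static int
wait_guest_word(uint32_t *guest_word, uint32_t expect,
    void *guest_timeout, size_t guest_timeout_size)
{

	return (_umtx_op(guest_word, UMTX_OP_WAIT_UINT | UMTX_OP__32BIT,
	    expect, (void *)guest_timeout_size, guest_timeout));
}
#endif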
 4901 
 4902 #ifdef COMPAT_FREEBSD32
 4903 #ifdef COMPAT_FREEBSD10
 4904 int
 4905 freebsd10_freebsd32_umtx_lock(struct thread *td,
 4906     struct freebsd10_freebsd32_umtx_lock_args *uap)
 4907 {
 4908         return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
 4909 }
 4910 
 4911 int
 4912 freebsd10_freebsd32_umtx_unlock(struct thread *td,
 4913     struct freebsd10_freebsd32_umtx_unlock_args *uap)
 4914 {
 4915         return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
 4916 }
 4917 #endif /* COMPAT_FREEBSD10 */
 4918 
 4919 int
 4920 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
 4921 {
 4922 
 4923         return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr,
 4924             uap->uaddr2, &umtx_native_ops32));
 4925 }
 4926 #endif /* COMPAT_FREEBSD32 */
 4927 
 4928 void
 4929 umtx_thread_init(struct thread *td)
 4930 {
 4931 
 4932         td->td_umtxq = umtxq_alloc();
 4933         td->td_umtxq->uq_thread = td;
 4934 }
 4935 
 4936 void
 4937 umtx_thread_fini(struct thread *td)
 4938 {
 4939 
 4940         umtxq_free(td->td_umtxq);
 4941 }
 4942 
 4943 /*
 4944  * Called when a new thread is created, e.g. during fork().
 4945  */
 4946 void
 4947 umtx_thread_alloc(struct thread *td)
 4948 {
 4949         struct umtx_q *uq;
 4950 
 4951         uq = td->td_umtxq;
 4952         uq->uq_inherited_pri = PRI_MAX;
 4953 
 4954         KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
 4955         KASSERT(uq->uq_thread == td, ("uq_thread != td"));
 4956         KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
 4957         KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
 4958 }
 4959 
 4960 /*
 4961  * exec() hook.
 4962  *
 4963  * Clear the robust lists for all of the process's threads rather
 4964  * than delaying the cleanup until thread exit, since the relevant
 4965  * address space is being destroyed right now.
 4966  */
 4967 void
 4968 umtx_exec(struct proc *p)
 4969 {
 4970         struct thread *td;
 4971 
 4972         KASSERT(p == curproc, ("need curproc"));
 4973         KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
 4974             (p->p_flag & P_STOPPED_SINGLE) != 0,
 4975             ("curproc must be single-threaded"));
 4976         /*
 4977          * There is no need to lock the list as only this thread can be
 4978          * running.
 4979          */
 4980         FOREACH_THREAD_IN_PROC(p, td) {
 4981                 KASSERT(td == curthread ||
 4982                     ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
 4983                     ("running thread %p %p", p, td));
 4984                 umtx_thread_cleanup(td);
 4985                 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
 4986         }
 4987 }
 4988 
 4989 /*
 4990  * Thread exit hook.
 4991  */
 4992 void
 4993 umtx_thread_exit(struct thread *td)
 4994 {
 4995 
 4996         umtx_thread_cleanup(td);
 4997 }
 4998 
 4999 static int
 5000 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
 5001 {
 5002         u_long res1;
 5003         uint32_t res32;
 5004         int error;
 5005 
 5006         if (compat32) {
 5007                 error = fueword32((void *)ptr, &res32);
 5008                 if (error == 0)
 5009                         res1 = res32;
 5010         } else {
 5011                 error = fueword((void *)ptr, &res1);
 5012         }
 5013         if (error == 0)
 5014                 *res = res1;
 5015         else
 5016                 error = EFAULT;
 5017         return (error);
 5018 }
 5019 
 5020 static void
 5021 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
 5022     bool compat32)
 5023 {
 5024         struct umutex32 m32;
 5025 
 5026         if (compat32) {
 5027                 memcpy(&m32, m, sizeof(m32));
 5028                 *rb_list = m32.m_rb_lnk;
 5029         } else {
 5030                 *rb_list = m->m_rb_lnk;
 5031         }
 5032 }
 5033 
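/*
 * Unlock one robust mutex at rbp on behalf of the exiting thread.
 * The next list entry is fetched into *rb_list before validation so
 * the caller always sees the link; an unowned entry is tolerated only
 * for the inactive slot.
 */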
 5034 static int
 5035 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
 5036     bool compat32)
 5037 {
 5038         struct umutex m;
 5039         int error;
 5040 
 5041         KASSERT(td->td_proc == curproc, ("need current vmspace"));
 5042         error = copyin((void *)rbp, &m, sizeof(m));
 5043         if (error != 0)
 5044                 return (error);
 5045         if (rb_list != NULL)
 5046                 umtx_read_rb_list(td, &m, rb_list, compat32);
 5047         if ((m.m_flags & UMUTEX_ROBUST) == 0)
 5048                 return (EINVAL);
 5049         if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
 5050                 /* inact is cleared after unlock, allow the inconsistency */
 5051                 return (inact ? 0 : EINVAL);
 5052         return (do_unlock_umutex(td, (struct umutex *)rbp, true));
 5053 }
 5054 
 5055 static void
 5056 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
 5057     const char *name, bool compat32)
 5058 {
 5059         int error, i;
 5060         uintptr_t rbp;
 5061         bool inact;
 5062 
 5063         if (rb_list == 0)
 5064                 return;
 5065         error = umtx_read_uptr(td, rb_list, &rbp, compat32);
 5066         for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
 5067                 if (rbp == *rb_inact) {
 5068                         inact = true;
 5069                         *rb_inact = 0;
 5070                 } else
 5071                         inact = false;
 5072                 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
 5073         }
 5074         if (i == umtx_max_rb && umtx_verbose_rb) {
 5075                 uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
 5076                     td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
 5077         }
 5078         if (error != 0 && umtx_verbose_rb) {
 5079                 uprintf("comm %s pid %d: handling %srb error %d\n",
 5080                     td->td_proc->p_comm, td->td_proc->p_pid, name, error);
 5081         }
 5082 }
 5083 
 5084 /*
 5085  * Clean up umtx data.
 5086  */
 5087 static void
 5088 umtx_thread_cleanup(struct thread *td)
 5089 {
 5090         struct umtx_q *uq;
 5091         struct umtx_pi *pi;
 5092         uintptr_t rb_inact;
 5093         bool compat32;
 5094 
 5095         /*
 5096          * Disown pi mutexes.
 5097          */
 5098         uq = td->td_umtxq;
 5099         if (uq != NULL) {
 5100                 if (uq->uq_inherited_pri != PRI_MAX ||
 5101                     !TAILQ_EMPTY(&uq->uq_pi_contested)) {
 5102                         mtx_lock(&umtx_lock);
 5103                         uq->uq_inherited_pri = PRI_MAX;
 5104                         while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
 5105                                 pi->pi_owner = NULL;
 5106                                 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
 5107                         }
 5108                         mtx_unlock(&umtx_lock);
 5109                 }
 5110                 sched_lend_user_prio_cond(td, PRI_MAX);
 5111         }
 5112 
 5113         compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
 5114         td->td_pflags2 &= ~TDP2_COMPAT32RB;
 5115 
 5116         if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
 5117                 return;
 5118 
 5119         /*
 5120          * Handle terminated robust mutexes.  Must be done after
 5121          * robust pi disown, otherwise unlock could see unowned
 5122          * entries.
 5123          */
 5124         rb_inact = td->td_rb_inact;
 5125         if (rb_inact != 0)
 5126                 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
 5127         umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
 5128         umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
 5129         if (rb_inact != 0)
 5130                 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
 5131 }
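/*
 * An illustrative userspace sketch, not part of kern_umtx.c, of the
 * list layout that umtx_cleanup_rb_list() walks: the registered head
 * word holds the address of the first struct umutex and each entry's
 * m_rb_lnk holds the next.  A threading library would enqueue a robust
 * mutex on lock roughly like this; robust_head is the hypothetical
 * head word registered via UMTX_OP_ROBUST_LISTS.
 */
#if 0
static void
enqueue_robust(struct umutex *m)
{

	m->m_rb_lnk = robust_head;	/* chain to the previous first entry */
	robust_head = (uintptr_t)m;	/* the new mutex becomes the head */
}
#endif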
