The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2015, 2016 The FreeBSD Foundation
    5  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
    6  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    7  * All rights reserved.
    8  *
    9  * Portions of this software were developed by Konstantin Belousov
   10  * under sponsorship from the FreeBSD Foundation.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice unmodified, this list of conditions, and the following
   17  *    disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include "opt_umtx_profiling.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/kernel.h>
   41 #include <sys/fcntl.h>
   42 #include <sys/file.h>
   43 #include <sys/filedesc.h>
   44 #include <sys/limits.h>
   45 #include <sys/lock.h>
   46 #include <sys/malloc.h>
   47 #include <sys/mman.h>
   48 #include <sys/mutex.h>
   49 #include <sys/priv.h>
   50 #include <sys/proc.h>
   51 #include <sys/resource.h>
   52 #include <sys/resourcevar.h>
   53 #include <sys/rwlock.h>
   54 #include <sys/sbuf.h>
   55 #include <sys/sched.h>
   56 #include <sys/smp.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/systm.h>
   59 #include <sys/sysproto.h>
   60 #include <sys/syscallsubr.h>
   61 #include <sys/taskqueue.h>
   62 #include <sys/time.h>
   63 #include <sys/eventhandler.h>
   64 #include <sys/umtx.h>
   65 #include <sys/umtxvar.h>
   66 
   67 #include <security/mac/mac_framework.h>
   68 
   69 #include <vm/vm.h>
   70 #include <vm/vm_param.h>
   71 #include <vm/pmap.h>
   72 #include <vm/vm_map.h>
   73 #include <vm/vm_object.h>
   74 
   75 #include <machine/atomic.h>
   76 #include <machine/cpu.h>
   77 
   78 #include <compat/freebsd32/freebsd32.h>
   79 #ifdef COMPAT_FREEBSD32
   80 #include <compat/freebsd32/freebsd32_proto.h>
   81 #endif
   82 
   83 #define _UMUTEX_TRY             1
   84 #define _UMUTEX_WAIT            2
   85 
   86 #ifdef UMTX_PROFILING
   87 #define UPROF_PERC_BIGGER(w, f, sw, sf)                                 \
   88         (((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
   89 #endif
   90 
   91 #define UMTXQ_LOCKED_ASSERT(uc)         mtx_assert(&(uc)->uc_lock, MA_OWNED)
   92 #ifdef INVARIANTS
   93 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {                              \
   94         struct umtxq_chain *uc;                                         \
   95                                                                         \
   96         uc = umtxq_getchain(key);                                       \
   97         mtx_assert(&uc->uc_lock, MA_OWNED);                             \
   98         KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));          \
   99 } while (0)
  100 #else
  101 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0)
  102 #endif
  103 
  104 /*
  105  * Don't propagate time-sharing priority, there is a security reason,
  106  * a user can simply introduce PI-mutex, let thread A lock the mutex,
  107  * and let another thread B block on the mutex, because B is
  108  * sleeping, its priority will be boosted, this causes A's priority to
  109  * be boosted via priority propagating too and will never be lowered even
  110  * if it is using 100%CPU, this is unfair to other processes.
  111  */
  112 
  113 #define UPRI(td)        (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
  114                           (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
  115                          PRI_MAX_TIMESHARE : (td)->td_user_pri)
  116 
  117 #define GOLDEN_RATIO_PRIME      2654404609U
  118 #ifndef UMTX_CHAINS
  119 #define UMTX_CHAINS             512
  120 #endif
  121 #define UMTX_SHIFTS             (__WORD_BIT - 9)
  122 
  123 #define GET_SHARE(flags)        \
  124     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
  125 
  126 #define BUSY_SPINS              200
  127 
/*
 * Per-ABI copyin/copyout operations for umtx syscall arguments, so
 * that different ABI entry points can share one implementation.
 * NOTE(review): compat32 presumably marks the 32-bit compat ABI
 * (COMPAT_FREEBSD32) -- confirm against the table initializers.
 */
struct umtx_copyops {
        int     (*copyin_timeout)(const void *uaddr, struct timespec *tsp);
        int     (*copyin_umtx_time)(const void *uaddr, size_t size,
            struct _umtx_time *tp);
        int     (*copyin_robust_lists)(const void *uaddr, size_t size,
            struct umtx_robust_lists_params *rbp);
        int     (*copyout_timeout)(void *uaddr, size_t size,
            struct timespec *tsp);
        const size_t    timespec_sz;    /* size of the ABI's timespec */
        const size_t    umtx_time_sz;   /* size of the ABI's _umtx_time */
        const bool      compat32;
};
  140 
  141 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
  142 _Static_assert(__offsetof(struct umutex, m_spare[0]) ==
  143     __offsetof(struct umutex32, m_spare[0]), "m_spare32");
  144 
  145 int umtx_shm_vnobj_persistent = 0;
  146 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
  147     &umtx_shm_vnobj_persistent, 0,
  148     "False forces destruction of umtx attached to file, on last close");
  149 static int umtx_max_rb = 1000;
  150 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
  151     &umtx_max_rb, 0,
  152     "Maximum number of robust mutexes allowed for each thread");
  153 
  154 static uma_zone_t               umtx_pi_zone;
  155 static struct umtxq_chain       umtxq_chains[2][UMTX_CHAINS];
  156 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
  157 static int                      umtx_pi_allocated;
  158 
  159 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  160     "umtx debug");
  161 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
  162     &umtx_pi_allocated, 0, "Allocated umtx_pi");
  163 static int umtx_verbose_rb = 1;
  164 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
  165     &umtx_verbose_rb, 0,
  166     "");
  167 
  168 #ifdef UMTX_PROFILING
  169 static long max_length;
  170 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
  171 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  172     "umtx chain stats");
  173 #endif
  174 
  175 static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
  176     const struct _umtx_time *umtxtime);
  177 
  178 static void umtx_shm_init(void);
  179 static void umtxq_sysinit(void *);
  180 static void umtxq_hash(struct umtx_key *key);
  181 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
  182     bool rb);
  183 static void umtx_thread_cleanup(struct thread *td);
  184 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
  185 
  186 #define umtxq_signal(key, nwake)        umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
  187 
  188 static struct mtx umtx_lock;
  189 
  190 #ifdef UMTX_PROFILING
  191 static void
  192 umtx_init_profiling(void)
  193 {
  194         struct sysctl_oid *chain_oid;
  195         char chain_name[10];
  196         int i;
  197 
  198         for (i = 0; i < UMTX_CHAINS; ++i) {
  199                 snprintf(chain_name, sizeof(chain_name), "%d", i);
  200                 chain_oid = SYSCTL_ADD_NODE(NULL,
  201                     SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
  202                     chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
  203                     "umtx hash stats");
  204                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  205                     "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
  206                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  207                     "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
  208         }
  209 }
  210 
/*
 * Sysctl handler reporting, for each of the two chain tables, the five
 * buckets with the largest recorded peak queue length (max_length),
 * expressed as a percentage of the table-wide total.
 */
static int
sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
{
        char buf[512];
        struct sbuf sb;
        struct umtxq_chain *uc;
        u_int fract, i, j, tot, whole;
        u_int sf0, sf1, sf2, sf3, sf4;  /* scaled fractional parts, top 5 */
        u_int si0, si1, si2, si3, si4;  /* bucket indices, top 5 */
        u_int sw0, sw1, sw2, sw3, sw4;  /* scaled whole parts, top 5 */

        sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
        for (i = 0; i < 2; i++) {
                /* Pass 1: total of all bucket peaks, denominator below. */
                tot = 0;
                for (j = 0; j < UMTX_CHAINS; ++j) {
                        uc = &umtxq_chains[i][j];
                        mtx_lock(&uc->uc_lock);
                        tot += uc->max_length;
                        mtx_unlock(&uc->uc_lock);
                }
                if (tot == 0)
                        sbuf_printf(&sb, "%u) Empty ", i);
                else {
                        /*
                         * Pass 2: keep the five largest shares.
                         * whole/tot is the integer percent, fract/tot a
                         * scaled fractional digit; UPROF_PERC_BIGGER
                         * compares by whole part first, then fraction.
                         */
                        sf0 = sf1 = sf2 = sf3 = sf4 = 0;
                        si0 = si1 = si2 = si3 = si4 = 0;
                        sw0 = sw1 = sw2 = sw3 = sw4 = 0;
                        for (j = 0; j < UMTX_CHAINS; j++) {
                                uc = &umtxq_chains[i][j];
                                mtx_lock(&uc->uc_lock);
                                whole = uc->max_length * 100;
                                mtx_unlock(&uc->uc_lock);
                                fract = (whole % tot) * 100;
                                if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
                                        sf0 = fract;
                                        si0 = j;
                                        sw0 = whole;
                                } else if (UPROF_PERC_BIGGER(whole, fract, sw1,
                                    sf1)) {
                                        sf1 = fract;
                                        si1 = j;
                                        sw1 = whole;
                                } else if (UPROF_PERC_BIGGER(whole, fract, sw2,
                                    sf2)) {
                                        sf2 = fract;
                                        si2 = j;
                                        sw2 = whole;
                                } else if (UPROF_PERC_BIGGER(whole, fract, sw3,
                                    sf3)) {
                                        sf3 = fract;
                                        si3 = j;
                                        sw3 = whole;
                                } else if (UPROF_PERC_BIGGER(whole, fract, sw4,
                                    sf4)) {
                                        sf4 = fract;
                                        si4 = j;
                                        sw4 = whole;
                                }
                        }
                        sbuf_printf(&sb, "queue %u:\n", i);
                        sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
                            sf0 / tot, si0);
                        sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
                            sf1 / tot, si1);
                        sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
                            sf2 / tot, si2);
                        sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
                            sf3 / tot, si3);
                        sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
                            sf4 / tot, si4);
                }
        }
        sbuf_trim(&sb);
        sbuf_finish(&sb);
        sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
        sbuf_delete(&sb);
        return (0);
}
  288 
  289 static int
  290 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
  291 {
  292         struct umtxq_chain *uc;
  293         u_int i, j;
  294         int clear, error;
  295 
  296         clear = 0;
  297         error = sysctl_handle_int(oidp, &clear, 0, req);
  298         if (error != 0 || req->newptr == NULL)
  299                 return (error);
  300 
  301         if (clear != 0) {
  302                 for (i = 0; i < 2; ++i) {
  303                         for (j = 0; j < UMTX_CHAINS; ++j) {
  304                                 uc = &umtxq_chains[i][j];
  305                                 mtx_lock(&uc->uc_lock);
  306                                 uc->length = 0;
  307                                 uc->max_length = 0;
  308                                 mtx_unlock(&uc->uc_lock);
  309                         }
  310                 }
  311         }
  312         return (0);
  313 }
  314 
  315 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
  316     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
  317     sysctl_debug_umtx_chains_clear, "I",
  318     "Clear umtx chains statistics");
  319 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
  320     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
  321     sysctl_debug_umtx_chains_peaks, "A",
  322     "Highest peaks in chains max length");
  323 #endif
  324 
  325 static void
  326 umtxq_sysinit(void *arg __unused)
  327 {
  328         int i, j;
  329 
  330         umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
  331                 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  332         for (i = 0; i < 2; ++i) {
  333                 for (j = 0; j < UMTX_CHAINS; ++j) {
  334                         mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
  335                                  MTX_DEF | MTX_DUPOK);
  336                         LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
  337                         LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
  338                         LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
  339                         TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
  340                         umtxq_chains[i][j].uc_busy = 0;
  341                         umtxq_chains[i][j].uc_waiters = 0;
  342 #ifdef UMTX_PROFILING
  343                         umtxq_chains[i][j].length = 0;
  344                         umtxq_chains[i][j].max_length = 0;
  345 #endif
  346                 }
  347         }
  348 #ifdef UMTX_PROFILING
  349         umtx_init_profiling();
  350 #endif
  351         mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
  352         umtx_shm_init();
  353 }
  354 
  355 struct umtx_q *
  356 umtxq_alloc(void)
  357 {
  358         struct umtx_q *uq;
  359 
  360         uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
  361         uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
  362             M_WAITOK | M_ZERO);
  363         TAILQ_INIT(&uq->uq_spare_queue->head);
  364         TAILQ_INIT(&uq->uq_pi_contested);
  365         uq->uq_inherited_pri = PRI_MAX;
  366         return (uq);
  367 }
  368 
/*
 * Free an umtx_q and the spare queue header attached to it.  The
 * spare must be present (i.e. the thread is not on any queue).
 */
void
umtxq_free(struct umtx_q *uq)
{

        MPASS(uq->uq_spare_queue != NULL);
        free(uq->uq_spare_queue, M_UMTX);
        free(uq, M_UMTX);
}
  377 
  378 static inline void
  379 umtxq_hash(struct umtx_key *key)
  380 {
  381         unsigned n;
  382 
  383         n = (uintptr_t)key->info.both.a + key->info.both.b;
  384         key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
  385 }
  386 
  387 struct umtxq_chain *
  388 umtxq_getchain(struct umtx_key *key)
  389 {
  390 
  391         if (key->type <= TYPE_SEM)
  392                 return (&umtxq_chains[1][key->hash]);
  393         return (&umtxq_chains[0][key->hash]);
  394 }
  395 
  396 /*
  397  * Set chain to busy state when following operation
  398  * may be blocked (kernel mutex can not be used).
  399  */
void
umtxq_busy(struct umtx_key *key)
{
        struct umtxq_chain *uc;

        uc = umtxq_getchain(key);
        mtx_assert(&uc->uc_lock, MA_OWNED);
        if (uc->uc_busy) {
#ifdef SMP
                /*
                 * On MP, first spin briefly with the chain lock
                 * dropped, hoping the current owner unbusies the
                 * chain quickly.  uc_busy is read unlocked here;
                 * the authoritative check is made under the lock
                 * in the sleep loop below.
                 */
                if (smp_cpus > 1) {
                        int count = BUSY_SPINS;
                        if (count > 0) {
                                umtxq_unlock(key);
                                while (uc->uc_busy && --count > 0)
                                        cpu_spinwait();
                                umtxq_lock(key);
                        }
                }
#endif
                /* Sleep until umtxq_unbusy() wakes a waiter. */
                while (uc->uc_busy) {
                        uc->uc_waiters++;
                        msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
                        uc->uc_waiters--;
                }
        }
        uc->uc_busy = 1;
}
  427 
  428 /*
  429  * Unbusy a chain.
  430  */
  431 void
  432 umtxq_unbusy(struct umtx_key *key)
  433 {
  434         struct umtxq_chain *uc;
  435 
  436         uc = umtxq_getchain(key);
  437         mtx_assert(&uc->uc_lock, MA_OWNED);
  438         KASSERT(uc->uc_busy != 0, ("not busy"));
  439         uc->uc_busy = 0;
  440         if (uc->uc_waiters)
  441                 wakeup_one(uc);
  442 }
  443 
/*
 * Unbusy a chain for callers that do not hold the chain lock:
 * acquire it, unbusy, and drop it again.
 */
void
umtxq_unbusy_unlocked(struct umtx_key *key)
{

        umtxq_lock(key);
        umtxq_unbusy(key);
        umtxq_unlock(key);
}
  452 
  453 static struct umtxq_queue *
  454 umtxq_queue_lookup(struct umtx_key *key, int q)
  455 {
  456         struct umtxq_queue *uh;
  457         struct umtxq_chain *uc;
  458 
  459         uc = umtxq_getchain(key);
  460         UMTXQ_LOCKED_ASSERT(uc);
  461         LIST_FOREACH(uh, &uc->uc_queue[q], link) {
  462                 if (umtx_key_match(&uh->key, key))
  463                         return (uh);
  464         }
  465 
  466         return (NULL);
  467 }
  468 
/*
 * Insert a thread onto the queue for its key.  Each umtx_q carries a
 * spare queue header: if a queue for the key already exists, the spare
 * is parked on the chain's spare list for later reuse; otherwise the
 * spare becomes the new queue header.  This avoids allocating here.
 */
void
umtxq_insert_queue(struct umtx_q *uq, int q)
{
        struct umtxq_queue *uh;
        struct umtxq_chain *uc;

        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
        uh = umtxq_queue_lookup(&uq->uq_key, q);
        if (uh != NULL) {
                /* Queue exists: donate our spare header to the chain. */
                LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
        } else {
                /* First waiter on this key: our spare becomes the queue. */
                uh = uq->uq_spare_queue;
                uh->key = uq->uq_key;
                LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
#ifdef UMTX_PROFILING
                uc->length++;
                if (uc->length > uc->max_length) {
                        uc->max_length = uc->length;
                        if (uc->max_length > max_length)
                                max_length = uc->max_length;
                }
#endif
        }
        uq->uq_spare_queue = NULL;

        TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
        uh->length++;
        uq->uq_flags |= UQF_UMTXQ;
        uq->uq_cur_queue = uh;
        return;
}
  502 
/*
 * Remove a thread from the queue it is on, if any.  The thread leaves
 * with a spare queue header re-attached: the now-empty header of its
 * queue if it was the last waiter, otherwise one reclaimed from the
 * chain's spare list (parked there by umtxq_insert_queue()).
 */
void
umtxq_remove_queue(struct umtx_q *uq, int q)
{
        struct umtxq_chain *uc;
        struct umtxq_queue *uh;

        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        if (uq->uq_flags & UQF_UMTXQ) {
                uh = uq->uq_cur_queue;
                TAILQ_REMOVE(&uh->head, uq, uq_link);
                uh->length--;
                uq->uq_flags &= ~UQF_UMTXQ;
                if (TAILQ_EMPTY(&uh->head)) {
                        KASSERT(uh->length == 0,
                            ("inconsistent umtxq_queue length"));
#ifdef UMTX_PROFILING
                        uc->length--;
#endif
                        /* Last waiter: unlink the header and keep it. */
                        LIST_REMOVE(uh, link);
                } else {
                        /* Queue still populated: take a spare instead. */
                        uh = LIST_FIRST(&uc->uc_spare_queue);
                        KASSERT(uh != NULL, ("uc_spare_queue is empty"));
                        LIST_REMOVE(uh, link);
                }
                uq->uq_spare_queue = uh;
                uq->uq_cur_queue = NULL;
        }
}
  532 
  533 /*
  534  * Check if there are multiple waiters
  535  */
  536 int
  537 umtxq_count(struct umtx_key *key)
  538 {
  539         struct umtxq_queue *uh;
  540 
  541         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  542         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  543         if (uh != NULL)
  544                 return (uh->length);
  545         return (0);
  546 }
  547 
  548 /*
  549  * Check if there are multiple PI waiters and returns first
  550  * waiter.
  551  */
  552 static int
  553 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
  554 {
  555         struct umtxq_queue *uh;
  556 
  557         *first = NULL;
  558         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  559         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  560         if (uh != NULL) {
  561                 *first = TAILQ_FIRST(&uh->head);
  562                 return (uh->length);
  563         }
  564         return (0);
  565 }
  566 
  567 /*
  568  * Wake up threads waiting on an userland object by a bit mask.
  569  */
  570 int
  571 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset)
  572 {
  573         struct umtxq_queue *uh;
  574         struct umtx_q *uq, *uq_temp;
  575         int ret;
  576 
  577         ret = 0;
  578         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  579         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  580         if (uh == NULL)
  581                 return (0);
  582         TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
  583                 if ((uq->uq_bitset & bitset) == 0)
  584                         continue;
  585                 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE);
  586                 wakeup_one(uq);
  587                 if (++ret >= n_wake)
  588                         break;
  589         }
  590         return (ret);
  591 }
  592 
  593 /*
  594  * Wake up threads waiting on an userland object.
  595  */
  596 
  597 static int
  598 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
  599 {
  600         struct umtxq_queue *uh;
  601         struct umtx_q *uq;
  602         int ret;
  603 
  604         ret = 0;
  605         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  606         uh = umtxq_queue_lookup(key, q);
  607         if (uh != NULL) {
  608                 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
  609                         umtxq_remove_queue(uq, q);
  610                         wakeup(uq);
  611                         if (++ret >= n_wake)
  612                                 return (ret);
  613                 }
  614         }
  615         return (ret);
  616 }
  617 
  618 /*
  619  * Wake up specified thread.
  620  */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{

        UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
        /* Dequeue before the wakeup so the waiter's loop exits. */
        umtxq_remove(uq);
        wakeup(uq);
}
  629 
  630 /*
  631  * Wake up a maximum of n_wake threads that are waiting on an userland
  632  * object identified by key. The remaining threads are removed from queue
  633  * identified by key and added to the queue identified by key2 (requeued).
  634  * The n_requeue specifies an upper limit on the number of threads that
  635  * are requeued to the second queue.
  636  */
  637 int
  638 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2,
  639     int n_requeue)
  640 {
  641         struct umtxq_queue *uh, *uh2;
  642         struct umtx_q *uq, *uq_temp;
  643         int ret;
  644 
  645         ret = 0;
  646         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  647         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2));
  648         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  649         uh2 = umtxq_queue_lookup(key2, UMTX_SHARED_QUEUE);
  650         if (uh == NULL)
  651                 return (0);
  652         TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
  653                 if (++ret <= n_wake) {
  654                         umtxq_remove(uq);
  655                         wakeup_one(uq);
  656                 } else {
  657                         umtxq_remove(uq);
  658                         uq->uq_key = *key2;
  659                         umtxq_insert(uq);
  660                         if (ret - n_wake == n_requeue)
  661                                 break;
  662                 }
  663         }
  664         return (ret);
  665 }
  666 
/*
 * Convert a timespec sleep duration into clock ticks by way of
 * timeval and tvtohz().
 */
static inline int
tstohz(const struct timespec *tsp)
{
        struct timeval tv;

        TIMESPEC_TO_TIMEVAL(&tv, tsp);
        return tvtohz(&tv);
}
  675 
  676 void
  677 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid,
  678     int absolute, const struct timespec *timeout)
  679 {
  680 
  681         timo->clockid = clockid;
  682         if (!absolute) {
  683                 timo->is_abs_real = false;
  684                 kern_clock_gettime(curthread, timo->clockid, &timo->cur);
  685                 timespecadd(&timo->cur, timeout, &timo->end);
  686         } else {
  687                 timo->end = *timeout;
  688                 timo->is_abs_real = clockid == CLOCK_REALTIME ||
  689                     clockid == CLOCK_REALTIME_FAST ||
  690                     clockid == CLOCK_REALTIME_PRECISE ||
  691                     clockid == CLOCK_SECOND;
  692         }
  693 }
  694 
  695 static void
  696 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
  697     const struct _umtx_time *umtxtime)
  698 {
  699 
  700         umtx_abs_timeout_init(timo, umtxtime->_clockid,
  701             (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
  702 }
  703 
/*
 * Convert the deadline in *timo into an sbintime and callout flags
 * suitable for msleep_sbt().  Returns ETIMEDOUT when the deadline has
 * already passed, 0 otherwise.
 */
static int
umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt,
    int *flags)
{
        struct bintime bt, bbt;
        struct timespec tts;
        sbintime_t rem;

        switch (timo->clockid) {

        /* Clocks that can be converted into absolute time. */
        case CLOCK_REALTIME:
        case CLOCK_REALTIME_PRECISE:
        case CLOCK_REALTIME_FAST:
        case CLOCK_MONOTONIC:
        case CLOCK_MONOTONIC_PRECISE:
        case CLOCK_MONOTONIC_FAST:
        case CLOCK_UPTIME:
        case CLOCK_UPTIME_PRECISE:
        case CLOCK_UPTIME_FAST:
        case CLOCK_SECOND:
                timespec2bintime(&timo->end, &bt);
                switch (timo->clockid) {
                case CLOCK_REALTIME:
                case CLOCK_REALTIME_PRECISE:
                case CLOCK_REALTIME_FAST:
                case CLOCK_SECOND:
                        /* Rebase wall-clock deadlines by boot time. */
                        getboottimebin(&bbt);
                        bintime_sub(&bt, &bbt);
                        break;
                }
                if (bt.sec < 0)
                        return (ETIMEDOUT);
                if (bt.sec >= (SBT_MAX >> 32)) {
                        /*
                         * Deadline does not fit in an sbintime;
                         * sbt == 0 requests an untimed sleep.
                         */
                        *sbt = 0;
                        *flags = 0;
                        return (0);
                }
                *sbt = bttosbt(bt);

                /*
                 * Check if the absolute time should be aligned to
                 * avoid firing multiple timer events in non-periodic
                 * timer mode.
                 */
                switch (timo->clockid) {
                case CLOCK_REALTIME_FAST:
                case CLOCK_MONOTONIC_FAST:
                case CLOCK_UPTIME_FAST:
                        rem = *sbt % tc_tick_sbt;
                        if (__predict_true(rem != 0))
                                *sbt += tc_tick_sbt - rem;
                        break;
                case CLOCK_SECOND:
                        rem = *sbt % SBT_1S;
                        if (__predict_true(rem != 0))
                                *sbt += SBT_1S - rem;
                        break;
                }
                *flags = C_ABSOLUTE;
                return (0);

        /* Clocks that has to be periodically polled. */
        case CLOCK_VIRTUAL:
        case CLOCK_PROF:
        case CLOCK_THREAD_CPUTIME_ID:
        case CLOCK_PROCESS_CPUTIME_ID:
        default:
                /*
                 * Relative tick-based sleep; the sleep loop in
                 * umtxq_sleep() re-polls the clock on each wakeup.
                 */
                kern_clock_gettime(curthread, timo->clockid, &timo->cur);
                if (timespeccmp(&timo->end, &timo->cur, <=))
                        return (ETIMEDOUT);
                timespecsub(&timo->end, &timo->cur, &tts);
                *sbt = tick_sbt * tstohz(&tts);
                *flags = C_HARDCLOCK;
                return (0);
        }
}
  781 
  782 static uint32_t
  783 umtx_unlock_val(uint32_t flags, bool rb)
  784 {
  785 
  786         if (rb)
  787                 return (UMUTEX_RB_OWNERDEAD);
  788         else if ((flags & UMUTEX_NONCONSISTENT) != 0)
  789                 return (UMUTEX_RB_NOTRECOV);
  790         else
  791                 return (UMUTEX_UNOWNED);
  792 
  793 }
  794 
  795 /*
  796  * Put thread into sleep state, before sleeping, check if
  797  * thread was removed from umtx queue.
  798  */
int
umtxq_sleep(struct umtx_q *uq, const char *wmesg,
    struct umtx_abs_timeout *timo)
{
        struct umtxq_chain *uc;
        sbintime_t sbt = 0;
        int error, flags = 0;

        uc = umtxq_getchain(&uq->uq_key);
        UMTXQ_LOCKED_ASSERT(uc);
        for (;;) {
                /* Already taken off the queue (woken): success. */
                if (!(uq->uq_flags & UQF_UMTXQ)) {
                        error = 0;
                        break;
                }
                if (timo != NULL) {
                        /*
                         * For absolute real-time deadlines, record the
                         * rtc generation so the sleep is interrupted
                         * if the wall clock is stepped.
                         */
                        if (timo->is_abs_real)
                                curthread->td_rtcgen =
                                    atomic_load_acq_int(&rtc_generation);
                        error = umtx_abs_timeout_getsbt(timo, &sbt, &flags);
                        if (error != 0)
                                break;
                }
                /* PDROP: the chain lock is released during the sleep. */
                error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg,
                    sbt, 0, flags);
                /* Re-lock the chain before re-checking state. */
                uc = umtxq_getchain(&uq->uq_key);
                mtx_lock(&uc->uc_lock);
                if (error == EINTR || error == ERESTART)
                        break;
                /*
                 * Absolute timeouts report EWOULDBLOCK on expiry;
                 * non-absolute ones loop and re-check the deadline
                 * via umtx_abs_timeout_getsbt() above.
                 */
                if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) {
                        error = ETIMEDOUT;
                        break;
                }
        }

        curthread->td_rtcgen = 0;
        return (error);
}
  837 
/*
 * Convert userspace address into unique logical address.
 *
 * THREAD_SHARE keys are identified by the owning vmspace plus the raw
 * address.  For PROCESS_SHARE, or AUTO_SHARE on a VM_INHERIT_SHARE
 * mapping, the key is the backing VM object and offset, so every
 * process mapping the same object computes the same key; a reference
 * is taken on the object and must be released via umtx_key_release().
 * Returns 0 on success, or EFAULT when the address is not mapped
 * writable.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
        struct thread *td = curthread;
        vm_map_t map;
        vm_map_entry_t entry;
        vm_pindex_t pindex;
        vm_prot_t prot;
        boolean_t wired;

        key->type = type;
        if (share == THREAD_SHARE) {
                key->shared = 0;
                key->info.private.vs = td->td_proc->p_vmspace;
                key->info.private.addr = (uintptr_t)addr;
        } else {
                MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
                map = &td->td_proc->p_vmspace->vm_map;
                if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
                    &entry, &key->info.shared.object, &pindex, &prot,
                    &wired) != KERN_SUCCESS) {
                        return (EFAULT);
                }

                if ((share == PROCESS_SHARE) ||
                    (share == AUTO_SHARE &&
                     VM_INHERIT_SHARE == entry->inheritance)) {
                        key->shared = 1;
                        /* Object-relative offset of the word. */
                        key->info.shared.offset = (vm_offset_t)addr -
                            entry->start + entry->offset;
                        vm_object_reference(key->info.shared.object);
                } else {
                        /* AUTO_SHARE on a private mapping: thread key. */
                        key->shared = 0;
                        key->info.private.vs = td->td_proc->p_vmspace;
                        key->info.private.addr = (uintptr_t)addr;
                }
                vm_map_lookup_done(map, entry);
        }

        /* Compute the hash used to pick the umtxq chain. */
        umtxq_hash(key);
        return (0);
}
  883 
  884 /*
  885  * Release key.
  886  */
  887 void
  888 umtx_key_release(struct umtx_key *key)
  889 {
  890         if (key->shared)
  891                 vm_object_deallocate(key->info.shared.object);
  892 }
  893 
  894 #ifdef COMPAT_FREEBSD10
/*
 * Lock a umtx object.
 *
 * COMPAT_FREEBSD10 simple-umtx acquire for thread identifier "id",
 * optionally bounded by an absolute CLOCK_REALTIME timeout.  Returns
 * 0 on success, EFAULT when the word is unmapped, ETIMEDOUT when the
 * timeout expires, and maps EINTR<->ERESTART depending on whether a
 * timeout was supplied (untimed locking restarts the syscall).
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
    const struct timespec *timeout)
{
        struct umtx_abs_timeout timo;
        struct umtx_q *uq;
        u_long owner;
        u_long old;
        int error = 0;

        uq = td->td_umtxq;
        if (timeout != NULL)
                umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

        /*
         * Care must be exercised when dealing with umtx structure. It
         * can fault on any access.
         */
        for (;;) {
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMTX_UNOWNED)
                        return (0);

                /* The address was invalid. */
                if (owner == -1)
                        return (EFAULT);

                /* If no one owns it but it is contested try to acquire it. */
                if (owner == UMTX_CONTESTED) {
                        owner = casuword(&umtx->u_owner,
                            UMTX_CONTESTED, id | UMTX_CONTESTED);

                        if (owner == UMTX_CONTESTED)
                                return (0);

                        /* The address was invalid. */
                        if (owner == -1)
                                return (EFAULT);

                        /* Honor a pending suspension request. */
                        error = thread_check_susp(td, false);
                        if (error != 0)
                                break;

                        /* If this failed the lock has changed, restart. */
                        continue;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        break;

                if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
                        AUTO_SHARE, &uq->uq_key)) != 0)
                        return (error);

                /* Queue ourselves before publishing the contested bit. */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either some one else has acquired the lock or it has been
                 * released.
                 */
                old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
                        return (EFAULT);
                }

                /*
                 * We set the contested bit, sleep. Otherwise the lock changed
                 * and we need to retry or we lost a race to the thread
                 * unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                if (old == owner)
                        error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
                            &timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);

                if (error == 0)
                        error = thread_check_susp(td, false);
        }

        if (timeout == NULL) {
                /* Mutex locking is restarted if it is interrupted. */
                if (error == EINTR)
                        error = ERESTART;
        } else {
                /* Timed-locking is not restarted. */
                if (error == ERESTART)
                        error = EINTR;
        }
        return (error);
}
 1012 
/*
 * Unlock a umtx object.
 *
 * Returns EFAULT when the word is unmapped, EPERM when the caller's
 * id does not match the owner field, EINVAL when the owner word
 * changed underneath the release CAS, 0 otherwise.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
        struct umtx_key key;
        u_long owner;
        u_long old;
        int error;
        int count;

        /*
         * Make sure we own this mtx.
         */
        owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMTX_CONTESTED) != id)
                return (EPERM);

        /* This should be done in userland */
        if ((owner & UMTX_CONTESTED) == 0) {
                old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
                if (old == -1)
                        return (EFAULT);
                if (old == owner)
                        return (0);
                /* CAS lost a race; fall through with the new value. */
                owner = old;
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
            &key)) != 0)
                return (error);

        /* Busy the chain so the waiter count stays meaningful. */
        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * there is zero or one thread only waiting for it.
         * Otherwise, it must be marked as contested.
         */
        old = casuword(&umtx->u_owner, owner,
            count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
        umtxq_lock(&key);
        umtxq_signal(&key,1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (old == -1)
                return (EFAULT);
        if (old != owner)
                return (EINVAL);
        return (0);
}
 1073 
 1074 #ifdef COMPAT_FREEBSD32
 1075 
/*
 * Lock a umtx object.
 *
 * 32-bit compat variant of do_lock_umtx() operating on a uint32_t
 * owner word; same return-value contract.
 *
 * NOTE(review): on a signal this path returns immediately (see the
 * "return (error)" below) whereas do_lock_umtx() breaks to the
 * EINTR/ERESTART mapping at the bottom — confirm whether this
 * asymmetry with the 64-bit variant is intentional.
 */
static int
do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
        const struct timespec *timeout)
{
        struct umtx_abs_timeout timo;
        struct umtx_q *uq;
        uint32_t owner;
        uint32_t old;
        int error = 0;

        uq = td->td_umtxq;

        if (timeout != NULL)
                umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);

        /*
         * Care must be exercised when dealing with umtx structure. It
         * can fault on any access.
         */
        for (;;) {
                /*
                 * Try the uncontested case.  This should be done in userland.
                 */
                owner = casuword32(m, UMUTEX_UNOWNED, id);

                /* The acquire succeeded. */
                if (owner == UMUTEX_UNOWNED)
                        return (0);

                /* The address was invalid. */
                if (owner == -1)
                        return (EFAULT);

                /* If no one owns it but it is contested try to acquire it. */
                if (owner == UMUTEX_CONTESTED) {
                        owner = casuword32(m,
                            UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
                        if (owner == UMUTEX_CONTESTED)
                                return (0);

                        /* The address was invalid. */
                        if (owner == -1)
                                return (EFAULT);

                        /* Honor a pending suspension request. */
                        error = thread_check_susp(td, false);
                        if (error != 0)
                                break;

                        /* If this failed the lock has changed, restart. */
                        continue;
                }

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        return (error);

                if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
                        AUTO_SHARE, &uq->uq_key)) != 0)
                        return (error);

                /* Queue ourselves before publishing the contested bit. */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unbusy(&uq->uq_key);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either some one else has acquired the lock or it has been
                 * released.
                 */
                old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

                /* The address was invalid. */
                if (old == -1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
                        return (EFAULT);
                }

                /*
                 * We set the contested bit, sleep. Otherwise the lock changed
                 * and we need to retry or we lost a race to the thread
                 * unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                if (old == owner)
                        error = umtxq_sleep(uq, "umtx", timeout == NULL ?
                            NULL : &timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);

                if (error == 0)
                        error = thread_check_susp(td, false);
        }

        if (timeout == NULL) {
                /* Mutex locking is restarted if it is interrupted. */
                if (error == EINTR)
                        error = ERESTART;
        } else {
                /* Timed-locking is not restarted. */
                if (error == ERESTART)
                        error = EINTR;
        }
        return (error);
}
 1193 
/*
 * Unlock a umtx object.
 *
 * 32-bit compat variant of do_unlock_umtx(); same return-value
 * contract (EFAULT / EPERM / EINVAL / 0).
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
        struct umtx_key key;
        uint32_t owner;
        uint32_t old;
        int error;
        int count;

        /*
         * Make sure we own this mtx.
         */
        owner = fuword32(m);
        if (owner == -1)
                return (EFAULT);

        if ((owner & ~UMUTEX_CONTESTED) != id)
                return (EPERM);

        /* This should be done in userland */
        if ((owner & UMUTEX_CONTESTED) == 0) {
                old = casuword32(m, owner, UMUTEX_UNOWNED);
                if (old == -1)
                        return (EFAULT);
                if (old == owner)
                        return (0);
                /* CAS lost a race; fall through with the new value. */
                owner = old;
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
                &key)) != 0)
                return (error);

        /* Busy the chain so the waiter count stays meaningful. */
        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * there is zero or one thread only waiting for it.
         * Otherwise, it must be marked as contested.
         */
        old = casuword32(m, owner,
                count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
        umtxq_lock(&key);
        umtxq_signal(&key,1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (old == -1)
                return (EFAULT);
        if (old != owner)
                return (EINVAL);
        return (0);
}
 1254 #endif  /* COMPAT_FREEBSD32 */
 1255 #endif  /* COMPAT_FREEBSD10 */
 1256 
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * compat32 selects a 32-bit fetch of the word at addr; is_private
 * selects a THREAD_SHARE key instead of AUTO_SHARE.  The thread is
 * queued before the word is read so a concurrent wake cannot be
 * missed.  ERESTART is mapped to EINTR since a restarted wait would
 * re-test a possibly changed value.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
        struct umtx_abs_timeout timo;
        struct umtx_q *uq;
        u_long tmp;
        uint32_t tmp32;
        int error = 0;

        uq = td->td_umtxq;
        if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
            is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
                return (error);

        if (timeout != NULL)
                umtx_abs_timeout_init2(&timo, timeout);

        /* Queue first, then read the word, to close the wake race. */
        umtxq_lock(&uq->uq_key);
        umtxq_insert(uq);
        umtxq_unlock(&uq->uq_key);
        if (compat32 == 0) {
                error = fueword(addr, &tmp);
                if (error != 0)
                        error = EFAULT;
        } else {
                error = fueword32(addr, &tmp32);
                if (error == 0)
                        tmp = tmp32;
                else
                        error = EFAULT;
        }
        umtxq_lock(&uq->uq_key);
        if (error == 0) {
                /* Only sleep if the word still holds the expected value. */
                if (tmp == id)
                        error = umtxq_sleep(uq, "uwait", timeout == NULL ?
                            NULL : &timo);
                /* Removed from the queue means we were woken: success. */
                if ((uq->uq_flags & UQF_UMTXQ) == 0)
                        error = 0;
                else
                        umtxq_remove(uq);
        } else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
                umtxq_remove(uq);
        }
        umtxq_unlock(&uq->uq_key);
        umtx_key_release(&uq->uq_key);
        if (error == ERESTART)
                error = EINTR;
        return (error);
}
 1310 
 1311 /*
 1312  * Wake up threads sleeping on the specified address.
 1313  */
 1314 int
 1315 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
 1316 {
 1317         struct umtx_key key;
 1318         int ret;
 1319 
 1320         if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
 1321             is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
 1322                 return (ret);
 1323         umtxq_lock(&key);
 1324         umtxq_signal(&key, n_wake);
 1325         umtxq_unlock(&key);
 1326         umtx_key_release(&key);
 1327         return (0);
 1328 }
 1329 
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * mode selects the flavor: _UMUTEX_TRY returns EBUSY instead of
 * sleeping; _UMUTEX_WAIT merely waits until the mutex looks
 * acquirable without taking it.  Terminated robust mutexes are
 * surfaced as EOWNERDEAD (with the lock acquired) or
 * ENOTRECOVERABLE.  timeout, when non-NULL, bounds the sleep.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
        struct umtx_abs_timeout timo;
        struct umtx_q *uq;
        uint32_t owner, old, id;
        int error, rv;

        id = td->td_tid;
        uq = td->td_umtxq;
        error = 0;
        if (timeout != NULL)
                umtx_abs_timeout_init2(&timo, timeout);

        /*
         * Care must be exercised when dealing with umtx structure. It
         * can fault on any access.
         */
        for (;;) {
                rv = fueword32(&m->m_owner, &owner);
                if (rv == -1)
                        return (EFAULT);
                if (mode == _UMUTEX_WAIT) {
                        /* Wait mode: done as soon as it looks takeable. */
                        if (owner == UMUTEX_UNOWNED ||
                            owner == UMUTEX_CONTESTED ||
                            owner == UMUTEX_RB_OWNERDEAD ||
                            owner == UMUTEX_RB_NOTRECOV)
                                return (0);
                } else {
                        /*
                         * Robust mutex terminated.  Kernel duty is to
                         * return EOWNERDEAD to the userspace.  The
                         * umutex.m_flags UMUTEX_NONCONSISTENT is set
                         * by the common userspace code.
                         */
                        if (owner == UMUTEX_RB_OWNERDEAD) {
                                rv = casueword32(&m->m_owner,
                                    UMUTEX_RB_OWNERDEAD, &owner,
                                    id | UMUTEX_CONTESTED);
                                if (rv == -1)
                                        return (EFAULT);
                                if (rv == 0) {
                                        MPASS(owner == UMUTEX_RB_OWNERDEAD);
                                        return (EOWNERDEAD); /* success */
                                }
                                MPASS(rv == 1);
                                rv = thread_check_susp(td, false);
                                if (rv != 0)
                                        return (rv);
                                continue;
                        }
                        if (owner == UMUTEX_RB_NOTRECOV)
                                return (ENOTRECOVERABLE);

                        /*
                         * Try the uncontested case.  This should be
                         * done in userland.
                         */
                        rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
                            &owner, id);
                        /* The address was invalid. */
                        if (rv == -1)
                                return (EFAULT);

                        /* The acquire succeeded. */
                        if (rv == 0) {
                                MPASS(owner == UMUTEX_UNOWNED);
                                return (0);
                        }

                        /*
                         * If no one owns it but it is contested try
                         * to acquire it.
                         */
                        MPASS(rv == 1);
                        if (owner == UMUTEX_CONTESTED) {
                                rv = casueword32(&m->m_owner,
                                    UMUTEX_CONTESTED, &owner,
                                    id | UMUTEX_CONTESTED);
                                /* The address was invalid. */
                                if (rv == -1)
                                        return (EFAULT);
                                if (rv == 0) {
                                        MPASS(owner == UMUTEX_CONTESTED);
                                        return (0);
                                }
                                if (rv == 1) {
                                        rv = thread_check_susp(td, false);
                                        if (rv != 0)
                                                return (rv);
                                }

                                /*
                                 * If this failed the lock has
                                 * changed, restart.
                                 */
                                continue;
                        }

                        /* rv == 1 but not contested, likely store failure */
                        rv = thread_check_susp(td, false);
                        if (rv != 0)
                                return (rv);
                }

                if (mode == _UMUTEX_TRY)
                        return (EBUSY);

                /*
                 * If we caught a signal, we have retried and now
                 * exit immediately.
                 */
                if (error != 0)
                        return (error);

                if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
                    GET_SHARE(flags), &uq->uq_key)) != 0)
                        return (error);

                /* Queue ourselves before publishing the contested bit. */
                umtxq_lock(&uq->uq_key);
                umtxq_busy(&uq->uq_key);
                umtxq_insert(uq);
                umtxq_unlock(&uq->uq_key);

                /*
                 * Set the contested bit so that a release in user space
                 * knows to use the system call for unlock.  If this fails
                 * either some one else has acquired the lock or it has been
                 * released.
                 */
                rv = casueword32(&m->m_owner, owner, &old,
                    owner | UMUTEX_CONTESTED);

                /* The address was invalid or casueword failed to store. */
                if (rv == -1 || rv == 1) {
                        umtxq_lock(&uq->uq_key);
                        umtxq_remove(uq);
                        umtxq_unbusy(&uq->uq_key);
                        umtxq_unlock(&uq->uq_key);
                        umtx_key_release(&uq->uq_key);
                        if (rv == -1)
                                return (EFAULT);
                        if (rv == 1) {
                                rv = thread_check_susp(td, false);
                                if (rv != 0)
                                        return (rv);
                        }
                        continue;
                }

                /*
                 * We set the contested bit, sleep. Otherwise the lock changed
                 * and we need to retry or we lost a race to the thread
                 * unlocking the umtx.
                 */
                umtxq_lock(&uq->uq_key);
                umtxq_unbusy(&uq->uq_key);
                MPASS(old == owner);
                error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
                    NULL : &timo);
                umtxq_remove(uq);
                umtxq_unlock(&uq->uq_key);
                umtx_key_release(&uq->uq_key);

                if (error == 0)
                        error = thread_check_susp(td, false);
        }

        /* Unreachable: the loop only exits via return statements. */
        return (0);
}
 1504 
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * rb requests the robust-cleanup release value (see
 * umtx_unlock_val()).  Returns EFAULT on an unmapped word, EPERM if
 * the caller does not own the mutex, EINVAL if the owner word changed
 * behind the contested-path CAS, 0 otherwise.  A CAS that merely
 * failed to store retries from "again".
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
        struct umtx_key key;
        uint32_t owner, old, id, newlock;
        int error, count;

        id = td->td_tid;

again:
        /*
         * Make sure we own this mtx.
         */
        error = fueword32(&m->m_owner, &owner);
        if (error == -1)
                return (EFAULT);

        if ((owner & ~UMUTEX_CONTESTED) != id)
                return (EPERM);

        newlock = umtx_unlock_val(flags, rb);
        if ((owner & UMUTEX_CONTESTED) == 0) {
                /* Uncontested: a plain CAS releases the mutex. */
                error = casueword32(&m->m_owner, owner, &old, newlock);
                if (error == -1)
                        return (EFAULT);
                if (error == 1) {
                        /* Store failed; check suspension and retry. */
                        error = thread_check_susp(td, false);
                        if (error != 0)
                                return (error);
                        goto again;
                }
                MPASS(old == owner);
                return (0);
        }

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
            &key)) != 0)
                return (error);

        /* Busy the chain so the waiter count stays meaningful. */
        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);

        /*
         * When unlocking the umtx, it must be marked as unowned if
         * there is zero or one thread only waiting for it.
         * Otherwise, it must be marked as contested.
         */
        if (count > 1)
                newlock |= UMUTEX_CONTESTED;
        error = casueword32(&m->m_owner, owner, &old, newlock);
        umtxq_lock(&key);
        umtxq_signal(&key, 1);
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        if (error == -1)
                return (EFAULT);
        if (error == 1) {
                if (old != owner)
                        return (EINVAL);
                error = thread_check_susp(td, false);
                if (error != 0)
                        return (error);
                goto again;
        }
        return (0);
}
 1578 
/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 *
 * If the mutex is unowned (possibly robust-terminated), clears a
 * stale contested bit when there is at most one waiter and no robust
 * termination, then signals one waiter if any are queued.  Returns 0
 * on success or when the mutex is still owned; EFAULT on an unmapped
 * word.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
        struct umtx_key key;
        uint32_t owner;
        uint32_t flags;
        int error;
        int count;

again:
        error = fueword32(&m->m_owner, &owner);
        if (error == -1)
                return (EFAULT);

        /* Still owned by a live thread: nothing to do. */
        if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
            owner != UMUTEX_RB_NOTRECOV)
                return (0);

        error = fueword32(&m->m_flags, &flags);
        if (error == -1)
                return (EFAULT);

        /* We should only ever be in here for contested locks */
        if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
            &key)) != 0)
                return (error);

        umtxq_lock(&key);
        umtxq_busy(&key);
        count = umtxq_count(&key);
        umtxq_unlock(&key);

        /*
         * With at most one waiter and no robust termination, drop the
         * contested bit so userland can use the fast unlock path.
         */
        if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
            owner != UMUTEX_RB_NOTRECOV) {
                error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
                    UMUTEX_UNOWNED);
                if (error == -1) {
                        error = EFAULT;
                } else if (error == 1) {
                        /* Store failed; unwind, check suspension, retry. */
                        umtxq_lock(&key);
                        umtxq_unbusy(&key);
                        umtxq_unlock(&key);
                        umtx_key_release(&key);
                        error = thread_check_susp(td, false);
                        if (error != 0)
                                return (error);
                        goto again;
                }
        }

        umtxq_lock(&key);
        if (error == 0 && count != 0) {
                MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
                    owner == UMUTEX_RB_OWNERDEAD ||
                    owner == UMUTEX_RB_NOTRECOV);
                umtxq_signal(&key, 1);
        }
        umtxq_unbusy(&key);
        umtxq_unlock(&key);
        umtx_key_release(&key);
        return (error);
}
 1645 
 1646 /*
 1647  * Check if the mutex has waiters and tries to fix contention bit.
 1648  */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	/* Map the protocol/robust flag combination to a key type. */
	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	owner = 0;
	/* Busy the chain so the waiter count cannot change under us. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair contention bit if there is a waiter, this means
	 * the mutex is still being referenced by userland code,
	 * otherwise don't update any memory.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		/* CAS the contested bit in; 1 means we lost the race. */
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		/* Retry with the freshly observed owner word. */
		owner = old;
		error = thread_check_susp(td, false);
	}

	umtxq_lock(&key);
	if (error == EFAULT) {
		/* On a fault wake everybody so no waiter is stranded. */
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
 1724 
 1725 struct umtx_pi *
 1726 umtx_pi_alloc(int flags)
 1727 {
 1728         struct umtx_pi *pi;
 1729 
 1730         pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
 1731         TAILQ_INIT(&pi->pi_blocked);
 1732         atomic_add_int(&umtx_pi_allocated, 1);
 1733         return (pi);
 1734 }
 1735 
void
umtx_pi_free(struct umtx_pi *pi)
{
	/* Return the record to the zone and drop the allocation counter. */
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}
 1742 
 1743 /*
 1744  * Adjust the thread's position on a pi_state after its priority has been
 1745  * changed.
 1746  */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		/* Walk forward to the first entry of strictly lower prio. */
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		/* Reinsert, keeping the queue sorted by priority. */
		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}
 1787 
 1788 static struct umtx_pi *
 1789 umtx_pi_next(struct umtx_pi *pi)
 1790 {
 1791         struct umtx_q *uq_owner;
 1792 
 1793         if (pi->pi_owner == NULL)
 1794                 return (NULL);
 1795         uq_owner = pi->pi_owner->td_umtxq;
 1796         if (uq_owner == NULL)
 1797                 return (NULL);
 1798         return (uq_owner->uq_pi_blocked);
 1799 }
 1800 
 1801 /*
 1802  * Floyd's Cycle-Finding Algorithm.
 1803  */
static bool
umtx_pi_check_loop(struct umtx_pi *pi)
{
	struct umtx_pi *pi1;	/* fast iterator */

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (false);
	pi1 = pi;
	/*
	 * Tortoise-and-hare walk of the blocking chain: "pi" advances one
	 * link per iteration, "pi1" two.  If the chain is cyclic the two
	 * iterators eventually meet; if either falls off the end the
	 * chain is loop-free.
	 */
	for (;;) {
		pi = umtx_pi_next(pi);
		if (pi == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		pi1 = umtx_pi_next(pi1);
		if (pi1 == NULL)
			break;
		if (pi == pi1)
			return (true);
	}
	return (false);
}
 1828 
 1829 /*
 1830  * Propagate priority when a thread is blocked on POSIX
 1831  * PI mutex.
 1832  */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;
	/* A cyclic blocking chain would make this walk spin forever. */
	if (umtx_pi_check_loop(pi))
		return;

	/*
	 * Walk the chain of owners, lending "pri" to each owner whose
	 * current lent priority is worse, until the chain ends or the
	 * lending no longer improves anything.
	 */
	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			/* Owner already runs at least this well; stop. */
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		if (pi == NULL)
			break;
		/* Resort td on the list if needed. */
		umtx_pi_adjust_thread(pi, td);
	}
}
 1877 
 1878 /*
 1879  * Unpropagate priority for a PI mutex when a thread blocked on
 1880  * it is interrupted by signal or resumed by others.
 1881  */
static void
umtx_repropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	/* Bail out on a cyclic blocking chain rather than spin. */
	if (umtx_pi_check_loop(pi))
		return;
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/*
		 * Recompute the best (numerically lowest) priority among
		 * the top waiters of every PI mutex the owner holds.
		 */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_lend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Continue up the chain if the owner is itself blocked. */
		if ((pi = uq_owner->uq_pi_blocked) != NULL)
			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
	}
}
 1914 
 1915 /*
 1916  * Insert a PI mutex into owned list.
 1917  */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	/* A PI mutex must be unowned before ownership is assigned. */
	MPASS(pi->pi_owner == NULL);
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}
 1929 
 1930 /*
 1931  * Disown a PI mutex, and remove it from the owned list.
 1932  */
static void
umtx_pi_disown(struct umtx_pi *pi)
{

	mtx_assert(&umtx_lock, MA_OWNED);
	/* Unlink from the owner's contested list before clearing owner. */
	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
	pi->pi_owner = NULL;
}
 1941 
 1942 /*
 1943  * Claim ownership of a PI mutex.
 1944  */
int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq;
	int pri;

	mtx_lock(&umtx_lock);
	/* Claiming a mutex we already own is a no-op. */
	if (pi->pi_owner == owner) {
		mtx_unlock(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	/* Inherit the priority of the top waiter, if it is better. */
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock(&umtx_lock);
	return (0);
}
 1976 
 1977 /*
 1978  * Adjust a thread's order position in its blocked PI mutex,
 1979  * this may result new priority propagating process.
 1980  */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		/* Resort td in the waiter queue, then re-lend priorities. */
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}
 1999 
 2000 /*
 2001  * Sleep on a PI mutex.
 2002  */
int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	/*
	 * If the PI mutex has no recorded owner yet, look up the thread
	 * named by the userland owner word and record it.  The umtx lock
	 * must be dropped around tdfind() and the owner rechecked after
	 * reacquiring it.
	 */
	if (pi->pi_owner == NULL) {
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			/* tdfind() returned the proc locked. */
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/* Insert into the blocked queue, kept sorted by priority. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	/* Mark ourselves PI-blocked and lend priority up the chain. */
	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	/* Undo the PI-blocked state and recompute lent priorities. */
	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
 2067 
 2068 /*
 2069  * Add reference count for a PI mutex.
 2070  */
void
umtx_pi_ref(struct umtx_pi *pi)
{

	/* The chain lock serializes refcount manipulation. */
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}
 2078 
 2079 /*
 2080  * Decrease reference count for a PI mutex, if the counter
 2081  * is decreased to zero, its memory space is freed.
 2082  */
void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		/* Last reference: disown if needed, unhash, and free. */
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}
 2102 
 2103 /*
 2104  * Find a PI mutex in hash table.
 2105  */
 2106 struct umtx_pi *
 2107 umtx_pi_lookup(struct umtx_key *key)
 2108 {
 2109         struct umtxq_chain *uc;
 2110         struct umtx_pi *pi;
 2111 
 2112         uc = umtxq_getchain(key);
 2113         UMTXQ_LOCKED_ASSERT(uc);
 2114 
 2115         TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
 2116                 if (umtx_key_match(&pi->pi_key, key)) {
 2117                         return (pi);
 2118                 }
 2119         }
 2120         return (NULL);
 2121 }
 2122 
 2123 /*
 2124  * Insert a PI mutex into hash table.
 2125  */
void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	/* The chain lock protects the hash list. */
	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
 2135 
 2136 /*
 2137  * Drop a PI mutex and wakeup a top waiter.
 2138  */
int
umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count)
{
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	int pri;

	UMTXQ_ASSERT_LOCKED_BUSY(key);
	*count = umtxq_count_pi(key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		/*
		 * Only the owner may drop the mutex, except for a robust
		 * (rb) cleanup of a mutex that has no recorded owner.
		 */
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute our own lent priority from the remaining
		 * PI mutexes we still own.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	return (0);
}
 2202 
 2203 /*
 2204  * Lock a PI mutex.
 2205  */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Find or create the kernel-side PI record for this key.  The
	 * M_NOWAIT attempt avoids dropping the chain lock; on failure
	 * we drop the lock, allocate with M_WAITOK, and re-check for a
	 * record that raced in while we slept.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				/* Lost the allocation race; use theirs. */
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		/* A robust mutex left unrecoverable cannot be locked. */
		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Nobody owns it, but the acquire failed. This can happen
		 * with ll/sc atomics.
		 */
		if (owner == UMUTEX_UNOWNED) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to the pending signal with suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed the lock could
				 * changed, restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			/* Record kernel-side ownership of the PI mutex. */
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		/* Self-deadlock: we already hold this mutex. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible there.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	/* Drop our reference on the PI record and release the key. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
 2416 
 2417 /*
 2418  * Unlock a PI mutex.
 2419  */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t id, new_owner, old, owner;
	int count, error;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			/* CAS raced; recheck ownership from the top. */
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Hand off PI ownership and wake the top waiter. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	error = umtx_pi_drop(td, &key, rb, &count);
	if (error != 0) {
		umtxq_unbusy(&key);
		umtxq_unlock(&key);
		umtx_key_release(&key);
		/* userland messed the mutex */
		return (error);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		/* Word changed underneath us; retry unless suspended. */
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}
 2499 
 2500 /*
 2501  * Lock a PP mutex.
 2502  */
 2503 static int
 2504 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
 2505     struct _umtx_time *timeout, int try)
 2506 {
 2507         struct umtx_abs_timeout timo;
 2508         struct umtx_q *uq, *uq2;
 2509         struct umtx_pi *pi;
 2510         uint32_t ceiling;
 2511         uint32_t owner, id;
 2512         int error, pri, old_inherited_pri, su, rv;
 2513 
 2514         id = td->td_tid;
 2515         uq = td->td_umtxq;
 2516         if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
 2517             TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
 2518             &uq->uq_key)) != 0)
 2519                 return (error);
 2520 
 2521         if (timeout != NULL)
 2522                 umtx_abs_timeout_init2(&timo, timeout);
 2523 
 2524         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
 2525         for (;;) {
 2526                 old_inherited_pri = uq->uq_inherited_pri;
 2527                 umtxq_lock(&uq->uq_key);
 2528                 umtxq_busy(&uq->uq_key);
 2529                 umtxq_unlock(&uq->uq_key);
 2530 
 2531                 rv = fueword32(&m->m_ceilings[0], &ceiling);
 2532                 if (rv == -1) {
 2533                         error = EFAULT;
 2534                         goto out;
 2535                 }
 2536                 ceiling = RTP_PRIO_MAX - ceiling;
 2537                 if (ceiling > RTP_PRIO_MAX) {
 2538                         error = EINVAL;
 2539                         goto out;
 2540                 }
 2541 
 2542                 mtx_lock(&umtx_lock);
 2543                 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
 2544                         mtx_unlock(&umtx_lock);
 2545                         error = EINVAL;
 2546                         goto out;
 2547                 }
 2548                 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
 2549                         uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
 2550                         thread_lock(td);
 2551                         if (uq->uq_inherited_pri < UPRI(td))
 2552                                 sched_lend_user_prio(td, uq->uq_inherited_pri);
 2553                         thread_unlock(td);
 2554                 }
 2555                 mtx_unlock(&umtx_lock);
 2556 
 2557                 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
 2558                     id | UMUTEX_CONTESTED);
 2559                 /* The address was invalid. */
 2560                 if (rv == -1) {
 2561                         error = EFAULT;
 2562                         break;
 2563                 }
 2564                 if (rv == 0) {
 2565                         MPASS(owner == UMUTEX_CONTESTED);
 2566                         error = 0;
 2567                         break;
 2568                 }
 2569                 /* rv == 1 */
 2570                 if (owner == UMUTEX_RB_OWNERDEAD) {
 2571                         rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
 2572                             &owner, id | UMUTEX_CONTESTED);
 2573                         if (rv == -1) {
 2574                                 error = EFAULT;
 2575                                 break;
 2576                         }
 2577                         if (rv == 0) {
 2578                                 MPASS(owner == UMUTEX_RB_OWNERDEAD);
 2579                                 error = EOWNERDEAD; /* success */
 2580                                 break;
 2581                         }
 2582 
 2583                         /*
 2584                          *  rv == 1, only check for suspension if we
 2585                          *  did not already catched a signal.  If we
 2586                          *  get an error from the check, the same
 2587                          *  condition is checked by the umtxq_sleep()
 2588                          *  call below, so we should obliterate the
 2589                          *  error to not skip the last loop iteration.
 2590                          */
 2591                         if (error == 0) {
 2592                                 error = thread_check_susp(td, false);
 2593                                 if (error == 0) {
 2594                                         if (try != 0)
 2595                                                 error = EBUSY;
 2596                                         else
 2597                                                 continue;
 2598                                 }
 2599                                 error = 0;
 2600                         }
 2601                 } else if (owner == UMUTEX_RB_NOTRECOV) {
 2602                         error = ENOTRECOVERABLE;
 2603                 }
 2604 
 2605                 if (try != 0)
 2606                         error = EBUSY;
 2607 
 2608                 /*
 2609                  * If we caught a signal, we have retried and now
 2610                  * exit immediately.
 2611                  */
 2612                 if (error != 0)
 2613                         break;
 2614 
 2615                 umtxq_lock(&uq->uq_key);
 2616                 umtxq_insert(uq);
 2617                 umtxq_unbusy(&uq->uq_key);
 2618                 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
 2619                     NULL : &timo);
 2620                 umtxq_remove(uq);
 2621                 umtxq_unlock(&uq->uq_key);
 2622 
 2623                 mtx_lock(&umtx_lock);
 2624                 uq->uq_inherited_pri = old_inherited_pri;
 2625                 pri = PRI_MAX;
 2626                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2627                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2628                         if (uq2 != NULL) {
 2629                                 if (pri > UPRI(uq2->uq_thread))
 2630                                         pri = UPRI(uq2->uq_thread);
 2631                         }
 2632                 }
 2633                 if (pri > uq->uq_inherited_pri)
 2634                         pri = uq->uq_inherited_pri;
 2635                 thread_lock(td);
 2636                 sched_lend_user_prio(td, pri);
 2637                 thread_unlock(td);
 2638                 mtx_unlock(&umtx_lock);
 2639         }
 2640 
 2641         if (error != 0 && error != EOWNERDEAD) {
 2642                 mtx_lock(&umtx_lock);
 2643                 uq->uq_inherited_pri = old_inherited_pri;
 2644                 pri = PRI_MAX;
 2645                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2646                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2647                         if (uq2 != NULL) {
 2648                                 if (pri > UPRI(uq2->uq_thread))
 2649                                         pri = UPRI(uq2->uq_thread);
 2650                         }
 2651                 }
 2652                 if (pri > uq->uq_inherited_pri)
 2653                         pri = uq->uq_inherited_pri;
 2654                 thread_lock(td);
 2655                 sched_lend_user_prio(td, pri);
 2656                 thread_unlock(td);
 2657                 mtx_unlock(&umtx_lock);
 2658         }
 2659 
 2660 out:
 2661         umtxq_unbusy_unlocked(&uq->uq_key);
 2662         umtx_key_release(&uq->uq_key);
 2663         return (error);
 2664 }
 2665 
 2666 /*
 2667  * Unlock a PP mutex.
 2668  */
 2669 static int
 2670 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
 2671 {
 2672         struct umtx_key key;
 2673         struct umtx_q *uq, *uq2;
 2674         struct umtx_pi *pi;
 2675         uint32_t id, owner, rceiling;
 2676         int error, pri, new_inherited_pri, su;
 2677 
 2678         id = td->td_tid;
 2679         uq = td->td_umtxq;
 2680         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
 2681 
 2682         /*
 2683          * Make sure we own this mtx.
 2684          */
 2685         error = fueword32(&m->m_owner, &owner);
 2686         if (error == -1)
 2687                 return (EFAULT);
 2688 
 2689         if ((owner & ~UMUTEX_CONTESTED) != id)
 2690                 return (EPERM);
 2691 
 2692         error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
 2693         if (error != 0)
 2694                 return (error);
 2695 
 2696         if (rceiling == -1)
 2697                 new_inherited_pri = PRI_MAX;
 2698         else {
 2699                 rceiling = RTP_PRIO_MAX - rceiling;
 2700                 if (rceiling > RTP_PRIO_MAX)
 2701                         return (EINVAL);
 2702                 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
 2703         }
 2704 
 2705         if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
 2706             TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
 2707             &key)) != 0)
 2708                 return (error);
 2709         umtxq_lock(&key);
 2710         umtxq_busy(&key);
 2711         umtxq_unlock(&key);
 2712         /*
 2713          * For priority protected mutex, always set unlocked state
 2714          * to UMUTEX_CONTESTED, so that userland always enters kernel
 2715          * to lock the mutex, it is necessary because thread priority
 2716          * has to be adjusted for such mutex.
 2717          */
 2718         error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
 2719             UMUTEX_CONTESTED);
 2720 
 2721         umtxq_lock(&key);
 2722         if (error == 0)
 2723                 umtxq_signal(&key, 1);
 2724         umtxq_unbusy(&key);
 2725         umtxq_unlock(&key);
 2726 
 2727         if (error == -1)
 2728                 error = EFAULT;
 2729         else {
 2730                 mtx_lock(&umtx_lock);
 2731                 if (su != 0)
 2732                         uq->uq_inherited_pri = new_inherited_pri;
 2733                 pri = PRI_MAX;
 2734                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2735                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2736                         if (uq2 != NULL) {
 2737                                 if (pri > UPRI(uq2->uq_thread))
 2738                                         pri = UPRI(uq2->uq_thread);
 2739                         }
 2740                 }
 2741                 if (pri > uq->uq_inherited_pri)
 2742                         pri = uq->uq_inherited_pri;
 2743                 thread_lock(td);
 2744                 sched_lend_user_prio(td, pri);
 2745                 thread_unlock(td);
 2746                 mtx_unlock(&umtx_lock);
 2747         }
 2748         umtx_key_release(&key);
 2749         return (error);
 2750 }
 2751 
/*
 * Change the priority ceiling (m_ceilings[0]) of a PP mutex.
 *
 * The new ceiling is installed only while the mutex is either unowned
 * (it is briefly acquired here) or already owned by the caller.  On
 * success the previous ceiling is copied out to *old_ceiling when
 * that pointer is non-NULL.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	/* Ceilings apply only to priority-protected mutexes. */
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* Snapshot the current ceiling for the copy-out below. */
		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/* Try to take the mutex (unlocked PP state is CONTESTED). */
		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		if (rv == 0) {
			/* Acquired: store the new ceiling, then release. */
			MPASS(owner == UMUTEX_CONTESTED);
			rv = suword32(&m->m_ceilings[0], ceiling);
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
			break;
		}

		/* Already owned by us: update the ceiling in place. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	/* Wake everyone; the ceiling they observed may have changed. */
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}
 2844 
 2845 /*
 2846  * Lock a userland POSIX mutex.
 2847  */
 2848 static int
 2849 do_lock_umutex(struct thread *td, struct umutex *m,
 2850     struct _umtx_time *timeout, int mode)
 2851 {
 2852         uint32_t flags;
 2853         int error;
 2854 
 2855         error = fueword32(&m->m_flags, &flags);
 2856         if (error == -1)
 2857                 return (EFAULT);
 2858 
 2859         switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2860         case 0:
 2861                 error = do_lock_normal(td, m, flags, timeout, mode);
 2862                 break;
 2863         case UMUTEX_PRIO_INHERIT:
 2864                 error = do_lock_pi(td, m, flags, timeout, mode);
 2865                 break;
 2866         case UMUTEX_PRIO_PROTECT:
 2867                 error = do_lock_pp(td, m, flags, timeout, mode);
 2868                 break;
 2869         default:
 2870                 return (EINVAL);
 2871         }
 2872         if (timeout == NULL) {
 2873                 if (error == EINTR && mode != _UMUTEX_WAIT)
 2874                         error = ERESTART;
 2875         } else {
 2876                 /* Timed-locking is not restarted. */
 2877                 if (error == ERESTART)
 2878                         error = EINTR;
 2879         }
 2880         return (error);
 2881 }
 2882 
 2883 /*
 2884  * Unlock a userland POSIX mutex.
 2885  */
 2886 static int
 2887 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
 2888 {
 2889         uint32_t flags;
 2890         int error;
 2891 
 2892         error = fueword32(&m->m_flags, &flags);
 2893         if (error == -1)
 2894                 return (EFAULT);
 2895 
 2896         switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2897         case 0:
 2898                 return (do_unlock_normal(td, m, flags, rb));
 2899         case UMUTEX_PRIO_INHERIT:
 2900                 return (do_unlock_pi(td, m, flags, rb));
 2901         case UMUTEX_PRIO_PROTECT:
 2902                 return (do_unlock_pp(td, m, flags, rb));
 2903         }
 2904 
 2905         return (EINVAL);
 2906 }
 2907 
/*
 * Wait on a userland condition variable.
 *
 * The thread is queued on the condvar's sleep queue BEFORE the
 * associated userland mutex "m" is unlocked, so a signal arriving
 * between the unlock and the sleep is not lost.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	/* Pick the clock for the timeout: explicit c_clockid or realtime. */
	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	/* Queue ourselves before touching the userland mutex. */
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		umtx_abs_timeout_init(&timo, clockid,
		    (wflags & CVWAIT_ABSTIME) != 0, timeout);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	/* No longer queued means a signaller removed us: success. */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be timeout, interrupted by signal or
		 * spurious wakeup; clear the c_has_waiters flag when
		 * we were the last waiter on the queue.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
 2995 
 2996 /*
 2997  * Signal a userland condition variable.
 2998  */
 2999 static int
 3000 do_cv_signal(struct thread *td, struct ucond *cv)
 3001 {
 3002         struct umtx_key key;
 3003         int error, cnt, nwake;
 3004         uint32_t flags;
 3005 
 3006         error = fueword32(&cv->c_flags, &flags);
 3007         if (error == -1)
 3008                 return (EFAULT);
 3009         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 3010                 return (error);
 3011         umtxq_lock(&key);
 3012         umtxq_busy(&key);
 3013         cnt = umtxq_count(&key);
 3014         nwake = umtxq_signal(&key, 1);
 3015         if (cnt <= nwake) {
 3016                 umtxq_unlock(&key);
 3017                 error = suword32(&cv->c_has_waiters, 0);
 3018                 if (error == -1)
 3019                         error = EFAULT;
 3020                 umtxq_lock(&key);
 3021         }
 3022         umtxq_unbusy(&key);
 3023         umtxq_unlock(&key);
 3024         umtx_key_release(&key);
 3025         return (error);
 3026 }
 3027 
/*
 * Broadcast a userland condition variable: wake every waiter and
 * clear the userland c_has_waiters hint.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	/* Wake all threads queued on the condvar. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/* Queue drained; clear the hint while still busy. */
	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}
 3055 
/*
 * Acquire a read lock on a userland rwlock.
 *
 * Fast path is a CAS incrementing the reader count; when a writer
 * owns (or, without reader preference, waits for) the lock, the
 * thread advertises itself via URWLOCK_READ_WAITERS and
 * rw_blocked_readers, then sleeps in the kernel.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
    struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Unless reader preference is requested (via either the call
	 * or the lock flags), pending writers also block new readers.
	 */
	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) ==
			    URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			/* CAS in an incremented reader count. */
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * Contention bit is set, before sleeping, increase
		 * read waiter count.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		/* Sleep until the blocking writer state clears (or error). */
		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* decrease read waiter count, and may clear read contention bit */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			/* We were the last blocked reader: drop the bit. */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				error1 = thread_check_susp(td, false);
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
 3241 
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	/*
	 * Write-lock a userspace urwlock, optionally bounded by *timeout.
	 * The fast path CAS-acquires URWLOCK_WRITE_OWNER; otherwise the
	 * thread advertises itself via URWLOCK_WRITE_WAITERS and the
	 * rw_blocked_writers count, then sleeps on the exclusive queue
	 * until woken by an unlock.  Returns 0 on success, EFAULT on any
	 * userspace access fault, or an error from the timed sleep
	 * (ERESTART is converted to EINTR before returning).
	 */
	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * blocked_readers carries the reader-waiter count observed at the
	 * end of the previous loop iteration; if we bail out with an error
	 * while readers are still blocked and no writer is active or
	 * waiting, those readers must be woken below.
	 */
	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		/* Fast path: no owner and no readers -- try to CAS in. */
		while ((state & URWLOCK_WRITE_OWNER) == 0 &&
		    URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			/* CAS lost the race; retry against the new state. */
			state = oldstate;
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		if (error) {
			/*
			 * Giving up: if no writer owns or waits on the lock
			 * but readers were left blocked (counted in a prior
			 * iteration), wake them so they are not stranded.
			 */
			if ((state & (URWLOCK_WRITE_OWNER |
			    URWLOCK_WRITE_WAITERS)) == 0 &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX,
				    UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* Set URWLOCK_WRITE_WAITERS while the lock is still held. */
		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* The lock was released while we busied the queue; retry. */
		if ((state & URWLOCK_WRITE_OWNER) == 0 &&
		    URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
			continue;
		}
sleep:
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);

		/* Sleep until the lock appears free or an error occurs. */
		while ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			/*
			 * We were the last blocked writer; clear the
			 * WRITE_WAITERS bit so unlockers stop preferring
			 * the exclusive queue.
			 */
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				error1 = thread_check_susp(td, false);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * behind, but this should not harm the
				 * correctness.
				 */
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
			/* Remember blocked readers for the error path above. */
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
 3437 
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	/*
	 * Release a userspace urwlock held by the caller: clear
	 * URWLOCK_WRITE_OWNER for a write unlock or decrement the reader
	 * count for a read unlock, then wake the appropriate waiter
	 * queue.  Returns EPERM when the lock is not held in the claimed
	 * mode, EFAULT on a userspace access fault.
	 */
	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		/* Write unlock: CAS out the owner bit, retrying on races. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (rv == 1) {
				/* CAS failed; re-validate ownership. */
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
				error = thread_check_susp(td, true);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read unlock: decrement the reader count. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (rv == 1) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = thread_check_susp(td, true);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		/* Neither write-owned nor read-locked: caller lied. */
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Choose which queue to wake based on the lock's preference flag:
	 * one writer (exclusive queue) or all readers (shared queue).
	 * q is only read when count != 0, i.e. after being set here.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
 3535 
 3536 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv, rv1;

	/*
	 * Wait on a legacy (pre-_usem2) userspace semaphore.  The thread
	 * first CAS-sets sem->_has_waiters from 0 to 1 and re-checks
	 * sem->_count; it only sleeps when the count is still zero, so a
	 * concurrent post cannot be missed.
	 */
	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

again:
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv != -1)
		rv1 = fueword32(&sem->_count, &count);
	/*
	 * Abort the sleep on: a fault (rv/rv1 == -1), an already-positive
	 * count, or a lost CAS that observed _has_waiters == 0 (rv == 1,
	 * count1 == 0), which means the word changed under us and we must
	 * retry.  rv1 is only read when rv != -1, so it is always
	 * initialized where used.
	 */
	if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) {
		if (rv == 0)
			/* We set _has_waiters but won't sleep; undo it. */
			suword32(&sem->_has_waiters, 0);
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		if (rv == -1 || rv1 == -1) {
			error = EFAULT;
			goto out;
		}
		if (count != 0) {
			/* Semaphore already posted; success, no sleep. */
			error = 0;
			goto out;
		}
		MPASS(rv == 1 && count1 == 0);
		rv = thread_check_susp(td, true);
		if (rv == 0)
			goto again;
		error = rv;
		goto out;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	/* Removal from the queue by a waker means we were signalled. */
	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}
 3605 
 3606 /*
 3607  * Signal a userland semaphore.
 3608  */
 3609 static int
 3610 do_sem_wake(struct thread *td, struct _usem *sem)
 3611 {
 3612         struct umtx_key key;
 3613         int error, cnt;
 3614         uint32_t flags;
 3615 
 3616         error = fueword32(&sem->_flags, &flags);
 3617         if (error == -1)
 3618                 return (EFAULT);
 3619         if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
 3620                 return (error);
 3621         umtxq_lock(&key);
 3622         umtxq_busy(&key);
 3623         cnt = umtxq_count(&key);
 3624         if (cnt > 0) {
 3625                 /*
 3626                  * Check if count is greater than 0, this means the memory is
 3627                  * still being referenced by user code, so we can safely
 3628                  * update _has_waiters flag.
 3629                  */
 3630                 if (cnt == 1) {
 3631                         umtxq_unlock(&key);
 3632                         error = suword32(&sem->_has_waiters, 0);
 3633                         umtxq_lock(&key);
 3634                         if (error == -1)
 3635                                 error = EFAULT;
 3636                 }
 3637                 umtxq_signal(&key, 1);
 3638         }
 3639         umtxq_unbusy(&key);
 3640         umtxq_unlock(&key);
 3641         umtx_key_release(&key);
 3642         return (error);
 3643 }
 3644 #endif
 3645 
 3646 static int
 3647 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
 3648 {
 3649         struct umtx_abs_timeout timo;
 3650         struct umtx_q *uq;
 3651         uint32_t count, flags;
 3652         int error, rv;
 3653 
 3654         uq = td->td_umtxq;
 3655         flags = fuword32(&sem->_flags);
 3656         if (timeout != NULL)
 3657                 umtx_abs_timeout_init2(&timo, timeout);
 3658 
 3659 again:
 3660         error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
 3661         if (error != 0)
 3662                 return (error);
 3663         umtxq_lock(&uq->uq_key);
 3664         umtxq_busy(&uq->uq_key);
 3665         umtxq_insert(uq);
 3666         umtxq_unlock(&uq->uq_key);
 3667         rv = fueword32(&sem->_count, &count);
 3668         if (rv == -1) {
 3669                 umtxq_lock(&uq->uq_key);
 3670                 umtxq_unbusy(&uq->uq_key);
 3671                 umtxq_remove(uq);
 3672                 umtxq_unlock(&uq->uq_key);
 3673                 umtx_key_release(&uq->uq_key);
 3674                 return (EFAULT);
 3675         }
 3676         for (;;) {
 3677                 if (USEM_COUNT(count) != 0) {
 3678                         umtxq_lock(&uq->uq_key);
 3679                         umtxq_unbusy(&uq->uq_key);
 3680                         umtxq_remove(uq);
 3681                         umtxq_unlock(&uq->uq_key);
 3682                         umtx_key_release(&uq->uq_key);
 3683                         return (0);
 3684                 }
 3685                 if (count == USEM_HAS_WAITERS)
 3686                         break;
 3687                 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
 3688                 if (rv == 0)
 3689                         break;
 3690                 umtxq_lock(&uq->uq_key);
 3691                 umtxq_unbusy(&uq->uq_key);
 3692                 umtxq_remove(uq);
 3693                 umtxq_unlock(&uq->uq_key);
 3694                 umtx_key_release(&uq->uq_key);
 3695                 if (rv == -1)
 3696                         return (EFAULT);
 3697                 rv = thread_check_susp(td, true);
 3698                 if (rv != 0)
 3699                         return (rv);
 3700                 goto again;
 3701         }
 3702         umtxq_lock(&uq->uq_key);
 3703         umtxq_unbusy(&uq->uq_key);
 3704 
 3705         error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
 3706 
 3707         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 3708                 error = 0;
 3709         else {
 3710                 umtxq_remove(uq);
 3711                 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
 3712                         /* A relative timeout cannot be restarted. */
 3713                         if (error == ERESTART)
 3714                                 error = EINTR;
 3715                         if (error == EINTR) {
 3716                                 kern_clock_gettime(curthread, timo.clockid,
 3717                                     &timo.cur);
 3718                                 timespecsub(&timo.end, &timo.cur,
 3719                                     &timeout->_timeout);
 3720                         }
 3721                 }
 3722         }
 3723         umtxq_unlock(&uq->uq_key);
 3724         umtx_key_release(&uq->uq_key);
 3725         return (error);
 3726 }
 3727 
 3728 /*
 3729  * Signal a userland semaphore.
 3730  */
 3731 static int
 3732 do_sem2_wake(struct thread *td, struct _usem2 *sem)
 3733 {
 3734         struct umtx_key key;
 3735         int error, cnt, rv;
 3736         uint32_t count, flags;
 3737 
 3738         rv = fueword32(&sem->_flags, &flags);
 3739         if (rv == -1)
 3740                 return (EFAULT);
 3741         if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
 3742                 return (error);
 3743         umtxq_lock(&key);
 3744         umtxq_busy(&key);
 3745         cnt = umtxq_count(&key);
 3746         if (cnt > 0) {
 3747                 /*
 3748                  * If this was the last sleeping thread, clear the waiters
 3749                  * flag in _count.
 3750                  */
 3751                 if (cnt == 1) {
 3752                         umtxq_unlock(&key);
 3753                         rv = fueword32(&sem->_count, &count);
 3754                         while (rv != -1 && count & USEM_HAS_WAITERS) {
 3755                                 rv = casueword32(&sem->_count, count, &count,
 3756                                     count & ~USEM_HAS_WAITERS);
 3757                                 if (rv == 1) {
 3758                                         rv = thread_check_susp(td, true);
 3759                                         if (rv != 0)
 3760                                                 break;
 3761                                 }
 3762                         }
 3763                         if (rv == -1)
 3764                                 error = EFAULT;
 3765                         else if (rv > 0) {
 3766                                 error = rv;
 3767                         }
 3768                         umtxq_lock(&key);
 3769                 }
 3770 
 3771                 umtxq_signal(&key, 1);
 3772         }
 3773         umtxq_unbusy(&key);
 3774         umtxq_unlock(&key);
 3775         umtx_key_release(&key);
 3776         return (error);
 3777 }
 3778 
 3779 #ifdef COMPAT_FREEBSD10
 3780 int
 3781 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap)
 3782 {
 3783         return (do_lock_umtx(td, uap->umtx, td->td_tid, 0));
 3784 }
 3785 
 3786 int
 3787 freebsd10__umtx_unlock(struct thread *td,
 3788     struct freebsd10__umtx_unlock_args *uap)
 3789 {
 3790         return (do_unlock_umtx(td, uap->umtx, td->td_tid));
 3791 }
 3792 #endif
 3793 
 3794 inline int
 3795 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp)
 3796 {
 3797         int error;
 3798 
 3799         error = copyin(uaddr, tsp, sizeof(*tsp));
 3800         if (error == 0) {
 3801                 if (!timespecvalid_interval(tsp))
 3802                         error = EINVAL;
 3803         }
 3804         return (error);
 3805 }
 3806 
 3807 static inline int
 3808 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp)
 3809 {
 3810         int error;
 3811 
 3812         if (size <= sizeof(tp->_timeout)) {
 3813                 tp->_clockid = CLOCK_REALTIME;
 3814                 tp->_flags = 0;
 3815                 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout));
 3816         } else
 3817                 error = copyin(uaddr, tp, sizeof(*tp));
 3818         if (error != 0)
 3819                 return (error);
 3820         if (!timespecvalid_interval(&tp->_timeout))
 3821                 return (EINVAL);
 3822         return (0);
 3823 }
 3824 
 3825 static int
 3826 umtx_copyin_robust_lists(const void *uaddr, size_t size,
 3827     struct umtx_robust_lists_params *rb)
 3828 {
 3829 
 3830         if (size > sizeof(*rb))
 3831                 return (EINVAL);
 3832         return (copyin(uaddr, rb, size));
 3833 }
 3834 
 3835 static int
 3836 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp)
 3837 {
 3838 
 3839         /*
 3840          * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
 3841          * and we're only called if sz >= sizeof(timespec) as supplied in the
 3842          * copyops.
 3843          */
 3844         KASSERT(sz >= sizeof(*tsp),
 3845             ("umtx_copyops specifies incorrect sizes"));
 3846 
 3847         return (copyout(tsp, uaddr, sizeof(*tsp)));
 3848 }
 3849 
 3850 #ifdef COMPAT_FREEBSD10
 3851 static int
 3852 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap,
 3853     const struct umtx_copyops *ops)
 3854 {
 3855         struct timespec *ts, timeout;
 3856         int error;
 3857 
 3858         /* Allow a null timespec (wait forever). */
 3859         if (uap->uaddr2 == NULL)
 3860                 ts = NULL;
 3861         else {
 3862                 error = ops->copyin_timeout(uap->uaddr2, &timeout);
 3863                 if (error != 0)
 3864                         return (error);
 3865                 ts = &timeout;
 3866         }
 3867 #ifdef COMPAT_FREEBSD32
 3868         if (ops->compat32)
 3869                 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
 3870 #endif
 3871         return (do_lock_umtx(td, uap->obj, uap->val, ts));
 3872 }
 3873 
 3874 static int
 3875 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap,
 3876     const struct umtx_copyops *ops)
 3877 {
 3878 #ifdef COMPAT_FREEBSD32
 3879         if (ops->compat32)
 3880                 return (do_unlock_umtx32(td, uap->obj, uap->val));
 3881 #endif
 3882         return (do_unlock_umtx(td, uap->obj, uap->val));
 3883 }
 3884 #endif  /* COMPAT_FREEBSD10 */
 3885 
 3886 #if !defined(COMPAT_FREEBSD10)
 3887 static int
 3888 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused,
 3889     const struct umtx_copyops *ops __unused)
 3890 {
 3891         return (EOPNOTSUPP);
 3892 }
 3893 #endif  /* COMPAT_FREEBSD10 */
 3894 
 3895 static int
 3896 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap,
 3897     const struct umtx_copyops *ops)
 3898 {
 3899         struct _umtx_time timeout, *tm_p;
 3900         int error;
 3901 
 3902         if (uap->uaddr2 == NULL)
 3903                 tm_p = NULL;
 3904         else {
 3905                 error = ops->copyin_umtx_time(
 3906                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3907                 if (error != 0)
 3908                         return (error);
 3909                 tm_p = &timeout;
 3910         }
 3911         return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0));
 3912 }
 3913 
 3914 static int
 3915 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap,
 3916     const struct umtx_copyops *ops)
 3917 {
 3918         struct _umtx_time timeout, *tm_p;
 3919         int error;
 3920 
 3921         if (uap->uaddr2 == NULL)
 3922                 tm_p = NULL;
 3923         else {
 3924                 error = ops->copyin_umtx_time(
 3925                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3926                 if (error != 0)
 3927                         return (error);
 3928                 tm_p = &timeout;
 3929         }
 3930         return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
 3931 }
 3932 
 3933 static int
 3934 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap,
 3935     const struct umtx_copyops *ops)
 3936 {
 3937         struct _umtx_time *tm_p, timeout;
 3938         int error;
 3939 
 3940         if (uap->uaddr2 == NULL)
 3941                 tm_p = NULL;
 3942         else {
 3943                 error = ops->copyin_umtx_time(
 3944                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3945                 if (error != 0)
 3946                         return (error);
 3947                 tm_p = &timeout;
 3948         }
 3949         return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
 3950 }
 3951 
 3952 static int
 3953 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap,
 3954     const struct umtx_copyops *ops __unused)
 3955 {
 3956 
 3957         return (kern_umtx_wake(td, uap->obj, uap->val, 0));
 3958 }
 3959 
 3960 #define BATCH_SIZE      128
 3961 static int
 3962 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap)
 3963 {
 3964         char *uaddrs[BATCH_SIZE], **upp;
 3965         int count, error, i, pos, tocopy;
 3966 
 3967         upp = (char **)uap->obj;
 3968         error = 0;
 3969         for (count = uap->val, pos = 0; count > 0; count -= tocopy,
 3970             pos += tocopy) {
 3971                 tocopy = MIN(count, BATCH_SIZE);
 3972                 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
 3973                 if (error != 0)
 3974                         break;
 3975                 for (i = 0; i < tocopy; ++i) {
 3976                         kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
 3977                 }
 3978                 maybe_yield();
 3979         }
 3980         return (error);
 3981 }
 3982 
 3983 static int
 3984 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap)
 3985 {
 3986         uint32_t uaddrs[BATCH_SIZE], *upp;
 3987         int count, error, i, pos, tocopy;
 3988 
 3989         upp = (uint32_t *)uap->obj;
 3990         error = 0;
 3991         for (count = uap->val, pos = 0; count > 0; count -= tocopy,
 3992             pos += tocopy) {
 3993                 tocopy = MIN(count, BATCH_SIZE);
 3994                 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
 3995                 if (error != 0)
 3996                         break;
 3997                 for (i = 0; i < tocopy; ++i) {
 3998                         kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i],
 3999                             INT_MAX, 1);
 4000                 }
 4001                 maybe_yield();
 4002         }
 4003         return (error);
 4004 }
 4005 
 4006 static int
 4007 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap,
 4008     const struct umtx_copyops *ops)
 4009 {
 4010 
 4011         if (ops->compat32)
 4012                 return (__umtx_op_nwake_private_compat32(td, uap));
 4013         return (__umtx_op_nwake_private_native(td, uap));
 4014 }
 4015 
 4016 static int
 4017 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap,
 4018     const struct umtx_copyops *ops __unused)
 4019 {
 4020 
 4021         return (kern_umtx_wake(td, uap->obj, uap->val, 1));
 4022 }
 4023 
 4024 static int
 4025 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap,
 4026    const struct umtx_copyops *ops)
 4027 {
 4028         struct _umtx_time *tm_p, timeout;
 4029         int error;
 4030 
 4031         /* Allow a null timespec (wait forever). */
 4032         if (uap->uaddr2 == NULL)
 4033                 tm_p = NULL;
 4034         else {
 4035                 error = ops->copyin_umtx_time(
 4036                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 4037                 if (error != 0)
 4038                         return (error);
 4039                 tm_p = &timeout;
 4040         }
 4041         return (do_lock_umutex(td, uap->obj, tm_p, 0));
 4042 }
 4043 
 4044 static int
 4045 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap,
 4046     const struct umtx_copyops *ops __unused)
 4047 {
 4048 
 4049         return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
 4050 }
 4051 
 4052 static int
 4053 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap,
 4054     const struct umtx_copyops *ops)
 4055 {
 4056         struct _umtx_time *tm_p, timeout;
 4057         int error;
 4058 
 4059         /* Allow a null timespec (wait forever). */
 4060         if (uap->uaddr2 == NULL)
 4061                 tm_p = NULL;
 4062         else {
 4063                 error = ops->copyin_umtx_time(
 4064                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 4065                 if (error != 0)
 4066                         return (error);
 4067                 tm_p = &timeout;
 4068         }
 4069         return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
 4070 }
 4071 
 4072 static int
 4073 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap,
 4074     const struct umtx_copyops *ops __unused)
 4075 {
 4076 
 4077         return (do_wake_umutex(td, uap->obj));
 4078 }
 4079 
 4080 static int
 4081 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap,
 4082     const struct umtx_copyops *ops __unused)
 4083 {
 4084 
 4085         return (do_unlock_umutex(td, uap->obj, false));
 4086 }
 4087 
 4088 static int
 4089 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap,
 4090     const struct umtx_copyops *ops __unused)
 4091 {
 4092 
 4093         return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
 4094 }
 4095 
 4096 static int
 4097 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap,
 4098     const struct umtx_copyops *ops)
 4099 {
 4100         struct timespec *ts, timeout;
 4101         int error;
 4102 
 4103         /* Allow a null timespec (wait forever). */
 4104         if (uap->uaddr2 == NULL)
 4105                 ts = NULL;
 4106         else {
 4107                 error = ops->copyin_timeout(uap->uaddr2, &timeout);
 4108                 if (error != 0)
 4109                         return (error);
 4110                 ts = &timeout;
 4111         }
 4112         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 4113 }
 4114 
 4115 static int
 4116 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap,
 4117     const struct umtx_copyops *ops __unused)
 4118 {
 4119 
 4120         return (do_cv_signal(td, uap->obj));
 4121 }
 4122 
 4123 static int
 4124 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap,
 4125     const struct umtx_copyops *ops __unused)
 4126 {
 4127 
 4128         return (do_cv_broadcast(td, uap->obj));
 4129 }
 4130 
 4131 static int
 4132 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap,
 4133     const struct umtx_copyops *ops)
 4134 {
 4135         struct _umtx_time timeout;
 4136         int error;
 4137 
 4138         /* Allow a null timespec (wait forever). */
 4139         if (uap->uaddr2 == NULL) {
 4140                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 4141         } else {
 4142                 error = ops->copyin_umtx_time(uap->uaddr2,
 4143                    (size_t)uap->uaddr1, &timeout);
 4144                 if (error != 0)
 4145                         return (error);
 4146                 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
 4147         }
 4148         return (error);
 4149 }
 4150 
 4151 static int
 4152 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap,
 4153     const struct umtx_copyops *ops)
 4154 {
 4155         struct _umtx_time timeout;
 4156         int error;
 4157 
 4158         /* Allow a null timespec (wait forever). */
 4159         if (uap->uaddr2 == NULL) {
 4160                 error = do_rw_wrlock(td, uap->obj, 0);
 4161         } else {
 4162                 error = ops->copyin_umtx_time(uap->uaddr2,
 4163                    (size_t)uap->uaddr1, &timeout);
 4164                 if (error != 0)
 4165                         return (error);
 4166 
 4167                 error = do_rw_wrlock(td, uap->obj, &timeout);
 4168         }
 4169         return (error);
 4170 }
 4171 
 4172 static int
 4173 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap,
 4174     const struct umtx_copyops *ops __unused)
 4175 {
 4176 
 4177         return (do_rw_unlock(td, uap->obj));
 4178 }
 4179 
 4180 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
 4181 static int
 4182 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap,
 4183     const struct umtx_copyops *ops)
 4184 {
 4185         struct _umtx_time *tm_p, timeout;
 4186         int error;
 4187 
 4188         /* Allow a null timespec (wait forever). */
 4189         if (uap->uaddr2 == NULL)
 4190                 tm_p = NULL;
 4191         else {
 4192                 error = ops->copyin_umtx_time(
 4193                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 4194                 if (error != 0)
 4195                         return (error);
 4196                 tm_p = &timeout;
 4197         }
 4198         return (do_sem_wait(td, uap->obj, tm_p));
 4199 }
 4200 
 4201 static int
 4202 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap,
 4203     const struct umtx_copyops *ops __unused)
 4204 {
 4205 
 4206         return (do_sem_wake(td, uap->obj));
 4207 }
 4208 #endif
 4209 
 4210 static int
 4211 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap,
 4212     const struct umtx_copyops *ops __unused)
 4213 {
 4214 
 4215         return (do_wake2_umutex(td, uap->obj, uap->val));
 4216 }
 4217 
/*
 * UMTX_OP_SEM2_WAIT: wait on the _usem2 at uap->obj, optionally with a
 * timeout copied in from uap->uaddr2 (uap->uaddr1 carries the size of
 * the user's timeout buffer).  If the wait is interrupted by a signal
 * and the timeout was relative, the remaining time is copied back out
 * so the caller can restart the wait.
 */
static int
__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *ops)
{
	struct _umtx_time *tm_p, timeout;
	size_t uasize;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL) {
		uasize = 0;
		tm_p = NULL;
	} else {
		uasize = (size_t)uap->uaddr1;
		error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
	error = do_sem2_wait(td, uap->obj, tm_p);
	/*
	 * Interrupted relative wait: write the remaining time back after
	 * the _umtx_time header, but only if the user buffer is large
	 * enough to hold a trailing timespec.  timeout._flags is valid
	 * here because uaddr2 != NULL implies the copyin above ran.
	 */
	if (error == EINTR && uap->uaddr2 != NULL &&
	    (timeout._flags & UMTX_ABSTIME) == 0 &&
	    uasize >= ops->umtx_time_sz + ops->timespec_sz) {
		error = ops->copyout_timeout(
		    (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz),
		    uasize - ops->umtx_time_sz, &timeout._timeout);
		if (error == 0) {
			/* Preserve EINTR so userland restarts correctly. */
			error = EINTR;
		}
	}

	return (error);
}
 4251 
 4252 static int
 4253 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap,
 4254     const struct umtx_copyops *ops __unused)
 4255 {
 4256 
 4257         return (do_sem2_wake(td, uap->obj));
 4258 }
 4259 
/* Per-VM-object list head of shm registrations, stored in umtx_data. */
#define USHM_OBJ_UMTX(o)                                                \
    ((struct umtx_shm_obj_list *)(&(o)->umtx_data))

/* ushm_flags: which lists the registration is currently linked on. */
#define USHMF_REG_LINKED        0x0001
#define USHMF_OBJ_LINKED        0x0002
/*
 * One UMTX_SHM registration: maps a shared umtx key to an anonymous
 * POSIX shm object of one page.  Protected by umtx_shm_lock except for
 * the immutable key/cred/obj fields.
 */
struct umtx_shm_reg {
	/* Link on the per-hash-bucket registry list. */
	TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link;
	/* Link on the backing VM object's list (USHM_OBJ_UMTX). */
	LIST_ENTRY(umtx_shm_reg) ushm_obj_link;
	struct umtx_key         ushm_key;       /* shared key (TYPE_SHM) */
	struct ucred            *ushm_cred;     /* creator's cred, for limits */
	struct shmfd            *ushm_obj;      /* backing shm object */
	u_int                   ushm_refcnt;    /* under umtx_shm_lock */
	u_int                   ushm_flags;     /* USHMF_* */
};

LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);

/* Allocation zone, registry hash table and its lock. */
static uma_zone_t umtx_shm_reg_zone;
static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
static struct mtx umtx_shm_lock;
/* Registrations queued for deferred freeing from task context. */
static struct umtx_shm_reg_head umtx_shm_reg_delfree =
    TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);
 4284 static void umtx_shm_free_reg(struct umtx_shm_reg *reg);
 4285 
 4286 static void
 4287 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
 4288 {
 4289         struct umtx_shm_reg_head d;
 4290         struct umtx_shm_reg *reg, *reg1;
 4291 
 4292         TAILQ_INIT(&d);
 4293         mtx_lock(&umtx_shm_lock);
 4294         TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
 4295         mtx_unlock(&umtx_shm_lock);
 4296         TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
 4297                 TAILQ_REMOVE(&d, reg, ushm_reg_link);
 4298                 umtx_shm_free_reg(reg);
 4299         }
 4300 }
 4301 
/* Deferred-free task, enqueued on taskqueue_thread when needed. */
static struct task umtx_shm_reg_delfree_task =
    TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);
 4304 
/*
 * Look up the registration for a shared key in the registry hash
 * bucket; on success take an additional reference and return it.
 * Returns NULL if no registration exists.  umtx_shm_lock must be held.
 */
static struct umtx_shm_reg *
umtx_shm_find_reg_locked(const struct umtx_key *key)
{
	struct umtx_shm_reg *reg;
	struct umtx_shm_reg_head *reg_head;

	KASSERT(key->shared, ("umtx_p_find_rg: private key"));
	mtx_assert(&umtx_shm_lock, MA_OWNED);
	reg_head = &umtx_shm_registry[key->hash];
	TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
		KASSERT(reg->ushm_key.shared,
		    ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
		/* Shared keys match on backing object + offset. */
		if (reg->ushm_key.info.shared.object ==
		    key->info.shared.object &&
		    reg->ushm_key.info.shared.offset ==
		    key->info.shared.offset) {
			KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
			KASSERT(reg->ushm_refcnt > 0,
			    ("reg %p refcnt 0 onlist", reg));
			KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
			    ("reg %p not linked", reg));
			/* Reference for the caller; lock is held. */
			reg->ushm_refcnt++;
			return (reg);
		}
	}
	return (NULL);
}
 4332 
 4333 static struct umtx_shm_reg *
 4334 umtx_shm_find_reg(const struct umtx_key *key)
 4335 {
 4336         struct umtx_shm_reg *reg;
 4337 
 4338         mtx_lock(&umtx_shm_lock);
 4339         reg = umtx_shm_find_reg_locked(key);
 4340         mtx_unlock(&umtx_shm_lock);
 4341         return (reg);
 4342 }
 4343 
/*
 * Release all resources of an unlinked registration: return the
 * per-uid accounting, drop the credential and shm object references,
 * then free the structure itself.
 */
static void
umtx_shm_free_reg(struct umtx_shm_reg *reg)
{

	chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
	crfree(reg->ushm_cred);
	shm_drop(reg->ushm_obj);
	uma_zfree(umtx_shm_reg_zone, reg);
}
 4353 
 4354 static bool
 4355 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
 4356 {
 4357         bool res;
 4358 
 4359         mtx_assert(&umtx_shm_lock, MA_OWNED);
 4360         KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
 4361         reg->ushm_refcnt--;
 4362         res = reg->ushm_refcnt == 0;
 4363         if (res || force) {
 4364                 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
 4365                         TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
 4366                             reg, ushm_reg_link);
 4367                         reg->ushm_flags &= ~USHMF_REG_LINKED;
 4368                 }
 4369                 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
 4370                         LIST_REMOVE(reg, ushm_obj_link);
 4371                         reg->ushm_flags &= ~USHMF_OBJ_LINKED;
 4372                 }
 4373         }
 4374         return (res);
 4375 }
 4376 
 4377 static void
 4378 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
 4379 {
 4380         vm_object_t object;
 4381         bool dofree;
 4382 
 4383         if (force) {
 4384                 object = reg->ushm_obj->shm_object;
 4385                 VM_OBJECT_WLOCK(object);
 4386                 vm_object_set_flag(object, OBJ_UMTXDEAD);
 4387                 VM_OBJECT_WUNLOCK(object);
 4388         }
 4389         mtx_lock(&umtx_shm_lock);
 4390         dofree = umtx_shm_unref_reg_locked(reg, force);
 4391         mtx_unlock(&umtx_shm_lock);
 4392         if (dofree)
 4393                 umtx_shm_free_reg(reg);
 4394 }
 4395 
/*
 * VM object constructor hook: initialize the object's list of umtx shm
 * registrations (stored in the object's umtx_data).
 */
void
umtx_shm_object_init(vm_object_t object)
{

	LIST_INIT(USHM_OBJ_UMTX(object));
}
 4402 
 4403 void
 4404 umtx_shm_object_terminated(vm_object_t object)
 4405 {
 4406         struct umtx_shm_reg *reg, *reg1;
 4407         bool dofree;
 4408 
 4409         if (LIST_EMPTY(USHM_OBJ_UMTX(object)))
 4410                 return;
 4411 
 4412         dofree = false;
 4413         mtx_lock(&umtx_shm_lock);
 4414         LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
 4415                 if (umtx_shm_unref_reg_locked(reg, true)) {
 4416                         TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
 4417                             ushm_reg_link);
 4418                         dofree = true;
 4419                 }
 4420         }
 4421         mtx_unlock(&umtx_shm_lock);
 4422         if (dofree)
 4423                 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
 4424 }
 4425 
/*
 * Find or create the registration for the given shared key, returning
 * it referenced in *res.  Creation allocates a one-page anonymous shm
 * object charged against the caller's RLIMIT_UMTXP.  Because the
 * allocation is done unlocked, a concurrent creator may win; in that
 * case the fresh registration is discarded in favor of the existing one.
 */
static int
umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
    struct umtx_shm_reg **res)
{
	struct umtx_shm_reg *reg, *reg1;
	struct ucred *cred;
	int error;

	/* Fast path: an existing registration (already referenced). */
	reg = umtx_shm_find_reg(key);
	if (reg != NULL) {
		*res = reg;
		return (0);
	}
	cred = td->td_ucred;
	if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
		return (ENOMEM);
	reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
	/* One reference for the caller; a second is added when linked. */
	reg->ushm_refcnt = 1;
	bcopy(key, &reg->ushm_key, sizeof(*key));
	reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false);
	reg->ushm_cred = crhold(cred);
	error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
	if (error != 0) {
		umtx_shm_free_reg(reg);
		return (error);
	}
	mtx_lock(&umtx_shm_lock);
	/* Re-check: another thread may have registered while unlocked. */
	reg1 = umtx_shm_find_reg_locked(key);
	if (reg1 != NULL) {
		mtx_unlock(&umtx_shm_lock);
		umtx_shm_free_reg(reg);
		*res = reg1;
		return (0);
	}
	/* Reference held by the registry lists. */
	reg->ushm_refcnt++;
	TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
	LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
	    ushm_obj_link);
	reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
	mtx_unlock(&umtx_shm_lock);
	*res = reg;
	return (0);
}
 4469 
 4470 static int
 4471 umtx_shm_alive(struct thread *td, void *addr)
 4472 {
 4473         vm_map_t map;
 4474         vm_map_entry_t entry;
 4475         vm_object_t object;
 4476         vm_pindex_t pindex;
 4477         vm_prot_t prot;
 4478         int res, ret;
 4479         boolean_t wired;
 4480 
 4481         map = &td->td_proc->p_vmspace->vm_map;
 4482         res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
 4483             &object, &pindex, &prot, &wired);
 4484         if (res != KERN_SUCCESS)
 4485                 return (EFAULT);
 4486         if (object == NULL)
 4487                 ret = EINVAL;
 4488         else
 4489                 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
 4490         vm_map_lookup_done(map, entry);
 4491         return (ret);
 4492 }
 4493 
 4494 static void
 4495 umtx_shm_init(void)
 4496 {
 4497         int i;
 4498 
 4499         umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
 4500             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 4501         mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
 4502         for (i = 0; i < nitems(umtx_shm_registry); i++)
 4503                 TAILQ_INIT(&umtx_shm_registry[i]);
 4504 }
 4505 
/*
 * Implementation of UMTX_OP_SHM: create, look up, destroy, or test
 * liveness of the shm registration keyed by the shared word at addr.
 * For CREAT/LOOKUP, a new O_CLOEXEC file descriptor referencing the
 * backing shm object is returned in td_retval[0].  Exactly one of the
 * four mode flags must be set.
 */
static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
	struct umtx_key key;
	struct umtx_shm_reg *reg;
	struct file *fp;
	int error, fd;

	if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
	    UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1)
		return (EINVAL);
	if ((flags & UMTX_SHM_ALIVE) != 0)
		return (umtx_shm_alive(td, addr));
	error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
	if (error != 0)
		return (error);
	KASSERT(key.shared == 1, ("non-shared key"));
	if ((flags & UMTX_SHM_CREAT) != 0) {
		error = umtx_shm_create_reg(td, &key, &reg);
	} else {
		reg = umtx_shm_find_reg(&key);
		if (reg == NULL)
			error = ESRCH;
	}
	umtx_key_release(&key);
	if (error != 0)
		return (error);
	KASSERT(reg != NULL, ("no reg"));
	if ((flags & UMTX_SHM_DESTROY) != 0) {
		/* Unlink from the registry; object marked OBJ_UMTXDEAD. */
		umtx_shm_unref_reg(reg, true);
	} else {
#if 0
#ifdef MAC
		error = mac_posixshm_check_open(td->td_ucred,
		    reg->ushm_obj, FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = shm_access(reg->ushm_obj, td->td_ucred,
			    FFLAGS(O_RDWR));
		if (error == 0)
#endif
			error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
		if (error == 0) {
			/* The new fd holds its own shm object reference. */
			shm_hold(reg->ushm_obj);
			finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
			    &shm_ops);
			td->td_retval[0] = fd;
			fdrop(fp, td);
		}
	}
	/* Drop the reference taken by create/find above. */
	umtx_shm_unref_reg(reg, false);
	return (error);
}
 4559 
 4560 static int
 4561 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap,
 4562     const struct umtx_copyops *ops __unused)
 4563 {
 4564 
 4565         return (umtx_shm(td, uap->uaddr1, uap->val));
 4566 }
 4567 
 4568 static int
 4569 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
 4570     const struct umtx_copyops *ops)
 4571 {
 4572         struct umtx_robust_lists_params rb;
 4573         int error;
 4574 
 4575         if (ops->compat32) {
 4576                 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 &&
 4577                     (td->td_rb_list != 0 || td->td_rbp_list != 0 ||
 4578                     td->td_rb_inact != 0))
 4579                         return (EBUSY);
 4580         } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) {
 4581                 return (EBUSY);
 4582         }
 4583 
 4584         bzero(&rb, sizeof(rb));
 4585         error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
 4586         if (error != 0)
 4587                 return (error);
 4588 
 4589         if (ops->compat32)
 4590                 td->td_pflags2 |= TDP2_COMPAT32RB;
 4591 
 4592         td->td_rb_list = rb.robust_list_offset;
 4593         td->td_rbp_list = rb.robust_priv_list_offset;
 4594         td->td_rb_inact = rb.robust_inact_offset;
 4595         return (0);
 4596 }
 4597 
#if defined(__i386__) || defined(__amd64__)
/*
 * Provide the standard 32-bit definitions for x86, since native/compat32 use a
 * 32-bit time_t there.  Other architectures just need the i386 definitions
 * along with their standard compat32.
 */
/* x32 layout: 64-bit time_t, 32-bit nanoseconds. */
struct timespecx32 {
	int64_t                 tv_sec;
	int32_t                 tv_nsec;
};

struct umtx_timex32 {
	struct  timespecx32     _timeout;
	uint32_t                _flags;
	uint32_t                _clockid;
};

#ifndef __i386__
/* On amd64 the generic compat32 layout is the i386 one. */
#define timespeci386    timespec32
#define umtx_timei386   umtx_time32
#endif
#else /* !__i386__ && !__amd64__ */
/* 32-bit architectures can emulate i386, so define these almost everywhere. */
/* i386 layout: 32-bit time_t and nanoseconds. */
struct timespeci386 {
	int32_t                 tv_sec;
	int32_t                 tv_nsec;
};

struct umtx_timei386 {
	struct  timespeci386    _timeout;
	uint32_t                _flags;
	uint32_t                _clockid;
};

#if defined(__LP64__)
/* On non-x86 LP64, the compat32 layout serves as the x32 one. */
#define timespecx32     timespec32
#define umtx_timex32    umtx_time32
#endif
#endif
 4637 
 4638 static int
 4639 umtx_copyin_robust_lists32(const void *uaddr, size_t size,
 4640     struct umtx_robust_lists_params *rbp)
 4641 {
 4642         struct umtx_robust_lists_params_compat32 rb32;
 4643         int error;
 4644 
 4645         if (size > sizeof(rb32))
 4646                 return (EINVAL);
 4647         bzero(&rb32, sizeof(rb32));
 4648         error = copyin(uaddr, &rb32, size);
 4649         if (error != 0)
 4650                 return (error);
 4651         CP(rb32, *rbp, robust_list_offset);
 4652         CP(rb32, *rbp, robust_priv_list_offset);
 4653         CP(rb32, *rbp, robust_inact_offset);
 4654         return (0);
 4655 }
 4656 
 4657 #ifndef __i386__
 4658 static inline int
 4659 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp)
 4660 {
 4661         struct timespeci386 ts32;
 4662         int error;
 4663 
 4664         error = copyin(uaddr, &ts32, sizeof(ts32));
 4665         if (error == 0) {
 4666                 if (!timespecvalid_interval(&ts32))
 4667                         error = EINVAL;
 4668                 else {
 4669                         CP(ts32, *tsp, tv_sec);
 4670                         CP(ts32, *tsp, tv_nsec);
 4671                 }
 4672         }
 4673         return (error);
 4674 }
 4675 
 4676 static inline int
 4677 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp)
 4678 {
 4679         struct umtx_timei386 t32;
 4680         int error;
 4681 
 4682         t32._clockid = CLOCK_REALTIME;
 4683         t32._flags   = 0;
 4684         if (size <= sizeof(t32._timeout))
 4685                 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
 4686         else
 4687                 error = copyin(uaddr, &t32, sizeof(t32));
 4688         if (error != 0)
 4689                 return (error);
 4690         if (!timespecvalid_interval(&t32._timeout))
 4691                 return (EINVAL);
 4692         TS_CP(t32, *tp, _timeout);
 4693         CP(t32, *tp, _flags);
 4694         CP(t32, *tp, _clockid);
 4695         return (0);
 4696 }
 4697 
 4698 static int
 4699 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp)
 4700 {
 4701         struct timespeci386 remain32 = {
 4702                 .tv_sec = tsp->tv_sec,
 4703                 .tv_nsec = tsp->tv_nsec,
 4704         };
 4705 
 4706         /*
 4707          * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
 4708          * and we're only called if sz >= sizeof(timespec) as supplied in the
 4709          * copyops.
 4710          */
 4711         KASSERT(sz >= sizeof(remain32),
 4712             ("umtx_copyops specifies incorrect sizes"));
 4713 
 4714         return (copyout(&remain32, uaddr, sizeof(remain32)));
 4715 }
 4716 #endif /* !__i386__ */
 4717 
 4718 #if defined(__i386__) || defined(__LP64__)
 4719 static inline int
 4720 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp)
 4721 {
 4722         struct timespecx32 ts32;
 4723         int error;
 4724 
 4725         error = copyin(uaddr, &ts32, sizeof(ts32));
 4726         if (error == 0) {
 4727                 if (!timespecvalid_interval(&ts32))
 4728                         error = EINVAL;
 4729                 else {
 4730                         CP(ts32, *tsp, tv_sec);
 4731                         CP(ts32, *tsp, tv_nsec);
 4732                 }
 4733         }
 4734         return (error);
 4735 }
 4736 
 4737 static inline int
 4738 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp)
 4739 {
 4740         struct umtx_timex32 t32;
 4741         int error;
 4742 
 4743         t32._clockid = CLOCK_REALTIME;
 4744         t32._flags   = 0;
 4745         if (size <= sizeof(t32._timeout))
 4746                 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
 4747         else
 4748                 error = copyin(uaddr, &t32, sizeof(t32));
 4749         if (error != 0)
 4750                 return (error);
 4751         if (!timespecvalid_interval(&t32._timeout))
 4752                 return (EINVAL);
 4753         TS_CP(t32, *tp, _timeout);
 4754         CP(t32, *tp, _flags);
 4755         CP(t32, *tp, _clockid);
 4756         return (0);
 4757 }
 4758 
 4759 static int
 4760 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp)
 4761 {
 4762         struct timespecx32 remain32 = {
 4763                 .tv_sec = tsp->tv_sec,
 4764                 .tv_nsec = tsp->tv_nsec,
 4765         };
 4766 
 4767         /*
 4768          * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
 4769          * and we're only called if sz >= sizeof(timespec) as supplied in the
 4770          * copyops.
 4771          */
 4772         KASSERT(sz >= sizeof(remain32),
 4773             ("umtx_copyops specifies incorrect sizes"));
 4774 
 4775         return (copyout(&remain32, uaddr, sizeof(remain32)));
 4776 }
 4777 #endif /* __i386__ || __LP64__ */
 4778 
/* Signature of a _umtx_op handler; ops selects the caller's ABI. */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *umtx_ops);

/*
 * Dispatch table for _umtx_op(2), indexed by the (flag-stripped) op
 * code.  Retired legacy ops map to __umtx_op_unimpl when their compat
 * option is not built in.
 */
static const _umtx_op_func op_table[] = {
#ifdef COMPAT_FREEBSD10
	[UMTX_OP_LOCK]          = __umtx_op_lock_umtx,
	[UMTX_OP_UNLOCK]        = __umtx_op_unlock_umtx,
#else
	[UMTX_OP_LOCK]          = __umtx_op_unimpl,
	[UMTX_OP_UNLOCK]        = __umtx_op_unimpl,
#endif
	[UMTX_OP_WAIT]          = __umtx_op_wait,
	[UMTX_OP_WAKE]          = __umtx_op_wake,
	[UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
	[UMTX_OP_MUTEX_LOCK]    = __umtx_op_lock_umutex,
	[UMTX_OP_MUTEX_UNLOCK]  = __umtx_op_unlock_umutex,
	[UMTX_OP_SET_CEILING]   = __umtx_op_set_ceiling,
	[UMTX_OP_CV_WAIT]       = __umtx_op_cv_wait,
	[UMTX_OP_CV_SIGNAL]     = __umtx_op_cv_signal,
	[UMTX_OP_CV_BROADCAST]  = __umtx_op_cv_broadcast,
	[UMTX_OP_WAIT_UINT]     = __umtx_op_wait_uint,
	[UMTX_OP_RW_RDLOCK]     = __umtx_op_rw_rdlock,
	[UMTX_OP_RW_WRLOCK]     = __umtx_op_rw_wrlock,
	[UMTX_OP_RW_UNLOCK]     = __umtx_op_rw_unlock,
	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
	[UMTX_OP_WAKE_PRIVATE]  = __umtx_op_wake_private,
	[UMTX_OP_MUTEX_WAIT]    = __umtx_op_wait_umutex,
	[UMTX_OP_MUTEX_WAKE]    = __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
	[UMTX_OP_SEM_WAIT]      = __umtx_op_sem_wait,
	[UMTX_OP_SEM_WAKE]      = __umtx_op_sem_wake,
#else
	[UMTX_OP_SEM_WAIT]      = __umtx_op_unimpl,
	[UMTX_OP_SEM_WAKE]      = __umtx_op_unimpl,
#endif
	[UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private,
	[UMTX_OP_MUTEX_WAKE2]   = __umtx_op_wake2_umutex,
	[UMTX_OP_SEM2_WAIT]     = __umtx_op_sem2_wait,
	[UMTX_OP_SEM2_WAKE]     = __umtx_op_sem2_wake,
	[UMTX_OP_SHM]           = __umtx_op_shm,
	[UMTX_OP_ROBUST_LISTS]  = __umtx_op_robust_lists,
};
 4821 
/* Copy operations for the native ABI. */
static const struct umtx_copyops umtx_native_ops = {
	.copyin_timeout = umtx_copyin_timeout,
	.copyin_umtx_time = umtx_copyin_umtx_time,
	.copyin_robust_lists = umtx_copyin_robust_lists,
	.copyout_timeout = umtx_copyout_timeout,
	.timespec_sz = sizeof(struct timespec),
	.umtx_time_sz = sizeof(struct _umtx_time),
};
 4830 
#ifndef __i386__
/* Copy operations for the i386 ABI (32-bit time_t) on non-i386 hosts. */
static const struct umtx_copyops umtx_native_opsi386 = {
	.copyin_timeout = umtx_copyin_timeouti386,
	.copyin_umtx_time = umtx_copyin_umtx_timei386,
	.copyin_robust_lists = umtx_copyin_robust_lists32,
	.copyout_timeout = umtx_copyout_timeouti386,
	.timespec_sz = sizeof(struct timespeci386),
	.umtx_time_sz = sizeof(struct umtx_timei386),
	.compat32 = true,
};
#endif
 4842 
#if defined(__i386__) || defined(__LP64__)
/* i386 can emulate other 32-bit archs, too! */
/* Copy operations for 32-bit ABIs with a 64-bit time_t (x32-style). */
static const struct umtx_copyops umtx_native_opsx32 = {
	.copyin_timeout = umtx_copyin_timeoutx32,
	.copyin_umtx_time = umtx_copyin_umtx_timex32,
	.copyin_robust_lists = umtx_copyin_robust_lists32,
	.copyout_timeout = umtx_copyout_timeoutx32,
	.timespec_sz = sizeof(struct timespecx32),
	.umtx_time_sz = sizeof(struct umtx_timex32),
	.compat32 = true,
};

#ifdef COMPAT_FREEBSD32
/* The freebsd32 syscall ABI: i386 layout on amd64, x32 layout elsewhere. */
#ifdef __amd64__
#define umtx_native_ops32       umtx_native_opsi386
#else
#define umtx_native_ops32       umtx_native_opsx32
#endif
#endif /* COMPAT_FREEBSD32 */
#endif /* __i386__ || __LP64__ */

/* ABI-selection flag bits stripped from the op code before dispatch. */
#define UMTX_OP__FLAGS  (UMTX_OP__32BIT | UMTX_OP__I386)
 4865 
 4866 static int
 4867 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
 4868     void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
 4869 {
 4870         struct _umtx_op_args uap = {
 4871                 .obj = obj,
 4872                 .op = op & ~UMTX_OP__FLAGS,
 4873                 .val = val,
 4874                 .uaddr1 = uaddr1,
 4875                 .uaddr2 = uaddr2
 4876         };
 4877 
 4878         if ((uap.op >= nitems(op_table)))
 4879                 return (EINVAL);
 4880         return ((*op_table[uap.op])(td, &uap, ops));
 4881 }
 4882 
 4883 int
 4884 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
 4885 {
 4886         static const struct umtx_copyops *umtx_ops;
 4887 
 4888         umtx_ops = &umtx_native_ops;
 4889 #ifdef __LP64__
 4890         if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) {
 4891                 if ((uap->op & UMTX_OP__I386) != 0)
 4892                         umtx_ops = &umtx_native_opsi386;
 4893                 else
 4894                         umtx_ops = &umtx_native_opsx32;
 4895         }
 4896 #elif !defined(__i386__)
 4897         /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */
 4898         if ((uap->op & UMTX_OP__I386) != 0)
 4899                 umtx_ops = &umtx_native_opsi386;
 4900 #else
 4901         /* Likewise, UMTX_OP__I386 is a nop on i386. */
 4902         if ((uap->op & UMTX_OP__32BIT) != 0)
 4903                 umtx_ops = &umtx_native_opsx32;
 4904 #endif
 4905         return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
 4906             uap->uaddr2, umtx_ops));
 4907 }
 4908 
 4909 #ifdef COMPAT_FREEBSD32
 4910 #ifdef COMPAT_FREEBSD10
/*
 * FreeBSD 10 compat32 shim: lock the legacy 32-bit umtx on behalf of
 * the current thread (owner id is the thread's tid), with no timeout.
 */
int
freebsd10_freebsd32_umtx_lock(struct thread *td,
    struct freebsd10_freebsd32_umtx_lock_args *uap)
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}
 4917 
/*
 * FreeBSD 10 compat32 shim: unlock the legacy 32-bit umtx; the unlock
 * is validated against the current thread's tid as owner.
 */
int
freebsd10_freebsd32_umtx_unlock(struct thread *td,
    struct freebsd10_freebsd32_umtx_unlock_args *uap)
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
 4924 #endif /* COMPAT_FREEBSD10 */
 4925 
/*
 * compat32 entry point for _umtx_op(): forward to the common backend
 * with the 32-bit copyin/copyout operations unconditionally selected.
 */
int
freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
{

	return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr,
	    uap->uaddr2, &umtx_native_ops32));
}
 4933 #endif /* COMPAT_FREEBSD32 */
 4934 
 4935 void
 4936 umtx_thread_init(struct thread *td)
 4937 {
 4938 
 4939         td->td_umtxq = umtxq_alloc();
 4940         td->td_umtxq->uq_thread = td;
 4941 }
 4942 
/*
 * Thread-destruction hook: release the per-thread umtx queue
 * allocated by umtx_thread_init().
 */
void
umtx_thread_fini(struct thread *td)
{

	umtxq_free(td->td_umtxq);
}
 4949 
/*
 * Called when a new thread is created, e.g. by fork().  Resets the
 * inherited priority and asserts that the umtx queue is in its
 * pristine state (no flags, no priority-inheritance chains left over
 * from a previous user of this structure).
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	/* PRI_MAX means "no priority lent to this thread". */
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
 4966 
/*
 * exec() hook.
 *
 * Clear robust lists for all process' threads, not delaying the
 * cleanup to thread exit, since the relevant address space is
 * destroyed right now.
 */
void
umtx_exec(struct proc *p)
{
	struct thread *td;

	KASSERT(p == curproc, ("need curproc"));
	/* exec() single-threads the process first, or it never had threads. */
	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
	    (p->p_flag & P_STOPPED_SINGLE) != 0,
	    ("curproc must be single-threaded"));
	/*
	 * There is no need to lock the list as only this thread can be
	 * running.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		KASSERT(td == curthread ||
		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
		    ("running thread %p %p", p, td));
		umtx_thread_cleanup(td);
		/* The old address space is gone; drop stale robust pointers. */
		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
	}
}
 4995 
/*
 * Thread exit hook: release priority-inheritance state and unlock any
 * robust mutexes still held by the exiting thread.
 */
void
umtx_thread_exit(struct thread *td)
{

	umtx_thread_cleanup(td);
}
 5005 
 5006 static int
 5007 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
 5008 {
 5009         u_long res1;
 5010         uint32_t res32;
 5011         int error;
 5012 
 5013         if (compat32) {
 5014                 error = fueword32((void *)ptr, &res32);
 5015                 if (error == 0)
 5016                         res1 = res32;
 5017         } else {
 5018                 error = fueword((void *)ptr, &res1);
 5019         }
 5020         if (error == 0)
 5021                 *res = res1;
 5022         else
 5023                 error = EFAULT;
 5024         return (error);
 5025 }
 5026 
 5027 static void
 5028 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
 5029     bool compat32)
 5030 {
 5031         struct umutex32 m32;
 5032 
 5033         if (compat32) {
 5034                 memcpy(&m32, m, sizeof(m32));
 5035                 *rb_list = m32.m_rb_lnk;
 5036         } else {
 5037                 *rb_list = m->m_rb_lnk;
 5038         }
 5039 }
 5040 
/*
 * Handle one entry of a robust mutex list during thread exit/exec:
 * copy the mutex image in from userspace at 'rbp', optionally report
 * the link to the next list entry via *rb_list, validate the entry,
 * and unlock it on behalf of the dead owner.
 *
 * The next-link is read out before any validation so the caller
 * obtains it even when this entry is rejected.  'inact' marks the
 * entry named by the inactive (currently-being-released) slot, for
 * which an owner mismatch is tolerated: userspace clears the slot
 * only after the unlock, so the window is a benign inconsistency.
 *
 * Returns 0 on success or tolerated inconsistency, EINVAL for a
 * malformed entry, or the copyin/unlock error.
 */
static int
umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
    bool compat32)
{
	struct umutex m;
	int error;

	KASSERT(td->td_proc == curproc, ("need current vmspace"));
	error = copyin((void *)rbp, &m, sizeof(m));
	if (error != 0)
		return (error);
	if (rb_list != NULL)
		umtx_read_rb_list(td, &m, rb_list, compat32);
	/* Only mutexes initialized as robust may appear on the list. */
	if ((m.m_flags & UMUTEX_ROBUST) == 0)
		return (EINVAL);
	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
		/* inact is cleared after unlock, allow the inconsistency */
		return (inact ? 0 : EINVAL);
	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
}
 5061 
/*
 * Walk one robust mutex list rooted at userspace address 'rb_list',
 * unlocking each entry owned by the exiting thread.  *rb_inact is the
 * address recorded in the thread's "inactive" slot; if it is met on
 * the list it is handled leniently (see umtx_handle_rb()) and the
 * slot is consumed so the caller does not process it twice.  'name'
 * is a tag ("" or "priv ") for diagnostics only.
 *
 * The walk stops at the first error, at a NULL link, or after
 * umtx_max_rb entries — the cap defends against cyclic or corrupted
 * user lists.  Failures are only reported via uprintf() when
 * umtx_verbose_rb is set; this is best-effort cleanup.
 */
static void
umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
    const char *name, bool compat32)
{
	int error, i;
	uintptr_t rbp;
	bool inact;

	if (rb_list == 0)
		return;
	/* Fetch the list head; rbp is advanced by umtx_handle_rb() below. */
	error = umtx_read_uptr(td, rb_list, &rbp, compat32);
	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
		if (rbp == *rb_inact) {
			inact = true;
			*rb_inact = 0;
		} else
			inact = false;
		error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
	}
	/* i only reaches umtx_max_rb when the cap terminated the walk. */
	if (i == umtx_max_rb && umtx_verbose_rb) {
		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
	}
	if (error != 0 && umtx_verbose_rb) {
		uprintf("comm %s pid %d: handling %srb error %d\n",
		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
	}
}
 5090 
/*
 * Clean up umtx data for an exiting (or exec()ing) thread: disown all
 * priority-inheritance mutexes the thread still holds, drop any lent
 * priority, then release the thread's robust mutex lists.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	uintptr_t rb_inact;
	bool compat32;

	/*
	 * Disown pi mutexes.
	 */
	uq = td->td_umtxq;
	if (uq != NULL) {
		/* Cheap unlocked check; take umtx_lock only when needed. */
		if (uq->uq_inherited_pri != PRI_MAX ||
		    !TAILQ_EMPTY(&uq->uq_pi_contested)) {
			mtx_lock(&umtx_lock);
			uq->uq_inherited_pri = PRI_MAX;
			while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
				pi->pi_owner = NULL;
				TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
			}
			mtx_unlock(&umtx_lock);
		}
		sched_lend_user_prio_cond(td, PRI_MAX);
	}

	/* Consume the compat32 robust-list flag before any early return. */
	compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
	td->td_pflags2 &= ~TDP2_COMPAT32RB;

	if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
		return;

	/*
	 * Handle terminated robust mutexes.  Must be done after
	 * robust pi disown, otherwise unlock could see unowned
	 * entries.
	 */
	rb_inact = td->td_rb_inact;
	/* Dereference the inactive slot to the mutex address it names. */
	if (rb_inact != 0)
		(void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
	umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
	umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
	/* If the inactive mutex was not found on either list, unlock it now. */
	if (rb_inact != 0)
		(void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
}

Cache object: 30ff82f82b413740d36f7a055b0638fc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.