The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2015, 2016 The FreeBSD Foundation
    5  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
    6  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
    7  * All rights reserved.
    8  *
    9  * Portions of this software were developed by Konstantin Belousov
   10  * under sponsorship from the FreeBSD Foundation.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice unmodified, this list of conditions, and the following
   17  *    disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include "opt_umtx_profiling.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/kernel.h>
   41 #include <sys/fcntl.h>
   42 #include <sys/file.h>
   43 #include <sys/filedesc.h>
   44 #include <sys/limits.h>
   45 #include <sys/lock.h>
   46 #include <sys/malloc.h>
   47 #include <sys/mman.h>
   48 #include <sys/mutex.h>
   49 #include <sys/priv.h>
   50 #include <sys/proc.h>
   51 #include <sys/resource.h>
   52 #include <sys/resourcevar.h>
   53 #include <sys/rwlock.h>
   54 #include <sys/sbuf.h>
   55 #include <sys/sched.h>
   56 #include <sys/smp.h>
   57 #include <sys/sysctl.h>
   58 #include <sys/systm.h>
   59 #include <sys/sysproto.h>
   60 #include <sys/syscallsubr.h>
   61 #include <sys/taskqueue.h>
   62 #include <sys/time.h>
   63 #include <sys/eventhandler.h>
   64 #include <sys/umtx.h>
   65 #include <sys/umtxvar.h>
   66 
   67 #include <security/mac/mac_framework.h>
   68 
   69 #include <vm/vm.h>
   70 #include <vm/vm_param.h>
   71 #include <vm/pmap.h>
   72 #include <vm/vm_map.h>
   73 #include <vm/vm_object.h>
   74 
   75 #include <machine/atomic.h>
   76 #include <machine/cpu.h>
   77 
   78 #include <compat/freebsd32/freebsd32.h>
   79 #ifdef COMPAT_FREEBSD32
   80 #include <compat/freebsd32/freebsd32_proto.h>
   81 #endif
   82 
   83 #define _UMUTEX_TRY             1
   84 #define _UMUTEX_WAIT            2
   85 
   86 #ifdef UMTX_PROFILING
   87 #define UPROF_PERC_BIGGER(w, f, sw, sf)                                 \
   88         (((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
   89 #endif
   90 
   91 #define UMTXQ_LOCKED_ASSERT(uc)         mtx_assert(&(uc)->uc_lock, MA_OWNED)
   92 #ifdef INVARIANTS
   93 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {                              \
   94         struct umtxq_chain *uc;                                         \
   95                                                                         \
   96         uc = umtxq_getchain(key);                                       \
   97         mtx_assert(&uc->uc_lock, MA_OWNED);                             \
   98         KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));          \
   99 } while (0)
  100 #else
  101 #define UMTXQ_ASSERT_LOCKED_BUSY(key) do {} while (0)
  102 #endif
  103 
  104 /*
  105  * Don't propagate time-sharing priority, there is a security reason,
  106  * a user can simply introduce PI-mutex, let thread A lock the mutex,
  107  * and let another thread B block on the mutex, because B is
  108  * sleeping, its priority will be boosted, this causes A's priority to
  109  * be boosted via priority propagating too and will never be lowered even
  110  * if it is using 100%CPU, this is unfair to other processes.
  111  */
  112 
  113 #define UPRI(td)        (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
  114                           (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
  115                          PRI_MAX_TIMESHARE : (td)->td_user_pri)
  116 
  117 #define GOLDEN_RATIO_PRIME      2654404609U
  118 #ifndef UMTX_CHAINS
  119 #define UMTX_CHAINS             512
  120 #endif
  121 #define UMTX_SHIFTS             (__WORD_BIT - 9)
  122 
  123 #define GET_SHARE(flags)        \
  124     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
  125 
  126 #define BUSY_SPINS              200
  127 
  128 struct umtx_copyops {
  129         int     (*copyin_timeout)(const void *uaddr, struct timespec *tsp);
  130         int     (*copyin_umtx_time)(const void *uaddr, size_t size,
  131             struct _umtx_time *tp);
  132         int     (*copyin_robust_lists)(const void *uaddr, size_t size,
  133             struct umtx_robust_lists_params *rbp);
  134         int     (*copyout_timeout)(void *uaddr, size_t size,
  135             struct timespec *tsp);
  136         const size_t    timespec_sz;
  137         const size_t    umtx_time_sz;
  138         const bool      compat32;
  139 };
  140 
  141 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
  142 _Static_assert(__offsetof(struct umutex, m_spare[0]) ==
  143     __offsetof(struct umutex32, m_spare[0]), "m_spare32");
  144 
  145 int umtx_shm_vnobj_persistent = 0;
  146 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
  147     &umtx_shm_vnobj_persistent, 0,
  148     "False forces destruction of umtx attached to file, on last close");
  149 static int umtx_max_rb = 1000;
  150 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
  151     &umtx_max_rb, 0,
  152     "Maximum number of robust mutexes allowed for each thread");
  153 
  154 static uma_zone_t               umtx_pi_zone;
  155 static struct umtxq_chain       umtxq_chains[2][UMTX_CHAINS];
  156 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
  157 static int                      umtx_pi_allocated;
  158 
  159 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
  160     "umtx debug");
  161 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
  162     &umtx_pi_allocated, 0, "Allocated umtx_pi");
  163 static int umtx_verbose_rb = 1;
  164 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
  165     &umtx_verbose_rb, 0,
  166     "");
  167 
  168 #ifdef UMTX_PROFILING
  169 static long max_length;
  170 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
  171 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  172     "umtx chain stats");
  173 #endif
  174 
  175 static inline void umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
  176     const struct _umtx_time *umtxtime);
  177 
  178 static void umtx_shm_init(void);
  179 static void umtxq_sysinit(void *);
  180 static void umtxq_hash(struct umtx_key *key);
  181 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
  182     bool rb);
  183 static void umtx_thread_cleanup(struct thread *td);
  184 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
  185 
  186 #define umtxq_signal(key, nwake)        umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
  187 
  188 static struct mtx umtx_lock;
  189 
  190 #ifdef UMTX_PROFILING
  191 static void
  192 umtx_init_profiling(void)
  193 {
  194         struct sysctl_oid *chain_oid;
  195         char chain_name[10];
  196         int i;
  197 
  198         for (i = 0; i < UMTX_CHAINS; ++i) {
  199                 snprintf(chain_name, sizeof(chain_name), "%d", i);
  200                 chain_oid = SYSCTL_ADD_NODE(NULL,
  201                     SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
  202                     chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
  203                     "umtx hash stats");
  204                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  205                     "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
  206                 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  207                     "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
  208         }
  209 }
  210 
  211 static int
  212 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
  213 {
  214         char buf[512];
  215         struct sbuf sb;
  216         struct umtxq_chain *uc;
  217         u_int fract, i, j, tot, whole;
  218         u_int sf0, sf1, sf2, sf3, sf4;
  219         u_int si0, si1, si2, si3, si4;
  220         u_int sw0, sw1, sw2, sw3, sw4;
  221 
  222         sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
  223         for (i = 0; i < 2; i++) {
  224                 tot = 0;
  225                 for (j = 0; j < UMTX_CHAINS; ++j) {
  226                         uc = &umtxq_chains[i][j];
  227                         mtx_lock(&uc->uc_lock);
  228                         tot += uc->max_length;
  229                         mtx_unlock(&uc->uc_lock);
  230                 }
  231                 if (tot == 0)
  232                         sbuf_printf(&sb, "%u) Empty ", i);
  233                 else {
  234                         sf0 = sf1 = sf2 = sf3 = sf4 = 0;
  235                         si0 = si1 = si2 = si3 = si4 = 0;
  236                         sw0 = sw1 = sw2 = sw3 = sw4 = 0;
  237                         for (j = 0; j < UMTX_CHAINS; j++) {
  238                                 uc = &umtxq_chains[i][j];
  239                                 mtx_lock(&uc->uc_lock);
  240                                 whole = uc->max_length * 100;
  241                                 mtx_unlock(&uc->uc_lock);
  242                                 fract = (whole % tot) * 100;
  243                                 if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
  244                                         sf0 = fract;
  245                                         si0 = j;
  246                                         sw0 = whole;
  247                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw1,
  248                                     sf1)) {
  249                                         sf1 = fract;
  250                                         si1 = j;
  251                                         sw1 = whole;
  252                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw2,
  253                                     sf2)) {
  254                                         sf2 = fract;
  255                                         si2 = j;
  256                                         sw2 = whole;
  257                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw3,
  258                                     sf3)) {
  259                                         sf3 = fract;
  260                                         si3 = j;
  261                                         sw3 = whole;
  262                                 } else if (UPROF_PERC_BIGGER(whole, fract, sw4,
  263                                     sf4)) {
  264                                         sf4 = fract;
  265                                         si4 = j;
  266                                         sw4 = whole;
  267                                 }
  268                         }
  269                         sbuf_printf(&sb, "queue %u:\n", i);
  270                         sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
  271                             sf0 / tot, si0);
  272                         sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
  273                             sf1 / tot, si1);
  274                         sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
  275                             sf2 / tot, si2);
  276                         sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
  277                             sf3 / tot, si3);
  278                         sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
  279                             sf4 / tot, si4);
  280                 }
  281         }
  282         sbuf_trim(&sb);
  283         sbuf_finish(&sb);
  284         sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
  285         sbuf_delete(&sb);
  286         return (0);
  287 }
  288 
  289 static int
  290 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
  291 {
  292         struct umtxq_chain *uc;
  293         u_int i, j;
  294         int clear, error;
  295 
  296         clear = 0;
  297         error = sysctl_handle_int(oidp, &clear, 0, req);
  298         if (error != 0 || req->newptr == NULL)
  299                 return (error);
  300 
  301         if (clear != 0) {
  302                 for (i = 0; i < 2; ++i) {
  303                         for (j = 0; j < UMTX_CHAINS; ++j) {
  304                                 uc = &umtxq_chains[i][j];
  305                                 mtx_lock(&uc->uc_lock);
  306                                 uc->length = 0;
  307                                 uc->max_length = 0;
  308                                 mtx_unlock(&uc->uc_lock);
  309                         }
  310                 }
  311         }
  312         return (0);
  313 }
  314 
  315 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
  316     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
  317     sysctl_debug_umtx_chains_clear, "I",
  318     "Clear umtx chains statistics");
  319 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
  320     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
  321     sysctl_debug_umtx_chains_peaks, "A",
  322     "Highest peaks in chains max length");
  323 #endif
  324 
  325 static void
  326 umtxq_sysinit(void *arg __unused)
  327 {
  328         int i, j;
  329 
  330         umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
  331                 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  332         for (i = 0; i < 2; ++i) {
  333                 for (j = 0; j < UMTX_CHAINS; ++j) {
  334                         mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
  335                                  MTX_DEF | MTX_DUPOK);
  336                         LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
  337                         LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
  338                         LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
  339                         TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
  340                         umtxq_chains[i][j].uc_busy = 0;
  341                         umtxq_chains[i][j].uc_waiters = 0;
  342 #ifdef UMTX_PROFILING
  343                         umtxq_chains[i][j].length = 0;
  344                         umtxq_chains[i][j].max_length = 0;
  345 #endif
  346                 }
  347         }
  348 #ifdef UMTX_PROFILING
  349         umtx_init_profiling();
  350 #endif
  351         mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
  352         umtx_shm_init();
  353 }
  354 
  355 struct umtx_q *
  356 umtxq_alloc(void)
  357 {
  358         struct umtx_q *uq;
  359 
  360         uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
  361         uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
  362             M_WAITOK | M_ZERO);
  363         TAILQ_INIT(&uq->uq_spare_queue->head);
  364         TAILQ_INIT(&uq->uq_pi_contested);
  365         uq->uq_inherited_pri = PRI_MAX;
  366         return (uq);
  367 }
  368 
  369 void
  370 umtxq_free(struct umtx_q *uq)
  371 {
  372 
  373         MPASS(uq->uq_spare_queue != NULL);
  374         free(uq->uq_spare_queue, M_UMTX);
  375         free(uq, M_UMTX);
  376 }
  377 
  378 static inline void
  379 umtxq_hash(struct umtx_key *key)
  380 {
  381         unsigned n;
  382 
  383         n = (uintptr_t)key->info.both.a + key->info.both.b;
  384         key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
  385 }
  386 
  387 struct umtxq_chain *
  388 umtxq_getchain(struct umtx_key *key)
  389 {
  390 
  391         if (key->type <= TYPE_SEM)
  392                 return (&umtxq_chains[1][key->hash]);
  393         return (&umtxq_chains[0][key->hash]);
  394 }
  395 
  396 /*
  397  * Set chain to busy state when following operation
  398  * may be blocked (kernel mutex can not be used).
  399  */
  400 void
  401 umtxq_busy(struct umtx_key *key)
  402 {
  403         struct umtxq_chain *uc;
  404 
  405         uc = umtxq_getchain(key);
  406         mtx_assert(&uc->uc_lock, MA_OWNED);
  407         if (uc->uc_busy) {
  408 #ifdef SMP
  409                 if (smp_cpus > 1) {
  410                         int count = BUSY_SPINS;
  411                         if (count > 0) {
  412                                 umtxq_unlock(key);
  413                                 while (uc->uc_busy && --count > 0)
  414                                         cpu_spinwait();
  415                                 umtxq_lock(key);
  416                         }
  417                 }
  418 #endif
  419                 while (uc->uc_busy) {
  420                         uc->uc_waiters++;
  421                         msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
  422                         uc->uc_waiters--;
  423                 }
  424         }
  425         uc->uc_busy = 1;
  426 }
  427 
  428 /*
  429  * Unbusy a chain.
  430  */
  431 void
  432 umtxq_unbusy(struct umtx_key *key)
  433 {
  434         struct umtxq_chain *uc;
  435 
  436         uc = umtxq_getchain(key);
  437         mtx_assert(&uc->uc_lock, MA_OWNED);
  438         KASSERT(uc->uc_busy != 0, ("not busy"));
  439         uc->uc_busy = 0;
  440         if (uc->uc_waiters)
  441                 wakeup_one(uc);
  442 }
  443 
  444 void
  445 umtxq_unbusy_unlocked(struct umtx_key *key)
  446 {
  447 
  448         umtxq_lock(key);
  449         umtxq_unbusy(key);
  450         umtxq_unlock(key);
  451 }
  452 
  453 static struct umtxq_queue *
  454 umtxq_queue_lookup(struct umtx_key *key, int q)
  455 {
  456         struct umtxq_queue *uh;
  457         struct umtxq_chain *uc;
  458 
  459         uc = umtxq_getchain(key);
  460         UMTXQ_LOCKED_ASSERT(uc);
  461         LIST_FOREACH(uh, &uc->uc_queue[q], link) {
  462                 if (umtx_key_match(&uh->key, key))
  463                         return (uh);
  464         }
  465 
  466         return (NULL);
  467 }
  468 
  469 void
  470 umtxq_insert_queue(struct umtx_q *uq, int q)
  471 {
  472         struct umtxq_queue *uh;
  473         struct umtxq_chain *uc;
  474 
  475         uc = umtxq_getchain(&uq->uq_key);
  476         UMTXQ_LOCKED_ASSERT(uc);
  477         KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
  478         uh = umtxq_queue_lookup(&uq->uq_key, q);
  479         if (uh != NULL) {
  480                 LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
  481         } else {
  482                 uh = uq->uq_spare_queue;
  483                 uh->key = uq->uq_key;
  484                 LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
  485 #ifdef UMTX_PROFILING
  486                 uc->length++;
  487                 if (uc->length > uc->max_length) {
  488                         uc->max_length = uc->length;
  489                         if (uc->max_length > max_length)
  490                                 max_length = uc->max_length;
  491                 }
  492 #endif
  493         }
  494         uq->uq_spare_queue = NULL;
  495 
  496         TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
  497         uh->length++;
  498         uq->uq_flags |= UQF_UMTXQ;
  499         uq->uq_cur_queue = uh;
  500         return;
  501 }
  502 
  503 void
  504 umtxq_remove_queue(struct umtx_q *uq, int q)
  505 {
  506         struct umtxq_chain *uc;
  507         struct umtxq_queue *uh;
  508 
  509         uc = umtxq_getchain(&uq->uq_key);
  510         UMTXQ_LOCKED_ASSERT(uc);
  511         if (uq->uq_flags & UQF_UMTXQ) {
  512                 uh = uq->uq_cur_queue;
  513                 TAILQ_REMOVE(&uh->head, uq, uq_link);
  514                 uh->length--;
  515                 uq->uq_flags &= ~UQF_UMTXQ;
  516                 if (TAILQ_EMPTY(&uh->head)) {
  517                         KASSERT(uh->length == 0,
  518                             ("inconsistent umtxq_queue length"));
  519 #ifdef UMTX_PROFILING
  520                         uc->length--;
  521 #endif
  522                         LIST_REMOVE(uh, link);
  523                 } else {
  524                         uh = LIST_FIRST(&uc->uc_spare_queue);
  525                         KASSERT(uh != NULL, ("uc_spare_queue is empty"));
  526                         LIST_REMOVE(uh, link);
  527                 }
  528                 uq->uq_spare_queue = uh;
  529                 uq->uq_cur_queue = NULL;
  530         }
  531 }
  532 
  533 /*
  534  * Check if there are multiple waiters
  535  */
  536 int
  537 umtxq_count(struct umtx_key *key)
  538 {
  539         struct umtxq_queue *uh;
  540 
  541         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  542         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  543         if (uh != NULL)
  544                 return (uh->length);
  545         return (0);
  546 }
  547 
  548 /*
  549  * Check if there are multiple PI waiters and returns first
  550  * waiter.
  551  */
  552 static int
  553 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
  554 {
  555         struct umtxq_queue *uh;
  556 
  557         *first = NULL;
  558         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  559         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  560         if (uh != NULL) {
  561                 *first = TAILQ_FIRST(&uh->head);
  562                 return (uh->length);
  563         }
  564         return (0);
  565 }
  566 
  567 /*
  568  * Wake up threads waiting on an userland object by a bit mask.
  569  */
  570 int
  571 umtxq_signal_mask(struct umtx_key *key, int n_wake, u_int bitset)
  572 {
  573         struct umtxq_queue *uh;
  574         struct umtx_q *uq, *uq_temp;
  575         int ret;
  576 
  577         ret = 0;
  578         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  579         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  580         if (uh == NULL)
  581                 return (0);
  582         TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
  583                 if ((uq->uq_bitset & bitset) == 0)
  584                         continue;
  585                 umtxq_remove_queue(uq, UMTX_SHARED_QUEUE);
  586                 wakeup_one(uq);
  587                 if (++ret >= n_wake)
  588                         break;
  589         }
  590         return (ret);
  591 }
  592 
  593 /*
  594  * Wake up threads waiting on an userland object.
  595  */
  596 
  597 static int
  598 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
  599 {
  600         struct umtxq_queue *uh;
  601         struct umtx_q *uq;
  602         int ret;
  603 
  604         ret = 0;
  605         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  606         uh = umtxq_queue_lookup(key, q);
  607         if (uh != NULL) {
  608                 while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
  609                         umtxq_remove_queue(uq, q);
  610                         wakeup(uq);
  611                         if (++ret >= n_wake)
  612                                 return (ret);
  613                 }
  614         }
  615         return (ret);
  616 }
  617 
  618 /*
  619  * Wake up specified thread.
  620  */
  621 static inline void
  622 umtxq_signal_thread(struct umtx_q *uq)
  623 {
  624 
  625         UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
  626         umtxq_remove(uq);
  627         wakeup(uq);
  628 }
  629 
  630 /*
  631  * Wake up a maximum of n_wake threads that are waiting on an userland
  632  * object identified by key. The remaining threads are removed from queue
  633  * identified by key and added to the queue identified by key2 (requeued).
  634  * The n_requeue specifies an upper limit on the number of threads that
  635  * are requeued to the second queue.
  636  */
  637 int
  638 umtxq_requeue(struct umtx_key *key, int n_wake, struct umtx_key *key2,
  639     int n_requeue)
  640 {
  641         struct umtxq_queue *uh;
  642         struct umtx_q *uq, *uq_temp;
  643         int ret;
  644 
  645         ret = 0;
  646         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
  647         UMTXQ_LOCKED_ASSERT(umtxq_getchain(key2));
  648         uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
  649         if (uh == NULL)
  650                 return (0);
  651         TAILQ_FOREACH_SAFE(uq, &uh->head, uq_link, uq_temp) {
  652                 if (++ret <= n_wake) {
  653                         umtxq_remove(uq);
  654                         wakeup_one(uq);
  655                 } else {
  656                         umtxq_remove(uq);
  657                         uq->uq_key = *key2;
  658                         umtxq_insert(uq);
  659                         if (ret - n_wake == n_requeue)
  660                                 break;
  661                 }
  662         }
  663         return (ret);
  664 }
  665 
  666 static inline int
  667 tstohz(const struct timespec *tsp)
  668 {
  669         struct timeval tv;
  670 
  671         TIMESPEC_TO_TIMEVAL(&tv, tsp);
  672         return tvtohz(&tv);
  673 }
  674 
  675 void
  676 umtx_abs_timeout_init(struct umtx_abs_timeout *timo, int clockid,
  677     int absolute, const struct timespec *timeout)
  678 {
  679 
  680         timo->clockid = clockid;
  681         if (!absolute) {
  682                 timo->is_abs_real = false;
  683                 kern_clock_gettime(curthread, timo->clockid, &timo->cur);
  684                 timespecadd(&timo->cur, timeout, &timo->end);
  685         } else {
  686                 timo->end = *timeout;
  687                 timo->is_abs_real = clockid == CLOCK_REALTIME ||
  688                     clockid == CLOCK_REALTIME_FAST ||
  689                     clockid == CLOCK_REALTIME_PRECISE ||
  690                     clockid == CLOCK_SECOND;
  691         }
  692 }
  693 
  694 static void
  695 umtx_abs_timeout_init2(struct umtx_abs_timeout *timo,
  696     const struct _umtx_time *umtxtime)
  697 {
  698 
  699         umtx_abs_timeout_init(timo, umtxtime->_clockid,
  700             (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
  701 }
  702 
  703 static int
  704 umtx_abs_timeout_getsbt(struct umtx_abs_timeout *timo, sbintime_t *sbt,
  705     int *flags)
  706 {
  707         struct bintime bt, bbt;
  708         struct timespec tts;
  709         sbintime_t rem;
  710 
  711         switch (timo->clockid) {
  712 
  713         /* Clocks that can be converted into absolute time. */
  714         case CLOCK_REALTIME:
  715         case CLOCK_REALTIME_PRECISE:
  716         case CLOCK_REALTIME_FAST:
  717         case CLOCK_MONOTONIC:
  718         case CLOCK_MONOTONIC_PRECISE:
  719         case CLOCK_MONOTONIC_FAST:
  720         case CLOCK_UPTIME:
  721         case CLOCK_UPTIME_PRECISE:
  722         case CLOCK_UPTIME_FAST:
  723         case CLOCK_SECOND:
  724                 timespec2bintime(&timo->end, &bt);
  725                 switch (timo->clockid) {
  726                 case CLOCK_REALTIME:
  727                 case CLOCK_REALTIME_PRECISE:
  728                 case CLOCK_REALTIME_FAST:
  729                 case CLOCK_SECOND:
  730                         getboottimebin(&bbt);
  731                         bintime_sub(&bt, &bbt);
  732                         break;
  733                 }
  734                 if (bt.sec < 0)
  735                         return (ETIMEDOUT);
  736                 if (bt.sec >= (SBT_MAX >> 32)) {
  737                         *sbt = 0;
  738                         *flags = 0;
  739                         return (0);
  740                 }
  741                 *sbt = bttosbt(bt);
  742 
  743                 /*
  744                  * Check if the absolute time should be aligned to
  745                  * avoid firing multiple timer events in non-periodic
  746                  * timer mode.
  747                  */
  748                 switch (timo->clockid) {
  749                 case CLOCK_REALTIME_FAST:
  750                 case CLOCK_MONOTONIC_FAST:
  751                 case CLOCK_UPTIME_FAST:
  752                         rem = *sbt % tc_tick_sbt;
  753                         if (__predict_true(rem != 0))
  754                                 *sbt += tc_tick_sbt - rem;
  755                         break;
  756                 case CLOCK_SECOND:
  757                         rem = *sbt % SBT_1S;
  758                         if (__predict_true(rem != 0))
  759                                 *sbt += SBT_1S - rem;
  760                         break;
  761                 }
  762                 *flags = C_ABSOLUTE;
  763                 return (0);
  764 
  765         /* Clocks that has to be periodically polled. */
  766         case CLOCK_VIRTUAL:
  767         case CLOCK_PROF:
  768         case CLOCK_THREAD_CPUTIME_ID:
  769         case CLOCK_PROCESS_CPUTIME_ID:
  770         default:
  771                 kern_clock_gettime(curthread, timo->clockid, &timo->cur);
  772                 if (timespeccmp(&timo->end, &timo->cur, <=))
  773                         return (ETIMEDOUT);
  774                 timespecsub(&timo->end, &timo->cur, &tts);
  775                 *sbt = tick_sbt * tstohz(&tts);
  776                 *flags = C_HARDCLOCK;
  777                 return (0);
  778         }
  779 }
  780 
  781 static uint32_t
  782 umtx_unlock_val(uint32_t flags, bool rb)
  783 {
  784 
  785         if (rb)
  786                 return (UMUTEX_RB_OWNERDEAD);
  787         else if ((flags & UMUTEX_NONCONSISTENT) != 0)
  788                 return (UMUTEX_RB_NOTRECOV);
  789         else
  790                 return (UMUTEX_UNOWNED);
  791 
  792 }
  793 
  794 /*
  795  * Put thread into sleep state, before sleeping, check if
  796  * thread was removed from umtx queue.
  797  */
  798 int
  799 umtxq_sleep(struct umtx_q *uq, const char *wmesg,
  800     struct umtx_abs_timeout *timo)
  801 {
  802         struct umtxq_chain *uc;
  803         sbintime_t sbt = 0;
  804         int error, flags = 0;
  805 
  806         uc = umtxq_getchain(&uq->uq_key);
  807         UMTXQ_LOCKED_ASSERT(uc);
  808         for (;;) {
  809                 if (!(uq->uq_flags & UQF_UMTXQ)) {
  810                         error = 0;
  811                         break;
  812                 }
  813                 if (timo != NULL) {
  814                         if (timo->is_abs_real)
  815                                 curthread->td_rtcgen =
  816                                     atomic_load_acq_int(&rtc_generation);
  817                         error = umtx_abs_timeout_getsbt(timo, &sbt, &flags);
  818                         if (error != 0)
  819                                 break;
  820                 }
  821                 error = msleep_sbt(uq, &uc->uc_lock, PCATCH | PDROP, wmesg,
  822                     sbt, 0, flags);
  823                 uc = umtxq_getchain(&uq->uq_key);
  824                 mtx_lock(&uc->uc_lock);
  825                 if (error == EINTR || error == ERESTART)
  826                         break;
  827                 if (error == EWOULDBLOCK && (flags & C_ABSOLUTE) != 0) {
  828                         error = ETIMEDOUT;
  829                         break;
  830                 }
  831         }
  832 
  833         curthread->td_rtcgen = 0;
  834         return (error);
  835 }
  836 
  837 /*
  838  * Convert userspace address into unique logical address.
  839  */
  840 int
  841 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
  842 {
  843         struct thread *td = curthread;
  844         vm_map_t map;
  845         vm_map_entry_t entry;
  846         vm_pindex_t pindex;
  847         vm_prot_t prot;
  848         boolean_t wired;
  849 
  850         key->type = type;
  851         if (share == THREAD_SHARE) {
  852                 key->shared = 0;
  853                 key->info.private.vs = td->td_proc->p_vmspace;
  854                 key->info.private.addr = (uintptr_t)addr;
  855         } else {
  856                 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
  857                 map = &td->td_proc->p_vmspace->vm_map;
  858                 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
  859                     &entry, &key->info.shared.object, &pindex, &prot,
  860                     &wired) != KERN_SUCCESS) {
  861                         return (EFAULT);
  862                 }
  863 
  864                 if ((share == PROCESS_SHARE) ||
  865                     (share == AUTO_SHARE &&
  866                      VM_INHERIT_SHARE == entry->inheritance)) {
  867                         key->shared = 1;
  868                         key->info.shared.offset = (vm_offset_t)addr -
  869                             entry->start + entry->offset;
  870                         vm_object_reference(key->info.shared.object);
  871                 } else {
  872                         key->shared = 0;
  873                         key->info.private.vs = td->td_proc->p_vmspace;
  874                         key->info.private.addr = (uintptr_t)addr;
  875                 }
  876                 vm_map_lookup_done(map, entry);
  877         }
  878 
  879         umtxq_hash(key);
  880         return (0);
  881 }
  882 
  883 /*
  884  * Release key.
  885  */
  886 void
  887 umtx_key_release(struct umtx_key *key)
  888 {
  889         if (key->shared)
  890                 vm_object_deallocate(key->info.shared.object);
  891 }
  892 
  893 #ifdef COMPAT_FREEBSD10
  894 /*
  895  * Lock a umtx object.
  896  */
  897 static int
  898 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
  899     const struct timespec *timeout)
  900 {
  901         struct umtx_abs_timeout timo;
  902         struct umtx_q *uq;
  903         u_long owner;
  904         u_long old;
  905         int error = 0;
  906 
  907         uq = td->td_umtxq;
  908         if (timeout != NULL)
  909                 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
  910 
  911         /*
  912          * Care must be exercised when dealing with umtx structure. It
  913          * can fault on any access.
  914          */
  915         for (;;) {
  916                 /*
  917                  * Try the uncontested case.  This should be done in userland.
  918                  */
  919                 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
  920 
  921                 /* The acquire succeeded. */
  922                 if (owner == UMTX_UNOWNED)
  923                         return (0);
  924 
  925                 /* The address was invalid. */
  926                 if (owner == -1)
  927                         return (EFAULT);
  928 
  929                 /* If no one owns it but it is contested try to acquire it. */
  930                 if (owner == UMTX_CONTESTED) {
  931                         owner = casuword(&umtx->u_owner,
  932                             UMTX_CONTESTED, id | UMTX_CONTESTED);
  933 
  934                         if (owner == UMTX_CONTESTED)
  935                                 return (0);
  936 
  937                         /* The address was invalid. */
  938                         if (owner == -1)
  939                                 return (EFAULT);
  940 
  941                         error = thread_check_susp(td, false);
  942                         if (error != 0)
  943                                 break;
  944 
  945                         /* If this failed the lock has changed, restart. */
  946                         continue;
  947                 }
  948 
  949                 /*
  950                  * If we caught a signal, we have retried and now
  951                  * exit immediately.
  952                  */
  953                 if (error != 0)
  954                         break;
  955 
  956                 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
  957                         AUTO_SHARE, &uq->uq_key)) != 0)
  958                         return (error);
  959 
  960                 umtxq_lock(&uq->uq_key);
  961                 umtxq_busy(&uq->uq_key);
  962                 umtxq_insert(uq);
  963                 umtxq_unbusy(&uq->uq_key);
  964                 umtxq_unlock(&uq->uq_key);
  965 
  966                 /*
  967                  * Set the contested bit so that a release in user space
  968                  * knows to use the system call for unlock.  If this fails
  969                  * either some one else has acquired the lock or it has been
  970                  * released.
  971                  */
  972                 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
  973 
  974                 /* The address was invalid. */
  975                 if (old == -1) {
  976                         umtxq_lock(&uq->uq_key);
  977                         umtxq_remove(uq);
  978                         umtxq_unlock(&uq->uq_key);
  979                         umtx_key_release(&uq->uq_key);
  980                         return (EFAULT);
  981                 }
  982 
  983                 /*
  984                  * We set the contested bit, sleep. Otherwise the lock changed
  985                  * and we need to retry or we lost a race to the thread
  986                  * unlocking the umtx.
  987                  */
  988                 umtxq_lock(&uq->uq_key);
  989                 if (old == owner)
  990                         error = umtxq_sleep(uq, "umtx", timeout == NULL ? NULL :
  991                             &timo);
  992                 umtxq_remove(uq);
  993                 umtxq_unlock(&uq->uq_key);
  994                 umtx_key_release(&uq->uq_key);
  995 
  996                 if (error == 0)
  997                         error = thread_check_susp(td, false);
  998         }
  999 
 1000         if (timeout == NULL) {
 1001                 /* Mutex locking is restarted if it is interrupted. */
 1002                 if (error == EINTR)
 1003                         error = ERESTART;
 1004         } else {
 1005                 /* Timed-locking is not restarted. */
 1006                 if (error == ERESTART)
 1007                         error = EINTR;
 1008         }
 1009         return (error);
 1010 }
 1011 
 1012 /*
 1013  * Unlock a umtx object.
 1014  */
 1015 static int
 1016 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
 1017 {
 1018         struct umtx_key key;
 1019         u_long owner;
 1020         u_long old;
 1021         int error;
 1022         int count;
 1023 
 1024         /*
 1025          * Make sure we own this mtx.
 1026          */
 1027         owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
 1028         if (owner == -1)
 1029                 return (EFAULT);
 1030 
 1031         if ((owner & ~UMTX_CONTESTED) != id)
 1032                 return (EPERM);
 1033 
 1034         /* This should be done in userland */
 1035         if ((owner & UMTX_CONTESTED) == 0) {
 1036                 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
 1037                 if (old == -1)
 1038                         return (EFAULT);
 1039                 if (old == owner)
 1040                         return (0);
 1041                 owner = old;
 1042         }
 1043 
 1044         /* We should only ever be in here for contested locks */
 1045         if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
 1046             &key)) != 0)
 1047                 return (error);
 1048 
 1049         umtxq_lock(&key);
 1050         umtxq_busy(&key);
 1051         count = umtxq_count(&key);
 1052         umtxq_unlock(&key);
 1053 
 1054         /*
 1055          * When unlocking the umtx, it must be marked as unowned if
 1056          * there is zero or one thread only waiting for it.
 1057          * Otherwise, it must be marked as contested.
 1058          */
 1059         old = casuword(&umtx->u_owner, owner,
 1060             count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
 1061         umtxq_lock(&key);
 1062         umtxq_signal(&key,1);
 1063         umtxq_unbusy(&key);
 1064         umtxq_unlock(&key);
 1065         umtx_key_release(&key);
 1066         if (old == -1)
 1067                 return (EFAULT);
 1068         if (old != owner)
 1069                 return (EINVAL);
 1070         return (0);
 1071 }
 1072 
 1073 #ifdef COMPAT_FREEBSD32
 1074 
 1075 /*
 1076  * Lock a umtx object.
 1077  */
 1078 static int
 1079 do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id,
 1080         const struct timespec *timeout)
 1081 {
 1082         struct umtx_abs_timeout timo;
 1083         struct umtx_q *uq;
 1084         uint32_t owner;
 1085         uint32_t old;
 1086         int error = 0;
 1087 
 1088         uq = td->td_umtxq;
 1089 
 1090         if (timeout != NULL)
 1091                 umtx_abs_timeout_init(&timo, CLOCK_REALTIME, 0, timeout);
 1092 
 1093         /*
 1094          * Care must be exercised when dealing with umtx structure. It
 1095          * can fault on any access.
 1096          */
 1097         for (;;) {
 1098                 /*
 1099                  * Try the uncontested case.  This should be done in userland.
 1100                  */
 1101                 owner = casuword32(m, UMUTEX_UNOWNED, id);
 1102 
 1103                 /* The acquire succeeded. */
 1104                 if (owner == UMUTEX_UNOWNED)
 1105                         return (0);
 1106 
 1107                 /* The address was invalid. */
 1108                 if (owner == -1)
 1109                         return (EFAULT);
 1110 
 1111                 /* If no one owns it but it is contested try to acquire it. */
 1112                 if (owner == UMUTEX_CONTESTED) {
 1113                         owner = casuword32(m,
 1114                             UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
 1115                         if (owner == UMUTEX_CONTESTED)
 1116                                 return (0);
 1117 
 1118                         /* The address was invalid. */
 1119                         if (owner == -1)
 1120                                 return (EFAULT);
 1121 
 1122                         error = thread_check_susp(td, false);
 1123                         if (error != 0)
 1124                                 break;
 1125 
 1126                         /* If this failed the lock has changed, restart. */
 1127                         continue;
 1128                 }
 1129 
 1130                 /*
 1131                  * If we caught a signal, we have retried and now
 1132                  * exit immediately.
 1133                  */
 1134                 if (error != 0)
 1135                         return (error);
 1136 
 1137                 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
 1138                         AUTO_SHARE, &uq->uq_key)) != 0)
 1139                         return (error);
 1140 
 1141                 umtxq_lock(&uq->uq_key);
 1142                 umtxq_busy(&uq->uq_key);
 1143                 umtxq_insert(uq);
 1144                 umtxq_unbusy(&uq->uq_key);
 1145                 umtxq_unlock(&uq->uq_key);
 1146 
 1147                 /*
 1148                  * Set the contested bit so that a release in user space
 1149                  * knows to use the system call for unlock.  If this fails
 1150                  * either some one else has acquired the lock or it has been
 1151                  * released.
 1152                  */
 1153                 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
 1154 
 1155                 /* The address was invalid. */
 1156                 if (old == -1) {
 1157                         umtxq_lock(&uq->uq_key);
 1158                         umtxq_remove(uq);
 1159                         umtxq_unlock(&uq->uq_key);
 1160                         umtx_key_release(&uq->uq_key);
 1161                         return (EFAULT);
 1162                 }
 1163 
 1164                 /*
 1165                  * We set the contested bit, sleep. Otherwise the lock changed
 1166                  * and we need to retry or we lost a race to the thread
 1167                  * unlocking the umtx.
 1168                  */
 1169                 umtxq_lock(&uq->uq_key);
 1170                 if (old == owner)
 1171                         error = umtxq_sleep(uq, "umtx", timeout == NULL ?
 1172                             NULL : &timo);
 1173                 umtxq_remove(uq);
 1174                 umtxq_unlock(&uq->uq_key);
 1175                 umtx_key_release(&uq->uq_key);
 1176 
 1177                 if (error == 0)
 1178                         error = thread_check_susp(td, false);
 1179         }
 1180 
 1181         if (timeout == NULL) {
 1182                 /* Mutex locking is restarted if it is interrupted. */
 1183                 if (error == EINTR)
 1184                         error = ERESTART;
 1185         } else {
 1186                 /* Timed-locking is not restarted. */
 1187                 if (error == ERESTART)
 1188                         error = EINTR;
 1189         }
 1190         return (error);
 1191 }
 1192 
 1193 /*
 1194  * Unlock a umtx object.
 1195  */
 1196 static int
 1197 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
 1198 {
 1199         struct umtx_key key;
 1200         uint32_t owner;
 1201         uint32_t old;
 1202         int error;
 1203         int count;
 1204 
 1205         /*
 1206          * Make sure we own this mtx.
 1207          */
 1208         owner = fuword32(m);
 1209         if (owner == -1)
 1210                 return (EFAULT);
 1211 
 1212         if ((owner & ~UMUTEX_CONTESTED) != id)
 1213                 return (EPERM);
 1214 
 1215         /* This should be done in userland */
 1216         if ((owner & UMUTEX_CONTESTED) == 0) {
 1217                 old = casuword32(m, owner, UMUTEX_UNOWNED);
 1218                 if (old == -1)
 1219                         return (EFAULT);
 1220                 if (old == owner)
 1221                         return (0);
 1222                 owner = old;
 1223         }
 1224 
 1225         /* We should only ever be in here for contested locks */
 1226         if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
 1227                 &key)) != 0)
 1228                 return (error);
 1229 
 1230         umtxq_lock(&key);
 1231         umtxq_busy(&key);
 1232         count = umtxq_count(&key);
 1233         umtxq_unlock(&key);
 1234 
 1235         /*
 1236          * When unlocking the umtx, it must be marked as unowned if
 1237          * there is zero or one thread only waiting for it.
 1238          * Otherwise, it must be marked as contested.
 1239          */
 1240         old = casuword32(m, owner,
 1241                 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
 1242         umtxq_lock(&key);
 1243         umtxq_signal(&key,1);
 1244         umtxq_unbusy(&key);
 1245         umtxq_unlock(&key);
 1246         umtx_key_release(&key);
 1247         if (old == -1)
 1248                 return (EFAULT);
 1249         if (old != owner)
 1250                 return (EINVAL);
 1251         return (0);
 1252 }
 1253 #endif  /* COMPAT_FREEBSD32 */
 1254 #endif  /* COMPAT_FREEBSD10 */
 1255 
 1256 /*
 1257  * Fetch and compare value, sleep on the address if value is not changed.
 1258  */
 1259 static int
 1260 do_wait(struct thread *td, void *addr, u_long id,
 1261     struct _umtx_time *timeout, int compat32, int is_private)
 1262 {
 1263         struct umtx_abs_timeout timo;
 1264         struct umtx_q *uq;
 1265         u_long tmp;
 1266         uint32_t tmp32;
 1267         int error = 0;
 1268 
 1269         uq = td->td_umtxq;
 1270         if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
 1271             is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
 1272                 return (error);
 1273 
 1274         if (timeout != NULL)
 1275                 umtx_abs_timeout_init2(&timo, timeout);
 1276 
 1277         umtxq_lock(&uq->uq_key);
 1278         umtxq_insert(uq);
 1279         umtxq_unlock(&uq->uq_key);
 1280         if (compat32 == 0) {
 1281                 error = fueword(addr, &tmp);
 1282                 if (error != 0)
 1283                         error = EFAULT;
 1284         } else {
 1285                 error = fueword32(addr, &tmp32);
 1286                 if (error == 0)
 1287                         tmp = tmp32;
 1288                 else
 1289                         error = EFAULT;
 1290         }
 1291         umtxq_lock(&uq->uq_key);
 1292         if (error == 0) {
 1293                 if (tmp == id)
 1294                         error = umtxq_sleep(uq, "uwait", timeout == NULL ?
 1295                             NULL : &timo);
 1296                 if ((uq->uq_flags & UQF_UMTXQ) == 0)
 1297                         error = 0;
 1298                 else
 1299                         umtxq_remove(uq);
 1300         } else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
 1301                 umtxq_remove(uq);
 1302         }
 1303         umtxq_unlock(&uq->uq_key);
 1304         umtx_key_release(&uq->uq_key);
 1305         if (error == ERESTART)
 1306                 error = EINTR;
 1307         return (error);
 1308 }
 1309 
 1310 /*
 1311  * Wake up threads sleeping on the specified address.
 1312  */
 1313 int
 1314 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
 1315 {
 1316         struct umtx_key key;
 1317         int ret;
 1318 
 1319         if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
 1320             is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
 1321                 return (ret);
 1322         umtxq_lock(&key);
 1323         umtxq_signal(&key, n_wake);
 1324         umtxq_unlock(&key);
 1325         umtx_key_release(&key);
 1326         return (0);
 1327 }
 1328 
 1329 /*
 1330  * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 1331  */
 1332 static int
 1333 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
 1334     struct _umtx_time *timeout, int mode)
 1335 {
 1336         struct umtx_abs_timeout timo;
 1337         struct umtx_q *uq;
 1338         uint32_t owner, old, id;
 1339         int error, rv;
 1340 
 1341         id = td->td_tid;
 1342         uq = td->td_umtxq;
 1343         error = 0;
 1344         if (timeout != NULL)
 1345                 umtx_abs_timeout_init2(&timo, timeout);
 1346 
 1347         /*
 1348          * Care must be exercised when dealing with umtx structure. It
 1349          * can fault on any access.
 1350          */
 1351         for (;;) {
 1352                 rv = fueword32(&m->m_owner, &owner);
 1353                 if (rv == -1)
 1354                         return (EFAULT);
 1355                 if (mode == _UMUTEX_WAIT) {
 1356                         if (owner == UMUTEX_UNOWNED ||
 1357                             owner == UMUTEX_CONTESTED ||
 1358                             owner == UMUTEX_RB_OWNERDEAD ||
 1359                             owner == UMUTEX_RB_NOTRECOV)
 1360                                 return (0);
 1361                 } else {
 1362                         /*
 1363                          * Robust mutex terminated.  Kernel duty is to
 1364                          * return EOWNERDEAD to the userspace.  The
 1365                          * umutex.m_flags UMUTEX_NONCONSISTENT is set
 1366                          * by the common userspace code.
 1367                          */
 1368                         if (owner == UMUTEX_RB_OWNERDEAD) {
 1369                                 rv = casueword32(&m->m_owner,
 1370                                     UMUTEX_RB_OWNERDEAD, &owner,
 1371                                     id | UMUTEX_CONTESTED);
 1372                                 if (rv == -1)
 1373                                         return (EFAULT);
 1374                                 if (rv == 0) {
 1375                                         MPASS(owner == UMUTEX_RB_OWNERDEAD);
 1376                                         return (EOWNERDEAD); /* success */
 1377                                 }
 1378                                 MPASS(rv == 1);
 1379                                 rv = thread_check_susp(td, false);
 1380                                 if (rv != 0)
 1381                                         return (rv);
 1382                                 continue;
 1383                         }
 1384                         if (owner == UMUTEX_RB_NOTRECOV)
 1385                                 return (ENOTRECOVERABLE);
 1386 
 1387                         /*
 1388                          * Try the uncontested case.  This should be
 1389                          * done in userland.
 1390                          */
 1391                         rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
 1392                             &owner, id);
 1393                         /* The address was invalid. */
 1394                         if (rv == -1)
 1395                                 return (EFAULT);
 1396 
 1397                         /* The acquire succeeded. */
 1398                         if (rv == 0) {
 1399                                 MPASS(owner == UMUTEX_UNOWNED);
 1400                                 return (0);
 1401                         }
 1402 
 1403                         /*
 1404                          * If no one owns it but it is contested try
 1405                          * to acquire it.
 1406                          */
 1407                         MPASS(rv == 1);
 1408                         if (owner == UMUTEX_CONTESTED) {
 1409                                 rv = casueword32(&m->m_owner,
 1410                                     UMUTEX_CONTESTED, &owner,
 1411                                     id | UMUTEX_CONTESTED);
 1412                                 /* The address was invalid. */
 1413                                 if (rv == -1)
 1414                                         return (EFAULT);
 1415                                 if (rv == 0) {
 1416                                         MPASS(owner == UMUTEX_CONTESTED);
 1417                                         return (0);
 1418                                 }
 1419                                 if (rv == 1) {
 1420                                         rv = thread_check_susp(td, false);
 1421                                         if (rv != 0)
 1422                                                 return (rv);
 1423                                 }
 1424 
 1425                                 /*
 1426                                  * If this failed the lock has
 1427                                  * changed, restart.
 1428                                  */
 1429                                 continue;
 1430                         }
 1431 
 1432                         /* rv == 1 but not contested, likely store failure */
 1433                         rv = thread_check_susp(td, false);
 1434                         if (rv != 0)
 1435                                 return (rv);
 1436                 }
 1437 
 1438                 if (mode == _UMUTEX_TRY)
 1439                         return (EBUSY);
 1440 
 1441                 /*
 1442                  * If we caught a signal, we have retried and now
 1443                  * exit immediately.
 1444                  */
 1445                 if (error != 0)
 1446                         return (error);
 1447 
 1448                 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
 1449                     GET_SHARE(flags), &uq->uq_key)) != 0)
 1450                         return (error);
 1451 
 1452                 umtxq_lock(&uq->uq_key);
 1453                 umtxq_busy(&uq->uq_key);
 1454                 umtxq_insert(uq);
 1455                 umtxq_unlock(&uq->uq_key);
 1456 
 1457                 /*
 1458                  * Set the contested bit so that a release in user space
 1459                  * knows to use the system call for unlock.  If this fails
 1460                  * either some one else has acquired the lock or it has been
 1461                  * released.
 1462                  */
 1463                 rv = casueword32(&m->m_owner, owner, &old,
 1464                     owner | UMUTEX_CONTESTED);
 1465 
 1466                 /* The address was invalid or casueword failed to store. */
 1467                 if (rv == -1 || rv == 1) {
 1468                         umtxq_lock(&uq->uq_key);
 1469                         umtxq_remove(uq);
 1470                         umtxq_unbusy(&uq->uq_key);
 1471                         umtxq_unlock(&uq->uq_key);
 1472                         umtx_key_release(&uq->uq_key);
 1473                         if (rv == -1)
 1474                                 return (EFAULT);
 1475                         if (rv == 1) {
 1476                                 rv = thread_check_susp(td, false);
 1477                                 if (rv != 0)
 1478                                         return (rv);
 1479                         }
 1480                         continue;
 1481                 }
 1482 
 1483                 /*
 1484                  * We set the contested bit, sleep. Otherwise the lock changed
 1485                  * and we need to retry or we lost a race to the thread
 1486                  * unlocking the umtx.
 1487                  */
 1488                 umtxq_lock(&uq->uq_key);
 1489                 umtxq_unbusy(&uq->uq_key);
 1490                 MPASS(old == owner);
 1491                 error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
 1492                     NULL : &timo);
 1493                 umtxq_remove(uq);
 1494                 umtxq_unlock(&uq->uq_key);
 1495                 umtx_key_release(&uq->uq_key);
 1496 
 1497                 if (error == 0)
 1498                         error = thread_check_susp(td, false);
 1499         }
 1500 
 1501         return (0);
 1502 }
 1503 
 1504 /*
 1505  * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 1506  */
 1507 static int
 1508 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
 1509 {
 1510         struct umtx_key key;
 1511         uint32_t owner, old, id, newlock;
 1512         int error, count;
 1513 
 1514         id = td->td_tid;
 1515 
 1516 again:
 1517         /*
 1518          * Make sure we own this mtx.
 1519          */
 1520         error = fueword32(&m->m_owner, &owner);
 1521         if (error == -1)
 1522                 return (EFAULT);
 1523 
 1524         if ((owner & ~UMUTEX_CONTESTED) != id)
 1525                 return (EPERM);
 1526 
 1527         newlock = umtx_unlock_val(flags, rb);
 1528         if ((owner & UMUTEX_CONTESTED) == 0) {
 1529                 error = casueword32(&m->m_owner, owner, &old, newlock);
 1530                 if (error == -1)
 1531                         return (EFAULT);
 1532                 if (error == 1) {
 1533                         error = thread_check_susp(td, false);
 1534                         if (error != 0)
 1535                                 return (error);
 1536                         goto again;
 1537                 }
 1538                 MPASS(old == owner);
 1539                 return (0);
 1540         }
 1541 
 1542         /* We should only ever be in here for contested locks */
 1543         if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
 1544             &key)) != 0)
 1545                 return (error);
 1546 
 1547         umtxq_lock(&key);
 1548         umtxq_busy(&key);
 1549         count = umtxq_count(&key);
 1550         umtxq_unlock(&key);
 1551 
 1552         /*
 1553          * When unlocking the umtx, it must be marked as unowned if
 1554          * there is zero or one thread only waiting for it.
 1555          * Otherwise, it must be marked as contested.
 1556          */
 1557         if (count > 1)
 1558                 newlock |= UMUTEX_CONTESTED;
 1559         error = casueword32(&m->m_owner, owner, &old, newlock);
 1560         umtxq_lock(&key);
 1561         umtxq_signal(&key, 1);
 1562         umtxq_unbusy(&key);
 1563         umtxq_unlock(&key);
 1564         umtx_key_release(&key);
 1565         if (error == -1)
 1566                 return (EFAULT);
 1567         if (error == 1) {
 1568                 if (old != owner)
 1569                         return (EINVAL);
 1570                 error = thread_check_susp(td, false);
 1571                 if (error != 0)
 1572                         return (error);
 1573                 goto again;
 1574         }
 1575         return (0);
 1576 }
 1577 
/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 *
 * Returns 0 immediately when the owner word still names an owner,
 * EFAULT when a userspace access fails, or the error reported by
 * umtx_key_get() or thread_check_susp().
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	/*
	 * Nothing to do while an owner is recorded.  The two robust
	 * marker values are not treated as an owner here.
	 */
	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/*
	 * Mark the chain busy and sample the number of queued waiters.
	 * The chain lock is dropped before touching user memory; the
	 * busy mark stays set until one of the unbusy calls below.
	 */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * With at most one waiter and no robust marker in the owner word,
	 * try to replace a bare UMUTEX_CONTESTED with UMUTEX_UNOWNED.
	 */
	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			/*
			 * casueword32() returned 1: undo the busy mark and
			 * the key reference, check for a pending suspension
			 * and start over from the top.
			 */
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	/* Wake one waiter only if nothing failed and the queue was non-empty. */
	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
 1644 
/*
 * Check if the mutex has waiters and try to fix the contention bit.
 *
 * 'flags' selects the key type from the PRIO_INHERIT / PRIO_PROTECT /
 * ROBUST bits; any other combination of those bits yields EINVAL.
 * Returns 0 on success, EFAULT when a userspace access fails, or the
 * error reported by umtx_key_get() or thread_check_susp().
 */
static int
do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old;
	int type;
	int error;
	int count;

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
	    UMUTEX_ROBUST)) {
	case 0:
	case UMUTEX_ROBUST:
		type = TYPE_NORMAL_UMUTEX;
		break;
	case UMUTEX_PRIO_INHERIT:
		type = TYPE_PI_UMUTEX;
		break;
	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
		type = TYPE_PI_ROBUST_UMUTEX;
		break;
	case UMUTEX_PRIO_PROTECT:
		type = TYPE_PP_UMUTEX;
		break;
	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
		type = TYPE_PP_ROBUST_UMUTEX;
		break;
	default:
		/* PRIO_INHERIT and PRIO_PROTECT were both requested. */
		return (EINVAL);
	}
	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
		return (error);

	/*
	 * Mark the chain busy and sample the waiter count, then drop the
	 * chain lock before reading user memory.
	 */
	owner = 0;
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		error = EFAULT;

	/*
	 * Only repair contention bit if there is a waiter, this means
	 * the mutex is still being referenced by userland code,
	 * otherwise don't update any memory.
	 */
	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
		error = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);
		if (error == -1) {
			error = EFAULT;
			break;
		}
		if (error == 0) {
			MPASS(old == owner);
			break;
		}
		/* Retry with the value just observed, unless suspending. */
		owner = old;
		error = thread_check_susp(td, false);
	}

	/*
	 * On EFAULT every waiter is woken (INT_MAX).  Otherwise one waiter
	 * is woken only when the queue was non-empty and the owner word
	 * shows no owner or one of the robust marker values.
	 */
	umtxq_lock(&key);
	if (error == EFAULT) {
		umtxq_signal(&key, INT_MAX);
	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
 1723 
 1724 struct umtx_pi *
 1725 umtx_pi_alloc(int flags)
 1726 {
 1727         struct umtx_pi *pi;
 1728 
 1729         pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
 1730         TAILQ_INIT(&pi->pi_blocked);
 1731         atomic_add_int(&umtx_pi_allocated, 1);
 1732         return (pi);
 1733 }
 1734 
 1735 void
 1736 umtx_pi_free(struct umtx_pi *pi)
 1737 {
 1738         uma_zfree(umtx_pi_zone, pi);
 1739         atomic_add_int(&umtx_pi_allocated, -1);
 1740 }
 1741 
 1742 /*
 1743  * Adjust the thread's position on a pi_state after its priority has been
 1744  * changed.
 1745  */
 1746 static int
 1747 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
 1748 {
 1749         struct umtx_q *uq, *uq1, *uq2;
 1750         struct thread *td1;
 1751 
 1752         mtx_assert(&umtx_lock, MA_OWNED);
 1753         if (pi == NULL)
 1754                 return (0);
 1755 
 1756         uq = td->td_umtxq;
 1757 
 1758         /*
 1759          * Check if the thread needs to be moved on the blocked chain.
 1760          * It needs to be moved if either its priority is lower than
 1761          * the previous thread or higher than the next thread.
 1762          */
 1763         uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
 1764         uq2 = TAILQ_NEXT(uq, uq_lockq);
 1765         if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
 1766             (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
 1767                 /*
 1768                  * Remove thread from blocked chain and determine where
 1769                  * it should be moved to.
 1770                  */
 1771                 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
 1772                 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
 1773                         td1 = uq1->uq_thread;
 1774                         MPASS(td1->td_proc->p_magic == P_MAGIC);
 1775                         if (UPRI(td1) > UPRI(td))
 1776                                 break;
 1777                 }
 1778 
 1779                 if (uq1 == NULL)
 1780                         TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
 1781                 else
 1782                         TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
 1783         }
 1784         return (1);
 1785 }
 1786 
 1787 static struct umtx_pi *
 1788 umtx_pi_next(struct umtx_pi *pi)
 1789 {
 1790         struct umtx_q *uq_owner;
 1791 
 1792         if (pi->pi_owner == NULL)
 1793                 return (NULL);
 1794         uq_owner = pi->pi_owner->td_umtxq;
 1795         if (uq_owner == NULL)
 1796                 return (NULL);
 1797         return (uq_owner->uq_pi_blocked);
 1798 }
 1799 
 1800 /*
 1801  * Floyd's Cycle-Finding Algorithm.
 1802  */
 1803 static bool
 1804 umtx_pi_check_loop(struct umtx_pi *pi)
 1805 {
 1806         struct umtx_pi *pi1;    /* fast iterator */
 1807 
 1808         mtx_assert(&umtx_lock, MA_OWNED);
 1809         if (pi == NULL)
 1810                 return (false);
 1811         pi1 = pi;
 1812         for (;;) {
 1813                 pi = umtx_pi_next(pi);
 1814                 if (pi == NULL)
 1815                         break;
 1816                 pi1 = umtx_pi_next(pi1);
 1817                 if (pi1 == NULL)
 1818                         break;
 1819                 pi1 = umtx_pi_next(pi1);
 1820                 if (pi1 == NULL)
 1821                         break;
 1822                 if (pi == pi1)
 1823                         return (true);
 1824         }
 1825         return (false);
 1826 }
 1827 
 1828 /*
 1829  * Propagate priority when a thread is blocked on POSIX
 1830  * PI mutex.
 1831  */
 1832 static void
 1833 umtx_propagate_priority(struct thread *td)
 1834 {
 1835         struct umtx_q *uq;
 1836         struct umtx_pi *pi;
 1837         int pri;
 1838 
 1839         mtx_assert(&umtx_lock, MA_OWNED);
 1840         pri = UPRI(td);
 1841         uq = td->td_umtxq;
 1842         pi = uq->uq_pi_blocked;
 1843         if (pi == NULL)
 1844                 return;
 1845         if (umtx_pi_check_loop(pi))
 1846                 return;
 1847 
 1848         for (;;) {
 1849                 td = pi->pi_owner;
 1850                 if (td == NULL || td == curthread)
 1851                         return;
 1852 
 1853                 MPASS(td->td_proc != NULL);
 1854                 MPASS(td->td_proc->p_magic == P_MAGIC);
 1855 
 1856                 thread_lock(td);
 1857                 if (td->td_lend_user_pri > pri)
 1858                         sched_lend_user_prio(td, pri);
 1859                 else {
 1860                         thread_unlock(td);
 1861                         break;
 1862                 }
 1863                 thread_unlock(td);
 1864 
 1865                 /*
 1866                  * Pick up the lock that td is blocked on.
 1867                  */
 1868                 uq = td->td_umtxq;
 1869                 pi = uq->uq_pi_blocked;
 1870                 if (pi == NULL)
 1871                         break;
 1872                 /* Resort td on the list if needed. */
 1873                 umtx_pi_adjust_thread(pi, td);
 1874         }
 1875 }
 1876 
 1877 /*
 1878  * Unpropagate priority for a PI mutex when a thread blocked on
 1879  * it is interrupted by signal or resumed by others.
 1880  */
 1881 static void
 1882 umtx_repropagate_priority(struct umtx_pi *pi)
 1883 {
 1884         struct umtx_q *uq, *uq_owner;
 1885         struct umtx_pi *pi2;
 1886         int pri;
 1887 
 1888         mtx_assert(&umtx_lock, MA_OWNED);
 1889 
 1890         if (umtx_pi_check_loop(pi))
 1891                 return;
 1892         while (pi != NULL && pi->pi_owner != NULL) {
 1893                 pri = PRI_MAX;
 1894                 uq_owner = pi->pi_owner->td_umtxq;
 1895 
 1896                 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
 1897                         uq = TAILQ_FIRST(&pi2->pi_blocked);
 1898                         if (uq != NULL) {
 1899                                 if (pri > UPRI(uq->uq_thread))
 1900                                         pri = UPRI(uq->uq_thread);
 1901                         }
 1902                 }
 1903 
 1904                 if (pri > uq_owner->uq_inherited_pri)
 1905                         pri = uq_owner->uq_inherited_pri;
 1906                 thread_lock(pi->pi_owner);
 1907                 sched_lend_user_prio(pi->pi_owner, pri);
 1908                 thread_unlock(pi->pi_owner);
 1909                 if ((pi = uq_owner->uq_pi_blocked) != NULL)
 1910                         umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
 1911         }
 1912 }
 1913 
 1914 /*
 1915  * Insert a PI mutex into owned list.
 1916  */
 1917 static void
 1918 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
 1919 {
 1920         struct umtx_q *uq_owner;
 1921 
 1922         uq_owner = owner->td_umtxq;
 1923         mtx_assert(&umtx_lock, MA_OWNED);
 1924         MPASS(pi->pi_owner == NULL);
 1925         pi->pi_owner = owner;
 1926         TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
 1927 }
 1928 
 1929 /*
 1930  * Disown a PI mutex, and remove it from the owned list.
 1931  */
 1932 static void
 1933 umtx_pi_disown(struct umtx_pi *pi)
 1934 {
 1935 
 1936         mtx_assert(&umtx_lock, MA_OWNED);
 1937         TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
 1938         pi->pi_owner = NULL;
 1939 }
 1940 
 1941 /*
 1942  * Claim ownership of a PI mutex.
 1943  */
 1944 int
 1945 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
 1946 {
 1947         struct umtx_q *uq;
 1948         int pri;
 1949 
 1950         mtx_lock(&umtx_lock);
 1951         if (pi->pi_owner == owner) {
 1952                 mtx_unlock(&umtx_lock);
 1953                 return (0);
 1954         }
 1955 
 1956         if (pi->pi_owner != NULL) {
 1957                 /*
 1958                  * userland may have already messed the mutex, sigh.
 1959                  */
 1960                 mtx_unlock(&umtx_lock);
 1961                 return (EPERM);
 1962         }
 1963         umtx_pi_setowner(pi, owner);
 1964         uq = TAILQ_FIRST(&pi->pi_blocked);
 1965         if (uq != NULL) {
 1966                 pri = UPRI(uq->uq_thread);
 1967                 thread_lock(owner);
 1968                 if (pri < UPRI(owner))
 1969                         sched_lend_user_prio(owner, pri);
 1970                 thread_unlock(owner);
 1971         }
 1972         mtx_unlock(&umtx_lock);
 1973         return (0);
 1974 }
 1975 
 1976 /*
 1977  * Adjust a thread's order position in its blocked PI mutex,
 1978  * this may result new priority propagating process.
 1979  */
 1980 void
 1981 umtx_pi_adjust(struct thread *td, u_char oldpri)
 1982 {
 1983         struct umtx_q *uq;
 1984         struct umtx_pi *pi;
 1985 
 1986         uq = td->td_umtxq;
 1987         mtx_lock(&umtx_lock);
 1988         /*
 1989          * Pick up the lock that td is blocked on.
 1990          */
 1991         pi = uq->uq_pi_blocked;
 1992         if (pi != NULL) {
 1993                 umtx_pi_adjust_thread(pi, td);
 1994                 umtx_repropagate_priority(pi);
 1995         }
 1996         mtx_unlock(&umtx_lock);
 1997 }
 1998 
/*
 * Sleep on a PI mutex.
 *
 * On entry the chain for uq->uq_key must be locked and the chain for
 * pi->pi_key marked busy (both are asserted below).  The busy mark is
 * cleared before sleeping and the chain is unlocked before returning.
 * Returns the value produced by umtxq_sleep().
 */
int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct umtx_abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	/* Only referenced by the KASSERT below. */
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/*
		 * No owner recorded yet: look the owning thread up by id.
		 * umtx_lock is dropped around tdfind(), so pi_owner is
		 * checked again once it has been re-taken.
		 */
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			/*
			 * NOTE(review): presumably tdfind() returns with
			 * the process locked -- confirm.
			 */
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/*
	 * Insert ourselves in front of the first waiter with a larger
	 * UPRI value, or at the tail if there is none.
	 */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	/* Record what we are blocked on and push our priority to the owners. */
	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	/*
	 * Undo the blocked state and let the owners' lent priorities be
	 * recomputed without us on the queue.
	 */
	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}
 2066 
 2067 /*
 2068  * Add reference count for a PI mutex.
 2069  */
 2070 void
 2071 umtx_pi_ref(struct umtx_pi *pi)
 2072 {
 2073 
 2074         UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
 2075         pi->pi_refcount++;
 2076 }
 2077 
 2078 /*
 2079  * Decrease reference count for a PI mutex, if the counter
 2080  * is decreased to zero, its memory space is freed.
 2081  */
 2082 void
 2083 umtx_pi_unref(struct umtx_pi *pi)
 2084 {
 2085         struct umtxq_chain *uc;
 2086 
 2087         uc = umtxq_getchain(&pi->pi_key);
 2088         UMTXQ_LOCKED_ASSERT(uc);
 2089         KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
 2090         if (--pi->pi_refcount == 0) {
 2091                 mtx_lock(&umtx_lock);
 2092                 if (pi->pi_owner != NULL)
 2093                         umtx_pi_disown(pi);
 2094                 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
 2095                         ("blocked queue not empty"));
 2096                 mtx_unlock(&umtx_lock);
 2097                 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
 2098                 umtx_pi_free(pi);
 2099         }
 2100 }
 2101 
 2102 /*
 2103  * Find a PI mutex in hash table.
 2104  */
 2105 struct umtx_pi *
 2106 umtx_pi_lookup(struct umtx_key *key)
 2107 {
 2108         struct umtxq_chain *uc;
 2109         struct umtx_pi *pi;
 2110 
 2111         uc = umtxq_getchain(key);
 2112         UMTXQ_LOCKED_ASSERT(uc);
 2113 
 2114         TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
 2115                 if (umtx_key_match(&pi->pi_key, key)) {
 2116                         return (pi);
 2117                 }
 2118         }
 2119         return (NULL);
 2120 }
 2121 
 2122 /*
 2123  * Insert a PI mutex into hash table.
 2124  */
 2125 void
 2126 umtx_pi_insert(struct umtx_pi *pi)
 2127 {
 2128         struct umtxq_chain *uc;
 2129 
 2130         uc = umtxq_getchain(&pi->pi_key);
 2131         UMTXQ_LOCKED_ASSERT(uc);
 2132         TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
 2133 }
 2134 
/*
 * Drop a PI mutex and wakeup a top waiter.
 *
 * The chain for 'key' must be locked and busy (asserted below).
 * '*count' receives the value returned by umtxq_count_pi().
 * Returns 0 on success, or EPERM when a waiter exists but the umtx_pi
 * is owned by neither 'td' nor (with 'rb' set) nobody.
 */
int
umtx_pi_drop(struct thread *td, struct umtx_key *key, bool rb, int *count)
{
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	int pri;

	UMTXQ_ASSERT_LOCKED_BUSY(key);
	*count = umtxq_count_pi(key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* get highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute our lent priority from the first waiters of
		 * the PI mutexes still on our contested list; PRI_MAX is
		 * used when there are none.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		/* Wake the selected waiter after umtx_lock is released. */
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	return (0);
}
 2201 
/*
 * Lock a PI mutex.
 *
 * 'try' != 0 makes the call fail with EBUSY instead of sleeping when the
 * mutex is owned by another thread.  'timeout', when not NULL, bounds
 * the sleep.
 *
 * Returns 0 on success; EFAULT when a userspace access fails;
 * ENOTRECOVERABLE when the owner word holds UMUTEX_RB_NOTRECOV;
 * EOWNERDEAD when the lock was taken over from UMUTEX_RB_OWNERDEAD;
 * EDEADLK when the calling thread already owns the mutex; EBUSY for a
 * failed try-lock; or an error from umtx_key_get(), umtx_pi_claim(),
 * umtxq_sleep_pi() or thread_check_susp().
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct umtx_abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		umtx_abs_timeout_init2(&timo, timeout);

	/*
	 * Find the umtx_pi for this key or create one.  The M_WAITOK
	 * allocation is done with the chain unlocked, so the lookup is
	 * repeated afterwards and the fresh structure is discarded if a
	 * umtx_pi for the key has appeared in the meantime.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	/* This reference is dropped by umtx_pi_unref() after the loop. */
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Nobody owns it, but the acquire failed. This can happen
		 * with ll/sc atomics.
		 */
		if (owner == UMUTEX_UNOWNED) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to the pending signal with suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed the lock could have
				 * changed, restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			/* Record ourselves as owner in the kernel state too. */
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		/* The owner word already carries our own thread id. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * Mark the chain busy, then drop the chain lock before the
		 * userspace access below.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible there.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/*
		 * We set the contested bit, sleep.  umtxq_sleep_pi() clears
		 * the busy mark and unlocks the chain before it returns.
		 */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		/*
		 * On a sleep error go round once more with 'error' set; the
		 * "caught a signal" check above then ends the loop unless
		 * the retry succeeds or fails for another reason first.
		 */
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
 2415 
/*
 * Unlock a PI mutex.
 *
 * 'rb' is passed on to umtx_unlock_val() and umtx_pi_drop().
 *
 * Returns 0 on success; EFAULT when a userspace access fails; EPERM
 * when the calling thread is not the recorded owner; EINVAL when the
 * owner word changed underneath the final update; or an error from
 * umtx_key_get(), umtx_pi_drop() or thread_check_susp().
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t id, new_owner, old, owner;
	int count, error;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		/* The word changed; continue with the value just observed. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/*
	 * Give up kernel-side ownership and wake the top waiter while the
	 * chain is locked and busy.  The busy mark stays set across the
	 * userspace update below.
	 */
	umtxq_lock(&key);
	umtxq_busy(&key);
	error = umtx_pi_drop(td, &key, rb, &count);
	if (error != 0) {
		umtxq_unbusy(&key);
		umtxq_unlock(&key);
		umtx_key_release(&key);
		/* userland messed the mutex */
		return (error);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		/* Retry the update unless the thread has to suspend. */
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}
 2498 
 2499 /*
 2500  * Lock a PP mutex.
 2501  */
 2502 static int
 2503 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
 2504     struct _umtx_time *timeout, int try)
 2505 {
 2506         struct umtx_abs_timeout timo;
 2507         struct umtx_q *uq, *uq2;
 2508         struct umtx_pi *pi;
 2509         uint32_t ceiling;
 2510         uint32_t owner, id;
 2511         int error, pri, old_inherited_pri, su, rv;
 2512 
 2513         id = td->td_tid;
 2514         uq = td->td_umtxq;
 2515         if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
 2516             TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
 2517             &uq->uq_key)) != 0)
 2518                 return (error);
 2519 
 2520         if (timeout != NULL)
 2521                 umtx_abs_timeout_init2(&timo, timeout);
 2522 
 2523         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
 2524         for (;;) {
 2525                 old_inherited_pri = uq->uq_inherited_pri;
 2526                 umtxq_lock(&uq->uq_key);
 2527                 umtxq_busy(&uq->uq_key);
 2528                 umtxq_unlock(&uq->uq_key);
 2529 
 2530                 rv = fueword32(&m->m_ceilings[0], &ceiling);
 2531                 if (rv == -1) {
 2532                         error = EFAULT;
 2533                         goto out;
 2534                 }
 2535                 ceiling = RTP_PRIO_MAX - ceiling;
 2536                 if (ceiling > RTP_PRIO_MAX) {
 2537                         error = EINVAL;
 2538                         goto out;
 2539                 }
 2540 
 2541                 mtx_lock(&umtx_lock);
 2542                 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
 2543                         mtx_unlock(&umtx_lock);
 2544                         error = EINVAL;
 2545                         goto out;
 2546                 }
 2547                 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
 2548                         uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
 2549                         thread_lock(td);
 2550                         if (uq->uq_inherited_pri < UPRI(td))
 2551                                 sched_lend_user_prio(td, uq->uq_inherited_pri);
 2552                         thread_unlock(td);
 2553                 }
 2554                 mtx_unlock(&umtx_lock);
 2555 
 2556                 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
 2557                     id | UMUTEX_CONTESTED);
 2558                 /* The address was invalid. */
 2559                 if (rv == -1) {
 2560                         error = EFAULT;
 2561                         break;
 2562                 }
 2563                 if (rv == 0) {
 2564                         MPASS(owner == UMUTEX_CONTESTED);
 2565                         error = 0;
 2566                         break;
 2567                 }
 2568                 /* rv == 1 */
 2569                 if (owner == UMUTEX_RB_OWNERDEAD) {
 2570                         rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
 2571                             &owner, id | UMUTEX_CONTESTED);
 2572                         if (rv == -1) {
 2573                                 error = EFAULT;
 2574                                 break;
 2575                         }
 2576                         if (rv == 0) {
 2577                                 MPASS(owner == UMUTEX_RB_OWNERDEAD);
 2578                                 error = EOWNERDEAD; /* success */
 2579                                 break;
 2580                         }
 2581 
 2582                         /*
 2583                          *  rv == 1, only check for suspension if we
 2584                          *  did not already catched a signal.  If we
 2585                          *  get an error from the check, the same
 2586                          *  condition is checked by the umtxq_sleep()
 2587                          *  call below, so we should obliterate the
 2588                          *  error to not skip the last loop iteration.
 2589                          */
 2590                         if (error == 0) {
 2591                                 error = thread_check_susp(td, false);
 2592                                 if (error == 0) {
 2593                                         if (try != 0)
 2594                                                 error = EBUSY;
 2595                                         else
 2596                                                 continue;
 2597                                 }
 2598                                 error = 0;
 2599                         }
 2600                 } else if (owner == UMUTEX_RB_NOTRECOV) {
 2601                         error = ENOTRECOVERABLE;
 2602                 }
 2603 
 2604                 if (try != 0)
 2605                         error = EBUSY;
 2606 
 2607                 /*
 2608                  * If we caught a signal, we have retried and now
 2609                  * exit immediately.
 2610                  */
 2611                 if (error != 0)
 2612                         break;
 2613 
 2614                 umtxq_lock(&uq->uq_key);
 2615                 umtxq_insert(uq);
 2616                 umtxq_unbusy(&uq->uq_key);
 2617                 error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
 2618                     NULL : &timo);
 2619                 umtxq_remove(uq);
 2620                 umtxq_unlock(&uq->uq_key);
 2621 
 2622                 mtx_lock(&umtx_lock);
 2623                 uq->uq_inherited_pri = old_inherited_pri;
 2624                 pri = PRI_MAX;
 2625                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2626                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2627                         if (uq2 != NULL) {
 2628                                 if (pri > UPRI(uq2->uq_thread))
 2629                                         pri = UPRI(uq2->uq_thread);
 2630                         }
 2631                 }
 2632                 if (pri > uq->uq_inherited_pri)
 2633                         pri = uq->uq_inherited_pri;
 2634                 thread_lock(td);
 2635                 sched_lend_user_prio(td, pri);
 2636                 thread_unlock(td);
 2637                 mtx_unlock(&umtx_lock);
 2638         }
 2639 
 2640         if (error != 0 && error != EOWNERDEAD) {
 2641                 mtx_lock(&umtx_lock);
 2642                 uq->uq_inherited_pri = old_inherited_pri;
 2643                 pri = PRI_MAX;
 2644                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2645                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2646                         if (uq2 != NULL) {
 2647                                 if (pri > UPRI(uq2->uq_thread))
 2648                                         pri = UPRI(uq2->uq_thread);
 2649                         }
 2650                 }
 2651                 if (pri > uq->uq_inherited_pri)
 2652                         pri = uq->uq_inherited_pri;
 2653                 thread_lock(td);
 2654                 sched_lend_user_prio(td, pri);
 2655                 thread_unlock(td);
 2656                 mtx_unlock(&umtx_lock);
 2657         }
 2658 
 2659 out:
 2660         umtxq_unbusy_unlocked(&uq->uq_key);
 2661         umtx_key_release(&uq->uq_key);
 2662         return (error);
 2663 }
 2664 
 2665 /*
 2666  * Unlock a PP mutex.
 2667  */
 2668 static int
 2669 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
 2670 {
 2671         struct umtx_key key;
 2672         struct umtx_q *uq, *uq2;
 2673         struct umtx_pi *pi;
 2674         uint32_t id, owner, rceiling;
 2675         int error, pri, new_inherited_pri, su;
 2676 
 2677         id = td->td_tid;
 2678         uq = td->td_umtxq;
 2679         su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
 2680 
 2681         /*
 2682          * Make sure we own this mtx.
 2683          */
 2684         error = fueword32(&m->m_owner, &owner);
 2685         if (error == -1)
 2686                 return (EFAULT);
 2687 
 2688         if ((owner & ~UMUTEX_CONTESTED) != id)
 2689                 return (EPERM);
 2690 
 2691         error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
 2692         if (error != 0)
 2693                 return (error);
 2694 
 2695         if (rceiling == -1)
 2696                 new_inherited_pri = PRI_MAX;
 2697         else {
 2698                 rceiling = RTP_PRIO_MAX - rceiling;
 2699                 if (rceiling > RTP_PRIO_MAX)
 2700                         return (EINVAL);
 2701                 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
 2702         }
 2703 
 2704         if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
 2705             TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
 2706             &key)) != 0)
 2707                 return (error);
 2708         umtxq_lock(&key);
 2709         umtxq_busy(&key);
 2710         umtxq_unlock(&key);
 2711         /*
 2712          * For priority protected mutex, always set unlocked state
 2713          * to UMUTEX_CONTESTED, so that userland always enters kernel
 2714          * to lock the mutex, it is necessary because thread priority
 2715          * has to be adjusted for such mutex.
 2716          */
 2717         error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
 2718             UMUTEX_CONTESTED);
 2719 
 2720         umtxq_lock(&key);
 2721         if (error == 0)
 2722                 umtxq_signal(&key, 1);
 2723         umtxq_unbusy(&key);
 2724         umtxq_unlock(&key);
 2725 
 2726         if (error == -1)
 2727                 error = EFAULT;
 2728         else {
 2729                 mtx_lock(&umtx_lock);
 2730                 if (su != 0)
 2731                         uq->uq_inherited_pri = new_inherited_pri;
 2732                 pri = PRI_MAX;
 2733                 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
 2734                         uq2 = TAILQ_FIRST(&pi->pi_blocked);
 2735                         if (uq2 != NULL) {
 2736                                 if (pri > UPRI(uq2->uq_thread))
 2737                                         pri = UPRI(uq2->uq_thread);
 2738                         }
 2739                 }
 2740                 if (pri > uq->uq_inherited_pri)
 2741                         pri = uq->uq_inherited_pri;
 2742                 thread_lock(td);
 2743                 sched_lend_user_prio(td, pri);
 2744                 thread_unlock(td);
 2745                 mtx_unlock(&umtx_lock);
 2746         }
 2747         umtx_key_release(&key);
 2748         return (error);
 2749 }
 2750 
 2751 static int
 2752 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
 2753     uint32_t *old_ceiling)
 2754 {
 2755         struct umtx_q *uq;
 2756         uint32_t flags, id, owner, save_ceiling;
 2757         int error, rv, rv1;
 2758 
 2759         error = fueword32(&m->m_flags, &flags);
 2760         if (error == -1)
 2761                 return (EFAULT);
 2762         if ((flags & UMUTEX_PRIO_PROTECT) == 0)
 2763                 return (EINVAL);
 2764         if (ceiling > RTP_PRIO_MAX)
 2765                 return (EINVAL);
 2766         id = td->td_tid;
 2767         uq = td->td_umtxq;
 2768         if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
 2769             TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
 2770             &uq->uq_key)) != 0)
 2771                 return (error);
 2772         for (;;) {
 2773                 umtxq_lock(&uq->uq_key);
 2774                 umtxq_busy(&uq->uq_key);
 2775                 umtxq_unlock(&uq->uq_key);
 2776 
 2777                 rv = fueword32(&m->m_ceilings[0], &save_ceiling);
 2778                 if (rv == -1) {
 2779                         error = EFAULT;
 2780                         break;
 2781                 }
 2782 
 2783                 rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
 2784                     id | UMUTEX_CONTESTED);
 2785                 if (rv == -1) {
 2786                         error = EFAULT;
 2787                         break;
 2788                 }
 2789 
 2790                 if (rv == 0) {
 2791                         MPASS(owner == UMUTEX_CONTESTED);
 2792                         rv = suword32(&m->m_ceilings[0], ceiling);
 2793                         rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
 2794                         error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
 2795                         break;
 2796                 }
 2797 
 2798                 if ((owner & ~UMUTEX_CONTESTED) == id) {
 2799                         rv = suword32(&m->m_ceilings[0], ceiling);
 2800                         error = rv == 0 ? 0 : EFAULT;
 2801                         break;
 2802                 }
 2803 
 2804                 if (owner == UMUTEX_RB_OWNERDEAD) {
 2805                         error = EOWNERDEAD;
 2806                         break;
 2807                 } else if (owner == UMUTEX_RB_NOTRECOV) {
 2808                         error = ENOTRECOVERABLE;
 2809                         break;
 2810                 }
 2811 
 2812                 /*
 2813                  * If we caught a signal, we have retried and now
 2814                  * exit immediately.
 2815                  */
 2816                 if (error != 0)
 2817                         break;
 2818 
 2819                 /*
 2820                  * We set the contested bit, sleep. Otherwise the lock changed
 2821                  * and we need to retry or we lost a race to the thread
 2822                  * unlocking the umtx.
 2823                  */
 2824                 umtxq_lock(&uq->uq_key);
 2825                 umtxq_insert(uq);
 2826                 umtxq_unbusy(&uq->uq_key);
 2827                 error = umtxq_sleep(uq, "umtxpp", NULL);
 2828                 umtxq_remove(uq);
 2829                 umtxq_unlock(&uq->uq_key);
 2830         }
 2831         umtxq_lock(&uq->uq_key);
 2832         if (error == 0)
 2833                 umtxq_signal(&uq->uq_key, INT_MAX);
 2834         umtxq_unbusy(&uq->uq_key);
 2835         umtxq_unlock(&uq->uq_key);
 2836         umtx_key_release(&uq->uq_key);
 2837         if (error == 0 && old_ceiling != NULL) {
 2838                 rv = suword32(old_ceiling, save_ceiling);
 2839                 error = rv == 0 ? 0 : EFAULT;
 2840         }
 2841         return (error);
 2842 }
 2843 
 2844 /*
 2845  * Lock a userland POSIX mutex.
 2846  */
 2847 static int
 2848 do_lock_umutex(struct thread *td, struct umutex *m,
 2849     struct _umtx_time *timeout, int mode)
 2850 {
 2851         uint32_t flags;
 2852         int error;
 2853 
 2854         error = fueword32(&m->m_flags, &flags);
 2855         if (error == -1)
 2856                 return (EFAULT);
 2857 
 2858         switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2859         case 0:
 2860                 error = do_lock_normal(td, m, flags, timeout, mode);
 2861                 break;
 2862         case UMUTEX_PRIO_INHERIT:
 2863                 error = do_lock_pi(td, m, flags, timeout, mode);
 2864                 break;
 2865         case UMUTEX_PRIO_PROTECT:
 2866                 error = do_lock_pp(td, m, flags, timeout, mode);
 2867                 break;
 2868         default:
 2869                 return (EINVAL);
 2870         }
 2871         if (timeout == NULL) {
 2872                 if (error == EINTR && mode != _UMUTEX_WAIT)
 2873                         error = ERESTART;
 2874         } else {
 2875                 /* Timed-locking is not restarted. */
 2876                 if (error == ERESTART)
 2877                         error = EINTR;
 2878         }
 2879         return (error);
 2880 }
 2881 
 2882 /*
 2883  * Unlock a userland POSIX mutex.
 2884  */
 2885 static int
 2886 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
 2887 {
 2888         uint32_t flags;
 2889         int error;
 2890 
 2891         error = fueword32(&m->m_flags, &flags);
 2892         if (error == -1)
 2893                 return (EFAULT);
 2894 
 2895         switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
 2896         case 0:
 2897                 return (do_unlock_normal(td, m, flags, rb));
 2898         case UMUTEX_PRIO_INHERIT:
 2899                 return (do_unlock_pi(td, m, flags, rb));
 2900         case UMUTEX_PRIO_PROTECT:
 2901                 return (do_unlock_pp(td, m, flags, rb));
 2902         }
 2903 
 2904         return (EINVAL);
 2905 }
 2906 
 2907 static int
 2908 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
 2909     struct timespec *timeout, u_long wflags)
 2910 {
 2911         struct umtx_abs_timeout timo;
 2912         struct umtx_q *uq;
 2913         uint32_t flags, clockid, hasw;
 2914         int error;
 2915 
 2916         uq = td->td_umtxq;
 2917         error = fueword32(&cv->c_flags, &flags);
 2918         if (error == -1)
 2919                 return (EFAULT);
 2920         error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
 2921         if (error != 0)
 2922                 return (error);
 2923 
 2924         if ((wflags & CVWAIT_CLOCKID) != 0) {
 2925                 error = fueword32(&cv->c_clockid, &clockid);
 2926                 if (error == -1) {
 2927                         umtx_key_release(&uq->uq_key);
 2928                         return (EFAULT);
 2929                 }
 2930                 if (clockid < CLOCK_REALTIME ||
 2931                     clockid >= CLOCK_THREAD_CPUTIME_ID) {
 2932                         /* hmm, only HW clock id will work. */
 2933                         umtx_key_release(&uq->uq_key);
 2934                         return (EINVAL);
 2935                 }
 2936         } else {
 2937                 clockid = CLOCK_REALTIME;
 2938         }
 2939 
 2940         umtxq_lock(&uq->uq_key);
 2941         umtxq_busy(&uq->uq_key);
 2942         umtxq_insert(uq);
 2943         umtxq_unlock(&uq->uq_key);
 2944 
 2945         /*
 2946          * Set c_has_waiters to 1 before releasing user mutex, also
 2947          * don't modify cache line when unnecessary.
 2948          */
 2949         error = fueword32(&cv->c_has_waiters, &hasw);
 2950         if (error == 0 && hasw == 0)
 2951                 suword32(&cv->c_has_waiters, 1);
 2952 
 2953         umtxq_unbusy_unlocked(&uq->uq_key);
 2954 
 2955         error = do_unlock_umutex(td, m, false);
 2956 
 2957         if (timeout != NULL)
 2958                 umtx_abs_timeout_init(&timo, clockid,
 2959                     (wflags & CVWAIT_ABSTIME) != 0, timeout);
 2960 
 2961         umtxq_lock(&uq->uq_key);
 2962         if (error == 0) {
 2963                 error = umtxq_sleep(uq, "ucond", timeout == NULL ?
 2964                     NULL : &timo);
 2965         }
 2966 
 2967         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 2968                 error = 0;
 2969         else {
 2970                 /*
 2971                  * This must be timeout,interrupted by signal or
 2972                  * surprious wakeup, clear c_has_waiter flag when
 2973                  * necessary.
 2974                  */
 2975                 umtxq_busy(&uq->uq_key);
 2976                 if ((uq->uq_flags & UQF_UMTXQ) != 0) {
 2977                         int oldlen = uq->uq_cur_queue->length;
 2978                         umtxq_remove(uq);
 2979                         if (oldlen == 1) {
 2980                                 umtxq_unlock(&uq->uq_key);
 2981                                 suword32(&cv->c_has_waiters, 0);
 2982                                 umtxq_lock(&uq->uq_key);
 2983                         }
 2984                 }
 2985                 umtxq_unbusy(&uq->uq_key);
 2986                 if (error == ERESTART)
 2987                         error = EINTR;
 2988         }
 2989 
 2990         umtxq_unlock(&uq->uq_key);
 2991         umtx_key_release(&uq->uq_key);
 2992         return (error);
 2993 }
 2994 
 2995 /*
 2996  * Signal a userland condition variable.
 2997  */
 2998 static int
 2999 do_cv_signal(struct thread *td, struct ucond *cv)
 3000 {
 3001         struct umtx_key key;
 3002         int error, cnt, nwake;
 3003         uint32_t flags;
 3004 
 3005         error = fueword32(&cv->c_flags, &flags);
 3006         if (error == -1)
 3007                 return (EFAULT);
 3008         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 3009                 return (error);
 3010         umtxq_lock(&key);
 3011         umtxq_busy(&key);
 3012         cnt = umtxq_count(&key);
 3013         nwake = umtxq_signal(&key, 1);
 3014         if (cnt <= nwake) {
 3015                 umtxq_unlock(&key);
 3016                 error = suword32(&cv->c_has_waiters, 0);
 3017                 if (error == -1)
 3018                         error = EFAULT;
 3019                 umtxq_lock(&key);
 3020         }
 3021         umtxq_unbusy(&key);
 3022         umtxq_unlock(&key);
 3023         umtx_key_release(&key);
 3024         return (error);
 3025 }
 3026 
 3027 static int
 3028 do_cv_broadcast(struct thread *td, struct ucond *cv)
 3029 {
 3030         struct umtx_key key;
 3031         int error;
 3032         uint32_t flags;
 3033 
 3034         error = fueword32(&cv->c_flags, &flags);
 3035         if (error == -1)
 3036                 return (EFAULT);
 3037         if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
 3038                 return (error);
 3039 
 3040         umtxq_lock(&key);
 3041         umtxq_busy(&key);
 3042         umtxq_signal(&key, INT_MAX);
 3043         umtxq_unlock(&key);
 3044 
 3045         error = suword32(&cv->c_has_waiters, 0);
 3046         if (error == -1)
 3047                 error = EFAULT;
 3048 
 3049         umtxq_unbusy_unlocked(&key);
 3050 
 3051         umtx_key_release(&key);
 3052         return (error);
 3053 }
 3054 
 3055 static int
 3056 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
 3057     struct _umtx_time *timeout)
 3058 {
 3059         struct umtx_abs_timeout timo;
 3060         struct umtx_q *uq;
 3061         uint32_t flags, wrflags;
 3062         int32_t state, oldstate;
 3063         int32_t blocked_readers;
 3064         int error, error1, rv;
 3065 
 3066         uq = td->td_umtxq;
 3067         error = fueword32(&rwlock->rw_flags, &flags);
 3068         if (error == -1)
 3069                 return (EFAULT);
 3070         error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
 3071         if (error != 0)
 3072                 return (error);
 3073 
 3074         if (timeout != NULL)
 3075                 umtx_abs_timeout_init2(&timo, timeout);
 3076 
 3077         wrflags = URWLOCK_WRITE_OWNER;
 3078         if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
 3079                 wrflags |= URWLOCK_WRITE_WAITERS;
 3080 
 3081         for (;;) {
 3082                 rv = fueword32(&rwlock->rw_state, &state);
 3083                 if (rv == -1) {
 3084                         umtx_key_release(&uq->uq_key);
 3085                         return (EFAULT);
 3086                 }
 3087 
 3088                 /* try to lock it */
 3089                 while (!(state & wrflags)) {
 3090                         if (__predict_false(URWLOCK_READER_COUNT(state) ==
 3091                             URWLOCK_MAX_READERS)) {
 3092                                 umtx_key_release(&uq->uq_key);
 3093                                 return (EAGAIN);
 3094                         }
 3095                         rv = casueword32(&rwlock->rw_state, state,
 3096                             &oldstate, state + 1);
 3097                         if (rv == -1) {
 3098                                 umtx_key_release(&uq->uq_key);
 3099                                 return (EFAULT);
 3100                         }
 3101                         if (rv == 0) {
 3102                                 MPASS(oldstate == state);
 3103                                 umtx_key_release(&uq->uq_key);
 3104                                 return (0);
 3105                         }
 3106                         error = thread_check_susp(td, true);
 3107                         if (error != 0)
 3108                                 break;
 3109                         state = oldstate;
 3110                 }
 3111 
 3112                 if (error)
 3113                         break;
 3114 
 3115                 /* grab monitor lock */
 3116                 umtxq_lock(&uq->uq_key);
 3117                 umtxq_busy(&uq->uq_key);
 3118                 umtxq_unlock(&uq->uq_key);
 3119 
 3120                 /*
 3121                  * re-read the state, in case it changed between the try-lock above
 3122                  * and the check below
 3123                  */
 3124                 rv = fueword32(&rwlock->rw_state, &state);
 3125                 if (rv == -1)
 3126                         error = EFAULT;
 3127 
 3128                 /* set read contention bit */
 3129                 while (error == 0 && (state & wrflags) &&
 3130                     !(state & URWLOCK_READ_WAITERS)) {
 3131                         rv = casueword32(&rwlock->rw_state, state,
 3132                             &oldstate, state | URWLOCK_READ_WAITERS);
 3133                         if (rv == -1) {
 3134                                 error = EFAULT;
 3135                                 break;
 3136                         }
 3137                         if (rv == 0) {
 3138                                 MPASS(oldstate == state);
 3139                                 goto sleep;
 3140                         }
 3141                         state = oldstate;
 3142                         error = thread_check_susp(td, false);
 3143                         if (error != 0)
 3144                                 break;
 3145                 }
 3146                 if (error != 0) {
 3147                         umtxq_unbusy_unlocked(&uq->uq_key);
 3148                         break;
 3149                 }
 3150 
 3151                 /* state is changed while setting flags, restart */
 3152                 if (!(state & wrflags)) {
 3153                         umtxq_unbusy_unlocked(&uq->uq_key);
 3154                         error = thread_check_susp(td, true);
 3155                         if (error != 0)
 3156                                 break;
 3157                         continue;
 3158                 }
 3159 
 3160 sleep:
 3161                 /*
 3162                  * Contention bit is set, before sleeping, increase
 3163                  * read waiter count.
 3164                  */
 3165                 rv = fueword32(&rwlock->rw_blocked_readers,
 3166                     &blocked_readers);
 3167                 if (rv == -1) {
 3168                         umtxq_unbusy_unlocked(&uq->uq_key);
 3169                         error = EFAULT;
 3170                         break;
 3171                 }
 3172                 suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
 3173 
 3174                 while (state & wrflags) {
 3175                         umtxq_lock(&uq->uq_key);
 3176                         umtxq_insert(uq);
 3177                         umtxq_unbusy(&uq->uq_key);
 3178 
 3179                         error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
 3180                             NULL : &timo);
 3181 
 3182                         umtxq_busy(&uq->uq_key);
 3183                         umtxq_remove(uq);
 3184                         umtxq_unlock(&uq->uq_key);
 3185                         if (error)
 3186                                 break;
 3187                         rv = fueword32(&rwlock->rw_state, &state);
 3188                         if (rv == -1) {
 3189                                 error = EFAULT;
 3190                                 break;
 3191                         }
 3192                 }
 3193 
 3194                 /* decrease read waiter count, and may clear read contention bit */
 3195                 rv = fueword32(&rwlock->rw_blocked_readers,
 3196                     &blocked_readers);
 3197                 if (rv == -1) {
 3198                         umtxq_unbusy_unlocked(&uq->uq_key);
 3199                         error = EFAULT;
 3200                         break;
 3201                 }
 3202                 suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
 3203                 if (blocked_readers == 1) {
 3204                         rv = fueword32(&rwlock->rw_state, &state);
 3205                         if (rv == -1) {
 3206                                 umtxq_unbusy_unlocked(&uq->uq_key);
 3207                                 error = EFAULT;
 3208                                 break;
 3209                         }
 3210                         for (;;) {
 3211                                 rv = casueword32(&rwlock->rw_state, state,
 3212                                     &oldstate, state & ~URWLOCK_READ_WAITERS);
 3213                                 if (rv == -1) {
 3214                                         error = EFAULT;
 3215                                         break;
 3216                                 }
 3217                                 if (rv == 0) {
 3218                                         MPASS(oldstate == state);
 3219                                         break;
 3220                                 }
 3221                                 state = oldstate;
 3222                                 error1 = thread_check_susp(td, false);
 3223                                 if (error1 != 0) {
 3224                                         if (error == 0)
 3225                                                 error = error1;
 3226                                         break;
 3227                                 }
 3228                         }
 3229                 }
 3230 
 3231                 umtxq_unbusy_unlocked(&uq->uq_key);
 3232                 if (error != 0)
 3233                         break;
 3234         }
 3235         umtx_key_release(&uq->uq_key);
 3236         if (error == ERESTART)
 3237                 error = EINTR;
 3238         return (error);
 3239 }
 3240 
      /*
       * Acquire a userland reader/writer lock for writing on behalf of td.
       *
       * Each pass first tries to set URWLOCK_WRITE_OWNER with a compare-and-swap
       * while the state word shows neither a write owner nor any readers.  When
       * that is not possible, the thread sets URWLOCK_WRITE_WAITERS, increments
       * rw_blocked_writers and sleeps on the exclusive queue (bounded by timeout
       * when one is given) until the lock looks free, then retries from the top.
       *
       * Returns 0 once the write-owner bit has been installed, EFAULT when a
       * user-memory access fails, or the error produced by umtx_key_get(),
       * umtxq_sleep() or thread_check_susp().  An ERESTART that terminates the
       * loop is reported as EINTR.
       */
 3241 static int
 3242 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
 3243 {
 3244         struct umtx_abs_timeout timo;
 3245         struct umtx_q *uq;
 3246         uint32_t flags;
 3247         int32_t state, oldstate;
 3248         int32_t blocked_writers;
 3249         int32_t blocked_readers;
 3250         int error, error1, rv;
 3251 
 3252         uq = td->td_umtxq;
 3253         error = fueword32(&rwlock->rw_flags, &flags);
 3254         if (error == -1)
 3255                 return (EFAULT);
 3256         error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
 3257         if (error != 0)
 3258                 return (error);
 3259 
 3260         if (timeout != NULL)
 3261                 umtx_abs_timeout_init2(&timo, timeout);
 3262 
 3263         blocked_readers = 0;
 3264         for (;;) {
 3265                 rv = fueword32(&rwlock->rw_state, &state);
 3266                 if (rv == -1) {
 3267                         umtx_key_release(&uq->uq_key);
 3268                         return (EFAULT);
 3269                 }
                      /* Try-lock: take ownership while nobody holds the lock. */
 3270                 while ((state & URWLOCK_WRITE_OWNER) == 0 &&
 3271                     URWLOCK_READER_COUNT(state) == 0) {
 3272                         rv = casueword32(&rwlock->rw_state, state,
 3273                             &oldstate, state | URWLOCK_WRITE_OWNER);
 3274                         if (rv == -1) {
 3275                                 umtx_key_release(&uq->uq_key);
 3276                                 return (EFAULT);
 3277                         }
 3278                         if (rv == 0) {
 3279                                 MPASS(oldstate == state);
 3280                                 umtx_key_release(&uq->uq_key);
 3281                                 return (0);
 3282                         }
                              /* Lost the race: retry with the value just observed. */
 3283                         state = oldstate;
 3284                         error = thread_check_susp(td, true);
 3285                         if (error != 0)
 3286                                 break;
 3287                 }
 3288 
                      /*
                       * A pending error ends the attempt.  Before leaving, wake
                       * every thread on the shared queue if no writer owns or
                       * waits for the lock and blocked readers were recorded on
                       * a previous pass.
                       */
 3289                 if (error) {
 3290                         if ((state & (URWLOCK_WRITE_OWNER |
 3291                             URWLOCK_WRITE_WAITERS)) == 0 &&
 3292                             blocked_readers != 0) {
 3293                                 umtxq_lock(&uq->uq_key);
 3294                                 umtxq_busy(&uq->uq_key);
 3295                                 umtxq_signal_queue(&uq->uq_key, INT_MAX,
 3296                                     UMTX_SHARED_QUEUE);
 3297                                 umtxq_unbusy(&uq->uq_key);
 3298                                 umtxq_unlock(&uq->uq_key);
 3299                         }
 3300 
 3301                         break;
 3302                 }
 3303 
 3304                 /* grab monitor lock */
 3305                 umtxq_lock(&uq->uq_key);
 3306                 umtxq_busy(&uq->uq_key);
 3307                 umtxq_unlock(&uq->uq_key);
 3308 
 3309                 /*
 3310                  * Re-read the state, in case it changed between the
 3311                  * try-lock above and the check below.
 3312                  */
 3313                 rv = fueword32(&rwlock->rw_state, &state);
 3314                 if (rv == -1)
 3315                         error = EFAULT;
 3316 
                      /*
                       * The lock is held: advertise a waiting writer unless
                       * URWLOCK_WRITE_WAITERS is already set.
                       */
 3317                 while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
 3318                     URWLOCK_READER_COUNT(state) != 0) &&
 3319                     (state & URWLOCK_WRITE_WAITERS) == 0) {
 3320                         rv = casueword32(&rwlock->rw_state, state,
 3321                             &oldstate, state | URWLOCK_WRITE_WAITERS);
 3322                         if (rv == -1) {
 3323                                 error = EFAULT;
 3324                                 break;
 3325                         }
 3326                         if (rv == 0) {
 3327                                 MPASS(oldstate == state);
 3328                                 goto sleep;
 3329                         }
 3330                         state = oldstate;
 3331                         error = thread_check_susp(td, false);
 3332                         if (error != 0)
 3333                                 break;
 3334                 }
 3335                 if (error != 0) {
 3336                         umtxq_unbusy_unlocked(&uq->uq_key);
 3337                         break;
 3338                 }
 3339 
                      /* The lock was released meanwhile: go back to the try-lock. */
 3340                 if ((state & URWLOCK_WRITE_OWNER) == 0 &&
 3341                     URWLOCK_READER_COUNT(state) == 0) {
 3342                         umtxq_unbusy_unlocked(&uq->uq_key);
 3343                         error = thread_check_susp(td, false);
 3344                         if (error != 0)
 3345                                 break;
 3346                         continue;
 3347                 }
 3348 sleep:
                      /*
                       * Count this thread in rw_blocked_writers before sleeping.
                       * NOTE(review): the result of suword32() is not checked.
                       */
 3349                 rv = fueword32(&rwlock->rw_blocked_writers,
 3350                     &blocked_writers);
 3351                 if (rv == -1) {
 3352                         umtxq_unbusy_unlocked(&uq->uq_key);
 3353                         error = EFAULT;
 3354                         break;
 3355                 }
 3356                 suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);
 3357 
                      /*
                       * Sleep on the exclusive queue until the state word shows
                       * neither a write owner nor readers, or an error occurs.
                       */
 3358                 while ((state & URWLOCK_WRITE_OWNER) ||
 3359                     URWLOCK_READER_COUNT(state) != 0) {
 3360                         umtxq_lock(&uq->uq_key);
 3361                         umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
 3362                         umtxq_unbusy(&uq->uq_key);
 3363 
 3364                         error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
 3365                             NULL : &timo);
 3366 
 3367                         umtxq_busy(&uq->uq_key);
 3368                         umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
 3369                         umtxq_unlock(&uq->uq_key);
 3370                         if (error)
 3371                                 break;
 3372                         rv = fueword32(&rwlock->rw_state, &state);
 3373                         if (rv == -1) {
 3374                                 error = EFAULT;
 3375                                 break;
 3376                         }
 3377                 }
 3378 
                      /* Done sleeping: take this thread out of rw_blocked_writers. */
 3379                 rv = fueword32(&rwlock->rw_blocked_writers,
 3380                     &blocked_writers);
 3381                 if (rv == -1) {
 3382                         umtxq_unbusy_unlocked(&uq->uq_key);
 3383                         error = EFAULT;
 3384                         break;
 3385                 }
 3386                 suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
                      /*
                       * This was the last blocked writer: clear
                       * URWLOCK_WRITE_WAITERS and remember how many readers are
                       * blocked so they can be woken if this attempt fails.
                       */
 3387                 if (blocked_writers == 1) {
 3388                         rv = fueword32(&rwlock->rw_state, &state);
 3389                         if (rv == -1) {
 3390                                 umtxq_unbusy_unlocked(&uq->uq_key);
 3391                                 error = EFAULT;
 3392                                 break;
 3393                         }
 3394                         for (;;) {
 3395                                 rv = casueword32(&rwlock->rw_state, state,
 3396                                     &oldstate, state & ~URWLOCK_WRITE_WAITERS);
 3397                                 if (rv == -1) {
 3398                                         error = EFAULT;
 3399                                         break;
 3400                                 }
 3401                                 if (rv == 0) {
 3402                                         MPASS(oldstate == state);
 3403                                         break;
 3404                                 }
 3405                                 state = oldstate;
 3406                                 error1 = thread_check_susp(td, false);
 3407                                 /*
 3408                                  * We are leaving the URWLOCK_WRITE_WAITERS
 3409                                  * behind, but this should not harm the
 3410                                  * correctness.
 3411                                  */
 3412                                 if (error1 != 0) {
                                              /* Keep an earlier error, e.g. from the sleep. */
 3413                                         if (error == 0)
 3414                                                 error = error1;
 3415                                         break;
 3416                                 }
 3417                         }
 3418                         rv = fueword32(&rwlock->rw_blocked_readers,
 3419                             &blocked_readers);
 3420                         if (rv == -1) {
 3421                                 umtxq_unbusy_unlocked(&uq->uq_key);
 3422                                 error = EFAULT;
 3423                                 break;
 3424                         }
 3425                 } else
 3426                         blocked_readers = 0;
 3427 
 3428                 umtxq_unbusy_unlocked(&uq->uq_key);
 3429         }
 3430 
 3431         umtx_key_release(&uq->uq_key);
 3432         if (error == ERESTART)
 3433                 error = EINTR;
 3434         return (error);
 3435 }
 3436 
 3437 static int
 3438 do_rw_unlock(struct thread *td, struct urwlock *rwlock)
 3439 {
 3440         struct umtx_q *uq;
 3441         uint32_t flags;
 3442         int32_t state, oldstate;
 3443         int error, rv, q, count;
 3444 
 3445         uq = td->td_umtxq;
 3446         error = fueword32(&rwlock->rw_flags, &flags);
 3447         if (error == -1)
 3448                 return (EFAULT);
 3449         error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
 3450         if (error != 0)
 3451                 return (error);
 3452 
 3453         error = fueword32(&rwlock->rw_state, &state);
 3454         if (error == -1) {
 3455                 error = EFAULT;
 3456                 goto out;
 3457         }
 3458         if (state & URWLOCK_WRITE_OWNER) {
 3459                 for (;;) {
 3460                         rv = casueword32(&rwlock->rw_state, state,
 3461                             &oldstate, state & ~URWLOCK_WRITE_OWNER);
 3462                         if (rv == -1) {
 3463                                 error = EFAULT;
 3464                                 goto out;
 3465                         }
 3466                         if (rv == 1) {
 3467                                 state = oldstate;
 3468                                 if (!(oldstate & URWLOCK_WRITE_OWNER)) {
 3469                                         error = EPERM;
 3470                                         goto out;
 3471                                 }
 3472                                 error = thread_check_susp(td, true);
 3473                                 if (error != 0)
 3474                                         goto out;
 3475                         } else
 3476                                 break;
 3477                 }
 3478         } else if (URWLOCK_READER_COUNT(state) != 0) {
 3479                 for (;;) {
 3480                         rv = casueword32(&rwlock->rw_state, state,
 3481                             &oldstate, state - 1);
 3482                         if (rv == -1) {
 3483                                 error = EFAULT;
 3484                                 goto out;
 3485                         }
 3486                         if (rv == 1) {
 3487                                 state = oldstate;
 3488                                 if (URWLOCK_READER_COUNT(oldstate) == 0) {
 3489                                         error = EPERM;
 3490                                         goto out;
 3491                                 }
 3492                                 error = thread_check_susp(td, true);
 3493                                 if (error != 0)
 3494                                         goto out;
 3495                         } else
 3496                                 break;
 3497                 }
 3498         } else {
 3499                 error = EPERM;
 3500                 goto out;
 3501         }
 3502 
 3503         count = 0;
 3504 
 3505         if (!(flags & URWLOCK_PREFER_READER)) {
 3506                 if (state & URWLOCK_WRITE_WAITERS) {
 3507                         count = 1;
 3508                         q = UMTX_EXCLUSIVE_QUEUE;
 3509                 } else if (state & URWLOCK_READ_WAITERS) {
 3510                         count = INT_MAX;
 3511                         q = UMTX_SHARED_QUEUE;
 3512                 }
 3513         } else {
 3514                 if (state & URWLOCK_READ_WAITERS) {
 3515                         count = INT_MAX;
 3516                         q = UMTX_SHARED_QUEUE;
 3517                 } else if (state & URWLOCK_WRITE_WAITERS) {
 3518                         count = 1;
 3519                         q = UMTX_EXCLUSIVE_QUEUE;
 3520                 }
 3521         }
 3522 
 3523         if (count) {
 3524                 umtxq_lock(&uq->uq_key);
 3525                 umtxq_busy(&uq->uq_key);
 3526                 umtxq_signal_queue(&uq->uq_key, count, q);
 3527                 umtxq_unbusy(&uq->uq_key);
 3528                 umtxq_unlock(&uq->uq_key);
 3529         }
 3530 out:
 3531         umtx_key_release(&uq->uq_key);
 3532         return (error);
 3533 }
 3534 
 3535 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
 3536 static int
 3537 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
 3538 {
 3539         struct umtx_abs_timeout timo;
 3540         struct umtx_q *uq;
 3541         uint32_t flags, count, count1;
 3542         int error, rv, rv1;
 3543 
 3544         uq = td->td_umtxq;
 3545         error = fueword32(&sem->_flags, &flags);
 3546         if (error == -1)
 3547                 return (EFAULT);
 3548         error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
 3549         if (error != 0)
 3550                 return (error);
 3551 
 3552         if (timeout != NULL)
 3553                 umtx_abs_timeout_init2(&timo, timeout);
 3554 
 3555 again:
 3556         umtxq_lock(&uq->uq_key);
 3557         umtxq_busy(&uq->uq_key);
 3558         umtxq_insert(uq);
 3559         umtxq_unlock(&uq->uq_key);
 3560         rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
 3561         if (rv != -1)
 3562                 rv1 = fueword32(&sem->_count, &count);
 3563         if (rv == -1 || rv1 == -1 || count != 0 || (rv == 1 && count1 == 0)) {
 3564                 if (rv == 0)
 3565                         suword32(&sem->_has_waiters, 0);
 3566                 umtxq_lock(&uq->uq_key);
 3567                 umtxq_unbusy(&uq->uq_key);
 3568                 umtxq_remove(uq);
 3569                 umtxq_unlock(&uq->uq_key);
 3570                 if (rv == -1 || rv1 == -1) {
 3571                         error = EFAULT;
 3572                         goto out;
 3573                 }
 3574                 if (count != 0) {
 3575                         error = 0;
 3576                         goto out;
 3577                 }
 3578                 MPASS(rv == 1 && count1 == 0);
 3579                 rv = thread_check_susp(td, true);
 3580                 if (rv == 0)
 3581                         goto again;
 3582                 error = rv;
 3583                 goto out;
 3584         }
 3585         umtxq_lock(&uq->uq_key);
 3586         umtxq_unbusy(&uq->uq_key);
 3587 
 3588         error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
 3589 
 3590         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 3591                 error = 0;
 3592         else {
 3593                 umtxq_remove(uq);
 3594                 /* A relative timeout cannot be restarted. */
 3595                 if (error == ERESTART && timeout != NULL &&
 3596                     (timeout->_flags & UMTX_ABSTIME) == 0)
 3597                         error = EINTR;
 3598         }
 3599         umtxq_unlock(&uq->uq_key);
 3600 out:
 3601         umtx_key_release(&uq->uq_key);
 3602         return (error);
 3603 }
 3604 
 3605 /*
 3606  * Signal a userland semaphore.
 3607  */
 3608 static int
 3609 do_sem_wake(struct thread *td, struct _usem *sem)
 3610 {
 3611         struct umtx_key key;
 3612         int error, cnt;
 3613         uint32_t flags;
 3614 
 3615         error = fueword32(&sem->_flags, &flags);
 3616         if (error == -1)
 3617                 return (EFAULT);
 3618         if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
 3619                 return (error);
 3620         umtxq_lock(&key);
 3621         umtxq_busy(&key);
 3622         cnt = umtxq_count(&key);
 3623         if (cnt > 0) {
 3624                 /*
 3625                  * Check if count is greater than 0, this means the memory is
 3626                  * still being referenced by user code, so we can safely
 3627                  * update _has_waiters flag.
 3628                  */
 3629                 if (cnt == 1) {
 3630                         umtxq_unlock(&key);
 3631                         error = suword32(&sem->_has_waiters, 0);
 3632                         umtxq_lock(&key);
 3633                         if (error == -1)
 3634                                 error = EFAULT;
 3635                 }
 3636                 umtxq_signal(&key, 1);
 3637         }
 3638         umtxq_unbusy(&key);
 3639         umtxq_unlock(&key);
 3640         umtx_key_release(&key);
 3641         return (error);
 3642 }
 3643 #endif
 3644 
 3645 static int
 3646 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
 3647 {
 3648         struct umtx_abs_timeout timo;
 3649         struct umtx_q *uq;
 3650         uint32_t count, flags;
 3651         int error, rv;
 3652 
 3653         uq = td->td_umtxq;
 3654         flags = fuword32(&sem->_flags);
 3655         if (timeout != NULL)
 3656                 umtx_abs_timeout_init2(&timo, timeout);
 3657 
 3658 again:
 3659         error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
 3660         if (error != 0)
 3661                 return (error);
 3662         umtxq_lock(&uq->uq_key);
 3663         umtxq_busy(&uq->uq_key);
 3664         umtxq_insert(uq);
 3665         umtxq_unlock(&uq->uq_key);
 3666         rv = fueword32(&sem->_count, &count);
 3667         if (rv == -1) {
 3668                 umtxq_lock(&uq->uq_key);
 3669                 umtxq_unbusy(&uq->uq_key);
 3670                 umtxq_remove(uq);
 3671                 umtxq_unlock(&uq->uq_key);
 3672                 umtx_key_release(&uq->uq_key);
 3673                 return (EFAULT);
 3674         }
 3675         for (;;) {
 3676                 if (USEM_COUNT(count) != 0) {
 3677                         umtxq_lock(&uq->uq_key);
 3678                         umtxq_unbusy(&uq->uq_key);
 3679                         umtxq_remove(uq);
 3680                         umtxq_unlock(&uq->uq_key);
 3681                         umtx_key_release(&uq->uq_key);
 3682                         return (0);
 3683                 }
 3684                 if (count == USEM_HAS_WAITERS)
 3685                         break;
 3686                 rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
 3687                 if (rv == 0)
 3688                         break;
 3689                 umtxq_lock(&uq->uq_key);
 3690                 umtxq_unbusy(&uq->uq_key);
 3691                 umtxq_remove(uq);
 3692                 umtxq_unlock(&uq->uq_key);
 3693                 umtx_key_release(&uq->uq_key);
 3694                 if (rv == -1)
 3695                         return (EFAULT);
 3696                 rv = thread_check_susp(td, true);
 3697                 if (rv != 0)
 3698                         return (rv);
 3699                 goto again;
 3700         }
 3701         umtxq_lock(&uq->uq_key);
 3702         umtxq_unbusy(&uq->uq_key);
 3703 
 3704         error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
 3705 
 3706         if ((uq->uq_flags & UQF_UMTXQ) == 0)
 3707                 error = 0;
 3708         else {
 3709                 umtxq_remove(uq);
 3710                 if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
 3711                         /* A relative timeout cannot be restarted. */
 3712                         if (error == ERESTART)
 3713                                 error = EINTR;
 3714                         if (error == EINTR) {
 3715                                 kern_clock_gettime(curthread, timo.clockid,
 3716                                     &timo.cur);
 3717                                 timespecsub(&timo.end, &timo.cur,
 3718                                     &timeout->_timeout);
 3719                         }
 3720                 }
 3721         }
 3722         umtxq_unlock(&uq->uq_key);
 3723         umtx_key_release(&uq->uq_key);
 3724         return (error);
 3725 }
 3726 
 3727 /*
 3728  * Signal a userland semaphore.
 3729  */
 3730 static int
 3731 do_sem2_wake(struct thread *td, struct _usem2 *sem)
 3732 {
 3733         struct umtx_key key;
 3734         int error, cnt, rv;
 3735         uint32_t count, flags;
 3736 
 3737         rv = fueword32(&sem->_flags, &flags);
 3738         if (rv == -1)
 3739                 return (EFAULT);
 3740         if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
 3741                 return (error);
 3742         umtxq_lock(&key);
 3743         umtxq_busy(&key);
 3744         cnt = umtxq_count(&key);
 3745         if (cnt > 0) {
 3746                 /*
 3747                  * If this was the last sleeping thread, clear the waiters
 3748                  * flag in _count.
 3749                  */
 3750                 if (cnt == 1) {
 3751                         umtxq_unlock(&key);
 3752                         rv = fueword32(&sem->_count, &count);
 3753                         while (rv != -1 && count & USEM_HAS_WAITERS) {
 3754                                 rv = casueword32(&sem->_count, count, &count,
 3755                                     count & ~USEM_HAS_WAITERS);
 3756                                 if (rv == 1) {
 3757                                         rv = thread_check_susp(td, true);
 3758                                         if (rv != 0)
 3759                                                 break;
 3760                                 }
 3761                         }
 3762                         if (rv == -1)
 3763                                 error = EFAULT;
 3764                         else if (rv > 0) {
 3765                                 error = rv;
 3766                         }
 3767                         umtxq_lock(&key);
 3768                 }
 3769 
 3770                 umtxq_signal(&key, 1);
 3771         }
 3772         umtxq_unbusy(&key);
 3773         umtxq_unlock(&key);
 3774         umtx_key_release(&key);
 3775         return (error);
 3776 }
 3777 
 3778 #ifdef COMPAT_FREEBSD10
 3779 int
 3780 freebsd10__umtx_lock(struct thread *td, struct freebsd10__umtx_lock_args *uap)
 3781 {
 3782         return (do_lock_umtx(td, uap->umtx, td->td_tid, 0));
 3783 }
 3784 
 3785 int
 3786 freebsd10__umtx_unlock(struct thread *td,
 3787     struct freebsd10__umtx_unlock_args *uap)
 3788 {
 3789         return (do_unlock_umtx(td, uap->umtx, td->td_tid));
 3790 }
 3791 #endif
 3792 
 3793 inline int
 3794 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp)
 3795 {
 3796         int error;
 3797 
 3798         error = copyin(uaddr, tsp, sizeof(*tsp));
 3799         if (error == 0) {
 3800                 if (!timespecvalid_interval(tsp))
 3801                         error = EINVAL;
 3802         }
 3803         return (error);
 3804 }
 3805 
 3806 static inline int
 3807 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp)
 3808 {
 3809         int error;
 3810 
 3811         if (size <= sizeof(tp->_timeout)) {
 3812                 tp->_clockid = CLOCK_REALTIME;
 3813                 tp->_flags = 0;
 3814                 error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout));
 3815         } else
 3816                 error = copyin(uaddr, tp, sizeof(*tp));
 3817         if (error != 0)
 3818                 return (error);
 3819         if (!timespecvalid_interval(&tp->_timeout))
 3820                 return (EINVAL);
 3821         return (0);
 3822 }
 3823 
 3824 static int
 3825 umtx_copyin_robust_lists(const void *uaddr, size_t size,
 3826     struct umtx_robust_lists_params *rb)
 3827 {
 3828 
 3829         if (size > sizeof(*rb))
 3830                 return (EINVAL);
 3831         return (copyin(uaddr, rb, size));
 3832 }
 3833 
 3834 static int
 3835 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp)
 3836 {
 3837 
 3838         /*
 3839          * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
 3840          * and we're only called if sz >= sizeof(timespec) as supplied in the
 3841          * copyops.
 3842          */
 3843         KASSERT(sz >= sizeof(*tsp),
 3844             ("umtx_copyops specifies incorrect sizes"));
 3845 
 3846         return (copyout(tsp, uaddr, sizeof(*tsp)));
 3847 }
 3848 
 3849 #ifdef COMPAT_FREEBSD10
 3850 static int
 3851 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap,
 3852     const struct umtx_copyops *ops)
 3853 {
 3854         struct timespec *ts, timeout;
 3855         int error;
 3856 
 3857         /* Allow a null timespec (wait forever). */
 3858         if (uap->uaddr2 == NULL)
 3859                 ts = NULL;
 3860         else {
 3861                 error = ops->copyin_timeout(uap->uaddr2, &timeout);
 3862                 if (error != 0)
 3863                         return (error);
 3864                 ts = &timeout;
 3865         }
 3866 #ifdef COMPAT_FREEBSD32
 3867         if (ops->compat32)
 3868                 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
 3869 #endif
 3870         return (do_lock_umtx(td, uap->obj, uap->val, ts));
 3871 }
 3872 
 3873 static int
 3874 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap,
 3875     const struct umtx_copyops *ops)
 3876 {
 3877 #ifdef COMPAT_FREEBSD32
 3878         if (ops->compat32)
 3879                 return (do_unlock_umtx32(td, uap->obj, uap->val));
 3880 #endif
 3881         return (do_unlock_umtx(td, uap->obj, uap->val));
 3882 }
 3883 #endif  /* COMPAT_FREEBSD10 */
 3884 
 3885 #if !defined(COMPAT_FREEBSD10)
 3886 static int
 3887 __umtx_op_unimpl(struct thread *td __unused, struct _umtx_op_args *uap __unused,
 3888     const struct umtx_copyops *ops __unused)
 3889 {
 3890         return (EOPNOTSUPP);
 3891 }
 3892 #endif  /* COMPAT_FREEBSD10 */
 3893 
 3894 static int
 3895 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap,
 3896     const struct umtx_copyops *ops)
 3897 {
 3898         struct _umtx_time timeout, *tm_p;
 3899         int error;
 3900 
 3901         if (uap->uaddr2 == NULL)
 3902                 tm_p = NULL;
 3903         else {
 3904                 error = ops->copyin_umtx_time(
 3905                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3906                 if (error != 0)
 3907                         return (error);
 3908                 tm_p = &timeout;
 3909         }
 3910         return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0));
 3911 }
 3912 
 3913 static int
 3914 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap,
 3915     const struct umtx_copyops *ops)
 3916 {
 3917         struct _umtx_time timeout, *tm_p;
 3918         int error;
 3919 
 3920         if (uap->uaddr2 == NULL)
 3921                 tm_p = NULL;
 3922         else {
 3923                 error = ops->copyin_umtx_time(
 3924                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3925                 if (error != 0)
 3926                         return (error);
 3927                 tm_p = &timeout;
 3928         }
 3929         return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
 3930 }
 3931 
 3932 static int
 3933 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap,
 3934     const struct umtx_copyops *ops)
 3935 {
 3936         struct _umtx_time *tm_p, timeout;
 3937         int error;
 3938 
 3939         if (uap->uaddr2 == NULL)
 3940                 tm_p = NULL;
 3941         else {
 3942                 error = ops->copyin_umtx_time(
 3943                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 3944                 if (error != 0)
 3945                         return (error);
 3946                 tm_p = &timeout;
 3947         }
 3948         return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
 3949 }
 3950 
 3951 static int
 3952 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap,
 3953     const struct umtx_copyops *ops __unused)
 3954 {
 3955 
 3956         return (kern_umtx_wake(td, uap->obj, uap->val, 0));
 3957 }
 3958 
 3959 #define BATCH_SIZE      128
 3960 static int
 3961 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap)
 3962 {
 3963         char *uaddrs[BATCH_SIZE], **upp;
 3964         int count, error, i, pos, tocopy;
 3965 
 3966         upp = (char **)uap->obj;
 3967         error = 0;
 3968         for (count = uap->val, pos = 0; count > 0; count -= tocopy,
 3969             pos += tocopy) {
 3970                 tocopy = MIN(count, BATCH_SIZE);
 3971                 error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
 3972                 if (error != 0)
 3973                         break;
 3974                 for (i = 0; i < tocopy; ++i) {
 3975                         kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
 3976                 }
 3977                 maybe_yield();
 3978         }
 3979         return (error);
 3980 }
 3981 
 3982 static int
 3983 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap)
 3984 {
 3985         uint32_t uaddrs[BATCH_SIZE], *upp;
 3986         int count, error, i, pos, tocopy;
 3987 
 3988         upp = (uint32_t *)uap->obj;
 3989         error = 0;
 3990         for (count = uap->val, pos = 0; count > 0; count -= tocopy,
 3991             pos += tocopy) {
 3992                 tocopy = MIN(count, BATCH_SIZE);
 3993                 error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
 3994                 if (error != 0)
 3995                         break;
 3996                 for (i = 0; i < tocopy; ++i) {
 3997                         kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i],
 3998                             INT_MAX, 1);
 3999                 }
 4000                 maybe_yield();
 4001         }
 4002         return (error);
 4003 }
 4004 
 4005 static int
 4006 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap,
 4007     const struct umtx_copyops *ops)
 4008 {
 4009 
 4010         if (ops->compat32)
 4011                 return (__umtx_op_nwake_private_compat32(td, uap));
 4012         return (__umtx_op_nwake_private_native(td, uap));
 4013 }
 4014 
 4015 static int
 4016 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap,
 4017     const struct umtx_copyops *ops __unused)
 4018 {
 4019 
 4020         return (kern_umtx_wake(td, uap->obj, uap->val, 1));
 4021 }
 4022 
 4023 static int
 4024 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap,
 4025    const struct umtx_copyops *ops)
 4026 {
 4027         struct _umtx_time *tm_p, timeout;
 4028         int error;
 4029 
 4030         /* Allow a null timespec (wait forever). */
 4031         if (uap->uaddr2 == NULL)
 4032                 tm_p = NULL;
 4033         else {
 4034                 error = ops->copyin_umtx_time(
 4035                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 4036                 if (error != 0)
 4037                         return (error);
 4038                 tm_p = &timeout;
 4039         }
 4040         return (do_lock_umutex(td, uap->obj, tm_p, 0));
 4041 }
 4042 
 4043 static int
 4044 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap,
 4045     const struct umtx_copyops *ops __unused)
 4046 {
 4047 
 4048         return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
 4049 }
 4050 
 4051 static int
 4052 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap,
 4053     const struct umtx_copyops *ops)
 4054 {
 4055         struct _umtx_time *tm_p, timeout;
 4056         int error;
 4057 
 4058         /* Allow a null timespec (wait forever). */
 4059         if (uap->uaddr2 == NULL)
 4060                 tm_p = NULL;
 4061         else {
 4062                 error = ops->copyin_umtx_time(
 4063                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 4064                 if (error != 0)
 4065                         return (error);
 4066                 tm_p = &timeout;
 4067         }
 4068         return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
 4069 }
 4070 
 4071 static int
 4072 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap,
 4073     const struct umtx_copyops *ops __unused)
 4074 {
 4075 
 4076         return (do_wake_umutex(td, uap->obj));
 4077 }
 4078 
 4079 static int
 4080 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap,
 4081     const struct umtx_copyops *ops __unused)
 4082 {
 4083 
 4084         return (do_unlock_umutex(td, uap->obj, false));
 4085 }
 4086 
 4087 static int
 4088 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap,
 4089     const struct umtx_copyops *ops __unused)
 4090 {
 4091 
 4092         return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
 4093 }
 4094 
 4095 static int
 4096 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap,
 4097     const struct umtx_copyops *ops)
 4098 {
 4099         struct timespec *ts, timeout;
 4100         int error;
 4101 
 4102         /* Allow a null timespec (wait forever). */
 4103         if (uap->uaddr2 == NULL)
 4104                 ts = NULL;
 4105         else {
 4106                 error = ops->copyin_timeout(uap->uaddr2, &timeout);
 4107                 if (error != 0)
 4108                         return (error);
 4109                 ts = &timeout;
 4110         }
 4111         return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
 4112 }
 4113 
 4114 static int
 4115 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap,
 4116     const struct umtx_copyops *ops __unused)
 4117 {
 4118 
 4119         return (do_cv_signal(td, uap->obj));
 4120 }
 4121 
 4122 static int
 4123 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap,
 4124     const struct umtx_copyops *ops __unused)
 4125 {
 4126 
 4127         return (do_cv_broadcast(td, uap->obj));
 4128 }
 4129 
 4130 static int
 4131 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap,
 4132     const struct umtx_copyops *ops)
 4133 {
 4134         struct _umtx_time timeout;
 4135         int error;
 4136 
 4137         /* Allow a null timespec (wait forever). */
 4138         if (uap->uaddr2 == NULL) {
 4139                 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
 4140         } else {
 4141                 error = ops->copyin_umtx_time(uap->uaddr2,
 4142                    (size_t)uap->uaddr1, &timeout);
 4143                 if (error != 0)
 4144                         return (error);
 4145                 error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
 4146         }
 4147         return (error);
 4148 }
 4149 
 4150 static int
 4151 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap,
 4152     const struct umtx_copyops *ops)
 4153 {
 4154         struct _umtx_time timeout;
 4155         int error;
 4156 
 4157         /* Allow a null timespec (wait forever). */
 4158         if (uap->uaddr2 == NULL) {
 4159                 error = do_rw_wrlock(td, uap->obj, 0);
 4160         } else {
 4161                 error = ops->copyin_umtx_time(uap->uaddr2,
 4162                    (size_t)uap->uaddr1, &timeout);
 4163                 if (error != 0)
 4164                         return (error);
 4165 
 4166                 error = do_rw_wrlock(td, uap->obj, &timeout);
 4167         }
 4168         return (error);
 4169 }
 4170 
 4171 static int
 4172 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap,
 4173     const struct umtx_copyops *ops __unused)
 4174 {
 4175 
 4176         return (do_rw_unlock(td, uap->obj));
 4177 }
 4178 
 4179 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
 4180 static int
 4181 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap,
 4182     const struct umtx_copyops *ops)
 4183 {
 4184         struct _umtx_time *tm_p, timeout;
 4185         int error;
 4186 
 4187         /* Allow a null timespec (wait forever). */
 4188         if (uap->uaddr2 == NULL)
 4189                 tm_p = NULL;
 4190         else {
 4191                 error = ops->copyin_umtx_time(
 4192                     uap->uaddr2, (size_t)uap->uaddr1, &timeout);
 4193                 if (error != 0)
 4194                         return (error);
 4195                 tm_p = &timeout;
 4196         }
 4197         return (do_sem_wait(td, uap->obj, tm_p));
 4198 }
 4199 
 4200 static int
 4201 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap,
 4202     const struct umtx_copyops *ops __unused)
 4203 {
 4204 
 4205         return (do_sem_wake(td, uap->obj));
 4206 }
 4207 #endif
 4208 
 4209 static int
 4210 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap,
 4211     const struct umtx_copyops *ops __unused)
 4212 {
 4213 
 4214         return (do_wake2_umutex(td, uap->obj, uap->val));
 4215 }
 4216 
 4217 static int
 4218 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap,
 4219     const struct umtx_copyops *ops)
 4220 {
 4221         struct _umtx_time *tm_p, timeout;
 4222         size_t uasize;
 4223         int error;
 4224 
 4225         /* Allow a null timespec (wait forever). */
 4226         if (uap->uaddr2 == NULL) {
 4227                 uasize = 0;
 4228                 tm_p = NULL;
 4229         } else {
 4230                 uasize = (size_t)uap->uaddr1;
 4231                 error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout);
 4232                 if (error != 0)
 4233                         return (error);
 4234                 tm_p = &timeout;
 4235         }
 4236         error = do_sem2_wait(td, uap->obj, tm_p);
 4237         if (error == EINTR && uap->uaddr2 != NULL &&
 4238             (timeout._flags & UMTX_ABSTIME) == 0 &&
 4239             uasize >= ops->umtx_time_sz + ops->timespec_sz) {
 4240                 error = ops->copyout_timeout(
 4241                     (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz),
 4242                     uasize - ops->umtx_time_sz, &timeout._timeout);
 4243                 if (error == 0) {
 4244                         error = EINTR;
 4245                 }
 4246         }
 4247 
 4248         return (error);
 4249 }
 4250 
 4251 static int
 4252 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap,
 4253     const struct umtx_copyops *ops __unused)
 4254 {
 4255 
 4256         return (do_sem2_wake(td, uap->obj));
 4257 }
 4258 
/* View an object's umtx_data member as the head of its registration list. */
#define USHM_OBJ_UMTX(o)                                                \
    ((struct umtx_shm_obj_list *)(&(o)->umtx_data))

/* ushm_flags bits: record which of the two lists a registration is on. */
#define USHMF_REG_LINKED        0x0001
#define USHMF_OBJ_LINKED        0x0002
/*
 * One shared-memory registration, looked up by umtx key.
 */
struct umtx_shm_reg {
        /* Linkage on a umtx_shm_registry[] chain or on the delfree list. */
        TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link;
        /* Linkage on the per-object list reached via USHM_OBJ_UMTX(). */
        LIST_ENTRY(umtx_shm_reg) ushm_obj_link;
        /* Key compared (shared object + offset) when searching. */
        struct umtx_key         ushm_key;
        /* Credential held for the registration; released on free. */
        struct ucred            *ushm_cred;
        /* Backing shm descriptor; dropped on free. */
        struct shmfd            *ushm_obj;
        /* Reference count, modified under umtx_shm_lock. */
        u_int                   ushm_refcnt;
        /* USHMF_* bits. */
        u_int                   ushm_flags;
};

LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);

/* UMA zone that struct umtx_shm_reg instances are allocated from. */
static uma_zone_t umtx_shm_reg_zone;
/* Registration chains, indexed by the key's hash value. */
static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
/* Mutex held around registry, per-object list and refcount updates. */
static struct mtx umtx_shm_lock;
/* Registrations waiting to be freed by umtx_shm_reg_delfree_tq(). */
static struct umtx_shm_reg_head umtx_shm_reg_delfree =
    TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);

static void umtx_shm_free_reg(struct umtx_shm_reg *reg);
 4284 
 4285 static void
 4286 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
 4287 {
 4288         struct umtx_shm_reg_head d;
 4289         struct umtx_shm_reg *reg, *reg1;
 4290 
 4291         TAILQ_INIT(&d);
 4292         mtx_lock(&umtx_shm_lock);
 4293         TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
 4294         mtx_unlock(&umtx_shm_lock);
 4295         TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
 4296                 TAILQ_REMOVE(&d, reg, ushm_reg_link);
 4297                 umtx_shm_free_reg(reg);
 4298         }
 4299 }
 4300 
 4301 static struct task umtx_shm_reg_delfree_task =
 4302     TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);
 4303 
 4304 static struct umtx_shm_reg *
 4305 umtx_shm_find_reg_locked(const struct umtx_key *key)
 4306 {
 4307         struct umtx_shm_reg *reg;
 4308         struct umtx_shm_reg_head *reg_head;
 4309 
 4310         KASSERT(key->shared, ("umtx_p_find_rg: private key"));
 4311         mtx_assert(&umtx_shm_lock, MA_OWNED);
 4312         reg_head = &umtx_shm_registry[key->hash];
 4313         TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
 4314                 KASSERT(reg->ushm_key.shared,
 4315                     ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
 4316                 if (reg->ushm_key.info.shared.object ==
 4317                     key->info.shared.object &&
 4318                     reg->ushm_key.info.shared.offset ==
 4319                     key->info.shared.offset) {
 4320                         KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
 4321                         KASSERT(reg->ushm_refcnt > 0,
 4322                             ("reg %p refcnt 0 onlist", reg));
 4323                         KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
 4324                             ("reg %p not linked", reg));
 4325                         reg->ushm_refcnt++;
 4326                         return (reg);
 4327                 }
 4328         }
 4329         return (NULL);
 4330 }
 4331 
 4332 static struct umtx_shm_reg *
 4333 umtx_shm_find_reg(const struct umtx_key *key)
 4334 {
 4335         struct umtx_shm_reg *reg;
 4336 
 4337         mtx_lock(&umtx_shm_lock);
 4338         reg = umtx_shm_find_reg_locked(key);
 4339         mtx_unlock(&umtx_shm_lock);
 4340         return (reg);
 4341 }
 4342 
 4343 static void
 4344 umtx_shm_free_reg(struct umtx_shm_reg *reg)
 4345 {
 4346 
 4347         chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
 4348         crfree(reg->ushm_cred);
 4349         shm_drop(reg->ushm_obj);
 4350         uma_zfree(umtx_shm_reg_zone, reg);
 4351 }
 4352 
 4353 static bool
 4354 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
 4355 {
 4356         bool res;
 4357 
 4358         mtx_assert(&umtx_shm_lock, MA_OWNED);
 4359         KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
 4360         reg->ushm_refcnt--;
 4361         res = reg->ushm_refcnt == 0;
 4362         if (res || force) {
 4363                 if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
 4364                         TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
 4365                             reg, ushm_reg_link);
 4366                         reg->ushm_flags &= ~USHMF_REG_LINKED;
 4367                 }
 4368                 if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
 4369                         LIST_REMOVE(reg, ushm_obj_link);
 4370                         reg->ushm_flags &= ~USHMF_OBJ_LINKED;
 4371                 }
 4372         }
 4373         return (res);
 4374 }
 4375 
 4376 static void
 4377 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
 4378 {
 4379         vm_object_t object;
 4380         bool dofree;
 4381 
 4382         if (force) {
 4383                 object = reg->ushm_obj->shm_object;
 4384                 VM_OBJECT_WLOCK(object);
 4385                 vm_object_set_flag(object, OBJ_UMTXDEAD);
 4386                 VM_OBJECT_WUNLOCK(object);
 4387         }
 4388         mtx_lock(&umtx_shm_lock);
 4389         dofree = umtx_shm_unref_reg_locked(reg, force);
 4390         mtx_unlock(&umtx_shm_lock);
 4391         if (dofree)
 4392                 umtx_shm_free_reg(reg);
 4393 }
 4394 
 4395 void
 4396 umtx_shm_object_init(vm_object_t object)
 4397 {
 4398 
 4399         LIST_INIT(USHM_OBJ_UMTX(object));
 4400 }
 4401 
 4402 void
 4403 umtx_shm_object_terminated(vm_object_t object)
 4404 {
 4405         struct umtx_shm_reg *reg, *reg1;
 4406         bool dofree;
 4407 
 4408         if (LIST_EMPTY(USHM_OBJ_UMTX(object)))
 4409                 return;
 4410 
 4411         dofree = false;
 4412         mtx_lock(&umtx_shm_lock);
 4413         LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
 4414                 if (umtx_shm_unref_reg_locked(reg, true)) {
 4415                         TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
 4416                             ushm_reg_link);
 4417                         dofree = true;
 4418                 }
 4419         }
 4420         mtx_unlock(&umtx_shm_lock);
 4421         if (dofree)
 4422                 taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
 4423 }
 4424 
 4425 static int
 4426 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
 4427     struct umtx_shm_reg **res)
 4428 {
 4429         struct umtx_shm_reg *reg, *reg1;
 4430         struct ucred *cred;
 4431         int error;
 4432 
 4433         reg = umtx_shm_find_reg(key);
 4434         if (reg != NULL) {
 4435                 *res = reg;
 4436                 return (0);
 4437         }
 4438         cred = td->td_ucred;
 4439         if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
 4440                 return (ENOMEM);
 4441         reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
 4442         reg->ushm_refcnt = 1;
 4443         bcopy(key, &reg->ushm_key, sizeof(*key));
 4444         reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false);
 4445         reg->ushm_cred = crhold(cred);
 4446         error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
 4447         if (error != 0) {
 4448                 umtx_shm_free_reg(reg);
 4449                 return (error);
 4450         }
 4451         mtx_lock(&umtx_shm_lock);
 4452         reg1 = umtx_shm_find_reg_locked(key);
 4453         if (reg1 != NULL) {
 4454                 mtx_unlock(&umtx_shm_lock);
 4455                 umtx_shm_free_reg(reg);
 4456                 *res = reg1;
 4457                 return (0);
 4458         }
 4459         reg->ushm_refcnt++;
 4460         TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
 4461         LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
 4462             ushm_obj_link);
 4463         reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
 4464         mtx_unlock(&umtx_shm_lock);
 4465         *res = reg;
 4466         return (0);
 4467 }
 4468 
 4469 static int
 4470 umtx_shm_alive(struct thread *td, void *addr)
 4471 {
 4472         vm_map_t map;
 4473         vm_map_entry_t entry;
 4474         vm_object_t object;
 4475         vm_pindex_t pindex;
 4476         vm_prot_t prot;
 4477         int res, ret;
 4478         boolean_t wired;
 4479 
 4480         map = &td->td_proc->p_vmspace->vm_map;
 4481         res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
 4482             &object, &pindex, &prot, &wired);
 4483         if (res != KERN_SUCCESS)
 4484                 return (EFAULT);
 4485         if (object == NULL)
 4486                 ret = EINVAL;
 4487         else
 4488                 ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
 4489         vm_map_lookup_done(map, entry);
 4490         return (ret);
 4491 }
 4492 
 4493 static void
 4494 umtx_shm_init(void)
 4495 {
 4496         int i;
 4497 
 4498         umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
 4499             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 4500         mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
 4501         for (i = 0; i < nitems(umtx_shm_registry); i++)
 4502                 TAILQ_INIT(&umtx_shm_registry[i]);
 4503 }
 4504 
/*
 * Back end of the UMTX_OP_SHM operation.
 *
 * Exactly one of UMTX_SHM_CREAT, UMTX_SHM_LOOKUP, UMTX_SHM_DESTROY and
 * UMTX_SHM_ALIVE must be set in flags; otherwise EINVAL is returned.
 * For CREAT and LOOKUP, a new close-on-exec descriptor referring to the
 * registration's shm object is returned in td->td_retval[0].  LOOKUP and
 * DESTROY return ESRCH when no registration exists for addr.
 */
static int
umtx_shm(struct thread *td, void *addr, u_int flags)
{
        struct umtx_key key;
        struct umtx_shm_reg *reg;
        struct file *fp;
        int error, fd;

        if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
            UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1)
                return (EINVAL);
        /* ALIVE only inspects the mapping at addr; no key is needed. */
        if ((flags & UMTX_SHM_ALIVE) != 0)
                return (umtx_shm_alive(td, addr));
        error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
        if (error != 0)
                return (error);
        KASSERT(key.shared == 1, ("non-shared key"));
        if ((flags & UMTX_SHM_CREAT) != 0) {
                error = umtx_shm_create_reg(td, &key, &reg);
        } else {
                /* LOOKUP and DESTROY need an existing registration. */
                reg = umtx_shm_find_reg(&key);
                if (reg == NULL)
                        error = ESRCH;
        }
        umtx_key_release(&key);
        if (error != 0)
                return (error);
        KASSERT(reg != NULL, ("no reg"));
        if ((flags & UMTX_SHM_DESTROY) != 0) {
                /* Forced unref: unlinks reg and flags its object dead. */
                umtx_shm_unref_reg(reg, true);
        } else {
                /* The access checks below are compiled out. */
#if 0
#ifdef MAC
                error = mac_posixshm_check_open(td->td_ucred,
                    reg->ushm_obj, FFLAGS(O_RDWR));
                if (error == 0)
#endif
                        error = shm_access(reg->ushm_obj, td->td_ucred,
                            FFLAGS(O_RDWR));
                if (error == 0)
#endif
                        error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
                if (error == 0) {
                        /* The new file takes its own hold on the object. */
                        shm_hold(reg->ushm_obj);
                        finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
                            &shm_ops);
                        td->td_retval[0] = fd;
                        fdrop(fp, td);
                }
        }
        /* Drop the reference obtained by the find/create call above. */
        umtx_shm_unref_reg(reg, false);
        return (error);
}
 4558 
 4559 static int
 4560 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap,
 4561     const struct umtx_copyops *ops __unused)
 4562 {
 4563 
 4564         return (umtx_shm(td, uap->uaddr1, uap->val));
 4565 }
 4566 
 4567 static int
 4568 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
 4569     const struct umtx_copyops *ops)
 4570 {
 4571         struct umtx_robust_lists_params rb;
 4572         int error;
 4573 
 4574         if (ops->compat32) {
 4575                 if ((td->td_pflags2 & TDP2_COMPAT32RB) == 0 &&
 4576                     (td->td_rb_list != 0 || td->td_rbp_list != 0 ||
 4577                     td->td_rb_inact != 0))
 4578                         return (EBUSY);
 4579         } else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) {
 4580                 return (EBUSY);
 4581         }
 4582 
 4583         bzero(&rb, sizeof(rb));
 4584         error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
 4585         if (error != 0)
 4586                 return (error);
 4587 
 4588         if (ops->compat32)
 4589                 td->td_pflags2 |= TDP2_COMPAT32RB;
 4590 
 4591         td->td_rb_list = rb.robust_list_offset;
 4592         td->td_rbp_list = rb.robust_priv_list_offset;
 4593         td->td_rb_inact = rb.robust_inact_offset;
 4594         return (0);
 4595 }
 4596 
#if defined(__i386__) || defined(__amd64__)
/*
 * Provide the standard 32-bit definitions for x86, since native/compat32 use a
 * 32-bit time_t there.  Other architectures just need the i386 definitions
 * along with their standard compat32.
 */
/* Timespec layout with a 64-bit tv_sec and a 32-bit tv_nsec. */
struct timespecx32 {
        int64_t                 tv_sec;
        int32_t                 tv_nsec;
};

/* umtx time layout built on struct timespecx32. */
struct umtx_timex32 {
        struct  timespecx32     _timeout;
        uint32_t                _flags;
        uint32_t                _clockid;
};

/* On amd64 the i386 names alias the timespec32/umtx_time32 types. */
#ifndef __i386__
#define timespeci386    timespec32
#define umtx_timei386   umtx_time32
#endif
#else /* !__i386__ && !__amd64__ */
/* 32-bit architectures can emulate i386, so define these almost everywhere. */
/* Timespec layout with 32-bit tv_sec and tv_nsec. */
struct timespeci386 {
        int32_t                 tv_sec;
        int32_t                 tv_nsec;
};

/* umtx time layout built on struct timespeci386. */
struct umtx_timei386 {
        struct  timespeci386    _timeout;
        uint32_t                _flags;
        uint32_t                _clockid;
};

/* On LP64 non-x86 the x32 names alias the timespec32/umtx_time32 types. */
#if defined(__LP64__)
#define timespecx32     timespec32
#define umtx_timex32    umtx_time32
#endif
#endif /* __i386__ || __amd64__ */
 4636 
 4637 static int
 4638 umtx_copyin_robust_lists32(const void *uaddr, size_t size,
 4639     struct umtx_robust_lists_params *rbp)
 4640 {
 4641         struct umtx_robust_lists_params_compat32 rb32;
 4642         int error;
 4643 
 4644         if (size > sizeof(rb32))
 4645                 return (EINVAL);
 4646         bzero(&rb32, sizeof(rb32));
 4647         error = copyin(uaddr, &rb32, size);
 4648         if (error != 0)
 4649                 return (error);
 4650         CP(rb32, *rbp, robust_list_offset);
 4651         CP(rb32, *rbp, robust_priv_list_offset);
 4652         CP(rb32, *rbp, robust_inact_offset);
 4653         return (0);
 4654 }
 4655 
 4656 #ifndef __i386__
 4657 static inline int
 4658 umtx_copyin_timeouti386(const void *uaddr, struct timespec *tsp)
 4659 {
 4660         struct timespeci386 ts32;
 4661         int error;
 4662 
 4663         error = copyin(uaddr, &ts32, sizeof(ts32));
 4664         if (error == 0) {
 4665                 if (!timespecvalid_interval(&ts32))
 4666                         error = EINVAL;
 4667                 else {
 4668                         CP(ts32, *tsp, tv_sec);
 4669                         CP(ts32, *tsp, tv_nsec);
 4670                 }
 4671         }
 4672         return (error);
 4673 }
 4674 
 4675 static inline int
 4676 umtx_copyin_umtx_timei386(const void *uaddr, size_t size, struct _umtx_time *tp)
 4677 {
 4678         struct umtx_timei386 t32;
 4679         int error;
 4680 
 4681         t32._clockid = CLOCK_REALTIME;
 4682         t32._flags   = 0;
 4683         if (size <= sizeof(t32._timeout))
 4684                 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
 4685         else
 4686                 error = copyin(uaddr, &t32, sizeof(t32));
 4687         if (error != 0)
 4688                 return (error);
 4689         if (!timespecvalid_interval(&t32._timeout))
 4690                 return (EINVAL);
 4691         TS_CP(t32, *tp, _timeout);
 4692         CP(t32, *tp, _flags);
 4693         CP(t32, *tp, _clockid);
 4694         return (0);
 4695 }
 4696 
 4697 static int
 4698 umtx_copyout_timeouti386(void *uaddr, size_t sz, struct timespec *tsp)
 4699 {
 4700         struct timespeci386 remain32 = {
 4701                 .tv_sec = tsp->tv_sec,
 4702                 .tv_nsec = tsp->tv_nsec,
 4703         };
 4704 
 4705         /*
 4706          * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
 4707          * and we're only called if sz >= sizeof(timespec) as supplied in the
 4708          * copyops.
 4709          */
 4710         KASSERT(sz >= sizeof(remain32),
 4711             ("umtx_copyops specifies incorrect sizes"));
 4712 
 4713         return (copyout(&remain32, uaddr, sizeof(remain32)));
 4714 }
 4715 #endif /* !__i386__ */
 4716 
 4717 #if defined(__i386__) || defined(__LP64__)
 4718 static inline int
 4719 umtx_copyin_timeoutx32(const void *uaddr, struct timespec *tsp)
 4720 {
 4721         struct timespecx32 ts32;
 4722         int error;
 4723 
 4724         error = copyin(uaddr, &ts32, sizeof(ts32));
 4725         if (error == 0) {
 4726                 if (!timespecvalid_interval(&ts32))
 4727                         error = EINVAL;
 4728                 else {
 4729                         CP(ts32, *tsp, tv_sec);
 4730                         CP(ts32, *tsp, tv_nsec);
 4731                 }
 4732         }
 4733         return (error);
 4734 }
 4735 
 4736 static inline int
 4737 umtx_copyin_umtx_timex32(const void *uaddr, size_t size, struct _umtx_time *tp)
 4738 {
 4739         struct umtx_timex32 t32;
 4740         int error;
 4741 
 4742         t32._clockid = CLOCK_REALTIME;
 4743         t32._flags   = 0;
 4744         if (size <= sizeof(t32._timeout))
 4745                 error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
 4746         else
 4747                 error = copyin(uaddr, &t32, sizeof(t32));
 4748         if (error != 0)
 4749                 return (error);
 4750         if (!timespecvalid_interval(&t32._timeout))
 4751                 return (EINVAL);
 4752         TS_CP(t32, *tp, _timeout);
 4753         CP(t32, *tp, _flags);
 4754         CP(t32, *tp, _clockid);
 4755         return (0);
 4756 }
 4757 
 4758 static int
 4759 umtx_copyout_timeoutx32(void *uaddr, size_t sz, struct timespec *tsp)
 4760 {
 4761         struct timespecx32 remain32 = {
 4762                 .tv_sec = tsp->tv_sec,
 4763                 .tv_nsec = tsp->tv_nsec,
 4764         };
 4765 
 4766         /*
 4767          * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
 4768          * and we're only called if sz >= sizeof(timespec) as supplied in the
 4769          * copyops.
 4770          */
 4771         KASSERT(sz >= sizeof(remain32),
 4772             ("umtx_copyops specifies incorrect sizes"));
 4773 
 4774         return (copyout(&remain32, uaddr, sizeof(remain32)));
 4775 }
 4776 #endif /* __i386__ || __LP64__ */
 4777 
/*
 * Signature shared by all operation handlers: the calling thread, the
 * argument block, and the copyin/copyout operations for the caller's ABI.
 */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap,
    const struct umtx_copyops *umtx_ops);

/*
 * Dispatch table indexed by UMTX_OP_* number.  kern__umtx_op() rejects
 * any operation number at or beyond nitems(op_table).  Operations whose
 * compat option is not configured map to __umtx_op_unimpl.
 */
static const _umtx_op_func op_table[] = {
#ifdef COMPAT_FREEBSD10
        [UMTX_OP_LOCK]          = __umtx_op_lock_umtx,
        [UMTX_OP_UNLOCK]        = __umtx_op_unlock_umtx,
#else
        [UMTX_OP_LOCK]          = __umtx_op_unimpl,
        [UMTX_OP_UNLOCK]        = __umtx_op_unimpl,
#endif
        [UMTX_OP_WAIT]          = __umtx_op_wait,
        [UMTX_OP_WAKE]          = __umtx_op_wake,
        [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex,
        [UMTX_OP_MUTEX_LOCK]    = __umtx_op_lock_umutex,
        [UMTX_OP_MUTEX_UNLOCK]  = __umtx_op_unlock_umutex,
        [UMTX_OP_SET_CEILING]   = __umtx_op_set_ceiling,
        [UMTX_OP_CV_WAIT]       = __umtx_op_cv_wait,
        [UMTX_OP_CV_SIGNAL]     = __umtx_op_cv_signal,
        [UMTX_OP_CV_BROADCAST]  = __umtx_op_cv_broadcast,
        [UMTX_OP_WAIT_UINT]     = __umtx_op_wait_uint,
        [UMTX_OP_RW_RDLOCK]     = __umtx_op_rw_rdlock,
        [UMTX_OP_RW_WRLOCK]     = __umtx_op_rw_wrlock,
        [UMTX_OP_RW_UNLOCK]     = __umtx_op_rw_unlock,
        [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
        [UMTX_OP_WAKE_PRIVATE]  = __umtx_op_wake_private,
        [UMTX_OP_MUTEX_WAIT]    = __umtx_op_wait_umutex,
        [UMTX_OP_MUTEX_WAKE]    = __umtx_op_wake_umutex,
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
        [UMTX_OP_SEM_WAIT]      = __umtx_op_sem_wait,
        [UMTX_OP_SEM_WAKE]      = __umtx_op_sem_wake,
#else
        [UMTX_OP_SEM_WAIT]      = __umtx_op_unimpl,
        [UMTX_OP_SEM_WAKE]      = __umtx_op_unimpl,
#endif
        [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private,
        [UMTX_OP_MUTEX_WAKE2]   = __umtx_op_wake2_umutex,
        [UMTX_OP_SEM2_WAIT]     = __umtx_op_sem2_wait,
        [UMTX_OP_SEM2_WAKE]     = __umtx_op_sem2_wake,
        [UMTX_OP_SHM]           = __umtx_op_shm,
        [UMTX_OP_ROBUST_LISTS]  = __umtx_op_robust_lists,
};
 4820 
/* Copy operations for the kernel's own struct timespec / _umtx_time. */
static const struct umtx_copyops umtx_native_ops = {
        .copyin_timeout = umtx_copyin_timeout,
        .copyin_umtx_time = umtx_copyin_umtx_time,
        .copyin_robust_lists = umtx_copyin_robust_lists,
        .copyout_timeout = umtx_copyout_timeout,
        .timespec_sz = sizeof(struct timespec),
        .umtx_time_sz = sizeof(struct _umtx_time),
};

#ifndef __i386__
/* Copy operations for the timespeci386 / umtx_timei386 layouts. */
static const struct umtx_copyops umtx_native_opsi386 = {
        .copyin_timeout = umtx_copyin_timeouti386,
        .copyin_umtx_time = umtx_copyin_umtx_timei386,
        .copyin_robust_lists = umtx_copyin_robust_lists32,
        .copyout_timeout = umtx_copyout_timeouti386,
        .timespec_sz = sizeof(struct timespeci386),
        .umtx_time_sz = sizeof(struct umtx_timei386),
        .compat32 = true,
};
#endif

#if defined(__i386__) || defined(__LP64__)
/* i386 can emulate other 32-bit archs, too! */
/* Copy operations for the timespecx32 / umtx_timex32 layouts. */
static const struct umtx_copyops umtx_native_opsx32 = {
        .copyin_timeout = umtx_copyin_timeoutx32,
        .copyin_umtx_time = umtx_copyin_umtx_timex32,
        .copyin_robust_lists = umtx_copyin_robust_lists32,
        .copyout_timeout = umtx_copyout_timeoutx32,
        .timespec_sz = sizeof(struct timespecx32),
        .umtx_time_sz = sizeof(struct umtx_timex32),
        .compat32 = true,
};

/* Ops used by freebsd32__umtx_op(): i386 layout on amd64, x32 elsewhere. */
#ifdef COMPAT_FREEBSD32
#ifdef __amd64__
#define umtx_native_ops32       umtx_native_opsi386
#else
#define umtx_native_ops32       umtx_native_opsx32
#endif
#endif /* COMPAT_FREEBSD32 */
#endif /* __i386__ || __LP64__ */

/* Flag bits carried in the op argument; masked off before dispatch. */
#define UMTX_OP__FLAGS  (UMTX_OP__32BIT | UMTX_OP__I386)
 4864 
 4865 static int
 4866 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
 4867     void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
 4868 {
 4869         struct _umtx_op_args uap = {
 4870                 .obj = obj,
 4871                 .op = op & ~UMTX_OP__FLAGS,
 4872                 .val = val,
 4873                 .uaddr1 = uaddr1,
 4874                 .uaddr2 = uaddr2
 4875         };
 4876 
 4877         if ((uap.op >= nitems(op_table)))
 4878                 return (EINVAL);
 4879         return ((*op_table[uap.op])(td, &uap, ops));
 4880 }
 4881 
 4882 int
 4883 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
 4884 {
 4885         static const struct umtx_copyops *umtx_ops;
 4886 
 4887         umtx_ops = &umtx_native_ops;
 4888 #ifdef __LP64__
 4889         if ((uap->op & (UMTX_OP__32BIT | UMTX_OP__I386)) != 0) {
 4890                 if ((uap->op & UMTX_OP__I386) != 0)
 4891                         umtx_ops = &umtx_native_opsi386;
 4892                 else
 4893                         umtx_ops = &umtx_native_opsx32;
 4894         }
 4895 #elif !defined(__i386__)
 4896         /* We consider UMTX_OP__32BIT a nop on !i386 ILP32. */
 4897         if ((uap->op & UMTX_OP__I386) != 0)
 4898                 umtx_ops = &umtx_native_opsi386;
 4899 #else
 4900         /* Likewise, UMTX_OP__I386 is a nop on i386. */
 4901         if ((uap->op & UMTX_OP__32BIT) != 0)
 4902                 umtx_ops = &umtx_native_opsx32;
 4903 #endif
 4904         return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
 4905             uap->uaddr2, umtx_ops));
 4906 }
 4907 
 4908 #ifdef COMPAT_FREEBSD32
 4909 #ifdef COMPAT_FREEBSD10
 4910 int
 4911 freebsd10_freebsd32__umtx_lock(struct thread *td,
 4912     struct freebsd10_freebsd32__umtx_lock_args *uap)
 4913 {
 4914         return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
 4915 }
 4916 
 4917 int
 4918 freebsd10_freebsd32__umtx_unlock(struct thread *td,
 4919     struct freebsd10_freebsd32__umtx_unlock_args *uap)
 4920 {
 4921         return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
 4922 }
 4923 #endif /* COMPAT_FREEBSD10 */
 4924 
 4925 int
 4926 freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap)
 4927 {
 4928 
 4929         return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
 4930             uap->uaddr2, &umtx_native_ops32));
 4931 }
 4932 #endif /* COMPAT_FREEBSD32 */
 4933 
 4934 void
 4935 umtx_thread_init(struct thread *td)
 4936 {
 4937 
 4938         td->td_umtxq = umtxq_alloc();
 4939         td->td_umtxq->uq_thread = td;
 4940 }
 4941 
 4942 void
 4943 umtx_thread_fini(struct thread *td)
 4944 {
 4945 
 4946         umtxq_free(td->td_umtxq);
 4947 }
 4948 
 4949 /*
 4950  * It will be called when new thread is created, e.g fork().
 4951  */
 4952 void
 4953 umtx_thread_alloc(struct thread *td)
 4954 {
 4955         struct umtx_q *uq;
 4956 
 4957         uq = td->td_umtxq;
 4958         uq->uq_inherited_pri = PRI_MAX;
 4959 
 4960         KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
 4961         KASSERT(uq->uq_thread == td, ("uq_thread != td"));
 4962         KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
 4963         KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
 4964 }
 4965 
 4966 /*
 4967  * exec() hook.
 4968  *
 4969  * Clear robust lists for all process' threads, not delaying the
 4970  * cleanup to thread exit, since the relevant address space is
 4971  * destroyed right now.
 4972  */
 4973 void
 4974 umtx_exec(struct proc *p)
 4975 {
 4976         struct thread *td;
 4977 
 4978         KASSERT(p == curproc, ("need curproc"));
 4979         KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
 4980             (p->p_flag & P_STOPPED_SINGLE) != 0,
 4981             ("curproc must be single-threaded"));
 4982         /*
 4983          * There is no need to lock the list as only this thread can be
 4984          * running.
 4985          */
 4986         FOREACH_THREAD_IN_PROC(p, td) {
 4987                 KASSERT(td == curthread ||
 4988                     ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
 4989                     ("running thread %p %p", p, td));
 4990                 umtx_thread_cleanup(td);
 4991                 td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
 4992         }
 4993 }
 4994 
 4995 /*
 4996  * thread exit hook.
 4997  */
 4998 void
 4999 umtx_thread_exit(struct thread *td)
 5000 {
 5001 
 5002         umtx_thread_cleanup(td);
 5003 }
 5004 
 5005 static int
 5006 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
 5007 {
 5008         u_long res1;
 5009         uint32_t res32;
 5010         int error;
 5011 
 5012         if (compat32) {
 5013                 error = fueword32((void *)ptr, &res32);
 5014                 if (error == 0)
 5015                         res1 = res32;
 5016         } else {
 5017                 error = fueword((void *)ptr, &res1);
 5018         }
 5019         if (error == 0)
 5020                 *res = res1;
 5021         else
 5022                 error = EFAULT;
 5023         return (error);
 5024 }
 5025 
 5026 static void
 5027 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
 5028     bool compat32)
 5029 {
 5030         struct umutex32 m32;
 5031 
 5032         if (compat32) {
 5033                 memcpy(&m32, m, sizeof(m32));
 5034                 *rb_list = m32.m_rb_lnk;
 5035         } else {
 5036                 *rb_list = m->m_rb_lnk;
 5037         }
 5038 }
 5039 
 5040 static int
 5041 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
 5042     bool compat32)
 5043 {
 5044         struct umutex m;
 5045         int error;
 5046 
 5047         KASSERT(td->td_proc == curproc, ("need current vmspace"));
 5048         error = copyin((void *)rbp, &m, sizeof(m));
 5049         if (error != 0)
 5050                 return (error);
 5051         if (rb_list != NULL)
 5052                 umtx_read_rb_list(td, &m, rb_list, compat32);
 5053         if ((m.m_flags & UMUTEX_ROBUST) == 0)
 5054                 return (EINVAL);
 5055         if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
 5056                 /* inact is cleared after unlock, allow the inconsistency */
 5057                 return (inact ? 0 : EINVAL);
 5058         return (do_unlock_umutex(td, (struct umutex *)rbp, true));
 5059 }
 5060 
 5061 static void
 5062 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
 5063     const char *name, bool compat32)
 5064 {
 5065         int error, i;
 5066         uintptr_t rbp;
 5067         bool inact;
 5068 
 5069         if (rb_list == 0)
 5070                 return;
 5071         error = umtx_read_uptr(td, rb_list, &rbp, compat32);
 5072         for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
 5073                 if (rbp == *rb_inact) {
 5074                         inact = true;
 5075                         *rb_inact = 0;
 5076                 } else
 5077                         inact = false;
 5078                 error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
 5079         }
 5080         if (i == umtx_max_rb && umtx_verbose_rb) {
 5081                 uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
 5082                     td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
 5083         }
 5084         if (error != 0 && umtx_verbose_rb) {
 5085                 uprintf("comm %s pid %d: handling %srb error %d\n",
 5086                     td->td_proc->p_comm, td->td_proc->p_pid, name, error);
 5087         }
 5088 }
 5089 
 5090 /*
 5091  * Clean up umtx data.
 5092  */
 5093 static void
 5094 umtx_thread_cleanup(struct thread *td)
 5095 {
 5096         struct umtx_q *uq;
 5097         struct umtx_pi *pi;
 5098         uintptr_t rb_inact;
 5099         bool compat32;
 5100 
 5101         /*
 5102          * Disown pi mutexes.
 5103          */
 5104         uq = td->td_umtxq;
 5105         if (uq != NULL) {
 5106                 if (uq->uq_inherited_pri != PRI_MAX ||
 5107                     !TAILQ_EMPTY(&uq->uq_pi_contested)) {
 5108                         mtx_lock(&umtx_lock);
 5109                         uq->uq_inherited_pri = PRI_MAX;
 5110                         while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
 5111                                 pi->pi_owner = NULL;
 5112                                 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
 5113                         }
 5114                         mtx_unlock(&umtx_lock);
 5115                 }
 5116                 sched_lend_user_prio_cond(td, PRI_MAX);
 5117         }
 5118 
 5119         compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
 5120         td->td_pflags2 &= ~TDP2_COMPAT32RB;
 5121 
 5122         if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
 5123                 return;
 5124 
 5125         /*
 5126          * Handle terminated robust mutexes.  Must be done after
 5127          * robust pi disown, otherwise unlock could see unowned
 5128          * entries.
 5129          */
 5130         rb_inact = td->td_rb_inact;
 5131         if (rb_inact != 0)
 5132                 (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
 5133         umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
 5134         umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
 5135         if (rb_inact != 0)
 5136                 (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
 5137 }

Cache object: 0c66613badbac9ab8e73fabd1ce59b04


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.