The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/compat/cloudabi/cloudabi_futex.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2015 Nuxi, https://nuxi.nl/
    3  *
    4  * Redistribution and use in source and binary forms, with or without
    5  * modification, are permitted provided that the following conditions
    6  * are met:
    7  * 1. Redistributions of source code must retain the above copyright
    8  *    notice, this list of conditions and the following disclaimer.
    9  * 2. Redistributions in binary form must reproduce the above copyright
   10  *    notice, this list of conditions and the following disclaimer in the
   11  *    documentation and/or other materials provided with the distribution.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  */
   25 
   26 #include <sys/cdefs.h>
   27 __FBSDID("$FreeBSD$");
   28 
   29 #include <sys/param.h>
   30 #include <sys/kernel.h>
   31 #include <sys/limits.h>
   32 #include <sys/lock.h>
   33 #include <sys/malloc.h>
   34 #include <sys/mutex.h>
   35 #include <sys/proc.h>
   36 #include <sys/sx.h>
   37 #include <sys/systm.h>
   38 #include <sys/umtx.h>
   39 
   40 #include <contrib/cloudabi/cloudabi_types_common.h>
   41 
   42 #include <compat/cloudabi/cloudabi_proto.h>
   43 #include <compat/cloudabi/cloudabi_util.h>
   44 
   45 /*
   46  * Futexes for CloudABI.
   47  *
   48  * On most systems, futexes are implemented as objects of a single type
   49  * on which a set of operations can be performed. CloudABI makes a clear
   50  * distinction between locks and condition variables. A lock may have
   51  * zero or more associated condition variables. A condition variable is
   52  * always associated with exactly one lock. There is a strict topology.
   53  * This approach has two advantages:
   54  *
   55  * - This topology is guaranteed to be acyclic. Requeueing of threads
   56  *   only happens in one direction (from condition variables to locks).
   57  *   This eases locking.
   58  * - It means that a futex object for a lock exists when it is unlocked,
   59  *   but has threads waiting on associated condition variables. Threads
   60  *   can be requeued to a lock even if the thread performing the wakeup
   61  *   does not have the lock mapped in its address space.
   62  *
   63  * This futex implementation only implements a single lock type, namely
   64  * a read-write lock. A regular mutex type would not be necessary, as
   65  * the read-write lock is as efficient as a mutex if used as such.
   66  * Userspace futex locks are 32 bits in size:
   67  *
   68  * - 1 bit: has threads waiting in kernel-space.
   69  * - 1 bit: is write-locked.
   70  * - 30 bits:
   71  *   - if write-locked: thread ID of owner.
   72  *   - if not write-locked: number of read locks held.
   73  *
   74  * Condition variables are also 32 bits in size. Their value is
   75  * modified exclusively by kernel-space. Zero indicates that a condition
   76  * variable has no waiting threads; non-zero indicates the opposite.
   77  *
   78  * This implementation is optimal, in the sense that it only wakes up
   79  * threads if they can actually continue execution. It does not suffer
   80  * from the thundering herd problem. If multiple threads waiting on a
   81  * condition variable need to be woken up, only a single thread is
   82  * scheduled. All other threads are 'donated' to this thread. After the
   83  * thread manages to reacquire the lock, it requeues its donated threads
   84  * to the lock.
   85  *
   86  * TODO(ed): Integrate this functionality into kern_umtx.c instead.
   87  * TODO(ed): Store futex objects in a hash table.
   88  * TODO(ed): Add actual priority inheritance.
   89  * TODO(ed): Let futex_queue also take priorities into account.
   90  * TODO(ed): Make locking fine-grained.
   91  * TODO(ed): Perform sleeps until an actual absolute point in time,
   92  *           instead of converting the timestamp to a relative value.
   93  */
   94 
/* Forward declarations of the futex object types defined below. */
struct futex_address;
struct futex_condvar;
struct futex_lock;
struct futex_queue;
struct futex_waiter;

/* Identifier of a location in memory. */
struct futex_address {
        /* umtx key that uniquely identifies the userspace address. */
        struct umtx_key                 fa_key;
};

/* A set of waiting threads. */
struct futex_queue {
        STAILQ_HEAD(, futex_waiter)     fq_list;
        /* Number of waiters on fq_list; kept in sync by the queue ops. */
        unsigned int                    fq_count;
};

/* Condition variables. */
struct futex_condvar {
        /* Address of the condition variable. */
        struct futex_address            fc_address;

        /* The lock the waiters should be moved to when signalled. */
        struct futex_lock *             fc_lock;

        /* Threads waiting on the condition variable. */
        struct futex_queue              fc_waiters;
        /*
         * Number of threads blocked on this condition variable, or
         * being blocked on the lock after being requeued.
         */
        unsigned int                    fc_waitcount;

        /* Global list pointers (futex_condvar_list membership). */
        LIST_ENTRY(futex_condvar)       fc_next;
};

/* Read-write locks. */
struct futex_lock {
        /* Address of the lock. */
        struct futex_address            fl_address;

        /*
         * Current owner of the lock. LOCK_UNMANAGED if the lock is
         * currently not owned by the kernel. LOCK_OWNER_UNKNOWN in case
         * the owner is not known (e.g., when the lock is read-locked).
         */
        cloudabi_tid_t                  fl_owner;
#define LOCK_UNMANAGED 0x0
#define LOCK_OWNER_UNKNOWN 0x1

        /* Writers blocked on the lock. */
        struct futex_queue              fl_writers;
        /* Readers blocked on the lock. */
        struct futex_queue              fl_readers;
        /* Number of threads blocked on this lock + condition variables. */
        unsigned int                    fl_waitcount;

        /* Global list pointers (futex_lock_list membership). */
        LIST_ENTRY(futex_lock)          fl_next;
};

/* Information associated with a thread blocked on an object. */
struct futex_waiter {
        /* Thread ID. */
        cloudabi_tid_t                  fw_tid;
        /* Condition variable used for waiting (kernel cv, not CloudABI's). */
        struct cv                       fw_wait;

        /* Queue this waiter is currently placed in. */
        struct futex_queue *            fw_queue;
        /* List pointers of fw_queue. */
        STAILQ_ENTRY(futex_waiter)      fw_next;

        /* Lock has been acquired. */
        bool                            fw_locked;
        /* If not locked, threads that should block after acquiring. */
        struct futex_queue              fw_donated;
};
  174 
/* Global data structures. */
static MALLOC_DEFINE(M_FUTEX, "futex", "CloudABI futex");

/*
 * Protects the two global object lists below and all futex lock and
 * condition variable objects (locking is coarse; see the TODO list in
 * the header comment).
 */
static struct sx futex_global_lock;
SX_SYSINIT(futex_global_lock, &futex_global_lock, "CloudABI futex global lock");

/* All futex locks currently known to the kernel. */
static LIST_HEAD(, futex_lock) futex_lock_list =
    LIST_HEAD_INITIALIZER(&futex_lock_list);
/* All futex condition variables currently known to the kernel. */
static LIST_HEAD(, futex_condvar) futex_condvar_list =
    LIST_HEAD_INITIALIZER(&futex_condvar_list);
  185 
/* Utility functions: forward declarations of the local helpers below. */
static void futex_lock_assert(const struct futex_lock *);
static struct futex_lock *futex_lock_lookup_locked(struct futex_address *);
static void futex_lock_release(struct futex_lock *);
static int futex_lock_tryrdlock(struct futex_lock *, cloudabi_lock_t *);
static int futex_lock_unmanage(struct futex_lock *, cloudabi_lock_t *);
static int futex_lock_update_owner(struct futex_lock *, cloudabi_lock_t *);
static int futex_lock_wake_up_next(struct futex_lock *, cloudabi_lock_t *);
static unsigned int futex_queue_count(const struct futex_queue *);
static void futex_queue_init(struct futex_queue *);
static void futex_queue_requeue(struct futex_queue *, struct futex_queue *,
    unsigned int);
static int futex_queue_sleep(struct futex_queue *, struct futex_lock *,
    struct futex_waiter *, struct thread *, cloudabi_clockid_t,
    cloudabi_timestamp_t, cloudabi_timestamp_t, bool);
static cloudabi_tid_t futex_queue_tid_best(const struct futex_queue *);
static void futex_queue_wake_up_all(struct futex_queue *);
static void futex_queue_wake_up_best(struct futex_queue *);
static void futex_queue_wake_up_donate(struct futex_queue *, unsigned int);
static int futex_user_load(uint32_t *, uint32_t *);
static int futex_user_store(uint32_t *, uint32_t);
static int futex_user_cmpxchg(uint32_t *, uint32_t, uint32_t *, uint32_t);
  208 
  209 /*
  210  * futex_address operations.
  211  */
  212 
  213 static int
  214 futex_address_create(struct futex_address *fa, struct thread *td,
  215     const void *object, cloudabi_scope_t scope)
  216 {
  217 
  218         KASSERT(td == curthread,
  219             ("Can only create umtx keys for the current thread"));
  220         switch (scope) {
  221         case CLOUDABI_SCOPE_PRIVATE:
  222                 return (umtx_key_get(object, TYPE_FUTEX, THREAD_SHARE,
  223                     &fa->fa_key));
  224         case CLOUDABI_SCOPE_SHARED:
  225                 return (umtx_key_get(object, TYPE_FUTEX, AUTO_SHARE,
  226                     &fa->fa_key));
  227         default:
  228                 return (EINVAL);
  229         }
  230 }
  231 
/* Releases the umtx key backing a futex address. */
static void
futex_address_free(struct futex_address *fa)
{

        umtx_key_release(&fa->fa_key);
}
  238 
/* Returns whether two futex addresses refer to the same memory object. */
static bool
futex_address_match(const struct futex_address *fa1,
    const struct futex_address *fa2)
{

        return (umtx_key_match(&fa1->fa_key, &fa2->fa_key));
}
  246 
  247 /*
  248  * futex_condvar operations.
  249  */
  250 
/*
 * Validates the invariants of a condition variable object: the total
 * wait count includes the requeued threads, so it can never be smaller
 * than the number of threads still sitting on the wait queue. Also
 * checks the associated lock's invariants.
 */
static void
futex_condvar_assert(const struct futex_condvar *fc)
{

        KASSERT(fc->fc_waitcount >= futex_queue_count(&fc->fc_waiters),
            ("Total number of waiters cannot be smaller than the wait queue"));
        futex_lock_assert(fc->fc_lock);
}
  259 
/*
 * Looks up an existing condition variable object for a userspace
 * address. On success (0), *fcret is set and the futex global lock is
 * held on return; the caller must later drop it through
 * futex_condvar_release(). On ENOENT or address-creation failure the
 * global lock is not held.
 */
static int
futex_condvar_lookup(struct thread *td, const cloudabi_condvar_t *address,
    cloudabi_scope_t scope, struct futex_condvar **fcret)
{
        struct futex_address fa_condvar;
        struct futex_condvar *fc;
        int error;

        error = futex_address_create(&fa_condvar, td, address, scope);
        if (error != 0)
                return (error);

        sx_xlock(&futex_global_lock);
        LIST_FOREACH(fc, &futex_condvar_list, fc_next) {
                if (futex_address_match(&fc->fc_address, &fa_condvar)) {
                        /* Found matching condition variable object. */
                        futex_address_free(&fa_condvar);
                        futex_condvar_assert(fc);
                        *fcret = fc;
                        /* NB: returns with futex_global_lock held. */
                        return (0);
                }
        }
        sx_xunlock(&futex_global_lock);
        futex_address_free(&fa_condvar);
        return (ENOENT);
}
  286 
/*
 * Looks up the condition variable object for the given userspace
 * condvar/lock pair, creating one if none exists. On success (0),
 * *fcret is set and the futex global lock is held on return (both in
 * the found and the newly-created case); release through
 * futex_condvar_release(). Returns EINVAL if the condition variable is
 * already associated with a different lock (strict topology; see the
 * header comment).
 */
static int
futex_condvar_lookup_or_create(struct thread *td,
    const cloudabi_condvar_t *condvar, cloudabi_scope_t condvar_scope,
    const cloudabi_lock_t *lock, cloudabi_scope_t lock_scope,
    struct futex_condvar **fcret)
{
        struct futex_address fa_condvar, fa_lock;
        struct futex_condvar *fc;
        struct futex_lock *fl;
        int error;

        error = futex_address_create(&fa_condvar, td, condvar, condvar_scope);
        if (error != 0)
                return (error);
        error = futex_address_create(&fa_lock, td, lock, lock_scope);
        if (error != 0) {
                futex_address_free(&fa_condvar);
                return (error);
        }

        sx_xlock(&futex_global_lock);
        LIST_FOREACH(fc, &futex_condvar_list, fc_next) {
                if (!futex_address_match(&fc->fc_address, &fa_condvar))
                        continue;
                fl = fc->fc_lock;
                if (!futex_address_match(&fl->fl_address, &fa_lock)) {
                        /* Condition variable is owned by a different lock. */
                        futex_address_free(&fa_condvar);
                        futex_address_free(&fa_lock);
                        sx_xunlock(&futex_global_lock);
                        return (EINVAL);
                }

                /* Found fully matching condition variable. */
                futex_address_free(&fa_condvar);
                futex_address_free(&fa_lock);
                futex_condvar_assert(fc);
                *fcret = fc;
                return (0);
        }

        /*
         * None found. Create new condition variable object. The new
         * object takes over ownership of fa_condvar; fa_lock is
         * consumed by futex_lock_lookup_locked().
         */
        fc = malloc(sizeof(*fc), M_FUTEX, M_WAITOK);
        fc->fc_address = fa_condvar;
        fc->fc_lock = futex_lock_lookup_locked(&fa_lock);
        futex_queue_init(&fc->fc_waiters);
        fc->fc_waitcount = 0;
        LIST_INSERT_HEAD(&futex_condvar_list, fc, fc_next);
        *fcret = fc;
        return (0);
}
  338 
/*
 * Releases a condition variable obtained through the lookup functions
 * above. Deallocates the object when no threads reference it any more,
 * then releases the associated lock object, which in turn drops the
 * futex global lock.
 */
static void
futex_condvar_release(struct futex_condvar *fc)
{
        struct futex_lock *fl;

        futex_condvar_assert(fc);
        fl = fc->fc_lock;
        if (fc->fc_waitcount == 0) {
                /* Condition variable has no waiters. Deallocate it. */
                futex_address_free(&fc->fc_address);
                LIST_REMOVE(fc, fc_next);
                free(fc, M_FUTEX);
        }
        /* Also drops the futex global lock. */
        futex_lock_release(fl);
}
  354 
  355 static int
  356 futex_condvar_unmanage(struct futex_condvar *fc,
  357     cloudabi_condvar_t *condvar)
  358 {
  359 
  360         if (futex_queue_count(&fc->fc_waiters) != 0)
  361                 return (0);
  362         return (futex_user_store(condvar, CLOUDABI_CONDVAR_HAS_NO_WAITERS));
  363 }
  364 
  365 /*
  366  * futex_lock operations.
  367  */
  368 
/* Validates the invariants of a futex lock object. */
static void
futex_lock_assert(const struct futex_lock *fl)
{

        /*
         * A futex lock can only be kernel-managed if it has waiters.
         * Vice versa: if a futex lock has waiters, it must be
         * kernel-managed.
         */
        KASSERT((fl->fl_owner == LOCK_UNMANAGED) ==
            (futex_queue_count(&fl->fl_readers) == 0 &&
            futex_queue_count(&fl->fl_writers) == 0),
            ("Managed locks must have waiting threads"));
        /* fl_waitcount also counts condvar waiters, hence a separate check. */
        KASSERT(fl->fl_waitcount != 0 || fl->fl_owner == LOCK_UNMANAGED,
            ("Lock with no waiters must be unmanaged"));
}
  385 
/*
 * Looks up (or creates) the futex lock object for a userspace address.
 * On success (0), *flret is set and the futex global lock is held on
 * return; the caller must drop it through futex_lock_release().
 */
static int
futex_lock_lookup(struct thread *td, const cloudabi_lock_t *address,
    cloudabi_scope_t scope, struct futex_lock **flret)
{
        struct futex_address fa;
        int error;

        error = futex_address_create(&fa, td, address, scope);
        if (error != 0)
                return (error);

        sx_xlock(&futex_global_lock);
        /* NB: returns with futex_global_lock held. */
        *flret = futex_lock_lookup_locked(&fa);
        return (0);
}
  401 
/*
 * Returns the futex lock object matching address *fa, creating one if
 * none exists. Consumes *fa: it is either freed (existing object
 * found) or stored into the newly-allocated object. Caller must hold
 * the futex global lock.
 */
static struct futex_lock *
futex_lock_lookup_locked(struct futex_address *fa)
{
        struct futex_lock *fl;

        LIST_FOREACH(fl, &futex_lock_list, fl_next) {
                if (futex_address_match(&fl->fl_address, fa)) {
                        /* Found matching lock object. */
                        futex_address_free(fa);
                        futex_lock_assert(fl);
                        return (fl);
                }
        }

        /* None found. Create new lock object; it takes ownership of *fa. */
        fl = malloc(sizeof(*fl), M_FUTEX, M_WAITOK);
        fl->fl_address = *fa;
        fl->fl_owner = LOCK_UNMANAGED;
        futex_queue_init(&fl->fl_readers);
        futex_queue_init(&fl->fl_writers);
        fl->fl_waitcount = 0;
        LIST_INSERT_HEAD(&futex_lock_list, fl, fl_next);
        return (fl);
}
  426 
/*
 * Acquires a read lock on behalf of the calling thread, sleeping on the
 * readers queue if the lock is currently write-locked. On timeout or
 * failure the lock is unmanaged again if no waiters remain. Returns 0
 * on success, or a timeout/fault error.
 */
static int
futex_lock_rdlock(struct futex_lock *fl, struct thread *td,
    cloudabi_lock_t *lock, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
        struct futex_waiter fw;
        int error;

        error = futex_lock_tryrdlock(fl, lock);
        if (error == EBUSY) {
                /* Suspend execution. */
                KASSERT(fl->fl_owner != LOCK_UNMANAGED,
                    ("Attempted to sleep on an unmanaged lock"));
                error = futex_queue_sleep(&fl->fl_readers, fl, &fw, td,
                    clock_id, timeout, precision, abstime);
                KASSERT((error == 0) == fw.fw_locked,
                    ("Should have locked write lock on success"));
                KASSERT(futex_queue_count(&fw.fw_donated) == 0,
                    ("Lock functions cannot receive threads"));
        }
        if (error != 0)
                futex_lock_unmanage(fl, lock);
        return (error);
}
  451 
/*
 * Releases a lock object obtained through futex_lock_lookup().
 * Deallocates the object when nothing references it any more, and
 * drops the futex global lock that the lookup acquired.
 */
static void
futex_lock_release(struct futex_lock *fl)
{

        futex_lock_assert(fl);
        if (fl->fl_waitcount == 0) {
                /* Lock object is unreferenced. Deallocate it. */
                KASSERT(fl->fl_owner == LOCK_UNMANAGED,
                    ("Attempted to free a managed lock"));
                futex_address_free(&fl->fl_address);
                LIST_REMOVE(fl, fl_next);
                free(fl, M_FUTEX);
        }
        sx_xunlock(&futex_global_lock);
}
  467 
/*
 * If the lock no longer has any sleeping threads, marks it unmanaged
 * and clears the kernel-managed bit in the userspace lock word, so that
 * userspace can lock/unlock without kernel intervention again. The
 * cmpxchg loop retries until the bit is cleared without disturbing
 * concurrent userspace updates of the other bits. Returns 0 or a fault
 * error.
 */
static int
futex_lock_unmanage(struct futex_lock *fl, cloudabi_lock_t *lock)
{
        cloudabi_lock_t cmp, old;
        int error;

        if (futex_queue_count(&fl->fl_readers) == 0 &&
            futex_queue_count(&fl->fl_writers) == 0) {
                /* Lock should be unmanaged. */
                fl->fl_owner = LOCK_UNMANAGED;

                /* Clear kernel-managed bit. */
                error = futex_user_load(lock, &old);
                if (error != 0)
                        return (error);
                for (;;) {
                        cmp = old;
                        error = futex_user_cmpxchg(lock, cmp, &old,
                            cmp & ~CLOUDABI_LOCK_KERNEL_MANAGED);
                        if (error != 0)
                                return (error);
                        if (old == cmp)
                                break;
                }
        }
        return (0);
}
  495 
/*
 * Sets an owner of a lock, based on a userspace lock value. When the
 * owner cannot be derived from the value, LOCK_OWNER_UNKNOWN is stored
 * so that futex_lock_update_owner() re-reads the word later.
 */
static void
futex_lock_set_owner(struct futex_lock *fl, cloudabi_lock_t lock)
{

        /* Lock has no explicit owner. */
        if ((lock & ~CLOUDABI_LOCK_WRLOCKED) == 0) {
                fl->fl_owner = LOCK_OWNER_UNKNOWN;
                return;
        }
        lock &= ~(CLOUDABI_LOCK_WRLOCKED | CLOUDABI_LOCK_KERNEL_MANAGED);

        /*
         * Don't allow userspace to silently unlock: a masked value equal
         * to LOCK_UNMANAGED (0) would make the lock appear unmanaged.
         */
        if (lock == LOCK_UNMANAGED) {
                fl->fl_owner = LOCK_OWNER_UNKNOWN;
                return;
        }
        fl->fl_owner = lock;
}
  515 
/*
 * Unlocks a write lock held by the calling thread and wakes up the next
 * waiter(s). Returns EPERM when the kernel knows the owner and it is
 * not the calling thread.
 */
static int
futex_lock_unlock(struct futex_lock *fl, struct thread *td,
    cloudabi_lock_t *lock)
{
        int error;

        /* Validate that this thread is allowed to unlock. */
        error = futex_lock_update_owner(fl, lock);
        if (error != 0)
                return (error);
        if (fl->fl_owner != LOCK_UNMANAGED && fl->fl_owner != td->td_tid)
                return (EPERM);
        return (futex_lock_wake_up_next(fl, lock));
}
  530 
  531 /* Syncs in the owner of the lock from userspace if needed. */
  532 static int
  533 futex_lock_update_owner(struct futex_lock *fl, cloudabi_lock_t *address)
  534 {
  535         cloudabi_lock_t lock;
  536         int error;
  537 
  538         if (fl->fl_owner == LOCK_OWNER_UNKNOWN) {
  539                 error = futex_user_load(address, &lock);
  540                 if (error != 0)
  541                         return (error);
  542                 futex_lock_set_owner(fl, lock);
  543         }
  544         return (0);
  545 }
  546 
/*
 * Attempts to acquire a read lock without blocking. Returns 0 when the
 * read count in the userspace word was successfully incremented, EBUSY
 * when the caller has to sleep (the lock is write-locked; the word is
 * then marked kernel-managed first), EINVAL when the word claims to be
 * kernel-managed while the kernel disagrees, or a fault error.
 */
static int
futex_lock_tryrdlock(struct futex_lock *fl, cloudabi_lock_t *address)
{
        cloudabi_lock_t old, cmp;
        int error;

        if (fl->fl_owner != LOCK_UNMANAGED) {
                /* Lock is already acquired. */
                return (EBUSY);
        }

        /* Optimistically assume the lock is unlocked; cmpxchg corrects us. */
        old = CLOUDABI_LOCK_UNLOCKED;
        for (;;) {
                if ((old & CLOUDABI_LOCK_KERNEL_MANAGED) != 0) {
                        /*
                         * Userspace lock is kernel-managed, even though
                         * the kernel disagrees.
                         */
                        return (EINVAL);
                }

                if ((old & CLOUDABI_LOCK_WRLOCKED) == 0) {
                        /*
                         * Lock is not write-locked. Attempt to acquire
                         * it by increasing the read count.
                         */
                        cmp = old;
                        error = futex_user_cmpxchg(address, cmp, &old, cmp + 1);
                        if (error != 0)
                                return (error);
                        if (old == cmp) {
                                /* Success. */
                                return (0);
                        }
                } else {
                        /* Lock is write-locked. Make it kernel-managed. */
                        cmp = old;
                        error = futex_user_cmpxchg(address, cmp, &old,
                            cmp | CLOUDABI_LOCK_KERNEL_MANAGED);
                        if (error != 0)
                                return (error);
                        if (old == cmp) {
                                /* Success. */
                                futex_lock_set_owner(fl, cmp);
                                return (EBUSY);
                        }
                }
        }
}
  596 
/*
 * Attempts to acquire a write lock for thread `tid` without blocking.
 * Returns 0 on success (kernel-managed immediately if
 * force_kernel_managed, e.g. when threads will be donated), EBUSY when
 * the caller has to sleep (the word is marked kernel-managed first),
 * EDEADLK on recursive acquisition, EINVAL on an inconsistent
 * kernel-managed bit, or a fault error.
 */
static int
futex_lock_trywrlock(struct futex_lock *fl, cloudabi_lock_t *address,
    cloudabi_tid_t tid, bool force_kernel_managed)
{
        cloudabi_lock_t old, new, cmp;
        int error;

        if (fl->fl_owner == tid) {
                /* Attempted to acquire lock recursively. */
                return (EDEADLK);
        }
        if (fl->fl_owner != LOCK_UNMANAGED) {
                /* Lock is already acquired. */
                return (EBUSY);
        }

        /* Optimistically assume the lock is unlocked; cmpxchg corrects us. */
        old = CLOUDABI_LOCK_UNLOCKED;
        for (;;) {
                if ((old & CLOUDABI_LOCK_KERNEL_MANAGED) != 0) {
                        /*
                         * Userspace lock is kernel-managed, even though
                         * the kernel disagrees.
                         */
                        return (EINVAL);
                }
                if (old == (tid | CLOUDABI_LOCK_WRLOCKED)) {
                        /* Attempted to acquire lock recursively. */
                        return (EDEADLK);
                }

                if (old == CLOUDABI_LOCK_UNLOCKED) {
                        /* Lock is unlocked. Attempt to acquire it. */
                        new = tid | CLOUDABI_LOCK_WRLOCKED;
                        if (force_kernel_managed)
                                new |= CLOUDABI_LOCK_KERNEL_MANAGED;
                        error = futex_user_cmpxchg(address,
                            CLOUDABI_LOCK_UNLOCKED, &old, new);
                        if (error != 0)
                                return (error);
                        if (old == CLOUDABI_LOCK_UNLOCKED) {
                                /* Success. */
                                if (force_kernel_managed)
                                        fl->fl_owner = tid;
                                return (0);
                        }
                } else {
                        /* Lock is still locked. Make it kernel-managed. */
                        cmp = old;
                        error = futex_user_cmpxchg(address, cmp, &old,
                            cmp | CLOUDABI_LOCK_KERNEL_MANAGED);
                        if (error != 0)
                                return (error);
                        if (old == cmp) {
                                /* Success. */
                                futex_lock_set_owner(fl, cmp);
                                return (EBUSY);
                        }
                }
        }
}
  657 
/*
 * Transfers ownership of the userspace lock word to the next waiter(s)
 * and wakes them: a single writer if any are queued (preferred over
 * readers to prevent write starvation), otherwise all queued readers at
 * once. Updates fl_owner to match. Returns 0 or a fault error from
 * writing the lock word.
 */
static int
futex_lock_wake_up_next(struct futex_lock *fl, cloudabi_lock_t *lock)
{
        cloudabi_tid_t tid;
        int error;

        /*
         * Determine which thread(s) to wake up. Prefer waking up
         * writers over readers to prevent write starvation.
         */
        if (futex_queue_count(&fl->fl_writers) > 0) {
                /* Transfer ownership to a single write-locker. */
                if (futex_queue_count(&fl->fl_writers) > 1 ||
                    futex_queue_count(&fl->fl_readers) > 0) {
                        /* Lock should remain managed afterwards. */
                        tid = futex_queue_tid_best(&fl->fl_writers);
                        error = futex_user_store(lock,
                            tid | CLOUDABI_LOCK_WRLOCKED |
                            CLOUDABI_LOCK_KERNEL_MANAGED);
                        if (error != 0)
                                return (error);

                        futex_queue_wake_up_best(&fl->fl_writers);
                        fl->fl_owner = tid;
                } else {
                        /* Lock can become unmanaged afterwards. */
                        error = futex_user_store(lock,
                            futex_queue_tid_best(&fl->fl_writers) |
                            CLOUDABI_LOCK_WRLOCKED);
                        if (error != 0)
                                return (error);

                        futex_queue_wake_up_best(&fl->fl_writers);
                        fl->fl_owner = LOCK_UNMANAGED;
                }
        } else {
                /*
                 * Transfer ownership to all read-lockers (if any): store
                 * the read count into the word, no owner bit set.
                 */
                error = futex_user_store(lock,
                    futex_queue_count(&fl->fl_readers));
                if (error != 0)
                        return (error);

                /* Wake up all threads. */
                futex_queue_wake_up_all(&fl->fl_readers);
                fl->fl_owner = LOCK_UNMANAGED;
        }
        return (0);
}
  706 
/*
 * Acquires a write lock on behalf of the calling thread, sleeping on
 * the writers queue if the lock is busy. `donated` carries threads
 * handed over from a condition variable wakeup (see the header
 * comment); on success or EBUSY they are requeued onto the lock's
 * writers queue, otherwise they are woken so another thread can adopt
 * them. Returns 0 on success, or a timeout/fault/EDEADLK error.
 */
static int
futex_lock_wrlock(struct futex_lock *fl, struct thread *td,
    cloudabi_lock_t *lock, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime,
    struct futex_queue *donated)
{
        struct futex_waiter fw;
        int error;

        error = futex_lock_trywrlock(fl, lock, td->td_tid,
            futex_queue_count(donated) > 0);

        if (error == 0 || error == EBUSY) {
                /* Put donated threads in queue before suspending. */
                KASSERT(futex_queue_count(donated) == 0 ||
                    fl->fl_owner != LOCK_UNMANAGED,
                    ("Lock should be managed if we are going to donate"));
                futex_queue_requeue(donated, &fl->fl_writers, UINT_MAX);
        } else {
                /*
                 * This thread cannot deal with the donated threads.
                 * Wake up the next thread and let it try it by itself.
                 */
                futex_queue_wake_up_donate(donated, UINT_MAX);
        }

        if (error == EBUSY) {
                /* Suspend execution if the lock was busy. */
                KASSERT(fl->fl_owner != LOCK_UNMANAGED,
                    ("Attempted to sleep on an unmanaged lock"));
                error = futex_queue_sleep(&fl->fl_writers, fl, &fw, td,
                    clock_id, timeout, precision, abstime);
                KASSERT((error == 0) == fw.fw_locked,
                    ("Should have locked write lock on success"));
                KASSERT(futex_queue_count(&fw.fw_donated) == 0,
                    ("Lock functions cannot receive threads"));
        }
        if (error != 0)
                futex_lock_unmanage(fl, lock);
        return (error);
}
  748 
  749 /*
  750  * futex_queue operations.
  751  */
  752 
  753 static cloudabi_tid_t
  754 futex_queue_tid_best(const struct futex_queue *fq)
  755 {
  756 
  757         return (STAILQ_FIRST(&fq->fq_list)->fw_tid);
  758 }
  759 
  760 static unsigned int
  761 futex_queue_count(const struct futex_queue *fq)
  762 {
  763 
  764         return (fq->fq_count);
  765 }
  766 
  767 static void
  768 futex_queue_init(struct futex_queue *fq)
  769 {
  770 
  771         STAILQ_INIT(&fq->fq_list);
  772         fq->fq_count = 0;
  773 }
  774 
  775 /* Converts a relative timestamp to an sbintime. */
  776 static sbintime_t
  777 futex_queue_convert_timestamp_relative(cloudabi_timestamp_t ts)
  778 {
  779         cloudabi_timestamp_t s, ns;
  780 
  781         s = ts / 1000000000;
  782         ns = ts % 1000000000;
  783         if (s > INT32_MAX)
  784                 return (INT64_MAX);
  785         return ((s << 32) + (ns << 32) / 1000000000);
  786 }
  787 
  788 /* Converts an absolute timestamp and precision to a pair of sbintime values. */
  789 static int
  790 futex_queue_convert_timestamp(struct thread *td, cloudabi_clockid_t clock_id,
  791     cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision,
  792     sbintime_t *sbttimeout, sbintime_t *sbtprecision, bool abstime)
  793 {
  794         cloudabi_timestamp_t now;
  795         int error;
  796 
  797         if (abstime) {
  798                 /* Make the time relative. */
  799                 error = cloudabi_clock_time_get(td, clock_id, &now);
  800                 if (error != 0)
  801                         return (error);
  802                 timeout = timeout < now ? 0 : timeout - now;
  803         }
  804 
  805         *sbttimeout = futex_queue_convert_timestamp_relative(timeout);
  806         *sbtprecision = futex_queue_convert_timestamp_relative(precision);
  807         return (0);
  808 }
  809 
/*
 * Enqueues the calling thread on futex queue "fq" and suspends it
 * until it is woken up through one of the futex_queue_wake_up_*()
 * functions or the timeout expires.  The global futex lock is held on
 * entry and dropped while sleeping (via the condition variable).  A
 * timeout of UINT64_MAX means "sleep forever".
 *
 * Returns zero when the thread was dequeued by a wakeup, ETIMEDOUT on
 * timeout, or another errno value (e.g. EINTR/ERESTART) when
 * interrupted by a signal.
 */
static int
futex_queue_sleep(struct futex_queue *fq, struct futex_lock *fl,
    struct futex_waiter *fw, struct thread *td, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	sbintime_t sbttimeout, sbtprecision;
	int error;

	/* Initialize futex_waiter object. */
	fw->fw_tid = td->td_tid;
	fw->fw_locked = false;
	futex_queue_init(&fw->fw_donated);

	if (timeout != UINT64_MAX) {
		/* Convert timeout duration. */
		error = futex_queue_convert_timestamp(td, clock_id, timeout,
		    precision, &sbttimeout, &sbtprecision, abstime);
		if (error != 0)
			return (error);
	}

	/* Place object in the queue. */
	fw->fw_queue = fq;
	STAILQ_INSERT_TAIL(&fq->fq_list, fw, fw_next);
	++fq->fq_count;

	cv_init(&fw->fw_wait, "futex");
	/* Keep the lock object alive while we sleep on it. */
	++fl->fl_waitcount;

	futex_lock_assert(fl);
	if (timeout == UINT64_MAX) {
		/* Wait without a timeout. */
		error = cv_wait_sig(&fw->fw_wait, &futex_global_lock);
	} else {
		/* Wait respecting the timeout. */
		error = cv_timedwait_sig_sbt(&fw->fw_wait, &futex_global_lock,
		    sbttimeout, sbtprecision, 0);
		futex_lock_assert(fl);
		if (error == EWOULDBLOCK &&
		    fw->fw_queue != NULL && fw->fw_queue != fq) {
			/*
			 * We got signalled on a condition variable, but
			 * observed a timeout while waiting to reacquire
			 * the lock. In other words, we didn't actually
			 * time out. Go back to sleep and wait for the
			 * lock to be reacquired.
			 */
			error = cv_wait_sig(&fw->fw_wait, &futex_global_lock);
		}
	}
	futex_lock_assert(fl);

	--fl->fl_waitcount;
	cv_destroy(&fw->fw_wait);

	/* A waker that dequeued us has set fw_queue to NULL. */
	fq = fw->fw_queue;
	if (fq == NULL) {
		/* Thread got dequeued, so we've slept successfully. */
		return (0);
	}

	/* Thread is still enqueued. Remove it. */
	KASSERT(error != 0, ("Woken up thread is still enqueued"));
	STAILQ_REMOVE(&fq->fq_list, fw, futex_waiter, fw_next);
	--fq->fq_count;
	/* Map the condvar's timeout indication to the syscall error. */
	return (error == EWOULDBLOCK ? ETIMEDOUT : error);
}
  877 
  878 /* Moves up to nwaiters waiters from one queue to another. */
  879 static void
  880 futex_queue_requeue(struct futex_queue *fqfrom, struct futex_queue *fqto,
  881     unsigned int nwaiters)
  882 {
  883         struct futex_waiter *fw;
  884 
  885         /* Move waiters to the target queue. */
  886         while (nwaiters-- > 0 && !STAILQ_EMPTY(&fqfrom->fq_list)) {
  887                 fw = STAILQ_FIRST(&fqfrom->fq_list);
  888                 STAILQ_REMOVE_HEAD(&fqfrom->fq_list, fw_next);
  889                 --fqfrom->fq_count;
  890 
  891                 fw->fw_queue = fqto;
  892                 STAILQ_INSERT_TAIL(&fqto->fq_list, fw, fw_next);
  893                 ++fqto->fq_count;
  894         }
  895 }
  896 
  897 /* Wakes up all waiters in a queue. */
  898 static void
  899 futex_queue_wake_up_all(struct futex_queue *fq)
  900 {
  901         struct futex_waiter *fw;
  902 
  903         STAILQ_FOREACH(fw, &fq->fq_list, fw_next) {
  904                 fw->fw_locked = true;
  905                 fw->fw_queue = NULL;
  906                 cv_signal(&fw->fw_wait);
  907         }
  908 
  909         STAILQ_INIT(&fq->fq_list);
  910         fq->fq_count = 0;
  911 }
  912 
  913 /*
  914  * Wakes up the best waiter (i.e., the waiter having the highest
  915  * priority) in a queue.
  916  */
  917 static void
  918 futex_queue_wake_up_best(struct futex_queue *fq)
  919 {
  920         struct futex_waiter *fw;
  921 
  922         fw = STAILQ_FIRST(&fq->fq_list);
  923         fw->fw_locked = true;
  924         fw->fw_queue = NULL;
  925         cv_signal(&fw->fw_wait);
  926 
  927         STAILQ_REMOVE_HEAD(&fq->fq_list, fw_next);
  928         --fq->fq_count;
  929 }
  930 
  931 static void
  932 futex_queue_wake_up_donate(struct futex_queue *fq, unsigned int nwaiters)
  933 {
  934         struct futex_waiter *fw;
  935 
  936         fw = STAILQ_FIRST(&fq->fq_list);
  937         if (fw == NULL)
  938                 return;
  939         fw->fw_locked = false;
  940         fw->fw_queue = NULL;
  941         cv_signal(&fw->fw_wait);
  942 
  943         STAILQ_REMOVE_HEAD(&fq->fq_list, fw_next);
  944         --fq->fq_count;
  945         futex_queue_requeue(fq, &fw->fw_donated, nwaiters);
  946 }
  947 
  948 /*
  949  * futex_user operations. Used to adjust values in userspace.
  950  */
  951 
  952 static int
  953 futex_user_load(uint32_t *obj, uint32_t *val)
  954 {
  955 
  956         return (fueword32(obj, val) != 0 ? EFAULT : 0);
  957 }
  958 
  959 static int
  960 futex_user_store(uint32_t *obj, uint32_t val)
  961 {
  962 
  963         return (suword32(obj, val) != 0 ? EFAULT : 0);
  964 }
  965 
  966 static int
  967 futex_user_cmpxchg(uint32_t *obj, uint32_t cmp, uint32_t *old, uint32_t new)
  968 {
  969 
  970         return (casueword32(obj, cmp, old, new) != 0 ? EFAULT : 0);
  971 }
  972 
  973 /*
  974  * Blocking calls: acquiring locks, waiting on condition variables.
  975  */
  976 
/*
 * Waits on a userspace condition variable: releases the associated
 * userspace lock, sleeps until signalled or until the timeout
 * expires, and reacquires the lock before returning (even on
 * timeout).  Returns zero on success or an errno value.
 */
int
cloudabi_futex_condvar_wait(struct thread *td, cloudabi_condvar_t *condvar,
    cloudabi_scope_t condvar_scope, cloudabi_lock_t *lock,
    cloudabi_scope_t lock_scope, cloudabi_clockid_t clock_id,
    cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
{
	struct futex_condvar *fc;
	struct futex_lock *fl;
	struct futex_waiter fw;
	int error, error2;

	/* Lookup condition variable object. */
	error = futex_condvar_lookup_or_create(td, condvar, condvar_scope, lock,
	    lock_scope, &fc);
	if (error != 0)
		return (error);
	fl = fc->fc_lock;

	/*
	 * Set the condition variable to something other than
	 * CLOUDABI_CONDVAR_HAS_NO_WAITERS to make userspace threads
	 * call into the kernel to perform wakeups.
	 */
	error = futex_user_store(condvar, ~CLOUDABI_CONDVAR_HAS_NO_WAITERS);
	if (error != 0) {
		futex_condvar_release(fc);
		return (error);
	}

	/* Drop the lock. */
	error = futex_lock_unlock(fl, td, lock);
	if (error != 0) {
		/* Nobody is sleeping yet; clear the waiters flag again. */
		futex_condvar_unmanage(fc, condvar);
		futex_condvar_release(fc);
		return (error);
	}

	/* Go to sleep. */
	++fc->fc_waitcount;
	error = futex_queue_sleep(&fc->fc_waiters, fc->fc_lock, &fw, td,
	    clock_id, timeout, precision, abstime);
	if (fw.fw_locked) {
		/* Waited and got the lock assigned to us. */
		KASSERT(futex_queue_count(&fw.fw_donated) == 0,
		    ("Received threads while being locked"));
	} else if (error == 0 || error == ETIMEDOUT) {
		if (error != 0)
			futex_condvar_unmanage(fc, condvar);
		/*
		 * Got woken up without having the lock assigned to us.
		 * This can happen in two cases:
		 *
		 * 1. We observed a timeout on a condition variable.
		 * 2. We got signalled on a condition variable while the
		 *    associated lock is unlocked. We are the first
		 *    thread that gets woken up. This thread is
		 *    responsible for reacquiring the userspace lock.
		 */
		error2 = futex_lock_wrlock(fl, td, lock,
		    CLOUDABI_CLOCK_MONOTONIC, UINT64_MAX, 0, abstime,
		    &fw.fw_donated);
		/* Report the reacquisition failure over the timeout. */
		if (error2 != 0)
			error = error2;
	} else {
		/* Interrupted by a signal; tidy up both objects. */
		KASSERT(futex_queue_count(&fw.fw_donated) == 0,
		    ("Received threads on error"));
		futex_condvar_unmanage(fc, condvar);
		futex_lock_unmanage(fl, lock);
	}
	--fc->fc_waitcount;
	futex_condvar_release(fc);
	return (error);
}
 1050 
 1051 int
 1052 cloudabi_futex_lock_rdlock(struct thread *td, cloudabi_lock_t *lock,
 1053     cloudabi_scope_t scope, cloudabi_clockid_t clock_id,
 1054     cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
 1055 {
 1056         struct futex_lock *fl;
 1057         int error;
 1058 
 1059         /* Look up lock object. */
 1060         error = futex_lock_lookup(td, lock, scope, &fl);
 1061         if (error != 0)
 1062                 return (error);
 1063 
 1064         error = futex_lock_rdlock(fl, td, lock, clock_id, timeout,
 1065             precision, abstime);
 1066         futex_lock_release(fl);
 1067         return (error);
 1068 }
 1069 
 1070 int
 1071 cloudabi_futex_lock_wrlock(struct thread *td, cloudabi_lock_t *lock,
 1072     cloudabi_scope_t scope, cloudabi_clockid_t clock_id,
 1073     cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision, bool abstime)
 1074 {
 1075         struct futex_lock *fl;
 1076         struct futex_queue fq;
 1077         int error;
 1078 
 1079         /* Look up lock object. */
 1080         error = futex_lock_lookup(td, lock, scope, &fl);
 1081         if (error != 0)
 1082                 return (error);
 1083 
 1084         futex_queue_init(&fq);
 1085         error = futex_lock_wrlock(fl, td, lock, clock_id, timeout,
 1086             precision, abstime, &fq);
 1087         futex_lock_release(fl);
 1088         return (error);
 1089 }
 1090 
 1091 /*
 1092  * Non-blocking calls: releasing locks, signalling condition variables.
 1093  */
 1094 
/*
 * Wakes up to uap->nwaiters threads waiting on a userspace condition
 * variable.  When the associated lock is kernel-managed, the waiters
 * are requeued directly to the lock's writer queue; otherwise a
 * single waiter is woken and the remaining wakeups are donated to it.
 */
int
cloudabi_sys_condvar_signal(struct thread *td,
    struct cloudabi_sys_condvar_signal_args *uap)
{
	struct futex_condvar *fc;
	struct futex_lock *fl;
	cloudabi_nthreads_t nwaiters;
	int error;

	nwaiters = uap->nwaiters;
	if (nwaiters == 0) {
		/* No threads to wake up. */
		return (0);
	}

	/* Look up futex object. */
	error = futex_condvar_lookup(td, uap->condvar, uap->scope, &fc);
	if (error != 0) {
		/* Race condition: condition variable with no waiters. */
		return (error == ENOENT ? 0 : error);
	}
	fl = fc->fc_lock;

	if (fl->fl_owner == LOCK_UNMANAGED) {
		/*
		 * The lock is currently not managed by the kernel,
		 * meaning we must attempt to acquire the userspace lock
		 * first. We cannot requeue threads to an unmanaged lock,
		 * as these threads will then never be scheduled.
		 *
		 * Unfortunately, the memory address of the lock is
		 * unknown from this context, meaning that we cannot
		 * acquire the lock on behalf of the first thread to be
		 * scheduled. The lock may even not be mapped within the
		 * address space of the current thread.
		 *
		 * To solve this, wake up a single waiter that will
		 * attempt to acquire the lock. Donate all of the other
		 * waiters that need to be woken up to this waiter, so
		 * it can requeue them after acquiring the lock.
		 */
		futex_queue_wake_up_donate(&fc->fc_waiters, nwaiters - 1);
	} else {
		/*
		 * Lock is already managed by the kernel. This makes it
		 * easy, as we can requeue the threads from the
		 * condition variable directly to the associated lock.
		 */
		futex_queue_requeue(&fc->fc_waiters, &fl->fl_writers, nwaiters);
	}

	/* Clear userspace condition variable if all waiters are gone. */
	error = futex_condvar_unmanage(fc, uap->condvar);
	futex_condvar_release(fc);
	return (error);
}
 1151 
 1152 int
 1153 cloudabi_sys_lock_unlock(struct thread *td,
 1154     struct cloudabi_sys_lock_unlock_args *uap)
 1155 {
 1156         struct futex_lock *fl;
 1157         int error;
 1158 
 1159         error = futex_lock_lookup(td, uap->lock, uap->scope, &fl);
 1160         if (error != 0)
 1161                 return (error);
 1162         error = futex_lock_unlock(fl, td, uap->lock);
 1163         futex_lock_release(fl);
 1164         return (error);
 1165 }

Cache object: 07e9b0606c090817e12ed96ae9649a37


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.