The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/compat/linux/linux_futex.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2009-2021 Dmitry Chagin <dchagin@FreeBSD.org>
    5  * Copyright (c) 2008 Roman Divacky
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD$");
   31 
   32 #include "opt_compat.h"
   33 
   34 #include <sys/param.h>
   35 #include <sys/systm.h>
   36 #include <sys/imgact.h>
   37 #include <sys/imgact_elf.h>
   38 #include <sys/ktr.h>
   39 #include <sys/mutex.h>
   40 #include <sys/priv.h>
   41 #include <sys/proc.h>
   42 #include <sys/sched.h>
   43 #include <sys/umtxvar.h>
   44 
   45 #ifdef COMPAT_LINUX32
   46 #include <machine/../linux32/linux.h>
   47 #include <machine/../linux32/linux32_proto.h>
   48 #else
   49 #include <machine/../linux/linux.h>
   50 #include <machine/../linux/linux_proto.h>
   51 #endif
   52 #include <compat/linux/linux_emul.h>
   53 #include <compat/linux/linux_futex.h>
   54 #include <compat/linux/linux_misc.h>
   55 #include <compat/linux/linux_timer.h>
   56 #include <compat/linux/linux_util.h>
   57 
   58 #define FUTEX_SHARED    0x8     /* shared futex */
   59 #define FUTEX_UNOWNED   0
   60 
   61 #define GET_SHARED(a)   (a->flags & FUTEX_SHARED) ? AUTO_SHARE : THREAD_SHARE
   62 
   63 static int futex_atomic_op(struct thread *, int, uint32_t *, int *);
   64 static int handle_futex_death(struct thread *td, struct linux_emuldata *,
   65     uint32_t *, unsigned int, bool);
   66 static int fetch_robust_entry(struct linux_robust_list **,
   67     struct linux_robust_list **, unsigned int *);
   68 
   69 struct linux_futex_args {
   70         uint32_t        *uaddr;
   71         int32_t         op;
   72         uint32_t        flags;
   73         bool            clockrt;
   74         uint32_t        val;
   75         struct timespec *ts;
   76         uint32_t        *uaddr2;
   77         uint32_t        val3;
   78         bool            val3_compare;
   79         struct timespec kts;
   80 };
   81 
   82 static inline int futex_key_get(const void *, int, int, struct umtx_key *);
   83 static void linux_umtx_abs_timeout_init(struct umtx_abs_timeout *,
   84             struct linux_futex_args *);
   85 static int linux_futex(struct thread *, struct linux_futex_args *);
   86 static int linux_futex_wait(struct thread *, struct linux_futex_args *);
   87 static int linux_futex_wake(struct thread *, struct linux_futex_args *);
   88 static int linux_futex_requeue(struct thread *, struct linux_futex_args *);
   89 static int linux_futex_wakeop(struct thread *, struct linux_futex_args *);
   90 static int linux_futex_lock_pi(struct thread *, bool, struct linux_futex_args *);
   91 static int linux_futex_unlock_pi(struct thread *, bool,
   92             struct linux_futex_args *);
   93 static int futex_wake_pi(struct thread *, uint32_t *, bool);
   94 
   95 static int
   96 futex_key_get(const void *uaddr, int type, int share, struct umtx_key *key)
   97 {
   98 
   99         /* Check that futex address is a 32bit aligned. */
  100         if (!__is_aligned(uaddr, sizeof(uint32_t)))
  101                 return (EINVAL);
  102         return (umtx_key_get(uaddr, type, share, key));
  103 }
  104 
  105 int
  106 futex_wake(struct thread *td, uint32_t *uaddr, int val, bool shared)
  107 {
  108         struct linux_futex_args args;
  109 
  110         bzero(&args, sizeof(args));
  111         args.op = LINUX_FUTEX_WAKE;
  112         args.uaddr = uaddr;
  113         args.flags = shared == true ? FUTEX_SHARED : 0;
  114         args.val = val;
  115         args.val3 = FUTEX_BITSET_MATCH_ANY;
  116 
  117         return (linux_futex_wake(td, &args));
  118 }
  119 
  120 static int
  121 futex_wake_pi(struct thread *td, uint32_t *uaddr, bool shared)
  122 {
  123         struct linux_futex_args args;
  124 
  125         bzero(&args, sizeof(args));
  126         args.op = LINUX_FUTEX_UNLOCK_PI;
  127         args.uaddr = uaddr;
  128         args.flags = shared == true ? FUTEX_SHARED : 0;
  129 
  130         return (linux_futex_unlock_pi(td, true, &args));
  131 }
  132 
  133 static int
  134 futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr,
  135     int *res)
  136 {
  137         int op = (encoded_op >> 28) & 7;
  138         int cmp = (encoded_op >> 24) & 15;
  139         int oparg = (encoded_op << 8) >> 20;
  140         int cmparg = (encoded_op << 20) >> 20;
  141         int oldval = 0, ret;
  142 
  143         if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
  144                 oparg = 1 << oparg;
  145 
  146         switch (op) {
  147         case FUTEX_OP_SET:
  148                 ret = futex_xchgl(oparg, uaddr, &oldval);
  149                 break;
  150         case FUTEX_OP_ADD:
  151                 ret = futex_addl(oparg, uaddr, &oldval);
  152                 break;
  153         case FUTEX_OP_OR:
  154                 ret = futex_orl(oparg, uaddr, &oldval);
  155                 break;
  156         case FUTEX_OP_ANDN:
  157                 ret = futex_andl(~oparg, uaddr, &oldval);
  158                 break;
  159         case FUTEX_OP_XOR:
  160                 ret = futex_xorl(oparg, uaddr, &oldval);
  161                 break;
  162         default:
  163                 ret = ENOSYS;
  164                 break;
  165         }
  166 
  167         if (ret != 0)
  168                 return (ret);
  169 
  170         switch (cmp) {
  171         case FUTEX_OP_CMP_EQ:
  172                 *res = (oldval == cmparg);
  173                 break;
  174         case FUTEX_OP_CMP_NE:
  175                 *res = (oldval != cmparg);
  176                 break;
  177         case FUTEX_OP_CMP_LT:
  178                 *res = (oldval < cmparg);
  179                 break;
  180         case FUTEX_OP_CMP_GE:
  181                 *res = (oldval >= cmparg);
  182                 break;
  183         case FUTEX_OP_CMP_LE:
  184                 *res = (oldval <= cmparg);
  185                 break;
  186         case FUTEX_OP_CMP_GT:
  187                 *res = (oldval > cmparg);
  188                 break;
  189         default:
  190                 ret = ENOSYS;
  191         }
  192 
  193         return (ret);
  194 }
  195 
  196 static int
  197 linux_futex(struct thread *td, struct linux_futex_args *args)
  198 {
  199         struct linux_pemuldata *pem;
  200         struct proc *p;
  201 
  202         if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
  203                 args->flags = 0;
  204                 args->op &= ~LINUX_FUTEX_PRIVATE_FLAG;
  205         } else
  206                 args->flags = FUTEX_SHARED;
  207 
  208         args->clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME;
  209         args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME;
  210 
  211         if (args->clockrt &&
  212             args->op != LINUX_FUTEX_WAIT_BITSET &&
  213             args->op != LINUX_FUTEX_WAIT_REQUEUE_PI &&
  214             args->op != LINUX_FUTEX_LOCK_PI2)
  215                 return (ENOSYS);
  216 
  217         switch (args->op) {
  218         case LINUX_FUTEX_WAIT:
  219                 args->val3 = FUTEX_BITSET_MATCH_ANY;
  220                 /* FALLTHROUGH */
  221 
  222         case LINUX_FUTEX_WAIT_BITSET:
  223                 LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x",
  224                     args->uaddr, args->val, args->val3);
  225 
  226                 return (linux_futex_wait(td, args));
  227 
  228         case LINUX_FUTEX_WAKE:
  229                 args->val3 = FUTEX_BITSET_MATCH_ANY;
  230                 /* FALLTHROUGH */
  231 
  232         case LINUX_FUTEX_WAKE_BITSET:
  233                 LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x",
  234                     args->uaddr, args->val, args->val3);
  235 
  236                 return (linux_futex_wake(td, args));
  237 
  238         case LINUX_FUTEX_REQUEUE:
  239                 /*
  240                  * Glibc does not use this operation since version 2.3.3,
  241                  * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
  242                  * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
  243                  * FUTEX_REQUEUE returned EINVAL.
  244                  */
  245                 pem = pem_find(td->td_proc);
  246                 if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
  247                         linux_msg(td, "unsupported FUTEX_REQUEUE");
  248                         pem->flags |= LINUX_XDEPR_REQUEUEOP;
  249                 }
  250 
  251                 /*
  252                  * The above is true, however musl libc does make use of the
  253                  * futex requeue operation, allow operation for brands which
  254                  * set LINUX_BI_FUTEX_REQUEUE bit of Brandinfo flags.
  255                  */
  256                 p = td->td_proc;
  257                 Elf_Brandinfo *bi = p->p_elf_brandinfo;
  258                 if (bi == NULL || ((bi->flags & LINUX_BI_FUTEX_REQUEUE)) == 0)
  259                         return (EINVAL);
  260                 args->val3_compare = false;
  261                 /* FALLTHROUGH */
  262 
  263         case LINUX_FUTEX_CMP_REQUEUE:
  264                 LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
  265                     "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x",
  266                     args->uaddr, args->val, args->val3, args->uaddr2,
  267                     args->ts);
  268 
  269                 return (linux_futex_requeue(td, args));
  270 
  271         case LINUX_FUTEX_WAKE_OP:
  272                 LINUX_CTR5(sys_futex, "WAKE_OP "
  273                     "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x",
  274                     args->uaddr, args->val, args->uaddr2, args->val3,
  275                     args->ts);
  276 
  277                 return (linux_futex_wakeop(td, args));
  278 
  279         case LINUX_FUTEX_LOCK_PI:
  280                 args->clockrt = true;
  281                 /* FALLTHROUGH */
  282 
  283         case LINUX_FUTEX_LOCK_PI2:
  284                 LINUX_CTR2(sys_futex, "LOCKPI uaddr %p val 0x%x",
  285                     args->uaddr, args->val);
  286 
  287                 return (linux_futex_lock_pi(td, false, args));
  288 
  289         case LINUX_FUTEX_UNLOCK_PI:
  290                 LINUX_CTR1(sys_futex, "UNLOCKPI uaddr %p",
  291                     args->uaddr);
  292 
  293                 return (linux_futex_unlock_pi(td, false, args));
  294 
  295         case LINUX_FUTEX_TRYLOCK_PI:
  296                 LINUX_CTR1(sys_futex, "TRYLOCKPI uaddr %p",
  297                     args->uaddr);
  298 
  299                 return (linux_futex_lock_pi(td, true, args));
  300 
  301         /*
  302          * Current implementation of FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI
  303          * can't be used anymore to implement conditional variables.
  304          * A detailed explanation can be found here:
  305          *
  306          * https://sourceware.org/bugzilla/show_bug.cgi?id=13165
  307          * and here http://austingroupbugs.net/view.php?id=609
  308          *
  309          * And since commit
  310          * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=ed19993b5b0d05d62cc883571519a67dae481a14
  311          * glibc does not use them.
  312          */
  313         case LINUX_FUTEX_WAIT_REQUEUE_PI:
  314                 /* not yet implemented */
  315                 pem = pem_find(td->td_proc);
  316                 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
  317                         linux_msg(td, "unsupported FUTEX_WAIT_REQUEUE_PI");
  318                         pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
  319                 }
  320                 return (ENOSYS);
  321 
  322         case LINUX_FUTEX_CMP_REQUEUE_PI:
  323                 /* not yet implemented */
  324                 pem = pem_find(td->td_proc);
  325                 if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
  326                         linux_msg(td, "unsupported FUTEX_CMP_REQUEUE_PI");
  327                         pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
  328                 }
  329                 return (ENOSYS);
  330 
  331         default:
  332                 linux_msg(td, "unsupported futex op %d", args->op);
  333                 return (ENOSYS);
  334         }
  335 }
  336 
  337 /*
  338  * pi protocol:
  339  * - 0 futex word value means unlocked.
  340  * - TID futex word value means locked.
  341  * Userspace uses atomic ops to lock/unlock these futexes without entering the
  342  * kernel. If the lock-acquire fastpath fails, (transition from 0 to TID fails),
  343  * then FUTEX_LOCK_PI is called.
  344  * The kernel atomically set FUTEX_WAITERS bit in the futex word value, if no
  345  * other waiters exists looks up the thread that owns the futex (it has put its
  346  * own TID into the futex value) and made this thread the owner of the internal
  347  * pi-aware lock object (mutex). Then the kernel tries to lock the internal lock
  348  * object, on which it blocks. Once it returns, it has the mutex acquired, and it
  349  * sets the futex value to its own TID and returns (futex value contains
  350  * FUTEX_WAITERS|TID).
  351  * The unlock fastpath would fail (because the FUTEX_WAITERS bit is set) and
  352  * FUTEX_UNLOCK_PI will be called.
  353  * If a futex is found to be held at exit time, the kernel sets the OWNER_DIED
  354  * bit of the futex word and wakes up the next futex waiter (if any), WAITERS
  355  * bit is preserved (if any).
  356  * If OWNER_DIED bit is set the kernel sanity checks the futex word value against
  357  * the internal futex state and if correct, acquire futex.
  358  */
  359 static int
  360 linux_futex_lock_pi(struct thread *td, bool try, struct linux_futex_args *args)
  361 {
  362         struct umtx_abs_timeout timo;
  363         struct linux_emuldata *em;
  364         struct umtx_pi *pi, *new_pi;
  365         struct thread *td1;
  366         struct umtx_q *uq;
  367         int error, rv;
  368         uint32_t owner, old_owner;
  369 
  370         em = em_find(td);
  371         uq = td->td_umtxq;
  372         error = futex_key_get(args->uaddr, TYPE_PI_FUTEX, GET_SHARED(args),
  373             &uq->uq_key);
  374         if (error != 0)
  375                 return (error);
  376         if (args->ts != NULL)
  377                 linux_umtx_abs_timeout_init(&timo, args);
  378 
  379         umtxq_lock(&uq->uq_key);
  380         pi = umtx_pi_lookup(&uq->uq_key);
  381         if (pi == NULL) {
  382                 new_pi = umtx_pi_alloc(M_NOWAIT);
  383                 if (new_pi == NULL) {
  384                         umtxq_unlock(&uq->uq_key);
  385                         new_pi = umtx_pi_alloc(M_WAITOK);
  386                         umtxq_lock(&uq->uq_key);
  387                         pi = umtx_pi_lookup(&uq->uq_key);
  388                         if (pi != NULL) {
  389                                 umtx_pi_free(new_pi);
  390                                 new_pi = NULL;
  391                         }
  392                 }
  393                 if (new_pi != NULL) {
  394                         new_pi->pi_key = uq->uq_key;
  395                         umtx_pi_insert(new_pi);
  396                         pi = new_pi;
  397                 }
  398         }
  399         umtx_pi_ref(pi);
  400         umtxq_unlock(&uq->uq_key);
  401         for (;;) {
  402                 /* Try uncontested case first. */
  403                 rv = casueword32(args->uaddr, FUTEX_UNOWNED, &owner, em->em_tid);
  404                 /* The acquire succeeded. */
  405                 if (rv == 0) {
  406                         error = 0;
  407                         break;
  408                 }
  409                 if (rv == -1) {
  410                         error = EFAULT;
  411                         break;
  412                 }
  413 
  414                 /*
  415                  * Nobody owns it, but the acquire failed. This can happen
  416                  * with ll/sc atomic.
  417                  */
  418                 if (owner == FUTEX_UNOWNED) {
  419                         error = thread_check_susp(td, true);
  420                         if (error != 0)
  421                                 break;
  422                         continue;
  423                 }
  424 
  425                 /*
  426                  * Avoid overwriting a possible error from sleep due
  427                  * to the pending signal with suspension check result.
  428                  */
  429                 if (error == 0) {
  430                         error = thread_check_susp(td, true);
  431                         if (error != 0)
  432                                 break;
  433                 }
  434 
  435                 /* The futex word at *uaddr is already locked by the caller. */
  436                 if ((owner & FUTEX_TID_MASK) == em->em_tid) {
  437                         error = EDEADLK;
  438                         break;
  439                 }
  440 
  441                 /*
  442                  * Futex owner died, handle_futex_death() set the OWNER_DIED bit
  443                  * and clear tid. Try to acquire it.
  444                  */
  445                 if ((owner & FUTEX_TID_MASK) == FUTEX_UNOWNED) {
  446                         old_owner = owner;
  447                         owner = owner & (FUTEX_WAITERS | FUTEX_OWNER_DIED);
  448                         owner |= em->em_tid;
  449                         rv = casueword32(args->uaddr, old_owner, &owner, owner);
  450                         if (rv == -1) {
  451                                 error = EFAULT;
  452                                 break;
  453                         }
  454                         if (rv == 1) {
  455                                 if (error == 0) {
  456                                         error = thread_check_susp(td, true);
  457                                         if (error != 0)
  458                                                 break;
  459                                 }
  460 
  461                                 /*
  462                                  * If this failed the lock could
  463                                  * changed, restart.
  464                                  */
  465                                 continue;
  466                         }
  467 
  468                         umtxq_lock(&uq->uq_key);
  469                         umtxq_busy(&uq->uq_key);
  470                         error = umtx_pi_claim(pi, td);
  471                         umtxq_unbusy(&uq->uq_key);
  472                         umtxq_unlock(&uq->uq_key);
  473                         if (error != 0) {
  474                                 /*
  475                                  * Since we're going to return an
  476                                  * error, restore the futex to its
  477                                  * previous, unowned state to avoid
  478                                  * compounding the problem.
  479                                  */
  480                                 (void)casuword32(args->uaddr, owner, old_owner);
  481                         }
  482                         break;
  483                 }
  484 
  485                 /*
  486                  * Inconsistent state: OWNER_DIED is set and tid is not 0.
  487                  * Linux does some checks of futex state, we return EINVAL,
  488                  * as the user space can take care of this.
  489                  */
  490                 if ((owner & FUTEX_OWNER_DIED) != FUTEX_UNOWNED) {
  491                         error = EINVAL;
  492                         break;
  493                 }
  494 
  495                 if (try != 0) {
  496                         error = EBUSY;
  497                         break;
  498                 }
  499 
  500                 /*
  501                  * If we caught a signal, we have retried and now
  502                  * exit immediately.
  503                  */
  504                 if (error != 0)
  505                         break;
  506 
  507                 umtxq_lock(&uq->uq_key);
  508                 umtxq_busy(&uq->uq_key);
  509                 umtxq_unlock(&uq->uq_key);
  510 
  511                 /*
  512                  * Set the contested bit so that a release in user space knows
  513                  * to use the system call for unlock. If this fails either some
  514                  * one else has acquired the lock or it has been released.
  515                  */
  516                 rv = casueword32(args->uaddr, owner, &owner,
  517                     owner | FUTEX_WAITERS);
  518                 if (rv == -1) {
  519                         umtxq_unbusy_unlocked(&uq->uq_key);
  520                         error = EFAULT;
  521                         break;
  522                 }
  523                 if (rv == 1) {
  524                         umtxq_unbusy_unlocked(&uq->uq_key);
  525                         error = thread_check_susp(td, true);
  526                         if (error != 0)
  527                                 break;
  528 
  529                         /*
  530                          * The lock changed and we need to retry or we
  531                          * lost a race to the thread unlocking the umtx.
  532                          */
  533                         continue;
  534                 }
  535 
  536                 /*
  537                  * Substitute Linux thread id by native thread id to
  538                  * avoid refactoring code of umtxq_sleep_pi().
  539                  */
  540                 td1 = linux_tdfind(td, owner & FUTEX_TID_MASK, -1);
  541                 if (td1 != NULL) {
  542                         owner = td1->td_tid;
  543                         PROC_UNLOCK(td1->td_proc);
  544                 } else {
  545                         umtxq_unbusy_unlocked(&uq->uq_key);
  546                         error = EINVAL;
  547                         break;
  548                 }
  549 
  550                 umtxq_lock(&uq->uq_key);
  551 
  552                 /* We set the contested bit, sleep. */
  553                 error = umtxq_sleep_pi(uq, pi, owner, "futexp",
  554                     args->ts == NULL ? NULL : &timo,
  555                     (args->flags & FUTEX_SHARED) != 0);
  556                 if (error != 0)
  557                         continue;
  558 
  559                 error = thread_check_susp(td, false);
  560                 if (error != 0)
  561                         break;
  562         }
  563 
  564         umtxq_lock(&uq->uq_key);
  565         umtx_pi_unref(pi);
  566         umtxq_unlock(&uq->uq_key);
  567         umtx_key_release(&uq->uq_key);
  568         return (error);
  569 }
  570 
  571 static int
  572 linux_futex_unlock_pi(struct thread *td, bool rb, struct linux_futex_args *args)
  573 {
  574         struct linux_emuldata *em;
  575         struct umtx_key key;
  576         uint32_t old, owner, new_owner;
  577         int count, error;
  578 
  579         em = em_find(td);
  580 
  581         /*
  582          * Make sure we own this mtx.
  583          */
  584         error = fueword32(args->uaddr, &owner);
  585         if (error == -1)
  586                 return (EFAULT);
  587         if (!rb && (owner & FUTEX_TID_MASK) != em->em_tid)
  588                 return (EPERM);
  589 
  590         error = futex_key_get(args->uaddr, TYPE_PI_FUTEX, GET_SHARED(args), &key);
  591         if (error != 0)
  592                 return (error);
  593         umtxq_lock(&key);
  594         umtxq_busy(&key);
  595         error = umtx_pi_drop(td, &key, rb, &count);
  596         if (error != 0 || rb) {
  597                 umtxq_unbusy(&key);
  598                 umtxq_unlock(&key);
  599                 umtx_key_release(&key);
  600                 return (error);
  601         }
  602         umtxq_unlock(&key);
  603 
  604         /*
  605          * When unlocking the futex, it must be marked as unowned if
  606          * there is zero or one thread only waiting for it.
  607          * Otherwise, it must be marked as contested.
  608          */
  609         if (count > 1)
  610                 new_owner = FUTEX_WAITERS;
  611         else
  612                 new_owner = FUTEX_UNOWNED;
  613 
  614 again:
  615         error = casueword32(args->uaddr, owner, &old, new_owner);
  616         if (error == 1) {
  617                 error = thread_check_susp(td, false);
  618                 if (error == 0)
  619                         goto again;
  620         }
  621         umtxq_unbusy_unlocked(&key);
  622         umtx_key_release(&key);
  623         if (error == -1)
  624                 return (EFAULT);
  625         if (error == 0 && old != owner)
  626                 return (EINVAL);
  627         return (error);
  628 }
  629 
  630 static int
  631 linux_futex_wakeop(struct thread *td, struct linux_futex_args *args)
  632 {
  633         struct umtx_key key, key2;
  634         int nrwake, op_ret, ret;
  635         int error, count;
  636 
  637         if (args->uaddr == args->uaddr2)
  638                 return (EINVAL);
  639 
  640         error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key);
  641         if (error != 0)
  642                 return (error);
  643         error = futex_key_get(args->uaddr2, TYPE_FUTEX, GET_SHARED(args), &key2);
  644         if (error != 0) {
  645                 umtx_key_release(&key);
  646                 return (error);
  647         }
  648         umtxq_lock(&key);
  649         umtxq_busy(&key);
  650         umtxq_unlock(&key);
  651         error = futex_atomic_op(td, args->val3, args->uaddr2, &op_ret);
  652         umtxq_lock(&key);
  653         umtxq_unbusy(&key);
  654         if (error != 0)
  655                 goto out;
  656         ret = umtxq_signal_mask(&key, args->val, args->val3);
  657         if (op_ret > 0) {
  658                 nrwake = (int)(unsigned long)args->ts;
  659                 umtxq_lock(&key2);
  660                 count = umtxq_count(&key2);
  661                 if (count > 0)
  662                         ret += umtxq_signal_mask(&key2, nrwake, args->val3);
  663                 else
  664                         ret += umtxq_signal_mask(&key, nrwake, args->val3);
  665                 umtxq_unlock(&key2);
  666         }
  667         td->td_retval[0] = ret;
  668 out:
  669         umtxq_unlock(&key);
  670         umtx_key_release(&key2);
  671         umtx_key_release(&key);
  672         return (error);
  673 }
  674 
  675 static int
  676 linux_futex_requeue(struct thread *td, struct linux_futex_args *args)
  677 {
  678         int nrwake, nrrequeue;
  679         struct umtx_key key, key2;
  680         int error;
  681         uint32_t uval;
  682 
  683         /*
  684          * Linux allows this, we would not, it is an incorrect
  685          * usage of declared ABI, so return EINVAL.
  686          */
  687         if (args->uaddr == args->uaddr2)
  688                 return (EINVAL);
  689 
  690         nrrequeue = (int)(unsigned long)args->ts;
  691         nrwake = args->val;
  692         /*
  693          * Sanity check to prevent signed integer overflow,
  694          * see Linux CVE-2018-6927
  695          */
  696         if (nrwake < 0 || nrrequeue < 0)
  697                 return (EINVAL);
  698 
  699         error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key);
  700         if (error != 0)
  701                 return (error);
  702         error = futex_key_get(args->uaddr2, TYPE_FUTEX, GET_SHARED(args), &key2);
  703         if (error != 0) {
  704                 umtx_key_release(&key);
  705                 return (error);
  706         }
  707         umtxq_lock(&key);
  708         umtxq_busy(&key);
  709         umtxq_unlock(&key);
  710         error = fueword32(args->uaddr, &uval);
  711         if (error != 0)
  712                 error = EFAULT;
  713         else if (args->val3_compare == true && uval != args->val3)
  714                 error = EWOULDBLOCK;
  715         umtxq_lock(&key);
  716         umtxq_unbusy(&key);
  717         if (error == 0) {
  718                 umtxq_lock(&key2);
  719                 td->td_retval[0] = umtxq_requeue(&key, nrwake, &key2, nrrequeue);
  720                 umtxq_unlock(&key2);
  721         }
  722         umtxq_unlock(&key);
  723         umtx_key_release(&key2);
  724         umtx_key_release(&key);
  725         return (error);
  726 }
  727 
  728 static int
  729 linux_futex_wake(struct thread *td, struct linux_futex_args *args)
  730 {
  731         struct umtx_key key;
  732         int error;
  733 
  734         if (args->val3 == 0)
  735                 return (EINVAL);
  736 
  737         error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args), &key);
  738         if (error != 0)
  739                 return (error);
  740         umtxq_lock(&key);
  741         td->td_retval[0] = umtxq_signal_mask(&key, args->val, args->val3);
  742         umtxq_unlock(&key);
  743         umtx_key_release(&key);
  744         return (0);
  745 }
  746 
  747 static int
  748 linux_futex_wait(struct thread *td, struct linux_futex_args *args)
  749 {
  750         struct umtx_abs_timeout timo;
  751         struct umtx_q *uq;
  752         uint32_t uval;
  753         int error;
  754 
  755         if (args->val3 == 0)
  756                 error = EINVAL;
  757 
  758         uq = td->td_umtxq;
  759         error = futex_key_get(args->uaddr, TYPE_FUTEX, GET_SHARED(args),
  760             &uq->uq_key);
  761         if (error != 0)
  762                 return (error);
  763         if (args->ts != NULL)
  764                 linux_umtx_abs_timeout_init(&timo, args);
  765         umtxq_lock(&uq->uq_key);
  766         umtxq_busy(&uq->uq_key);
  767         uq->uq_bitset = args->val3;
  768         umtxq_insert(uq);
  769         umtxq_unlock(&uq->uq_key);
  770         error = fueword32(args->uaddr, &uval);
  771         if (error != 0)
  772                 error = EFAULT;
  773         else if (uval != args->val)
  774                 error = EWOULDBLOCK;
  775         umtxq_lock(&uq->uq_key);
  776         umtxq_unbusy(&uq->uq_key);
  777         if (error == 0) {
  778                 error = umtxq_sleep(uq, "futex",
  779                     args->ts == NULL ? NULL : &timo);
  780                 if ((uq->uq_flags & UQF_UMTXQ) == 0)
  781                         error = 0;
  782                 else
  783                         umtxq_remove(uq);
  784         } else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
  785                 umtxq_remove(uq);
  786         }
  787         umtxq_unlock(&uq->uq_key);
  788         umtx_key_release(&uq->uq_key);
  789         return (error);
  790 }
  791 
  792 static void
  793 linux_umtx_abs_timeout_init(struct umtx_abs_timeout *timo,
  794     struct linux_futex_args *args)
  795 {
  796         int clockid, absolute;
  797 
  798         /*
  799          * The FUTEX_CLOCK_REALTIME option bit can be employed only with the
  800          * FUTEX_WAIT_BITSET, FUTEX_WAIT_REQUEUE_PI, FUTEX_LOCK_PI2.
  801          * For FUTEX_WAIT, timeout is interpreted as a relative value, for other
  802          * futex operations timeout is interpreted as an absolute value.
  803          * If FUTEX_CLOCK_REALTIME option bit is set, the Linux kernel measures
  804          * the timeout against the CLOCK_REALTIME clock, otherwise the kernel
  805          * measures the timeout against the CLOCK_MONOTONIC clock.
  806          */
  807         clockid = args->clockrt ? CLOCK_REALTIME : CLOCK_MONOTONIC;
  808         absolute = args->op == LINUX_FUTEX_WAIT ? false : true;
  809         umtx_abs_timeout_init(timo, clockid, absolute, args->ts);
  810 }
  811 
  812 int
  813 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
  814 {
  815         struct linux_futex_args fargs = {
  816                 .uaddr = args->uaddr,
  817                 .op = args->op,
  818                 .val = args->val,
  819                 .ts = NULL,
  820                 .uaddr2 = args->uaddr2,
  821                 .val3 = args->val3,
  822                 .val3_compare = true,
  823         };
  824         int error;
  825 
  826         switch (args->op & LINUX_FUTEX_CMD_MASK) {
  827         case LINUX_FUTEX_WAIT:
  828         case LINUX_FUTEX_WAIT_BITSET:
  829         case LINUX_FUTEX_LOCK_PI:
  830         case LINUX_FUTEX_LOCK_PI2:
  831                 if (args->timeout != NULL) {
  832                         error = linux_get_timespec(&fargs.kts, args->timeout);
  833                         if (error != 0)
  834                                 return (error);
  835                         fargs.ts = &fargs.kts;
  836                 }
  837                 break;
  838         default:
  839                 fargs.ts = PTRIN(args->timeout);
  840         }
  841         return (linux_futex(td, &fargs));
  842 }
  843 
  844 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
  845 int
  846 linux_sys_futex_time64(struct thread *td,
  847     struct linux_sys_futex_time64_args *args)
  848 {
  849         struct linux_futex_args fargs = {
  850                 .uaddr = args->uaddr,
  851                 .op = args->op,
  852                 .val = args->val,
  853                 .ts = NULL,
  854                 .uaddr2 = args->uaddr2,
  855                 .val3 = args->val3,
  856                 .val3_compare = true,
  857         };
  858         int error;
  859 
  860         switch (args->op & LINUX_FUTEX_CMD_MASK) {
  861         case LINUX_FUTEX_WAIT:
  862         case LINUX_FUTEX_WAIT_BITSET:
  863         case LINUX_FUTEX_LOCK_PI:
  864         case LINUX_FUTEX_LOCK_PI2:
  865                 if (args->timeout != NULL) {
  866                         error = linux_get_timespec64(&fargs.kts, args->timeout);
  867                         if (error != 0)
  868                                 return (error);
  869                         fargs.ts = &fargs.kts;
  870                 }
  871                 break;
  872         default:
  873                 fargs.ts = PTRIN(args->timeout);
  874         }
  875         return (linux_futex(td, &fargs));
  876 }
  877 #endif
  878 
  879 int
  880 linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
  881 {
  882         struct linux_emuldata *em;
  883 
  884         if (args->len != sizeof(struct linux_robust_list_head))
  885                 return (EINVAL);
  886 
  887         em = em_find(td);
  888         em->robust_futexes = args->head;
  889 
  890         return (0);
  891 }
  892 
  893 int
  894 linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
  895 {
  896         struct linux_emuldata *em;
  897         struct linux_robust_list_head *head;
  898         l_size_t len;
  899         struct thread *td2;
  900         int error;
  901 
  902         if (!args->pid) {
  903                 em = em_find(td);
  904                 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
  905                 head = em->robust_futexes;
  906         } else {
  907                 td2 = linux_tdfind(td, args->pid, -1);
  908                 if (td2 == NULL)
  909                         return (ESRCH);
  910                 if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) {
  911                         PROC_UNLOCK(td2->td_proc);
  912                         return (EPERM);
  913                 }
  914 
  915                 em = em_find(td2);
  916                 KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
  917                 /* XXX: ptrace? */
  918                 if (priv_check(td, PRIV_CRED_SETUID) ||
  919                     priv_check(td, PRIV_CRED_SETEUID) ||
  920                     p_candebug(td, td2->td_proc)) {
  921                         PROC_UNLOCK(td2->td_proc);
  922                         return (EPERM);
  923                 }
  924                 head = em->robust_futexes;
  925 
  926                 PROC_UNLOCK(td2->td_proc);
  927         }
  928 
  929         len = sizeof(struct linux_robust_list_head);
  930         error = copyout(&len, args->len, sizeof(l_size_t));
  931         if (error != 0)
  932                 return (EFAULT);
  933 
  934         return (copyout(&head, args->head, sizeof(l_uintptr_t)));
  935 }
  936 
  937 static int
  938 handle_futex_death(struct thread *td, struct linux_emuldata *em, uint32_t *uaddr,
  939     unsigned int pi, bool pending_op)
  940 {
  941         uint32_t uval, nval, mval;
  942         int error;
  943 
  944 retry:
  945         error = fueword32(uaddr, &uval);
  946         if (error != 0)
  947                 return (EFAULT);
  948 
  949         /*
  950          * Special case for regular (non PI) futexes. The unlock path in
  951          * user space has two race scenarios:
  952          *
  953          * 1. The unlock path releases the user space futex value and
  954          *    before it can execute the futex() syscall to wake up
  955          *    waiters it is killed.
  956          *
  957          * 2. A woken up waiter is killed before it can acquire the
  958          *    futex in user space.
  959          *
  960          * In both cases the TID validation below prevents a wakeup of
  961          * potential waiters which can cause these waiters to block
  962          * forever.
  963          *
  964          * In both cases it is safe to attempt waking up a potential
  965          * waiter without touching the user space futex value and trying
  966          * to set the OWNER_DIED bit.
  967          */
  968         if (pending_op && !pi && !uval) {
  969                 (void)futex_wake(td, uaddr, 1, true);
  970                 return (0);
  971         }
  972 
  973         if ((uval & FUTEX_TID_MASK) == em->em_tid) {
  974                 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
  975                 error = casueword32(uaddr, uval, &nval, mval);
  976                 if (error == -1)
  977                         return (EFAULT);
  978                 if (error == 1) {
  979                         error = thread_check_susp(td, false);
  980                         if (error != 0)
  981                                 return (error);
  982                         goto retry;
  983                 }
  984 
  985                 if (!pi && (uval & FUTEX_WAITERS)) {
  986                         error = futex_wake(td, uaddr, 1, true);
  987                         if (error != 0)
  988                                 return (error);
  989                 } else if (pi && (uval & FUTEX_WAITERS)) {
  990                         error = futex_wake_pi(td, uaddr, true);
  991                         if (error != 0)
  992                                 return (error);
  993                 }
  994         }
  995 
  996         return (0);
  997 }
  998 
  999 static int
 1000 fetch_robust_entry(struct linux_robust_list **entry,
 1001     struct linux_robust_list **head, unsigned int *pi)
 1002 {
 1003         l_ulong uentry;
 1004         int error;
 1005 
 1006         error = copyin((const void *)head, &uentry, sizeof(uentry));
 1007         if (error != 0)
 1008                 return (EFAULT);
 1009 
 1010         *entry = (void *)(uentry & ~1UL);
 1011         *pi = uentry & 1;
 1012 
 1013         return (0);
 1014 }
 1015 
 1016 #define LINUX_HANDLE_DEATH_PENDING      true
 1017 #define LINUX_HANDLE_DEATH_LIST         false
 1018 
 1019 /* This walks the list of robust futexes releasing them. */
 1020 void
 1021 release_futexes(struct thread *td, struct linux_emuldata *em)
 1022 {
 1023         struct linux_robust_list_head *head;
 1024         struct linux_robust_list *entry, *next_entry, *pending;
 1025         unsigned int limit = 2048, pi, next_pi, pip;
 1026         uint32_t *uaddr;
 1027         l_long futex_offset;
 1028         int error;
 1029 
 1030         head = em->robust_futexes;
 1031         if (head == NULL)
 1032                 return;
 1033 
 1034         if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi))
 1035                 return;
 1036 
 1037         error = copyin(&head->futex_offset, &futex_offset,
 1038             sizeof(futex_offset));
 1039         if (error != 0)
 1040                 return;
 1041 
 1042         if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip))
 1043                 return;
 1044 
 1045         while (entry != &head->list) {
 1046                 error = fetch_robust_entry(&next_entry, PTRIN(&entry->next),
 1047                     &next_pi);
 1048 
 1049                 /*
 1050                  * A pending lock might already be on the list, so
 1051                  * don't process it twice.
 1052                  */
 1053                 if (entry != pending) {
 1054                         uaddr = (uint32_t *)((caddr_t)entry + futex_offset);
 1055                         if (handle_futex_death(td, em, uaddr, pi,
 1056                             LINUX_HANDLE_DEATH_LIST))
 1057                                 return;
 1058                 }
 1059                 if (error != 0)
 1060                         return;
 1061 
 1062                 entry = next_entry;
 1063                 pi = next_pi;
 1064 
 1065                 if (!--limit)
 1066                         break;
 1067 
 1068                 sched_relinquish(curthread);
 1069         }
 1070 
 1071         if (pending) {
 1072                 uaddr = (uint32_t *)((caddr_t)pending + futex_offset);
 1073                 (void)handle_futex_death(td, em, uaddr, pip,
 1074                     LINUX_HANDLE_DEATH_PENDING);
 1075         }
 1076 }

Cache object: b1410d506195abcaa4370793de0ccb17


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.