The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/emulation/linux/linux_futex.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */
    2 
    3 /*-
    4  * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  * 3. All advertising materials mentioning features or use of this software
   15  *    must display the following acknowledgement:
   16  *      This product includes software developed by Emmanuel Dreyfus
   17  * 4. The name of the author may not be used to endorse or promote
   18  *    products derived from this software without specific prior written
   19  *    permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
   22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
   23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
   25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   31  * POSSIBILITY OF SUCH DAMAGE.
   32  */
   33 
   34 #include "opt_compat.h"
   35 
   36 #include <sys/param.h>
   37 #include <sys/systm.h>
   38 #include <sys/imgact.h>
   39 #include <sys/imgact_aout.h>
   40 #include <sys/imgact_elf.h>
   41 #include <sys/kern_syscall.h>
   42 #include <sys/lock.h>
   43 #include <sys/malloc.h>
   44 #include <sys/proc.h>
   45 #include <sys/signalvar.h>
   46 #include <sys/sysent.h>
   47 #include <sys/sysproto.h>
   48 #include <sys/priv.h>
   49 #include <sys/lock.h>
   50 #include <sys/spinlock2.h>
   51 
   52 #include <vm/vm.h>
   53 #include <vm/vm_param.h>
   54 #include <vm/vm_page.h>
   55 #include <vm/vm_extern.h>
   56 #include <sys/exec.h>
   57 #include <sys/kernel.h>
   58 #include <sys/module.h>
   59 #include <machine/cpu.h>
   60 #include <machine/limits.h>
   61 
   62 #include "i386/linux.h"
   63 #include "i386/linux_proto.h"
   64 #include "linux_signal.h"
   65 #include "linux_util.h"
   66 #include "linux_emuldata.h"
   67 
   68 MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes");
   69 MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futexes wp");
   70 
   71 struct futex;
   72 
   73 struct waiting_proc {
   74         uint32_t        wp_flags;
   75         struct futex    *wp_futex;
   76         TAILQ_ENTRY(waiting_proc) wp_list;
   77 };
   78 
   79 struct futex {
   80         struct lock     f_lck;
   81         uint32_t        *f_uaddr;
   82         uint32_t        f_refcount;
   83         LIST_ENTRY(futex) f_list;
   84         TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
   85 };
   86 
   87 struct futex_list futex_list;
   88 
   89 #if 0
   90 #define FUTEX_LOCK(f)           spin_lock(&(f)->f_lck)
   91 #define FUTEX_UNLOCK(f)         spin_unlock(&(f)->f_lck)
   92 #define FUTEX_INIT(f)           spin_init(&(f)->f_lck)
   93 #define FUTEX_SLEEP(f, id, flag, wm, timo)      ssleep((id), &(f)->f_lck, (flag), (wm), (timo))
   94 #endif
   95 
   96 #define FUTEX_LOCK(f)           lockmgr(&(f)->f_lck, LK_EXCLUSIVE)
   97 #define FUTEX_UNLOCK(f)         lockmgr(&(f)->f_lck, LK_RELEASE)
   98 #define FUTEX_INIT(f)           lockinit(&(f)->f_lck, "ftlk", 0, LK_CANRECURSE)
   99 #define FUTEX_DESTROY(f)        lockuninit(&(f)->f_lck)
  100 #define FUTEX_ASSERT_LOCKED(f)  KKASSERT(lockstatus(&(f)->f_lck, curthread) == LK_EXCLUSIVE)
  101 #define FUTEX_SLEEP(f, id, flag, wm, timo)      lksleep((id), &(f)->f_lck, (flag), (wm), (timo))
  102 
  103 struct lock futex_mtx;                  /* protects the futex list */
  104 #define FUTEXES_LOCK            lockmgr(&futex_mtx, LK_EXCLUSIVE)
  105 #define FUTEXES_UNLOCK          lockmgr(&futex_mtx, LK_RELEASE)
  106 
  107 /* Debug magic to take advantage of freebsd's mess */
  108 #ifdef LINUX_DEBUG
  109 #define LINUX_CTR_PREFIX
  110 #else
  111 #define LINUX_CTR_PREFIX        while (0)
  112 #endif
  113 
  114 #define LINUX_CTR1(a,b,c)       LINUX_CTR_PREFIX kprintf("linux_futex: " b "\n",c)
  115 #define LINUX_CTR2(a,b,c,d)     LINUX_CTR_PREFIX kprintf("linux_futex: " b "\n",c,d)
  116 #define LINUX_CTR3(a,b,c,d,e)   LINUX_CTR_PREFIX kprintf("linux_futex: " b "\n",c,d,e)
  117 #define LINUX_CTR4(a,b,c,d,e,f) LINUX_CTR_PREFIX kprintf("linux_futex: " b "\n",c,d,e,f)
  118 #define LINUX_CTR5(a,b,c,d,e,f,g)       LINUX_CTR_PREFIX kprintf("linux_futex: " b "\n",c,d,e,f,g)
  119 
  120 
  121 
  122 /* flags for futex_get() */
  123 #define FUTEX_CREATE_WP         0x1     /* create waiting_proc */
  124 #define FUTEX_DONTCREATE        0x2     /* don't create futex if not exists */
  125 #define FUTEX_DONTEXISTS        0x4     /* return EINVAL if futex exists */
  126 
  127 /* wp_flags */
  128 #define FUTEX_WP_REQUEUED       0x1     /* wp requeued - wp moved from wp_list
  129                                          * of futex where thread sleep to wp_list
  130                                          * of another futex.
  131                                          */
  132 #define FUTEX_WP_REMOVED        0x2     /* wp is woken up and removed from futex
  133                                          * wp_list to prevent double wakeup.
  134                                          */
  135 
  136 /* support.s */
  137 int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
  138 int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
  139 int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
  140 int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
  141 int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
  142 
  143 static void
  144 futex_put(struct futex *f, struct waiting_proc *wp)
  145 {
  146         FUTEX_ASSERT_LOCKED(f);
  147         if (wp != NULL) {
  148                 if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
  149                         TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
  150                 kfree(wp, M_FUTEX_WP);
  151         }
  152 
  153         FUTEXES_LOCK;
  154         if (--f->f_refcount == 0) {
  155                 LIST_REMOVE(f, f_list);
  156                 FUTEXES_UNLOCK;
  157                 FUTEX_UNLOCK(f);
  158 
  159                 LINUX_CTR2(sys_futex, "futex_put destroy uaddr %p ref %d",
  160                     f->f_uaddr, f->f_refcount);
  161                 FUTEX_DESTROY(f);
  162                 kfree(f, M_FUTEX);
  163                 return;
  164         }
  165 
  166         LINUX_CTR2(sys_futex, "futex_put uaddr %p ref %d",
  167             f->f_uaddr, f->f_refcount);
  168         FUTEXES_UNLOCK;
  169         FUTEX_UNLOCK(f);
  170 }
  171 
  172 static int
  173 futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
  174 {
  175         struct futex *f, *tmpf;
  176 
  177         *newf = tmpf = NULL;
  178 
  179 retry:
  180         FUTEXES_LOCK;
  181         LIST_FOREACH(f, &futex_list, f_list) {
  182                 if (f->f_uaddr == uaddr) {
  183                         if (tmpf != NULL) {
  184                                 FUTEX_UNLOCK(tmpf);
  185                                 FUTEX_DESTROY(tmpf);
  186                                 kfree(tmpf, M_FUTEX);
  187                         }
  188                         if (flags & FUTEX_DONTEXISTS) {
  189                                 FUTEXES_UNLOCK;
  190                                 return (EINVAL);
  191                         }
  192 
  193                         /*
  194                          * Increment refcount of the found futex to
  195                          * prevent it from deallocation before FUTEX_LOCK()
  196                          */
  197                         ++f->f_refcount;
  198                         FUTEXES_UNLOCK;
  199 
  200                         FUTEX_LOCK(f);
  201                         *newf = f;
  202                         LINUX_CTR2(sys_futex, "futex_get uaddr %p ref %d",
  203                             uaddr, f->f_refcount);
  204                         return (0);
  205                 }
  206         }
  207 
  208         if (flags & FUTEX_DONTCREATE) {
  209                 FUTEXES_UNLOCK;
  210                 LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr);
  211                 return (0);
  212         }
  213 
  214         if (tmpf == NULL) {
  215                 FUTEXES_UNLOCK;
  216                 tmpf = kmalloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
  217                 tmpf->f_uaddr = uaddr;
  218                 tmpf->f_refcount = 1;
  219                 FUTEX_INIT(tmpf);
  220                 TAILQ_INIT(&tmpf->f_waiting_proc);
  221 
  222                 /*
  223                  * Lock the new futex before an insert into the futex_list
  224                  * to prevent futex usage by other.
  225                  */
  226                 FUTEX_LOCK(tmpf);
  227                 goto retry;
  228         }
  229 
  230         LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
  231         FUTEXES_UNLOCK;
  232 
  233         LINUX_CTR2(sys_futex, "futex_get uaddr %p ref %d new",
  234             uaddr, tmpf->f_refcount);
  235         *newf = tmpf;
  236         return (0);
  237 }
  238 
  239 static int
  240 futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
  241     uint32_t flags)
  242 {
  243         int error;
  244 
  245         if (flags & FUTEX_CREATE_WP) {
  246                 *wp = kmalloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK);
  247                 (*wp)->wp_flags = 0;
  248         }
  249         error = futex_get0(uaddr, f, flags);
  250         if (error) {
  251                 if (flags & FUTEX_CREATE_WP)
  252                         kfree(*wp, M_FUTEX_WP);
  253                 return (error);
  254         }
  255         if (flags & FUTEX_CREATE_WP) {
  256                 TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list);
  257                 (*wp)->wp_futex = *f;
  258         }
  259 
  260         return (error);
  261 }
  262 
  263 static int
  264 futex_sleep(struct futex *f, struct waiting_proc *wp, int timeout)
  265 {
  266         int error;
  267 
  268         FUTEX_ASSERT_LOCKED(f);
  269         LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %d ref %d",
  270             f->f_uaddr, wp, timeout, f->f_refcount);
  271         error = FUTEX_SLEEP(f, wp, PCATCH, "futex", timeout);
  272         if (wp->wp_flags & FUTEX_WP_REQUEUED) {
  273                 KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
  274                 LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p w"
  275                     " %p requeued uaddr %p ref %d",
  276                     error, f->f_uaddr, wp, wp->wp_futex->f_uaddr,
  277                     wp->wp_futex->f_refcount);
  278                 futex_put(f, NULL);
  279                 f = wp->wp_futex;
  280                 FUTEX_LOCK(f);
  281         }
  282 
  283         futex_put(f, wp);
  284         return (error);
  285 }
  286 
  287 static int
  288 futex_wake(struct futex *f, int n)
  289 {
  290         struct waiting_proc *wp, *wpt;
  291         int count = 0;
  292 
  293         FUTEX_ASSERT_LOCKED(f);
  294         TAILQ_FOREACH_MUTABLE(wp, &f->f_waiting_proc, wp_list, wpt) {
  295                 LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d",
  296                     f->f_uaddr, wp, f->f_refcount);
  297                 wp->wp_flags |= FUTEX_WP_REMOVED;
  298                 TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
  299                 wakeup_one(wp);
  300                 if (++count == n)
  301                         break;
  302         }
  303 
  304         return (count);
  305 }
  306 
  307 static int
  308 futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
  309 {
  310         struct waiting_proc *wp, *wpt;
  311         int count = 0;
  312 
  313         FUTEX_ASSERT_LOCKED(f);
  314         FUTEX_ASSERT_LOCKED(f2);
  315 
  316         TAILQ_FOREACH_MUTABLE(wp, &f->f_waiting_proc, wp_list, wpt) {
  317                 if (++count <= n) {
  318                         LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p",
  319                             f->f_uaddr, wp);
  320                         wp->wp_flags |= FUTEX_WP_REMOVED;
  321                         TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
  322                         wakeup_one(wp);
  323                 } else {
  324                         LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p",
  325                             f->f_uaddr, wp, f2->f_uaddr);
  326                         wp->wp_flags |= FUTEX_WP_REQUEUED;
  327                         /* Move wp to wp_list of f2 futex */
  328                         TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
  329                         TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list);
  330 
  331                         /*
  332                          * Thread which sleeps on wp after waking should
  333                          * acquire f2 lock, so increment refcount of f2 to
  334                          * prevent it from premature deallocation.
  335                          */
  336                         wp->wp_futex = f2;
  337                         FUTEXES_LOCK;
  338                         ++f2->f_refcount;
  339                         FUTEXES_UNLOCK;
  340                         if (count - n >= n2)
  341                                 break;
  342                 }
  343         }
  344 
  345         return (count);
  346 }
  347 
  348 static int
  349 futex_wait(struct futex *f, struct waiting_proc *wp, struct l_timespec *ts)
  350 {
  351         struct l_timespec timeout;
  352         struct timeval tv;
  353         int timeout_hz;
  354         int error;
  355 
  356         if (ts != NULL) {
  357                 error = copyin(ts, &timeout, sizeof(timeout));
  358                 if (error)
  359                         return (error);
  360                 TIMESPEC_TO_TIMEVAL(&tv, &timeout);
  361                 error = itimerfix(&tv);
  362                 if (error)
  363                         return (error);
  364                 timeout_hz = tvtohz_high(&tv);
  365         } else {        
  366                 timeout_hz = 0;
  367         }
  368 
  369         error = futex_sleep(f, wp, timeout_hz);
  370         if (error == EWOULDBLOCK)
  371                 error = ETIMEDOUT;
  372 
  373         return (error);
  374 }
  375 
  376 static int
  377 futex_atomic_op(struct proc *p, int encoded_op, uint32_t *uaddr)
  378 {
  379         int op = (encoded_op >> 28) & 7;
  380         int cmp = (encoded_op >> 24) & 15;
  381         int oparg = (encoded_op << 8) >> 20;
  382         int cmparg = (encoded_op << 20) >> 20;
  383         int oldval = 0, ret;
  384 
  385         if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
  386                 oparg = 1 << oparg;
  387 
  388 #ifdef DEBUG
  389         if (ldebug(sys_futex))
  390                 kprintf("futex_atomic_op: op = %d, cmp = %d, oparg = %x, "
  391                        "cmparg = %x, uaddr = %p\n",
  392                        op, cmp, oparg, cmparg, uaddr);
  393 #endif
  394         /* XXX: linux verifies access here and returns EFAULT */
  395 
  396         switch (op) {
  397         case FUTEX_OP_SET:
  398                 ret = futex_xchgl(oparg, uaddr, &oldval);
  399                 break;
  400         case FUTEX_OP_ADD:
  401                 ret = futex_addl(oparg, uaddr, &oldval);
  402                 break;
  403         case FUTEX_OP_OR:
  404                 ret = futex_orl(oparg, uaddr, &oldval);
  405                 break;
  406         case FUTEX_OP_ANDN:
  407                 ret = futex_andl(~oparg, uaddr, &oldval);
  408                 break;
  409         case FUTEX_OP_XOR:
  410                 ret = futex_xorl(oparg, uaddr, &oldval);
  411                 break;
  412         default:
  413                 ret = -ENOSYS;
  414                 break;
  415         }
  416 
  417         if (ret)
  418                 return (ret);
  419 
  420         switch (cmp) {
  421         case FUTEX_OP_CMP_EQ:
  422                 return (oldval == cmparg);
  423         case FUTEX_OP_CMP_NE:
  424                 return (oldval != cmparg);
  425         case FUTEX_OP_CMP_LT:
  426                 return (oldval < cmparg);
  427         case FUTEX_OP_CMP_GE:
  428                 return (oldval >= cmparg);
  429         case FUTEX_OP_CMP_LE:
  430                 return (oldval <= cmparg);
  431         case FUTEX_OP_CMP_GT:
  432                 return (oldval > cmparg);
  433         default:
  434                 return (-ENOSYS);
  435         }
  436 }
  437 
  438 int
  439 sys_linux_sys_futex(struct linux_sys_futex_args *args)
  440 {
  441         int op_ret, val, ret, nrwake;
  442         struct waiting_proc *wp;
  443         struct futex *f, *f2 = NULL;
  444         int error = 0;
  445 
  446         /*
  447          * Our implementation provides only privates futexes. Most of the apps
  448          * should use private futexes but don't claim so. Therefore we treat
  449          * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works
  450          * in most cases (ie. when futexes are not shared on file descriptor
  451          * or between different processes.).
  452          */
  453         args->op = (args->op & ~LINUX_FUTEX_PRIVATE_FLAG);
  454 
  455         switch (args->op) {
  456         case LINUX_FUTEX_WAIT:
  457                 LINUX_CTR2(sys_futex, "WAIT val %d uaddr %p",
  458                     args->val, args->uaddr);
  459 #ifdef DEBUG
  460                 if (ldebug(sys_futex))
  461                         kprintf(ARGS(sys_futex, "futex_wait val %d uaddr %p"),
  462                             args->val, args->uaddr);
  463 #endif
  464                 error = futex_get(args->uaddr, &wp, &f, FUTEX_CREATE_WP);
  465                 if (error)
  466                         return (error);
  467                 error = copyin(args->uaddr, &val, sizeof(val));
  468                 if (error) {
  469                         LINUX_CTR1(sys_futex, "WAIT copyin failed %d",
  470                             error);
  471                         futex_put(f, wp);
  472                         return (error);
  473                 }
  474                 if (val != args->val) {
  475                         LINUX_CTR3(sys_futex, "WAIT uaddr %p val %d != uval %d",
  476                             args->uaddr, args->val, val);
  477                         futex_put(f, wp);
  478                         return (EWOULDBLOCK);
  479                 }
  480 
  481                 error = futex_wait(f, wp, args->timeout);
  482                 break;
  483 
  484         case LINUX_FUTEX_WAKE:
  485 
  486                 LINUX_CTR2(sys_futex, "WAKE val %d uaddr %p",
  487                     args->val, args->uaddr);
  488 
  489                 /*
  490                  * XXX: Linux is able to cope with different addresses
  491                  * corresponding to the same mapped memory in the sleeping
  492                  * and waker process(es).
  493                  */
  494 #ifdef DEBUG
  495                 if (ldebug(sys_futex))
  496                         kprintf(ARGS(sys_futex, "futex_wake val %d uaddr %p"),
  497                             args->val, args->uaddr);
  498 #endif
  499                 error = futex_get(args->uaddr, NULL, &f, FUTEX_DONTCREATE);
  500                 if (error)
  501                         return (error);
  502                 if (f == NULL) {
  503                         args->sysmsg_iresult = 0;
  504                         return (error);
  505                 }
  506                 args->sysmsg_iresult = futex_wake(f, args->val);
  507                 futex_put(f, NULL);
  508                 break;
  509 
  510         case LINUX_FUTEX_CMP_REQUEUE:
  511 
  512                 LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
  513                     "val %d val3 %d uaddr2 %p val2 %d",
  514                     args->uaddr, args->val, args->val3, args->uaddr2,
  515                     (int)(unsigned long)args->timeout);
  516 
  517 #ifdef DEBUG
  518                 if (ldebug(sys_futex))
  519                         kprintf(ARGS(sys_futex, "futex_cmp_requeue uaddr %p "
  520                             "val %d val3 %d uaddr2 %p val2 %d"),
  521                             args->uaddr, args->val, args->val3, args->uaddr2,
  522                             (int)(unsigned long)args->timeout);
  523 #endif
  524                 /*
  525                  * Linux allows this, we would not, it is an incorrect
  526                  * usage of declared ABI, so return EINVAL.
  527                  */
  528                 if (args->uaddr == args->uaddr2)
  529                         return (EINVAL);
  530                 error = futex_get0(args->uaddr, &f, 0);
  531                 if (error)
  532                         return (error);
  533 
  534                 /*
  535                  * To avoid deadlocks return EINVAL if second futex
  536                  * exists at this time. Otherwise create the new futex
  537                  * and ignore false positive LOR which thus happens.
  538                  *
  539                  * Glibc fall back to FUTEX_WAKE in case of any error
  540                  * returned by FUTEX_CMP_REQUEUE.
  541                  */
  542                 error = futex_get0(args->uaddr2, &f2, FUTEX_DONTEXISTS);
  543                 if (error) {
  544                         futex_put(f, NULL);
  545                         return (error);
  546                 }
  547                 error = copyin(args->uaddr, &val, sizeof(val));
  548                 if (error) {
  549                         LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d",
  550                             error);
  551                         futex_put(f2, NULL);
  552                         futex_put(f, NULL);
  553                         return (error);
  554                 }
  555                 if (val != args->val3) {
  556                         LINUX_CTR2(sys_futex, "CMP_REQUEUE val %d != uval %d",
  557                             args->val, val);
  558                         futex_put(f2, NULL);
  559                         futex_put(f, NULL);
  560                         return (EAGAIN);
  561                 }
  562 
  563                 nrwake = (int)(unsigned long)args->timeout;
  564                 args->sysmsg_iresult = futex_requeue(f, args->val, f2, nrwake);
  565                 futex_put(f2, NULL);
  566                 futex_put(f, NULL);
  567                 break;
  568 
  569         case LINUX_FUTEX_WAKE_OP:
  570 
  571                 LINUX_CTR5(sys_futex, "WAKE_OP "
  572                     "uaddr %p op %d val %x uaddr2 %p val3 %x",
  573                     args->uaddr, args->op, args->val,
  574                     args->uaddr2, args->val3);
  575 
  576 #ifdef DEBUG
  577                 if (ldebug(sys_futex))
  578                         kprintf(ARGS(sys_futex, "futex_wake_op "
  579                             "uaddr %p op %d val %x uaddr2 %p val3 %x"),
  580                             args->uaddr, args->op, args->val,
  581                             args->uaddr2, args->val3);
  582 #endif
  583                 error = futex_get0(args->uaddr, &f, 0);
  584                 if (error)
  585                         return (error);
  586                 if (args->uaddr != args->uaddr2)
  587                         error = futex_get0(args->uaddr2, &f2, 0);
  588                 if (error) {
  589                         futex_put(f, NULL);
  590                         return (error);
  591                 }
  592 
  593                 /*
  594                  * This function returns positive number as results and
  595                  * negative as errors
  596                  */
  597                 op_ret = futex_atomic_op(curproc, args->val3, args->uaddr2);
  598 
  599                 if (op_ret < 0) {
  600                         /* XXX: We don't handle the EFAULT yet. */
  601                         if (op_ret != -EFAULT) {
  602                                 if (f2 != NULL)
  603                                         futex_put(f2, NULL);
  604                                 futex_put(f, NULL);
  605                                 return (-op_ret);
  606                         }
  607                         if (f2 != NULL)
  608                                 futex_put(f2, NULL);
  609                         futex_put(f, NULL);
  610                         return (EFAULT);
  611                 }
  612 
  613                 ret = futex_wake(f, args->val);
  614 
  615                 if (op_ret > 0) {
  616                         op_ret = 0;
  617                         nrwake = (int)(unsigned long)args->timeout;
  618 
  619                         if (f2 != NULL)
  620                                 op_ret += futex_wake(f2, nrwake);
  621                         else
  622                                 op_ret += futex_wake(f, nrwake);
  623                         ret += op_ret;
  624 
  625                 }
  626                 if (f2 != NULL)
  627                         futex_put(f2, NULL);
  628                 futex_put(f, NULL);
  629                 args->sysmsg_iresult = ret;
  630                 break;
  631 
  632         case LINUX_FUTEX_LOCK_PI:
  633                 /* not yet implemented */
  634                 return (ENOSYS);
  635 
  636         case LINUX_FUTEX_UNLOCK_PI:
  637                 /* not yet implemented */
  638                 return (ENOSYS);
  639 
  640         case LINUX_FUTEX_TRYLOCK_PI:
  641                 /* not yet implemented */
  642                 return (ENOSYS);
  643 
  644         case LINUX_FUTEX_REQUEUE:
  645 
  646                 /*
  647                  * Glibc does not use this operation since version 2.3.3,
  648                  * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
  649                  * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
  650                  * FUTEX_REQUEUE returned EINVAL.
  651                  */
  652                 return (EINVAL);
  653 
  654         default:
  655                 kprintf("linux_sys_futex: unknown op %d\n", args->op);
  656                 return (ENOSYS);
  657         }
  658 
  659         return (error);
  660 }
  661 
  662 int
  663 sys_linux_set_robust_list(struct linux_set_robust_list_args *args)
  664 {
  665 #ifdef DEBUG
  666         if (ldebug(set_robust_list))
  667                 kprintf(ARGS(set_robust_list, "head %p len %d"),
  668                     args->head, args->len);
  669 #endif
  670 
  671         if (args->len != sizeof(struct linux_robust_list_head))
  672                 return (EINVAL);
  673 
  674         emuldata_set_robust(curproc, args->head);
  675 
  676         return (0);
  677 }
  678 
  679 
  680 
  681 int
  682 sys_linux_get_robust_list(struct linux_get_robust_list_args *args)
  683 {
  684         struct linux_emuldata *em;
  685         struct linux_robust_list_head empty_head;
  686         struct linux_robust_list_head *head;
  687         l_size_t len = sizeof(struct linux_robust_list_head);
  688         int error = 0;
  689 
  690 #ifdef  DEBUG
  691         if (ldebug(get_robust_list))
  692                 kprintf(ARGS(get_robust_list, ""));
  693 #endif
  694 
  695         if (args->pid == 0) {
  696                 EMUL_LOCK();
  697                 em = emuldata_get(curproc);
  698                 KKASSERT(em != NULL);
  699                 if (em->robust_futexes == NULL) {
  700                         bzero(&empty_head, sizeof(empty_head));
  701                         head = &empty_head;
  702                 } else {
  703                         head = em->robust_futexes;
  704                 }
  705                 EMUL_UNLOCK();
  706         } else {
  707                 struct proc *p;
  708 
  709                 p = pfind(args->pid);
  710                 if (p == NULL) {
  711                         return (ESRCH);
  712                 }
  713 
  714                 EMUL_LOCK();
  715                 em = emuldata_get(p);
  716                 head = em->robust_futexes;
  717                 EMUL_UNLOCK();
  718                 /* XXX: ptrace? p_candebug?*/
  719                 if (priv_check(curthread, PRIV_CRED_SETUID) ||
  720                     priv_check(curthread, PRIV_CRED_SETEUID)/* ||
  721                     p_candebug(curproc, p) */) {
  722                         PRELE(p);
  723                         return (EPERM);
  724                 }
  725                 PRELE(p);
  726         }
  727 
  728         error = copyout(&len, args->len, sizeof(l_size_t));
  729         if (error)
  730                 return (EFAULT);
  731 
  732         error = copyout(head, args->head, sizeof(struct linux_robust_list_head));
  733 
  734         return (error);
  735 }
  736 
  737 static int
  738 handle_futex_death(struct proc *p, uint32_t *uaddr, int pi)
  739 {
  740         uint32_t uval, nval, mval;
  741         struct futex *f;
  742         int error;
  743 
  744 retry:
  745         if (copyin(uaddr, &uval, 4))
  746                 return (EFAULT);
  747         if ((uval & FUTEX_TID_MASK) == p->p_pid) {
  748                 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
  749                 nval = casuword((ulong *)uaddr, uval, mval);
  750 
  751                 if (nval == -1)
  752                         return (EFAULT);
  753 
  754                 if (nval != uval)
  755                         goto retry;
  756 
  757                 if (!pi && (uval & FUTEX_WAITERS)) {
  758                         error = futex_get(uaddr, NULL, &f,
  759                             FUTEX_DONTCREATE);
  760                         if (error)
  761                                 return (error);
  762                         if (f != NULL) {
  763                                 futex_wake(f, 1);
  764                                 futex_put(f, NULL);
  765                         }
  766                 }
  767         }
  768 
  769         return (0);
  770 }
  771 
  772 static int
  773 fetch_robust_entry(struct linux_robust_list **entry,
  774     struct linux_robust_list **head, int *pi)
  775 {
  776         l_ulong uentry;
  777 
  778         if (copyin((const void *)head, &uentry, sizeof(l_ulong)))
  779                 return (EFAULT);
  780 
  781         *entry = (void *)(uentry & ~1UL);
  782         *pi = uentry & 1;
  783 
  784         return (0);
  785 }
  786 
  787 /* This walks the list of robust futexes releasing them. */
  788 void
  789 release_futexes(struct proc *p)
  790 {
  791         struct linux_robust_list_head *head = NULL;
  792         struct linux_robust_list *entry, *next_entry, *pending;
  793         unsigned int limit = 2048, pi, next_pi, pip;
  794         struct linux_emuldata *em;
  795         l_long futex_offset;
  796         int rc;
  797 
  798         EMUL_LOCK();
  799         KKASSERT(p != NULL);
  800         em = emuldata_get(p);
  801         KKASSERT(em != NULL);
  802         head = em->robust_futexes;
  803         EMUL_UNLOCK();
  804 
  805         if (head == NULL)
  806                 return;
  807 
  808         if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi))
  809                 return;
  810 
  811         if (copyin(&head->futex_offset, &futex_offset, sizeof(futex_offset)))
  812                 return;
  813 
  814         if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip))
  815                 return;
  816 
  817         while (entry != &head->list) {
  818                 rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);
  819 
  820                 if (entry != pending)
  821                         if (handle_futex_death(p, (uint32_t *)entry + futex_offset, pi))
  822                                 return;
  823                 if (rc)
  824                         return;
  825 
  826                 entry = next_entry;
  827                 pi = next_pi;
  828 
  829                 if (!--limit)
  830                         break;
  831 
  832                 /* XXX: not sure about this yield, was sched_relinquish(curthread); */
  833                 lwkt_yield();
  834         }
  835 
  836         if (pending)
  837                 handle_futex_death(p, (uint32_t *)pending + futex_offset, pip);
  838 }

Cache object: e63c460a66c7c6d0fcc3ac40350b1f6f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.