vfs_lockf.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*      $NetBSD: vfs_lockf.c,v 1.78 2022/11/25 16:15:39 riastradh Exp $ */
    2 
    3 /*
    4  * Copyright (c) 1982, 1986, 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * This code is derived from software contributed to Berkeley by
    8  * Scooter Morris at Genentech Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)ufs_lockf.c 8.4 (Berkeley) 10/26/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.78 2022/11/25 16:15:39 riastradh Exp $");
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/kernel.h>
   43 #include <sys/file.h>
   44 #include <sys/proc.h>
   45 #include <sys/vnode.h>
   46 #include <sys/pool.h>
   47 #include <sys/fcntl.h>
   48 #include <sys/lockf.h>
   49 #include <sys/atomic.h>
   50 #include <sys/kauth.h>
   51 #include <sys/uidinfo.h>
   52 
   53 /*
   54  * The lockf structure is a kernel structure which contains the information
   55  * associated with a byte range lock.  The lockf structures are linked into
   56  * the vnode structure.  Locks are sorted by the starting byte of the lock for
   57  * efficiency.
   58  *
   59  * lf_next is used for two purposes, depending on whether the lock is
   60  * being held, or is in conflict with an existing lock.  If this lock
   61  * is held, it indicates the next lock on the same vnode.
   62  * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
   63  * must be queued on the lf_blkhd TAILQ of lock->lf_next.
   64  */
   65 
   66 TAILQ_HEAD(locklist, lockf);
   67 
   68 struct lockf {
   69         kcondvar_t lf_cv;        /* Signalling */
   70         short   lf_flags;        /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
   71         short   lf_type;         /* Lock type: F_RDLCK, F_WRLCK */
   72         off_t   lf_start;        /* The byte # of the start of the lock */
   73         off_t   lf_end;          /* The byte # of the end of the lock (-1=EOF)*/
   74         void    *lf_id;          /* process or file description holding lock */
   75         struct  lockf **lf_head; /* Back pointer to the head of lockf list */
   76         struct  lockf *lf_next;  /* Next lock on this vnode, or blocking lock */
   77         struct  locklist lf_blkhd; /* List of requests blocked on this lock */
   78         TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */
   79         uid_t   lf_uid;          /* User ID responsible */
   80 };
   81 
   82 /* Maximum length of sleep chains to traverse to try and detect deadlock. */
   83 #define MAXDEPTH 50
   84 
   85 static pool_cache_t lockf_cache;
   86 static kmutex_t *lockf_lock;
   87 static char lockstr[] = "lockf";
   88 
   89 /*
   90  * This variable controls the maximum number of processes that will
   91  * be checked in doing deadlock detection.
   92  */
   93 int maxlockdepth = MAXDEPTH;
   94 
   95 #ifdef LOCKF_DEBUG
   96 int     lockf_debug = 0;
   97 #endif
   98 
   99 #define SELF    0x1
  100 #define OTHERS  0x2
  101 
  102 /*
  103  * XXX TODO
  104  * Misc cleanups: "void *id" should be visible in the API as a
  105  * "struct proc *".
  106  * (This requires rototilling all VFS's which support advisory locking).
  107  */
  108 
  109 /*
  110  * If there's a lot of lock contention on a single vnode, locking
  111  * schemes which allow for more paralleism would be needed.  Given how
  112  * infrequently byte-range locks are actually used in typical BSD
  113  * code, a more complex approach probably isn't worth it.
  114  */
  115 
  116 /*
  117  * We enforce a limit on locks by uid, so that a single user cannot
  118  * run the kernel out of memory.  For now, the limit is pretty coarse.
  119  * There is no limit on root.
  120  *
  121  * Splitting a lock will always succeed, regardless of current allocations.
  122  * If you're slightly above the limit, we still have to permit an allocation
  123  * so that the unlock can succeed.  If the unlocking causes too many splits,
  124  * however, you're totally cutoff.
  125  */
  126 #define MAXLOCKSPERUID (2 * maxfiles)
  127 
  128 #ifdef LOCKF_DEBUG
  129 /*
  130  * Print out a lock.
  131  */
  132 static void
  133 lf_print(const char *tag, struct lockf *lock)
  134 {
  135 
  136         printf("%s: lock %p for ", tag, lock);
  137         if (lock->lf_flags & F_POSIX)
  138                 printf("proc %d", ((struct proc *)lock->lf_id)->p_pid);
  139         else
  140                 printf("file %p", (struct file *)lock->lf_id);
  141         printf(" %s, start %jd, end %jd",
  142                 lock->lf_type == F_RDLCK ? "shared" :
  143                 lock->lf_type == F_WRLCK ? "exclusive" :
  144                 lock->lf_type == F_UNLCK ? "unlock" :
  145                 "unknown", (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
  146         if (TAILQ_FIRST(&lock->lf_blkhd))
  147                 printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd));
  148         else
  149                 printf("\n");
  150 }
  151 
  152 static void
  153 lf_printlist(const char *tag, struct lockf *lock)
  154 {
  155         struct lockf *lf, *blk;
  156 
  157         printf("%s: Lock list:\n", tag);
  158         for (lf = *lock->lf_head; lf; lf = lf->lf_next) {
  159                 printf("\tlock %p for ", lf);
  160                 if (lf->lf_flags & F_POSIX)
  161                         printf("proc %d", ((struct proc *)lf->lf_id)->p_pid);
  162                 else
  163                         printf("file %p", (struct file *)lf->lf_id);
  164                 printf(", %s, start %jd, end %jd",
  165                         lf->lf_type == F_RDLCK ? "shared" :
  166                         lf->lf_type == F_WRLCK ? "exclusive" :
  167                         lf->lf_type == F_UNLCK ? "unlock" :
  168                         "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
  169                 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
  170                         if (blk->lf_flags & F_POSIX)
  171                                 printf("; proc %d",
  172                                     ((struct proc *)blk->lf_id)->p_pid);
  173                         else
  174                                 printf("; file %p", (struct file *)blk->lf_id);
  175                         printf(", %s, start %jd, end %jd",
  176                                 blk->lf_type == F_RDLCK ? "shared" :
  177                                 blk->lf_type == F_WRLCK ? "exclusive" :
  178                                 blk->lf_type == F_UNLCK ? "unlock" :
  179                                 "unknown", (intmax_t)blk->lf_start, (intmax_t)blk->lf_end);
  180                         if (TAILQ_FIRST(&blk->lf_blkhd))
  181                                  panic("lf_printlist: bad list");
  182                 }
  183                 printf("\n");
  184         }
  185 }
  186 #endif /* LOCKF_DEBUG */
  187 
  188 /*
  189  * 3 options for allowfail.
  190  * 0 - always allocate.  1 - cutoff at limit.  2 - cutoff at double limit.
  191  */
  192 static struct lockf *
  193 lf_alloc(int allowfail)
  194 {
  195         struct uidinfo *uip;
  196         struct lockf *lock;
  197         u_long lcnt;
  198         const uid_t uid = kauth_cred_geteuid(kauth_cred_get());
  199 
  200         uip = uid_find(uid);
  201         lcnt = atomic_inc_ulong_nv(&uip->ui_lockcnt);
  202         if (uid && allowfail && lcnt >
  203             (allowfail == 1 ? MAXLOCKSPERUID : (MAXLOCKSPERUID * 2))) {
  204                 atomic_dec_ulong(&uip->ui_lockcnt);
  205                 return NULL;
  206         }
  207 
  208         lock = pool_cache_get(lockf_cache, PR_WAITOK);
  209         lock->lf_uid = uid;
  210         return lock;
  211 }
  212 
  213 static void
  214 lf_free(struct lockf *lock)
  215 {
  216         struct uidinfo *uip;
  217 
  218         uip = uid_find(lock->lf_uid);
  219         atomic_dec_ulong(&uip->ui_lockcnt);
  220         pool_cache_put(lockf_cache, lock);
  221 }
  222 
  223 static int
  224 lf_ctor(void *arg, void *obj, int flag)
  225 {
  226         struct lockf *lock;
  227 
  228         lock = obj;
  229         cv_init(&lock->lf_cv, lockstr);
  230 
  231         return 0;
  232 }
  233 
  234 static void
  235 lf_dtor(void *arg, void *obj)
  236 {
  237         struct lockf *lock;
  238 
  239         lock = obj;
  240         cv_destroy(&lock->lf_cv);
  241 }
  242 
  243 /*
  244  * Walk the list of locks for an inode to
  245  * find an overlapping lock (if any).
  246  *
  247  * NOTE: this returns only the FIRST overlapping lock.  There
  248  *       may be more than one.
  249  */
  250 static int
  251 lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
  252     struct lockf ***prev, struct lockf **overlap)
  253 {
  254         off_t start, end;
  255 
  256         *overlap = lf;
  257         if (lf == NULL)
  258                 return 0;
  259 #ifdef LOCKF_DEBUG
  260         if (lockf_debug & 2)
  261                 lf_print("lf_findoverlap: looking for overlap in", lock);
  262 #endif /* LOCKF_DEBUG */
  263         start = lock->lf_start;
  264         end = lock->lf_end;
  265         while (lf != NULL) {
  266                 if (((type == SELF) && lf->lf_id != lock->lf_id) ||
  267                     ((type == OTHERS) && lf->lf_id == lock->lf_id)) {
  268                         *prev = &lf->lf_next;
  269                         *overlap = lf = lf->lf_next;
  270                         continue;
  271                 }
  272 #ifdef LOCKF_DEBUG
  273                 if (lockf_debug & 2)
  274                         lf_print("\tchecking", lf);
  275 #endif /* LOCKF_DEBUG */
  276                 /*
  277                  * OK, check for overlap
  278                  *
  279                  * Six cases:
  280                  *      0) no overlap
  281                  *      1) overlap == lock
  282                  *      2) overlap contains lock
  283                  *      3) lock contains overlap
  284                  *      4) overlap starts before lock
  285                  *      5) overlap ends after lock
  286                  */
  287                 if ((lf->lf_end != -1 && start > lf->lf_end) ||
  288                     (end != -1 && lf->lf_start > end)) {
  289                         /* Case 0 */
  290 #ifdef LOCKF_DEBUG
  291                         if (lockf_debug & 2)
  292                                 printf("no overlap\n");
  293 #endif /* LOCKF_DEBUG */
  294                         if ((type & SELF) && end != -1 && lf->lf_start > end)
  295                                 return 0;
  296                         *prev = &lf->lf_next;
  297                         *overlap = lf = lf->lf_next;
  298                         continue;
  299                 }
  300                 if ((lf->lf_start == start) && (lf->lf_end == end)) {
  301                         /* Case 1 */
  302 #ifdef LOCKF_DEBUG
  303                         if (lockf_debug & 2)
  304                                 printf("overlap == lock\n");
  305 #endif /* LOCKF_DEBUG */
  306                         return 1;
  307                 }
  308                 if ((lf->lf_start <= start) &&
  309                     (end != -1) &&
  310                     ((lf->lf_end >= end) || (lf->lf_end == -1))) {
  311                         /* Case 2 */
  312 #ifdef LOCKF_DEBUG
  313                         if (lockf_debug & 2)
  314                                 printf("overlap contains lock\n");
  315 #endif /* LOCKF_DEBUG */
  316                         return 2;
  317                 }
  318                 if (start <= lf->lf_start &&
  319                            (end == -1 ||
  320                            (lf->lf_end != -1 && end >= lf->lf_end))) {
  321                         /* Case 3 */
  322 #ifdef LOCKF_DEBUG
  323                         if (lockf_debug & 2)
  324                                 printf("lock contains overlap\n");
  325 #endif /* LOCKF_DEBUG */
  326                         return 3;
  327                 }
  328                 if ((lf->lf_start < start) &&
  329                         ((lf->lf_end >= start) || (lf->lf_end == -1))) {
  330                         /* Case 4 */
  331 #ifdef LOCKF_DEBUG
  332                         if (lockf_debug & 2)
  333                                 printf("overlap starts before lock\n");
  334 #endif /* LOCKF_DEBUG */
  335                         return 4;
  336                 }
  337                 if ((lf->lf_start > start) &&
  338                         (end != -1) &&
  339                         ((lf->lf_end > end) || (lf->lf_end == -1))) {
  340                         /* Case 5 */
  341 #ifdef LOCKF_DEBUG
  342                         if (lockf_debug & 2)
  343                                 printf("overlap ends after lock\n");
  344 #endif /* LOCKF_DEBUG */
  345                         return 5;
  346                 }
  347                 panic("lf_findoverlap: default");
  348         }
  349         return 0;
  350 }
  351 
  352 /*
  353  * Split a lock and a contained region into
  354  * two or three locks as necessary.
  355  */
  356 static void
  357 lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
  358 {
  359         struct lockf *splitlock;
  360 
  361 #ifdef LOCKF_DEBUG
  362         if (lockf_debug & 2) {
  363                 lf_print("lf_split", lock1);
  364                 lf_print("splitting from", lock2);
  365         }
  366 #endif /* LOCKF_DEBUG */
  367         /*
  368          * Check to see if splitting into only two pieces.
  369          */
  370         if (lock1->lf_start == lock2->lf_start) {
  371                 lock1->lf_start = lock2->lf_end + 1;
  372                 lock2->lf_next = lock1;
  373                 return;
  374         }
  375         if (lock1->lf_end == lock2->lf_end) {
  376                 lock1->lf_end = lock2->lf_start - 1;
  377                 lock2->lf_next = lock1->lf_next;
  378                 lock1->lf_next = lock2;
  379                 return;
  380         }
  381         /*
  382          * Make a new lock consisting of the last part of
  383          * the encompassing lock
  384          */
  385         splitlock = *sparelock;
  386         *sparelock = NULL;
  387         cv_destroy(&splitlock->lf_cv);
  388         memcpy(splitlock, lock1, sizeof(*splitlock));
  389         cv_init(&splitlock->lf_cv, lockstr);
  390 
  391         splitlock->lf_start = lock2->lf_end + 1;
  392         TAILQ_INIT(&splitlock->lf_blkhd);
  393         lock1->lf_end = lock2->lf_start - 1;
  394         /*
  395          * OK, now link it in
  396          */
  397         splitlock->lf_next = lock1->lf_next;
  398         lock2->lf_next = splitlock;
  399         lock1->lf_next = lock2;
  400 }
  401 
  402 /*
  403  * Wakeup a blocklist
  404  */
  405 static void
  406 lf_wakelock(struct lockf *listhead)
  407 {
  408         struct lockf *wakelock;
  409 
  410         while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
  411                 KASSERT(wakelock->lf_next == listhead);
  412                 TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
  413                 wakelock->lf_next = NULL;
  414 #ifdef LOCKF_DEBUG
  415                 if (lockf_debug & 2)
  416                         lf_print("lf_wakelock: awakening", wakelock);
  417 #endif
  418                 cv_broadcast(&wakelock->lf_cv);
  419         }
  420 }
  421 
  422 /*
  423  * Remove a byte-range lock on an inode.
  424  *
  425  * Generally, find the lock (or an overlap to that lock)
  426  * and remove it (or shrink it), then wakeup anyone we can.
  427  */
  428 static int
  429 lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
  430 {
  431         struct lockf **head = unlock->lf_head;
  432         struct lockf *lf = *head;
  433         struct lockf *overlap, **prev;
  434         int ovcase;
  435 
  436         if (lf == NULL)
  437                 return 0;
  438 #ifdef LOCKF_DEBUG
  439         if (unlock->lf_type != F_UNLCK)
  440                 panic("lf_clearlock: bad type");
  441         if (lockf_debug & 1)
  442                 lf_print("lf_clearlock", unlock);
  443 #endif /* LOCKF_DEBUG */
  444         prev = head;
  445         while ((ovcase = lf_findoverlap(lf, unlock, SELF,
  446             &prev, &overlap)) != 0) {
  447                 /*
  448                  * Wakeup the list of locks to be retried.
  449                  */
  450                 lf_wakelock(overlap);
  451 
  452                 switch (ovcase) {
  453 
  454                 case 1: /* overlap == lock */
  455                         *prev = overlap->lf_next;
  456                         lf_free(overlap);
  457                         break;
  458 
  459                 case 2: /* overlap contains lock: split it */
  460                         if (overlap->lf_start == unlock->lf_start) {
  461                                 overlap->lf_start = unlock->lf_end + 1;
  462                                 break;
  463                         }
  464                         lf_split(overlap, unlock, sparelock);
  465                         overlap->lf_next = unlock->lf_next;
  466                         break;
  467 
  468                 case 3: /* lock contains overlap */
  469                         *prev = overlap->lf_next;
  470                         lf = overlap->lf_next;
  471                         lf_free(overlap);
  472                         continue;
  473 
  474                 case 4: /* overlap starts before lock */
  475                         overlap->lf_end = unlock->lf_start - 1;
  476                         prev = &overlap->lf_next;
  477                         lf = overlap->lf_next;
  478                         continue;
  479 
  480                 case 5: /* overlap ends after lock */
  481                         overlap->lf_start = unlock->lf_end + 1;
  482                         break;
  483                 }
  484                 break;
  485         }
  486 #ifdef LOCKF_DEBUG
  487         if (lockf_debug & 1)
  488                 lf_printlist("lf_clearlock", unlock);
  489 #endif /* LOCKF_DEBUG */
  490         return 0;
  491 }
  492 
  493 /*
  494  * Walk the list of locks for an inode and
  495  * return the first blocking lock.
  496  */
  497 static struct lockf *
  498 lf_getblock(struct lockf *lock)
  499 {
  500         struct lockf **prev, *overlap, *lf = *(lock->lf_head);
  501 
  502         prev = lock->lf_head;
  503         while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
  504                 /*
  505                  * We've found an overlap, see if it blocks us
  506                  */
  507                 if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
  508                         return overlap;
  509                 /*
  510                  * Nope, point to the next one on the list and
  511                  * see if it blocks us
  512                  */
  513                 lf = overlap->lf_next;
  514         }
  515         return NULL;
  516 }
  517 
  518 /*
  519  * Set a byte-range lock.
  520  */
  521 static int
  522 lf_setlock(struct lockf *lock, struct lockf **sparelock,
  523     kmutex_t *interlock)
  524 {
  525         struct lockf *block;
  526         struct lockf **head = lock->lf_head;
  527         struct lockf **prev, *overlap, *ltmp;
  528         int ovcase, needtolink, error;
  529 
  530 #ifdef LOCKF_DEBUG
  531         if (lockf_debug & 1)
  532                 lf_print("lf_setlock", lock);
  533 #endif /* LOCKF_DEBUG */
  534 
  535         /*
  536          * Scan lock list for this file looking for locks that would block us.
  537          */
  538         while ((block = lf_getblock(lock)) != NULL) {
  539                 /*
  540                  * Free the structure and return if nonblocking.
  541                  */
  542                 if ((lock->lf_flags & F_WAIT) == 0) {
  543                         lf_free(lock);
  544                         return EAGAIN;
  545                 }
  546                 /*
  547                  * We are blocked. Since flock style locks cover
  548                  * the whole file, there is no chance for deadlock.
  549                  * For byte-range locks we must check for deadlock.
  550                  *
  551                  * Deadlock detection is done by looking through the
  552                  * wait channels to see if there are any cycles that
  553                  * involve us. MAXDEPTH is set just to make sure we
  554                  * do not go off into neverneverland.
  555                  */
  556                 if ((lock->lf_flags & F_POSIX) &&
  557                     (block->lf_flags & F_POSIX)) {
  558                         struct lwp *wlwp;
  559                         volatile const struct lockf *waitblock;
  560                         int i = 0;
  561                         struct proc *p;
  562 
  563                         p = (struct proc *)block->lf_id;
  564                         KASSERT(p != NULL);
  565                         while (i++ < maxlockdepth) {
  566                                 mutex_enter(p->p_lock);
  567                                 if (p->p_nlwps > 1) {
  568                                         mutex_exit(p->p_lock);
  569                                         break;
  570                                 }
  571                                 wlwp = LIST_FIRST(&p->p_lwps);
  572                                 lwp_lock(wlwp);
  573                                 if (wlwp->l_wchan == NULL ||
  574                                     wlwp->l_wmesg != lockstr) {
  575                                         lwp_unlock(wlwp);
  576                                         mutex_exit(p->p_lock);
  577                                         break;
  578                                 }
  579                                 waitblock = wlwp->l_wchan;
  580                                 lwp_unlock(wlwp);
  581                                 mutex_exit(p->p_lock);
  582                                 /* Get the owner of the blocking lock */
  583                                 waitblock = waitblock->lf_next;
  584                                 if ((waitblock->lf_flags & F_POSIX) == 0)
  585                                         break;
  586                                 p = (struct proc *)waitblock->lf_id;
  587                                 if (p == curproc) {
  588                                         lf_free(lock);
  589                                         return EDEADLK;
  590                                 }
  591                         }
  592                         /*
  593                          * If we're still following a dependency chain
  594                          * after maxlockdepth iterations, assume we're in
  595                          * a cycle to be safe.
  596                          */
  597                         if (i >= maxlockdepth) {
  598                                 lf_free(lock);
  599                                 return EDEADLK;
  600                         }
  601                 }
  602                 /*
  603                  * For flock type locks, we must first remove
  604                  * any shared locks that we hold before we sleep
  605                  * waiting for an exclusive lock.
  606                  */
  607                 if ((lock->lf_flags & F_FLOCK) &&
  608                     lock->lf_type == F_WRLCK) {
  609                         lock->lf_type = F_UNLCK;
  610                         (void) lf_clearlock(lock, NULL);
  611                         lock->lf_type = F_WRLCK;
  612                 }
  613                 /*
  614                  * Add our lock to the blocked list and sleep until we're free.
  615                  * Remember who blocked us (for deadlock detection).
  616                  */
  617                 lock->lf_next = block;
  618                 TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
  619 #ifdef LOCKF_DEBUG
  620                 if (lockf_debug & 1) {
  621                         lf_print("lf_setlock: blocking on", block);
  622                         lf_printlist("lf_setlock", block);
  623                 }
  624 #endif /* LOCKF_DEBUG */
  625                 error = cv_wait_sig(&lock->lf_cv, interlock);
  626 
  627                 /*
  628                  * We may have been awoken by a signal (in
  629                  * which case we must remove ourselves from the
  630                  * blocked list) and/or by another process
  631                  * releasing a lock (in which case we have already
  632                  * been removed from the blocked list and our
  633                  * lf_next field set to NULL).
  634                  */
  635                 if (lock->lf_next != NULL) {
  636                         TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
  637                         lock->lf_next = NULL;
  638                 }
  639                 if (error) {
  640                         lf_free(lock);
  641                         return error;
  642                 }
  643         }
  644         /*
  645          * No blocks!!  Add the lock.  Note that we will
  646          * downgrade or upgrade any overlapping locks this
  647          * process already owns.
  648          *
  649          * Skip over locks owned by other processes.
  650          * Handle any locks that overlap and are owned by ourselves.
  651          */
  652         prev = head;
  653         block = *head;
  654         needtolink = 1;
  655         for (;;) {
  656                 ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
  657                 if (ovcase)
  658                         block = overlap->lf_next;
  659                 /*
  660                  * Six cases:
  661                  *      0) no overlap
  662                  *      1) overlap == lock
  663                  *      2) overlap contains lock
  664                  *      3) lock contains overlap
  665                  *      4) overlap starts before lock
  666                  *      5) overlap ends after lock
  667                  */
  668                 switch (ovcase) {
  669                 case 0: /* no overlap */
  670                         if (needtolink) {
  671                                 *prev = lock;
  672                                 lock->lf_next = overlap;
  673                         }
  674                         break;
  675 
  676                 case 1: /* overlap == lock */
  677                         /*
  678                          * If downgrading lock, others may be
  679                          * able to acquire it.
  680                          */
  681                         if (lock->lf_type == F_RDLCK &&
  682                             overlap->lf_type == F_WRLCK)
  683                                 lf_wakelock(overlap);
  684                         overlap->lf_type = lock->lf_type;
  685                         lf_free(lock);
  686                         lock = overlap; /* for debug output below */
  687                         break;
  688 
  689                 case 2: /* overlap contains lock */
  690                         /*
  691                          * Check for common starting point and different types.
  692                          */
  693                         if (overlap->lf_type == lock->lf_type) {
  694                                 lf_free(lock);
  695                                 lock = overlap; /* for debug output below */
  696                                 break;
  697                         }
  698                         if (overlap->lf_start == lock->lf_start) {
  699                                 *prev = lock;
  700                                 lock->lf_next = overlap;
  701                                 overlap->lf_start = lock->lf_end + 1;
  702                         } else
  703                                 lf_split(overlap, lock, sparelock);
  704                         lf_wakelock(overlap);
  705                         break;
  706 
  707                 case 3: /* lock contains overlap */
  708                         /*
  709                          * If downgrading lock, others may be able to
  710                          * acquire it, otherwise take the list.
  711                          */
  712                         if (lock->lf_type == F_RDLCK &&
  713                             overlap->lf_type == F_WRLCK) {
  714                                 lf_wakelock(overlap);
  715                         } else {
  716                                 while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) {
  717                                         KASSERT(ltmp->lf_next == overlap);
  718                                         TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
  719                                             lf_block);
  720                                         ltmp->lf_next = lock;
  721                                         TAILQ_INSERT_TAIL(&lock->lf_blkhd,
  722                                             ltmp, lf_block);
  723                                 }
  724                         }
  725                         /*
  726                          * Add the new lock if necessary and delete the overlap.
  727                          */
  728                         if (needtolink) {
  729                                 *prev = lock;
  730                                 lock->lf_next = overlap->lf_next;
  731                                 prev = &lock->lf_next;
  732                                 needtolink = 0;
  733                         } else
  734                                 *prev = overlap->lf_next;
  735                         lf_free(overlap);
  736                         continue;
  737 
  738                 case 4: /* overlap starts before lock */
  739                         /*
  740                          * Add lock after overlap on the list.
  741                          */
  742                         lock->lf_next = overlap->lf_next;
  743                         overlap->lf_next = lock;
  744                         overlap->lf_end = lock->lf_start - 1;
  745                         prev = &lock->lf_next;
  746                         lf_wakelock(overlap);
  747                         needtolink = 0;
  748                         continue;
  749 
  750                 case 5: /* overlap ends after lock */
  751                         /*
  752                          * Add the new lock before overlap.
  753                          */
  754                         if (needtolink) {
  755                                 *prev = lock;
  756                                 lock->lf_next = overlap;
  757                         }
  758                         overlap->lf_start = lock->lf_end + 1;
  759                         lf_wakelock(overlap);
  760                         break;
  761                 }
  762                 break;
  763         }
  764 #ifdef LOCKF_DEBUG
  765         if (lockf_debug & 1) {
  766                 lf_print("lf_setlock: got the lock", lock);
  767                 lf_printlist("lf_setlock", lock);
  768         }
  769 #endif /* LOCKF_DEBUG */
  770         return 0;
  771 }
  772 
  773 /*
  774  * Check whether there is a blocking lock,
  775  * and if so return its process identifier.
  776  */
  777 static int
  778 lf_getlock(struct lockf *lock, struct flock *fl)
  779 {
  780         struct lockf *block;
  781 
  782 #ifdef LOCKF_DEBUG
  783         if (lockf_debug & 1)
  784                 lf_print("lf_getlock", lock);
  785 #endif /* LOCKF_DEBUG */
  786 
  787         if ((block = lf_getblock(lock)) != NULL) {
  788                 fl->l_type = block->lf_type;
  789                 fl->l_whence = SEEK_SET;
  790                 fl->l_start = block->lf_start;
  791                 if (block->lf_end == -1)
  792                         fl->l_len = 0;
  793                 else
  794                         fl->l_len = block->lf_end - block->lf_start + 1;
  795                 if (block->lf_flags & F_POSIX)
  796                         fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
  797                 else
  798                         fl->l_pid = -1;
  799         } else {
  800                 fl->l_type = F_UNLCK;
  801         }
  802         return 0;
  803 }
  804 
  805 /*
  806  * Do an advisory lock operation.
  807  */
  808 int
  809 lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
  810 {
  811         struct flock *fl = ap->a_fl;
  812         struct lockf *lock = NULL;
  813         struct lockf *sparelock;
  814         kmutex_t *interlock = lockf_lock;
  815         off_t start, end;
  816         int error = 0;
  817 
  818         KASSERTMSG(size >= 0, "size=%jd", (intmax_t)size);
  819 
  820         /*
  821          * Convert the flock structure into a start and end.
  822          */
  823         switch (fl->l_whence) {
  824         case SEEK_SET:
  825         case SEEK_CUR:
  826                 /*
  827                  * Caller is responsible for adding any necessary offset
  828                  * when SEEK_CUR is used.
  829                  */
  830                 start = fl->l_start;
  831                 break;
  832 
  833         case SEEK_END:
  834                 if (fl->l_start > __type_max(off_t) - size)
  835                         return EINVAL;
  836                 start = size + fl->l_start;
  837                 break;
  838 
  839         default:
  840                 return EINVAL;
  841         }
  842 
  843         if (fl->l_len == 0)
  844                 end = -1;
  845         else {
  846                 if (fl->l_len >= 0) {
  847                         if (start >= 0 &&
  848                             fl->l_len - 1 > __type_max(off_t) - start)
  849                                 return EINVAL;
  850                         end = start + (fl->l_len - 1);
  851                 } else {
  852                         /* lockf() allows -ve lengths */
  853                         if (start < 0)
  854                                 return EINVAL;
  855                         end = start - 1;
  856                         start += fl->l_len;
  857                 }
  858         }
  859         if (start < 0)
  860                 return EINVAL;
  861 
  862         /*
  863          * Allocate locks before acquiring the interlock.  We need two
  864          * locks in the worst case.
  865          */
  866         switch (ap->a_op) {
  867         case F_SETLK:
  868         case F_UNLCK:
  869                 /*
  870                  * XXX For F_UNLCK case, we can re-use the lock.
  871                  */
  872                 if ((ap->a_flags & F_FLOCK) == 0) {
  873                         /*
  874                          * Byte-range lock might need one more lock.
  875                          */
  876                         sparelock = lf_alloc(0);
  877                         if (sparelock == NULL) {
  878                                 error = ENOMEM;
  879                                 goto quit;
  880                         }
  881                         break;
  882                 }
  883                 /* FALLTHROUGH */
  884 
  885         case F_GETLK:
  886                 sparelock = NULL;
  887                 break;
  888 
  889         default:
  890                 return EINVAL;
  891         }
  892 
  893         switch (ap->a_op) {
  894         case F_SETLK:
  895                 lock = lf_alloc(1);
  896                 break;
  897         case F_UNLCK:
  898                 if (start == 0 || end == -1) {
  899                         /* never split */
  900                         lock = lf_alloc(0);
  901                 } else {
  902                         /* might split */
  903                         lock = lf_alloc(2);
  904                 }
  905                 break;
  906         case F_GETLK:
  907                 lock = lf_alloc(0);
  908                 break;
  909         }
  910         if (lock == NULL) {
  911                 error = ENOMEM;
  912                 goto quit;
  913         }
  914 
  915         mutex_enter(interlock);
  916 
  917         /*
  918          * Avoid the common case of unlocking when inode has no locks.
  919          */
  920         if (*head == (struct lockf *)0) {
  921                 if (ap->a_op != F_SETLK) {
  922                         fl->l_type = F_UNLCK;
  923                         error = 0;
  924                         goto quit_unlock;
  925                 }
  926         }
  927 
  928         /*
  929          * Create the lockf structure.
  930          */
  931         lock->lf_start = start;
  932         lock->lf_end = end;
  933         lock->lf_head = head;
  934         lock->lf_type = fl->l_type;
  935         lock->lf_next = (struct lockf *)0;
  936         TAILQ_INIT(&lock->lf_blkhd);
  937         lock->lf_flags = ap->a_flags;
  938         if (lock->lf_flags & F_POSIX) {
  939                 KASSERT(curproc == (struct proc *)ap->a_id);
  940         }
  941         lock->lf_id = ap->a_id;
  942 
  943         /*
  944          * Do the requested operation.
  945          */
  946         switch (ap->a_op) {
  947 
  948         case F_SETLK:
  949                 error = lf_setlock(lock, &sparelock, interlock);
  950                 lock = NULL; /* lf_setlock freed it */
  951                 break;
  952 
  953         case F_UNLCK:
  954                 error = lf_clearlock(lock, &sparelock);
  955                 break;
  956 
  957         case F_GETLK:
  958                 error = lf_getlock(lock, fl);
  959                 break;
  960 
  961         default:
  962                 break;
  963                 /* NOTREACHED */
  964         }
  965 
  966 quit_unlock:
  967         mutex_exit(interlock);
  968 quit:
  969         if (lock)
  970                 lf_free(lock);
  971         if (sparelock)
  972                 lf_free(sparelock);
  973 
  974         return error;
  975 }
  976 
  977 /*
  978  * Initialize subsystem.   XXX We use a global lock.  This could be the
  979  * vnode interlock, but the deadlock detection code may need to inspect
  980  * locks belonging to other files.
  981  */
  982 void
  983 lf_init(void)
  984 {
  985 
  986         lockf_cache = pool_cache_init(sizeof(struct lockf), 0, 0, 0, "lockf",
  987             NULL, IPL_NONE, lf_ctor, lf_dtor, NULL);
  988         lockf_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
  989 }
Cache object: ca2b542b83ddb0a6d40dccf44914df4e
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_lockf.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_lockf.c