The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_lockf.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
    3  * Authors: Doug Rabson <dfr@rabson.org>
    4  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 /*-
   28  * Copyright (c) 1982, 1986, 1989, 1993
   29  *      The Regents of the University of California.  All rights reserved.
   30  *
   31  * This code is derived from software contributed to Berkeley by
   32  * Scooter Morris at Genentech Inc.
   33  *
   34  * Redistribution and use in source and binary forms, with or without
   35  * modification, are permitted provided that the following conditions
   36  * are met:
   37  * 1. Redistributions of source code must retain the above copyright
   38  *    notice, this list of conditions and the following disclaimer.
   39  * 2. Redistributions in binary form must reproduce the above copyright
   40  *    notice, this list of conditions and the following disclaimer in the
   41  *    documentation and/or other materials provided with the distribution.
   42  * 4. Neither the name of the University nor the names of its contributors
   43  *    may be used to endorse or promote products derived from this software
   44  *    without specific prior written permission.
   45  *
   46  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   47  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   48  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   49  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   50  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   51  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   52  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   54  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   55  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   56  * SUCH DAMAGE.
   57  *
   58  *      @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
   59  */
   60 
   61 #include <sys/cdefs.h>
   62 __FBSDID("$FreeBSD$");
   63 
   64 #include "opt_debug_lockf.h"
   65 
   66 #include <sys/param.h>
   67 #include <sys/systm.h>
   68 #include <sys/hash.h>
   69 #include <sys/kernel.h>
   70 #include <sys/limits.h>
   71 #include <sys/lock.h>
   72 #include <sys/mount.h>
   73 #include <sys/mutex.h>
   74 #include <sys/proc.h>
   75 #include <sys/sx.h>
   76 #include <sys/unistd.h>
   77 #include <sys/vnode.h>
   78 #include <sys/malloc.h>
   79 #include <sys/fcntl.h>
   80 #include <sys/lockf.h>
   81 #include <sys/taskqueue.h>
   82 
   83 #ifdef LOCKF_DEBUG
   84 #include <sys/sysctl.h>
   85 
   86 #include <ufs/ufs/quota.h>
   87 #include <ufs/ufs/inode.h>
   88 
   89 static int      lockf_debug = 0; /* control debug output */
   90 SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
   91 #endif
   92 
   93 MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
   94 
   95 struct owner_edge;
   96 struct owner_vertex;
   97 struct owner_vertex_list;
   98 struct owner_graph;
   99 
  100 #define NOLOCKF (struct lockf_entry *)0
  101 #define SELF    0x1
  102 #define OTHERS  0x2
  103 static void      lf_init(void *);
  104 static int       lf_hash_owner(caddr_t, struct flock *, int);
  105 static int       lf_owner_matches(struct lock_owner *, caddr_t, struct flock *,
  106     int);
  107 static struct lockf_entry *
  108                  lf_alloc_lock(struct lock_owner *);
  109 static void      lf_free_lock(struct lockf_entry *);
  110 static int       lf_clearlock(struct lockf *, struct lockf_entry *);
  111 static int       lf_overlaps(struct lockf_entry *, struct lockf_entry *);
  112 static int       lf_blocks(struct lockf_entry *, struct lockf_entry *);
  113 static void      lf_free_edge(struct lockf_edge *);
  114 static struct lockf_edge *
  115                  lf_alloc_edge(void);
  116 static void      lf_alloc_vertex(struct lockf_entry *);
  117 static int       lf_add_edge(struct lockf_entry *, struct lockf_entry *);
  118 static void      lf_remove_edge(struct lockf_edge *);
  119 static void      lf_remove_outgoing(struct lockf_entry *);
  120 static void      lf_remove_incoming(struct lockf_entry *);
  121 static int       lf_add_outgoing(struct lockf *, struct lockf_entry *);
  122 static int       lf_add_incoming(struct lockf *, struct lockf_entry *);
  123 static int       lf_findoverlap(struct lockf_entry **, struct lockf_entry *,
  124     int);
  125 static struct lockf_entry *
  126                  lf_getblock(struct lockf *, struct lockf_entry *);
  127 static int       lf_getlock(struct lockf *, struct lockf_entry *, struct flock *);
  128 static void      lf_insert_lock(struct lockf *, struct lockf_entry *);
  129 static void      lf_wakeup_lock(struct lockf *, struct lockf_entry *);
  130 static void      lf_update_dependancies(struct lockf *, struct lockf_entry *,
  131     int all, struct lockf_entry_list *);
  132 static void      lf_set_start(struct lockf *, struct lockf_entry *, off_t,
  133         struct lockf_entry_list*);
  134 static void      lf_set_end(struct lockf *, struct lockf_entry *, off_t,
  135         struct lockf_entry_list*);
  136 static int       lf_setlock(struct lockf *, struct lockf_entry *,
  137     struct vnode *, void **cookiep);
  138 static int       lf_cancel(struct lockf *, struct lockf_entry *, void *);
  139 static void      lf_split(struct lockf *, struct lockf_entry *,
  140     struct lockf_entry *, struct lockf_entry_list *);
  141 #ifdef LOCKF_DEBUG
  142 static int       graph_reaches(struct owner_vertex *x, struct owner_vertex *y,
  143     struct owner_vertex_list *path);
  144 static void      graph_check(struct owner_graph *g, int checkorder);
  145 static void      graph_print_vertices(struct owner_vertex_list *set);
  146 #endif
  147 static int       graph_delta_forward(struct owner_graph *g,
  148     struct owner_vertex *x, struct owner_vertex *y,
  149     struct owner_vertex_list *delta);
  150 static int       graph_delta_backward(struct owner_graph *g,
  151     struct owner_vertex *x, struct owner_vertex *y,
  152     struct owner_vertex_list *delta);
  153 static int       graph_add_indices(int *indices, int n,
  154     struct owner_vertex_list *set);
  155 static int       graph_assign_indices(struct owner_graph *g, int *indices,
  156     int nextunused, struct owner_vertex_list *set);
  157 static int       graph_add_edge(struct owner_graph *g,
  158     struct owner_vertex *x, struct owner_vertex *y);
  159 static void      graph_remove_edge(struct owner_graph *g,
  160     struct owner_vertex *x, struct owner_vertex *y);
  161 static struct owner_vertex *graph_alloc_vertex(struct owner_graph *g,
  162     struct lock_owner *lo);
  163 static void      graph_free_vertex(struct owner_graph *g,
  164     struct owner_vertex *v);
  165 static struct owner_graph * graph_init(struct owner_graph *g);
  166 #ifdef LOCKF_DEBUG
  167 static void      lf_print(char *, struct lockf_entry *);
  168 static void      lf_printlist(char *, struct lockf_entry *);
  169 static void      lf_print_owner(struct lock_owner *);
  170 #endif
  171 
  172 /*
  173  * This structure is used to keep track of both local and remote lock
  174  * owners. The lf_owner field of the struct lockf_entry points back at
  175  * the lock owner structure. Each possible lock owner (local proc for
  176  * POSIX fcntl locks, local file for BSD flock locks or <pid,sysid>
  177  * pair for remote locks) is represented by a unique instance of
  178  * struct lock_owner.
  179  *
  180  * If a lock owner has a lock that blocks some other lock or a lock
  181  * that is waiting for some other lock, it also has a vertex in the
  182  * owner_graph below.
  183  *
  184  * Locks:
  185  * (s)          locked by state->ls_lock
  186  * (S)          locked by lf_lock_states_lock
  187  * (l)          locked by lf_lock_owners_lock
  188  * (g)          locked by lf_owner_graph_lock
  189  * (c)          const until freeing
  190  */
  191 #define LOCK_OWNER_HASH_SIZE    256
  192 
  193 struct lock_owner {
  194         LIST_ENTRY(lock_owner) lo_link; /* (l) hash chain */
  195         int     lo_refs;            /* (l) Number of locks referring to this */
  196         int     lo_flags;           /* (c) Flags passed to lf_advlock */
  197         caddr_t lo_id;              /* (c) Id value passed to lf_advlock */
  198         pid_t   lo_pid;             /* (c) Process Id of the lock owner (-1 for flock owners) */
  199         int     lo_sysid;           /* (c) System Id of the lock owner (0 unless remote) */
  200         struct owner_vertex *lo_vertex; /* (g) entry in deadlock graph */
  201 };
  202 
  203 LIST_HEAD(lock_owner_list, lock_owner);
  204 
  205 static struct sx                lf_lock_states_lock; /* protects (S) items */
  206 static struct lockf_list        lf_lock_states; /* (S) every allocated struct lockf */
  207 static struct sx                lf_lock_owners_lock; /* protects (l) items */
  208 static struct lock_owner_list   lf_lock_owners[LOCK_OWNER_HASH_SIZE]; /* (l) owner hash table, bucket chosen by lf_hash_owner() */
  209 
  210 /*
  211  * Structures for deadlock detection.
  212  *
  213  * We have two types of directed graph, the first is the set of locks,
  214  * both active and pending on a vnode. Within this graph, active locks
  215  * are terminal nodes in the graph (i.e. have no out-going
  216  * edges). Pending locks have out-going edges to each blocking active
  217  * lock that prevents the lock from being granted and also to each
  218  * older pending lock that would block them if it was active. The
  219  * graph for each vnode is naturally acyclic; new edges are only ever
  220  * added to or from new nodes (either new pending locks which only add
  221  * out-going edges or new active locks which only add in-coming edges)
  222  * therefore they cannot create loops in the lock graph.
  223  *
  224  * The second graph is a global graph of lock owners. Each lock owner
  225  * is a vertex in that graph and an edge is added to the graph
  226  * whenever an edge is added to a vnode graph, with end points
  227  * corresponding to owner of the new pending lock and the owner of the
  228  * lock upon which it waits. In order to prevent deadlock, we only add
  229  * an edge to this graph if the new edge would not create a cycle.
  230  * 
  231  * The lock owner graph is topologically sorted, i.e. if a node has
  232  * any outgoing edges, then it has an order strictly less than any
  233  * node to which it has an outgoing edge. We preserve this ordering
  234  * (and detect cycles) on edge insertion using Algorithm PK from the
  235  * paper "A Dynamic Topological Sort Algorithm for Directed Acyclic
  236  * Graphs" (ACM Journal of Experimental Algorithms, Vol 11, Article
  237  * No. 1.7)
  238  */
  239 struct owner_vertex;
  240 
  241 struct owner_edge {               /* directed edge of the lock owner graph */
  242         LIST_ENTRY(owner_edge) e_outlink; /* (g) link from's out-edge list */
  243         LIST_ENTRY(owner_edge) e_inlink;  /* (g) link to's in-edge list */
  244         int             e_refs;           /* (g) number of times added */
  245         struct owner_vertex *e_from;      /* (c) out-going from here */
  246         struct owner_vertex *e_to;        /* (c) in-coming to here */
  247 };
  248 LIST_HEAD(owner_edge_list, owner_edge);
  249 
  250 struct owner_vertex {             /* one lock owner's node in the owner graph */
  251         TAILQ_ENTRY(owner_vertex) v_link; /* (g) workspace for edge insertion */
  252         uint32_t        v_gen;            /* (g) workspace for edge insertion */
  253         int             v_order;          /* (g) order of vertex in graph */
  254         struct owner_edge_list v_outedges;/* (g) list of out-edges */
  255         struct owner_edge_list v_inedges; /* (g) list of in-edges */
  256         struct lock_owner *v_owner;       /* (c) corresponding lock owner */
  257 };
  258 TAILQ_HEAD(owner_vertex_list, owner_vertex);
  259 
  260 struct owner_graph {              /* topologically sorted graph of lock owners */
  261         struct owner_vertex** g_vertices; /* (g) pointers to vertices */
  262         int             g_size;           /* (g) number of vertices */
  263         int             g_space;          /* (g) space allocated for vertices */
  264         int             *g_indexbuf;      /* (g) workspace for loop detection */
  265         uint32_t        g_gen;            /* (g) increment when re-ordering */
  266 };
  267 
  268 static struct sx                lf_owner_graph_lock; /* protects (g) items */
  269 static struct owner_graph       lf_owner_graph; /* (g) the global lock owner graph */
  270 
  271 /*
  272  * Initialise various structures and locks.
  273  */
  274 static void
  275 lf_init(void *dummy)
  276 {
  277         int i;
  278 
  279         sx_init(&lf_lock_states_lock, "lock states lock");
  280         LIST_INIT(&lf_lock_states);
  281 
  282         sx_init(&lf_lock_owners_lock, "lock owners lock");
  283         for (i = 0; i < LOCK_OWNER_HASH_SIZE; i++)
  284                 LIST_INIT(&lf_lock_owners[i]);
  285 
  286         sx_init(&lf_owner_graph_lock, "owner graph lock");
  287         graph_init(&lf_owner_graph);
  288 }
  289 SYSINIT(lf_init, SI_SUB_LOCK, SI_ORDER_FIRST, lf_init, NULL);
  290 
  291 /*
  292  * Generate a hash value for a lock owner.
  293  */
  294 static int
  295 lf_hash_owner(caddr_t id, struct flock *fl, int flags)
  296 {
  297         uint32_t h;
  298 
  299         if (flags & F_REMOTE) {
  300                 h = HASHSTEP(0, fl->l_pid);
  301                 h = HASHSTEP(h, fl->l_sysid);
  302         } else if (flags & F_FLOCK) {
  303                 h = ((uintptr_t) id) >> 7;
  304         } else {
  305                 struct proc *p = (struct proc *) id;
  306                 h = HASHSTEP(0, p->p_pid);
  307                 h = HASHSTEP(h, 0);
  308         }
  309 
  310         return (h % LOCK_OWNER_HASH_SIZE);
  311 }
  312 
  313 /*
  314  * Return true if a lock owner matches the details passed to
  315  * lf_advlock.
  316  */
  317 static int
  318 lf_owner_matches(struct lock_owner *lo, caddr_t id, struct flock *fl,
  319     int flags)
  320 {
  321         if (flags & F_REMOTE) {
  322                 return lo->lo_pid == fl->l_pid
  323                         && lo->lo_sysid == fl->l_sysid;
  324         } else {
  325                 return lo->lo_id == id;
  326         }
  327 }
  328 
  329 static struct lockf_entry *
  330 lf_alloc_lock(struct lock_owner *lo)
  331 {
  332         struct lockf_entry *lf;
  333 
  334         lf = malloc(sizeof(struct lockf_entry), M_LOCKF, M_WAITOK|M_ZERO);
  335 
  336 #ifdef LOCKF_DEBUG
  337         if (lockf_debug & 4)
  338                 printf("Allocated lock %p\n", lf);
  339 #endif
  340         if (lo) {
  341                 sx_xlock(&lf_lock_owners_lock);
  342                 lo->lo_refs++;
  343                 sx_xunlock(&lf_lock_owners_lock);
  344                 lf->lf_owner = lo;
  345         }
  346 
  347         return (lf);
  348 }
  349 
  350 static void
  351 lf_free_lock(struct lockf_entry *lock)
  352 {
  353         /*
  354          * Adjust the lock_owner reference count and
  355          * reclaim the entry if this is the last lock
  356          * for that owner.
  357          */
  358         struct lock_owner *lo = lock->lf_owner;
  359         if (lo) {
  360                 KASSERT(LIST_EMPTY(&lock->lf_outedges),
  361                     ("freeing lock with dependancies"));
  362                 KASSERT(LIST_EMPTY(&lock->lf_inedges),
  363                     ("freeing lock with dependants"));
  364                 sx_xlock(&lf_lock_owners_lock);
  365                 KASSERT(lo->lo_refs > 0, ("lock owner refcount"));
  366                 lo->lo_refs--;
  367                 if (lo->lo_refs == 0) {
  368 #ifdef LOCKF_DEBUG
  369                         if (lockf_debug & 1)
  370                                 printf("lf_free_lock: freeing lock owner %p\n",
  371                                     lo);
  372 #endif
  373                         if (lo->lo_vertex) {
  374                                 sx_xlock(&lf_owner_graph_lock);
  375                                 graph_free_vertex(&lf_owner_graph,
  376                                     lo->lo_vertex);
  377                                 sx_xunlock(&lf_owner_graph_lock);
  378                         }
  379                         LIST_REMOVE(lo, lo_link);
  380                         free(lo, M_LOCKF);
  381 #ifdef LOCKF_DEBUG
  382                         if (lockf_debug & 4)
  383                                 printf("Freed lock owner %p\n", lo);
  384 #endif
  385                 }
  386                 sx_unlock(&lf_lock_owners_lock);
  387         }
  388         if ((lock->lf_flags & F_REMOTE) && lock->lf_vnode) {
  389                 vrele(lock->lf_vnode);
  390                 lock->lf_vnode = NULL;
  391         }
  392 #ifdef LOCKF_DEBUG
  393         if (lockf_debug & 4)
  394                 printf("Freed lock %p\n", lock);
  395 #endif
  396         free(lock, M_LOCKF);
  397 }
  398 
  399 /*
  400  * Advisory record locking support
       *
       * Carries out the operation ap->a_op on the byte range described by
       * ap->a_fl for the vnode ap->a_vp. *statep is the caller's pointer to
       * the per-vnode lock state; it is created here on demand and reset to
       * NULL when the state is freed. size is only used to resolve SEEK_END
       * offsets.
       *
       * Steps, in order:
       *  - F_UNLCKSYS is passed straight to lf_clearremotesys().
       *  - l_whence/l_start/l_len are converted to an inclusive
       *    [start, end] range (EINVAL or EOVERFLOW on bad input).
       *  - With no state and an op other than F_SETLK, l_type is set to
       *    F_UNLCK and 0 is returned.
       *  - The lock owner is looked up in, or added to, lf_lock_owners and
       *    gains one reference for the lockf_entry allocated afterwards.
       *  - The state is found or created, and the op is dispatched to
       *    lf_setlock(), lf_clearlock(), lf_getlock() or lf_cancel() with
       *    state->ls_lock held exclusively.
       *  - The state is freed if it has no active locks and no other
       *    thread is using it.
       *
       * Returns 0 or an errno value.
  401  */
  402 int
  403 lf_advlockasync(struct vop_advlockasync_args *ap, struct lockf **statep,
  404     u_quad_t size)
  405 {
  406         struct lockf *state, *freestate = NULL;
  407         struct flock *fl = ap->a_fl;
  408         struct lockf_entry *lock;
  409         struct vnode *vp = ap->a_vp;
  410         caddr_t id = ap->a_id;
  411         int flags = ap->a_flags;
  412         int hash;
  413         struct lock_owner *lo;
  414         off_t start, end, oadd;
  415         int error;
  416 
  417         /*
  418          * Handle the F_UNLCKSYS case first - no need to mess about
  419          * creating a lock owner for this one.
  420          */
  421         if (ap->a_op == F_UNLCKSYS) {
  422                 lf_clearremotesys(fl->l_sysid);
  423                 return (0);
  424         }
  425 
  426         /*
  427          * Convert the flock structure into a start and end.
  428          */
  429         switch (fl->l_whence) {
  430 
  431         case SEEK_SET:
  432         case SEEK_CUR:
  433                 /*
  434                  * Caller is responsible for adding any necessary offset
  435                  * when SEEK_CUR is used.
  436                  */
  437                 start = fl->l_start;
  438                 break;
  439 
  440         case SEEK_END:
                      /* Reject offsets that would not fit in an off_t. */
  441                 if (size > OFF_MAX ||
  442                     (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
  443                         return (EOVERFLOW);
  444                 start = size + fl->l_start;
  445                 break;
  446 
  447         default:
  448                 return (EINVAL);
  449         }
  450         if (start < 0)
  451                 return (EINVAL);
  452         if (fl->l_len < 0) {
                      /* Negative length: the range ends just before start. */
  453                 if (start == 0)
  454                         return (EINVAL);
  455                 end = start - 1;
  456                 start += fl->l_len;
  457                 if (start < 0)
  458                         return (EINVAL);
  459         } else if (fl->l_len == 0) {
                      /* Zero length: the range extends to the largest offset. */
  460                 end = OFF_MAX;
  461         } else {
  462                 oadd = fl->l_len - 1;
  463                 if (oadd > OFF_MAX - start)
  464                         return (EOVERFLOW);
  465                 end = start + oadd;
  466         }
  467         /*
  468          * Avoid the common case of unlocking when inode has no locks.
               * Note that this applies to every operation other than
               * F_SETLK, not just F_UNLCK.
  469          */
  470         VI_LOCK(vp);
  471         if ((*statep) == NULL) {
  472                 if (ap->a_op != F_SETLK) {
  473                         fl->l_type = F_UNLCK;
  474                         VI_UNLOCK(vp);
  475                         return (0);
  476                 }
  477         }
  478         VI_UNLOCK(vp);
  479 
  480         /*
  481          * Map our arguments to an existing lock owner or create one
  482          * if this is the first time we have seen this owner.
  483          */
  484         hash = lf_hash_owner(id, fl, flags);
  485         sx_xlock(&lf_lock_owners_lock);
  486         LIST_FOREACH(lo, &lf_lock_owners[hash], lo_link)
  487                 if (lf_owner_matches(lo, id, fl, flags))
  488                         break;
  489         if (!lo) {
  490                 /*
  491                  * We initialise the lock with a reference
  492                  * count which matches the new lockf_entry
  493                  * structure created below.
  494                  */
  495                 lo = malloc(sizeof(struct lock_owner), M_LOCKF,
  496                     M_WAITOK|M_ZERO);
  497 #ifdef LOCKF_DEBUG
  498                 if (lockf_debug & 4)
  499                         printf("Allocated lock owner %p\n", lo);
  500 #endif
  501 
  502                 lo->lo_refs = 1;
  503                 lo->lo_flags = flags;
  504                 lo->lo_id = id;
  505                 if (flags & F_REMOTE) {
  506                         lo->lo_pid = fl->l_pid;
  507                         lo->lo_sysid = fl->l_sysid;
  508                 } else if (flags & F_FLOCK) {
  509                         lo->lo_pid = -1;
  510                         lo->lo_sysid = 0;
  511                 } else {
  512                         struct proc *p = (struct proc *) id;
  513                         lo->lo_pid = p->p_pid;
  514                         lo->lo_sysid = 0;
  515                 }
  516                 lo->lo_vertex = NULL;
  517 
  518 #ifdef LOCKF_DEBUG
  519                 if (lockf_debug & 1) {
  520                         printf("lf_advlockasync: new lock owner %p ", lo);
  521                         lf_print_owner(lo);
  522                         printf("\n");
  523                 }
  524 #endif
  525 
  526                 LIST_INSERT_HEAD(&lf_lock_owners[hash], lo, lo_link);
  527         } else {
  528                 /*
  529                  * We have seen this lock owner before, increase its
  530                  * reference count to account for the new lockf_entry
  531                  * structure we create below.
  532                  */
  533                 lo->lo_refs++;
  534         }
  535         sx_xunlock(&lf_lock_owners_lock);
  536 
  537         /*
  538          * Create the lockf structure. We initialise the lf_owner
  539          * field here instead of in lf_alloc_lock() to avoid paying
  540          * the lf_lock_owners_lock tax twice.
  541          */
  542         lock = lf_alloc_lock(NULL);
  543         lock->lf_start = start;
  544         lock->lf_end = end;
  545         lock->lf_owner = lo;
  546         lock->lf_vnode = vp;
  547         if (flags & F_REMOTE) {
  548                 /*
  549                  * For remote locks, the caller may release its ref to
  550                  * the vnode at any time - we have to ref it here to
  551                  * prevent it from being recycled unexpectedly.
  552                  */
  553                 vref(vp);
  554         }
  555 
  556         /*
  557          * XXX The problem is that VTOI is ufs specific, so it will
  558          * break LOCKF_DEBUG for all other FS's other than UFS because
  559          * it casts the vnode->data ptr to struct inode *.
  560          */
  561 /*      lock->lf_inode = VTOI(ap->a_vp); */
  562         lock->lf_inode = (struct inode *)0;
  563         lock->lf_type = fl->l_type;
  564         LIST_INIT(&lock->lf_outedges);
  565         LIST_INIT(&lock->lf_inedges);
  566         lock->lf_async_task = ap->a_task;
  567         lock->lf_flags = ap->a_flags;
  568 
  569         /*
  570          * Do the requested operation. First find our state structure
  571          * and create a new one if necessary - the caller's *statep
  572          * variable and the state's ls_threads count is protected by
  573          * the vnode interlock.
  574          */
  575         VI_LOCK(vp);
  576 
  577         /*
  578          * Allocate a state structure if necessary.
  579          */
  580         state = *statep;
  581         if (state == NULL) {
  582                 struct lockf *ls;
  583 
                      /* The interlock is dropped around the M_WAITOK allocation. */
  584                 VI_UNLOCK(vp);
  585 
  586                 ls = malloc(sizeof(struct lockf), M_LOCKF, M_WAITOK|M_ZERO);
  587                 sx_init(&ls->ls_lock, "ls_lock");
  588                 LIST_INIT(&ls->ls_active);
  589                 LIST_INIT(&ls->ls_pending);
  590                 ls->ls_threads = 1;
  591 
  592                 sx_xlock(&lf_lock_states_lock);
  593                 LIST_INSERT_HEAD(&lf_lock_states, ls, ls_link);
  594                 sx_xunlock(&lf_lock_states_lock);
  595 
  596                 /*
  597                  * Cope if we lost a race with some other thread while
  598                  * trying to allocate memory.
  599                  */
  600                 VI_LOCK(vp);
  601                 if ((*statep) == NULL) {
  602                         state = *statep = ls;
  603                         VI_UNLOCK(vp);
  604                 } else {
                              /* Use the winner's state and discard ours. */
  605                         state = *statep;
  606                         state->ls_threads++;
  607                         VI_UNLOCK(vp);
  608 
  609                         sx_xlock(&lf_lock_states_lock);
  610                         LIST_REMOVE(ls, ls_link);
  611                         sx_xunlock(&lf_lock_states_lock);
  612                         sx_destroy(&ls->ls_lock);
  613                         free(ls, M_LOCKF);
  614                 }
  615         } else {
  616                 state->ls_threads++;
  617                 VI_UNLOCK(vp);
  618         }
  619 
              /*
               * Every case except F_SETLK frees the temporary lock here;
               * presumably lf_setlock() takes over ownership of it - confirm
               * against lf_setlock().
               */
  620         sx_xlock(&state->ls_lock);
  621         switch(ap->a_op) {
  622         case F_SETLK:
  623                 error = lf_setlock(state, lock, vp, ap->a_cookiep);
  624                 break;
  625 
  626         case F_UNLCK:
  627                 error = lf_clearlock(state, lock);
  628                 lf_free_lock(lock);
  629                 break;
  630 
  631         case F_GETLK:
  632                 error = lf_getlock(state, lock, fl);
  633                 lf_free_lock(lock);
  634                 break;
  635 
  636         case F_CANCEL:
  637                 if (ap->a_cookiep)
  638                         error = lf_cancel(state, lock, *ap->a_cookiep);
  639                 else
  640                         error = EINVAL;
  641                 lf_free_lock(lock);
  642                 break;
  643 
  644         default:
  645                 lf_free_lock(lock);
  646                 error = EINVAL;
  647                 break;
  648         }
  649 
  650 #ifdef INVARIANTS
  651         /*
  652          * Check for some can't happen stuff. In this case, the active
  653          * lock list becoming disordered or containing mutually
  654          * blocking locks. We also check the pending list for locks
  655          * which should be active (i.e. have no out-going edges).
               * The 'lock' variable is reused as a list iterator from here on.
  656          */
  657         LIST_FOREACH(lock, &state->ls_active, lf_link) {
  658                 struct lockf_entry *lf;
  659                 if (LIST_NEXT(lock, lf_link))
  660                         KASSERT((lock->lf_start
  661                                 <= LIST_NEXT(lock, lf_link)->lf_start),
  662                             ("locks disordered"));
  663                 LIST_FOREACH(lf, &state->ls_active, lf_link) {
  664                         if (lock == lf)
  665                                 break;
  666                         KASSERT(!lf_blocks(lock, lf),
  667                             ("two conflicting active locks"));
  668                         if (lock->lf_owner == lf->lf_owner)
  669                                 KASSERT(!lf_overlaps(lock, lf),
  670                                     ("two overlapping locks from same owner"));
  671                 }
  672         }
  673         LIST_FOREACH(lock, &state->ls_pending, lf_link) {
  674                 KASSERT(!LIST_EMPTY(&lock->lf_outedges),
  675                     ("pending lock which should be active"));
  676         }
  677 #endif
  678         sx_xunlock(&state->ls_lock);
  679 
  680         /*
  681          * If we have removed the last active lock on the vnode and
  682          * this is the last thread that was in-progress, we can free
  683          * the state structure. We update the caller's pointer inside
  684          * the vnode interlock but call free outside.
  685          *
  686          * XXX alternatively, keep the state structure around until
  687          * the filesystem recycles - requires a callback from the
  688          * filesystem.
  689          */
  690         VI_LOCK(vp);
  691 
  692         state->ls_threads--;
  693         if (LIST_EMPTY(&state->ls_active) && state->ls_threads == 0) {
  694                 KASSERT(LIST_EMPTY(&state->ls_pending),
  695                     ("freeing state with pending locks"));
  696                 freestate = state;
  697                 *statep = NULL;
  698         }
  699 
  700         VI_UNLOCK(vp);
  701 
  702         if (freestate) {
  703                 sx_xlock(&lf_lock_states_lock);
  704                 LIST_REMOVE(freestate, ls_link);
  705                 sx_xunlock(&lf_lock_states_lock);
  706                 sx_destroy(&freestate->ls_lock);
  707                 free(freestate, M_LOCKF);
  708         }
  709         return (error);
  710 }
  711 
  712 int
  713 lf_advlock(struct vop_advlock_args *ap, struct lockf **statep, u_quad_t size)
  714 {
  715         struct vop_advlockasync_args async_args;
  716
              /*
               * Repackage the caller's arguments for lf_advlockasync(),
               * leaving both the async task and the cancel cookie unset.
               */
  717         async_args.a_task = NULL;
  718         async_args.a_cookiep = NULL;
  719         async_args.a_vp = ap->a_vp;
  720         async_args.a_id = ap->a_id;
  721         async_args.a_op = ap->a_op;
  722         async_args.a_fl = ap->a_fl;
  723         async_args.a_flags = ap->a_flags;
  724
  725         return (lf_advlockasync(&async_args, statep, size));
  726 }
  727 
  728 /*
  729  * Test whether the byte ranges of locks 'x' and 'y' intersect.  Both
       * lf_start and lf_end are compared as inclusive bounds.  Returns
       * non-zero when the ranges share at least one offset, zero otherwise.
  730  */
  731 static int
  732 lf_overlaps(struct lockf_entry *x, struct lockf_entry *y)
  733 {
  734
  735         if (x->lf_start > y->lf_end)
                      return (0);
              if (y->lf_start > x->lf_end)
                      return (0);
              return (1);
  736 }
  737 
  738 /*
  739  * Decide whether locks 'x' and 'y' conflict.  The relation is
       * symmetric: the two locks block each other when they belong to
       * different owners, at least one of them is a write lock and their
       * byte ranges overlap.  Returns non-zero on conflict.
  740  */
  741 static int
  742 lf_blocks(struct lockf_entry *x, struct lockf_entry *y)
  743 {
  744
  745         if (x->lf_owner == y->lf_owner)
  746                 return (0);
  747         if (x->lf_type != F_WRLCK && y->lf_type != F_WRLCK)
                      return (0);
              return (lf_overlaps(x, y) != 0);
  748 }
  749 
  750 /*
  751  * Allocate a new lock edge.  The memory is requested from malloc(9)
       * with M_WAITOK | M_ZERO, so the edge is handed back zero-filled.
  752  */
  753 static struct lockf_edge *
  754 lf_alloc_edge(void)
  755 {
  756         struct lockf_edge *edge;

  757         edge = malloc(sizeof(*edge), M_LOCKF, M_WAITOK | M_ZERO);
              return (edge);
  758 }
  759 
  760 /*
  761  * Free a lock edge.
       *
       * Releases the memory of 'e' back to the M_LOCKF malloc type.  This
       * does not unlink the edge from any list; lf_remove_edge() does that
       * before calling here.
  762  */
  763 static void
  764 lf_free_edge(struct lockf_edge *e)
  765 {
  766 
  767         free(e, M_LOCKF);
  768 }
  769 
  770 
  771 /*
  772  * Make sure the owner of 'lock' is represented in the owner graph,
  773  * allocating a vertex for it on first use.
  774  */
  775 static void
  776 lf_alloc_vertex(struct lockf_entry *lock)
  777 {
  778
  779         /* Nothing to do when the owner already has a vertex. */
  780         if (lock->lf_owner->lo_vertex != NULL)
  781                 return;
  782         lock->lf_owner->lo_vertex =
                  graph_alloc_vertex(&lf_owner_graph, lock->lf_owner);
  783 }
  784 
  785 /*
  786  * Record that lock 'x' waits for lock 'y'.  The dependency is first
  787  * entered into the owner graph; if that fails (EDEADLK, because the
       * new edge would close a cycle) the error is returned and no lock
       * edge is allocated.  On success a lock edge is linked onto x's
       * out-going and y's in-coming lists and zero is returned.
  788  */
  789 static int
  790 lf_add_edge(struct lockf_entry *x, struct lockf_entry *y)
  791 {
  792         struct lockf_edge *edge;
  793         int error;
  794
  795 #ifdef INVARIANTS
  796         /* The same pair of locks must never be linked twice. */
  797         LIST_FOREACH(edge, &x->lf_outedges, le_outlink)
  798                 KASSERT(edge->le_to != y, ("adding lock edge twice"));
  799 #endif
  800
  801         /* Both owners need a vertex before an edge can join them. */
  802         lf_alloc_vertex(x);
  803         lf_alloc_vertex(y);
  804
  805         error = graph_add_edge(&lf_owner_graph, x->lf_owner->lo_vertex,
  806             y->lf_owner->lo_vertex);
  807         if (error != 0)
  808                 return (error);
  809
  810         edge = lf_alloc_edge();
  811         edge->le_from = x;
  812         edge->le_to = y;
  813         LIST_INSERT_HEAD(&x->lf_outedges, edge, le_outlink);
  814         LIST_INSERT_HEAD(&y->lf_inedges, edge, le_inlink);
  815
  818         return (0);
  819 }
  820 
  821 /*
  822  * Tear down a lock edge: drop the matching edge between the two
       * owners in the owner graph, unlink the edge from both locks' edge
       * lists and free it.
  823  */
  824 static void
  825 lf_remove_edge(struct lockf_edge *e)
  826 {
  827         struct lockf_entry *from, *to;
  828
  829         from = e->le_from;
  830         to = e->le_to;
  831         graph_remove_edge(&lf_owner_graph, from->lf_owner->lo_vertex,
                  to->lf_owner->lo_vertex);
  832         LIST_REMOVE(e, le_outlink);
  833         LIST_REMOVE(e, le_inlink);
  834         e->le_from = NULL;
  835         e->le_to = NULL;
  836         lf_free_edge(e);
  837 }
  838 
  839 /*
  840  * Drop every edge that leaves lock 'x', i.e. every record of a lock
       * that 'x' is waiting for.
  841  */
  842 static void
  843 lf_remove_outgoing(struct lockf_entry *x)
  844 {
  845
  846         /* lf_remove_edge() unlinks the head, so keep taking the head. */
  847         while (!LIST_EMPTY(&x->lf_outedges))
  848                 lf_remove_edge(LIST_FIRST(&x->lf_outedges));
  849 }
  851 
  852 /*
  853  * Drop every edge that arrives at lock 'x', i.e. every record of a
       * lock that is waiting for 'x'.
  854  */
  855 static void
  856 lf_remove_incoming(struct lockf_entry *x)
  857 {
  858
  859         /* lf_remove_edge() unlinks the head, so keep taking the head. */
  860         while (!LIST_EMPTY(&x->lf_inedges))
  861                 lf_remove_edge(LIST_FIRST(&x->lf_inedges));
  862 }
  864 
  865 /*
  866  * Build the out-going edges for a lock that is about to wait: one
  867  * edge from 'lock' to every active or pending lock that blocks it.
       * Returns zero on success.  If adding an edge fails, all out-going
       * edges of 'lock' are removed again and the error is returned.
  868  */
  869 static int
  870 lf_add_outgoing(struct lockf *state, struct lockf_entry *lock)
  871 {
  872         struct lockf_entry *other;
  873         int error;
  874
  875         /*
  876          * Active locks first.  The active list is sorted by lf_start,
  877          * so the scan can stop at the first entry that begins beyond
  878          * the end of our range.
  879          */
  880         LIST_FOREACH(other, &state->ls_active, lf_link) {
  881                 if (other->lf_start > lock->lf_end)
  882                         break;
  883                 if (lf_blocks(lock, other)) {
  884                         /*
  885                          * The only error that lf_add_edge returns is
  886                          * EDEADLK.  Remove the edges added so far
  887                          * and report it.
  888                          */
  889                         error = lf_add_edge(lock, other);
  890                         if (error != 0) {
  891                                 lf_remove_outgoing(lock);
  892                                 return (error);
  893                         }
  894                 }
  895         }
  896
  897         /*
  898          * Sleeping locks that block us get an edge as well.  This
  899          * ensures that lf_wakeup_lock cannot grant two mutually
  900          * blocking locks simultaneously and also enforces a 'first
  901          * come, first served' fairness model.  Note that this only
  902          * happens if we are blocked by at least one active lock due
  903          * to the call to lf_getblock in lf_setlock below.
  904          */
  905         LIST_FOREACH(other, &state->ls_pending, lf_link) {
  906                 if (lf_blocks(lock, other)) {
  907                         error = lf_add_edge(lock, other);
  908                         if (error != 0) {
  909                                 lf_remove_outgoing(lock);
  910                                 return (error);
  911                         }
  912                 }
  913         }
  914
  930         return (0);
  931 }
  932 
  933 /*
  934  * Build the in-coming edges for a lock that is about to be granted:
  935  * one edge to 'lock' from every pending lock that it blocks.  Returns
       * zero on success.  If adding an edge fails, all in-coming edges of
       * 'lock' are removed again and the error is returned.
  936  */
  937 static int
  938 lf_add_incoming(struct lockf *state, struct lockf_entry *lock)
  939 {
  940         struct lockf_entry *waiter;
  941         int error;
  942
  943         LIST_FOREACH(waiter, &state->ls_pending, lf_link) {
  944                 if (lf_blocks(lock, waiter)) {
  945                         /*
  946                          * The only error that lf_add_edge returns is
  947                          * EDEADLK.  Remove the edges added so far
  948                          * and report it.
  949                          */
  950                         error = lf_add_edge(waiter, lock);
  951                         if (error != 0) {
  952                                 lf_remove_incoming(lock);
  953                                 return (error);
  954                         }
  955                 }
  956         }
  957
  963         return (0);
  964 }
  965 
  966 /*
  967  * Link 'lock' into the active list at the position that keeps the
  968  * list ordered by increasing lf_start.  The new lock goes after any
       * existing entries that have the same lf_start.
  969  */
  970 static void
  971 lf_insert_lock(struct lockf *state, struct lockf_entry *lock)
  972 {
  973         struct lockf_entry *cur, *last;
  974
  975         last = NULL;
  976         LIST_FOREACH(cur, &state->ls_active, lf_link) {
  977                 if (cur->lf_start > lock->lf_start) {
  978                         LIST_INSERT_BEFORE(cur, lock, lf_link);
  979                         return;
  980                 }
  981                 last = cur;
  982         }
  983
  984         /* No later entry found: append, or start the list if it is empty. */
  985         if (last == NULL)
  986                 LIST_INSERT_HEAD(&state->ls_active, lock, lf_link);
  987         else
  988                 LIST_INSERT_AFTER(last, lock, lf_link);
  989 }
  990 
  991 /*
  992  * Take a sleeping lock off the pending list now that all of its
  993  * dependencies have been resolved, and notify its requester.  The
  994  * caller should arrange for the lock to be added to the active list,
  995  * adjusting any existing locks for the same owner as needed.
  996  */
  997 static void
  998 lf_wakeup_lock(struct lockf *state, struct lockf_entry *wakelock)
  999 {
 1000
 1001         /* The lock is on the ls_pending list; unlink it first. */
 1002         LIST_REMOVE(wakelock, lf_link);
 1003 #ifdef LOCKF_DEBUG
 1004         if (lockf_debug & 1)
 1005                 lf_print("lf_wakeup_lock: awakening", wakelock);
 1006 #endif /* LOCKF_DEBUG */
 1007         /*
 1008          * Without an async task the requester is woken through
 1009          * wakeup() on the lock; otherwise its task is queued.
 1010          */
 1011         if (wakelock->lf_async_task == NULL)
 1012                 wakeup(wakelock);
 1013         else
 1014                 taskqueue_enqueue(taskqueue_thread, wakelock->lf_async_task);
 1015 }
 1016 
 1017 /*
 1018  * Re-check all dependent locks and remove edges to locks that we no
 1019  * longer block. If 'all' is non-zero, the lock has been removed and
 1020  * we must remove all the dependencies, otherwise it has simply been
 1021  * reduced but remains active. Any pending locks which have been
 1022  * unblocked are added to 'granted'.
       *
       * A dependent lock counts as unblocked once its last out-going edge
       * is gone; it is then taken off the pending list by lf_wakeup_lock
       * and linked onto 'granted' for the caller to activate.
 1023  */
 1024 static void
 1025 lf_update_dependancies(struct lockf *state, struct lockf_entry *lock, int all,
 1026         struct lockf_entry_list *granted)
 1027 {
 1028         struct lockf_edge *e, *ne;
 1029         struct lockf_entry *deplock;
 1030 
              /* Use the _SAFE variant: the current edge may be freed below. */
 1031         LIST_FOREACH_SAFE(e, &lock->lf_inedges, le_inlink, ne) {
 1032                 deplock = e->le_from;
 1033                 if (all || !lf_blocks(lock, deplock)) {
                              /* Edge removal also edits the owner graph. */
 1034                         sx_xlock(&lf_owner_graph_lock);
 1035                         lf_remove_edge(e);
 1036                         sx_xunlock(&lf_owner_graph_lock);
                              /* Nothing blocks deplock any more: grant it. */
 1037                         if (LIST_EMPTY(&deplock->lf_outedges)) {
 1038                                 lf_wakeup_lock(state, deplock);
 1039                                 LIST_INSERT_HEAD(granted, deplock, lf_link);
 1040                         }
 1041                 }
 1042         }
 1043 }
 1044 
 1045 /*
 1046  * Move the start of an active lock forward to 'new_start'.  The lock
 1047  * is re-queued so the active list stays sorted by lf_start, and any
       * pending locks that are no longer blocked are added to 'granted'.
 1048  */
 1049 static void
 1050 lf_set_start(struct lockf *state, struct lockf_entry *lock, off_t new_start,
 1051         struct lockf_entry_list *granted)
 1052 {
 1053
 1054         KASSERT(lock->lf_start <= new_start, ("can't increase lock"));
 1055         LIST_REMOVE(lock, lf_link);
 1056         lock->lf_start = new_start;
 1057         lf_insert_lock(state, lock);
 1058         lf_update_dependancies(state, lock, FALSE, granted);
 1059 }
 1060 
 1061 /*
 1062  * Set the end of an existing active lock, updating dependencies and
 1063  * adding any newly woken locks to 'granted'.
       *
       * The lock may only shrink (new_end <= lf_end is asserted below).
       * lf_start is not changed here and the lock is not re-inserted into
       * the active list.
 1064  */
 1065 static void
 1066 lf_set_end(struct lockf *state, struct lockf_entry *lock, off_t new_end,
 1067         struct lockf_entry_list *granted)
 1068 {
 1069 
 1070         KASSERT(new_end <= lock->lf_end, ("can't increase lock"));
 1071         lock->lf_end = new_end;
              /* FALSE: the lock stays active, so only stale edges are dropped. */
 1072         lf_update_dependancies(state, lock, FALSE, granted);
 1073 }
 1074 
 1075 /*
 1076  * Add a lock to the active list, updating or removing any current
 1077  * locks owned by the same owner and processing any pending locks that
 1078  * become unblocked as a result. This code is also used for unlock
 1079  * since the logic for updating existing locks is identical.
 1080  *
 1081  * As a result of processing the new lock, we may unblock existing
 1082  * pending locks as a result of downgrading/unlocking. We simply
 1083  * activate the newly granted locks by looping.
 1084  *
 1085  * Since the new lock already has its dependencies set up, we always
 1086  * add it to the list (unless it's an unlock request). This may
 1087  * fragment the lock list in some pathological cases but it's probably
 1088  * not a real problem.
       *
       * The local 'granted' list is the work queue for that loop: it is
       * seeded with 'lock', and the helpers that are handed &granted add
       * the locks they wake to it.
 1089  */
 1090 static void
 1091 lf_activate_lock(struct lockf *state, struct lockf_entry *lock)
 1092 {
 1093         struct lockf_entry *overlap, *lf;
 1094         struct lockf_entry_list granted;
 1095         int ovcase;
 1096 
 1097         LIST_INIT(&granted);
 1098         LIST_INSERT_HEAD(&granted, lock, lf_link);
 1099 
 1100         while (!LIST_EMPTY(&granted)) {
                      /* Take the next lock to activate off the work queue. */
 1101                 lock = LIST_FIRST(&granted);
 1102                 LIST_REMOVE(lock, lf_link);
 1103 
 1104                 /*
 1105                  * Skip over locks owned by other processes.  Handle
 1106                  * any locks that overlap and are owned by ourselves.
 1107                  */
 1108                 overlap = LIST_FIRST(&state->ls_active);
 1109                 for (;;) {
 1110                         ovcase = lf_findoverlap(&overlap, lock, SELF);
 1111 
 1112 #ifdef LOCKF_DEBUG
 1113                         if (ovcase && (lockf_debug & 2)) {
 1114                                 printf("lf_setlock: overlap %d", ovcase);
 1115                                 lf_print("", overlap);
 1116                         }
 1117 #endif
 1118                         /*
 1119                          * Six cases:
 1120                          *      0) no overlap
 1121                          *      1) overlap == lock
 1122                          *      2) overlap contains lock
 1123                          *      3) lock contains overlap
 1124                          *      4) overlap starts before lock
 1125                          *      5) overlap ends after lock
                               *
                               * Cases 3 and 4 'continue' to look for a
                               * further overlap; the others end the scan.
 1126                          */
 1127                         switch (ovcase) {
 1128                         case 0: /* no overlap */
 1129                                 break;
 1130 
 1131                         case 1: /* overlap == lock */
 1132                                 /*
 1133                                  * We have already setup the
 1134                                  * dependents for the new lock, taking
 1135                                  * into account a possible downgrade
 1136                                  * or unlock. Remove the old lock.
 1137                                  */
 1138                                 LIST_REMOVE(overlap, lf_link);
 1139                                 lf_update_dependancies(state, overlap, TRUE,
 1140                                         &granted);
 1141                                 lf_free_lock(overlap);
 1142                                 break;
 1143 
 1144                         case 2: /* overlap contains lock */
 1145                                 /*
 1146                                  * Just split the existing lock.
 1147                                  */
 1148                                 lf_split(state, overlap, lock, &granted);
 1149                                 break;
 1150 
 1151                         case 3: /* lock contains overlap */
 1152                                 /*
 1153                                  * Delete the overlap and advance to
 1154                                  * the next entry in the list.
                                       * The successor is fetched before
                                       * the overlap is unlinked and freed.
 1155                                  */
 1156                                 lf = LIST_NEXT(overlap, lf_link);
 1157                                 LIST_REMOVE(overlap, lf_link);
 1158                                 lf_update_dependancies(state, overlap, TRUE,
 1159                                         &granted);
 1160                                 lf_free_lock(overlap);
 1161                                 overlap = lf;
 1162                                 continue;
 1163 
 1164                         case 4: /* overlap starts before lock */
 1165                                 /*
 1166                                  * Just update the overlap end and
 1167                                  * move on.
 1168                                  */
 1169                                 lf_set_end(state, overlap, lock->lf_start - 1,
 1170                                     &granted);
 1171                                 overlap = LIST_NEXT(overlap, lf_link);
 1172                                 continue;
 1173 
 1174                         case 5: /* overlap ends after lock */
 1175                                 /*
 1176                                  * Change the start of overlap and
 1177                                  * re-insert.
 1178                                  */
 1179                                 lf_set_start(state, overlap, lock->lf_end + 1,
 1180                                     &granted);
 1181                                 break;
 1182                         }
                              /* Reached by every case that did not 'continue'. */
 1183                         break;
 1184                 }
 1185 #ifdef LOCKF_DEBUG
 1186                 if (lockf_debug & 1) {
 1187                         if (lock->lf_type != F_UNLCK)
 1188                                 lf_print("lf_activate_lock: activated", lock);
 1189                         else
 1190                                 lf_print("lf_activate_lock: unlocked", lock);
 1191                         lf_printlist("lf_activate_lock", lock);
 1192                 }
 1193 #endif /* LOCKF_DEBUG */
                      /* An unlock request only trims existing locks; it is not queued. */
 1194                 if (lock->lf_type != F_UNLCK)
 1195                         lf_insert_lock(state, lock);
 1196         }
 1197 }
 1198 
 1199 /*
 1200  * Cancel a pending lock request, either as a result of a signal or a
 1201  * cancel request for an async lock.
       *
       * The lock is unlinked from its list, all of its edges are removed
       * and it is freed with lf_free_lock; the caller must not use 'lock'
       * afterwards.  Pending locks that become unblocked by the removal are
       * activated before returning.
 1202  */
 1203 static void
 1204 lf_cancel_lock(struct lockf *state, struct lockf_entry *lock)
 1205 {
 1206         struct lockf_entry_list granted;
 1207 
 1208         /*
 1209          * Note it is theoretically possible that cancelling this lock
 1210          * may allow some other pending lock to become
 1211          * active. Consider this case:
 1212          *
 1213          * Owner        Action          Result          Dependencies
 1214          * 
 1215          * A:           lock [0..0]     succeeds        
 1216          * B:           lock [2..2]     succeeds        
 1217          * C:           lock [1..2]     blocked         C->B
 1218          * D:           lock [0..1]     blocked         C->B,D->A,D->C
 1219          * A:           unlock [0..0]                   C->B,D->C
 1220          * C:           cancel [1..2]   
               *
               * After the cancel, D has no out-going edges left and can be
               * granted.
 1221          */
 1222 
 1223         LIST_REMOVE(lock, lf_link);
 1224 
 1225         /*
 1226          * Removing out-going edges is simple.
 1227          */
 1228         sx_xlock(&lf_owner_graph_lock);
 1229         lf_remove_outgoing(lock);
 1230         sx_xunlock(&lf_owner_graph_lock);
 1231 
 1232         /*
 1233          * Removing in-coming edges may allow some other lock to
 1234          * become active - we use lf_update_dependancies to figure
 1235          * this out.
 1236          */
 1237         LIST_INIT(&granted);
 1238         lf_update_dependancies(state, lock, TRUE, &granted);
 1239         lf_free_lock(lock);
 1240 
 1241         /*
 1242          * Feed any newly active locks to lf_activate_lock.
               * 'lock' is reused as the cursor; the cancelled entry was
               * freed above.
 1243          */
 1244         while (!LIST_EMPTY(&granted)) {
 1245                 lock = LIST_FIRST(&granted);
 1246                 LIST_REMOVE(lock, lf_link);
 1247                 lf_activate_lock(state, lock);
 1248         }
 1249 }
 1250 
 1251 /*
 1252  * Set a byte-range lock.
       *
       * Returns:
       *      0               the lock was granted (possibly after sleeping)
       *      EAGAIN          the lock is blocked and the request has neither
       *                      F_WAIT nor an async task
       *      EINPROGRESS     the lock is blocked and was queued as an async
       *                      request; '*cookiep' is set to the pending lock
       *      other           the error from lf_add_outgoing/lf_add_incoming
       *                      (deadlock), or the sx_sleep result when the
       *                      sleep ended while the lock was still blocked
       *
       * On the EAGAIN, deadlock and interrupted-sleep paths the lock entry
       * is freed here (directly or through lf_cancel_lock).
 1253  */
 1254 static int
 1255 lf_setlock(struct lockf *state, struct lockf_entry *lock, struct vnode *vp,
 1256     void **cookiep)
 1257 {
 1258         struct lockf_entry *block;
 1259         static char lockstr[] = "lockf";
 1260         int priority, error;
 1261 
 1262 #ifdef LOCKF_DEBUG
 1263         if (lockf_debug & 1)
 1264                 lf_print("lf_setlock", lock);
 1265 #endif /* LOCKF_DEBUG */
 1266 
 1267         /*
 1268          * Set the priority
               * (write locks sleep at PLOCK + 4; PCATCH is always added).
 1269          */
 1270         priority = PLOCK;
 1271         if (lock->lf_type == F_WRLCK)
 1272                 priority += 4;
 1273         priority |= PCATCH;
 1274         /*
 1275          * Scan lock list for this file looking for locks that would block us.
               * Although written as a loop, every path through the body
               * leaves via 'goto out', so the body runs at most once.
 1276          */
 1277         while ((block = lf_getblock(state, lock))) {
 1278                 /*
 1279                  * Free the structure and return if nonblocking.
 1280                  */
 1281                 if ((lock->lf_flags & F_WAIT) == 0
 1282                     && lock->lf_async_task == NULL) {
 1283                         lf_free_lock(lock);
 1284                         error = EAGAIN;
 1285                         goto out;
 1286                 }
 1287 
 1288                 /*
 1289                  * For flock type locks, we must first remove
 1290                  * any shared locks that we hold before we sleep
 1291                  * waiting for an exclusive lock.
                       * This is done by running the entry through
                       * lf_activate_lock as a temporary unlock request.
 1292                  */
 1293                 if ((lock->lf_flags & F_FLOCK) &&
 1294                     lock->lf_type == F_WRLCK) {
 1295                         lock->lf_type = F_UNLCK;
 1296                         lf_activate_lock(state, lock);
 1297                         lock->lf_type = F_WRLCK;
 1298                 }
 1299 
 1300                 /*
 1301                  * We are blocked. Create edges to each blocking lock,
 1302                  * checking for deadlock using the owner graph. For
 1303                  * simplicity, we run deadlock detection for all
 1304                  * locks, posix and otherwise.
 1305                  */
 1306                 sx_xlock(&lf_owner_graph_lock);
 1307                 error = lf_add_outgoing(state, lock);
 1308                 sx_xunlock(&lf_owner_graph_lock);
 1309 
 1310                 if (error) {
 1311 #ifdef LOCKF_DEBUG
 1312                         if (lockf_debug & 1)
 1313                                 lf_print("lf_setlock: deadlock", lock);
 1314 #endif
 1315                         lf_free_lock(lock);
 1316                         goto out;
 1317                 }
 1318 
 1319                 /*
 1320                  * We have added edges to everything that blocks
 1321                  * us. Sleep until they all go away.
 1322                  */
 1323                 LIST_INSERT_HEAD(&state->ls_pending, lock, lf_link);
 1324 #ifdef LOCKF_DEBUG
 1325                 if (lockf_debug & 1) {
 1326                         struct lockf_edge *e;
 1327                         LIST_FOREACH(e, &lock->lf_outedges, le_outlink) {
 1328                                 lf_print("lf_setlock: blocking on", e->le_to);
 1329                                 lf_printlist("lf_setlock", e->le_to);
 1330                         }
 1331                 }
 1332 #endif /* LOCKF_DEBUG */
 1333 
 1334                 if ((lock->lf_flags & F_WAIT) == 0) {
 1335                         /*
 1336                          * The caller requested async notification -
 1337                          * this callback happens when the blocking
 1338                          * lock is released, allowing the caller to
 1339                          * make another attempt to take the lock.
                               * The pending entry itself is handed back
                               * as the cookie.
 1340                          */
 1341                         *cookiep = (void *) lock;
 1342                         error = EINPROGRESS;
 1343                         goto out;
 1344                 }
 1345 
                      /* Sleep on the lock entry; lf_wakeup_lock calls wakeup() on it. */
 1346                 error = sx_sleep(lock, &state->ls_lock, priority, lockstr, 0);
 1347                 /*
 1348                  * We may have been awakened by a signal and/or by a
 1349                  * debugger continuing us (in which cases we must
 1350                  * remove our lock graph edges) and/or by another
 1351                  * process releasing a lock (in which case our edges
 1352                  * have already been removed and we have been moved to
 1353                  * the active list).
 1354                  *
 1355                  * Note that it is possible to receive a signal after
 1356                  * we were successfully woken (and moved to the active
 1357                  * list) but before we resumed execution. In this
 1358                  * case, our lf_outedges list will be clear. We
 1359                  * pretend there was no error.
 1360                  *
 1361                  * Note also, if we have been sleeping long enough, we
 1362                  * may now have incoming edges from some newer lock
 1363                  * which is waiting behind us in the queue.
 1364                  */
 1365                 if (LIST_EMPTY(&lock->lf_outedges)) {
 1366                         error = 0;
 1367                 } else {
                              /* Still blocked: withdraw the request, keep 'error'. */
 1368                         lf_cancel_lock(state, lock);
 1369                         goto out;
 1370                 }
 1371 #ifdef LOCKF_DEBUG
 1372                 if (lockf_debug & 1) {
 1373                         lf_print("lf_setlock: granted", lock);
 1374                 }
 1375 #endif
 1376                 goto out;
 1377         }
 1378         /*
 1379          * It looks like we are going to grant the lock. First add
 1380          * edges from any currently pending lock that the new lock
 1381          * would block.
 1382          */
 1383         sx_xlock(&lf_owner_graph_lock);
 1384         error = lf_add_incoming(state, lock);
 1385         sx_xunlock(&lf_owner_graph_lock);
 1386         if (error) {
 1387 #ifdef LOCKF_DEBUG
 1388                 if (lockf_debug & 1)
 1389                         lf_print("lf_setlock: deadlock", lock);
 1390 #endif
 1391                 lf_free_lock(lock);
 1392                 goto out;
 1393         }
 1394 
 1395         /*
 1396          * No blocks!!  Add the lock.  Note that we will
 1397          * downgrade or upgrade any overlapping locks this
 1398          * process already owns.
 1399          */
 1400         lf_activate_lock(state, lock);
 1401         error = 0;
 1402 out:
 1403         return (error);
 1404 }
 1405 
 1406 /*
 1407  * Remove a byte-range lock on an inode.
 1408  *
 1409  * The unlock request is passed to lf_activate_lock, which handles the
 1410  * owner's overlapping active locks and any waiters that become
       * unblocked.  Always returns zero.
 1411  */
 1412 static int
 1413 lf_clearlock(struct lockf *state, struct lockf_entry *unlock)
 1414 {
 1415
 1416         /* With no active locks there is nothing to unlock. */
 1417         if (LIST_EMPTY(&state->ls_active))
 1418                 return (0);
 1419
 1421 #ifdef LOCKF_DEBUG
 1422         if (unlock->lf_type != F_UNLCK)
 1423                 panic("lf_clearlock: bad type");
 1424         if (lockf_debug & 1)
 1425                 lf_print("lf_clearlock", unlock);
 1426 #endif /* LOCKF_DEBUG */
 1427
 1428         lf_activate_lock(state, unlock);
 1429
 1430         return (0);
 1431 }
 1432 
 1433 /*
 1434  * Report the first active lock that would block 'lock'.  When one
 1435  * exists its type, range and owner are copied into '*fl'; otherwise
       * only fl->l_type is set, to F_UNLCK.  Always returns zero.
 1436  */
 1437 static int
 1438 lf_getlock(struct lockf *state, struct lockf_entry *lock, struct flock *fl)
 1439 {
 1440         struct lockf_entry *block;
 1441
 1442 #ifdef LOCKF_DEBUG
 1443         if (lockf_debug & 1)
 1444                 lf_print("lf_getlock", lock);
 1445 #endif /* LOCKF_DEBUG */
 1446
 1447         block = lf_getblock(state, lock);
 1448         if (block == NOLOCKF) {
 1449                 fl->l_type = F_UNLCK;
 1450                 return (0);
 1451         }
 1452         fl->l_type = block->lf_type;
 1453         fl->l_whence = SEEK_SET;
 1454         fl->l_start = block->lf_start;
 1455         /* A lock that runs to OFF_MAX is reported with length zero. */
 1456         fl->l_len = (block->lf_end == OFF_MAX) ? 0 :
 1457             block->lf_end - block->lf_start + 1;
 1458         fl->l_pid = block->lf_owner->lo_pid;
 1459         fl->l_sysid = block->lf_owner->lo_sysid;
 1460         return (0);
 1461 }
 1462 
 1463 /*
 1464  * Cancel an async lock request identified by 'cookie'.
       *
       * The cookie is compared against the addresses of the pending lock
       * entries.  A match is only honoured if that entry covers the same
       * vnode and range as 'lock' and was queued with an async task.
       * Returns zero if the request was cancelled and ENOENT otherwise.
 1465  */
 1466 static int
 1467 lf_cancel(struct lockf *state, struct lockf_entry *lock, void *cookie)
 1468 {
 1469         struct lockf_entry *pending;
 1470
 1471         /*
 1472          * We need to match this request with an existing lock
 1473          * request.
 1474          */
 1475         LIST_FOREACH(pending, &state->ls_pending, lf_link) {
 1476                 if ((void *) pending == cookie)
 1477                         break;
 1478         }
 1479
 1480         /*
 1481          * We didn't find a matching lock - not much we can do here.
 1482          */
 1483         if (pending == NULL)
 1484                 return (ENOENT);
 1485
 1486         /*
 1487          * Double-check that this lock looks right (maybe use a
 1488          * rolling ID for the cancel cookie instead?)
 1489          */
 1490         if (pending->lf_vnode != lock->lf_vnode ||
 1491             pending->lf_start != lock->lf_start ||
 1492             pending->lf_end != lock->lf_end)
 1493                 return (ENOENT);
 1494
 1495         /*
 1496          * Make sure this lock was async and then just remove it
 1497          * from its wait lists.
 1498          */
 1499         if (pending->lf_async_task == NULL)
 1500                 return (ENOENT);
 1501
 1502         /*
 1503          * Note that since any other thread must take
 1504          * state->ls_lock before it can possibly trigger the async
 1505          * callback, we are safe from a race with lf_wakeup_lock,
 1506          * i.e. we can free the lock (actually our caller does
 1507          * this).
 1508          */
 1509         lf_cancel_lock(state, pending);
 1510
 1512         return (0);
 1513 }
 1514 
 1515 /*
 1516  * Scan the active locks of an inode and return the first one that
 1517  * blocks 'lock', or NOLOCKF when nothing does.
 1518  */
 1519 static struct lockf_entry *
 1520 lf_getblock(struct lockf *state, struct lockf_entry *lock)
 1521 {
 1522         struct lockf_entry *candidate;
 1523
 1524         LIST_FOREACH(candidate, &state->ls_active, lf_link) {
 1525                 /*
 1526                  * The active list is sorted by lf_start, so nothing
 1527                  * at or beyond this entry can reach into our range.
 1528                  */
 1529                 if (candidate->lf_start > lock->lf_end)
 1530                         break;
 1531                 if (lf_blocks(lock, candidate))
 1532                         return (candidate);
 1533         }
 1534
 1535         return (NOLOCKF);
 1536 }
 1537 
 1538 /*
 1539  * Walk the list of locks for an inode to find an overlapping lock (if
 1540  * any) and return a classification of that overlap.
 1541  *
 1542  * Arguments:
 1543  *      *overlap        The place in the lock list to start looking
 1544  *      lock            The lock which is being tested
 1545  *      type            Pass 'SELF' to test only locks with the same
 1546  *                      owner as lock, or 'OTHER' to test only locks
 1547  *                      with a different owner
 1548  *
 1549  * Returns one of six values:
 1550  *      0) no overlap
 1551  *      1) overlap == lock
 1552  *      2) overlap contains lock
 1553  *      3) lock contains overlap
 1554  *      4) overlap starts before lock
 1555  *      5) overlap ends after lock
 1556  *
 1557  * If there is an overlapping lock, '*overlap' is set to point at the
 1558  * overlapping lock.
 1559  *
 1560  * NOTE: this returns only the FIRST overlapping lock.  There
 1561  *       may be more than one.
 1562  */
 1563 static int
 1564 lf_findoverlap(struct lockf_entry **overlap, struct lockf_entry *lock, int type)
 1565 {
 1566         struct lockf_entry *lf;
 1567         off_t start, end;
 1568         int res;
 1569 
 1570         if ((*overlap) == NOLOCKF) {
 1571                 return (0);
 1572         }
 1573 #ifdef LOCKF_DEBUG
 1574         if (lockf_debug & 2)
 1575                 lf_print("lf_findoverlap: looking for overlap in", lock);
 1576 #endif /* LOCKF_DEBUG */
 1577         start = lock->lf_start;
 1578         end = lock->lf_end;
 1579         res = 0;
 1580         while (*overlap) {
 1581                 lf = *overlap;
 1582                 if (lf->lf_start > end)
 1583                         break;
 1584                 if (((type & SELF) && lf->lf_owner != lock->lf_owner) ||
 1585                     ((type & OTHERS) && lf->lf_owner == lock->lf_owner)) {
 1586                         *overlap = LIST_NEXT(lf, lf_link);
 1587                         continue;
 1588                 }
 1589 #ifdef LOCKF_DEBUG
 1590                 if (lockf_debug & 2)
 1591                         lf_print("\tchecking", lf);
 1592 #endif /* LOCKF_DEBUG */
 1593                 /*
 1594                  * OK, check for overlap
 1595                  *
 1596                  * Six cases:
 1597                  *      0) no overlap
 1598                  *      1) overlap == lock
 1599                  *      2) overlap contains lock
 1600                  *      3) lock contains overlap
 1601                  *      4) overlap starts before lock
 1602                  *      5) overlap ends after lock
 1603                  */
 1604                 if (start > lf->lf_end) {
 1605                         /* Case 0 */
 1606 #ifdef LOCKF_DEBUG
 1607                         if (lockf_debug & 2)
 1608                                 printf("no overlap\n");
 1609 #endif /* LOCKF_DEBUG */
 1610                         *overlap = LIST_NEXT(lf, lf_link);
 1611                         continue;
 1612                 }
 1613                 if (lf->lf_start == start && lf->lf_end == end) {
 1614                         /* Case 1 */
 1615 #ifdef LOCKF_DEBUG
 1616                         if (lockf_debug & 2)
 1617                                 printf("overlap == lock\n");
 1618 #endif /* LOCKF_DEBUG */
 1619                         res = 1;
 1620                         break;
 1621                 }
 1622                 if (lf->lf_start <= start && lf->lf_end >= end) {
 1623                         /* Case 2 */
 1624 #ifdef LOCKF_DEBUG
 1625                         if (lockf_debug & 2)
 1626                                 printf("overlap contains lock\n");
 1627 #endif /* LOCKF_DEBUG */
 1628                         res = 2;
 1629                         break;
 1630                 }
 1631                 if (start <= lf->lf_start && end >= lf->lf_end) {
 1632                         /* Case 3 */
 1633 #ifdef LOCKF_DEBUG
 1634                         if (lockf_debug & 2)
 1635                                 printf("lock contains overlap\n");
 1636 #endif /* LOCKF_DEBUG */
 1637                         res = 3;
 1638                         break;
 1639                 }
 1640                 if (lf->lf_start < start && lf->lf_end >= start) {
 1641                         /* Case 4 */
 1642 #ifdef LOCKF_DEBUG
 1643                         if (lockf_debug & 2)
 1644                                 printf("overlap starts before lock\n");
 1645 #endif /* LOCKF_DEBUG */
 1646                         res = 4;
 1647                         break;
 1648                 }
 1649                 if (lf->lf_start > start && lf->lf_end > end) {
 1650                         /* Case 5 */
 1651 #ifdef LOCKF_DEBUG
 1652                         if (lockf_debug & 2)
 1653                                 printf("overlap ends after lock\n");
 1654 #endif /* LOCKF_DEBUG */
 1655                         res = 5;
 1656                         break;
 1657                 }
 1658                 panic("lf_findoverlap: default");
 1659         }
 1660         return (res);
 1661 }
 1662 
 1663 /*
 1664  * Split an the existing 'lock1', based on the extent of the lock
 1665  * described by 'lock2'. The existing lock should cover 'lock2'
 1666  * entirely.
 1667  *
 1668  * Any pending locks which have been been unblocked are added to
 1669  * 'granted'
 1670  */
 1671 static void
 1672 lf_split(struct lockf *state, struct lockf_entry *lock1,
 1673     struct lockf_entry *lock2, struct lockf_entry_list *granted)
 1674 {
 1675         struct lockf_entry *splitlock;
 1676 
 1677 #ifdef LOCKF_DEBUG
 1678         if (lockf_debug & 2) {
 1679                 lf_print("lf_split", lock1);
 1680                 lf_print("splitting from", lock2);
 1681         }
 1682 #endif /* LOCKF_DEBUG */
 1683         /*
 1684          * Check to see if we don't need to split at all.
 1685          */
 1686         if (lock1->lf_start == lock2->lf_start) {
 1687                 lf_set_start(state, lock1, lock2->lf_end + 1, granted);
 1688                 return;
 1689         }
 1690         if (lock1->lf_end == lock2->lf_end) {
 1691                 lf_set_end(state, lock1, lock2->lf_start - 1, granted);
 1692                 return;
 1693         }
 1694         /*
 1695          * Make a new lock consisting of the last part of
 1696          * the encompassing lock.
 1697          */
 1698         splitlock = lf_alloc_lock(lock1->lf_owner);
 1699         memcpy(splitlock, lock1, sizeof *splitlock);
 1700         if (splitlock->lf_flags & F_REMOTE)
 1701                 vref(splitlock->lf_vnode);
 1702 
 1703         /*
 1704          * This cannot cause a deadlock since any edges we would add
 1705          * to splitlock already exist in lock1. We must be sure to add
 1706          * necessary dependancies to splitlock before we reduce lock1
 1707          * otherwise we may accidentally grant a pending lock that
 1708          * was blocked by the tail end of lock1.
 1709          */
 1710         splitlock->lf_start = lock2->lf_end + 1;
 1711         LIST_INIT(&splitlock->lf_outedges);
 1712         LIST_INIT(&splitlock->lf_inedges);
 1713         sx_xlock(&lf_owner_graph_lock);
 1714         lf_add_incoming(state, splitlock);
 1715         sx_xunlock(&lf_owner_graph_lock);
 1716 
 1717         lf_set_end(state, lock1, lock2->lf_start - 1, granted);
 1718 
 1719         /*
 1720          * OK, now link it in
 1721          */
 1722         lf_insert_lock(state, splitlock);
 1723 }
 1724 
 1725 struct clearlock {
 1726         STAILQ_ENTRY(clearlock) link;
 1727         struct vnode *vp;
 1728         struct flock fl;
 1729 };
 1730 STAILQ_HEAD(clearlocklist, clearlock);
 1731 
 1732 void
 1733 lf_clearremotesys(int sysid)
 1734 {
 1735         struct lockf *ls;
 1736         struct lockf_entry *lf;
 1737         struct clearlock *cl;
 1738         struct clearlocklist locks;
 1739 
 1740         KASSERT(sysid != 0, ("Can't clear local locks with F_UNLCKSYS"));
 1741 
 1742         /*
 1743          * In order to keep the locking simple, we iterate over the
 1744          * active lock lists to build a list of locks that need
 1745          * releasing. We then call VOP_ADVLOCK for each one in turn.
 1746          *
 1747          * We take an extra reference to the vnode for the duration to
 1748          * make sure it doesn't go away before we are finished.
 1749          */
 1750         STAILQ_INIT(&locks);
 1751         sx_xlock(&lf_lock_states_lock);
 1752         LIST_FOREACH(ls, &lf_lock_states, ls_link) {
 1753                 sx_xlock(&ls->ls_lock);
 1754                 LIST_FOREACH(lf, &ls->ls_active, lf_link) {
 1755                         if (lf->lf_owner->lo_sysid != sysid)
 1756                                 continue;
 1757 
 1758                         cl = malloc(sizeof(struct clearlock), M_LOCKF,
 1759                             M_WAITOK);
 1760                         cl->vp = lf->lf_vnode;
 1761                         vref(cl->vp);
 1762                         cl->fl.l_start = lf->lf_start;
 1763                         if (lf->lf_end == OFF_MAX)
 1764                                 cl->fl.l_len = 0;
 1765                         else
 1766                                 cl->fl.l_len =
 1767                                         lf->lf_end - lf->lf_start + 1;
 1768                         cl->fl.l_whence = SEEK_SET;
 1769                         cl->fl.l_type = F_UNLCK;
 1770                         cl->fl.l_pid = lf->lf_owner->lo_pid;
 1771                         cl->fl.l_sysid = sysid;
 1772                         STAILQ_INSERT_TAIL(&locks, cl, link);
 1773                 }
 1774                 sx_xunlock(&ls->ls_lock);
 1775         }
 1776         sx_xunlock(&lf_lock_states_lock);
 1777 
 1778         while ((cl = STAILQ_FIRST(&locks)) != NULL) {
 1779                 STAILQ_REMOVE_HEAD(&locks, link);
 1780                 VOP_ADVLOCK(cl->vp, 0, F_UNLCK, &cl->fl, F_REMOTE);
 1781                 vrele(cl->vp);
 1782                 free(cl, M_LOCKF);
 1783         }
 1784 }
 1785 
 1786 int
 1787 lf_countlocks(int sysid)
 1788 {
 1789         int i;
 1790         struct lock_owner *lo;
 1791         int count;
 1792 
 1793         count = 0;
 1794         sx_xlock(&lf_lock_owners_lock);
 1795         for (i = 0; i < LOCK_OWNER_HASH_SIZE; i++)
 1796                 LIST_FOREACH(lo, &lf_lock_owners[i], lo_link)
 1797                         if (lo->lo_sysid == sysid)
 1798                                 count += lo->lo_refs;
 1799         sx_xunlock(&lf_lock_owners_lock);
 1800 
 1801         return (count);
 1802 }
 1803 
 1804 #ifdef LOCKF_DEBUG
 1805 
 1806 /*
 1807  * Return non-zero if y is reachable from x using a brute force
 1808  * search. If reachable and path is non-null, return the route taken
 1809  * in path.
 1810  */
 1811 static int
 1812 graph_reaches(struct owner_vertex *x, struct owner_vertex *y,
 1813     struct owner_vertex_list *path)
 1814 {
 1815         struct owner_edge *e;
 1816 
 1817         if (x == y) {
 1818                 if (path)
 1819                         TAILQ_INSERT_HEAD(path, x, v_link);
 1820                 return 1;
 1821         }
 1822 
 1823         LIST_FOREACH(e, &x->v_outedges, e_outlink) {
 1824                 if (graph_reaches(e->e_to, y, path)) {
 1825                         if (path)
 1826                                 TAILQ_INSERT_HEAD(path, x, v_link);
 1827                         return 1;
 1828                 }
 1829         }
 1830         return 0;
 1831 }
 1832 
 1833 /*
 1834  * Perform consistency checks on the graph. Make sure the values of
 1835  * v_order are correct. If checkorder is non-zero, check no vertex can
 1836  * reach any other vertex with a smaller order.
 1837  */
 1838 static void
 1839 graph_check(struct owner_graph *g, int checkorder)
 1840 {
 1841         int i, j;
 1842 
 1843         for (i = 0; i < g->g_size; i++) {
 1844                 if (!g->g_vertices[i]->v_owner)
 1845                         continue;
 1846                 KASSERT(g->g_vertices[i]->v_order == i,
 1847                     ("lock graph vertices disordered"));
 1848                 if (checkorder) {
 1849                         for (j = 0; j < i; j++) {
 1850                                 if (!g->g_vertices[j]->v_owner)
 1851                                         continue;
 1852                                 KASSERT(!graph_reaches(g->g_vertices[i],
 1853                                         g->g_vertices[j], NULL),
 1854                                     ("lock graph vertices disordered"));
 1855                         }
 1856                 }
 1857         }
 1858 }
 1859 
 1860 static void
 1861 graph_print_vertices(struct owner_vertex_list *set)
 1862 {
 1863         struct owner_vertex *v;
 1864 
 1865         printf("{ ");
 1866         TAILQ_FOREACH(v, set, v_link) {
 1867                 printf("%d:", v->v_order);
 1868                 lf_print_owner(v->v_owner);
 1869                 if (TAILQ_NEXT(v, v_link))
 1870                         printf(", ");
 1871         }
 1872         printf(" }\n");
 1873 }
 1874 
 1875 #endif
 1876 
 1877 /*
 1878  * Calculate the sub-set of vertices v from the affected region [y..x]
 1879  * where v is reachable from y. Return -1 if a loop was detected
 1880  * (i.e. x is reachable from y, otherwise the number of vertices in
 1881  * this subset.
 1882  */
 1883 static int
 1884 graph_delta_forward(struct owner_graph *g, struct owner_vertex *x,
 1885     struct owner_vertex *y, struct owner_vertex_list *delta)
 1886 {
 1887         uint32_t gen;
 1888         struct owner_vertex *v;
 1889         struct owner_edge *e;
 1890         int n;
 1891 
 1892         /*
 1893          * We start with a set containing just y. Then for each vertex
 1894          * v in the set so far unprocessed, we add each vertex that v
 1895          * has an out-edge to and that is within the affected region
 1896          * [y..x]. If we see the vertex x on our travels, stop
 1897          * immediately.
 1898          */
 1899         TAILQ_INIT(delta);
 1900         TAILQ_INSERT_TAIL(delta, y, v_link);
 1901         v = y;
 1902         n = 1;
 1903         gen = g->g_gen;
 1904         while (v) {
 1905                 LIST_FOREACH(e, &v->v_outedges, e_outlink) {
 1906                         if (e->e_to == x)
 1907                                 return -1;
 1908                         if (e->e_to->v_order < x->v_order
 1909                             && e->e_to->v_gen != gen) {
 1910                                 e->e_to->v_gen = gen;
 1911                                 TAILQ_INSERT_TAIL(delta, e->e_to, v_link);
 1912                                 n++;
 1913                         }
 1914                 }
 1915                 v = TAILQ_NEXT(v, v_link);
 1916         }
 1917 
 1918         return (n);
 1919 }
 1920 
 1921 /*
 1922  * Calculate the sub-set of vertices v from the affected region [y..x]
 1923  * where v reaches x. Return the number of vertices in this subset.
 1924  */
 1925 static int
 1926 graph_delta_backward(struct owner_graph *g, struct owner_vertex *x,
 1927     struct owner_vertex *y, struct owner_vertex_list *delta)
 1928 {
 1929         uint32_t gen;
 1930         struct owner_vertex *v;
 1931         struct owner_edge *e;
 1932         int n;
 1933 
 1934         /*
 1935          * We start with a set containing just x. Then for each vertex
 1936          * v in the set so far unprocessed, we add each vertex that v
 1937          * has an in-edge from and that is within the affected region
 1938          * [y..x].
 1939          */
 1940         TAILQ_INIT(delta);
 1941         TAILQ_INSERT_TAIL(delta, x, v_link);
 1942         v = x;
 1943         n = 1;
 1944         gen = g->g_gen;
 1945         while (v) {
 1946                 LIST_FOREACH(e, &v->v_inedges, e_inlink) {
 1947                         if (e->e_from->v_order > y->v_order
 1948                             && e->e_from->v_gen != gen) {
 1949                                 e->e_from->v_gen = gen;
 1950                                 TAILQ_INSERT_HEAD(delta, e->e_from, v_link);
 1951                                 n++;
 1952                         }
 1953                 }
 1954                 v = TAILQ_PREV(v, owner_vertex_list, v_link);
 1955         }
 1956 
 1957         return (n);
 1958 }
 1959 
 1960 static int
 1961 graph_add_indices(int *indices, int n, struct owner_vertex_list *set)
 1962 {
 1963         struct owner_vertex *v;
 1964         int i, j;
 1965 
 1966         TAILQ_FOREACH(v, set, v_link) {
 1967                 for (i = n;
 1968                      i > 0 && indices[i - 1] > v->v_order; i--)
 1969                         ;
 1970                 for (j = n - 1; j >= i; j--)
 1971                         indices[j + 1] = indices[j];
 1972                 indices[i] = v->v_order;
 1973                 n++;
 1974         }
 1975 
 1976         return (n);
 1977 }
 1978 
 1979 static int
 1980 graph_assign_indices(struct owner_graph *g, int *indices, int nextunused,
 1981     struct owner_vertex_list *set)
 1982 {
 1983         struct owner_vertex *v, *vlowest;
 1984 
 1985         while (!TAILQ_EMPTY(set)) {
 1986                 vlowest = NULL;
 1987                 TAILQ_FOREACH(v, set, v_link) {
 1988                         if (!vlowest || v->v_order < vlowest->v_order)
 1989                                 vlowest = v;
 1990                 }
 1991                 TAILQ_REMOVE(set, vlowest, v_link);
 1992                 vlowest->v_order = indices[nextunused];
 1993                 g->g_vertices[vlowest->v_order] = vlowest;
 1994                 nextunused++;
 1995         }
 1996 
 1997         return (nextunused);
 1998 }
 1999 
 2000 static int
 2001 graph_add_edge(struct owner_graph *g, struct owner_vertex *x,
 2002     struct owner_vertex *y)
 2003 {
 2004         struct owner_edge *e;
 2005         struct owner_vertex_list deltaF, deltaB;
 2006         int nF, nB, n, vi, i;
 2007         int *indices;
 2008 
 2009         sx_assert(&lf_owner_graph_lock, SX_XLOCKED);
 2010 
 2011         LIST_FOREACH(e, &x->v_outedges, e_outlink) {
 2012                 if (e->e_to == y) {
 2013                         e->e_refs++;
 2014                         return (0);
 2015                 }
 2016         }
 2017 
 2018 #ifdef LOCKF_DEBUG
 2019         if (lockf_debug & 8) {
 2020                 printf("adding edge %d:", x->v_order);
 2021                 lf_print_owner(x->v_owner);
 2022                 printf(" -> %d:", y->v_order);
 2023                 lf_print_owner(y->v_owner);
 2024                 printf("\n");
 2025         }
 2026 #endif
 2027         if (y->v_order < x->v_order) {
 2028                 /*
 2029                  * The new edge violates the order. First find the set
 2030                  * of affected vertices reachable from y (deltaF) and
 2031                  * the set of affect vertices affected that reach x
 2032                  * (deltaB), using the graph generation number to
 2033                  * detect whether we have visited a given vertex
 2034                  * already. We re-order the graph so that each vertex
 2035                  * in deltaB appears before each vertex in deltaF.
 2036                  *
 2037                  * If x is a member of deltaF, then the new edge would
 2038                  * create a cycle. Otherwise, we may assume that
 2039                  * deltaF and deltaB are disjoint.
 2040                  */
 2041                 g->g_gen++;
 2042                 if (g->g_gen == 0) {
 2043                         /*
 2044                          * Generation wrap.
 2045                          */
 2046                         for (vi = 0; vi < g->g_size; vi++) {
 2047                                 g->g_vertices[vi]->v_gen = 0;
 2048                         }
 2049                         g->g_gen++;
 2050                 }
 2051                 nF = graph_delta_forward(g, x, y, &deltaF);
 2052                 if (nF < 0) {
 2053 #ifdef LOCKF_DEBUG
 2054                         if (lockf_debug & 8) {
 2055                                 struct owner_vertex_list path;
 2056                                 printf("deadlock: ");
 2057                                 TAILQ_INIT(&path);
 2058                                 graph_reaches(y, x, &path);
 2059                                 graph_print_vertices(&path);
 2060                         }
 2061 #endif
 2062                         return (EDEADLK);
 2063                 }
 2064 
 2065 #ifdef LOCKF_DEBUG
 2066                 if (lockf_debug & 8) {
 2067                         printf("re-ordering graph vertices\n");
 2068                         printf("deltaF = ");
 2069                         graph_print_vertices(&deltaF);
 2070                 }
 2071 #endif
 2072 
 2073                 nB = graph_delta_backward(g, x, y, &deltaB);
 2074 
 2075 #ifdef LOCKF_DEBUG
 2076                 if (lockf_debug & 8) {
 2077                         printf("deltaB = ");
 2078                         graph_print_vertices(&deltaB);
 2079                 }
 2080 #endif
 2081 
 2082                 /*
 2083                  * We first build a set of vertex indices (vertex
 2084                  * order values) that we may use, then we re-assign
 2085                  * orders first to those vertices in deltaB, then to
 2086                  * deltaF. Note that the contents of deltaF and deltaB
 2087                  * may be partially disordered - we perform an
 2088                  * insertion sort while building our index set.
 2089                  */
 2090                 indices = g->g_indexbuf;
 2091                 n = graph_add_indices(indices, 0, &deltaF);
 2092                 graph_add_indices(indices, n, &deltaB);
 2093 
 2094                 /*
 2095                  * We must also be sure to maintain the relative
 2096                  * ordering of deltaF and deltaB when re-assigning
 2097                  * vertices. We do this by iteratively removing the
 2098                  * lowest ordered element from the set and assigning
 2099                  * it the next value from our new ordering.
 2100                  */
 2101                 i = graph_assign_indices(g, indices, 0, &deltaB);
 2102                 graph_assign_indices(g, indices, i, &deltaF);
 2103 
 2104 #ifdef LOCKF_DEBUG
 2105                 if (lockf_debug & 8) {
 2106                         struct owner_vertex_list set;
 2107                         TAILQ_INIT(&set);
 2108                         for (i = 0; i < nB + nF; i++)
 2109                                 TAILQ_INSERT_TAIL(&set,
 2110                                     g->g_vertices[indices[i]], v_link);
 2111                         printf("new ordering = ");
 2112                         graph_print_vertices(&set);
 2113                 }
 2114 #endif
 2115         }
 2116 
 2117         KASSERT(x->v_order < y->v_order, ("Failed to re-order graph"));
 2118 
 2119 #ifdef LOCKF_DEBUG
 2120         if (lockf_debug & 8) {
 2121                 graph_check(g, TRUE);
 2122         }
 2123 #endif
 2124 
 2125         e = malloc(sizeof(struct owner_edge), M_LOCKF, M_WAITOK);
 2126 
 2127         LIST_INSERT_HEAD(&x->v_outedges, e, e_outlink);
 2128         LIST_INSERT_HEAD(&y->v_inedges, e, e_inlink);
 2129         e->e_refs = 1;
 2130         e->e_from = x;
 2131         e->e_to = y;
 2132 
 2133         return (0);
 2134 }
 2135 
 2136 /*
 2137  * Remove an edge x->y from the graph.
 2138  */
 2139 static void
 2140 graph_remove_edge(struct owner_graph *g, struct owner_vertex *x,
 2141     struct owner_vertex *y)
 2142 {
 2143         struct owner_edge *e;
 2144 
 2145         sx_assert(&lf_owner_graph_lock, SX_XLOCKED);
 2146 
 2147         LIST_FOREACH(e, &x->v_outedges, e_outlink) {
 2148                 if (e->e_to == y)
 2149                         break;
 2150         }
 2151         KASSERT(e, ("Removing non-existent edge from deadlock graph"));
 2152 
 2153         e->e_refs--;
 2154         if (e->e_refs == 0) {
 2155 #ifdef LOCKF_DEBUG
 2156                 if (lockf_debug & 8) {
 2157                         printf("removing edge %d:", x->v_order);
 2158                         lf_print_owner(x->v_owner);
 2159                         printf(" -> %d:", y->v_order);
 2160                         lf_print_owner(y->v_owner);
 2161                         printf("\n");
 2162                 }
 2163 #endif
 2164                 LIST_REMOVE(e, e_outlink);
 2165                 LIST_REMOVE(e, e_inlink);
 2166                 free(e, M_LOCKF);
 2167         }
 2168 }
 2169 
 2170 /*
 2171  * Allocate a vertex from the free list. Return ENOMEM if there are
 2172  * none.
 2173  */
 2174 static struct owner_vertex *
 2175 graph_alloc_vertex(struct owner_graph *g, struct lock_owner *lo)
 2176 {
 2177         struct owner_vertex *v;
 2178 
 2179         sx_assert(&lf_owner_graph_lock, SX_XLOCKED);
 2180 
 2181         v = malloc(sizeof(struct owner_vertex), M_LOCKF, M_WAITOK);
 2182         if (g->g_size == g->g_space) {
 2183                 g->g_vertices = realloc(g->g_vertices,
 2184                     2 * g->g_space * sizeof(struct owner_vertex *),
 2185                     M_LOCKF, M_WAITOK);
 2186                 free(g->g_indexbuf, M_LOCKF);
 2187                 g->g_indexbuf = malloc(2 * g->g_space * sizeof(int),
 2188                     M_LOCKF, M_WAITOK);
 2189                 g->g_space = 2 * g->g_space;
 2190         }
 2191         v->v_order = g->g_size;
 2192         v->v_gen = g->g_gen;
 2193         g->g_vertices[g->g_size] = v;
 2194         g->g_size++;
 2195 
 2196         LIST_INIT(&v->v_outedges);
 2197         LIST_INIT(&v->v_inedges);
 2198         v->v_owner = lo;
 2199 
 2200         return (v);
 2201 }
 2202 
 2203 static void
 2204 graph_free_vertex(struct owner_graph *g, struct owner_vertex *v)
 2205 {
 2206         struct owner_vertex *w;
 2207         int i;
 2208 
 2209         sx_assert(&lf_owner_graph_lock, SX_XLOCKED);
 2210         
 2211         KASSERT(LIST_EMPTY(&v->v_outedges), ("Freeing vertex with edges"));
 2212         KASSERT(LIST_EMPTY(&v->v_inedges), ("Freeing vertex with edges"));
 2213 
 2214         /*
 2215          * Remove from the graph's array and close up the gap,
 2216          * renumbering the other vertices.
 2217          */
 2218         for (i = v->v_order + 1; i < g->g_size; i++) {
 2219                 w = g->g_vertices[i];
 2220                 w->v_order--;
 2221                 g->g_vertices[i - 1] = w;
 2222         }
 2223         g->g_size--;
 2224 
 2225         free(v, M_LOCKF);
 2226 }
 2227 
 2228 static struct owner_graph *
 2229 graph_init(struct owner_graph *g)
 2230 {
 2231 
 2232         g->g_vertices = malloc(10 * sizeof(struct owner_vertex *),
 2233             M_LOCKF, M_WAITOK);
 2234         g->g_size = 0;
 2235         g->g_space = 10;
 2236         g->g_indexbuf = malloc(g->g_space * sizeof(int), M_LOCKF, M_WAITOK);
 2237         g->g_gen = 0;
 2238 
 2239         return (g);
 2240 }
 2241 
 2242 #ifdef LOCKF_DEBUG
 2243 /*
 2244  * Print description of a lock owner
 2245  */
 2246 static void
 2247 lf_print_owner(struct lock_owner *lo)
 2248 {
 2249 
 2250         if (lo->lo_flags & F_REMOTE) {
 2251                 printf("remote pid %d, system %d",
 2252                     lo->lo_pid, lo->lo_sysid);
 2253         } else if (lo->lo_flags & F_FLOCK) {
 2254                 printf("file %p", lo->lo_id);
 2255         } else {
 2256                 printf("local pid %d", lo->lo_pid);
 2257         }
 2258 }
 2259 
 2260 /*
 2261  * Print out a lock.
 2262  */
 2263 static void
 2264 lf_print(char *tag, struct lockf_entry *lock)
 2265 {
 2266 
 2267         printf("%s: lock %p for ", tag, (void *)lock);
 2268         lf_print_owner(lock->lf_owner);
 2269         if (lock->lf_inode != (struct inode *)0)
 2270                 printf(" in ino %ju on dev <%s>,",
 2271                     (uintmax_t)lock->lf_inode->i_number,
 2272                     devtoname(lock->lf_inode->i_dev));
 2273         printf(" %s, start %jd, end ",
 2274             lock->lf_type == F_RDLCK ? "shared" :
 2275             lock->lf_type == F_WRLCK ? "exclusive" :
 2276             lock->lf_type == F_UNLCK ? "unlock" : "unknown",
 2277             (intmax_t)lock->lf_start);
 2278         if (lock->lf_end == OFF_MAX)
 2279                 printf("EOF");
 2280         else
 2281                 printf("%jd", (intmax_t)lock->lf_end);
 2282         if (!LIST_EMPTY(&lock->lf_outedges))
 2283                 printf(" block %p\n",
 2284                     (void *)LIST_FIRST(&lock->lf_outedges)->le_to);
 2285         else
 2286                 printf("\n");
 2287 }
 2288 
 2289 static void
 2290 lf_printlist(char *tag, struct lockf_entry *lock)
 2291 {
 2292         struct lockf_entry *lf, *blk;
 2293         struct lockf_edge *e;
 2294 
 2295         if (lock->lf_inode == (struct inode *)0)
 2296                 return;
 2297 
 2298         printf("%s: Lock list for ino %ju on dev <%s>:\n",
 2299             tag, (uintmax_t)lock->lf_inode->i_number,
 2300             devtoname(lock->lf_inode->i_dev));
 2301         LIST_FOREACH(lf, &lock->lf_inode->i_lockf->ls_active, lf_link) {
 2302                 printf("\tlock %p for ",(void *)lf);
 2303                 lf_print_owner(lock->lf_owner);
 2304                 printf(", %s, start %jd, end %jd",
 2305                     lf->lf_type == F_RDLCK ? "shared" :
 2306                     lf->lf_type == F_WRLCK ? "exclusive" :
 2307                     lf->lf_type == F_UNLCK ? "unlock" :
 2308                     "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
 2309                 LIST_FOREACH(e, &lf->lf_outedges, le_outlink) {
 2310                         blk = e->le_to;
 2311                         printf("\n\t\tlock request %p for ", (void *)blk);
 2312                         lf_print_owner(blk->lf_owner);
 2313                         printf(", %s, start %jd, end %jd",
 2314                             blk->lf_type == F_RDLCK ? "shared" :
 2315                             blk->lf_type == F_WRLCK ? "exclusive" :
 2316                             blk->lf_type == F_UNLCK ? "unlock" :
 2317                             "unknown", (intmax_t)blk->lf_start,
 2318                             (intmax_t)blk->lf_end);
 2319                         if (!LIST_EMPTY(&blk->lf_inedges))
 2320                                 panic("lf_printlist: bad list");
 2321                 }
 2322                 printf("\n");
 2323         }
 2324 }
 2325 #endif /* LOCKF_DEBUG */

Cache object: b76850d0629ce9dff52d136d824d09e2


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.