[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]

FreeBSD/Linux Kernel Cross Reference
sys/nlm/nlm_prot_impl.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD70  -  FREEBSD6  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*-
  2  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
  3  * Authors: Doug Rabson <dfr@rabson.org>
  4  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
  5  *
  6  * Redistribution and use in source and binary forms, with or without
  7  * modification, are permitted provided that the following conditions
  8  * are met:
  9  * 1. Redistributions of source code must retain the above copyright
 10  *    notice, this list of conditions and the following disclaimer.
 11  * 2. Redistributions in binary form must reproduce the above copyright
 12  *    notice, this list of conditions and the following disclaimer in the
 13  *    documentation and/or other materials provided with the distribution.
 14  *
 15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 25  * SUCH DAMAGE.
 26  */
 27 
 28 #include "opt_inet6.h"
 29 #include "opt_nfs.h"
 30 
 31 #include <sys/cdefs.h>
 32 __FBSDID("$FreeBSD: src/sys/nlm/nlm_prot_impl.c,v 1.15 2008/11/03 10:38:00 dfr Exp $");
 33 
 34 #include <sys/param.h>
 35 #include <sys/fcntl.h>
 36 #include <sys/kernel.h>
 37 #include <sys/kthread.h>
 38 #include <sys/lockf.h>
 39 #include <sys/malloc.h>
 40 #include <sys/mount.h>
 41 #if __FreeBSD_version >= 700000
 42 #include <sys/priv.h>
 43 #endif
 44 #include <sys/proc.h>
 45 #include <sys/socket.h>
 46 #include <sys/socketvar.h>
 47 #include <sys/syscall.h>
 48 #include <sys/sysctl.h>
 49 #include <sys/sysent.h>
 50 #include <sys/sysproto.h>
 51 #include <sys/systm.h>
 52 #include <sys/taskqueue.h>
 53 #include <sys/unistd.h>
 54 #include <sys/vnode.h>
 55 
 56 #include <nfs/nfsproto.h>
 57 #include <nfsclient/nfs.h>
 58 #include <nfsclient/nfsnode.h>
 59 
 60 #include <nlm/nlm_prot.h>
 61 #include <nlm/sm_inter.h>
 62 #include <nlm/nlm.h>
 63 #include <rpc/rpc_com.h>
 64 #include <rpc/rpcb_prot.h>
 65 
/* Malloc type used for allocations made by this file. */
MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");

/*
 * If a host is inactive (and holds no locks) for this many seconds,
 * we consider it idle and stop monitoring it (see nlm_check_idle).
 */
#define NLM_IDLE_TIMEOUT        30

/*
 * Minimum number of seconds between scans of the host list for idle
 * hosts.
 */
#define NLM_IDLE_PERIOD         5

/*
 * Support for sysctl vfs.nlm.sysid. Each host created by
 * nlm_create_host adds a child node named after its sysid.
 */
SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW, NULL, "Network Lock Manager");
SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid, CTLFLAG_RW, NULL, "");

/*
 * Syscall hooks. nlm_syscall_registered records whether nlm_init
 * managed to register the syscall so that nlm_uninit only
 * deregisters what was actually registered.
 */
static int nlm_syscall_offset = SYS_nlm_syscall;
static struct sysent nlm_syscall_prev_sysent;
#if __FreeBSD_version < 700000
static struct sysent nlm_syscall_sysent = {
        (sizeof(struct nlm_syscall_args) / sizeof(register_t)) | SYF_MPSAFE,
        (sy_call_t *) nlm_syscall
};
#else
MAKE_SYSENT(nlm_syscall);
#endif
static bool_t nlm_syscall_registered = FALSE;

/*
 * Debug level passed in from userland. We also support a sysctl hook
 * (debug.nlm_debug) so that it can be changed on a live system.
 */
static int nlm_debug_level;
SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");

/*
 * Grace period handling. The value of nlm_grace_threshold is the
 * value of time_uptime after which we are serving requests normally.
 */
static time_t nlm_grace_threshold;

/*
 * We check for idle hosts if time_uptime is greater than
 * nlm_next_idle_check.
 */
static time_t nlm_next_idle_check;

/*
 * A socket to use for RPC - shared by all IPv4 RPC clients.
 */
static struct socket *nlm_socket;

#ifdef INET6

/*
 * A socket to use for RPC - shared by all IPv6 RPC clients.
 */
static struct socket *nlm_socket6;

#endif

/*
 * An RPC client handle that can be used to communicate with the local
 * NSM.
 */
static CLIENT *nlm_nsm;

/*
 * An AUTH handle for the server's creds.
 */
static AUTH *nlm_auth;

/*
 * A zero timeval for sending async RPC messages.
 */
struct timeval nlm_zero_tv = { 0, 0 };

/*
 * The local NSM state number.
 */
int nlm_nsm_state;


/*
 * A lock to protect the host list and waiting lock list.
 */
static struct mtx nlm_global_lock;

/*
 * Locking key used in the structure field annotations below:
 * (l)          locked by nh_lock
 * (s)          only accessed via server RPC which is single threaded
 * (g)          locked by nlm_global_lock
 * (c)          const until freeing
 * (a)          modified using atomic ops
 */
168 
169 /*
170  * A pending client-side lock request, stored on the nlm_waiting_locks
171  * list.
172  */
173 struct nlm_waiting_lock {
174         TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */
175         bool_t          nw_waiting;            /* (g) */
176         nlm4_lock       nw_lock;               /* (c) */
177         union nfsfh     nw_fh;                 /* (c) */
178         struct vnode    *nw_vp;                /* (c) */
179 };
180 TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);
181 
182 struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */
183 
/*
 * A pending server-side asynchronous lock request, stored on the
 * nh_pending list of the NLM host. Once the grant callback has run the
 * request is moved to the host's nh_finished list, from where it is
 * freed by nlm_free_finished_locks.
 *
 * Field annotations: (l) protected by the host's nh_lock, (c) constant
 * until the structure is freed.
 */
struct nlm_async_lock {
        TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */
        struct task     af_task;        /* (c) async callback details */
        void            *af_cookie;     /* (l) lock manager cancel token */
        struct vnode    *af_vp;         /* (l) vnode to lock */
        struct flock    af_fl;          /* (c) lock details */
        struct nlm_host *af_host;       /* (c) host which is locking */
        CLIENT          *af_rpc;        /* (c) rpc client to send message */
        nlm4_testargs   af_granted;     /* (c) notification details */
};
TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);
199 
/*
 * NLM host.
 */

/* Local NSM monitoring state of a host (stored in nh_monstate). */
enum nlm_host_state {
        NLM_UNMONITORED,
        NLM_MONITORED,
        NLM_MONITOR_FAILED,
        NLM_RECOVERING
};

/* An RPC client handle together with the time it was created. */
struct nlm_rpc {
        CLIENT          *nr_client;    /* (l) RPC client handle */
        time_t          nr_create_time; /* (l) when client was created */
};

/*
 * Per-host state. Hosts live on the global nlm_hosts list and are
 * reference counted via nh_refs; nlm_host_destroy frees them.
 */
struct nlm_host {
        struct mtx      nh_lock;
        volatile u_int  nh_refs;       /* (a) reference count */
        TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */
        char            nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */
        uint32_t        nh_sysid;        /* (c) our allocated system ID */
        char            nh_sysid_string[10]; /* (c) string rep. of sysid */
        struct sockaddr_storage nh_addr; /* (s) remote address of host */
        struct nlm_rpc  nh_srvrpc;       /* (l) RPC for server replies */
        struct nlm_rpc  nh_clntrpc;      /* (l) RPC for client requests */
        rpcvers_t       nh_vers;         /* (s) NLM version of host */
        int             nh_state;        /* (s) last seen NSM state of host */
        enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */
        time_t          nh_idle_timeout; /* (s) Time at which host is idle */
        struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
        struct nlm_async_lock_list nh_pending; /* (l) pending async locks */
        struct nlm_async_lock_list nh_finished; /* (l) finished async locks */
};
TAILQ_HEAD(nlm_host_list, nlm_host);

static struct nlm_host_list nlm_hosts; /* (g) */
/* Next sysid to hand out; nlm_create_host post-increments it. */
static uint32_t nlm_next_sysid = 1;    /* (g) */

static void     nlm_host_unmonitor(struct nlm_host *);
239 
240 /**********************************************************************/
241 
242 /*
243  * Initialise NLM globals.
244  */
245 static void
246 nlm_init(void *dummy)
247 {
248         int error;
249 
250         mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF);
251         TAILQ_INIT(&nlm_waiting_locks);
252         TAILQ_INIT(&nlm_hosts);
253 
254         error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent,
255             &nlm_syscall_prev_sysent);
256         if (error)
257                 printf("Can't register NLM syscall\n");
258         else
259                 nlm_syscall_registered = TRUE;
260 }
261 SYSINIT(nlm_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_init, NULL);
262 
263 static void
264 nlm_uninit(void *dummy)
265 {
266 
267         if (nlm_syscall_registered)
268                 syscall_deregister(&nlm_syscall_offset,
269                     &nlm_syscall_prev_sysent);
270 }
271 SYSUNINIT(nlm_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_uninit, NULL);
272 
273 /*
274  * Copy a struct netobj.
275  */ 
276 void
277 nlm_copy_netobj(struct netobj *dst, struct netobj *src,
278     struct malloc_type *type)
279 {
280 
281         dst->n_len = src->n_len;
282         dst->n_bytes = malloc(src->n_len, type, M_WAITOK);
283         memcpy(dst->n_bytes, src->n_bytes, src->n_len);
284 }
285 
286 /*
287  * Create an RPC client handle for the given (address,prog,vers)
288  * triple using UDP.
289  */
290 static CLIENT *
291 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers)
292 {
293         char *wchan = "nlmrcv";
294         const char* protofmly;
295         struct sockaddr_storage ss;
296         struct socket *so;
297         CLIENT *rpcb;
298         struct timeval timo;
299         RPCB parms;
300         char *uaddr;
301         enum clnt_stat stat = RPC_SUCCESS;
302         int rpcvers = RPCBVERS4;
303         bool_t do_tcp = FALSE;
304         struct portmap mapping;
305         u_short port = 0;
306 
307         /*
308          * First we need to contact the remote RPCBIND service to find
309          * the right port.
310          */
311         memcpy(&ss, sa, sa->sa_len);
312         switch (ss.ss_family) {
313         case AF_INET:
314                 ((struct sockaddr_in *)&ss)->sin_port = htons(111);
315                 protofmly = "inet";
316                 so = nlm_socket;
317                 break;
318                 
319 #ifdef INET6
320         case AF_INET6:
321                 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(111);
322                 protofmly = "inet6";
323                 so = nlm_socket6;
324                 break;
325 #endif
326 
327         default:
328                 /*
329                  * Unsupported address family - fail.
330                  */
331                 return (NULL);
332         }
333 
334         rpcb = clnt_dg_create(so, (struct sockaddr *)&ss,
335             RPCBPROG, rpcvers, 0, 0);
336         if (!rpcb)
337                 return (NULL);
338 
339 try_tcp:
340         parms.r_prog = prog;
341         parms.r_vers = vers;
342         if (do_tcp)
343                 parms.r_netid = "tcp";
344         else
345                 parms.r_netid = "udp";
346         parms.r_addr = "";
347         parms.r_owner = "";
348 
349         /*
350          * Use the default timeout.
351          */
352         timo.tv_sec = 25;
353         timo.tv_usec = 0;
354 again:
355         switch (rpcvers) {
356         case RPCBVERS4:
357         case RPCBVERS:
358                 /*
359                  * Try RPCBIND 4 then 3.
360                  */
361                 uaddr = NULL;
362                 stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR,
363                     (xdrproc_t) xdr_rpcb, &parms,
364                     (xdrproc_t) xdr_wrapstring, &uaddr, timo);
365                 if (stat == RPC_PROGVERSMISMATCH) {
366                         if (rpcvers == RPCBVERS4)
367                                 rpcvers = RPCBVERS;
368                         else if (rpcvers == RPCBVERS)
369                                 rpcvers = PMAPVERS;
370                         CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);
371                         goto again;
372                 } else if (stat == RPC_SUCCESS) {
373                         /*
374                          * We have a reply from the remote RPCBIND - turn it
375                          * into an appropriate address and make a new client
376                          * that can talk to the remote NLM.
377                          *
378                          * XXX fixup IPv6 scope ID.
379                          */
380                         struct netbuf *a;
381                         a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr);
382                         if (!a) {
383                                 CLNT_DESTROY(rpcb);
384                                 return (NULL);
385                         }
386                         memcpy(&ss, a->buf, a->len);
387                         free(a->buf, M_RPC);
388                         free(a, M_RPC);
389                         xdr_free((xdrproc_t) xdr_wrapstring, &uaddr);
390                 }
391                 break;
392         case PMAPVERS:
393                 /*
394                  * Try portmap.
395                  */
396                 mapping.pm_prog = parms.r_prog;
397                 mapping.pm_vers = parms.r_vers;
398                 mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP;
399                 mapping.pm_port = 0;
400 
401                 stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT,
402                     (xdrproc_t) xdr_portmap, &mapping,
403                     (xdrproc_t) xdr_u_short, &port, timo);
404 
405                 if (stat == RPC_SUCCESS) {
406                         switch (ss.ss_family) {
407                         case AF_INET:
408                                 ((struct sockaddr_in *)&ss)->sin_port =
409                                         htons(port);
410                                 break;
411                 
412 #ifdef INET6
413                         case AF_INET6:
414                                 ((struct sockaddr_in6 *)&ss)->sin6_port =
415                                         htons(port);
416                                 break;
417 #endif
418                         }
419                 }
420                 break;
421         default:
422                 panic("invalid rpcvers %d", rpcvers);
423         }
424         /*
425          * We may have a positive response from the portmapper, but the NLM
426          * service was not found. Make sure we received a valid port.
427          */
428         switch (ss.ss_family) {
429         case AF_INET:
430                 port = ((struct sockaddr_in *)&ss)->sin_port;
431                 break;
432 #ifdef INET6
433         case AF_INET6:
434                 port = ((struct sockaddr_in6 *)&ss)->sin6_port;
435                 break;
436 #endif
437         }
438         if (stat != RPC_SUCCESS || !port) {
439                 /*
440                  * If we were able to talk to rpcbind or portmap, but the udp
441                  * variant wasn't available, ask about tcp.
442                  *
443                  * XXX - We could also check for a TCP portmapper, but
444                  * if the host is running a portmapper at all, we should be able
445                  * to hail it over UDP.
446                  */
447                 if (stat == RPC_SUCCESS && !do_tcp) {
448                         do_tcp = TRUE;
449                         goto try_tcp;
450                 }
451 
452                 /* Otherwise, bad news. */
453                 printf("NLM: failed to contact remote rpcbind, "
454                     "stat = %d, port = %d\n",
455                     (int) stat, port);
456                 CLNT_DESTROY(rpcb);
457                 return (NULL);
458         }
459 
460         if (do_tcp) {
461                 /*
462                  * Destroy the UDP client we used to speak to rpcbind and
463                  * recreate as a TCP client.
464                  */
465                 struct netconfig *nconf = NULL;
466 
467                 CLNT_DESTROY(rpcb);
468 
469                 switch (ss.ss_family) {
470                 case AF_INET:
471                         nconf = getnetconfigent("tcp");
472                         break;
473 #ifdef INET6
474                 case AF_INET6:
475                         nconf = getnetconfigent("tcp6");
476                         break;
477 #endif
478                 }
479 
480                 rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss,
481                     prog, vers, 0, 0);
482                 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
483                 rpcb->cl_auth = nlm_auth;
484                 
485         } else {
486                 /*
487                  * Re-use the client we used to speak to rpcbind.
488                  */
489                 CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss);
490                 CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
491                 CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
492                 CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
493                 rpcb->cl_auth = nlm_auth;
494         }
495 
496         return (rpcb);
497 }
498 
499 /*
500  * This async callback after when an async lock request has been
501  * granted. We notify the host which initiated the request.
502  */
503 static void
504 nlm_lock_callback(void *arg, int pending)
505 {
506         struct nlm_async_lock *af = (struct nlm_async_lock *) arg;
507         struct rpc_callextra ext;
508 
509         if (nlm_debug_level >= 2)
510                 printf("NLM: async lock %p for %s (sysid %d) granted\n",
511                     af, af->af_host->nh_caller_name,
512                     af->af_host->nh_sysid);
513 
514         /*
515          * Send the results back to the host.
516          *
517          * Note: there is a possible race here with nlm_host_notify
518          * destroying the RPC client. To avoid problems, the first
519          * thing nlm_host_notify does is to cancel pending async lock
520          * requests.
521          */
522         memset(&ext, 0, sizeof(ext));
523         ext.rc_auth = nlm_auth;
524         if (af->af_host->nh_vers == NLM_VERS4) {
525                 nlm4_granted_msg_4(&af->af_granted,
526                     NULL, af->af_rpc, &ext, nlm_zero_tv);
527         } else {
528                 /*
529                  * Back-convert to legacy protocol
530                  */
531                 nlm_testargs granted;
532                 granted.cookie = af->af_granted.cookie;
533                 granted.exclusive = af->af_granted.exclusive;
534                 granted.alock.caller_name =
535                         af->af_granted.alock.caller_name;
536                 granted.alock.fh = af->af_granted.alock.fh;
537                 granted.alock.oh = af->af_granted.alock.oh;
538                 granted.alock.svid = af->af_granted.alock.svid;
539                 granted.alock.l_offset =
540                         af->af_granted.alock.l_offset;
541                 granted.alock.l_len =
542                         af->af_granted.alock.l_len;
543 
544                 nlm_granted_msg_1(&granted,
545                     NULL, af->af_rpc, &ext, nlm_zero_tv);
546         }
547 
548         /*
549          * Move this entry to the nh_finished list. Someone else will
550          * free it later - its too hard to do it here safely without
551          * racing with cancel.
552          *
553          * XXX possibly we should have a third "granted sent but not
554          * ack'ed" list so that we can re-send the granted message.
555          */
556         mtx_lock(&af->af_host->nh_lock);
557         TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link);
558         TAILQ_INSERT_TAIL(&af->af_host->nh_finished, af, af_link);
559         mtx_unlock(&af->af_host->nh_lock);
560 }
561 
562 /*
563  * Free an async lock request. The request must have been removed from
564  * any list.
565  */
566 static void
567 nlm_free_async_lock(struct nlm_async_lock *af)
568 {
569         /*
570          * Free an async lock.
571          */
572         if (af->af_rpc)
573                 CLNT_RELEASE(af->af_rpc);
574         xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
575         if (af->af_vp)
576                 vrele(af->af_vp);
577         free(af, M_NLM);
578 }
579 
580 /*
581  * Cancel our async request - this must be called with
582  * af->nh_host->nh_lock held. This is slightly complicated by a
583  * potential race with our own callback. If we fail to cancel the
584  * lock, it must already have been granted - we make sure our async
585  * task has completed by calling taskqueue_drain in this case.
586  */
587 static int
588 nlm_cancel_async_lock(struct nlm_async_lock *af)
589 {
590         struct nlm_host *host = af->af_host;
591         int error;
592 
593         mtx_assert(&host->nh_lock, MA_OWNED);
594 
595         mtx_unlock(&host->nh_lock);
596 
597         error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl,
598             F_REMOTE, NULL, &af->af_cookie);
599 
600         if (error) {
601                 /*
602                  * We failed to cancel - make sure our callback has
603                  * completed before we continue.
604                  */
605                 taskqueue_drain(taskqueue_thread, &af->af_task);
606         }
607 
608         mtx_lock(&host->nh_lock);
609         
610         if (!error) {
611                 if (nlm_debug_level >= 2)
612                         printf("NLM: async lock %p for %s (sysid %d) "
613                             "cancelled\n",
614                             af, host->nh_caller_name, host->nh_sysid);
615 
616                 /*
617                  * Remove from the nh_pending list and free now that
618                  * we are safe from the callback.
619                  */
620                 TAILQ_REMOVE(&host->nh_pending, af, af_link);
621                 mtx_unlock(&host->nh_lock);
622                 nlm_free_async_lock(af);
623                 mtx_lock(&host->nh_lock);
624         }
625 
626         return (error);
627 }
628 
629 static void
630 nlm_free_finished_locks(struct nlm_host *host)
631 {
632         struct nlm_async_lock *af;
633 
634         mtx_lock(&host->nh_lock);
635         while ((af = TAILQ_FIRST(&host->nh_finished)) != NULL) {
636                 TAILQ_REMOVE(&host->nh_finished, af, af_link);
637                 mtx_unlock(&host->nh_lock);
638                 nlm_free_async_lock(af);
639                 mtx_lock(&host->nh_lock);
640         }
641         mtx_unlock(&host->nh_lock);
642 }
643 
644 /*
645  * Free resources used by a host. This is called after the reference
646  * count has reached zero so it doesn't need to worry about locks.
647  */
648 static void
649 nlm_host_destroy(struct nlm_host *host)
650 {
651 
652         mtx_lock(&nlm_global_lock);
653         TAILQ_REMOVE(&nlm_hosts, host, nh_link);
654         mtx_unlock(&nlm_global_lock);
655 
656         if (host->nh_srvrpc.nr_client)
657                 CLNT_RELEASE(host->nh_srvrpc.nr_client);
658         if (host->nh_clntrpc.nr_client)
659                 CLNT_RELEASE(host->nh_clntrpc.nr_client);
660         mtx_destroy(&host->nh_lock);
661         sysctl_ctx_free(&host->nh_sysctl);
662         free(host, M_NLM);
663 }
664 
665 #ifdef NFSCLIENT
666 
667 /*
668  * Thread start callback for client lock recovery
669  */
670 static void
671 nlm_client_recovery_start(void *arg)
672 {
673         struct nlm_host *host = (struct nlm_host *) arg;
674 
675         if (nlm_debug_level >= 1)
676                 printf("NLM: client lock recovery for %s started\n",
677                     host->nh_caller_name);
678 
679         nlm_client_recovery(host);
680 
681         if (nlm_debug_level >= 1)
682                 printf("NLM: client lock recovery for %s completed\n",
683                     host->nh_caller_name);
684 
685         host->nh_monstate = NLM_MONITORED;
686         nlm_host_release(host);
687 
688         kthread_exit();
689 }
690 
691 #endif
692 
693 /*
694  * This is called when we receive a host state change notification. We
695  * unlock any active locks owned by the host. When rpc.lockd is
696  * shutting down, this function is called with newstate set to zero
697  * which allows us to cancel any pending async locks and clear the
698  * locking state.
699  */
700 static void
701 nlm_host_notify(struct nlm_host *host, int newstate)
702 {
703         struct nlm_async_lock *af;
704 
705         if (newstate) {
706                 if (nlm_debug_level >= 1)
707                         printf("NLM: host %s (sysid %d) rebooted, new "
708                             "state is %d\n",
709                             host->nh_caller_name, host->nh_sysid, newstate);
710         }
711 
712         /*
713          * Cancel any pending async locks for this host.
714          */
715         mtx_lock(&host->nh_lock);
716         while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) {
717                 /*
718                  * nlm_cancel_async_lock will remove the entry from
719                  * nh_pending and free it.
720                  */
721                 nlm_cancel_async_lock(af);
722         }
723         mtx_unlock(&host->nh_lock);
724         nlm_free_finished_locks(host);
725 
726         /*
727          * The host just rebooted - trash its locks.
728          */
729         lf_clearremotesys(host->nh_sysid);
730         host->nh_state = newstate;
731 
732 #ifdef NFSCLIENT
733         /*
734          * If we have any remote locks for this host (i.e. it
735          * represents a remote NFS server that our local NFS client
736          * has locks for), start a recovery thread.
737          */
738         if (newstate != 0
739             && host->nh_monstate != NLM_RECOVERING
740             && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) {
741                 struct thread *td;
742                 host->nh_monstate = NLM_RECOVERING;
743                 refcount_acquire(&host->nh_refs);
744                 kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0,
745                     "NFS lock recovery for %s", host->nh_caller_name);
746         }
747 #endif
748 }
749 
750 /*
751  * Sysctl handler to count the number of locks for a sysid.
752  */
753 static int
754 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
755 {
756         struct nlm_host *host;
757         int count;
758 
759         host = oidp->oid_arg1;
760         count = lf_countlocks(host->nh_sysid);
761         return sysctl_handle_int(oidp, &count, 0, req);
762 }
763 
764 /*
765  * Sysctl handler to count the number of client locks for a sysid.
766  */
767 static int
768 nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
769 {
770         struct nlm_host *host;
771         int count;
772 
773         host = oidp->oid_arg1;
774         count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid);
775         return sysctl_handle_int(oidp, &count, 0, req);
776 }
777 
778 /*
779  * Create a new NLM host.
780  */
781 static struct nlm_host *
782 nlm_create_host(const char* caller_name)
783 {
784         struct nlm_host *host;
785         struct sysctl_oid *oid;
786 
787         mtx_assert(&nlm_global_lock, MA_OWNED);
788 
789         if (nlm_debug_level >= 1)
790                 printf("NLM: new host %s (sysid %d)\n",
791                     caller_name, nlm_next_sysid);
792         host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO);
793         if (!host)
794                 return (NULL);
795         mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
796         host->nh_refs = 1;
797         strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN);
798         host->nh_sysid = nlm_next_sysid++;
799         snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
800                 "%d", host->nh_sysid);
801         host->nh_vers = 0;
802         host->nh_state = 0;
803         host->nh_monstate = NLM_UNMONITORED;
804         TAILQ_INIT(&host->nh_pending);
805         TAILQ_INIT(&host->nh_finished);
806         TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);
807 
808         mtx_unlock(&nlm_global_lock);
809 
810         sysctl_ctx_init(&host->nh_sysctl);
811         oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
812             SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
813             OID_AUTO, host->nh_sysid_string, CTLFLAG_RD, NULL, "");
814         SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
815             "hostname", CTLFLAG_RD, host->nh_caller_name, 0, "");
816         SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
817             "version", CTLFLAG_RD, &host->nh_vers, 0, "");
818         SYSCTL_ADD_INT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
819             "monitored", CTLFLAG_RD, &host->nh_monstate, 0, "");
820         SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
821             "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
822             nlm_host_lock_count_sysctl, "I", "");
823         SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
824             "client_lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
825             nlm_host_client_lock_count_sysctl, "I", "");
826 
827         mtx_lock(&nlm_global_lock);
828 
829         return (host);
830 }
831 
/*
 * Return non-zero if the address parts of the two sockaddrs are the
 * same. Only the network address is compared, not the port. Addresses
 * of different families, or of a family other than AF_INET (and
 * AF_INET6 when INET6 is configured), never match.
 */
static int
nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b)
{

        if (a->sa_family != b->sa_family)
                return (FALSE);

        if (a->sa_family == AF_INET) {
                const struct sockaddr_in *lhs = (const struct sockaddr_in *) a;
                const struct sockaddr_in *rhs = (const struct sockaddr_in *) b;

                return (memcmp(&lhs->sin_addr, &rhs->sin_addr,
                    sizeof(lhs->sin_addr)) == 0);
        }
#ifdef INET6
        if (a->sa_family == AF_INET6) {
                const struct sockaddr_in6 *lhs =
                    (const struct sockaddr_in6 *) a;
                const struct sockaddr_in6 *rhs =
                    (const struct sockaddr_in6 *) b;

                return (memcmp(&lhs->sin6_addr, &rhs->sin6_addr,
                    sizeof(lhs->sin6_addr)) == 0);
        }
#endif

        return (0);
}
864 
865 /*
866  * Check for idle hosts and stop monitoring them. We could also free
867  * the host structure here, possibly after a larger timeout but that
868  * would require some care to avoid races with
869  * e.g. nlm_host_lock_count_sysctl.
870  */
871 static void
872 nlm_check_idle(void)
873 {
874         struct nlm_host *host;
875 
876         mtx_assert(&nlm_global_lock, MA_OWNED);
877 
878         if (time_uptime <= nlm_next_idle_check)
879                 return;
880 
881         nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;
882 
883         TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
884                 if (host->nh_monstate == NLM_MONITORED
885                     && time_uptime > host->nh_idle_timeout) {
886                         mtx_unlock(&nlm_global_lock);
887                         if (lf_countlocks(host->nh_sysid) > 0
888                             || lf_countlocks(NLM_SYSID_CLIENT
889                                 + host->nh_sysid)) {
890                                 host->nh_idle_timeout =
891                                         time_uptime + NLM_IDLE_TIMEOUT;
892                                 mtx_lock(&nlm_global_lock);
893                                 continue;
894                         }
895                         nlm_host_unmonitor(host);
896                         mtx_lock(&nlm_global_lock);
897                 } 
898         }
899 }
900 
901 /*
902  * Search for an existing NLM host that matches the given name
903  * (typically the caller_name element of an nlm4_lock).  If none is
904  * found, create a new host. If 'addr' is non-NULL, record the remote
905  * address of the host so that we can call it back for async
906  * responses. If 'vers' is greater than zero then record the NLM
907  * program version to use to communicate with this client.
908  */
909 struct nlm_host *
910 nlm_find_host_by_name(const char *name, const struct sockaddr *addr,
911     rpcvers_t vers)
912 {
913         struct nlm_host *host;
914 
915         mtx_lock(&nlm_global_lock);
916 
917         /*
918          * The remote host is determined by caller_name.
919          */
920         TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
921                 if (!strcmp(host->nh_caller_name, name))
922                         break;
923         }
924 
925         if (!host) {
926                 host = nlm_create_host(name);
927                 if (!host) {
928                         mtx_unlock(&nlm_global_lock);
929                         return (NULL);
930                 }
931         }
932         refcount_acquire(&host->nh_refs);
933 
934         host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;
935 
936         /*
937          * If we have an address for the host, record it so that we
938          * can send async replies etc.
939          */
940         if (addr) {
941                 
942                 KASSERT(addr->sa_len < sizeof(struct sockaddr_storage),
943                     ("Strange remote transport address length"));
944 
945                 /*
946                  * If we have seen an address before and we currently
947                  * have an RPC client handle, make sure the address is
948                  * the same, otherwise discard the client handle.
949                  */
950                 if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) {
951                         if (!nlm_compare_addr(
952                                     (struct sockaddr *) &host->nh_addr,
953                                     addr)
954                             || host->nh_vers != vers) {
955                                 CLIENT *client;
956                                 mtx_lock(&host->nh_lock);
957                                 client = host->nh_srvrpc.nr_client;
958                                 host->nh_srvrpc.nr_client = NULL;
959                                 mtx_unlock(&host->nh_lock);
960                                 if (client) {
961                                         CLNT_RELEASE(client);
962                                 }
963                         }
964                 }
965                 memcpy(&host->nh_addr, addr, addr->sa_len);
966                 host->nh_vers = vers;
967         }
968 
969         nlm_check_idle();
970 
971         mtx_unlock(&nlm_global_lock);
972 
973         return (host);
974 }
975 
976 /*
977  * Search for an existing NLM host that matches the given remote
978  * address. If none is found, create a new host with the requested
979  * address and remember 'vers' as the NLM protocol version to use for
980  * that host.
981  */
982 struct nlm_host *
983 nlm_find_host_by_addr(const struct sockaddr *addr, int vers)
984 {
985         /*
986          * Fake up a name using inet_ntop. This buffer is
987          * large enough for an IPv6 address.
988          */
989         char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
990         struct nlm_host *host;
991 
992         switch (addr->sa_family) {
993         case AF_INET:
994                 __rpc_inet_ntop(AF_INET,
995                     &((const struct sockaddr_in *) addr)->sin_addr,
996                     tmp, sizeof tmp);
997                 break;
998 #ifdef INET6
999         case AF_INET6:
1000                 __rpc_inet_ntop(AF_INET6,
1001                     &((const struct sockaddr_in6 *) addr)->sin6_addr,
1002                     tmp, sizeof tmp);
1003                 break;
1004 #endif
1005         default:
1006                 strcmp(tmp, "<unknown>");
1007         }
1008 
1009 
1010         mtx_lock(&nlm_global_lock);
1011 
1012         /*
1013          * The remote host is determined by caller_name.
1014          */
1015         TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
1016                 if (nlm_compare_addr(addr,
1017                         (const struct sockaddr *) &host->nh_addr))
1018                         break;
1019         }
1020 
1021         if (!host) {
1022                 host =