[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]

FreeBSD/Linux Kernel Cross Reference
sys/nfsserver/nfs_serv.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD70  -  FREEBSD6  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*-
  2  * Copyright (c) 1989, 1993
  3  *      The Regents of the University of California.  All rights reserved.
  4  *
  5  * This code is derived from software contributed to Berkeley by
  6  * Rick Macklem at The University of Guelph.
  7  *
  8  * Redistribution and use in source and binary forms, with or without
  9  * modification, are permitted provided that the following conditions
 10  * are met:
 11  * 1. Redistributions of source code must retain the above copyright
 12  *    notice, this list of conditions and the following disclaimer.
 13  * 2. Redistributions in binary form must reproduce the above copyright
 14  *    notice, this list of conditions and the following disclaimer in the
 15  *    documentation and/or other materials provided with the distribution.
 16  * 4. Neither the name of the University nor the names of its contributors
 17  *    may be used to endorse or promote products derived from this software
 18  *    without specific prior written permission.
 19  *
 20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 30  * SUCH DAMAGE.
 31  *
 32  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
 33  */
 34 
 35 #include <sys/cdefs.h>
 36 __FBSDID("$FreeBSD: src/sys/nfsserver/nfs_serv.c,v 1.190 2008/11/03 10:38:00 dfr Exp $");
 37 
 38 /*
 39  * nfs version 2 and 3 server calls to vnode ops
 40  * - these routines generally have 3 phases
 41  *   1 - break down and validate rpc request in mbuf list
 42  *   2 - do the vnode ops for the request
 43  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
 44  *   3 - build the rpc reply in an mbuf list
 45  *   nb:
 46  *      - do not mix the phases, since the nfsm_?? macros can return failures
 47  *        on a bad rpc or similar and do not do any vrele() or vput()'s
 48  *
 49  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
 50  *      error number iff error != 0 whereas
 51  *      returning an error from the server function implies a fatal error
 52  *      such as a badly constructed rpc request that should be dropped without
 53  *      a reply.
 54  *      For nfsm_reply(), the case where error == EBADRPC is treated
 55  *      specially; after constructing a reply, it does an immediate
 56  *      `goto nfsmout' to avoid getting any V3 post-op status appended.
 57  *
 58  * Other notes:
 59  *      Warning: always pay careful attention to resource cleanup on return
 60  *      and note that nfsm_*() macros can terminate a procedure on certain
 61  *      errors.
 62  *
 63  *      lookup() and namei()
 64  *      may return garbage in various structural fields/return elements
 65  *      if an error is returned, and may garbage up nd.ni_dvp even if no
 66  *      error is returned and you did not request LOCKPARENT or WANTPARENT.
 67  *
 68  *      We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name
 69  *      buffer has been freed or not.
 70  */
 71 
 72 #include <sys/param.h>
 73 #include <sys/systm.h>
 74 #include <sys/proc.h>
 75 #include <sys/namei.h>
 76 #include <sys/unistd.h>
 77 #include <sys/vnode.h>
 78 #include <sys/mount.h>
 79 #include <sys/socket.h>
 80 #include <sys/socketvar.h>
 81 #include <sys/malloc.h>
 82 #include <sys/mbuf.h>
 83 #include <sys/priv.h>
 84 #include <sys/dirent.h>
 85 #include <sys/stat.h>
 86 #include <sys/kernel.h>
 87 #include <sys/sysctl.h>
 88 #include <sys/bio.h>
 89 #include <sys/buf.h>
 90 
 91 #include <vm/vm.h>
 92 #include <vm/vm_extern.h>
 93 #include <vm/vm_object.h>
 94 
 95 #include <nfs/nfsproto.h>
 96 #include <nfs/rpcv2.h>
 97 #include <nfsserver/nfs.h>
 98 #include <nfs/xdr_subs.h>
 99 #include <nfsserver/nfsm_subs.h>
100 
101 #ifdef NFSRV_DEBUG
    /*
     * Debug tracing hook.  With NFSRV_DEBUG defined the argument is pasted
     * directly after `printf', so callers pass a fully parenthesised
     * argument list, as in nfsdbprintf(("%s %d\n", __FILE__, __LINE__)).
     * Without NFSRV_DEBUG the macro expands to nothing.
     */
102 #define nfsdbprintf(info)       printf info
103 #else
104 #define nfsdbprintf(info)
105 #endif
106 
    /*
     * NOTE(review): not referenced in the part of the file reviewed here;
     * presumably an upper bound used by the commit service -- confirm.
     */
107 #define MAX_COMMIT_COUNT        (1024 * 1024)
108 
    /*
     * NUM_HEURISTIC is the number of slots in the nfsheur[] table below.
     * NOTE(review): the NHUSE_* values presumably seed, bump and cap
     * nh_use; their users are outside the part reviewed -- confirm.
     */
109 #define NUM_HEURISTIC           1017
110 #define NHUSE_INIT              64
111 #define NHUSE_INC               16
112 #define NHUSE_MAX               2048
113 
    /*
     * Read heuristic table.  nfsrv_read() derives a starting slot from the
     * vnode pointer modulo NUM_HEURISTIC and probes forward for an entry
     * whose nh_vp matches.
     */
114 static struct nfsheur {
115         struct vnode *nh_vp;    /* vp to match (unreferenced pointer) */
116         off_t nh_nextr;         /* next offset for sequential detection */
117         int nh_use;             /* use count for selection */
118         int nh_seqcount;        /* heuristic */
119 } nfsheur[NUM_HEURISTIC];
120 
121 /* Global vars */
122 
    /*
     * NOTE(review): neither variable is used in the part of the file
     * reviewed; the names suggest write-gathering delays for v2 and v3,
     * with the v2 value scaled from NFS_GATHERDELAY -- confirm the units
     * with the code that consumes them.
     */
123 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
124 int nfsrvw_procrastinate_v3 = 0;
125 
126 static struct timeval   nfsver = { 0 };
127 
    /* Parent node (_vfs -> nfsrv) for the sysctl variables declared below. */
128 SYSCTL_NODE(_vfs, OID_AUTO, nfsrv, CTLFLAG_RW, 0, "NFS server");
129 
    /*
     * Read-write integer sysctls under the nfsrv node.  Note that
     * commit_miss is registered with an empty description string.
     */
130 static int nfs_async;
131 static int nfs_commit_blks;
132 static int nfs_commit_miss;
133 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
134     "Tell client that writes were synced even though they were not");
135 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
136     "Number of completed commits");
137 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
138 
    /* Server statistics, exported as a whole structure through sysctl. */
139 struct nfsrvstats nfsrvstats;
140 SYSCTL_STRUCT(_vfs_nfsrv, NFS_NFSRVSTATS, nfsrvstats, CTLFLAG_RW,
141         &nfsrvstats, nfsrvstats, "S,nfsrvstats");
142 
    /*
     * Forward declarations of file-local helpers.  nfsrvw_coalesce() is
     * only declared when NFS_LEGACYRPC is defined.
     */
143 static int      nfsrv_access(struct vnode *, accmode_t, struct ucred *,
144                     int, int);
145 #ifdef NFS_LEGACYRPC
146 static void     nfsrvw_coalesce(struct nfsrv_descript *,
147                     struct nfsrv_descript *);
148 #endif
149 
150 /*
151  * Reset the nameidata fields that the nfsmout cleanup code tests.  Call
152  * this before the first nfsm macro, since a macro may jump to cleanup.
153  */
154 
155 static __inline void
156 ndclear(struct nameidata *nd)
157 {
158 
159         nd->ni_startdir = NULL;
160         nd->ni_dvp = NULL;
161         nd->ni_vp = NULL;
162         nd->ni_cnd.cn_flags = 0;
163 }
164 
165 /*
166  * Collapse two vfslocked values into one.  When both report that giant
167  * is held, the reference tracked by vfs2 is released, so the caller is
168  * left with at most one.  The result is non-zero if either input was,
169  * letting nfsrv ops carry a single vfslocked variable.
170  */
171 static __inline int
172 nfsrv_lockedpair(int vfs1, int vfs2)
173 {
174 
175         if (vfs1 != 0 && vfs2 != 0)
176                 VFS_UNLOCK_GIANT(vfs2);
177 
178         return (vfs2 | vfs1);
179 }
180 
181 static __inline int
182 nfsrv_lockedpair_nd(int vfs1, struct nameidata *nd)
183 {
184         int ndgiant;
185 
186         ndgiant = NDHASGIANT(nd);
187 
188         return (nfsrv_lockedpair(vfs1, ndgiant));
189 }
190 
191 /*
192  * nfs v3 access service
     *
     * Decodes a file handle and a 32-bit access mask from the request,
     * clears each requested bit group for which nfsrv_access() returns
     * non-zero, and replies with post-op attributes followed by the
     * resulting mask.  Panics when called for a request without ND_NFSV3.
     * The nfsm_* macros may jump to nfsmout, which releases vp if it is
     * still set and drops Giant according to vfslocked.
193  */
194 int
195 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
196     struct mbuf **mrq)
197 {
198         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
199         struct sockaddr *nam = nfsd->nd_nam;
200         caddr_t dpos = nfsd->nd_dpos;
201         struct ucred *cred = nfsd->nd_cr;
202         struct vnode *vp = NULL;
203         nfsfh_t nfh;
204         fhandle_t *fhp;
205         u_int32_t *tl;
206         caddr_t bpos;
207         int error = 0, rdonly, getret;
208         struct mbuf *mb, *mreq;
209         struct vattr vattr, *vap = &vattr;
210         u_long testmode, nfsmode;
211         int v3 = (nfsd->nd_flag & ND_NFSV3);
212         int vfslocked;
213 
214         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
215         if (!v3)
216                 panic("nfsrv3_access: v3 proc called on a v2 connection");
217         vfslocked = 0;
            /* Decode the arguments: file handle, then the requested mask. */
218         fhp = &nfh.fh_generic;
219         nfsm_srvmtofh(fhp);
220         tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
221         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, nfsd, slp,
222             nam, &rdonly, TRUE);
223         if (error) {
                    /*
                     * Handle could not be mapped to a vnode: build the error
                     * reply, then return 0 so that the reply is sent rather
                     * than the request being dropped.
                     */
224                 nfsm_reply(NFSX_UNSIGNED);
225                 nfsm_srvpostop_attr(1, NULL);
226                 error = 0;
227                 goto nfsmout;
228         }
229         nfsmode = fxdr_unsigned(u_int32_t, *tl);
            /* Drop READ if the read check fails. */
230         if ((nfsmode & NFSV3ACCESS_READ) &&
231                 nfsrv_access(vp, VREAD, cred, rdonly, 0))
232                 nfsmode &= ~NFSV3ACCESS_READ;
            /* Write-type bits; for directories DELETE is grouped with them. */
233         if (vp->v_type == VDIR)
234                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
235                         NFSV3ACCESS_DELETE);
236         else
237                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
238         if ((nfsmode & testmode) &&
239                 nfsrv_access(vp, VWRITE, cred, rdonly, 0))
240                 nfsmode &= ~testmode;
            /* VEXEC governs LOOKUP on directories and EXECUTE otherwise. */
241         if (vp->v_type == VDIR)
242                 testmode = NFSV3ACCESS_LOOKUP;
243         else
244                 testmode = NFSV3ACCESS_EXECUTE;
245         if ((nfsmode & testmode) &&
246                 nfsrv_access(vp, VEXEC, cred, rdonly, 0))
247                 nfsmode &= ~testmode;
            /* Fetch attributes, release the vnode, then build the reply. */
248         getret = VOP_GETATTR(vp, vap, cred);
249         vput(vp);
250         vp = NULL;
251         nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
252         nfsm_srvpostop_attr(getret, vap);
253         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
254         *tl = txdr_unsigned(nfsmode);
255 nfsmout:
256         if (vp)
257                 vput(vp);
258         VFS_UNLOCK_GIANT(vfslocked);
259         return(error);
260 }
261 
262 /*
263  * nfs getattr service
     *
     * Decodes a file handle, fetches the vnode's attributes with
     * VOP_GETATTR() and encodes them into the reply.  The attribute body
     * is only appended when VOP_GETATTR() succeeded.  The nfsm_* macros
     * may jump to nfsmout, which releases vp if it is still set and drops
     * Giant according to vfslocked.
264  */
265 int
266 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
267     struct mbuf **mrq)
268 {
269         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
270         struct sockaddr *nam = nfsd->nd_nam;
271         caddr_t dpos = nfsd->nd_dpos;
272         struct ucred *cred = nfsd->nd_cr;
273         struct nfs_fattr *fp;
274         struct vattr va;
275         struct vattr *vap = &va;
276         struct vnode *vp = NULL;
277         nfsfh_t nfh;
278         fhandle_t *fhp;
279         caddr_t bpos;
280         int error = 0, rdonly;
281         struct mbuf *mb, *mreq;
282         int vfslocked;
283 
284         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
285         vfslocked = 0;
286         fhp = &nfh.fh_generic;
287         nfsm_srvmtofh(fhp);
288         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, nfsd, slp, nam,
289             &rdonly, TRUE);
290         if (error) {
                    /* Reply carries the error; return 0 so it is sent. */
291                 nfsm_reply(0);
292                 error = 0;
293                 goto nfsmout;
294         }
            /* The vnode is released before the reply is built. */
295         error = VOP_GETATTR(vp, vap, cred);
296         vput(vp);
297         vp = NULL;
298         nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
299         if (error) {
300                 error = 0;
301                 goto nfsmout;
302         }
303         fp = nfsm_build(struct nfs_fattr *,
304             NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
305         nfsm_srvfillattr(vap, fp);
306         /* fall through */
307 
308 nfsmout:
309         if (vp)
310                 vput(vp);
311         VFS_UNLOCK_GIANT(vfslocked);
312         return(error);
313 }
314 
315 /*
316  * nfs setattr service
     *
     * Decodes a file handle and the new attributes (v3: sattr plus an
     * optional guard timestamp; v2: a fixed nfsv2_sattr), applies them with
     * VOP_SETATTR() and replies with wcc data (v3) or, on success, the
     * resulting attributes (v2).  The write is bracketed by
     * vn_start_write()/vn_finished_write().  The nfsm_* macros may jump to
     * nfsmout.
317  */
318 int
319 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
320     struct mbuf **mrq)
321 {
322         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
323         struct sockaddr *nam = nfsd->nd_nam;
324         caddr_t dpos = nfsd->nd_dpos;
325         struct ucred *cred = nfsd->nd_cr;
326         struct vattr va, preat;
327         struct vattr *vap = &va;
328         struct nfsv2_sattr *sp;
329         struct nfs_fattr *fp;
330         struct vnode *vp = NULL;
331         nfsfh_t nfh;
332         fhandle_t *fhp;
333         u_int32_t *tl;
334         caddr_t bpos;
335         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
336         int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
337         struct mbuf *mb, *mreq;
338         struct timespec guard = { 0, 0 };
339         struct mount *mp = NULL;
340         int tvfslocked;
341         int vfslocked;
342 
343         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
344         vfslocked = 0;
345         fhp = &nfh.fh_generic;
346         nfsm_srvmtofh(fhp);
            /* Find the mount from the handle's fsid; none means ESTALE. */
347         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
348                 error = ESTALE;
349                 goto out;
350         }
            /* Announce the write; paired with vn_finished_write() at nfsmout. */
351         vfslocked = VFS_LOCK_GIANT(mp);
352         (void) vn_start_write(NULL, &mp, V_WAIT);
353         vfs_rel(mp);            /* The write holds a ref. */
354         VATTR_NULL(vap);
355         if (v3) {
                    /*
                     * v3: attributes, then a flag saying whether a guard
                     * timestamp follows.  The guard is compared with the
                     * file's ctime further down.
                     */
356                 nfsm_srvsattr(vap);
357                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
358                 gcheck = fxdr_unsigned(int, *tl);
359                 if (gcheck) {
360                         tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
361                         fxdr_nfsv3time(tl, &guard);
362                 }
363         } else {
                    /*
                     * v2: each field is copied only when it differs from
                     * nfsrv_nfs_xdrneg1 (mode has its own test, see below).
                     */
364                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
365                 /*
366                  * Nah nah nah nah na nah
367                  * There is a bug in the Sun client that puts 0xffff in the mode
368                  * field of sattr when it should put in 0xffffffff. The u_short
369                  * doesn't sign extend.
370                  * --> check the low order 2 bytes for 0xffff
371                  */
372                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
373                         vap->va_mode = nfstov_mode(sp->sa_mode);
374                 if (sp->sa_uid != nfsrv_nfs_xdrneg1)
375                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
376                 if (sp->sa_gid != nfsrv_nfs_xdrneg1)
377                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
378                 if (sp->sa_size != nfsrv_nfs_xdrneg1)
379                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
380                 if (sp->sa_atime.nfsv2_sec != nfsrv_nfs_xdrneg1) {
381 #ifdef notyet
382                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
383 #else
384                         vap->va_atime.tv_sec =
385                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
386                         vap->va_atime.tv_nsec = 0;
387 #endif
388                 }
389                 if (sp->sa_mtime.nfsv2_sec != nfsrv_nfs_xdrneg1)
390                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
391 
392         }
393 
394         /*
395          * Now that we have all the fields, lets do it.
396          */
397         error = nfsrv_fhtovp(fhp, 1, &vp, &tvfslocked, nfsd, slp,
398             nam, &rdonly, TRUE);
            /* Fold the Giant state from the mount and from fhtovp into one. */
399         vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
400         if (error) {
401                 nfsm_reply(2 * NFSX_UNSIGNED);
402                 if (v3)
403                         nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
404                 error = 0;
405                 goto nfsmout;
406         }
407 
408         /*
409          * vp now an active resource, pay careful attention to cleanup
410          */
411         if (v3) {
                    /*
                     * Record pre-op attributes.  With a guard supplied, a
                     * ctime mismatch is reported as NFSERR_NOT_SYNC.
                     */
412                 error = preat_ret = VOP_GETATTR(vp, &preat, cred);
413                 if (!error && gcheck &&
414                         (preat.va_ctime.tv_sec != guard.tv_sec ||
415                          preat.va_ctime.tv_nsec != guard.tv_nsec))
416                         error = NFSERR_NOT_SYNC;
417                 if (error) {
418                         vput(vp);
419                         vp = NULL;
420                         nfsm_reply(NFSX_WCCDATA(v3));
421                         if (v3)
422                                 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
423                         error = 0;
424                         goto nfsmout;
425                 }
426         }
427 
428         /*
429          * If the size is being changed write access is required, otherwise
430          * just check for a read only filesystem.
431          */
432         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
433                 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
434                         error = EROFS;
435                         goto out;
436                 }
437         } else {
438                 if (vp->v_type == VDIR) {
439                         error = EISDIR;
440                         goto out;
441                 } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
442                     0)) != 0)
443                         goto out;
444         }
            /*
             * Apply the change, then re-read the attributes into vap for the
             * reply.  A getattr failure is only reported when the setattr
             * itself succeeded.
             */
445         error = VOP_SETATTR(vp, vap, cred);
446         postat_ret = VOP_GETATTR(vp, vap, cred);
447         if (!error)
448                 error = postat_ret;
449 out:
            /* Common reply path; vp may be NULL when reached early. */
450         if (vp != NULL)
451                 vput(vp);
452 
453         vp = NULL;
454         nfsm_reply(NFSX_WCCORFATTR(v3));
455         if (v3) {
456                 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
457         } else if (!error) {
458                 /* v2 non-error case. */
459                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
460                 nfsm_srvfillattr(vap, fp);
461         }
462         error = 0;
463         /* fall through */
464 
465 nfsmout:
466         if (vp)
467                 vput(vp);
468         vn_finished_write(mp);
469         VFS_UNLOCK_GIANT(vfslocked);
470         return(error);
471 }
472 
473 /*
474  * nfs lookup rpc
     *
     * Resolves a name relative to a directory file handle with nfs_namei()
     * and replies with the resulting file handle and its attributes (plus
     * directory post-op attributes for v3).  When the public file handle
     * is used, a directory result is additionally probed for the
     * configured index file (nfs_pub.np_index), and any result that lies
     * outside nfs_pub.np_mount is refused with EPERM.
     *
     * ndp selects the nameidata (nd or ind) whose ni_vp/ni_startdir are
     * live; all releases of those fields must therefore go through ndp.
     * The nfsm_* macros may jump to nfsmout.
475  */
476 int
477 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
478     struct mbuf **mrq)
479 {
480         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
481         struct sockaddr *nam = nfsd->nd_nam;
482         caddr_t dpos = nfsd->nd_dpos;
483         struct ucred *cred = nfsd->nd_cr;
484         struct nfs_fattr *fp;
485         struct nameidata nd, ind, *ndp = &nd;
486         struct vnode *vp, *dirp = NULL;
487         nfsfh_t nfh;
488         fhandle_t *fhp;
489         caddr_t bpos;
490         int error = 0, len, dirattr_ret = 1;
491         int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
492         struct mbuf *mb, *mreq;
493         struct vattr va, dirattr, *vap = &va;
494         int tvfslocked;
495         int vfslocked;
496 
497         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
498         ndclear(&nd);
499         vfslocked = 0;
500 
501         fhp = &nfh.fh_generic;
502         nfsm_srvmtofh(fhp);
503         nfsm_srvnamesiz(len);
504 
505         pubflag = nfs_ispublicfh(fhp);
506 
507         nd.ni_cnd.cn_cred = cred;
508         nd.ni_cnd.cn_nameiop = LOOKUP;
509         nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART | MPSAFE;
510         error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
511                 &dirp, v3, &dirattr, &dirattr_ret, pubflag);
512         vfslocked = NDHASGIANT(&nd);
513 
514         /*
515          * namei failure, only dirp to cleanup.  Clear out garbage from
516          * structure in case macros jump to nfsmout.
517          */
518 
519         if (error) {
520                 if (dirp) {
521                         vrele(dirp);
522                         dirp = NULL;
523                 }
524                 nfsm_reply(NFSX_POSTOPATTR(v3));
525                 if (v3)
526                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
527                 error = 0;
528                 goto nfsmout;
529         }
530 
531         /*
532          * Locate index file for public filehandle
533          *
534          * error is 0 on entry and 0 on exit from this block.
535          */
536 
537         if (pubflag) {
538                 if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) {
539                         /*
540                          * Setup call to lookup() to see if we can find
541                          * the index file. Arguably, this doesn't belong
542                          * in a kernel.. Ugh.  If an error occurs, do not
543                          * try to install an index file and then clear the
544                          * error.
545                          *
546                          * When we replace nd with ind and redirect ndp,
547                          * maintenance of ni_startdir and ni_vp shift to
548                          * ind and we have to clean them up in the old nd.
549                          * However, the cnd resource continues to be maintained
550                          * via the original nd.  Confused?  You aren't alone!
551                          */
552                         ind = nd;
553                         VOP_UNLOCK(nd.ni_vp, 0);
554                         ind.ni_pathlen = strlen(nfs_pub.np_index);
555                         ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf =
556                             nfs_pub.np_index;
557                         ind.ni_startdir = nd.ni_vp;
558                         VREF(ind.ni_startdir);
559                         ind.ni_cnd.cn_flags &= ~GIANTHELD;
560                         tvfslocked = VFS_LOCK_GIANT(ind.ni_startdir->v_mount);
                            /*
                             * NOTE(review): the flag is set on nd, but
                             * lookup() and the pair check below operate on
                             * ind -- confirm this is intended.
                             */
561                         if (tvfslocked)
562                                 nd.ni_cnd.cn_flags |= GIANTHELD;
563                         error = lookup(&ind);
564                         ind.ni_dvp = NULL;
565                         vfslocked = nfsrv_lockedpair_nd(vfslocked, &ind);
566                         ind.ni_cnd.cn_flags &= ~GIANTHELD;
567 
568                         if (error == 0) {
569                                 /*
570                                  * Found an index file. Get rid of
571                                  * the old references.  The old nd.ni_vp
                                     * becomes dirp and ndp switches to ind.
572                                  */
573                                 if (dirp)
574                                         vrele(dirp);
575                                 dirp = nd.ni_vp;
576                                 nd.ni_vp = NULL;
577                                 vrele(nd.ni_startdir);
578                                 nd.ni_startdir = NULL;
579                                 ndp = &ind;
580                         }
                            /*
                             * NOTE(review): when lookup() failed, nd.ni_vp
                             * was unlocked above and nothing visible here
                             * relocks it before the vput() further down --
                             * confirm against lookup()'s contract.
                             */
581                         error = 0;
582                 }
583                 /*
584                  * If the public filehandle was used, check that this lookup
585                  * didn't result in a filehandle outside the publicly exported
586                  * filesystem.  We clear the poor vp here to avoid lockups due
587                  * to NFS I/O.
                     *
                     * The vnode tested is ndp->ni_vp, so that is also the one
                     * released: after a successful index lookup ndp is &ind
                     * and nd.ni_vp has already been set to NULL above.
588                  */
589 
590                 if (ndp->ni_vp->v_mount != nfs_pub.np_mount) {
591                         vput(ndp->ni_vp);
592                         ndp->ni_vp = NULL;
593                         error = EPERM;
594                 }
595         }
596 
597         /*
598          * Resources at this point:
599          *      ndp->ni_vp      non-NULL unless error was set just above
600          */
601 
602         if (error) {
603                 nfsm_reply(NFSX_POSTOPATTR(v3));
604                 if (v3)
605                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
606                 error = 0;
607                 goto nfsmout;
608         }
609 
610         /*
611          * Get underlying attribute, then release remaining resources ( for
612          * the same potential blocking reason ) and reply.
613          */
614         vp = ndp->ni_vp;
615         bzero((caddr_t)fhp, sizeof(nfh));
616         fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
617         error = VOP_VPTOFH(vp, &fhp->fh_fid);
618         if (!error)
619                 error = VOP_GETATTR(vp, vap, cred);
620 
            /* Everything is released and NULLed before the reply macros run. */
621         vput(vp);
622         vrele(ndp->ni_startdir);
623         vrele(dirp);
624         ndp->ni_vp = NULL;
625         ndp->ni_startdir = NULL;
626         dirp = NULL;
627         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
628         if (error) {
629                 if (v3)
630                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
631                 error = 0;
632                 goto nfsmout;
633         }
634         nfsm_srvfhtom(fhp, v3);
635         if (v3) {
636                 nfsm_srvpostop_attr(0, vap);
637                 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
638         } else {
639                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
640                 nfsm_srvfillattr(vap, fp);
641         }
642 
643 nfsmout:
            /* Release whatever is still held; the name buffer belongs to nd. */
644         if (ndp->ni_vp || dirp || ndp->ni_startdir) {
645                 if (ndp->ni_vp)
646                         vput(ndp->ni_vp);
647                 if (dirp)
648                         vrele(dirp);
649                 if (ndp->ni_startdir)
650                         vrele(ndp->ni_startdir);
651         }
652         NDFREE(&nd, NDF_ONLY_PNBUF);
653         VFS_UNLOCK_GIANT(vfslocked);
654         return (error);
655 }
656 
657 /*
658  * nfs readlink service
     *
     * Pre-allocates an mbuf chain (MGET + MCLGET) covering NFS_MAXPATHLEN
     * bytes, reads the link target into it with VOP_READLINK() through a
     * matching iovec array, and appends the chain to the reply after the
     * length word.  A vnode that is not a symlink yields EINVAL (v3) or
     * ENXIO (v2).  The nfsm_* macros may jump to nfsmout, which frees the
     * chain if it has not been handed to the reply.
659  */
660 int
661 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
662     struct mbuf **mrq)
663 {
664         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
665         struct sockaddr *nam = nfsd->nd_nam;
666         caddr_t dpos = nfsd->nd_dpos;
667         struct ucred *cred = nfsd->nd_cr;
668         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
669         struct iovec *ivp = iv;
670         struct mbuf *mp;
671         u_int32_t *tl;
672         caddr_t bpos;
673         int error = 0, rdonly, i, tlen, len, getret;
674         int v3 = (nfsd->nd_flag & ND_NFSV3);
675         struct mbuf *mb, *mp3, *nmp, *mreq;
676         struct vnode *vp = NULL;
677         struct vattr attr;
678         nfsfh_t nfh;
679         fhandle_t *fhp;
680         struct uio io, *uiop = &io;
681         int vfslocked;
682 
683         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
684         vfslocked = 0;
685 #ifndef nolint
686         mp = NULL;
687 #endif
            /* mp3 is the chain head; it must be NULL before any macro runs. */
688         mp3 = NULL;
689         fhp = &nfh.fh_generic;
690         nfsm_srvmtofh(fhp);
691         len = 0;
692         i = 0;
            /*
             * Build the data chain and one iovec per mbuf.  The last mbuf is
             * shortened so that the total is exactly NFS_MAXPATHLEN.
             */
693         while (len < NFS_MAXPATHLEN) {
694                 MGET(nmp, M_WAIT, MT_DATA);
695                 MCLGET(nmp, M_WAIT);
696                 nmp->m_len = NFSMSIZ(nmp);
697                 if (len == 0)
698                         mp3 = mp = nmp;
699                 else {
700                         mp->m_next = nmp;
701                         mp = nmp;
702                 }
703                 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
704                         mp->m_len = NFS_MAXPATHLEN - len;
705                         len = NFS_MAXPATHLEN;
706                 } else
707                         len += mp->m_len;
708                 ivp->iov_base = mtod(mp, caddr_t);
709                 ivp->iov_len = mp->m_len;
710                 i++;
711                 ivp++;
712         }
            /* Describe the chain as a kernel-space read of len bytes. */
713         uiop->uio_iov = iv;
714         uiop->uio_iovcnt = i;
715         uiop->uio_offset = 0;
716         uiop->uio_resid = len;
717         uiop->uio_rw = UIO_READ;
718         uiop->uio_segflg = UIO_SYSSPACE;
719         uiop->uio_td = NULL;
720         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, nfsd, slp,
721             nam, &rdonly, TRUE);
722         if (error) {
723                 nfsm_reply(2 * NFSX_UNSIGNED);
724                 if (v3)
725                         nfsm_srvpostop_attr(1, NULL);
726                 error = 0;
727                 goto nfsmout;
728         }
729         if (vp->v_type != VLNK) {
730                 if (v3)
731                         error = EINVAL;
732                 else
733                         error = ENXIO;
734         } else 
735                 error = VOP_READLINK(vp, uiop, cred);
            /* Attributes are fetched even on error, for the v3 post-op data. */
736         getret = VOP_GETATTR(vp, &attr, cred);
737         vput(vp);
738         vp = NULL;
739         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
740         if (v3)
741                 nfsm_srvpostop_attr(getret, &attr);
742         if (error) {
743                 error = 0;
744                 goto nfsmout;
745         }
            /*
             * Short read: len becomes the number of bytes actually read and
             * tlen its nfsm_rndup() value.  NOTE(review): nfsm_adj()
             * presumably trims the chain to tlen bytes -- confirm against
             * its definition.
             */
746         if (uiop->uio_resid > 0) {
747                 len -= uiop->uio_resid;
748                 tlen = nfsm_rndup(len);
749                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
750         }
751         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
752         *tl = txdr_unsigned(len);
            /* Hand the chain to the reply; clear mp3 so nfsmout won't free it. */
753         mb->m_next = mp3;
754         mp3 = NULL;
755 nfsmout:
756         if (mp3)
757                 m_freem(mp3);
758         if (vp)
759                 vput(vp);
760         VFS_UNLOCK_GIANT(vfslocked);
761         return(error);
762 }
763 
764 /*
765  * nfs read service
766  */
767 int
768 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
769     struct mbuf **mrq)
770 {
771         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
772         struct sockaddr *nam = nfsd->nd_nam;
773         caddr_t dpos = nfsd->nd_dpos;
774         struct ucred *cred = nfsd->nd_cr;
775         struct iovec *iv;
776         struct iovec *iv2;
777         struct mbuf *m;
778         struct nfs_fattr *fp;
779         u_int32_t *tl;
780         int i;
781         caddr_t bpos;
782         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
783         int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
784         struct mbuf *mb, *mreq;
785         struct mbuf *m2;
786         struct vnode *vp = NULL;
787         nfsfh_t nfh;
788         fhandle_t *fhp;
789         struct uio io, *uiop = &io;
790         struct vattr va, *vap = &va;
791         struct nfsheur *nh;
792         off_t off;
793         int ioflag = 0;
794         int vfslocked;
795 
796 
797         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
798         vfslocked = 0;
799         fhp = &nfh.fh_generic;
800         nfsm_srvmtofh(fhp);
801         if (v3) {
802                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
803                 off = fxdr_hyper(tl);
804         } else {
805                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
806                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
807         }
808         nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
809 
810         /*
811          * Reference vp.  If an error occurs, vp will be invalid, but we
812          * have to NULL it just in case.  The macros might goto nfsmout
813          * as well.
814          */
815 
816         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, nfsd, slp,
817             nam, &rdonly, TRUE);
818         if (error) {
819                 vp = NULL;
820                 nfsm_reply(2 * NFSX_UNSIGNED);
821                 if (v3)
822                         nfsm_srvpostop_attr(1, NULL);
823                 error = 0;
824                 goto nfsmout;
825         }
826 
827         if (vp->v_type != VREG) {
828                 if (v3)
829                         error = EINVAL;
830                 else
831                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
832         }
833         if (!error) {
834                 if ((error = nfsrv_access(vp, VREAD, cred, rdonly, 1)) != 0)
835                         error = nfsrv_access(vp, VEXEC, cred, rdonly, 1);
836         }
837         getret = VOP_GETATTR(vp, vap, cred);
838         if (!error)
839                 error = getret;
840         if (error) {
841                 vput(vp);
842                 vp = NULL;
843                 nfsm_reply(NFSX_POSTOPATTR(v3));
844                 if (v3)
845                         nfsm_srvpostop_attr(getret, vap);
846                 error = 0;
847                 goto nfsmout;
848         }
849 
850         /*
851          * Calculate byte count to read
852          */
853 
854         if (off >= vap->va_size)
855                 cnt = 0;
856         else if ((off + reqlen) > vap->va_size)
857                 cnt = vap->va_size - off;
858         else
859                 cnt = reqlen;
860 
861         /*
862          * Calculate seqcount for heuristic
863          */
864 
865         {
866                 int hi;
867                 int try = 32;
868 
869                 /*
870                  * Locate best candidate
871                  */
872 
873                 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
874                 nh = &nfsheur[hi];
875 
876                 while (try--) {
877                         if (nfsheur[hi].nh_vp == vp) {
878                                 nh = &nfsheur[hi];
879                                 break;
880                         }
881                         if (nfsheur[hi].nh_use > 0)
882                                 --nfsheur[hi].nh_use;
883                         hi = (hi + 1) % NUM_HEURISTIC;
884                         if (nfsheur[hi].nh_use < nh->nh_use)
885                                 nh = &nfsheur[hi];
886                 }
887 
888                 if (nh->nh_vp != vp) {
889                         nh->nh_vp = vp;
890                         nh->nh_nextr = off;
891                         nh->nh_use = NHUSE_INIT;
892                         if (off == 0)
893                                 nh->nh_seqcount = 4;
894                         else
895                                 nh->nh_seqcount = 1;
896                 }
897 
898                 /*
899                  * Calculate heuristic
900                  */
901 
902                 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
903                         if (++nh->nh_seqcount > IO_SEQMAX)
904                                 nh->nh_seqcount = IO_SEQMAX;
905                 } else if (nh->nh_seqcount > 1) {
906                         nh->nh_seqcount = 1;
907                 } else {
908                         nh->nh_seqcount = 0;
909                 }
910                 nh->nh_use += NHUSE_INC;
911                 if (nh->nh_use > NHUSE_MAX)
912                         nh->nh_use = NHUSE_MAX;
913                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
914         }
915 
916         nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
917         if (v3) {
918                 tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
919                 *tl++ = nfsrv_nfs_true;
920                 fp = (struct nfs_fattr *)tl;
921                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
922         } else {
923                 tl = nfsm_build(u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
924                 fp = (struct nfs_fattr *)tl;
925                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
926         }
927         len = left = nfsm_rndup(cnt);
928         if (cnt > 0) {
929                 /*
930                  * Generate the mbuf list with the uio_iov ref. to it.
931                  */
932                 i = 0;
933                 m = m2 = mb;
934                 while (left > 0) {
935                         siz = min(M_TRAILINGSPACE(m), left);
936                         if (siz > 0) {
937                                 left -= siz;
938                                 i++;
939                         }
940                         if (left > 0) {
941                                 MGET(m, M_WAIT, MT_DATA);
942                                 MCLGET(m, M_WAIT);
943                                 m->m_len = 0;
944                                 m2->m_next = m;
945                                 m2 = m;
946                         }
947                 }
948                 iv = malloc(i * sizeof (struct iovec),
949                        M_TEMP, M_WAITOK);
950                 uiop->uio_iov = iv2 = iv;
951                 m = mb;
952                 left = len;
953                 i = 0;
954                 while (left > 0) {
955                         if (m == NULL)
956                                 panic("nfsrv_read iov");
957