[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]

FreeBSD/Linux Kernel Cross Reference
sys/nfsclient/nfs_vnops.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD70  -  FREEBSD6  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*-
  2  * Copyright (c) 1989, 1993
  3  *      The Regents of the University of California.  All rights reserved.
  4  *
  5  * This code is derived from software contributed to Berkeley by
  6  * Rick Macklem at The University of Guelph.
  7  *
  8  * Redistribution and use in source and binary forms, with or without
  9  * modification, are permitted provided that the following conditions
 10  * are met:
 11  * 1. Redistributions of source code must retain the above copyright
 12  *    notice, this list of conditions and the following disclaimer.
 13  * 2. Redistributions in binary form must reproduce the above copyright
 14  *    notice, this list of conditions and the following disclaimer in the
 15  *    documentation and/or other materials provided with the distribution.
 16  * 4. Neither the name of the University nor the names of its contributors
 17  *    may be used to endorse or promote products derived from this software
 18  *    without specific prior written permission.
 19  *
 20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 30  * SUCH DAMAGE.
 31  *
 32  *      @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
 33  */
 34 
 35 #include <sys/cdefs.h>
 36 __FBSDID("$FreeBSD: src/sys/nfsclient/nfs_vnops.c,v 1.293 2008/10/28 13:44:11 trasz Exp $");
 37 
 38 /*
 39  * vnode op calls for Sun NFS version 2 and 3
 40  */
 41 
 42 #include "opt_inet.h"
 43 
 44 #include <sys/param.h>
 45 #include <sys/kernel.h>
 46 #include <sys/systm.h>
 47 #include <sys/resourcevar.h>
 48 #include <sys/proc.h>
 49 #include <sys/mount.h>
 50 #include <sys/bio.h>
 51 #include <sys/buf.h>
 52 #include <sys/malloc.h>
 53 #include <sys/mbuf.h>
 54 #include <sys/namei.h>
 55 #include <sys/socket.h>
 56 #include <sys/vnode.h>
 57 #include <sys/dirent.h>
 58 #include <sys/fcntl.h>
 59 #include <sys/lockf.h>
 60 #include <sys/stat.h>
 61 #include <sys/sysctl.h>
 62 #include <sys/signalvar.h>
 63 #include <sys/vimage.h>
 64 
 65 #include <vm/vm.h>
 66 #include <vm/vm_object.h>
 67 #include <vm/vm_extern.h>
 68 #include <vm/vm_object.h>
 69 
 70 #include <fs/fifofs/fifo.h>
 71 
 72 #include <rpc/rpcclnt.h>
 73 
 74 #include <nfs/rpcv2.h>
 75 #include <nfs/nfsproto.h>
 76 #include <nfsclient/nfs.h>
 77 #include <nfsclient/nfsnode.h>
 78 #include <nfsclient/nfsmount.h>
 79 #include <nfsclient/nfs_lock.h>
 80 #include <nfs/xdr_subs.h>
 81 #include <nfsclient/nfsm_subs.h>
 82 
 83 #include <net/if.h>
 84 #include <netinet/in.h>
 85 #include <netinet/in_var.h>
 86 
 87 /* Defs */
 88 #define TRUE    1
 89 #define FALSE   0
 90 
 91 /*
 92  * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
 93  * calls are not in getblk() and brelse() so that they would not be necessary
 94  * here.
 95  */
 96 #ifndef B_VMIO
 97 #define vfs_busy_pages(bp, f)
 98 #endif
 99 
100 static vop_read_t       nfsfifo_read;
101 static vop_write_t      nfsfifo_write;
102 static vop_close_t      nfsfifo_close;
103 static int      nfs_flush(struct vnode *, int, int);
104 static int      nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *);
105 static vop_lookup_t     nfs_lookup;
106 static vop_create_t     nfs_create;
107 static vop_mknod_t      nfs_mknod;
108 static vop_open_t       nfs_open;
109 static vop_close_t      nfs_close;
110 static vop_access_t     nfs_access;
111 static vop_getattr_t    nfs_getattr;
112 static vop_setattr_t    nfs_setattr;
113 static vop_read_t       nfs_read;
114 static vop_fsync_t      nfs_fsync;
115 static vop_remove_t     nfs_remove;
116 static vop_link_t       nfs_link;
117 static vop_rename_t     nfs_rename;
118 static vop_mkdir_t      nfs_mkdir;
119 static vop_rmdir_t      nfs_rmdir;
120 static vop_symlink_t    nfs_symlink;
121 static vop_readdir_t    nfs_readdir;
122 static vop_strategy_t   nfs_strategy;
123 static  int     nfs_lookitup(struct vnode *, const char *, int,
124                     struct ucred *, struct thread *, struct nfsnode **);
125 static  int     nfs_sillyrename(struct vnode *, struct vnode *,
126                     struct componentname *);
127 static vop_access_t     nfsspec_access;
128 static vop_readlink_t   nfs_readlink;
129 static vop_print_t      nfs_print;
130 static vop_advlock_t    nfs_advlock;
131 static vop_advlockasync_t nfs_advlockasync;
132 
133 /*
134  * Global vfs data structures for nfs
     *
     * nfs_vnodeops is the operation vector for ordinary NFS vnodes.  Each
     * entry binds one VOP to the NFS routine that implements it.
     * .vop_default names default_vnodeops (presumably the fallback for any
     * VOP not listed here -- confirm against the vop_vector definition).
     * .vop_lease is explicitly set to VOP_NULL.
135  */
136 struct vop_vector nfs_vnodeops = {
137         .vop_default =          &default_vnodeops,
138         .vop_access =           nfs_access,
139         .vop_advlock =          nfs_advlock,
140         .vop_advlockasync =     nfs_advlockasync,
141         .vop_close =            nfs_close,
142         .vop_create =           nfs_create,
143         .vop_fsync =            nfs_fsync,
144         .vop_getattr =          nfs_getattr,
145         .vop_getpages =         nfs_getpages,
146         .vop_putpages =         nfs_putpages,
147         .vop_inactive =         nfs_inactive,
148         .vop_lease =            VOP_NULL,
149         .vop_link =             nfs_link,
150         .vop_lookup =           nfs_lookup,
151         .vop_mkdir =            nfs_mkdir,
152         .vop_mknod =            nfs_mknod,
153         .vop_open =             nfs_open,
154         .vop_print =            nfs_print,
155         .vop_read =             nfs_read,
156         .vop_readdir =          nfs_readdir,
157         .vop_readlink =         nfs_readlink,
158         .vop_reclaim =          nfs_reclaim,
159         .vop_remove =           nfs_remove,
160         .vop_rename =           nfs_rename,
161         .vop_rmdir =            nfs_rmdir,
162         .vop_setattr =          nfs_setattr,
163         .vop_strategy =         nfs_strategy,
164         .vop_symlink =          nfs_symlink,
165         .vop_write =            nfs_write,
166 };
167
    /*
     * Operation vector whose name suggests it serves fifo vnodes found on an
     * NFS mount.  Read, write and close go through the nfsfifo_* routines,
     * access checks through nfsspec_access, and attribute/lifecycle
     * operations reuse the regular nfs_* handlers.  .vop_default names
     * fifo_specops.
     */
168 struct vop_vector nfs_fifoops = {
169         .vop_default =          &fifo_specops,
170         .vop_access =           nfsspec_access,
171         .vop_close =            nfsfifo_close,
172         .vop_fsync =            nfs_fsync,
173         .vop_getattr =          nfs_getattr,
174         .vop_inactive =         nfs_inactive,
175         .vop_print =            nfs_print,
176         .vop_read =             nfsfifo_read,
177         .vop_reclaim =          nfs_reclaim,
178         .vop_setattr =          nfs_setattr,
179         .vop_write =            nfsfifo_write,
180 };
181 
182 static int      nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
183                              struct componentname *cnp, struct vattr *vap);
184 static int      nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
185                               struct ucred *cred, struct thread *td);
186 static int      nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
187                               int fnamelen, struct vnode *tdvp,
188                               const char *tnameptr, int tnamelen,
189                               struct ucred *cred, struct thread *td);
190 static int      nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
191                              struct sillyrename *sp);
192 
193 /*
194  * Global variables
     *
     * nfs_iod_mtx    - global mutex; per the SMP locking note in this file
     *                  it protects the shared nfsiod state.
     * nfs_iodwant    - one struct proc pointer per possible async daemon
     *                  (NFS_MAXASYNCDAEMON slots).
     * nfs_iodmount   - one struct nfsmount pointer per possible async daemon.
     * nfs_numasync   - starts at 0; presumably the number of running async
     *                  daemons (maintained outside this part of the file).
     * nfs_advlock_p  - advisory-lock hook, initially nfs_dolock.
     * nfs_reclaim_p  - reclaim hook, initially NULL.
     * DIRHDSIZ       - sizeof(struct dirent) minus the MAXNAMLEN + 1 bytes
     *                  reserved for the name.
195  */
196 struct mtx      nfs_iod_mtx;
197 struct proc     *nfs_iodwant[NFS_MAXASYNCDAEMON];
198 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
199 int              nfs_numasync = 0;
200 vop_advlock_t   *nfs_advlock_p = nfs_dolock;
201 vop_reclaim_t   *nfs_reclaim_p = NULL;
202 #define DIRHDSIZ        (sizeof (struct dirent) - (MAXNAMLEN + 1))
203 
204 SYSCTL_DECL(_vfs_nfs);
205 
    /*
     * Lifetime of a cached NFSv3 ACCESS result; nfs_access() compares
     * time_second against n_modestamp plus this value.  A value > 0 also
     * makes nfs_access() request every access bit at once and lets
     * nfs_getattr() try an ACCESS RPC before falling back to GETATTR.
     */
206 static int      nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
207 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
208            &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
209 
    /*
     * When non-zero, nfs_close() passes 1 as the last argument of
     * nfs_flush() for NFSv3 files (write + commit); otherwise it passes 0.
     */
210 static int      nfsv3_commit_on_close = 0;
211 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
212            &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
213 
    /*
     * When non-zero, nfs_close() runs vm_object_page_clean() on a regular
     * file's VM object before looking at NMODIFIED.
     */
214 static int      nfs_clean_pages_on_close = 1;
215 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
216            &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
217 
    /*
     * Master switch for the O_DIRECT handling in nfs_open()/nfs_close();
     * when zero those code paths are skipped.
     */
218 int nfs_directio_enable = 0;
219 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
220            &nfs_directio_enable, 0, "Enable NFS directio");
221 
222 /*
223  * This sysctl allows other processes to mmap a file that has been opened
224  * O_DIRECT by a process.  In general, having processes mmap the file while
225  * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
226  * this by default to prevent DoS attacks - to prevent a malicious user from
227  * opening up files O_DIRECT preventing other users from mmap'ing these
228  * files.  "Protected" environments where stricter consistency guarantees are
229  * required can disable this knob.  The process that opened the file O_DIRECT
230  * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
231  * meaningful.
     *
     * The variable is not consulted in this part of the file.
232  */
233 int nfs_directio_allow_mmap = 1;
234 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
235            &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
236 
237 #if 0
238 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
239            &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
240 
241 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
242            &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
243 #endif
244 
    /*
     * Union of the six NFSv3 ACCESS request bits.  nfs_getattr() passes it
     * to nfs3_access_otw() to ask the server about every right at once.
     */
245 #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY          \
246                          | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE     \
247                          | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
248 
249 /*
250  * SMP Locking Note :
251  * The list of locks after the description of the lock is the ordering
252  * of other locks acquired with the lock held.
253  * np->n_mtx : Protects the fields in the nfsnode.
254  *     VM Object Lock
255  *     VI_MTX (acquired indirectly)
256  * nmp->nm_mtx : Protects the fields in the nfsmount.
257  *     rep->r_mtx
258  * nfs_iod_mtx : Global lock, protects shared nfsiod state.
259  * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
260  *     nmp->nm_mtx
261  *     rep->r_mtx
262  * rep->r_mtx : Protects the fields in an nfsreq.
263  */
264 
    /*
     * Send an NFSv3 ACCESS request for vp "over the wire".
     *
     * vp    - vnode whose file handle is placed in the request.
     * wmode - NFSV3ACCESS_* bits to ask the server about.
     * td    - thread passed to the request macro.
     * cred  - credentials for the request; cr_uid is recorded with the result.
     *
     * On success the mask returned by the server, the caller's uid and the
     * current time_second are stored in the nfsnode (n_mode, n_modeuid,
     * n_modestamp) under n_mtx.  Returns the value left in `error'.
     *
     * The nfsm_* helpers are macros defined outside this file; they use the
     * locals mreq/mrep/md/mb/bpos/dpos/error implicitly.  The otherwise
     * unreferenced nfsmout label suggests they jump there on failure
     * (NOTE(review): confirm in nfsm_subs.h).
     */
265 static int
266 nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
267     struct ucred *cred)
268 {
269         const int v3 = 1;
270         u_int32_t *tl;
271         int error = 0, attrflag;
272 
273         struct mbuf *mreq, *mrep, *md, *mb;
274         caddr_t bpos, dpos;
275         u_int32_t rmode;
276         struct nfsnode *np = VTONFS(vp);
277 
278         nfsstats.rpccnt[NFSPROC_ACCESS]++;
            /* Request body: file handle followed by one 32-bit access mask. */
279         mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
280         mb = mreq;
281         bpos = mtod(mb, caddr_t);
282         nfsm_fhtom(vp, v3);
283         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
284         *tl = txdr_unsigned(wmode);
285         nfsm_request(vp, NFSPROC_ACCESS, td, cred);
286         nfsm_postop_attr(vp, attrflag);
287         if (!error) {
                    /* Decode the granted mask and cache it for nfs_access(). */
288                 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
289                 rmode = fxdr_unsigned(u_int32_t, *tl);
290                 mtx_lock(&np->n_mtx);
291                 np->n_mode = rmode;
292                 np->n_modeuid = cred->cr_uid;
293                 np->n_modestamp = time_second;
294                 mtx_unlock(&np->n_mtx);
295         }
296         m_freem(mrep);
297 nfsmout:
298         return (error);
299 }
300 
301 /*
302  * nfs access vnode op.
     * Write requests against regular files, directories and symlinks on a
     * read-only mount are refused with EROFS before anything else is done.
303  * For nfs version 2, defer to nfsspec_access() and, when uid 0 asks for
     * read access to a non-empty file, probe with a small read RPC.  File
     * accesses may still fail later.
304  * For nfs version 3, use the access rpc to check accessibility. If file modes
305  * are changed on the server, accesses might still fail later.
     *
     * Returns 0 when access is granted, EROFS/EACCES or the RPC error
     * otherwise.
     *
     * NOTE(review): after nfs3_access_otw() the verdict is taken from
     * np->n_mode re-read under n_mtx, not from a value handed back by the
     * RPC routine.  n_mtx is dropped in between, so confirm whether another
     * thread (possibly with a different uid) can replace n_mode in that
     * window.
306  */
307 static int
308 nfs_access(struct vop_access_args *ap)
309 {
310         struct vnode *vp = ap->a_vp;
311         int error = 0;
312         u_int32_t mode, wmode;
313         int v3 = NFS_ISV3(vp);
314         struct nfsnode *np = VTONFS(vp);
315 
316         /*
317          * Disallow write attempts on filesystems mounted read-only;
318          * unless the file is a socket, fifo, or a block or character
319          * device resident on the filesystem.
320          */
321         if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
322                 switch (vp->v_type) {
323                 case VREG:
324                 case VDIR:
325                 case VLNK:
326                         return (EROFS);
327                 default:
328                         break;
329                 }
330         }
331         /*
332          * For nfs v3, check to see if we have done this recently, and if
333          * so return our cached result instead of making an ACCESS call.
334          * If not, do an access rpc, otherwise you are stuck emulating
335          * ufs_access() locally using the vattr. This may not be correct,
336          * since the server may apply other access criteria such as
337          * client uid-->server uid mapping that we do not know about.
338          */
339         if (v3) {
                    /*
                     * Translate the VREAD/VWRITE/VEXEC request into NFSv3
                     * ACCESS bits; directories map VWRITE to
                     * MODIFY|EXTEND|DELETE and VEXEC to LOOKUP.
                     */
340                 if (ap->a_accmode & VREAD)
341                         mode = NFSV3ACCESS_READ;
342                 else
343                         mode = 0;
344                 if (vp->v_type != VDIR) {
345                         if (ap->a_accmode & VWRITE)
346                                 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
347                         if (ap->a_accmode & VEXEC)
348                                 mode |= NFSV3ACCESS_EXECUTE;
349                 } else {
350                         if (ap->a_accmode & VWRITE)
351                                 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
352                                          NFSV3ACCESS_DELETE);
353                         if (ap->a_accmode & VEXEC)
354                                 mode |= NFSV3ACCESS_LOOKUP;
355                 }
356                 /* XXX safety belt, only make blanket request if caching */
357                 if (nfsaccess_cache_timeout > 0) {
358                         wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
359                                 NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
360                                 NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
361                 } else {
362                         wmode = mode;
363                 }
364 
365                 /*
366                  * Does our cached result allow us to give a definite yes to
367                  * this request?
                     * A hit needs an unexpired stamp, the same uid, and every
                     * requested bit present in the cached mask.
368                  */
369                 mtx_lock(&np->n_mtx);
370                 if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
371                     (ap->a_cred->cr_uid == np->n_modeuid) &&
372                     ((np->n_mode & mode) == mode)) {
373                         nfsstats.accesscache_hits++;
374                 } else {
375                         /*
376                          * Either a no, or a don't know.  Go to the wire.
                             * n_mtx is released around the RPC because
                             * nfs3_access_otw() takes it itself.
377                          */
378                         nfsstats.accesscache_misses++;
379                         mtx_unlock(&np->n_mtx);
380                         error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred);
381                         mtx_lock(&np->n_mtx);
382                         if (!error) {
383                                 if ((np->n_mode & mode) != mode) {
384                                         error = EACCES;
385                                 }
386                         }
387                 }
388                 mtx_unlock(&np->n_mtx);
389                 return (error);
390         } else {
391                 if ((error = nfsspec_access(ap)) != 0) {
392                         return (error);
393                 }
394                 /*
395                  * Attempt to prevent a mapped root from accessing a file
396                  * which it shouldn't.  We try to read a byte from the file
397                  * if the user is root and the file is not zero length.
398                  * After calling nfsspec_access, we should have the correct
399                  * file size cached.
400                  */
401                 mtx_lock(&np->n_mtx);
402                 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
403                     && VTONFS(vp)->n_size > 0) {
404                         struct iovec aiov;
405                         struct uio auio;
406                         char buf[1];
407 
                            /* Drop n_mtx before issuing any RPC. */
408                         mtx_unlock(&np->n_mtx);
                            /* One-byte kernel-space read starting at offset 0. */
409                         aiov.iov_base = buf;
410                         aiov.iov_len = 1;
411                         auio.uio_iov = &aiov;
412                         auio.uio_iovcnt = 1;
413                         auio.uio_offset = 0;
414                         auio.uio_resid = 1;
415                         auio.uio_segflg = UIO_SYSSPACE;
416                         auio.uio_rw = UIO_READ;
417                         auio.uio_td = ap->a_td;
418 
419                         if (vp->v_type == VREG)
420                                 error = nfs_readrpc(vp, &auio, ap->a_cred);
421                         else if (vp->v_type == VDIR) {
                                    /*
                                     * Directories are probed with a full
                                     * NFS_DIRBLKSIZ temporary buffer instead
                                     * of the one-byte stack buffer.
                                     */
422                                 char* bp;
423                                 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
424                                 aiov.iov_base = bp;
425                                 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
426                                 error = nfs_readdirrpc(vp, &auio, ap->a_cred);
427                                 free(bp, M_TEMP);
428                         } else if (vp->v_type == VLNK)
429                                 error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
430                         else
431                                 error = EACCES;
432                 } else
433                         mtx_unlock(&np->n_mtx);
434                 return (error);
435         }
436 }
437 
438 int nfs_otw_getattr_avoid = 0;  /* NOTE(review): not referenced in this part of the file; purpose to be confirmed */
439 
440 /*
441  * nfs open vnode op
442  * Check to see if the type is ok
443  * and that deletion is not in progress.
444  * For paged in text files, you will need to flush the page cache
445  * if consistency is lost.
     *
     * Only VREG, VDIR and VLNK vnodes are accepted (EOPNOTSUPP otherwise).
     * If the node is marked NMODIFIED its buffers are flushed and the
     * attributes refetched; otherwise fresh attributes are fetched and the
     * buffers are invalidated only when the modification time differs from
     * the cached n_mtime.  With nfs_directio_enable set, the first O_DIRECT
     * open of a regular file flushes the cache and sets NNONCACHE.
     *
     * Returns 0 on success or the error from nfs_vinvalbuf()/VOP_GETATTR().
     *
     * NOTE(review): n_directio_opens is tested and incremented here without
     * n_mtx while nfs_close() decrements it under n_mtx; confirm that the
     * vnode lock held by callers makes this safe.
446  */
447 /* ARGSUSED */
448 static int
449 nfs_open(struct vop_open_args *ap)
450 {
451         struct vnode *vp = ap->a_vp;
452         struct nfsnode *np = VTONFS(vp);
453         struct vattr vattr;
454         int error;
455         int fmode = ap->a_mode;
456 
457         if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
458                 return (EOPNOTSUPP);
459 
460         /*
461          * Get a valid lease. If cached data is stale, flush it.
462          */
463         mtx_lock(&np->n_mtx);
464         if (np->n_flag & NMODIFIED) {
465                 mtx_unlock(&np->n_mtx);
466                 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
467                 if (error == EINTR || error == EIO)
468                         return (error);
                    /*
                     * n_attrstamp and n_direofoffset are nfsnode fields, so
                     * update them under n_mtx as the other writers in this
                     * file do.  The mutex must be released again before
                     * VOP_GETATTR(), because nfs_getattr() acquires it.
                     */
                    mtx_lock(&np->n_mtx);
469                 np->n_attrstamp = 0;
470                 if (vp->v_type == VDIR)
471                         np->n_direofoffset = 0;
                    mtx_unlock(&np->n_mtx);
472                 error = VOP_GETATTR(vp, &vattr, ap->a_cred);
473                 if (error)
474                         return (error);
475                 mtx_lock(&np->n_mtx);
476                 np->n_mtime = vattr.va_mtime;
477                 mtx_unlock(&np->n_mtx);
478         } else {
479                 struct thread *td = curthread;
480 
                    /*
                     * Force an attribute refetch unless the cached timestamp
                     * identity (syscall count, tid, pid) matches the current
                     * thread.
                     */
481                 if (np->n_ac_ts_syscalls != td->td_syscalls ||
482                     np->n_ac_ts_tid != td->td_tid || 
483                     td->td_proc == NULL ||
484                     np->n_ac_ts_pid != td->td_proc->p_pid) {
485                         np->n_attrstamp = 0;
486                 }
487                 mtx_unlock(&np->n_mtx);
488                 error = VOP_GETATTR(vp, &vattr, ap->a_cred);
489                 if (error)
490                         return (error);
491                 mtx_lock(&np->n_mtx);
492                 if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
493                         if (vp->v_type == VDIR)
494                                 np->n_direofoffset = 0;
495                         mtx_unlock(&np->n_mtx);
496                         error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
497                         if (error == EINTR || error == EIO) {
498                                 return (error);
499                         }
500                         mtx_lock(&np->n_mtx);
501                         np->n_mtime = vattr.va_mtime;
502                 }
503                 mtx_unlock(&np->n_mtx);
504         }
505         /*
506          * If the object has >= 1 O_DIRECT active opens, we disable caching.
507          */
508         if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
509                 if (np->n_directio_opens == 0) {
510                         error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
511                         if (error)
512                                 return (error);
513                         mtx_lock(&np->n_mtx);
514                         np->n_flag |= NNONCACHE;
515                         mtx_unlock(&np->n_mtx);
516                 }
517                 np->n_directio_opens++;
518         }
519         vnode_create_vobject(vp, vattr.va_size, ap->a_td);
520         return (0);
521 }
522 
523 /*
524  * nfs close vnode op
525  * What an NFS client should do upon close after writing is a debatable issue.
526  * Most NFS clients push delayed writes to the server upon close, basically for
527  * two reasons:
528  * 1 - So that any write errors may be reported back to the client process
529  *     doing the close system call. By far the two most likely errors are
530  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
531  * 2 - To put a worst case upper bound on cache inconsistency between
532  *     multiple clients for the file.
533  * There is also a consistency problem for Version 2 of the protocol w.r.t.
534  * not being able to tell if other clients are writing a file concurrently,
535  * since there is no way of knowing if the changed modify time in the reply
536  * is only due to the write for this client.
537  * (NFS Version 3 provides weak cache consistency data in the reply that
538  *  should be sufficient to detect and handle this case.)
539  *
540  * The current code does the following:
541  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
542  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
543  *                     or commit them (this satisfies 1 and 2 except for the
544  *                     case where the server crashes after this close but
545  *                     before the commit RPC, which is felt to be "good
546  *                     enough").  The last argument to nfs_flush() is taken
547  *                     from the nfsv3_commit_on_close sysctl; setting it to
548  *                     1 forces a commit operation on close.
     *
     * Returns the flush error, overridden by the saved n_error when the
     * node carries NWRITEERR (the flag is cleared in that case).
549  */
550 /* ARGSUSED */
551 static int
552 nfs_close(struct vop_close_args *ap)
553 {
554         struct vnode *vp = ap->a_vp;
555         struct nfsnode *np = VTONFS(vp);
556         int error = 0;
557         int fmode = ap->a_fflag;
558 
559         if (vp->v_type == VREG) {
560             /*
561              * Examine and clean dirty pages, regardless of NMODIFIED.
562              * This closes a major hole in close-to-open consistency.
563              * We want to push out all dirty pages (and buffers) on
564              * close, regardless of whether they were dirtied by
565              * mmap'ed writes or via write().
566              */
567             if (nfs_clean_pages_on_close && vp->v_object) {
568                 VM_OBJECT_LOCK(vp->v_object);
569                 vm_object_page_clean(vp->v_object, 0, 0, 0);
570                 VM_OBJECT_UNLOCK(vp->v_object);
571             }
572             mtx_lock(&np->n_mtx);
573             if (np->n_flag & NMODIFIED) {
                    /* n_mtx is dropped while flushing and retaken afterwards. */
574                 mtx_unlock(&np->n_mtx);
575                 if (NFS_ISV3(vp)) {
576                     /*
577                      * Under NFSv3 we have dirty buffers to dispose of.  We
578                      * must flush them to the NFS server.  We have the option
579                      * of waiting all the way through the commit rpc or just
580                      * waiting for the initial write.  The default is to only
581                      * wait through the initial write so the data is in the
582                      * server's cache, which is roughly similar to the state
583                      * a standard disk subsystem leaves the file in on close().
584                      *
585                      * We cannot clear the NMODIFIED bit in np->n_flag due to
586                      * potential races with other processes, and certainly
587                      * cannot clear it if we don't commit.
588                      */
589                     int cm = nfsv3_commit_on_close ? 1 : 0;
590                     error = nfs_flush(vp, MNT_WAIT, cm);
591                     /* np->n_flag &= ~NMODIFIED; */
592                 } else
593                     error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
594                 mtx_lock(&np->n_mtx);
595             }
596             /* 
597              * Invalidate the attribute cache in all cases.
598              * An open is going to fetch fresh attrs any way, other procs
599              * on this node that have file open will be forced to do an 
600              * otw attr fetch, but this is safe.
601              */
602             np->n_attrstamp = 0;
                /* A recorded write error takes precedence over the flush result. */
603             if (np->n_flag & NWRITEERR) {
604                 np->n_flag &= ~NWRITEERR;
605                 error = np->n_error;
606             }
607             mtx_unlock(&np->n_mtx);
608         }
609         if (nfs_directio_enable)
610                 KASSERT((np->n_directio_asyncwr == 0),
611                         ("nfs_close: dirty unflushed (%d) directio buffers\n",
612                          np->n_directio_asyncwr));
            /*
             * Undo the O_DIRECT accounting done in nfs_open(); the last such
             * close clears NNONCACHE again.
             */
613         if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
614                 mtx_lock(&np->n_mtx);
615                 KASSERT((np->n_directio_opens > 0), 
616                         ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
617                 np->n_directio_opens--;
618                 if (np->n_directio_opens == 0)
619                         np->n_flag &= ~NNONCACHE;
620                 mtx_unlock(&np->n_mtx);
621         }
622         return (error);
623 }
624 
625 /*
626  * nfs getattr call from vfs.
     *
     * Fills *ap->a_vap with the attributes of ap->a_vp.  The attribute cache
     * is tried first.  For NFSv3 with access caching enabled
     * (nfsaccess_cache_timeout > 0) an ACCESS RPC for all rights is sent
     * and the cache is tried again; the result of that RPC is deliberately
     * ignored because a GETATTR RPC follows if the cache is still cold.
     *
     * Returns 0 on success or the RPC error.  On error *ap->a_vap is left
     * untouched: the local vattr has not been filled in at that point, so
     * copying it would hand uninitialized stack contents to the caller.
     *
     * The nfsm_* helpers are macros defined outside this file that use the
     * locals mreq/mrep/md/mb/bpos/dpos/error implicitly.
627  */
628 static int
629 nfs_getattr(struct vop_getattr_args *ap)
630 {
631         struct vnode *vp = ap->a_vp;
632         struct nfsnode *np = VTONFS(vp);
633         struct thread *td = curthread;
634         struct vattr *vap = ap->a_vap;
635         struct vattr vattr;
636         caddr_t bpos, dpos;
637         int error = 0;
638         struct mbuf *mreq, *mrep, *md, *mb;
639         int v3 = NFS_ISV3(vp);
640 
641         /*
642          * Update local times for special files.
643          */
644         mtx_lock(&np->n_mtx);
645         if (np->n_flag & (NACC | NUPD))
646                 np->n_flag |= NCHG;
647         mtx_unlock(&np->n_mtx);
648         /*
649          * First look in the cache.
650          */
651         if (nfs_getattrcache(vp, &vattr) == 0)
652                 goto nfsmout;
653         if (v3 && nfsaccess_cache_timeout > 0) {
654                 nfsstats.accesscache_misses++;
655                 nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred);
656                 if (nfs_getattrcache(vp, &vattr) == 0)
657                         goto nfsmout;
658         }
659         nfsstats.rpccnt[NFSPROC_GETATTR]++;
660         mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
661         mb = mreq;
662         bpos = mtod(mb, caddr_t);
663         nfsm_fhtom(vp, v3);
664         nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred);
665         if (!error) {
666                 nfsm_loadattr(vp, &vattr);
667         }
668         m_freem(mrep);
669 nfsmout:
            /*
             * Publish the attributes only when vattr was actually filled in,
             * i.e. on a cache hit or a successful GETATTR reply.
             */
            if (error == 0) {
670                 vap->va_type = vattr.va_type;
671                 vap->va_mode = vattr.va_mode;
672                 vap->va_nlink = vattr.va_nlink;
673                 vap->va_uid = vattr.va_uid;
674                 vap->va_gid = vattr.va_gid;
675                 vap->va_fsid = vattr.va_fsid;
676                 vap->va_fileid = vattr.va_fileid;
677                 vap->va_size = vattr.va_size;
678                 vap->va_blocksize = vattr.va_blocksize;
679                 vap->va_atime = vattr.va_atime;
680                 vap->va_mtime = vattr.va_mtime;
681                 vap->va_ctime = vattr.va_ctime;
682                 vap->va_gen = vattr.va_gen;
683                 vap->va_flags = vattr.va_flags;
684                 vap->va_rdev = vattr.va_rdev;
685                 vap->va_bytes = vattr.va_bytes;
686                 vap->va_filerev = vattr.va_filerev;
            }
687 
688         return (error);
689 }
690 
691 /*
692  * nfs setattr call.
693  */
694 static int
695 nfs_setattr(struct vop_setattr_args *ap)
696 {
697         struct vnode *vp = ap->a_vp;
698         struct nfsnode *np = VTONFS(vp);
699         struct vattr *vap = ap->a_vap;
700         struct thread *td = curthread;
701         int error = 0;
702         u_quad_t tsize;
703 
704 #ifndef nolint
705         tsize = (u_quad_t)0;
706 #endif
707 
708         /*
709          * Setting of flags and marking of atimes are not supported.
710          */
711         if (vap->va_flags != VNOVAL || (vap->va_vaflags & VA_MARK_ATIME))
712                 return (EOPNOTSUPP);
713 
714         /*
715          * Disallow write attempts if the filesystem is mounted read-only.
716          */
717         if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
718             vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
719             vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
720             (vp->v_mount->mnt_flag & MNT_RDONLY)) {
721                 error = EROFS;
722                 goto out;
723         }
724         if (vap->va_size != VNOVAL) {
725                 switch (vp->v_type) {
726                 case VDIR:
727                         return (EISDIR);
728                 case VCHR:
729                 case VBLK:
730                 case VSOCK:
731                 case VFIFO:
732                         if (vap->va_mtime.tv_sec == VNOVAL &&
733                             vap->va_atime.tv_sec == VNOVAL &&
734                             vap->va_mode == (mode_t)VNOVAL &&
735                             vap->va_uid == (uid_t)VNOVAL &&
736                             vap->va_gid == (gid_t)VNOVAL)
737                                 return (0);             
738                         vap->va_size = VNOVAL;
739                         break;
740                 default:
741                         /*
742                          * Disallow write attempts if the filesystem is
743                          * mounted read-only.
744                          */
745                         if (vp->v_mount->mnt_flag & MNT_RDONLY)
746                                 return (EROFS);
747                         /*
748                          *  We run vnode_pager_setsize() early (why?),
749                          * we must set np->n_size now to avoid vinvalbuf
750                          * V_SAVE races that might setsize a lower
751                          * value.
752                          */
753                         mtx_lock(&np->n_mtx);
754                         tsize = np->n_size;
755                         mtx_unlock(&np->n_mtx);
756                         error = nfs_meta_setsize(vp, ap->a_cred, td,
757                             vap->va_size);
758                         mtx_lock(&np->n_mtx);
759                         if (np->n_flag & NMODIFIED) {
760                             tsize = np->n_size;
761                             mtx_unlock(&np->n_mtx);
762                             if (vap->va_size == 0)
763                                 error = nfs_vinvalbuf(vp, 0, td, 1);
764                             else
765                                 error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
766                             if (error) {
767                                 vnode_pager_setsize(vp, tsize);
768                                 goto out;
769                             }
770                         } else
771                             mtx_unlock(&np->n_mtx);
772                         /*
773                          * np->n_size has already been set to vap->va_size
774                          * in nfs_meta_setsize(). We must set it again since
775                          * nfs_loadattrcache() could be called through
776                          * nfs_meta_setsize() and could modify np->n_size.
777                          */
778                         mtx_lock(&np->n_mtx);
779                         np->n_vattr.va_size = np->n_size = vap->va_size;
780                         mtx_unlock(&np->n_mtx);
781                 };
782         } else {
783                 mtx_lock(&np->n_mtx);
784                 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 
785                     (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
786                         mtx_unlock(&np->n_mtx);
787                         if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 &&
788                             (error == EINTR || error == EIO))
789                                 return error;
790                 } else
791                         mtx_unlock(&np->n_mtx);
792         }
793         error = nfs_setattrrpc(vp, vap, ap->a_cred);
794         if (error && vap->va_size != VNOVAL) {
795                 mtx_lock(&np->n_mtx);
796                 np->n_size = np->n_vattr.va_size = tsize;
797                 vnode_pager_setsize(vp, tsize);
798                 mtx_unlock(&np->n_mtx);
799         }
800 out:
801         return (error);
802 }
803 
804 /*
805  * Do an nfs setattr rpc.
806  */
807 static int
808 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred)
809 {
810         struct nfsv2_sattr *sp;
811         struct nfsnode *np = VTONFS(vp);
812         caddr_t bpos, dpos;
813         u_int32_t *tl;
814         int error = 0, wccflag = NFSV3_WCCRATTR;
815         struct mbuf *mreq, *mrep, *md, *mb;
816         int v3 = NFS_ISV3(vp);
817 
818         nfsstats.rpccnt[NFSPROC_SETATTR]++;
819         mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
820         mb = mreq;
821         bpos = mtod(mb, caddr_t);
822         nfsm_fhtom(vp, v3);
823         if (v3) {
824                 nfsm_v3attrbuild(vap, TRUE);
825                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
826                 *tl = nfs_false;
827         } else {
828                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
829                 if (vap->va_mode == (mode_t)VNOVAL)
830                         sp->sa_mode = nfs_xdrneg1;
831                 else
832                         sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
833                 if (vap->va_uid == (uid_t)VNOVAL)
834                         sp->sa_uid = nfs_xdrneg1;
835                 else
836                         sp->sa_uid = txdr_unsigned(vap->va_uid);
837                 if (vap->va_gid == (gid_t)VNOVAL)
838                         sp->sa_gid = nfs_xdrneg1;
839                 else
840                         sp->sa_gid = txdr_unsigned(vap->va_gid);
841                 sp->sa_size = txdr_unsigned(vap->va_size);
842                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
843                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
844         }
845         nfsm_request(vp, NFSPROC_SETATTR, curthread, cred);
846         if (v3) {
847                 np->n_modestamp = 0;
848                 nfsm_wcc_data(vp, wccflag);
849         } else
850                 nfsm_loadattr(vp, NULL);
851         m_freem(mrep);
852 nfsmout:
853         return (error);
854 }
855 
856 /*
857  * nfs lookup call, one step at a time...
858  * First look in cache
859  * If not found, unlock the directory nfsnode and do the rpc
860  */
861 static int
862 nfs_lookup(struct vop_lookup_args *ap)
863 {
864         struct componentname *cnp = ap->a_cnp;
865         struct vnode *dvp = ap->a_dvp;
866         struct vnode **vpp = ap->a_vpp;
867         int flags = cnp->cn_flags;
868         struct vnode *newvp;
869         struct nfsmount *nmp;
870         caddr_t bpos, dpos;
871         struct mbuf *mreq, *mrep, *md, *mb;
872         long len;
873         nfsfh_t *fhp;
874         struct nfsnode *np;
875         int error = 0, attrflag, fhsize;
876         int v3 = NFS_ISV3(dvp);
877         struct thread *td = cnp->cn_thread;
878         
879         *vpp = NULLVP;
880         if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
881             (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
882                 return (EROFS);
883         if (dvp->v_type != VDIR)
884                 return (ENOTDIR);
885         nmp = VFSTONFS(dvp->v_mount);
886         np = VTONFS(dvp);
887         if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
888                 *vpp = NULLVP;
889                 return (error);
890         }
891         error = cache_lookup(dvp, vpp, cnp);
892</