nfs_clvnops.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      from nfs_vnops.c        8.16 (Berkeley) 5/27/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/9.0/sys/fs/nfsclient/nfs_clvnops.c 224532 2011-07-30 22:57:38Z rmacklem $");
   37 
   38 /*
   39  * vnode op calls for Sun NFS version 2, 3 and 4
   40  */
   41 
   42 #include "opt_kdtrace.h"
   43 #include "opt_inet.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/kernel.h>
   47 #include <sys/systm.h>
   48 #include <sys/resourcevar.h>
   49 #include <sys/proc.h>
   50 #include <sys/mount.h>
   51 #include <sys/bio.h>
   52 #include <sys/buf.h>
   53 #include <sys/jail.h>
   54 #include <sys/malloc.h>
   55 #include <sys/mbuf.h>
   56 #include <sys/namei.h>
   57 #include <sys/socket.h>
   58 #include <sys/vnode.h>
   59 #include <sys/dirent.h>
   60 #include <sys/fcntl.h>
   61 #include <sys/lockf.h>
   62 #include <sys/stat.h>
   63 #include <sys/sysctl.h>
   64 #include <sys/signalvar.h>
   65 
   66 #include <vm/vm.h>
   67 #include <vm/vm_extern.h>
   68 #include <vm/vm_object.h>
   69 
   70 #include <fs/nfs/nfsport.h>
   71 #include <fs/nfsclient/nfsnode.h>
   72 #include <fs/nfsclient/nfsmount.h>
   73 #include <fs/nfsclient/nfs.h>
   74 #include <fs/nfsclient/nfs_kdtrace.h>
   75 
   76 #include <net/if.h>
   77 #include <netinet/in.h>
   78 #include <netinet/in_var.h>
   79 
   80 #include <nfs/nfs_lock.h>
   81 
#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>

/*
 * Storage for the NFS client access-cache DTrace hooks and their probe
 * ids.  These objects only exist when the kernel is built with
 * KDTRACE_HOOKS.  The hook types are declared outside this file
 * (presumably in <sys/dtrace_bsd.h> -- confirm).
 */

/* Access cache "flush done" hook and probe id. */
dtrace_nfsclient_accesscache_flush_probe_func_t
                dtrace_nfscl_accesscache_flush_done_probe;
uint32_t        nfscl_accesscache_flush_done_id;

/* Access cache lookup hooks ("hit" and "miss") and their probe ids. */
dtrace_nfsclient_accesscache_get_probe_func_t
                dtrace_nfscl_accesscache_get_hit_probe,
                dtrace_nfscl_accesscache_get_miss_probe;
uint32_t        nfscl_accesscache_get_hit_id;
uint32_t        nfscl_accesscache_get_miss_id;

/* Access cache "load done" hook and probe id. */
dtrace_nfsclient_accesscache_load_probe_func_t
                dtrace_nfscl_accesscache_load_done_probe;
uint32_t        nfscl_accesscache_load_done_id;
#endif /* KDTRACE_HOOKS */
   99 
/* Defs */
/* Local boolean constants. */
#define TRUE    1
#define FALSE   0

/* Objects defined outside this file. */
extern struct nfsstats newnfsstats;     /* counters, e.g. accesscache_hits */
extern int nfsrv_useacl;
MALLOC_DECLARE(M_NEWNFSREQ);

/*
 * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
 * calls are not in getblk() and brelse() so that they would not be necessary
 * here.
 */
/* When B_VMIO is not defined, vfs_busy_pages() expands to nothing. */
#ifndef B_VMIO
#define vfs_busy_pages(bp, f)
#endif
  116 
/*
 * Forward declarations of the file-local vnode operations and helpers.
 * The vop_*_t typed entries are the functions installed in the
 * newnfs_vnodeops and newnfs_fifoops vectors below; the remaining
 * prototypes are internal helpers.
 */
static vop_read_t       nfsfifo_read;
static vop_write_t      nfsfifo_write;
static vop_close_t      nfsfifo_close;
static int      nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
                    struct thread *);
static vop_lookup_t     nfs_lookup;
static vop_create_t     nfs_create;
static vop_mknod_t      nfs_mknod;
static vop_open_t       nfs_open;
static vop_pathconf_t   nfs_pathconf;
static vop_close_t      nfs_close;
static vop_access_t     nfs_access;
static vop_getattr_t    nfs_getattr;
static vop_setattr_t    nfs_setattr;
static vop_read_t       nfs_read;
static vop_fsync_t      nfs_fsync;
static vop_remove_t     nfs_remove;
static vop_link_t       nfs_link;
static vop_rename_t     nfs_rename;
static vop_mkdir_t      nfs_mkdir;
static vop_rmdir_t      nfs_rmdir;
static vop_symlink_t    nfs_symlink;
static vop_readdir_t    nfs_readdir;
static vop_strategy_t   nfs_strategy;
static vop_lock1_t      nfs_lock1;
static  int     nfs_lookitup(struct vnode *, char *, int,
                    struct ucred *, struct thread *, struct nfsnode **);
static  int     nfs_sillyrename(struct vnode *, struct vnode *,
                    struct componentname *);
/* Access check also installed in the fifo vector and used by the v2 path. */
static vop_access_t     nfsspec_access;
static vop_readlink_t   nfs_readlink;
static vop_print_t      nfs_print;
static vop_advlock_t    nfs_advlock;
static vop_advlockasync_t nfs_advlockasync;
static vop_getacl_t nfs_getacl;
static vop_setacl_t nfs_setacl;
  153 
  154 /*
  155  * Global vfs data structures for nfs
  156  */
  157 struct vop_vector newnfs_vnodeops = {
  158         .vop_default =          &default_vnodeops,
  159         .vop_access =           nfs_access,
  160         .vop_advlock =          nfs_advlock,
  161         .vop_advlockasync =     nfs_advlockasync,
  162         .vop_close =            nfs_close,
  163         .vop_create =           nfs_create,
  164         .vop_fsync =            nfs_fsync,
  165         .vop_getattr =          nfs_getattr,
  166         .vop_getpages =         ncl_getpages,
  167         .vop_putpages =         ncl_putpages,
  168         .vop_inactive =         ncl_inactive,
  169         .vop_link =             nfs_link,
  170         .vop_lock1 =            nfs_lock1,
  171         .vop_lookup =           nfs_lookup,
  172         .vop_mkdir =            nfs_mkdir,
  173         .vop_mknod =            nfs_mknod,
  174         .vop_open =             nfs_open,
  175         .vop_pathconf =         nfs_pathconf,
  176         .vop_print =            nfs_print,
  177         .vop_read =             nfs_read,
  178         .vop_readdir =          nfs_readdir,
  179         .vop_readlink =         nfs_readlink,
  180         .vop_reclaim =          ncl_reclaim,
  181         .vop_remove =           nfs_remove,
  182         .vop_rename =           nfs_rename,
  183         .vop_rmdir =            nfs_rmdir,
  184         .vop_setattr =          nfs_setattr,
  185         .vop_strategy =         nfs_strategy,
  186         .vop_symlink =          nfs_symlink,
  187         .vop_write =            ncl_write,
  188         .vop_getacl =           nfs_getacl,
  189         .vop_setacl =           nfs_setacl,
  190 };
  191 
  192 struct vop_vector newnfs_fifoops = {
  193         .vop_default =          &fifo_specops,
  194         .vop_access =           nfsspec_access,
  195         .vop_close =            nfsfifo_close,
  196         .vop_fsync =            nfs_fsync,
  197         .vop_getattr =          nfs_getattr,
  198         .vop_inactive =         ncl_inactive,
  199         .vop_print =            nfs_print,
  200         .vop_read =             nfsfifo_read,
  201         .vop_reclaim =          ncl_reclaim,
  202         .vop_setattr =          nfs_setattr,
  203         .vop_write =            nfsfifo_write,
  204 };
  205 
/*
 * Prototypes for file-local helpers that are defined later in the file.
 * Judging by their names they wrap the mknod, remove and rename RPCs;
 * their bodies are not part of this section -- confirm there.
 */
static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp, struct vattr *vap);
static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
    int namelen, struct ucred *cred, struct thread *td);
static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
    char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
    char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
    struct componentname *scnp, struct sillyrename *sp);
  215 
  216 /*
  217  * Global variables
  218  */
/* Size of a struct dirent without its (MAXNAMLEN + 1) byte name buffer. */
#define DIRHDSIZ        (sizeof (struct dirent) - (MAXNAMLEN + 1))

SYSCTL_DECL(_vfs_nfs);

/*
 * vfs.nfs.access_cache_timeout: nfs_access() honors a cached ACCESS
 * result while time_second is less than the entry's stamp plus this
 * value.  When it is greater than zero, nfs_access() also asks the
 * server for all access bits at once rather than only those needed.
 */
static int      nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
           &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");

/* vfs.nfs.prime_access_cache: off (0) by default. */
static int      nfs_prime_access_cache = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
           &nfs_prime_access_cache, 0,
           "Prime NFS ACCESS cache when fetching attributes");

/*
 * vfs.nfs.commit_on_close: when non-zero, nfs_close() passes 1 as the
 * commit argument of ncl_flush(); off (0) by default.
 */
static int      newnfs_commit_on_close = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
    &newnfs_commit_on_close, 0, "write+commit on close, else only write");

/*
 * vfs.nfs.clean_pages_on_close: when non-zero (the default), nfs_close()
 * cleans the dirty pages of a regular file's VM object.
 */
static int      nfs_clean_pages_on_close = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
           &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");

/*
 * vfs.nfs.nfs_directio_enable: when non-zero, nfs_open() honors O_DIRECT
 * opens of regular files by marking the node NNONCACHE; off (0) by
 * default.  Not static, so it is visible to other files.
 */
int newnfs_directio_enable = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
           &newnfs_directio_enable, 0, "Enable NFS directio");
  243 
  244 /*
  245  * This sysctl allows other processes to mmap a file that has been opened
  246  * O_DIRECT by a process.  In general, having processes mmap the file while
  247  * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
  248  * this by default to prevent DoS attacks - to prevent a malicious user from
  249  * opening up files O_DIRECT preventing other users from mmap'ing these
  250  * files.  "Protected" environments where stricter consistency guarantees are
  251  * required can disable this knob.  The process that opened the file O_DIRECT
  252  * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
  253  * meaningful.
  254  */
/* vfs.nfs.nfs_directio_allow_mmap: enabled (1) by default. */
int newnfs_directio_allow_mmap = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
           &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");

/*
 * Read-only sysctls exporting the access cache hit/miss counters.
 * Compiled out; the counters themselves are still updated in nfs_access().
 */
#if 0
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
           &newnfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");

SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
           &newnfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
#endif

/*
 * Union of all six ACCESS bits.  nfs_access() builds the same mask by
 * hand for its blanket request instead of using this macro.
 */
#define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY                \
                         | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \
                         | NFSACCESS_DELETE | NFSACCESS_LOOKUP)
  270 
  271 /*
  272  * SMP Locking Note :
  273  * The list of locks after the description of the lock is the ordering
  274  * of other locks acquired with the lock held.
  275  * np->n_mtx : Protects the fields in the nfsnode.
  276        VM Object Lock
  277        VI_MTX (acquired indirectly)
  278  * nmp->nm_mtx : Protects the fields in the nfsmount.
  279        rep->r_mtx
  280  * ncl_iod_mutex : Global lock, protects shared nfsiod state.
  281  * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
  282        nmp->nm_mtx
  283        rep->r_mtx
  284  * rep->r_mtx : Protects the fields in an nfsreq.
  285  */
  286 
  287 static int
  288 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td,
  289     struct ucred *cred, u_int32_t *retmode)
  290 {
  291         int error = 0, attrflag, i, lrupos;
  292         u_int32_t rmode;
  293         struct nfsnode *np = VTONFS(vp);
  294         struct nfsvattr nfsva;
  295 
  296         error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag,
  297             &rmode, NULL);
  298         if (attrflag)
  299                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
  300         if (!error) {
  301                 lrupos = 0;
  302                 mtx_lock(&np->n_mtx);
  303                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
  304                         if (np->n_accesscache[i].uid == cred->cr_uid) {
  305                                 np->n_accesscache[i].mode = rmode;
  306                                 np->n_accesscache[i].stamp = time_second;
  307                                 break;
  308                         }
  309                         if (i > 0 && np->n_accesscache[i].stamp <
  310                             np->n_accesscache[lrupos].stamp)
  311                                 lrupos = i;
  312                 }
  313                 if (i == NFS_ACCESSCACHESIZE) {
  314                         np->n_accesscache[lrupos].uid = cred->cr_uid;
  315                         np->n_accesscache[lrupos].mode = rmode;
  316                         np->n_accesscache[lrupos].stamp = time_second;
  317                 }
  318                 mtx_unlock(&np->n_mtx);
  319                 if (retmode != NULL)
  320                         *retmode = rmode;
  321                 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
  322         } else if (NFS_ISV4(vp)) {
  323                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
  324         }
  325 #ifdef KDTRACE_HOOKS
  326         if (error != 0)
  327                 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
  328                     error);
  329 #endif
  330         return (error);
  331 }
  332 
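/*
 * nfs access vnode op.
 * For nfs version 2, defer to nfsspec_access(); in addition, when root
 * asks for read access to a non-empty file, attempt a small read rpc to
 * catch a server that maps root to another uid. File accesses may still
 * fail later.
 * For nfs version 3 or 4, use the access rpc (or a recent cached result
 * of it) to check accessibility. If file modes are changed on the server,
 * accesses might still fail later.
 */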
  339 static int
  340 nfs_access(struct vop_access_args *ap)
  341 {
  342         struct vnode *vp = ap->a_vp;
  343         int error = 0, i, gotahit;
  344         u_int32_t mode, wmode, rmode;
  345         int v34 = NFS_ISV34(vp);
  346         struct nfsnode *np = VTONFS(vp);
  347 
  348         /*
  349          * Disallow write attempts on filesystems mounted read-only;
  350          * unless the file is a socket, fifo, or a block or character
  351          * device resident on the filesystem.
  352          */
  353         if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS |
  354             VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL |
  355             VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
  356                 switch (vp->v_type) {
  357                 case VREG:
  358                 case VDIR:
  359                 case VLNK:
  360                         return (EROFS);
  361                 default:
  362                         break;
  363                 }
  364         }
  365         /*
  366          * For nfs v3 or v4, check to see if we have done this recently, and if
  367          * so return our cached result instead of making an ACCESS call.
  368          * If not, do an access rpc, otherwise you are stuck emulating
  369          * ufs_access() locally using the vattr. This may not be correct,
  370          * since the server may apply other access criteria such as
  371          * client uid-->server uid mapping that we do not know about.
  372          */
  373         if (v34) {
  374                 if (ap->a_accmode & VREAD)
  375                         mode = NFSACCESS_READ;
  376                 else
  377                         mode = 0;
  378                 if (vp->v_type != VDIR) {
  379                         if (ap->a_accmode & VWRITE)
  380                                 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
  381                         if (ap->a_accmode & VAPPEND)
  382                                 mode |= NFSACCESS_EXTEND;
  383                         if (ap->a_accmode & VEXEC)
  384                                 mode |= NFSACCESS_EXECUTE;
  385                         if (ap->a_accmode & VDELETE)
  386                                 mode |= NFSACCESS_DELETE;
  387                 } else {
  388                         if (ap->a_accmode & VWRITE)
  389                                 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
  390                         if (ap->a_accmode & VAPPEND)
  391                                 mode |= NFSACCESS_EXTEND;
  392                         if (ap->a_accmode & VEXEC)
  393                                 mode |= NFSACCESS_LOOKUP;
  394                         if (ap->a_accmode & VDELETE)
  395                                 mode |= NFSACCESS_DELETE;
  396                         if (ap->a_accmode & VDELETE_CHILD)
  397                                 mode |= NFSACCESS_MODIFY;
  398                 }
  399                 /* XXX safety belt, only make blanket request if caching */
  400                 if (nfsaccess_cache_timeout > 0) {
  401                         wmode = NFSACCESS_READ | NFSACCESS_MODIFY |
  402                                 NFSACCESS_EXTEND | NFSACCESS_EXECUTE |
  403                                 NFSACCESS_DELETE | NFSACCESS_LOOKUP;
  404                 } else {
  405                         wmode = mode;
  406                 }
  407 
  408                 /*
  409                  * Does our cached result allow us to give a definite yes to
  410                  * this request?
  411                  */
  412                 gotahit = 0;
  413                 mtx_lock(&np->n_mtx);
  414                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
  415                         if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
  416                             if (time_second < (np->n_accesscache[i].stamp
  417                                 + nfsaccess_cache_timeout) &&
  418                                 (np->n_accesscache[i].mode & mode) == mode) {
  419                                 NFSINCRGLOBAL(newnfsstats.accesscache_hits);
  420                                 gotahit = 1;
  421                             }
  422                             break;
  423                         }
  424                 }
  425                 mtx_unlock(&np->n_mtx);
  426 #ifdef KDTRACE_HOOKS
  427                 if (gotahit != 0)
  428                         KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
  429                             ap->a_cred->cr_uid, mode);
  430                 else
  431                         KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
  432                             ap->a_cred->cr_uid, mode);
  433 #endif
  434                 if (gotahit == 0) {
  435                         /*
  436                          * Either a no, or a don't know.  Go to the wire.
  437                          */
  438                         NFSINCRGLOBAL(newnfsstats.accesscache_misses);
  439                         error = nfs34_access_otw(vp, wmode, ap->a_td,
  440                             ap->a_cred, &rmode);
  441                         if (!error &&
  442                             (rmode & mode) != mode)
  443                                 error = EACCES;
  444                 }
  445                 return (error);
  446         } else {
  447                 if ((error = nfsspec_access(ap)) != 0) {
  448                         return (error);
  449                 }
  450                 /*
  451                  * Attempt to prevent a mapped root from accessing a file
  452                  * which it shouldn't.  We try to read a byte from the file
  453                  * if the user is root and the file is not zero length.
  454                  * After calling nfsspec_access, we should have the correct
  455                  * file size cached.
  456                  */
  457                 mtx_lock(&np->n_mtx);
  458                 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
  459                     && VTONFS(vp)->n_size > 0) {
  460                         struct iovec aiov;
  461                         struct uio auio;
  462                         char buf[1];
  463 
  464                         mtx_unlock(&np->n_mtx);
  465                         aiov.iov_base = buf;
  466                         aiov.iov_len = 1;
  467                         auio.uio_iov = &aiov;
  468                         auio.uio_iovcnt = 1;
  469                         auio.uio_offset = 0;
  470                         auio.uio_resid = 1;
  471                         auio.uio_segflg = UIO_SYSSPACE;
  472                         auio.uio_rw = UIO_READ;
  473                         auio.uio_td = ap->a_td;
  474 
  475                         if (vp->v_type == VREG)
  476                                 error = ncl_readrpc(vp, &auio, ap->a_cred);
  477                         else if (vp->v_type == VDIR) {
  478                                 char* bp;
  479                                 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
  480                                 aiov.iov_base = bp;
  481                                 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
  482                                 error = ncl_readdirrpc(vp, &auio, ap->a_cred,
  483                                     ap->a_td);
  484                                 free(bp, M_TEMP);
  485                         } else if (vp->v_type == VLNK)
  486                                 error = ncl_readlinkrpc(vp, &auio, ap->a_cred);
  487                         else
  488                                 error = EACCES;
  489                 } else
  490                         mtx_unlock(&np->n_mtx);
  491                 return (error);
  492         }
  493 }
  494 
  495 
  496 /*
  497  * nfs open vnode op
  498  * Check to see if the type is ok
  499  * and that deletion is not in progress.
  500  * For paged in text files, you will need to flush the page cache
  501  * if consistency is lost.
  502  */
  503 /* ARGSUSED */
/*
 * ap->a_vp   - vnode being opened
 * ap->a_mode - open flags (O_DIRECT is examined here)
 * ap->a_cred - credentials used for the rpcs
 * ap->a_td   - calling thread
 *
 * Returns 0 on success, EOPNOTSUPP for vnode types other than VREG,
 * VDIR and VLNK, or the error of the failing rpc / buffer flush.
 * On an NFSv4 mount, every failure after the Open succeeded calls
 * nfsrpc_close() before returning.
 *
 * np->n_mtx is dropped around ncl_vinvalbuf() and VOP_GETATTR() and
 * retaken afterwards; on the success path it is held from the end of
 * the validation step until just before vnode_create_vobject().
 */
static int
nfs_open(struct vop_open_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct nfsnode *np = VTONFS(vp);
        struct vattr vattr;
        int error;
        int fmode = ap->a_mode;

        if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
                return (EOPNOTSUPP);

        /*
         * For NFSv4, we need to do the Open Op before cache validation,
         * so that we conform to RFC3530 Sec. 9.3.1.
         */
        if (NFS_ISV4(vp)) {
                error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td);
                if (error) {
                        error = nfscl_maperr(ap->a_td, error, (uid_t)0,
                            (gid_t)0);
                        return (error);
                }
        }

        /*
         * Now, if this Open will be doing reading, re-validate/flush the
         * cache, so that Close/Open coherency is maintained.
         */
        mtx_lock(&np->n_mtx);
        if (np->n_flag & NMODIFIED) {
                /*
                 * Locally modified: flush and invalidate the buffers, then
                 * force fresh attributes by zeroing n_attrstamp.
                 */
                mtx_unlock(&np->n_mtx);
                error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
                /* Only EINTR and EIO abort the open here. */
                if (error == EINTR || error == EIO) {
                        if (NFS_ISV4(vp))
                                (void) nfsrpc_close(vp, 0, ap->a_td);
                        return (error);
                }
                mtx_lock(&np->n_mtx);
                np->n_attrstamp = 0;
                KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
                if (vp->v_type == VDIR)
                        np->n_direofoffset = 0;
                mtx_unlock(&np->n_mtx);
                error = VOP_GETATTR(vp, &vattr, ap->a_cred);
                if (error) {
                        if (NFS_ISV4(vp))
                                (void) nfsrpc_close(vp, 0, ap->a_td);
                        return (error);
                }
                /* Remember the modify time (and v4 change) just fetched. */
                mtx_lock(&np->n_mtx);
                np->n_mtime = vattr.va_mtime;
                if (NFS_ISV4(vp))
                        np->n_change = vattr.va_filerev;
        } else {
                /*
                 * Not locally modified: fetch the attributes and only
                 * invalidate the buffers if the remembered modify time
                 * (or, for NFSv4, the change attribute) no longer matches.
                 */
                mtx_unlock(&np->n_mtx);
                error = VOP_GETATTR(vp, &vattr, ap->a_cred);
                if (error) {
                        if (NFS_ISV4(vp))
                                (void) nfsrpc_close(vp, 0, ap->a_td);
                        return (error);
                }
                mtx_lock(&np->n_mtx);
                if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) ||
                    NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
                        if (vp->v_type == VDIR)
                                np->n_direofoffset = 0;
                        mtx_unlock(&np->n_mtx);
                        error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
                        /* As above, only EINTR and EIO abort the open. */
                        if (error == EINTR || error == EIO) {
                                if (NFS_ISV4(vp))
                                        (void) nfsrpc_close(vp, 0, ap->a_td);
                                return (error);
                        }
                        mtx_lock(&np->n_mtx);
                        np->n_mtime = vattr.va_mtime;
                        if (NFS_ISV4(vp))
                                np->n_change = vattr.va_filerev;
                }
        }

        /* np->n_mtx is held here on every path. */

        /*
         * If the object has >= 1 O_DIRECT active opens, we disable caching.
         */
        if (newnfs_directio_enable && (fmode & O_DIRECT) &&
            (vp->v_type == VREG)) {
                if (np->n_directio_opens == 0) {
                        /*
                         * First O_DIRECT open: flush the buffers before
                         * marking the node NNONCACHE.  Unlike the flushes
                         * above, any error aborts the open.
                         */
                        mtx_unlock(&np->n_mtx);
                        error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
                        if (error) {
                                if (NFS_ISV4(vp))
                                        (void) nfsrpc_close(vp, 0, ap->a_td);
                                return (error);
                        }
                        mtx_lock(&np->n_mtx);
                        np->n_flag |= NNONCACHE;
                }
                np->n_directio_opens++;
        }
        mtx_unlock(&np->n_mtx);
        /* Uses the size from the attributes fetched above. */
        vnode_create_vobject(vp, vattr.va_size, ap->a_td);
        return (0);
}
  607 
  608 /*
  609  * nfs close vnode op
  610  * What an NFS client should do upon close after writing is a debatable issue.
  611  * Most NFS clients push delayed writes to the server upon close, basically for
  612  * two reasons:
  613  * 1 - So that any write errors may be reported back to the client process
  614  *     doing the close system call. By far the two most likely errors are
  615  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
  616  * 2 - To put a worst case upper bound on cache inconsistency between
  617  *     multiple clients for the file.
  618  * There is also a consistency problem for Version 2 of the protocol w.r.t.
  619  * not being able to tell if other clients are writing a file concurrently,
  620  * since there is no way of knowing if the changed modify time in the reply
  621  * is only due to the write for this client.
  622  * (NFS Version 3 provides weak cache consistency data in the reply that
  623  *  should be sufficient to detect and handle this case.)
  624  *
  625  * The current code does the following:
  626  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
  627  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
  628  *                     or commit them (this satisfies 1 and 2 except for the
  629  *                     case where the server crashes after this close but
  630  *                     before the commit RPC, which is felt to be "good
  631  *                     enough". Changing the last argument to ncl_flush() to
  632  *                     a 1 would force a commit operation, if it is felt a
  633  *                     commit is necessary now.
  634  * for NFS Version 4 - flush the dirty buffers and commit them, if
  635  *                     nfscl_mustflush() says this is necessary.
  636  *                     It is necessary if there is no write delegation held,
  637  *                     in order to satisfy open/close coherency.
  638  *                     If the file isn't cached on local stable storage,
  639  *                     it may be necessary in order to detect "out of space"
  640  *                     errors from the server, if the write delegation
  641  *                     issued by the server doesn't allow the file to grow.
  642  */
  643 /* ARGSUSED */
  644 static int
  645 nfs_close(struct vop_close_args *ap)
  646 {
  647         struct vnode *vp = ap->a_vp;
  648         struct nfsnode *np = VTONFS(vp);
  649         struct nfsvattr nfsva;
  650         struct ucred *cred;
  651         int error = 0, ret, localcred = 0;
  652         int fmode = ap->a_fflag;
  653 
  654         if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF))
  655                 return (0);
  656         /*
  657          * During shutdown, a_cred isn't valid, so just use root.
  658          */
  659         if (ap->a_cred == NOCRED) {
  660                 cred = newnfs_getcred();
  661                 localcred = 1;
  662         } else {
  663                 cred = ap->a_cred;
  664         }
  665         if (vp->v_type == VREG) {
  666             /*
  667              * Examine and clean dirty pages, regardless of NMODIFIED.
  668              * This closes a major hole in close-to-open consistency.
  669              * We want to push out all dirty pages (and buffers) on
  670              * close, regardless of whether they were dirtied by
  671              * mmap'ed writes or via write().
  672              */
  673             if (nfs_clean_pages_on_close && vp->v_object) {
  674                 VM_OBJECT_LOCK(vp->v_object);
  675                 vm_object_page_clean(vp->v_object, 0, 0, 0);
  676                 VM_OBJECT_UNLOCK(vp->v_object);
  677             }
  678             mtx_lock(&np->n_mtx);
  679             if (np->n_flag & NMODIFIED) {
  680                 mtx_unlock(&np->n_mtx);
  681                 if (NFS_ISV3(vp)) {
  682                     /*
  683                      * Under NFSv3 we have dirty buffers to dispose of.  We
  684                      * must flush them to the NFS server.  We have the option
  685                      * of waiting all the way through the commit rpc or just
  686                      * waiting for the initial write.  The default is to only
  687                      * wait through the initial write so the data is in the
  688                      * server's cache, which is roughly similar to the state
  689                      * a standard disk subsystem leaves the file in on close().
  690                      *
  691                      * We cannot clear the NMODIFIED bit in np->n_flag due to
  692                      * potential races with other processes, and certainly
  693                      * cannot clear it if we don't commit.
  694                      * These races occur when there is no longer the old
  695                      * traditional vnode locking implemented for Vnode Ops.
  696                      */
  697                     int cm = newnfs_commit_on_close ? 1 : 0;
  698                     error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0);
  699                     /* np->n_flag &= ~NMODIFIED; */
  700                 } else if (NFS_ISV4(vp)) { 
  701                         if (nfscl_mustflush(vp) != 0) {
  702                                 int cm = newnfs_commit_on_close ? 1 : 0;
  703                                 error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td,
  704                                     cm, 0);
  705                                 /*
  706                                  * as above w.r.t races when clearing
  707                                  * NMODIFIED.
  708                                  * np->n_flag &= ~NMODIFIED;
  709                                  */
  710                         }
  711                 } else
  712                     error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  713                 mtx_lock(&np->n_mtx);
  714             }
  715             /* 
  716              * Invalidate the attribute cache in all cases.
  717              * An open is going to fetch fresh attrs any way, other procs
  718              * on this node that have file open will be forced to do an 
  719              * otw attr fetch, but this is safe.
  720              * --> A user found that their RPC count dropped by 20% when
  721              *     this was commented out and I can't see any requirement
  722              *     for it, so I've disabled it when negative lookups are
  723              *     enabled. (What does this have to do with negative lookup
  724              *     caching? Well nothing, except it was reported by the
  725              *     same user that needed negative lookup caching and I wanted
  726              *     there to be a way to disable it to see if it
  727              *     is the cause of some caching/coherency issue that might
  728              *     crop up.)
  729              */
  730             if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) {
  731                     np->n_attrstamp = 0;
  732                     KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
  733             }
  734             if (np->n_flag & NWRITEERR) {
  735                 np->n_flag &= ~NWRITEERR;
  736                 error = np->n_error;
  737             }
  738             mtx_unlock(&np->n_mtx);
  739         }
  740 
  741         if (NFS_ISV4(vp)) {
  742                 /*
  743                  * Get attributes so "change" is up to date.
  744                  */
  745                 if (error == 0 && nfscl_mustflush(vp) != 0) {
  746                         ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva,
  747                             NULL);
  748                         if (!ret) {
  749                                 np->n_change = nfsva.na_filerev;
  750                                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL,
  751                                     NULL, 0, 0);
  752                         }
  753                 }
  754 
  755                 /*
  756                  * and do the close.
  757                  */
  758                 ret = nfsrpc_close(vp, 0, ap->a_td);
  759                 if (!error && ret)
  760                         error = ret;
  761                 if (error)
  762                         error = nfscl_maperr(ap->a_td, error, (uid_t)0,
  763                             (gid_t)0);
  764         }
  765         if (newnfs_directio_enable)
  766                 KASSERT((np->n_directio_asyncwr == 0),
  767                         ("nfs_close: dirty unflushed (%d) directio buffers\n",
  768                          np->n_directio_asyncwr));
  769         if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
  770                 mtx_lock(&np->n_mtx);
  771                 KASSERT((np->n_directio_opens > 0), 
  772                         ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
  773                 np->n_directio_opens--;
  774                 if (np->n_directio_opens == 0)
  775                         np->n_flag &= ~NNONCACHE;
  776                 mtx_unlock(&np->n_mtx);
  777         }
  778         if (localcred)
  779                 NFSFREECRED(cred);
  780         return (error);
  781 }
  782 
  783 /*
  784  * nfs getattr call from vfs.
  785  */
  786 static int
  787 nfs_getattr(struct vop_getattr_args *ap)
  788 {
  789         struct vnode *vp = ap->a_vp;
  790         struct thread *td = curthread;  /* XXX */
  791         struct nfsnode *np = VTONFS(vp);
  792         int error = 0;
  793         struct nfsvattr nfsva;
  794         struct vattr *vap = ap->a_vap;
  795         struct vattr vattr;
  796 
  797         /*
  798          * Update local times for special files.
  799          */
  800         mtx_lock(&np->n_mtx);
  801         if (np->n_flag & (NACC | NUPD))
  802                 np->n_flag |= NCHG;
  803         mtx_unlock(&np->n_mtx);
  804         /*
  805          * First look in the cache.
  806          */
  807         if (ncl_getattrcache(vp, &vattr) == 0) {
  808                 vap->va_type = vattr.va_type;
  809                 vap->va_mode = vattr.va_mode;
  810                 vap->va_nlink = vattr.va_nlink;
  811                 vap->va_uid = vattr.va_uid;
  812                 vap->va_gid = vattr.va_gid;
  813                 vap->va_fsid = vattr.va_fsid;
  814                 vap->va_fileid = vattr.va_fileid;
  815                 vap->va_size = vattr.va_size;
  816                 vap->va_blocksize = vattr.va_blocksize;
  817                 vap->va_atime = vattr.va_atime;
  818                 vap->va_mtime = vattr.va_mtime;
  819                 vap->va_ctime = vattr.va_ctime;
  820                 vap->va_gen = vattr.va_gen;
  821                 vap->va_flags = vattr.va_flags;
  822                 vap->va_rdev = vattr.va_rdev;
  823                 vap->va_bytes = vattr.va_bytes;
  824                 vap->va_filerev = vattr.va_filerev;
  825                 /*
  826                  * Get the local modify time for the case of a write
  827                  * delegation.
  828                  */
  829                 nfscl_deleggetmodtime(vp, &vap->va_mtime);
  830                 return (0);
  831         }
  832 
  833         if (NFS_ISV34(vp) && nfs_prime_access_cache &&
  834             nfsaccess_cache_timeout > 0) {
  835                 NFSINCRGLOBAL(newnfsstats.accesscache_misses);
  836                 nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL);
  837                 if (ncl_getattrcache(vp, ap->a_vap) == 0) {
  838                         nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime);
  839                         return (0);
  840                 }
  841         }
  842         error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL);
  843         if (!error)
  844                 error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0);
  845         if (!error) {
  846                 /*
  847                  * Get the local modify time for the case of a write
  848                  * delegation.
  849                  */
  850                 nfscl_deleggetmodtime(vp, &vap->va_mtime);
  851         } else if (NFS_ISV4(vp)) {
  852                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
  853         }
  854         return (error);
  855 }
  856 
  857 /*
  858  * nfs setattr call.
  859  */
  860 static int
  861 nfs_setattr(struct vop_setattr_args *ap)
  862 {
  863         struct vnode *vp = ap->a_vp;
  864         struct nfsnode *np = VTONFS(vp);
  865         struct thread *td = curthread;  /* XXX */
  866         struct vattr *vap = ap->a_vap;
  867         int error = 0;
  868         u_quad_t tsize;
  869 
  870 #ifndef nolint
  871         tsize = (u_quad_t)0;
  872 #endif
  873 
  874         /*
  875          * Setting of flags and marking of atimes are not supported.
  876          */
  877         if (vap->va_flags != VNOVAL)
  878                 return (EOPNOTSUPP);
  879 
  880         /*
  881          * Disallow write attempts if the filesystem is mounted read-only.
  882          */
  883         if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
  884             vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
  885             vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
  886             (vp->v_mount->mnt_flag & MNT_RDONLY))
  887                 return (EROFS);
  888         if (vap->va_size != VNOVAL) {
  889                 switch (vp->v_type) {
  890                 case VDIR:
  891                         return (EISDIR);
  892                 case VCHR:
  893                 case VBLK:
  894                 case VSOCK:
  895                 case VFIFO:
  896                         if (vap->va_mtime.tv_sec == VNOVAL &&
  897                             vap->va_atime.tv_sec == VNOVAL &&
  898                             vap->va_mode == (mode_t)VNOVAL &&
  899                             vap->va_uid == (uid_t)VNOVAL &&
  900                             vap->va_gid == (gid_t)VNOVAL)
  901                                 return (0);             
  902                         vap->va_size = VNOVAL;
  903                         break;
  904                 default:
  905                         /*
  906                          * Disallow write attempts if the filesystem is
  907                          * mounted read-only.
  908                          */
  909                         if (vp->v_mount->mnt_flag & MNT_RDONLY)
  910                                 return (EROFS);
  911                         /*
  912                          *  We run vnode_pager_setsize() early (why?),
  913                          * we must set np->n_size now to avoid vinvalbuf
  914                          * V_SAVE races that might setsize a lower
  915                          * value.
  916                          */
  917                         mtx_lock(&np->n_mtx);
  918                         tsize = np->n_size;
  919                         mtx_unlock(&np->n_mtx);
  920                         error = ncl_meta_setsize(vp, ap->a_cred, td,
  921                             vap->va_size);
  922                         mtx_lock(&np->n_mtx);
  923                         if (np->n_flag & NMODIFIED) {
  924                             tsize = np->n_size;
  925                             mtx_unlock(&np->n_mtx);
  926                             if (vap->va_size == 0)
  927                                 error = ncl_vinvalbuf(vp, 0, td, 1);
  928                             else
  929                                 error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
  930                             if (error) {
  931                                 vnode_pager_setsize(vp, tsize);
  932                                 return (error);
  933                             }
  934                             /*
  935                              * Call nfscl_delegmodtime() to set the modify time
  936                              * locally, as required.
  937                              */
  938                             nfscl_delegmodtime(vp);
  939                         } else
  940                             mtx_unlock(&np->n_mtx);
  941                         /*
  942                          * np->n_size has already been set to vap->va_size
  943                          * in ncl_meta_setsize(). We must set it again since
  944                          * nfs_loadattrcache() could be called through
  945                          * ncl_meta_setsize() and could modify np->n_size.
  946                          */
  947                         mtx_lock(&np->n_mtx);
  948                         np->n_vattr.na_size = np->n_size = vap->va_size;
  949                         mtx_unlock(&np->n_mtx);
  950                 };
  951         } else {
  952                 mtx_lock(&np->n_mtx);
  953                 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 
  954                     (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
  955                         mtx_unlock(&np->n_mtx);
  956                         if ((error = ncl_vinvalbuf(vp, V_SAVE, td, 1)) != 0 &&
  957                             (error == EINTR || error == EIO))
  958                                 return (error);
  959                 } else
  960                         mtx_unlock(&np->n_mtx);
  961         }
  962         error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
  963         if (error && vap->va_size != VNOVAL) {
  964                 mtx_lock(&np->n_mtx);
  965                 np->n_size = np->n_vattr.na_size = tsize;
  966                 vnode_pager_setsize(vp, tsize);
  967                 mtx_unlock(&np->n_mtx);
  968         }
  969         return (error);
  970 }
  971 
  972 /*
  973  * Do an nfs setattr rpc.
  974  */
  975 static int
  976 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
  977     struct thread *td)
  978 {
  979         struct nfsnode *np = VTONFS(vp);
  980         int error, ret, attrflag, i;
  981         struct nfsvattr nfsva;
  982 
  983         if (NFS_ISV34(vp)) {
  984                 mtx_lock(&np->n_mtx);
  985                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
  986                         np->n_accesscache[i].stamp = 0;
  987                 np->n_flag |= NDELEGMOD;
  988                 mtx_unlock(&np->n_mtx);
  989                 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
  990         }
  991         error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag,
  992             NULL);
  993         if (attrflag) {
  994                 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
  995                 if (ret && !error)
  996                         error = ret;
  997         }
  998         if (error && NFS_ISV4(vp))
  999                 error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid);
 1000         return (error);
 1001 }
 1002 
 1003 /*
 1004  * nfs lookup call, one step at a time...
 1005  * First look in cache
 1006  * If not found, unlock the directory nfsnode and do the rpc
 1007  */
 1008 static int
 1009 nfs_lookup(struct vop_lookup_args *ap)
 1010 {
 1011         struct componentname *cnp = ap->a_cnp;
 1012         struct vnode *dvp = ap->a_dvp;
 1013         struct vnode **vpp = ap->a_vpp;
 1014         struct mount *mp = dvp->v_mount;
 1015         int flags = cnp->cn_flags;
 1016         struct vnode *newvp;
 1017         struct nfsmount *nmp;
 1018         struct nfsnode *np, *newnp;
 1019         int error = 0, attrflag, dattrflag, ltype;
 1020         struct thread *td = cnp->cn_thread;
 1021         struct nfsfh *nfhp;
 1022         struct nfsvattr dnfsva, nfsva;
 1023         struct vattr vattr;
 1024         struct timespec dmtime;
 1025         
 1026         *vpp = NULLVP;
 1027         if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
 1028             (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 1029                 return (EROFS);
 1030         if (dvp->v_type != VDIR)
 1031                 return (ENOTDIR);
 1032         nmp = VFSTONFS(mp);
 1033         np = VTONFS(dvp);
 1034 
 1035         /* For NFSv4, wait until any remove is done. */
 1036         mtx_lock(&np->n_mtx);
 1037         while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
 1038                 np->n_flag |= NREMOVEWANT;
 1039                 (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
 1040         }
 1041         mtx_unlock(&np->n_mtx);
 1042 
 1043         if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
 1044                 return (error);
 1045         error = cache_lookup(dvp, vpp, cnp);
 1046         if (error > 0 && error != ENOENT)
 1047                 return (error);
 1048         if (error == -1) {
 1049                 /*
 1050                  * We only accept a positive hit in the cache if the
 1051                  * change time of the file matches our cached copy.
 1052                  * Otherwise, we discard the cache entry and fallback
 1053                  * to doing a lookup RPC.
 1054                  *
 1055                  * To better handle stale file handles and attributes,
 1056                  * clear the attribute cache of this node if it is a
 1057                  * leaf component, part of an open() call, and not
 1058                  * locally modified before fetching the attributes.
 1059                  * This should allow stale file handles to be detected
 1060                  * here where we can fall back to a LOOKUP RPC to
 1061                  * recover rather than having nfs_open() detect the
 1062                  * stale file handle and failing open(2) with ESTALE.
 1063                  */
 1064                 newvp = *vpp;
 1065                 newnp = VTONFS(newvp);
 1066                 if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
 1067                     (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 1068                     !(newnp->n_flag & NMODIFIED)) {
 1069                         mtx_lock(&newnp->n_mtx);
 1070                         newnp->n_attrstamp = 0;
 1071                         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
 1072                         mtx_unlock(&newnp->n_mtx);
 1073                 }
 1074                 if (nfscl_nodeleg(newvp, 0) == 0 ||
 1075                     (VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
 1076                     timespeccmp(&vattr.va_ctime, &newnp->n_ctime, ==))) {
 1077                         NFSINCRGLOBAL(newnfsstats.lookupcache_hits);
 1078                         if (cnp->cn_nameiop != LOOKUP &&
 1079                             (flags & ISLASTCN))
 1080                                 cnp->cn_flags |= SAVENAME;
 1081                         return (0);
 1082                 }
 1083                 cache_purge(newvp);
 1084                 if (dvp != newvp)
 1085                         vput(newvp);
 1086                 else 
 1087                         vrele(newvp);
 1088                 *vpp = NULLVP;
 1089         } else if (error == ENOENT) {
 1090                 if (dvp->v_iflag & VI_DOOMED)
 1091                         return (ENOENT);
 1092                 /*
 1093                  * We only accept a negative hit in the cache if the
 1094                  * modification time of the parent directory matches
 1095                  * our cached copy.  Otherwise, we discard all of the
 1096                  * negative cache entries for this directory. We also
 1097                  * only trust -ve cache entries for less than
 1098                  * nm_negative_namecache_timeout seconds.
 1099                  */
 1100                 if ((u_int)(ticks - np->n_dmtime_ticks) <
 1101                     (nmp->nm_negnametimeo * hz) &&
 1102                     VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
 1103                     timespeccmp(&vattr.va_mtime, &np->n_dmtime, ==)) {
 1104                         NFSINCRGLOBAL(newnfsstats.lookupcache_hits);
 1105                         return (ENOENT);
 1106                 }
 1107                 cache_purge_negative(dvp);
 1108                 mtx_lock(&np->n_mtx);
 1109                 timespecclear(&np->n_dmtime);
 1110                 mtx_unlock(&np->n_mtx);
 1111         }
 1112 
 1113         /*
 1114          * Cache the modification time of the parent directory in case
 1115          * the lookup fails and results in adding the first negative
 1116          * name cache entry for the directory.  Since this is reading
 1117          * a single time_t, don't bother with locking.  The
 1118          * modification time may be a bit stale, but it must be read
 1119          * before performing the lookup RPC to prevent a race where
 1120          * another lookup updates the timestamp on the directory after
 1121          * the lookup RPC has been performed on the server but before
 1122          * n_dmtime is set at the end of this function.
 1123          */
 1124         dmtime = np->n_vattr.na_mtime;
 1125         error = 0;
 1126         newvp = NULLVP;
 1127         NFSINCRGLOBAL(newnfsstats.lookupcache_misses);
 1128         error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 1129             cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 1130             NULL);
 1131         if (dattrflag)
 1132                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 1133         if (error) {
 1134                 if (newvp != NULLVP) {
 1135                         vput(newvp);
 1136                         *vpp = NULLVP;
 1137                 }
 1138 
 1139                 if (error != ENOENT) {
 1140                         if (NFS_ISV4(dvp))
 1141                                 error = nfscl_maperr(td, error, (uid_t)0,
 1142                                     (gid_t)0);
 1143                         return (error);
 1144                 }
 1145 
 1146                 /* The requested file was not found. */
 1147                 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
 1148                     (flags & ISLASTCN)) {
 1149                         /*
 1150                          * XXX: UFS does a full VOP_ACCESS(dvp,
 1151                          * VWRITE) here instead of just checking
 1152                          * MNT_RDONLY.
 1153                          */
 1154                         if (mp->mnt_flag & MNT_RDONLY)
 1155                                 return (EROFS);
 1156                         cnp->cn_flags |= SAVENAME;
 1157                         return (EJUSTRETURN);
 1158                 }
 1159 
 1160                 if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
 1161                         /*
 1162                          * Maintain n_dmtime as the modification time
 1163                          * of the parent directory when the oldest -ve
 1164                          * name cache entry for this directory was
 1165                          * added.  If a -ve cache entry has already
 1166                          * been added with a newer modification time
 1167                          * by a concurrent lookup, then don't bother
 1168                          * adding a cache entry.  The modification
 1169                          * time of the directory might have changed
 1170                          * due to the file this lookup failed to find
 1171                          * being created.  In that case a subsequent
 1172                          * lookup would incorrectly use the entry
 1173                          * added here instead of doing an extra
 1174                          * lookup.
 1175                          */
 1176                         mtx_lock(&np->n_mtx);
 1177                         if (timespeccmp(&np->n_dmtime, &dmtime, <=)) {
 1178                                 if (!timespecisset(&np->n_dmtime)) {
 1179                                         np->n_dmtime = dmtime;
 1180                                         np->n_dmtime_ticks = ticks;
 1181                                 }
 1182                                 mtx_unlock(&np->n_mtx);
 1183                                 cache_enter(dvp, NULL, cnp);
 1184                         } else
 1185                                 mtx_unlock(&np->n_mtx);
 1186                 }
 1187                 return (ENOENT);
 1188         }
 1189 
 1190         /*
 1191          * Handle RENAME case...
 1192          */
 1193         if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
 1194                 if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 1195                         FREE((caddr_t)nfhp, M_NFSFH);
 1196                         return (EISDIR);
 1197                 }
 1198                 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 1199                     LK_EXCLUSIVE);
 1200                 if (error)
 1201                         return (error);
 1202                 newvp = NFSTOV(np);
 1203                 if (attrflag)
 1204                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1205                             0, 1);
 1206                 *vpp = newvp;
 1207                 cnp->cn_flags |= SAVENAME;
 1208                 return (0);
 1209         }
 1210 
 1211         if (flags & ISDOTDOT) {
 1212                 ltype = NFSVOPISLOCKED(dvp);
 1213                 error = vfs_busy(mp, MBF_NOWAIT);
 1214                 if (error != 0) {
 1215                         vfs_ref(mp);
 1216                         NFSVOPUNLOCK(dvp, 0);
 1217                         error = vfs_busy(mp, 0);
 1218                         NFSVOPLOCK(dvp, ltype | LK_RETRY);
 1219                         vfs_rel(mp);
 1220                         if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
 1221                                 vfs_unbusy(mp);
 1222                                 error = ENOENT;
 1223                         }
 1224                         if (error != 0)
 1225                                 return (error);
 1226                 }
 1227                 NFSVOPUNLOCK(dvp, 0);
 1228                 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 1229                     cnp->cn_lkflags);
 1230                 if (error == 0)
 1231                         newvp = NFSTOV(np);
 1232                 vfs_unbusy(mp);
 1233                 if (newvp != dvp)
 1234                         NFSVOPLOCK(dvp, ltype | LK_RETRY);
 1235                 if (dvp->v_iflag & VI_DOOMED) {
 1236                         if (error == 0) {
 1237                                 if (newvp == dvp)
 1238                                         vrele(newvp);
 1239                                 else
 1240                                         vput(newvp);
 1241                         }
 1242                         error = ENOENT;
 1243                 }
 1244                 if (error != 0)
 1245                         return (error);
 1246                 if (attrflag)
 1247                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1248                             0, 1);
 1249         } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 1250                 FREE((caddr_t)nfhp, M_NFSFH);
 1251                 VREF(dvp);
 1252                 newvp = dvp;
 1253                 if (attrflag)
 1254                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1255                             0, 1);
 1256         } else {
 1257                 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 1258                     cnp->cn_lkflags);
 1259                 if (error)
 1260                         return (error);
 1261                 newvp = NFSTOV(np);
 1262                 if (attrflag)
 1263                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1264                             0, 1);
 1265                 else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 1266                     !(np->n_flag & NMODIFIED)) {                        
 1267                         /*
 1268                          * Flush the attribute cache when opening a
 1269                          * leaf node to ensure that fresh attributes
 1270                          * are fetched in nfs_open() since we did not
 1271                          * fetch attributes from the LOOKUP reply.
 1272                          */
 1273                         mtx_lock(&np->n_mtx);
 1274                         np->n_attrstamp = 0;
 1275                         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
 1276                         mtx_unlock(&np->n_mtx);
 1277                 }
 1278         }
 1279         if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 1280                 cnp->cn_flags |= SAVENAME;
 1281         if ((cnp->cn_flags & MAKEENTRY) &&
 1282             (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
 1283                 np->n_ctime = np->n_vattr.na_vattr.va_ctime;
 1284                 cache_enter(dvp, newvp, cnp);
 1285         }
 1286         *vpp = newvp;
 1287         return (0);
 1288 }
 1289 
 1290 /*
 1291  * nfs read call.
 1292  * Just call ncl_bioread() to do the work.
 1293  */
 1294 static int
 1295 nfs_read(struct vop_read_args *ap)
 1296 {
 1297         struct vnode *vp = ap->a_vp;
 1298 
 1299         switch (vp->v_type) {
 1300         case VREG:
 1301                 return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
 1302         case VDIR:
 1303                 return (EISDIR);
 1304         default:
 1305                 return (EOPNOTSUPP);
 1306         }
 1307 }
 1308 
 1309 /*
 1310  * nfs readlink call
 1311  */
 1312 static int
 1313 nfs_readlink(struct vop_readlink_args *ap)
 1314 {
 1315         struct vnode *vp = ap->a_vp;
 1316 
 1317         if (vp->v_type != VLNK)
 1318                 return (EINVAL);
 1319         return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
 1320 }
 1321 
 1322 /*
 1323  * Do a readlink rpc.
 1324  * Called by ncl_doio() from below the buffer cache.
 1325  */
 1326 int
 1327 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 1328 {
 1329         int error, ret, attrflag;
 1330         struct nfsvattr nfsva;
 1331 
 1332         error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
 1333             &attrflag, NULL);
 1334         if (attrflag) {
 1335                 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 1336                 if (ret && !error)
 1337                         error = ret;
 1338         }
 1339         if (error && NFS_ISV4(vp))
 1340                 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 1341         return (error);
 1342 }
 1343 
 1344 /*
 1345  * nfs read rpc call
 1346  * Ditto above
 1347  */
 1348 int
 1349 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 1350 {
 1351         int error, ret, attrflag;
 1352         struct nfsvattr nfsva;
 1353 
 1354         error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva, &attrflag,
 1355             NULL);
 1356         if (attrflag) {
 1357                 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 1358                 if (ret && !error)
 1359                         error = ret;
 1360         }
 1361         if (error && NFS_ISV4(vp))
 1362                 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 1363         return (error);
 1364 }
 1365 
 1366 /*
 1367  * nfs write call
 1368  */
 1369 int
 1370 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
 1371     int *iomode, int *must_commit, int called_from_strategy)
 1372 {
 1373         struct nfsvattr nfsva;
 1374         int error = 0, attrflag, ret;
 1375 
 1376         error = nfsrpc_write(vp, uiop, iomode, must_commit, cred,
 1377             uiop->uio_td, &nfsva, &attrflag, NULL, called_from_strategy);
 1378         if (attrflag) {
 1379                 if (VTONFS(vp)->n_flag & ND_NFSV4)
 1380                         ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
 1381                             1);
 1382                 else
 1383                         ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
 1384                             1);
 1385                 if (ret && !error)
 1386                         error = ret;
 1387         }
 1388         if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
 1389                 *iomode = NFSWRITE_FILESYNC;
 1390         if (error && NFS_ISV4(vp))
 1391                 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 1392         return (error);
 1393 }
 1394 
 1395 /*
 1396  * nfs mknod rpc
 1397  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 1398  * mode set to specify the file type and the size field for rdev.
 1399  */
 1400 static int
 1401 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 1402     struct vattr *vap)
 1403 {
 1404         struct nfsvattr nfsva, dnfsva;
 1405         struct vnode *newvp = NULL;
 1406         struct nfsnode *np = NULL, *dnp;
 1407         struct nfsfh *nfhp;
 1408         struct vattr vattr;
 1409         int error = 0, attrflag, dattrflag;
 1410         u_int32_t rdev;
 1411 
 1412         if (vap->va_type == VCHR || vap->va_type == VBLK)
 1413                 rdev = vap->va_rdev;
 1414         else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
 1415                 rdev = 0xffffffff;
 1416         else
 1417                 return (EOPNOTSUPP);
 1418         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 1419                 return (error);
 1420         error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap,
 1421             rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva,
 1422             &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
 1423         if (!error) {
 1424                 if (!nfhp)
 1425                         (void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
 1426                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
 1427                             &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 1428                             NULL);
 1429                 if (nfhp)
 1430                         error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
 1431                             cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
 1432         }
 1433         if (dattrflag)
 1434                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 1435         if (!error) {
 1436                 newvp = NFSTOV(np);
 1437                 if (attrflag != 0) {
 1438                         error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1439                             0, 1);
 1440                         if (error != 0)
 1441                                 vput(newvp);
 1442                 }
 1443         }
 1444         if (!error) {
 1445                 if ((cnp->cn_flags & MAKEENTRY))
 1446                         cache_enter(dvp, newvp, cnp);
 1447                 *vpp = newvp;
 1448         } else if (NFS_ISV4(dvp)) {
 1449                 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
 1450                     vap->va_gid);
 1451         }
 1452         dnp = VTONFS(dvp);
 1453         mtx_lock(&dnp->n_mtx);
 1454         dnp->n_flag |= NMODIFIED;
 1455         if (!dattrflag) {
 1456                 dnp->n_attrstamp = 0;
 1457                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 1458         }
 1459         mtx_unlock(&dnp->n_mtx);
 1460         return (error);
 1461 }
 1462 
 1463 /*
 1464  * nfs mknod vop
 1465  * just call nfs_mknodrpc() to do the work.
 1466  */
 1467 /* ARGSUSED */
 1468 static int
 1469 nfs_mknod(struct vop_mknod_args *ap)
 1470 {
 1471         return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
 1472 }
 1473 
 1474 static struct mtx nfs_cverf_mtx;
 1475 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex",
 1476     MTX_DEF);
 1477 
 1478 static nfsquad_t
 1479 nfs_get_cverf(void)
 1480 {
 1481         static nfsquad_t cverf;
 1482         nfsquad_t ret;
 1483         static int cverf_initialized = 0;
 1484 
 1485         mtx_lock(&nfs_cverf_mtx);
 1486         if (cverf_initialized == 0) {
 1487                 cverf.lval[0] = arc4random();
 1488                 cverf.lval[1] = arc4random();
 1489                 cverf_initialized = 1;
 1490         } else
 1491                 cverf.qval++;
 1492         ret = cverf;
 1493         mtx_unlock(&nfs_cverf_mtx);
 1494 
 1495         return (ret);
 1496 }
 1497 
 1498 /*
 1499  * nfs file create call
 1500  */
 1501 static int
 1502 nfs_create(struct vop_create_args *ap)
 1503 {
 1504         struct vnode *dvp = ap->a_dvp;
 1505         struct vattr *vap = ap->a_vap;
 1506         struct componentname *cnp = ap->a_cnp;
 1507         struct nfsnode *np = NULL, *dnp;
 1508         struct vnode *newvp = NULL;
 1509         struct nfsmount *nmp;
 1510         struct nfsvattr dnfsva, nfsva;
 1511         struct nfsfh *nfhp;
 1512         nfsquad_t cverf;
 1513         int error = 0, attrflag, dattrflag, fmode = 0;
 1514         struct vattr vattr;
 1515 
 1516         /*
 1517          * Oops, not for me..
 1518          */
 1519         if (vap->va_type == VSOCK)
 1520                 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
 1521 
 1522         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 1523                 return (error);
 1524         if (vap->va_vaflags & VA_EXCLUSIVE)
 1525                 fmode |= O_EXCL;
 1526         dnp = VTONFS(dvp);
 1527         nmp = VFSTONFS(vnode_mount(dvp));
 1528 again:
 1529         /* For NFSv4, wait until any remove is done. */
 1530         mtx_lock(&dnp->n_mtx);
 1531         while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) {
 1532                 dnp->n_flag |= NREMOVEWANT;
 1533                 (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0);
 1534         }
 1535         mtx_unlock(&dnp->n_mtx);
 1536 
 1537         cverf = nfs_get_cverf();
 1538         error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 1539             vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva,
 1540             &nfhp, &attrflag, &dattrflag, NULL);
 1541         if (!error) {
 1542                 if (nfhp == NULL)
 1543                         (void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
 1544                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
 1545                             &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 1546                             NULL);
 1547                 if (nfhp != NULL)
 1548                         error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
 1549                             cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
 1550         }
 1551         if (dattrflag)
 1552                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 1553         if (!error) {
 1554                 newvp = NFSTOV(np);
 1555                 if (attrflag)
 1556                         error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1557                             0, 1);
 1558         }
 1559         if (error) {
 1560                 if (newvp != NULL) {
 1561                         vput(newvp);
 1562                         newvp = NULL;
 1563                 }
 1564                 if (NFS_ISV34(dvp) && (fmode & O_EXCL) &&
 1565                     error == NFSERR_NOTSUPP) {
 1566                         fmode &= ~O_EXCL;
 1567                         goto again;
 1568                 }
 1569         } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
 1570                 if (nfscl_checksattr(vap, &nfsva)) {
 1571                         /*
 1572                          * We are normally called with only a partially
 1573                          * initialized VAP. Since the NFSv3 spec says that
 1574                          * the server may use the file attributes to
 1575                          * store the verifier, the spec requires us to do a
 1576                          * SETATTR RPC. FreeBSD servers store the verifier in
 1577                          * atime, but we can't really assume that all servers
 1578                          * will so we ensure that our SETATTR sets both atime
 1579                          * and mtime.
 1580                          */
 1581                         if (vap->va_mtime.tv_sec == VNOVAL)
 1582                                 vfs_timestamp(&vap->va_mtime);
 1583                         if (vap->va_atime.tv_sec == VNOVAL)
 1584                                 vap->va_atime = vap->va_mtime;
 1585                         error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
 1586                             cnp->cn_thread, &nfsva, &attrflag, NULL);
 1587                         if (error && (vap->va_uid != (uid_t)VNOVAL ||
 1588                             vap->va_gid != (gid_t)VNOVAL)) {
 1589                                 /* try again without setting uid/gid */
 1590                                 vap->va_uid = (uid_t)VNOVAL;
 1591                                 vap->va_gid = (uid_t)VNOVAL;
 1592                                 error = nfsrpc_setattr(newvp, vap, NULL, 
 1593                                     cnp->cn_cred, cnp->cn_thread, &nfsva,
 1594                                     &attrflag, NULL);
 1595                         }
 1596                         if (attrflag)
 1597                                 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
 1598                                     NULL, 0, 1);
 1599                         if (error != 0)
 1600                                 vput(newvp);
 1601                 }
 1602         }
 1603         if (!error) {
 1604                 if (cnp->cn_flags & MAKEENTRY)
 1605                         cache_enter(dvp, newvp, cnp);
 1606                 *ap->a_vpp = newvp;
 1607         } else if (NFS_ISV4(dvp)) {
 1608                 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
 1609                     vap->va_gid);
 1610         }
 1611         mtx_lock(&dnp->n_mtx);
 1612         dnp->n_flag |= NMODIFIED;
 1613         if (!dattrflag) {
 1614                 dnp->n_attrstamp = 0;
 1615                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 1616         }
 1617         mtx_unlock(&dnp->n_mtx);
 1618         return (error);
 1619 }
 1620 
 1621 /*
 1622  * nfs file remove call
 1623  * To try and make nfs semantics closer to ufs semantics, a file that has
 1624  * other processes using the vnode is renamed instead of removed and then
 1625  * removed later on the last close.
 1626  * - If v_usecount > 1
 1627  *        If a rename is not already in the works
 1628  *           call nfs_sillyrename() to set it up
 1629  *     else
 1630  *        do the remove rpc
 1631  */
 1632 static int
 1633 nfs_remove(struct vop_remove_args *ap)
 1634 {
 1635         struct vnode *vp = ap->a_vp;
 1636         struct vnode *dvp = ap->a_dvp;
 1637         struct componentname *cnp = ap->a_cnp;
 1638         struct nfsnode *np = VTONFS(vp);
 1639         int error = 0;
 1640         struct vattr vattr;
 1641 
 1642         KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
 1643         KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
 1644         if (vp->v_type == VDIR)
 1645                 error = EPERM;
 1646         else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
 1647             VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
 1648             vattr.va_nlink > 1)) {
 1649                 /*
 1650                  * Purge the name cache so that the chance of a lookup for
 1651                  * the name succeeding while the remove is in progress is
 1652                  * minimized. Without node locking it can still happen, such
 1653                  * that an I/O op returns ESTALE, but since you get this if
 1654                  * another host removes the file..
 1655                  */
 1656                 cache_purge(vp);
 1657                 /*
 1658                  * throw away biocache buffers, mainly to avoid
 1659                  * unnecessary delayed writes later.
 1660                  */
 1661                 error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
 1662                 /* Do the rpc */
 1663                 if (error != EINTR && error != EIO)
 1664                         error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
 1665                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
 1666                 /*
 1667                  * Kludge City: If the first reply to the remove rpc is lost..
 1668                  *   the reply to the retransmitted request will be ENOENT
 1669                  *   since the file was in fact removed
 1670                  *   Therefore, we cheat and return success.
 1671                  */
 1672                 if (error == ENOENT)
 1673                         error = 0;
 1674         } else if (!np->n_sillyrename)
 1675                 error = nfs_sillyrename(dvp, vp, cnp);
 1676         mtx_lock(&np->n_mtx);
 1677         np->n_attrstamp = 0;
 1678         mtx_unlock(&np->n_mtx);
 1679         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 1680         return (error);
 1681 }
 1682 
 1683 /*
 1684  * nfs file remove rpc called from nfs_inactive
 1685  */
 1686 int
 1687 ncl_removeit(struct sillyrename *sp, struct vnode *vp)
 1688 {
 1689         /*
 1690          * Make sure that the directory vnode is still valid.
 1691          * XXX we should lock sp->s_dvp here.
 1692          */
 1693         if (sp->s_dvp->v_type == VBAD)
 1694                 return (0);
 1695         return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
 1696             sp->s_cred, NULL));
 1697 }
 1698 
 1699 /*
 1700  * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
 1701  */
 1702 static int
 1703 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
 1704     int namelen, struct ucred *cred, struct thread *td)
 1705 {
 1706         struct nfsvattr dnfsva;
 1707         struct nfsnode *dnp = VTONFS(dvp);
 1708         int error = 0, dattrflag;
 1709 
 1710         mtx_lock(&dnp->n_mtx);
 1711         dnp->n_flag |= NREMOVEINPROG;
 1712         mtx_unlock(&dnp->n_mtx);
 1713         error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
 1714             &dattrflag, NULL);
 1715         mtx_lock(&dnp->n_mtx);
 1716         if ((dnp->n_flag & NREMOVEWANT)) {
 1717                 dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
 1718                 mtx_unlock(&dnp->n_mtx);
 1719                 wakeup((caddr_t)dnp);
 1720         } else {
 1721                 dnp->n_flag &= ~NREMOVEINPROG;
 1722                 mtx_unlock(&dnp->n_mtx);
 1723         }
 1724         if (dattrflag)
 1725                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 1726         mtx_lock(&dnp->n_mtx);
 1727         dnp->n_flag |= NMODIFIED;
 1728         if (!dattrflag) {
 1729                 dnp->n_attrstamp = 0;
 1730                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 1731         }
 1732         mtx_unlock(&dnp->n_mtx);
 1733         if (error && NFS_ISV4(dvp))
 1734                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 1735         return (error);
 1736 }
 1737 
 1738 /*
 1739  * nfs file rename call
 1740  */
 1741 static int
 1742 nfs_rename(struct vop_rename_args *ap)
 1743 {
 1744         struct vnode *fvp = ap->a_fvp;
 1745         struct vnode *tvp = ap->a_tvp;
 1746         struct vnode *fdvp = ap->a_fdvp;
 1747         struct vnode *tdvp = ap->a_tdvp;
 1748         struct componentname *tcnp = ap->a_tcnp;
 1749         struct componentname *fcnp = ap->a_fcnp;
 1750         struct nfsnode *fnp = VTONFS(ap->a_fvp);
 1751         struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
 1752         struct nfsv4node *newv4 = NULL;
 1753         int error;
 1754 
 1755         KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
 1756             (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
 1757         /* Check for cross-device rename */
 1758         if ((fvp->v_mount != tdvp->v_mount) ||
 1759             (tvp && (fvp->v_mount != tvp->v_mount))) {
 1760                 error = EXDEV;
 1761                 goto out;
 1762         }
 1763 
 1764         if (fvp == tvp) {
 1765                 ncl_printf("nfs_rename: fvp == tvp (can't happen)\n");
 1766                 error = 0;
 1767                 goto out;
 1768         }
 1769         if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0)
 1770                 goto out;
 1771 
 1772         /*
 1773          * We have to flush B_DELWRI data prior to renaming
 1774          * the file.  If we don't, the delayed-write buffers
 1775          * can be flushed out later after the file has gone stale
 1776          * under NFSV3.  NFSV2 does not have this problem because
 1777          * ( as far as I can tell ) it flushes dirty buffers more
 1778          * often.
 1779          * 
 1780          * Skip the rename operation if the fsync fails, this can happen
 1781          * due to the server's volume being full, when we pushed out data
 1782          * that was written back to our cache earlier. Not checking for
 1783          * this condition can result in potential (silent) data loss.
 1784          */
 1785         error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
 1786         NFSVOPUNLOCK(fvp, 0);
 1787         if (!error && tvp)
 1788                 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
 1789         if (error)
 1790                 goto out;
 1791 
 1792         /*
 1793          * If the tvp exists and is in use, sillyrename it before doing the
 1794          * rename of the new file over it.
 1795          * XXX Can't sillyrename a directory.
 1796          */
 1797         if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
 1798                 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
 1799                 vput(tvp);
 1800                 tvp = NULL;
 1801         }
 1802 
 1803         error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
 1804             tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
 1805             tcnp->cn_thread);
 1806 
 1807         if (error == 0 && NFS_ISV4(tdvp)) {
 1808                 /*
 1809                  * For NFSv4, check to see if it is the same name and
 1810                  * replace the name, if it is different.
 1811                  */
 1812                 MALLOC(newv4, struct nfsv4node *,
 1813                     sizeof (struct nfsv4node) +
 1814                     tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
 1815                     M_NFSV4NODE, M_WAITOK);
 1816                 mtx_lock(&tdnp->n_mtx);
 1817                 mtx_lock(&fnp->n_mtx);
 1818                 if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
 1819                     (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
 1820                       NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
 1821                       tcnp->cn_namelen) ||
 1822                       tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
 1823                       NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
 1824                         tdnp->n_fhp->nfh_len))) {
 1825 #ifdef notdef
 1826 { char nnn[100]; int nnnl;
 1827 nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
 1828 bcopy(tcnp->cn_nameptr, nnn, nnnl);
 1829 nnn[nnnl] = '\0';
 1830 printf("ren replace=%s\n",nnn);
 1831 }
 1832 #endif
 1833                         FREE((caddr_t)fnp->n_v4, M_NFSV4NODE);
 1834                         fnp->n_v4 = newv4;
 1835                         newv4 = NULL;
 1836                         fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
 1837                         fnp->n_v4->n4_namelen = tcnp->cn_namelen;
 1838                         NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
 1839                             tdnp->n_fhp->nfh_len);
 1840                         NFSBCOPY(tcnp->cn_nameptr,
 1841                             NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
 1842                 }
 1843                 mtx_unlock(&tdnp->n_mtx);
 1844                 mtx_unlock(&fnp->n_mtx);
 1845                 if (newv4 != NULL)
 1846                         FREE((caddr_t)newv4, M_NFSV4NODE);
 1847         }
 1848 
 1849         if (fvp->v_type == VDIR) {
 1850                 if (tvp != NULL && tvp->v_type == VDIR)
 1851                         cache_purge(tdvp);
 1852                 cache_purge(fdvp);
 1853         }
 1854 
 1855 out:
 1856         if (tdvp == tvp)
 1857                 vrele(tdvp);
 1858         else
 1859                 vput(tdvp);
 1860         if (tvp)
 1861                 vput(tvp);
 1862         vrele(fdvp);
 1863         vrele(fvp);
 1864         /*
 1865          * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
 1866          */
 1867         if (error == ENOENT)
 1868                 error = 0;
 1869         return (error);
 1870 }
 1871 
 1872 /*
 1873  * nfs file rename rpc called from nfs_remove() above
 1874  */
 1875 static int
 1876 nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
 1877     struct sillyrename *sp)
 1878 {
 1879 
 1880         return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
 1881             sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
 1882             scnp->cn_thread));
 1883 }
 1884 
 1885 /*
 1886  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
 1887  */
 1888 static int
 1889 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
 1890     int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
 1891     int tnamelen, struct ucred *cred, struct thread *td)
 1892 {
 1893         struct nfsvattr fnfsva, tnfsva;
 1894         struct nfsnode *fdnp = VTONFS(fdvp);
 1895         struct nfsnode *tdnp = VTONFS(tdvp);
 1896         int error = 0, fattrflag, tattrflag;
 1897 
 1898         error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
 1899             tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
 1900             &tattrflag, NULL, NULL);
 1901         mtx_lock(&fdnp->n_mtx);
 1902         fdnp->n_flag |= NMODIFIED;
 1903         if (fattrflag != 0) {
 1904                 mtx_unlock(&fdnp->n_mtx);
 1905                 (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1);
 1906         } else {
 1907                 fdnp->n_attrstamp = 0;
 1908                 mtx_unlock(&fdnp->n_mtx);
 1909                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
 1910         }
 1911         mtx_lock(&tdnp->n_mtx);
 1912         tdnp->n_flag |= NMODIFIED;
 1913         if (tattrflag != 0) {
 1914                 mtx_unlock(&tdnp->n_mtx);
 1915                 (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1);
 1916         } else {
 1917                 tdnp->n_attrstamp = 0;
 1918                 mtx_unlock(&tdnp->n_mtx);
 1919                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 1920         }
 1921         if (error && NFS_ISV4(fdvp))
 1922                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 1923         return (error);
 1924 }
 1925 
 1926 /*
 1927  * nfs hard link create call
 1928  */
 1929 static int
 1930 nfs_link(struct vop_link_args *ap)
 1931 {
 1932         struct vnode *vp = ap->a_vp;
 1933         struct vnode *tdvp = ap->a_tdvp;
 1934         struct componentname *cnp = ap->a_cnp;
 1935         struct nfsnode *np, *tdnp;
 1936         struct nfsvattr nfsva, dnfsva;
 1937         int error = 0, attrflag, dattrflag;
 1938 
 1939         if (vp->v_mount != tdvp->v_mount) {
 1940                 return (EXDEV);
 1941         }
 1942 
 1943         /*
 1944          * Push all writes to the server, so that the attribute cache
 1945          * doesn't get "out of sync" with the server.
 1946          * XXX There should be a better way!
 1947          */
 1948         VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
 1949 
 1950         error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
 1951             cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag,
 1952             &dattrflag, NULL);
 1953         tdnp = VTONFS(tdvp);
 1954         mtx_lock(&tdnp->n_mtx);
 1955         tdnp->n_flag |= NMODIFIED;
 1956         if (dattrflag != 0) {
 1957                 mtx_unlock(&tdnp->n_mtx);
 1958                 (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1);
 1959         } else {
 1960                 tdnp->n_attrstamp = 0;
 1961                 mtx_unlock(&tdnp->n_mtx);
 1962                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 1963         }
 1964         if (attrflag)
 1965                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 1966         else {
 1967                 np = VTONFS(vp);
 1968                 mtx_lock(&np->n_mtx);
 1969                 np->n_attrstamp = 0;
 1970                 mtx_unlock(&np->n_mtx);
 1971                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 1972         }
 1973         /*
 1974          * If negative lookup caching is enabled, I might as well
 1975          * add an entry for this node. Not necessary for correctness,
 1976          * but if negative caching is enabled, then the system
 1977          * must care about lookup caching hit rate, so...
 1978          */
 1979         if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 &&
 1980             (cnp->cn_flags & MAKEENTRY))
 1981                 cache_enter(tdvp, vp, cnp);
 1982         if (error && NFS_ISV4(vp))
 1983                 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
 1984                     (gid_t)0);
 1985         return (error);
 1986 }
 1987 
 1988 /*
 1989  * nfs symbolic link create call
 1990  */
 1991 static int
 1992 nfs_symlink(struct vop_symlink_args *ap)
 1993 {
 1994         struct vnode *dvp = ap->a_dvp;
 1995         struct vattr *vap = ap->a_vap;
 1996         struct componentname *cnp = ap->a_cnp;
 1997         struct nfsvattr nfsva, dnfsva;
 1998         struct nfsfh *nfhp;
 1999         struct nfsnode *np = NULL, *dnp;
 2000         struct vnode *newvp = NULL;
 2001         int error = 0, attrflag, dattrflag, ret;
 2002 
 2003         vap->va_type = VLNK;
 2004         error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2005             ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva,
 2006             &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
 2007         if (nfhp) {
 2008                 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
 2009                     &np, NULL, LK_EXCLUSIVE);
 2010                 if (!ret)
 2011                         newvp = NFSTOV(np);
 2012                 else if (!error)
 2013                         error = ret;
 2014         }
 2015         if (newvp != NULL) {
 2016                 if (attrflag)
 2017                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 2018                             0, 1);
 2019         } else if (!error) {
 2020                 /*
 2021                  * If we do not have an error and we could not extract the
 2022                  * newvp from the response due to the request being NFSv2, we
 2023                  * have to do a lookup in order to obtain a newvp to return.
 2024                  */
 2025                 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2026                     cnp->cn_cred, cnp->cn_thread, &np);
 2027                 if (!error)
 2028                         newvp = NFSTOV(np);
 2029         }
 2030         if (error) {
 2031                 if (newvp)
 2032                         vput(newvp);
 2033                 if (NFS_ISV4(dvp))
 2034                         error = nfscl_maperr(cnp->cn_thread, error,
 2035                             vap->va_uid, vap->va_gid);
 2036         } else {
 2037                 /*
 2038                  * If negative lookup caching is enabled, I might as well
 2039                  * add an entry for this node. Not necessary for correctness,
 2040                  * but if negative caching is enabled, then the system
 2041                  * must care about lookup caching hit rate, so...
 2042                  */
 2043                 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
 2044                     (cnp->cn_flags & MAKEENTRY))
 2045                         cache_enter(dvp, newvp, cnp);
 2046                 *ap->a_vpp = newvp;
 2047         }
 2048 
 2049         dnp = VTONFS(dvp);
 2050         mtx_lock(&dnp->n_mtx);
 2051         dnp->n_flag |= NMODIFIED;
 2052         if (dattrflag != 0) {
 2053                 mtx_unlock(&dnp->n_mtx);
 2054                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 2055         } else {
 2056                 dnp->n_attrstamp = 0;
 2057                 mtx_unlock(&dnp->n_mtx);
 2058                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 2059         }
 2060         return (error);
 2061 }
 2062 
 2063 /*
 2064  * nfs make dir call
 2065  */
 2066 static int
 2067 nfs_mkdir(struct vop_mkdir_args *ap)
 2068 {
 2069         struct vnode *dvp = ap->a_dvp;
 2070         struct vattr *vap = ap->a_vap;
 2071         struct componentname *cnp = ap->a_cnp;
 2072         struct nfsnode *np = NULL, *dnp;
 2073         struct vnode *newvp = NULL;
 2074         struct vattr vattr;
 2075         struct nfsfh *nfhp;
 2076         struct nfsvattr nfsva, dnfsva;
 2077         int error = 0, attrflag, dattrflag, ret;
 2078 
 2079         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
 2080                 return (error);
 2081         vap->va_type = VDIR;
 2082         error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2083             vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp,
 2084             &attrflag, &dattrflag, NULL);
 2085         dnp = VTONFS(dvp);
 2086         mtx_lock(&dnp->n_mtx);
 2087         dnp->n_flag |= NMODIFIED;
 2088         if (dattrflag != 0) {
 2089                 mtx_unlock(&dnp->n_mtx);
 2090                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 2091         } else {
 2092                 dnp->n_attrstamp = 0;
 2093                 mtx_unlock(&dnp->n_mtx);
 2094                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 2095         }
 2096         if (nfhp) {
 2097                 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
 2098                     &np, NULL, LK_EXCLUSIVE);
 2099                 if (!ret) {
 2100                         newvp = NFSTOV(np);
 2101                         if (attrflag)
 2102                            (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
 2103                                 NULL, 0, 1);
 2104                 } else if (!error)
 2105                         error = ret;
 2106         }
 2107         if (!error && newvp == NULL) {
 2108                 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2109                     cnp->cn_cred, cnp->cn_thread, &np);
 2110                 if (!error) {
 2111                         newvp = NFSTOV(np);
 2112                         if (newvp->v_type != VDIR)
 2113                                 error = EEXIST;
 2114                 }
 2115         }
 2116         if (error) {
 2117                 if (newvp)
 2118                         vput(newvp);
 2119                 if (NFS_ISV4(dvp))
 2120                         error = nfscl_maperr(cnp->cn_thread, error,
 2121                             vap->va_uid, vap->va_gid);
 2122         } else {
 2123                 /*
 2124                  * If negative lookup caching is enabled, I might as well
 2125                  * add an entry for this node. Not necessary for correctness,
 2126                  * but if negative caching is enabled, then the system
 2127                  * must care about lookup caching hit rate, so...
 2128                  */
 2129                 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
 2130                     (cnp->cn_flags & MAKEENTRY))
 2131                         cache_enter(dvp, newvp, cnp);
 2132                 *ap->a_vpp = newvp;
 2133         }
 2134         return (error);
 2135 }
 2136 
 2137 /*
 2138  * nfs remove directory call
 2139  */
 2140 static int
 2141 nfs_rmdir(struct vop_rmdir_args *ap)
 2142 {
 2143         struct vnode *vp = ap->a_vp;
 2144         struct vnode *dvp = ap->a_dvp;
 2145         struct componentname *cnp = ap->a_cnp;
 2146         struct nfsnode *dnp;
 2147         struct nfsvattr dnfsva;
 2148         int error, dattrflag;
 2149 
 2150         if (dvp == vp)
 2151                 return (EINVAL);
 2152         error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2153             cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL);
 2154         dnp = VTONFS(dvp);
 2155         mtx_lock(&dnp->n_mtx);
 2156         dnp->n_flag |= NMODIFIED;
 2157         if (dattrflag != 0) {
 2158                 mtx_unlock(&dnp->n_mtx);
 2159                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 2160         } else {
 2161                 dnp->n_attrstamp = 0;
 2162                 mtx_unlock(&dnp->n_mtx);
 2163                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 2164         }
 2165 
 2166         cache_purge(dvp);
 2167         cache_purge(vp);
 2168         if (error && NFS_ISV4(dvp))
 2169                 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
 2170                     (gid_t)0);
 2171         /*
 2172          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 2173          */
 2174         if (error == ENOENT)
 2175                 error = 0;
 2176         return (error);
 2177 }
 2178 
 2179 /*
 2180  * nfs readdir call
 2181  */
 2182 static int
 2183 nfs_readdir(struct vop_readdir_args *ap)
 2184 {
 2185         struct vnode *vp = ap->a_vp;
 2186         struct nfsnode *np = VTONFS(vp);
 2187         struct uio *uio = ap->a_uio;
 2188         int tresid, error = 0;
 2189         struct vattr vattr;
 2190         
 2191         if (vp->v_type != VDIR) 
 2192                 return(EPERM);
 2193 
 2194         /*
 2195          * First, check for hit on the EOF offset cache
 2196          */
 2197         if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
 2198             (np->n_flag & NMODIFIED) == 0) {
 2199                 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
 2200                         mtx_lock(&np->n_mtx);
 2201                         if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) ||
 2202                             !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 2203                                 mtx_unlock(&np->n_mtx);
 2204                                 NFSINCRGLOBAL(newnfsstats.direofcache_hits);
 2205                                 return (0);
 2206                         } else
 2207                                 mtx_unlock(&np->n_mtx);
 2208                 }
 2209         }
 2210 
 2211         /*
 2212          * Call ncl_bioread() to do the real work.
 2213          */
 2214         tresid = uio->uio_resid;
 2215         error = ncl_bioread(vp, uio, 0, ap->a_cred);
 2216 
 2217         if (!error && uio->uio_resid == tresid)
 2218                 NFSINCRGLOBAL(newnfsstats.direofcache_misses);
 2219         return (error);
 2220 }
 2221 
 2222 /*
 2223  * Readdir rpc call.
 2224  * Called from below the buffer cache by ncl_doio().
 2225  */
 2226 int
 2227 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
 2228     struct thread *td)
 2229 {
 2230         struct nfsvattr nfsva;
 2231         nfsuint64 *cookiep, cookie;
 2232         struct nfsnode *dnp = VTONFS(vp);
 2233         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2234         int error = 0, eof, attrflag;
 2235 
 2236         KASSERT(uiop->uio_iovcnt == 1 &&
 2237             (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
 2238             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 2239             ("nfs readdirrpc bad uio"));
 2240 
 2241         /*
 2242          * If there is no cookie, assume directory was stale.
 2243          */
 2244         ncl_dircookie_lock(dnp);
 2245         cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
 2246         if (cookiep) {
 2247                 cookie = *cookiep;
 2248                 ncl_dircookie_unlock(dnp);
 2249         } else {
 2250                 ncl_dircookie_unlock(dnp);              
 2251                 return (NFSERR_BAD_COOKIE);
 2252         }
 2253 
 2254         if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
 2255                 (void)ncl_fsinfo(nmp, vp, cred, td);
 2256 
 2257         error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva,
 2258             &attrflag, &eof, NULL);
 2259         if (attrflag)
 2260                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 2261 
 2262         if (!error) {
 2263                 /*
 2264                  * We are now either at the end of the directory or have filled
 2265                  * the block.
 2266                  */
 2267                 if (eof)
 2268                         dnp->n_direofoffset = uiop->uio_offset;
 2269                 else {
 2270                         if (uiop->uio_resid > 0)
 2271                                 ncl_printf("EEK! readdirrpc resid > 0\n");
 2272                         ncl_dircookie_lock(dnp);
 2273                         cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 2274                         *cookiep = cookie;
 2275                         ncl_dircookie_unlock(dnp);
 2276                 }
 2277         } else if (NFS_ISV4(vp)) {
 2278                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 2279         }
 2280         return (error);
 2281 }
 2282 
 2283 /*
 2284  * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc().
 2285  */
 2286 int
 2287 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
 2288     struct thread *td)
 2289 {
 2290         struct nfsvattr nfsva;
 2291         nfsuint64 *cookiep, cookie;
 2292         struct nfsnode *dnp = VTONFS(vp);
 2293         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2294         int error = 0, attrflag, eof;
 2295 
 2296         KASSERT(uiop->uio_iovcnt == 1 &&
 2297             (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
 2298             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 2299             ("nfs readdirplusrpc bad uio"));
 2300 
 2301         /*
 2302          * If there is no cookie, assume directory was stale.
 2303          */
 2304         ncl_dircookie_lock(dnp);
 2305         cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
 2306         if (cookiep) {
 2307                 cookie = *cookiep;
 2308                 ncl_dircookie_unlock(dnp);
 2309         } else {
 2310                 ncl_dircookie_unlock(dnp);
 2311                 return (NFSERR_BAD_COOKIE);
 2312         }
 2313 
 2314         if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
 2315                 (void)ncl_fsinfo(nmp, vp, cred, td);
 2316         error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva,
 2317             &attrflag, &eof, NULL);
 2318         if (attrflag)
 2319                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 2320 
 2321         if (!error) {
 2322                 /*
 2323                  * We are now either at end of the directory or have filled the
 2324                  * the block.
 2325                  */
 2326                 if (eof)
 2327                         dnp->n_direofoffset = uiop->uio_offset;
 2328                 else {
 2329                         if (uiop->uio_resid > 0)
 2330                                 ncl_printf("EEK! readdirplusrpc resid > 0\n");
 2331                         ncl_dircookie_lock(dnp);
 2332                         cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 2333                         *cookiep = cookie;
 2334                         ncl_dircookie_unlock(dnp);
 2335                 }
 2336         } else if (NFS_ISV4(vp)) {
 2337                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 2338         }
 2339         return (error);
 2340 }
 2341 
 2342 /*
 2343  * Silly rename. To make the NFS filesystem that is stateless look a little
 2344  * more like the "ufs" a remove of an active vnode is translated to a rename
 2345  * to a funny looking filename that is removed by nfs_inactive on the
 2346  * nfsnode. There is the potential for another process on a different client
 2347  * to create the same funny name between the nfs_lookitup() fails and the
 2348  * nfs_rename() completes, but...
 2349  */
 2350 static int
 2351 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 2352 {
 2353         struct sillyrename *sp;
 2354         struct nfsnode *np;
 2355         int error;
 2356         short pid;
 2357         unsigned int lticks;
 2358 
 2359         cache_purge(dvp);
 2360         np = VTONFS(vp);
 2361         KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir"));
 2362         MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
 2363             M_NEWNFSREQ, M_WAITOK);
 2364         sp->s_cred = crhold(cnp->cn_cred);
 2365         sp->s_dvp = dvp;
 2366         VREF(dvp);
 2367 
 2368         /* 
 2369          * Fudge together a funny name.
 2370          * Changing the format of the funny name to accomodate more 
 2371          * sillynames per directory.
 2372          * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 
 2373          * CPU ticks since boot.
 2374          */
 2375         pid = cnp->cn_thread->td_proc->p_pid;
 2376         lticks = (unsigned int)ticks;
 2377         for ( ; ; ) {
 2378                 sp->s_namlen = sprintf(sp->s_name, 
 2379                                        ".nfs.%08x.%04x4.4", lticks, 
 2380                                        pid);
 2381                 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 2382                                  cnp->cn_thread, NULL))
 2383                         break;
 2384                 lticks++;
 2385         }
 2386         error = nfs_renameit(dvp, vp, cnp, sp);
 2387         if (error)
 2388                 goto bad;
 2389         error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 2390                 cnp->cn_thread, &np);
 2391         np->n_sillyrename = sp;
 2392         return (0);
 2393 bad:
 2394         vrele(sp->s_dvp);
 2395         crfree(sp->s_cred);
 2396         free((caddr_t)sp, M_NEWNFSREQ);
 2397         return (error);
 2398 }
 2399 
 2400 /*
 2401  * Look up a file name and optionally either update the file handle or
 2402  * allocate an nfsnode, depending on the value of npp.
 2403  * npp == NULL  --> just do the lookup
 2404  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
 2405  *                      handled too
 2406  * *npp != NULL --> update the file handle in the vnode
 2407  */
 2408 static int
 2409 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
 2410     struct thread *td, struct nfsnode **npp)
 2411 {
 2412         struct vnode *newvp = NULL, *vp;
 2413         struct nfsnode *np, *dnp = VTONFS(dvp);
 2414         struct nfsfh *nfhp, *onfhp;
 2415         struct nfsvattr nfsva, dnfsva;
 2416         struct componentname cn;
 2417         int error = 0, attrflag, dattrflag;
 2418         u_int hash;
 2419 
 2420         error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
 2421             &nfhp, &attrflag, &dattrflag, NULL);
 2422         if (dattrflag)
 2423                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 2424         if (npp && !error) {
 2425                 if (*npp != NULL) {
 2426                     np = *npp;
 2427                     vp = NFSTOV(np);
 2428                     /*
 2429                      * For NFSv4, check to see if it is the same name and
 2430                      * replace the name, if it is different.
 2431                      */
 2432                     if (np->n_v4 != NULL && nfsva.na_type == VREG &&
 2433                         (np->n_v4->n4_namelen != len ||
 2434                          NFSBCMP(name, NFS4NODENAME(np->n_v4), len) ||
 2435                          dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
 2436                          NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
 2437                          dnp->n_fhp->nfh_len))) {
 2438 #ifdef notdef
 2439 { char nnn[100]; int nnnl;
 2440 nnnl = (len < 100) ? len : 99;
 2441 bcopy(name, nnn, nnnl);
 2442 nnn[nnnl] = '\0';
 2443 printf("replace=%s\n",nnn);
 2444 }
 2445 #endif
 2446                             FREE((caddr_t)np->n_v4, M_NFSV4NODE);
 2447                             MALLOC(np->n_v4, struct nfsv4node *,
 2448                                 sizeof (struct nfsv4node) +
 2449                                 dnp->n_fhp->nfh_len + len - 1,
 2450                                 M_NFSV4NODE, M_WAITOK);
 2451                             np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
 2452                             np->n_v4->n4_namelen = len;
 2453                             NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
 2454                                 dnp->n_fhp->nfh_len);
 2455                             NFSBCOPY(name, NFS4NODENAME(np->n_v4), len);
 2456                     }
 2457                     hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len,
 2458                         FNV1_32_INIT);
 2459                     onfhp = np->n_fhp;
 2460                     /*
 2461                      * Rehash node for new file handle.
 2462                      */
 2463                     vfs_hash_rehash(vp, hash);
 2464                     np->n_fhp = nfhp;
 2465                     if (onfhp != NULL)
 2466                         FREE((caddr_t)onfhp, M_NFSFH);
 2467                     newvp = NFSTOV(np);
 2468                 } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
 2469                     FREE((caddr_t)nfhp, M_NFSFH);
 2470                     VREF(dvp);
 2471                     newvp = dvp;
 2472                 } else {
 2473                     cn.cn_nameptr = name;
 2474                     cn.cn_namelen = len;
 2475                     error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
 2476                         &np, NULL, LK_EXCLUSIVE);
 2477                     if (error)
 2478                         return (error);
 2479                     newvp = NFSTOV(np);
 2480                 }
 2481                 if (!attrflag && *npp == NULL) {
 2482                         if (newvp == dvp)
 2483                                 vrele(newvp);
 2484                         else
 2485                                 vput(newvp);
 2486                         return (ENOENT);
 2487                 }
 2488                 if (attrflag)
 2489                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 2490                             0, 1);
 2491         }
 2492         if (npp && *npp == NULL) {
 2493                 if (error) {
 2494                         if (newvp) {
 2495                                 if (newvp == dvp)
 2496                                         vrele(newvp);
 2497                                 else
 2498                                         vput(newvp);
 2499                         }
 2500                 } else
 2501                         *npp = np;
 2502         }
 2503         if (error && NFS_ISV4(dvp))
 2504                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 2505         return (error);
 2506 }
 2507 
 2508 /*
 2509  * Nfs Version 3 and 4 commit rpc
 2510  */
 2511 int
 2512 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
 2513    struct thread *td)
 2514 {
 2515         struct nfsvattr nfsva;
 2516         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2517         int error, attrflag;
 2518         u_char verf[NFSX_VERF];
 2519 
 2520         mtx_lock(&nmp->nm_mtx);
 2521         if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
 2522                 mtx_unlock(&nmp->nm_mtx);
 2523                 return (0);
 2524         }
 2525         mtx_unlock(&nmp->nm_mtx);
 2526         error = nfsrpc_commit(vp, offset, cnt, cred, td, verf, &nfsva,
 2527             &attrflag, NULL);
 2528         if (!error) {
 2529                 mtx_lock(&nmp->nm_mtx);
 2530                 if (NFSBCMP((caddr_t)nmp->nm_verf, verf, NFSX_VERF)) {
 2531                         NFSBCOPY(verf, (caddr_t)nmp->nm_verf, NFSX_VERF);
 2532                         error = NFSERR_STALEWRITEVERF;
 2533                 }
 2534                 mtx_unlock(&nmp->nm_mtx);
 2535                 if (!error && attrflag)
 2536                         (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL,
 2537                             0, 1);
 2538         } else if (NFS_ISV4(vp)) {
 2539                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 2540         }
 2541         return (error);
 2542 }
 2543 
 2544 /*
 2545  * Strategy routine.
 2546  * For async requests when nfsiod(s) are running, queue the request by
 2547  * calling ncl_asyncio(), otherwise just all ncl_doio() to do the
 2548  * request.
 2549  */
 2550 static int
 2551 nfs_strategy(struct vop_strategy_args *ap)
 2552 {
 2553         struct buf *bp = ap->a_bp;
 2554         struct ucred *cr;
 2555 
 2556         KASSERT(!(bp->b_flags & B_DONE),
 2557             ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
 2558         BUF_ASSERT_HELD(bp);
 2559 
 2560         if (bp->b_iocmd == BIO_READ)
 2561                 cr = bp->b_rcred;
 2562         else
 2563                 cr = bp->b_wcred;
 2564 
 2565         /*
 2566          * If the op is asynchronous and an i/o daemon is waiting
 2567          * queue the request, wake it up and wait for completion
 2568          * otherwise just do it ourselves.
 2569          */
 2570         if ((bp->b_flags & B_ASYNC) == 0 ||
 2571             ncl_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
 2572                 (void) ncl_doio(ap->a_vp, bp, cr, curthread, 1);
 2573         return (0);
 2574 }
 2575 
 2576 /*
 2577  * fsync vnode op. Just call ncl_flush() with commit == 1.
 2578  */
 2579 /* ARGSUSED */
 2580 static int
 2581 nfs_fsync(struct vop_fsync_args *ap)
 2582 {
 2583         return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0));
 2584 }
 2585 
 2586 /*
 2587  * Flush all the blocks associated with a vnode.
 2588  *      Walk through the buffer pool and push any dirty pages
 2589  *      associated with the vnode.
 2590  * If the called_from_renewthread argument is TRUE, it has been called
 2591  * from the NFSv4 renew thread and, as such, cannot block indefinitely
 2592  * waiting for a buffer write to complete.
 2593  */
 2594 int
 2595 ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
 2596     int commit, int called_from_renewthread)
 2597 {
 2598         struct nfsnode *np = VTONFS(vp);
 2599         struct buf *bp;
 2600         int i;
 2601         struct buf *nbp;
 2602         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2603         int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 2604         int passone = 1, trycnt = 0;
 2605         u_quad_t off, endoff, toff;
 2606         struct ucred* wcred = NULL;
 2607         struct buf **bvec = NULL;
 2608         struct bufobj *bo;
 2609 #ifndef NFS_COMMITBVECSIZ
 2610 #define NFS_COMMITBVECSIZ       20
 2611 #endif
 2612         struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
 2613         int bvecsize = 0, bveccount;
 2614 
 2615         if (called_from_renewthread != 0)
 2616                 slptimeo = hz;
 2617         if (nmp->nm_flag & NFSMNT_INT)
 2618                 slpflag = NFS_PCATCH;
 2619         if (!commit)
 2620                 passone = 0;
 2621         bo = &vp->v_bufobj;
 2622         /*
 2623          * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
 2624          * server, but has not been committed to stable storage on the server
 2625          * yet. On the first pass, the byte range is worked out and the commit
 2626          * rpc is done. On the second pass, ncl_writebp() is called to do the
 2627          * job.
 2628          */
 2629 again:
 2630         off = (u_quad_t)-1;
 2631         endoff = 0;
 2632         bvecpos = 0;
 2633         if (NFS_ISV34(vp) && commit) {
 2634                 if (bvec != NULL && bvec != bvec_on_stack)
 2635                         free(bvec, M_TEMP);
 2636                 /*
 2637                  * Count up how many buffers waiting for a commit.
 2638                  */
 2639                 bveccount = 0;
 2640                 BO_LOCK(bo);
 2641                 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 2642                         if (!BUF_ISLOCKED(bp) &&
 2643                             (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 2644                                 == (B_DELWRI | B_NEEDCOMMIT))
 2645                                 bveccount++;
 2646                 }
 2647                 /*
 2648                  * Allocate space to remember the list of bufs to commit.  It is
 2649                  * important to use M_NOWAIT here to avoid a race with nfs_write.
 2650                  * If we can't get memory (for whatever reason), we will end up
 2651                  * committing the buffers one-by-one in the loop below.
 2652                  */
 2653                 if (bveccount > NFS_COMMITBVECSIZ) {
 2654                         /*
 2655                          * Release the vnode interlock to avoid a lock
 2656                          * order reversal.
 2657                          */
 2658                         BO_UNLOCK(bo);
 2659                         bvec = (struct buf **)
 2660                                 malloc(bveccount * sizeof(struct buf *),
 2661                                        M_TEMP, M_NOWAIT);
 2662                         BO_LOCK(bo);
 2663                         if (bvec == NULL) {
 2664                                 bvec = bvec_on_stack;
 2665                                 bvecsize = NFS_COMMITBVECSIZ;
 2666                         } else
 2667                                 bvecsize = bveccount;
 2668                 } else {
 2669                         bvec = bvec_on_stack;
 2670                         bvecsize = NFS_COMMITBVECSIZ;
 2671                 }
 2672                 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 2673                         if (bvecpos >= bvecsize)
 2674                                 break;
 2675                         if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 2676                                 nbp = TAILQ_NEXT(bp, b_bobufs);
 2677                                 continue;
 2678                         }
 2679                         if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
 2680                             (B_DELWRI | B_NEEDCOMMIT)) {
 2681                                 BUF_UNLOCK(bp);
 2682                                 nbp = TAILQ_NEXT(bp, b_bobufs);
 2683                                 continue;
 2684                         }
 2685                         BO_UNLOCK(bo);
 2686                         bremfree(bp);
 2687                         /*
 2688                          * Work out if all buffers are using the same cred
 2689                          * so we can deal with them all with one commit.
 2690                          *
 2691                          * NOTE: we are not clearing B_DONE here, so we have
 2692                          * to do it later on in this routine if we intend to
 2693                          * initiate I/O on the bp.
 2694                          *
 2695                          * Note: to avoid loopback deadlocks, we do not
 2696                          * assign b_runningbufspace.
 2697                          */
 2698                         if (wcred == NULL)
 2699                                 wcred = bp->b_wcred;
 2700                         else if (wcred != bp->b_wcred)
 2701                                 wcred = NOCRED;
 2702                         vfs_busy_pages(bp, 1);
 2703 
 2704                         BO_LOCK(bo);
 2705                         /*
 2706                          * bp is protected by being locked, but nbp is not
 2707                          * and vfs_busy_pages() may sleep.  We have to
 2708                          * recalculate nbp.
 2709                          */
 2710                         nbp = TAILQ_NEXT(bp, b_bobufs);
 2711 
 2712                         /*
 2713                          * A list of these buffers is kept so that the
 2714                          * second loop knows which buffers have actually
 2715                          * been committed. This is necessary, since there
 2716                          * may be a race between the commit rpc and new
 2717                          * uncommitted writes on the file.
 2718                          */
 2719                         bvec[bvecpos++] = bp;
 2720                         toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 2721                                 bp->b_dirtyoff;
 2722                         if (toff < off)
 2723                                 off = toff;
 2724                         toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
 2725                         if (toff > endoff)
 2726                                 endoff = toff;
 2727                 }
 2728                 BO_UNLOCK(bo);
 2729         }
 2730         if (bvecpos > 0) {
 2731                 /*
 2732                  * Commit data on the server, as required.
 2733                  * If all bufs are using the same wcred, then use that with
 2734                  * one call for all of them, otherwise commit each one
 2735                  * separately.
 2736                  */
 2737                 if (wcred != NOCRED)
 2738                         retv = ncl_commit(vp, off, (int)(endoff - off),
 2739                                           wcred, td);
 2740                 else {
 2741                         retv = 0;
 2742                         for (i = 0; i < bvecpos; i++) {
 2743                                 off_t off, size;
 2744                                 bp = bvec[i];
 2745                                 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 2746                                         bp->b_dirtyoff;
 2747                                 size = (u_quad_t)(bp->b_dirtyend
 2748                                                   - bp->b_dirtyoff);
 2749                                 retv = ncl_commit(vp, off, (int)size,
 2750                                                   bp->b_wcred, td);
 2751                                 if (retv) break;
 2752                         }
 2753                 }
 2754 
 2755                 if (retv == NFSERR_STALEWRITEVERF)
 2756                         ncl_clearcommit(vp->v_mount);
 2757 
 2758                 /*
 2759                  * Now, either mark the blocks I/O done or mark the
 2760                  * blocks dirty, depending on whether the commit
 2761                  * succeeded.
 2762                  */
 2763                 for (i = 0; i < bvecpos; i++) {
 2764                         bp = bvec[i];
 2765                         bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 2766                         if (retv) {
 2767                                 /*
 2768                                  * Error, leave B_DELWRI intact
 2769                                  */
 2770                                 vfs_unbusy_pages(bp);
 2771                                 brelse(bp);
 2772                         } else {
 2773                                 /*
 2774                                  * Success, remove B_DELWRI ( bundirty() ).
 2775                                  *
 2776                                  * b_dirtyoff/b_dirtyend seem to be NFS
 2777                                  * specific.  We should probably move that
 2778                                  * into bundirty(). XXX
 2779                                  */
 2780                                 bufobj_wref(bo);
 2781                                 bp->b_flags |= B_ASYNC;
 2782                                 bundirty(bp);
 2783                                 bp->b_flags &= ~B_DONE;
 2784                                 bp->b_ioflags &= ~BIO_ERROR;
 2785                                 bp->b_dirtyoff = bp->b_dirtyend = 0;
 2786                                 bufdone(bp);
 2787                         }
 2788                 }
 2789         }
 2790 
 2791         /*
 2792          * Start/do any write(s) that are required.
 2793          */
 2794 loop:
 2795         BO_LOCK(bo);
 2796         TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 2797                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 2798                         if (waitfor != MNT_WAIT || passone)
 2799                                 continue;
 2800 
 2801                         error = BUF_TIMELOCK(bp,
 2802                             LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 2803                             BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
 2804                         if (error == 0) {
 2805                                 BUF_UNLOCK(bp);
 2806                                 goto loop;
 2807                         }
 2808                         if (error == ENOLCK) {
 2809                                 error = 0;
 2810                                 goto loop;
 2811                         }
 2812                         if (called_from_renewthread != 0) {
 2813                                 /*
 2814                                  * Return EIO so the flush will be retried
 2815                                  * later.
 2816                                  */
 2817                                 error = EIO;
 2818                                 goto done;
 2819                         }
 2820                         if (newnfs_sigintr(nmp, td)) {
 2821                                 error = EINTR;
 2822                                 goto done;
 2823                         }
 2824                         if (slpflag & PCATCH) {
 2825                                 slpflag = 0;
 2826                                 slptimeo = 2 * hz;
 2827                         }
 2828                         goto loop;
 2829                 }
 2830                 if ((bp->b_flags & B_DELWRI) == 0)
 2831                         panic("nfs_fsync: not dirty");
 2832                 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
 2833                         BUF_UNLOCK(bp);
 2834                         continue;
 2835                 }
 2836                 BO_UNLOCK(bo);
 2837                 bremfree(bp);
 2838                 if (passone || !commit)
 2839                     bp->b_flags |= B_ASYNC;
 2840                 else
 2841                     bp->b_flags |= B_ASYNC;
 2842                 bwrite(bp);
 2843                 if (newnfs_sigintr(nmp, td)) {
 2844                         error = EINTR;
 2845                         goto done;
 2846                 }
 2847                 goto loop;
 2848         }
 2849         if (passone) {
 2850                 passone = 0;
 2851                 BO_UNLOCK(bo);
 2852                 goto again;
 2853         }
 2854         if (waitfor == MNT_WAIT) {
 2855                 while (bo->bo_numoutput) {
 2856                         error = bufobj_wwait(bo, slpflag, slptimeo);
 2857                         if (error) {
 2858                             BO_UNLOCK(bo);
 2859                             if (called_from_renewthread != 0) {
 2860                                 /*
 2861                                  * Return EIO so that the flush will be
 2862                                  * retried later.
 2863                                  */
 2864                                 error = EIO;
 2865                                 goto done;
 2866                             }
 2867                             error = newnfs_sigintr(nmp, td);
 2868                             if (error)
 2869                                 goto done;
 2870                             if (slpflag & PCATCH) {
 2871                                 slpflag = 0;
 2872                                 slptimeo = 2 * hz;
 2873                             }
 2874                             BO_LOCK(bo);
 2875                         }
 2876                 }
 2877                 if (bo->bo_dirty.bv_cnt != 0 && commit) {
 2878                         BO_UNLOCK(bo);
 2879                         goto loop;
 2880                 }
 2881                 /*
 2882                  * Wait for all the async IO requests to drain
 2883                  */
 2884                 BO_UNLOCK(bo);
 2885                 mtx_lock(&np->n_mtx);
 2886                 while (np->n_directio_asyncwr > 0) {
 2887                         np->n_flag |= NFSYNCWAIT;
 2888                         error = newnfs_msleep(td, &np->n_directio_asyncwr,
 2889                             &np->n_mtx, slpflag | (PRIBIO + 1), 
 2890                             "nfsfsync", 0);
 2891                         if (error) {
 2892                                 if (newnfs_sigintr(nmp, td)) {
 2893                                         mtx_unlock(&np->n_mtx);
 2894                                         error = EINTR;  
 2895                                         goto done;
 2896                                 }
 2897                         }
 2898                 }
 2899                 mtx_unlock(&np->n_mtx);
 2900         } else
 2901                 BO_UNLOCK(bo);
 2902         mtx_lock(&np->n_mtx);
 2903         if (np->n_flag & NWRITEERR) {
 2904                 error = np->n_error;
 2905                 np->n_flag &= ~NWRITEERR;
 2906         }
 2907         if (commit && bo->bo_dirty.bv_cnt == 0 &&
 2908             bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
 2909                 np->n_flag &= ~NMODIFIED;
 2910         mtx_unlock(&np->n_mtx);
 2911 done:
 2912         if (bvec != NULL && bvec != bvec_on_stack)
 2913                 free(bvec, M_TEMP);
 2914         if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
 2915             (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
 2916              np->n_directio_asyncwr != 0) && trycnt++ < 5) {
 2917                 /* try, try again... */
 2918                 passone = 1;
 2919                 wcred = NULL;
 2920                 bvec = NULL;
 2921                 bvecsize = 0;
 2922 printf("try%d\n", trycnt);
 2923                 goto again;
 2924         }
 2925         return (error);
 2926 }
 2927 
 2928 /*
 2929  * NFS advisory byte-level locks.
 2930  */
 2931 static int
 2932 nfs_advlock(struct vop_advlock_args *ap)
 2933 {
 2934         struct vnode *vp = ap->a_vp;
 2935         struct ucred *cred;
 2936         struct nfsnode *np = VTONFS(ap->a_vp);
 2937         struct proc *p = (struct proc *)ap->a_id;
 2938         struct thread *td = curthread;  /* XXX */
 2939         struct vattr va;
 2940         int ret, error = EOPNOTSUPP;
 2941         u_quad_t size;
 2942         
 2943         if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
 2944                 if ((ap->a_flags & F_POSIX) != 0)
 2945                         cred = p->p_ucred;
 2946                 else
 2947                         cred = td->td_ucred;
 2948                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2949                 if (vp->v_iflag & VI_DOOMED) {
 2950                         NFSVOPUNLOCK(vp, 0);
 2951                         return (EBADF);
 2952                 }
 2953 
 2954                 /*
 2955                  * If this is unlocking a write locked region, flush and
 2956                  * commit them before unlocking. This is required by
 2957                  * RFC3530 Sec. 9.3.2.
 2958                  */
 2959                 if (ap->a_op == F_UNLCK &&
 2960                     nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
 2961                     ap->a_flags))
 2962                         (void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);
 2963 
 2964                 /*
 2965                  * Loop around doing the lock op, while a blocking lock
 2966                  * must wait for the lock op to succeed.
 2967                  */
 2968                 do {
 2969                         ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
 2970                             ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
 2971                         if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 2972                             ap->a_op == F_SETLK) {
 2973                                 NFSVOPUNLOCK(vp, 0);
 2974                                 error = nfs_catnap(PZERO | PCATCH, ret,
 2975                                     "ncladvl");
 2976                                 if (error)
 2977                                         return (EINTR);
 2978                                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2979                                 if (vp->v_iflag & VI_DOOMED) {
 2980                                         NFSVOPUNLOCK(vp, 0);
 2981                                         return (EBADF);
 2982                                 }
 2983                         }
 2984                 } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 2985                      ap->a_op == F_SETLK);
 2986                 if (ret == NFSERR_DENIED) {
 2987                         NFSVOPUNLOCK(vp, 0);
 2988                         return (EAGAIN);
 2989                 } else if (ret == EINVAL || ret == EBADF || ret == EINTR) {
 2990                         NFSVOPUNLOCK(vp, 0);
 2991                         return (ret);
 2992                 } else if (ret != 0) {
 2993                         NFSVOPUNLOCK(vp, 0);
 2994                         return (EACCES);
 2995                 }
 2996 
 2997                 /*
 2998                  * Now, if we just got a lock, invalidate data in the buffer
 2999                  * cache, as required, so that the coherency conforms with
 3000                  * RFC3530 Sec. 9.3.2.
 3001                  */
 3002                 if (ap->a_op == F_SETLK) {
 3003                         if ((np->n_flag & NMODIFIED) == 0) {
 3004                                 np->n_attrstamp = 0;
 3005                                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 3006                                 ret = VOP_GETATTR(vp, &va, cred);
 3007                         }
 3008                         if ((np->n_flag & NMODIFIED) || ret ||
 3009                             np->n_change != va.va_filerev) {
 3010                                 (void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
 3011                                 np->n_attrstamp = 0;
 3012                                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 3013                                 ret = VOP_GETATTR(vp, &va, cred);
 3014                                 if (!ret) {
 3015                                         np->n_mtime = va.va_mtime;
 3016                                         np->n_change = va.va_filerev;
 3017                                 }
 3018                         }
 3019                 }
 3020                 NFSVOPUNLOCK(vp, 0);
 3021                 return (0);
 3022         } else if (!NFS_ISV4(vp)) {
 3023                 error = NFSVOPLOCK(vp, LK_SHARED);
 3024                 if (error)
 3025                         return (error);
 3026                 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 3027                         size = VTONFS(vp)->n_size;
 3028                         NFSVOPUNLOCK(vp, 0);
 3029                         error = lf_advlock(ap, &(vp->v_lockf), size);
 3030                 } else {
 3031                         if (nfs_advlock_p != NULL)
 3032                                 error = nfs_advlock_p(ap);
 3033                         else {
 3034                                 NFSVOPUNLOCK(vp, 0);
 3035                                 error = ENOLCK;
 3036                         }
 3037                 }
 3038         }
 3039         return (error);
 3040 }
 3041 
 3042 /*
 3043  * NFS advisory byte-level locks.
 3044  */
 3045 static int
 3046 nfs_advlockasync(struct vop_advlockasync_args *ap)
 3047 {
 3048         struct vnode *vp = ap->a_vp;
 3049         u_quad_t size;
 3050         int error;
 3051         
 3052         if (NFS_ISV4(vp))
 3053                 return (EOPNOTSUPP);
 3054         error = NFSVOPLOCK(vp, LK_SHARED);
 3055         if (error)
 3056                 return (error);
 3057         if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 3058                 size = VTONFS(vp)->n_size;
 3059                 NFSVOPUNLOCK(vp, 0);
 3060                 error = lf_advlockasync(ap, &(vp->v_lockf), size);
 3061         } else {
 3062                 NFSVOPUNLOCK(vp, 0);
 3063                 error = EOPNOTSUPP;
 3064         }
 3065         return (error);
 3066 }
 3067 
 3068 /*
 3069  * Print out the contents of an nfsnode.
 3070  */
 3071 static int
 3072 nfs_print(struct vop_print_args *ap)
 3073 {
 3074         struct vnode *vp = ap->a_vp;
 3075         struct nfsnode *np = VTONFS(vp);
 3076 
 3077         ncl_printf("\tfileid %ld fsid 0x%x",
 3078            np->n_vattr.na_fileid, np->n_vattr.na_fsid);
 3079         if (vp->v_type == VFIFO)
 3080                 fifo_printinfo(vp);
 3081         printf("\n");
 3082         return (0);
 3083 }
 3084 
 3085 /*
 3086  * This is the "real" nfs::bwrite(struct buf*).
 3087  * We set B_CACHE if this is a VMIO buffer.
 3088  */
 3089 int
 3090 ncl_writebp(struct buf *bp, int force __unused, struct thread *td)
 3091 {
 3092         int s;
 3093         int oldflags = bp->b_flags;
 3094 #if 0
 3095         int retv = 1;
 3096         off_t off;
 3097 #endif
 3098 
 3099         BUF_ASSERT_HELD(bp);
 3100 
 3101         if (bp->b_flags & B_INVAL) {
 3102                 brelse(bp);
 3103                 return(0);
 3104         }
 3105 
 3106         bp->b_flags |= B_CACHE;
 3107 
 3108         /*
 3109          * Undirty the bp.  We will redirty it later if the I/O fails.
 3110          */
 3111 
 3112         s = splbio();
 3113         bundirty(bp);
 3114         bp->b_flags &= ~B_DONE;
 3115         bp->b_ioflags &= ~BIO_ERROR;
 3116         bp->b_iocmd = BIO_WRITE;
 3117 
 3118         bufobj_wref(bp->b_bufobj);
 3119         curthread->td_ru.ru_oublock++;
 3120         splx(s);
 3121 
 3122         /*
 3123          * Note: to avoid loopback deadlocks, we do not
 3124          * assign b_runningbufspace.
 3125          */
 3126         vfs_busy_pages(bp, 1);
 3127 
 3128         BUF_KERNPROC(bp);
 3129         bp->b_iooffset = dbtob(bp->b_blkno);
 3130         bstrategy(bp);
 3131 
 3132         if( (oldflags & B_ASYNC) == 0) {
 3133                 int rtval = bufwait(bp);
 3134 
 3135                 if (oldflags & B_DELWRI) {
 3136                         s = splbio();
 3137                         reassignbuf(bp);
 3138                         splx(s);
 3139                 }
 3140                 brelse(bp);
 3141                 return (rtval);
 3142         }
 3143 
 3144         return (0);
 3145 }
 3146 
 3147 /*
 3148  * nfs special file access vnode op.
 3149  * Essentially just get vattr and then imitate iaccess() since the device is
 3150  * local to the client.
 3151  */
 3152 static int
 3153 nfsspec_access(struct vop_access_args *ap)
 3154 {
 3155         struct vattr *vap;
 3156         struct ucred *cred = ap->a_cred;
 3157         struct vnode *vp = ap->a_vp;
 3158         accmode_t accmode = ap->a_accmode;
 3159         struct vattr vattr;
 3160         int error;
 3161 
 3162         /*
 3163          * Disallow write attempts on filesystems mounted read-only;
 3164          * unless the file is a socket, fifo, or a block or character
 3165          * device resident on the filesystem.
 3166          */
 3167         if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 3168                 switch (vp->v_type) {
 3169                 case VREG:
 3170                 case VDIR:
 3171                 case VLNK:
 3172                         return (EROFS);
 3173                 default:
 3174                         break;
 3175                 }
 3176         }
 3177         vap = &vattr;
 3178         error = VOP_GETATTR(vp, vap, cred);
 3179         if (error)
 3180                 goto out;
 3181         error  = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
 3182             accmode, cred, NULL);
 3183 out:
 3184         return error;
 3185 }
 3186 
 3187 /*
 3188  * Read wrapper for fifos.
 3189  */
 3190 static int
 3191 nfsfifo_read(struct vop_read_args *ap)
 3192 {
 3193         struct nfsnode *np = VTONFS(ap->a_vp);
 3194         int error;
 3195 
 3196         /*
 3197          * Set access flag.
 3198          */
 3199         mtx_lock(&np->n_mtx);
 3200         np->n_flag |= NACC;
 3201         getnanotime(&np->n_atim);
 3202         mtx_unlock(&np->n_mtx);
 3203         error = fifo_specops.vop_read(ap);
 3204         return error;   
 3205 }
 3206 
 3207 /*
 3208  * Write wrapper for fifos.
 3209  */
 3210 static int
 3211 nfsfifo_write(struct vop_write_args *ap)
 3212 {
 3213         struct nfsnode *np = VTONFS(ap->a_vp);
 3214 
 3215         /*
 3216          * Set update flag.
 3217          */
 3218         mtx_lock(&np->n_mtx);
 3219         np->n_flag |= NUPD;
 3220         getnanotime(&np->n_mtim);
 3221         mtx_unlock(&np->n_mtx);
 3222         return(fifo_specops.vop_write(ap));
 3223 }
 3224 
 3225 /*
 3226  * Close wrapper for fifos.
 3227  *
 3228  * Update the times on the nfsnode then do fifo close.
 3229  */
 3230 static int
 3231 nfsfifo_close(struct vop_close_args *ap)
 3232 {
 3233         struct vnode *vp = ap->a_vp;
 3234         struct nfsnode *np = VTONFS(vp);
 3235         struct vattr vattr;
 3236         struct timespec ts;
 3237 
 3238         mtx_lock(&np->n_mtx);
 3239         if (np->n_flag & (NACC | NUPD)) {
 3240                 getnanotime(&ts);
 3241                 if (np->n_flag & NACC)
 3242                         np->n_atim = ts;
 3243                 if (np->n_flag & NUPD)
 3244                         np->n_mtim = ts;
 3245                 np->n_flag |= NCHG;
 3246                 if (vrefcnt(vp) == 1 &&
 3247                     (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 3248                         VATTR_NULL(&vattr);
 3249                         if (np->n_flag & NACC)
 3250                                 vattr.va_atime = np->n_atim;
 3251                         if (np->n_flag & NUPD)
 3252                                 vattr.va_mtime = np->n_mtim;
 3253                         mtx_unlock(&np->n_mtx);
 3254                         (void)VOP_SETATTR(vp, &vattr, ap->a_cred);
 3255                         goto out;
 3256                 }
 3257         }
 3258         mtx_unlock(&np->n_mtx);
 3259 out:
 3260         return (fifo_specops.vop_close(ap));
 3261 }
 3262 
 3263 /*
 3264  * Just call ncl_writebp() with the force argument set to 1.
 3265  *
 3266  * NOTE: B_DONE may or may not be set in a_bp on call.
 3267  */
 3268 static int
 3269 nfs_bwrite(struct buf *bp)
 3270 {
 3271 
 3272         return (ncl_writebp(bp, 1, curthread));
 3273 }
 3274 
/*
 * Buffer operations vector for the NFS client.
 * Only the write hook is NFS specific (nfs_bwrite); the strategy, sync and
 * bdflush hooks are set to the buf* routines named below, which are not
 * defined in this part of the file.
 * Note that the name string is "buf_ops_nfs", not the variable's name.
 */
struct buf_ops buf_ops_newnfs = {
        .bop_name       =       "buf_ops_nfs",
        .bop_write      =       nfs_bwrite,
        .bop_strategy   =       bufstrategy,
        .bop_sync       =       bufsync,
        .bop_bdflush    =       bufbdflush,
};
 3282 
 3283 /*
 3284  * Cloned from vop_stdlock(), and then the ugly hack added.
 3285  */
 3286 static int
 3287 nfs_lock1(struct vop_lock1_args *ap)
 3288 {
 3289         struct vnode *vp = ap->a_vp;
 3290         int error = 0;
 3291 
 3292         /*
 3293          * Since vfs_hash_get() calls vget() and it will no longer work
 3294          * for FreeBSD8 with flags == 0, I can only think of this horrible
 3295          * hack to work around it. I call vfs_hash_get() with LK_EXCLOTHER
 3296          * and then handle it here. All I want for this case is a v_usecount
 3297          * on the vnode to use for recovery, while another thread might
 3298          * hold a lock on the vnode. I have the other threads blocked, so
 3299          * there isn't any race problem.
 3300          */
 3301         if ((ap->a_flags & LK_TYPE_MASK) == LK_EXCLOTHER) {
 3302                 if ((ap->a_flags & LK_INTERLOCK) == 0)
 3303                         panic("ncllock1");
 3304                 if ((vp->v_iflag & VI_DOOMED))
 3305                         error = ENOENT;
 3306                 VI_UNLOCK(vp);
 3307                 return (error);
 3308         }
 3309         return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
 3310             LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
 3311             ap->a_line));
 3312 }
 3313 
 3314 static int
 3315 nfs_getacl(struct vop_getacl_args *ap)
 3316 {
 3317         int error;
 3318 
 3319         if (ap->a_type != ACL_TYPE_NFS4)
 3320                 return (EOPNOTSUPP);
 3321         error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
 3322             NULL);
 3323         if (error > NFSERR_STALE) {
 3324                 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
 3325                 error = EPERM;
 3326         }
 3327         return (error);
 3328 }
 3329 
 3330 static int
 3331 nfs_setacl(struct vop_setacl_args *ap)
 3332 {
 3333         int error;
 3334 
 3335         if (ap->a_type != ACL_TYPE_NFS4)
 3336                 return (EOPNOTSUPP);
 3337         error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
 3338             NULL);
 3339         if (error > NFSERR_STALE) {
 3340                 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
 3341                 error = EPERM;
 3342         }
 3343         return (error);
 3344 }
 3345 
 3346 /*
 3347  * Return POSIX pathconf information applicable to nfs filesystems.
 3348  */
 3349 static int
 3350 nfs_pathconf(struct vop_pathconf_args *ap)
 3351 {
 3352         struct nfsv3_pathconf pc;
 3353         struct nfsvattr nfsva;
 3354         struct vnode *vp = ap->a_vp;
 3355         struct thread *td = curthread;
 3356         int attrflag, error;
 3357 
 3358         if (NFS_ISV4(vp) || (NFS_ISV3(vp) && (ap->a_name == _PC_LINK_MAX ||
 3359             ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
 3360             ap->a_name == _PC_NO_TRUNC))) {
 3361                 /*
 3362                  * Since only the above 4 a_names are returned by the NFSv3
 3363                  * Pathconf RPC, there is no point in doing it for others.
 3364                  */
 3365                 error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
 3366                     &attrflag, NULL);
 3367                 if (attrflag != 0)
 3368                         (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
 3369                             1);
 3370                 if (error != 0)
 3371                         return (error);
 3372         } else {
 3373                 /*
 3374                  * For NFSv2 (or NFSv3 when not one of the above 4 a_names),
 3375                  * just fake them.
 3376                  */
 3377                 pc.pc_linkmax = LINK_MAX;
 3378                 pc.pc_namemax = NFS_MAXNAMLEN;
 3379                 pc.pc_notrunc = 1;
 3380                 pc.pc_chownrestricted = 1;
 3381                 pc.pc_caseinsensitive = 0;
 3382                 pc.pc_casepreserving = 1;
 3383                 error = 0;
 3384         }
 3385         switch (ap->a_name) {
 3386         case _PC_LINK_MAX:
 3387                 *ap->a_retval = pc.pc_linkmax;
 3388                 break;
 3389         case _PC_NAME_MAX:
 3390                 *ap->a_retval = pc.pc_namemax;
 3391                 break;
 3392         case _PC_PATH_MAX:
 3393                 *ap->a_retval = PATH_MAX;
 3394                 break;
 3395         case _PC_PIPE_BUF:
 3396                 *ap->a_retval = PIPE_BUF;
 3397                 break;
 3398         case _PC_CHOWN_RESTRICTED:
 3399                 *ap->a_retval = pc.pc_chownrestricted;
 3400                 break;
 3401         case _PC_NO_TRUNC:
 3402                 *ap->a_retval = pc.pc_notrunc;
 3403                 break;
 3404         case _PC_ACL_EXTENDED:
 3405                 *ap->a_retval = 0;
 3406                 break;
 3407         case _PC_ACL_NFS4:
 3408                 if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 &&
 3409                     NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL))
 3410                         *ap->a_retval = 1;
 3411                 else
 3412                         *ap->a_retval = 0;
 3413                 break;
 3414         case _PC_ACL_PATH_MAX:
 3415                 if (NFS_ISV4(vp))
 3416                         *ap->a_retval = ACL_MAX_ENTRIES;
 3417                 else
 3418                         *ap->a_retval = 3;
 3419                 break;
 3420         case _PC_MAC_PRESENT:
 3421                 *ap->a_retval = 0;
 3422                 break;
 3423         case _PC_ASYNC_IO:
 3424                 /* _PC_ASYNC_IO should have been handled by upper layers. */
 3425                 KASSERT(0, ("_PC_ASYNC_IO should not get here"));
 3426                 error = EINVAL;
 3427                 break;
 3428         case _PC_PRIO_IO:
 3429                 *ap->a_retval = 0;
 3430                 break;
 3431         case _PC_SYNC_IO:
 3432                 *ap->a_retval = 0;
 3433                 break;
 3434         case _PC_ALLOC_SIZE_MIN:
 3435                 *ap->a_retval = vp->v_mount->mnt_stat.f_bsize;
 3436                 break;
 3437         case _PC_FILESIZEBITS:
 3438                 if (NFS_ISV34(vp))
 3439                         *ap->a_retval = 64;
 3440                 else
 3441                         *ap->a_retval = 32;
 3442                 break;
 3443         case _PC_REC_INCR_XFER_SIZE:
 3444                 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 3445                 break;
 3446         case _PC_REC_MAX_XFER_SIZE:
 3447                 *ap->a_retval = -1; /* means ``unlimited'' */
 3448                 break;
 3449         case _PC_REC_MIN_XFER_SIZE:
 3450                 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 3451                 break;
 3452         case _PC_REC_XFER_ALIGN:
 3453                 *ap->a_retval = PAGE_SIZE;
 3454                 break;
 3455         case _PC_SYMLINK_MAX:
 3456                 *ap->a_retval = NFS_MAXPATHLEN;
 3457                 break;
 3458 
 3459         default:
 3460                 error = EINVAL;
 3461                 break;
 3462         }
 3463         return (error);
 3464 }
 3465
Cache object: 79a67fa6c184aa316305b9f5161af980
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/fs/nfsclient/nfs_clvnops.c

FreeBSD/Linux Kernel Cross Reference
sys/fs/nfsclient/nfs_clvnops.c