nfs_vnops.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD$");
   37 
   38 /*
   39  * vnode op calls for Sun NFS version 2 and 3
   40  */
   41 
   42 #include "opt_inet.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/kernel.h>
   46 #include <sys/systm.h>
   47 #include <sys/resourcevar.h>
   48 #include <sys/proc.h>
   49 #include <sys/mount.h>
   50 #include <sys/bio.h>
   51 #include <sys/buf.h>
   52 #include <sys/malloc.h>
   53 #include <sys/mbuf.h>
   54 #include <sys/namei.h>
   55 #include <sys/socket.h>
   56 #include <sys/vnode.h>
   57 #include <sys/dirent.h>
   58 #include <sys/fcntl.h>
   59 #include <sys/lockf.h>
   60 #include <sys/stat.h>
   61 #include <sys/sysctl.h>
   62 #include <sys/signalvar.h>
   63 
   64 #include <vm/vm.h>
   65 #include <vm/vm_object.h>
   66 #include <vm/vm_extern.h>
   67 #include <vm/vm_object.h>
   68 
   69 #include <fs/fifofs/fifo.h>
   70 
   71 #include <rpc/rpcclnt.h>
   72 
   73 #include <nfs/rpcv2.h>
   74 #include <nfs/nfsproto.h>
   75 #include <nfsclient/nfs.h>
   76 #include <nfsclient/nfsnode.h>
   77 #include <nfsclient/nfsmount.h>
   78 #include <nfsclient/nfs_lock.h>
   79 #include <nfs/xdr_subs.h>
   80 #include <nfsclient/nfsm_subs.h>
   81 
   82 #include <net/if.h>
   83 #include <netinet/in.h>
   84 #include <netinet/in_var.h>
   85 
   86 /* Defs */
   87 #define TRUE    1
   88 #define FALSE   0
   89 
   90 /*
   91  * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
   92  * calls are not in getblk() and brelse() so that they would not be necessary
   93  * here.
   94  */
   95 #ifndef B_VMIO
   96 #define vfs_busy_pages(bp, f)
   97 #endif
   98 
   99 static vop_read_t       nfsfifo_read;
  100 static vop_write_t      nfsfifo_write;
  101 static vop_close_t      nfsfifo_close;
  102 static int      nfs_flush(struct vnode *, int, struct thread *,
  103                     int);
  104 static int      nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
  105                     struct thread *);
  106 static vop_lookup_t     nfs_lookup;
  107 static vop_create_t     nfs_create;
  108 static vop_mknod_t      nfs_mknod;
  109 static vop_open_t       nfs_open;
  110 static vop_close_t      nfs_close;
  111 static vop_access_t     nfs_access;
  112 static vop_getattr_t    nfs_getattr;
  113 static vop_setattr_t    nfs_setattr;
  114 static vop_read_t       nfs_read;
  115 static vop_fsync_t      nfs_fsync;
  116 static vop_remove_t     nfs_remove;
  117 static vop_link_t       nfs_link;
  118 static vop_rename_t     nfs_rename;
  119 static vop_mkdir_t      nfs_mkdir;
  120 static vop_rmdir_t      nfs_rmdir;
  121 static vop_symlink_t    nfs_symlink;
  122 static vop_readdir_t    nfs_readdir;
  123 static vop_strategy_t   nfs_strategy;
  124 static  int     nfs_lookitup(struct vnode *, const char *, int,
  125                     struct ucred *, struct thread *, struct nfsnode **);
  126 static  int     nfs_sillyrename(struct vnode *, struct vnode *,
  127                     struct componentname *);
  128 static vop_access_t     nfsspec_access;
  129 static vop_readlink_t   nfs_readlink;
  130 static vop_print_t      nfs_print;
  131 static vop_advlock_t    nfs_advlock;
  132 static vop_advlockasync_t nfs_advlockasync;
  133 
  134 /*
  135  * Global vfs data structures for nfs
  136  */
  137 struct vop_vector nfs_vnodeops = {
  138         .vop_default =          &default_vnodeops,
  139         .vop_access =           nfs_access,
  140         .vop_advlock =          nfs_advlock,
  141         .vop_advlockasync =     nfs_advlockasync,
  142         .vop_close =            nfs_close,
  143         .vop_create =           nfs_create,
  144         .vop_fsync =            nfs_fsync,
  145         .vop_getattr =          nfs_getattr,
  146         .vop_getpages =         nfs_getpages,
  147         .vop_putpages =         nfs_putpages,
  148         .vop_inactive =         nfs_inactive,
  149         .vop_lease =            VOP_NULL,
  150         .vop_link =             nfs_link,
  151         .vop_lookup =           nfs_lookup,
  152         .vop_mkdir =            nfs_mkdir,
  153         .vop_mknod =            nfs_mknod,
  154         .vop_open =             nfs_open,
  155         .vop_print =            nfs_print,
  156         .vop_read =             nfs_read,
  157         .vop_readdir =          nfs_readdir,
  158         .vop_readlink =         nfs_readlink,
  159         .vop_reclaim =          nfs_reclaim,
  160         .vop_remove =           nfs_remove,
  161         .vop_rename =           nfs_rename,
  162         .vop_rmdir =            nfs_rmdir,
  163         .vop_setattr =          nfs_setattr,
  164         .vop_strategy =         nfs_strategy,
  165         .vop_symlink =          nfs_symlink,
  166         .vop_write =            nfs_write,
  167 };
  168 
  169 struct vop_vector nfs_fifoops = {
  170         .vop_default =          &fifo_specops,
  171         .vop_access =           nfsspec_access,
  172         .vop_close =            nfsfifo_close,
  173         .vop_fsync =            nfs_fsync,
  174         .vop_getattr =          nfs_getattr,
  175         .vop_inactive =         nfs_inactive,
  176         .vop_print =            nfs_print,
  177         .vop_read =             nfsfifo_read,
  178         .vop_reclaim =          nfs_reclaim,
  179         .vop_setattr =          nfs_setattr,
  180         .vop_write =            nfsfifo_write,
  181 };
  182 
  183 static int      nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
  184                              struct componentname *cnp, struct vattr *vap);
  185 static int      nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
  186                               struct ucred *cred, struct thread *td);
  187 static int      nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
  188                               int fnamelen, struct vnode *tdvp,
  189                               const char *tnameptr, int tnamelen,
  190                               struct ucred *cred, struct thread *td);
  191 static int      nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
  192                              struct sillyrename *sp);
  193 
  194 /*
  195  * Global variables
  196  */
  197 struct mtx      nfs_iod_mtx;
  198 enum nfsiod_state nfs_iodwant[NFS_MAXASYNCDAEMON];
  199 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
  200 int              nfs_numasync = 0;
  201 vop_advlock_t   *nfs_advlock_p = nfs_dolock;
  202 vop_reclaim_t   *nfs_reclaim_p = NULL;
  203 #define DIRHDSIZ        (sizeof (struct dirent) - (MAXNAMLEN + 1))
  204 
  205 SYSCTL_DECL(_vfs_nfs);
  206 
  207 static int      nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
  208 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
  209            &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
  210 
  211 static int      nfs_prime_access_cache = 1;
  212 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
  213            &nfs_prime_access_cache, 0,
  214            "Prime NFS ACCESS cache when fetching attributes");
  215 
  216 static int      nfsv3_commit_on_close = 0;
  217 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
  218            &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
  219 
  220 static int      nfs_clean_pages_on_close = 1;
  221 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
  222            &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
  223 
  224 int nfs_directio_enable = 0;
  225 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
  226            &nfs_directio_enable, 0, "Enable NFS directio");
  227 
  228 static u_int    nametimeo = NFS_DEFAULT_NAMETIMEO;
  229 SYSCTL_UINT(_vfs_nfs, OID_AUTO, name_timeout, CTLFLAG_RW,
  230            &nametimeo, 0, "Positive name cache entry timeout");
  231 
  232 static u_int    negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
  233 SYSCTL_UINT(_vfs_nfs, OID_AUTO, negative_name_timeout, CTLFLAG_RW,
  234            &negnametimeo, 0, "Negative name cache entry timeout");
  235 
  236 /*
  237  * This sysctl allows other processes to mmap a file that has been opened
  238  * O_DIRECT by a process.  In general, having processes mmap the file while
  239  * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
  240  * this by default to prevent DoS attacks - to prevent a malicious user from
  241  * opening up files O_DIRECT preventing other users from mmap'ing these
  242  * files.  "Protected" environments where stricter consistency guarantees are
  243  * required can disable this knob.  The process that opened the file O_DIRECT
  244  * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
  245  * meaningful.
  246  */
  247 int nfs_directio_allow_mmap = 1;
  248 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
  249            &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
  250 
  251 #if 0
  252 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
  253            &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
  254 
  255 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
  256            &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
  257 #endif
  258 
  259 #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY          \
  260                          | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE     \
  261                          | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
  262 
  263 /*
  264  * SMP Locking Note:
  265  * Each entry below names a lock and what it protects.  The indented names
  266  * under an entry are the other locks acquired, in order, while it is held.
  267  * np->n_mtx : Protects the fields in the nfsnode.
  268  *      VM Object Lock
  269  *      VI_MTX (acquired indirectly)
  270  * nmp->nm_mtx : Protects the fields in the nfsmount.
  271  *      rep->r_mtx
  272  * nfs_iod_mtx : Global lock, protects shared nfsiod state.
  273  * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
  274  *      nmp->nm_mtx
  275  *      rep->r_mtx
  276  * rep->r_mtx : Protects the fields in an nfsreq.
  277  */
  278 
      /*
       * Send an NFSv3 ACCESS request for `vp' over the wire and record the
       * reply in the nfsnode's cached access state.
       *
       *   vp    - vnode whose access rights are queried
       *   wmode - NFSV3ACCESS_* bits placed in the request
       *   td    - thread handed to nfsm_request()
       *   cred  - credential handed to nfsm_request(); its cr_uid is stored
       *           as the owner of the cached result
       *
       * When no error is pending after the request, the mode word taken from
       * the reply is stored in np->n_mode together with cred->cr_uid
       * (n_modeuid) and the current time_second (n_modestamp), all under
       * np->n_mtx.  Returns the final value of `error' (0 when nothing
       * failed).
       *
       * NOTE(review): mrep, md and dpos are never assigned explicitly and the
       * nfsmout label has no explicit goto, so the nfsm_* macros (defined
       * outside this file) presumably fill in these locals by name and jump
       * to nfsmout on failure.  Do not rename the locals or the label without
       * checking the macro definitions.
       */
  279 static int
  280 nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
  281     struct ucred *cred)
  282 {
  283         const int v3 = 1;       /* this request is always built as NFSv3 */
  284         u_int32_t *tl;
  285         int error = 0, attrflag;
  286 
  287         struct mbuf *mreq, *mrep, *md, *mb;
  288         caddr_t bpos, dpos;
  289         u_int32_t rmode;
  290         struct nfsnode *np = VTONFS(vp);
  291 
              /* Account for the call, then build the request: file handle + mode word. */
  292         nfsstats.rpccnt[NFSPROC_ACCESS]++;
  293         mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
  294         mb = mreq;
  295         bpos = mtod(mb, caddr_t);
  296         nfsm_fhtom(vp, v3);
  297         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  298         *tl = txdr_unsigned(wmode);
  299         nfsm_request(vp, NFSPROC_ACCESS, td, cred);
              /* attrflag is handed to the macro but not examined afterwards. */
  300         nfsm_postop_attr(vp, attrflag);
  301         if (!error) {
                      /* Decode the granted mode and publish it under the node lock. */
  302                 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
  303                 rmode = fxdr_unsigned(u_int32_t, *tl);
  304                 mtx_lock(&np->n_mtx);
  305                 np->n_mode = rmode;
  306                 np->n_modeuid = cred->cr_uid;
  307                 np->n_modestamp = time_second;
  308                 mtx_unlock(&np->n_mtx);
  309         }
              /* Release the reply chain; the nfsmout label below skips this. */
  310         m_freem(mrep);
  311 nfsmout:
  312         return (error);
  313 }
  314 
  315 /*
  316  * nfs access vnode op.
  317  * For nfs version 2, just return ok. File accesses may fail later.
  318  * For nfs version 3, use the access rpc to check accessibility. If file modes
  319  * are changed on the server, accesses might still fail later.
  320  */
  321 static int
  322 nfs_access(struct vop_access_args *ap)
  323 {
  324         struct vnode *vp = ap->a_vp;
  325         int error = 0;
  326         u_int32_t mode, wmode;
  327         int v3 = NFS_ISV3(vp);
  328         struct nfsnode *np = VTONFS(vp);
  329 
  330         /*
  331          * Disallow write attempts on filesystems mounted read-only;
  332          * unless the file is a socket, fifo, or a block or character
  333          * device resident on the filesystem.
  334          */
  335         if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
  336                 switch (vp->v_type) {
  337                 case VREG:
  338                 case VDIR:
  339                 case VLNK:
  340                         return (EROFS);
  341                 default:
  342                         break;
  343                 }
  344         }
  345         /*
  346          * For nfs v3, check to see if we have done this recently, and if
  347          * so return our cached result instead of making an ACCESS call.
  348          * If not, do an access rpc, otherwise you are stuck emulating
  349          * ufs_access() locally using the vattr. This may not be correct,
  350          * since the server may apply other access criteria such as
  351          * client uid-->server uid mapping that we do not know about.
  352          */
  353         if (v3) {
  354                 if (ap->a_mode & VREAD)
  355                         mode = NFSV3ACCESS_READ;
  356                 else
  357                         mode = 0;
  358                 if (vp->v_type != VDIR) {
  359                         if (ap->a_mode & VWRITE)
  360                                 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
  361                         if (ap->a_mode & VEXEC)
  362                                 mode |= NFSV3ACCESS_EXECUTE;
  363                 } else {
  364                         if (ap->a_mode & VWRITE)
  365                                 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
  366                                          NFSV3ACCESS_DELETE);
  367                         if (ap->a_mode & VEXEC)
  368                                 mode |= NFSV3ACCESS_LOOKUP;
  369                 }
  370                 /* XXX safety belt, only make blanket request if caching */
  371                 if (nfsaccess_cache_timeout > 0) {
  372                         wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
  373                                 NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
  374                                 NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
  375                 } else {
  376                         wmode = mode;
  377                 }
  378 
  379                 /*
  380                  * Does our cached result allow us to give a definite yes to
  381                  * this request?
  382                  */
  383                 mtx_lock(&np->n_mtx);
  384                 if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
  385                     (ap->a_cred->cr_uid == np->n_modeuid) &&
  386                     ((np->n_mode & mode) == mode)) {
  387                         nfsstats.accesscache_hits++;
  388                 } else {
  389                         /*
  390                          * Either a no, or a don't know.  Go to the wire.
  391                          */
  392                         nfsstats.accesscache_misses++;
  393                         mtx_unlock(&np->n_mtx);
  394                         error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred);
  395                         mtx_lock(&np->n_mtx);
  396                         if (!error) {
  397                                 if ((np->n_mode & mode) != mode) {
  398                                         error = EACCES;
  399                                 }
  400                         }
  401                 }
  402                 mtx_unlock(&np->n_mtx);
  403                 return (error);
  404         } else {
  405                 if ((error = nfsspec_access(ap)) != 0) {
  406                         return (error);
  407                 }
  408                 /*
  409                  * Attempt to prevent a mapped root from accessing a file
  410                  * which it shouldn't.  We try to read a byte from the file
  411                  * if the user is root and the file is not zero length.
  412                  * After calling nfsspec_access, we should have the correct
  413                  * file size cached.
  414                  */
  415                 mtx_lock(&np->n_mtx);
  416                 if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
  417                     && VTONFS(vp)->n_size > 0) {
  418                         struct iovec aiov;
  419                         struct uio auio;
  420                         char buf[1];
  421 
  422                         mtx_unlock(&np->n_mtx);
  423                         aiov.iov_base = buf;
  424                         aiov.iov_len = 1;
  425                         auio.uio_iov = &aiov;
  426                         auio.uio_iovcnt = 1;
  427                         auio.uio_offset = 0;
  428                         auio.uio_resid = 1;
  429                         auio.uio_segflg = UIO_SYSSPACE;
  430                         auio.uio_rw = UIO_READ;
  431                         auio.uio_td = ap->a_td;
  432 
  433                         if (vp->v_type == VREG)
  434                                 error = nfs_readrpc(vp, &auio, ap->a_cred);
  435                         else if (vp->v_type == VDIR) {
  436                                 char* bp;
  437                                 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
  438                                 aiov.iov_base = bp;
  439                                 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
  440                                 error = nfs_readdirrpc(vp, &auio, ap->a_cred);
  441                                 free(bp, M_TEMP);
  442                         } else if (vp->v_type == VLNK)
  443                                 error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
  444                         else
  445                                 error = EACCES;
  446                 } else
  447                         mtx_unlock(&np->n_mtx);
  448                 return (error);
  449         }
  450 }
  451 
  452 int nfs_otw_getattr_avoid = 0;  /* not referenced in the visible part of this file; NOTE(review): check for users elsewhere */
  453 
  454 /*
  455  * nfs open vnode op
  456  * Check to see if the type is ok
  457  * and that deletion is not in progress.
  458  * For paged in text files, you will need to flush the page cache
  459  * if consistency is lost.
  460  */
  461 /* ARGSUSED */
  462 static int
  463 nfs_open(struct vop_open_args *ap)
  464 {
  465         struct vnode *vp = ap->a_vp;
  466         struct nfsnode *np = VTONFS(vp);
  467         struct vattr vattr;
  468         int error;
  469         int fmode = ap->a_mode;
  470 
  471         if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
  472                 return (EOPNOTSUPP);
  473 
  474         /*
  475          * Get a valid lease. If cached data is stale, flush it.
  476          */
  477         mtx_lock(&np->n_mtx);
  478         if (np->n_flag & NMODIFIED) {
  479                 mtx_unlock(&np->n_mtx);                 
  480                 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  481                 if (error == EINTR || error == EIO)
  482                         return (error);
  483                 mtx_lock(&np->n_mtx);
  484                 np->n_attrstamp = 0;
  485                 if (vp->v_type == VDIR)
  486                         np->n_direofoffset = 0;
  487                 mtx_unlock(&np->n_mtx);
  488                 error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
  489                 if (error)
  490                         return (error);
  491                 mtx_lock(&np->n_mtx);
  492                 np->n_mtime = vattr.va_mtime;
  493                 mtx_unlock(&np->n_mtx);
  494         } else {
  495                 mtx_unlock(&np->n_mtx);                                         
  496                 error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_td);
  497                 if (error)
  498                         return (error);
  499                 mtx_lock(&np->n_mtx);
  500                 if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
  501                         if (vp->v_type == VDIR)
  502                                 np->n_direofoffset = 0;
  503                         mtx_unlock(&np->n_mtx);
  504                         error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  505                         if (error == EINTR || error == EIO) {
  506                                 return (error);
  507                         }
  508                         mtx_lock(&np->n_mtx);
  509                         np->n_mtime = vattr.va_mtime;
  510                 }
  511                 mtx_unlock(&np->n_mtx);
  512         }
  513         /*
  514          * If the object has >= 1 O_DIRECT active opens, we disable caching.
  515          */
  516         if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
  517                 if (np->n_directio_opens == 0) {
  518                         error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  519                         if (error)
  520                                 return (error);
  521                         mtx_lock(&np->n_mtx);
  522                         np->n_flag |= NNONCACHE;
  523                         mtx_unlock(&np->n_mtx);
  524                 }
  525                 np->n_directio_opens++;
  526         }
  527         vnode_create_vobject(vp, vattr.va_size, ap->a_td);
  528         return (0);
  529 }
  530 
  531 /*
  532  * nfs close vnode op
  533  * What an NFS client should do upon close after writing is a debatable issue.
  534  * Most NFS clients push delayed writes to the server upon close, basically for
  535  * two reasons:
  536  * 1 - So that any write errors may be reported back to the client process
  537  *     doing the close system call. By far the two most likely errors are
  538  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
  539  * 2 - To put a worst case upper bound on cache inconsistency between
  540  *     multiple clients for the file.
  541  * There is also a consistency problem for Version 2 of the protocol w.r.t.
  542  * not being able to tell if other clients are writing a file concurrently,
  543  * since there is no way of knowing if the changed modify time in the reply
  544  * is only due to the write for this client.
  545  * (NFS Version 3 provides weak cache consistency data in the reply that
  546  *  should be sufficient to detect and handle this case.)
  547  *
  548  * The current code does the following:
  549  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
  550  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
  551  *                     or commit them (this satisfies 1 and 2 except for the
  552  *                     case where the server crashes after this close but
  553  *                     before the commit RPC, which is felt to be "good
  554  *                     enough". Changing the last argument to nfs_flush() to
  555  *                     a 1 would force a commit operation, if it is felt a
  556  *                     commit is necessary now.
  557  */
  558 /* ARGSUSED */
  559 static int
  560 nfs_close(struct vop_close_args *ap)
  561 {
  562         struct vnode *vp = ap->a_vp;
  563         struct nfsnode *np = VTONFS(vp);
  564         int error = 0;
  565         int fmode = ap->a_fflag;
  566 
  567         if (vp->v_type == VREG) {
  568             /*
  569              * Examine and clean dirty pages, regardless of NMODIFIED.
  570              * This closes a major hole in close-to-open consistency.
  571              * We want to push out all dirty pages (and buffers) on
  572              * close, regardless of whether they were dirtied by
  573              * mmap'ed writes or via write().
  574              */
  575             if (nfs_clean_pages_on_close && vp->v_object) {
  576                 VM_OBJECT_LOCK(vp->v_object);
  577                 vm_object_page_clean(vp->v_object, 0, 0, 0);
  578                 VM_OBJECT_UNLOCK(vp->v_object);
  579             }
  580             mtx_lock(&np->n_mtx);
  581             if (np->n_flag & NMODIFIED) {
  582                 mtx_unlock(&np->n_mtx);
  583                 if (NFS_ISV3(vp)) {
  584                     /*
  585                      * Under NFSv3 we have dirty buffers to dispose of.  We
  586                      * must flush them to the NFS server.  We have the option
  587                      * of waiting all the way through the commit rpc or just
  588                      * waiting for the initial write.  The default is to only
  589                      * wait through the initial write so the data is in the
  590                      * server's cache, which is roughly similar to the state
  591                      * a standard disk subsystem leaves the file in on close().
  592                      *
  593                      * We cannot clear the NMODIFIED bit in np->n_flag due to
  594                      * potential races with other processes, and certainly
  595                      * cannot clear it if we don't commit.
  596                      */
  597                     int cm = nfsv3_commit_on_close ? 1 : 0;
  598                     error = nfs_flush(vp, MNT_WAIT, ap->a_td, cm);
  599                     /* np->n_flag &= ~NMODIFIED; */
  600                 } else
  601                     error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  602                 mtx_lock(&np->n_mtx);
  603             }
  604             if (np->n_flag & NWRITEERR) {
  605                 np->n_flag &= ~NWRITEERR;
  606                 error = np->n_error;
  607             }
  608             mtx_unlock(&np->n_mtx);
  609         }
  610         if (nfs_directio_enable)
  611                 KASSERT((np->n_directio_asyncwr == 0),
  612                         ("nfs_close: dirty unflushed (%d) directio buffers\n",
  613                          np->n_directio_asyncwr));
  614         if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
  615                 mtx_lock(&np->n_mtx);
  616                 KASSERT((np->n_directio_opens > 0), 
  617                         ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
  618                 np->n_directio_opens--;
  619                 if (np->n_directio_opens == 0)
  620                         np->n_flag &= ~NNONCACHE;
  621                 mtx_unlock(&np->n_mtx);
  622         }
  623         return (error);
  624 }
  625 
  626 /*
  627  * nfs getattr call from vfs.
  628  */
  629 static int
  630 nfs_getattr(struct vop_getattr_args *ap)
  631 {
  632         struct vnode *vp = ap->a_vp;
  633         struct nfsnode *np = VTONFS(vp);
  634         struct vattr *vap = ap->a_vap;
  635         struct vattr vattr;
  636         caddr_t bpos, dpos;
  637         int error = 0;
  638         struct mbuf *mreq, *mrep, *md, *mb;
  639         int v3 = NFS_ISV3(vp);
  640 
  641         /*
  642          * Update local times for special files.
  643          */
  644         mtx_lock(&np->n_mtx);
  645         if (np->n_flag & (NACC | NUPD))
  646                 np->n_flag |= NCHG;
  647         mtx_unlock(&np->n_mtx);
  648         /*
  649          * First look in the cache.
  650          */
  651         if (nfs_getattrcache(vp, &vattr) == 0)
  652                 goto nfsmout;
  653         if (v3 && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) {
  654                 nfsstats.accesscache_misses++;
  655                 nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_td, ap->a_cred);
  656                 if (nfs_getattrcache(vp, &vattr) == 0)
  657                         goto nfsmout;
  658         }
  659         nfsstats.rpccnt[NFSPROC_GETATTR]++;
  660         mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
  661         mb = mreq;
  662         bpos = mtod(mb, caddr_t);
  663         nfsm_fhtom(vp, v3);
  664         nfsm_request(vp, NFSPROC_GETATTR, ap->a_td, ap->a_cred);
  665         if (!error) {
  666                 nfsm_loadattr(vp, &vattr);
  667         }
  668         m_freem(mrep);
  669 nfsmout:
  670         vap->va_type = vattr.va_type;
  671         vap->va_mode = vattr.va_mode;
  672         vap->va_nlink = vattr.va_nlink;
  673         vap->va_uid = vattr.va_uid;
  674         vap->va_gid = vattr.va_gid;
  675         vap->va_fsid = vattr.va_fsid;
  676         vap->va_fileid = vattr.va_fileid;
  677         vap->va_size = vattr.va_size;
  678         vap->va_blocksize = vattr.va_blocksize;
  679         vap->va_atime = vattr.va_atime;
  680         vap->va_mtime = vattr.va_mtime;
  681         vap->va_ctime = vattr.va_ctime;
  682         vap->va_gen = vattr.va_gen;
  683         vap->va_flags = vattr.va_flags;
  684         vap->va_rdev = vattr.va_rdev;
  685         vap->va_bytes = vattr.va_bytes;
  686         vap->va_filerev = vattr.va_filerev;
  687 
  688         return (error);
  689 }
  690 
  691 /*
  692  * nfs setattr call.
  693  */
  694 static int
  695 nfs_setattr(struct vop_setattr_args *ap)
  696 {
  697         struct vnode *vp = ap->a_vp;
  698         struct nfsnode *np = VTONFS(vp);
  699         struct vattr *vap = ap->a_vap;
  700         int error = 0;
  701         u_quad_t tsize;
  702 
  703 #ifndef nolint
  704         tsize = (u_quad_t)0;
  705 #endif
  706 
  707         /*
  708          * Setting of flags and marking of atimes are not supported.
  709          */
  710         if (vap->va_flags != VNOVAL || (vap->va_vaflags & VA_MARK_ATIME))
  711                 return (EOPNOTSUPP);
  712 
  713         /*
  714          * Disallow write attempts if the filesystem is mounted read-only.
  715          */
  716         if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
  717             vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
  718             vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
  719             (vp->v_mount->mnt_flag & MNT_RDONLY)) {
  720                 error = EROFS;
  721                 goto out;
  722         }
  723         if (vap->va_size != VNOVAL) {
  724                 switch (vp->v_type) {
  725                 case VDIR:
  726                         return (EISDIR);
  727                 case VCHR:
  728                 case VBLK:
  729                 case VSOCK:
  730                 case VFIFO:
  731                         if (vap->va_mtime.tv_sec == VNOVAL &&
  732                             vap->va_atime.tv_sec == VNOVAL &&
  733                             vap->va_mode == (mode_t)VNOVAL &&
  734                             vap->va_uid == (uid_t)VNOVAL &&
  735                             vap->va_gid == (gid_t)VNOVAL)
  736                                 return (0);             
  737                         vap->va_size = VNOVAL;
  738                         break;
  739                 default:
  740                         /*
  741                          * Disallow write attempts if the filesystem is
  742                          * mounted read-only.
  743                          */
  744                         if (vp->v_mount->mnt_flag & MNT_RDONLY)
  745                                 return (EROFS);
  746                         /*
  747                          *  We run vnode_pager_setsize() early (why?),
  748                          * we must set np->n_size now to avoid vinvalbuf
  749                          * V_SAVE races that might setsize a lower
  750                          * value.
  751                          */
  752                         mtx_lock(&np->n_mtx);
  753                         tsize = np->n_size;
  754                         mtx_unlock(&np->n_mtx);
  755                         error = nfs_meta_setsize(vp, ap->a_cred, 
  756                                                  ap->a_td, vap->va_size);
  757                         mtx_lock(&np->n_mtx);
  758                         if (np->n_flag & NMODIFIED) {
  759                             tsize = np->n_size;
  760                             mtx_unlock(&np->n_mtx);
  761                             if (vap->va_size == 0)
  762                                 error = nfs_vinvalbuf(vp, 0, ap->a_td, 1);
  763                             else
  764                                 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  765                             if (error) {
  766                                 vnode_pager_setsize(vp, tsize);
  767                                 goto out;
  768                             }
  769                         } else
  770                             mtx_unlock(&np->n_mtx);
  771                         /*
  772                          * np->n_size has already been set to vap->va_size
  773                          * in nfs_meta_setsize(). We must set it again since
  774                          * nfs_loadattrcache() could be called through
  775                          * nfs_meta_setsize() and could modify np->n_size.
  776                          */
  777                         mtx_lock(&np->n_mtx);
  778                         np->n_vattr.va_size = np->n_size = vap->va_size;
  779                         mtx_unlock(&np->n_mtx);
  780                 };
  781         } else {
  782                 mtx_lock(&np->n_mtx);
  783                 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 
  784                     (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
  785                         mtx_unlock(&np->n_mtx);
  786                         if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1)) != 0 &&
  787                             (error == EINTR || error == EIO))
  788                                 return error;
  789                 } else
  790                         mtx_unlock(&np->n_mtx);
  791         }
  792         error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_td);
  793         if (error && vap->va_size != VNOVAL) {
  794                 mtx_lock(&np->n_mtx);
  795                 np->n_size = np->n_vattr.va_size = tsize;
  796                 vnode_pager_setsize(vp, tsize);
  797                 mtx_unlock(&np->n_mtx);
  798         }
  799 out:
  800         return (error);
  801 }
  802 
  803 /*
  804  * Do an nfs setattr rpc.
  805  */
  806 static int
  807 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
  808     struct thread *td)
  809 {
  810         struct nfsv2_sattr *sp;
  811         struct nfsnode *np = VTONFS(vp);
  812         caddr_t bpos, dpos;
  813         u_int32_t *tl;
  814         int error = 0, wccflag = NFSV3_WCCRATTR;
  815         struct mbuf *mreq, *mrep, *md, *mb;
  816         int v3 = NFS_ISV3(vp);
  817 
  818         nfsstats.rpccnt[NFSPROC_SETATTR]++;
  819         mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
  820         mb = mreq;
  821         bpos = mtod(mb, caddr_t);
  822         nfsm_fhtom(vp, v3);
  823         if (v3) {
  824                 nfsm_v3attrbuild(vap, TRUE);
  825                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  826                 *tl = nfs_false;
  827         } else {
  828                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
  829                 if (vap->va_mode == (mode_t)VNOVAL)
  830                         sp->sa_mode = nfs_xdrneg1;
  831                 else
  832                         sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
  833                 if (vap->va_uid == (uid_t)VNOVAL)
  834                         sp->sa_uid = nfs_xdrneg1;
  835                 else
  836                         sp->sa_uid = txdr_unsigned(vap->va_uid);
  837                 if (vap->va_gid == (gid_t)VNOVAL)
  838                         sp->sa_gid = nfs_xdrneg1;
  839                 else
  840                         sp->sa_gid = txdr_unsigned(vap->va_gid);
  841                 sp->sa_size = txdr_unsigned(vap->va_size);
  842                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
  843                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
  844         }
  845         nfsm_request(vp, NFSPROC_SETATTR, td, cred);
  846         if (v3) {
  847                 np->n_modestamp = 0;
  848                 nfsm_wcc_data(vp, wccflag);
  849         } else
  850                 nfsm_loadattr(vp, NULL);
  851         m_freem(mrep);
  852 nfsmout:
  853         return (error);
  854 }
  855 
  856 /*
  857  * nfs lookup call, one step at a time...
  858  * First look in cache
  859  * If not found, unlock the directory nfsnode and do the rpc
  860  */
      /*
       * Summary of the visible control flow:
       *  - *ap->a_vpp is cleared on entry and set to the resulting vnode on
       *    success.
       *  - EROFS is returned for a DELETE or RENAME of the last component on
       *    a read-only mount, ENOTDIR if dvp is not a directory, and any
       *    VOP_ACCESS(VEXEC) failure on dvp is passed back.
       *  - A name cache hit (positive or negative) is only trusted after it
       *    has been re-validated against attributes from VOP_GETATTR();
       *    otherwise a LOOKUP RPC is issued.
       *  - On ENOENT for the last component of a CREATE or RENAME,
       *    EJUSTRETURN is returned (EROFS on a read-only mount).
       *
       * NOTE(review): the nfsm_* macros appear to use the locals mreq, mrep,
       * md, mb, bpos, dpos and error by name and to jump to the nfsmout
       * label on failure; their definitions are not part of this chunk.
       */
  861 static int
  862 nfs_lookup(struct vop_lookup_args *ap)
  863 {
  864         struct componentname *cnp = ap->a_cnp;
  865         struct vnode *dvp = ap->a_dvp;
  866         struct vnode **vpp = ap->a_vpp;
  867         struct mount *mp = dvp->v_mount;
  868         struct vattr dvattr, vattr;
  869         struct timespec nctime;
  870         int flags = cnp->cn_flags;
  871         struct vnode *newvp;
  872         struct nfsmount *nmp;
  873         caddr_t bpos, dpos;
  874         struct mbuf *mreq, *mrep, *md, *mb;
  875         long len;
  876         nfsfh_t *fhp;
  877         struct nfsnode *np, *newnp;
  878         int error = 0, attrflag, dattrflag, fhsize, ltype, ncticks;
  879         int v3 = NFS_ISV3(dvp);
  880         struct thread *td = cnp->cn_thread;
  881 
  882         *vpp = NULLVP;
  883         if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
  884             (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
  885                 return (EROFS);
  886         if (dvp->v_type != VDIR)
  887                 return (ENOTDIR);
  888         nmp = VFSTONFS(mp);
  889         np = VTONFS(dvp);
  890         if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
  891                 *vpp = NULLVP;
  892                 return (error);
  893         }
              /*
               * Consult the name cache.  As handled below: -1 is a positive
               * hit with the vnode in *vpp, ENOENT is a negative hit, any
               * other positive value is returned as an error, and 0 falls
               * through to the LOOKUP RPC.
               */
  894         error = cache_lookup_times(dvp, vpp, cnp, &nctime, &ncticks);
  895         if (error > 0 && error != ENOENT)
  896                 return (error);
  897         if (error == -1) {
  898                 /*
  899                  * Lookups of "." are special and always return the
  900                  * current directory.  cache_lookup() already handles
  901                  * associated locking bookkeeping, etc.
  902                  */
  903                 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
  904                         /* XXX: Is this really correct? */
  905                         if (cnp->cn_nameiop != LOOKUP &&
  906                             (flags & ISLASTCN))
  907                                 cnp->cn_flags |= SAVENAME;
  908                         return (0);
  909                 }
  910 
  911                 /*
  912                  * We only accept a positive hit in the cache if the
  913                  * change time of the file matches our cached copy.
  914                  * Otherwise, we discard the cache entry and fallback
  915                  * to doing a lookup RPC.  We also only trust cache
  916                  * entries for less than nametimeo seconds.
  917                  *
  918                  * To better handle stale file handles and attributes,
  919                  * clear the attribute cache of this node if it is a
  920                  * leaf component, part of an open() call, and not
  921                  * locally modified before fetching the attributes.
  922                  * This should allow stale file handles to be detected
  923                  * here where we can fall back to a LOOKUP RPC to
  924                  * recover rather than having nfs_open() detect the
  925                  * stale file handle and failing open(2) with ESTALE.
  926                  */
  927                 newvp = *vpp;
  928                 newnp = VTONFS(newvp);
  929                 if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
  930                     !(newnp->n_flag & NMODIFIED)) {
  931                         mtx_lock(&newnp->n_mtx);
  932                         newnp->n_attrstamp = 0;
  933                         mtx_unlock(&newnp->n_mtx);
  934                 }
  935                 if ((u_int)(ticks - ncticks) < (nametimeo * hz) &&
  936                     VOP_GETATTR(newvp, &vattr, cnp->cn_cred, td) == 0 &&
  937                     timespeccmp(&vattr.va_ctime, &nctime, ==)) {
  938                         nfsstats.lookupcache_hits++;
  939                         if (cnp->cn_nameiop != LOOKUP &&
  940                             (flags & ISLASTCN))
  941                                 cnp->cn_flags |= SAVENAME;
  942                         return (0);
  943                 }
                      /*
                       * The entry is not trusted: purge it and release the
                       * cached vnode (vrele only when it is dvp itself,
                       * vput otherwise).
                       */
  944                 cache_purge(newvp);
  945                 if (dvp != newvp)
  946                         vput(newvp);
  947                 else 
  948                         vrele(newvp);
  949                 *vpp = NULLVP;
  950         } else if (error == ENOENT) {
  951                 /*
  952                  * We only accept a negative hit in the cache if the
  953                  * modification time of the parent directory matches
  954                  * the cached copy in the name cache entry.
  955                  * Otherwise, we discard all of the negative cache
  956                  * entries for this directory.  We also only trust
  957                  * negative cache entries for up to negnametimeo
  958                  * seconds.
  959                  */
  960                 if ((u_int)(ticks - ncticks) < (negnametimeo * hz) &&
  961                     VOP_GETATTR(dvp, &vattr, cnp->cn_cred, td) == 0 &&
  962                     timespeccmp(&vattr.va_mtime, &nctime, ==)) {
  963                         nfsstats.lookupcache_hits++;
  964                         return (ENOENT);
  965                 }
  966                 cache_purge_negative(dvp);
  967         }
  968 
              /* No usable cache entry: build and send a LOOKUP RPC for the name. */
  969         attrflag = dattrflag = 0;
  970         error = 0;
  971         newvp = NULLVP;
  972         nfsstats.lookupcache_misses++;
  973         nfsstats.rpccnt[NFSPROC_LOOKUP]++;
  974         len = cnp->cn_namelen;
  975         mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
  976                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
  977         mb = mreq;
  978         bpos = mtod(mb, caddr_t);
  979         nfsm_fhtom(dvp, v3);
  980         nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
  981         nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
              /*
               * Reached with error set when the request failed without
               * jumping to nfsmout.  For v3 the directory's post-op
               * attributes are loaded into vattr; the ENOENT handling at
               * nfsmout uses vattr.va_mtime for the negative cache entry
               * when dattrflag is set.
               */
  982         if (error) {
  983                 if (v3) {
  984                         nfsm_postop_attr_va(dvp, dattrflag, &vattr);
  985                         m_freem(mrep);
  986                 }
  987                 goto nfsmout;
  988         }
  989         nfsm_getfh(fhp, fhsize, v3);
  990 
  991         /*
  992          * Handle RENAME case...
  993          */
  994         if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
                      /* The name resolves to the directory itself. */
  995                 if (NFS_CMPFH(np, fhp, fhsize)) {
  996                         m_freem(mrep);
  997                         return (EISDIR);
  998                 }
                      /* From here on np refers to the looked-up node, not dvp's. */
  999                 error = nfs_nget(mp, fhp, fhsize, &np, LK_EXCLUSIVE);
 1000                 if (error) {
 1001                         m_freem(mrep);
 1002                         return (error);
 1003                 }
 1004                 newvp = NFSTOV(np);
 1005                 if (v3) {
 1006                         nfsm_postop_attr(newvp, attrflag);
 1007                         nfsm_postop_attr(dvp, attrflag);
 1008                 } else
 1009                         nfsm_loadattr(newvp, NULL);
 1010                 *vpp = newvp;
 1011                 m_freem(mrep);
 1012                 cnp->cn_flags |= SAVENAME;
 1013                 return (0);
 1014         }
 1015 
              /*
               * "..": dvp is unlocked while the parent vnode is obtained and
               * relocked afterwards.  The mount is kept busy across the
               * unlocked window, and dvp is rechecked for VI_DOOMED once it
               * has been relocked.
               */
 1016         if (flags & ISDOTDOT) {
 1017                 ltype = VOP_ISLOCKED(dvp, td);
 1018                 error = vfs_busy(mp, LK_NOWAIT, NULL, td);
 1019                 if (error != 0) {
 1020                         VOP_UNLOCK(dvp, 0, td);
 1021                         error = vfs_busy(mp, 0, NULL, td);
 1022                         vn_lock(dvp, ltype | LK_RETRY, td);
 1023                         if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
 1024                                 vfs_unbusy(mp, td);
 1025                                 error = ENOENT;
 1026                         }
 1027                         if (error != 0) {
 1028                                 m_freem(mrep);
 1029                                 return (error);
 1030                         }
 1031                 }
 1032                 VOP_UNLOCK(dvp, 0, td);
 1033                 error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
 1034                 if (error == 0)
 1035                         newvp = NFSTOV(np);
 1036                 vfs_unbusy(mp, td);
 1037                 vn_lock(dvp, ltype | LK_RETRY, td);
 1038                 if (dvp->v_iflag & VI_DOOMED) {
 1039                         if (error == 0) {
 1040                                 if (newvp == dvp)
 1041                                         vrele(newvp);
 1042                                 else
 1043                                         vput(newvp);
 1044                         }
 1045                         error = ENOENT;
 1046                 }
 1047                 if (error) {
 1048                         m_freem(mrep);
 1049                         return (error);
 1050                 }
 1051         } else if (NFS_CMPFH(np, fhp, fhsize)) {
                      /* The handle is dvp's own: return dvp with an extra reference. */
 1052                 VREF(dvp);
 1053                 newvp = dvp;
 1054         } else {
 1055                 error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
 1056                 if (error) {
 1057                         m_freem(mrep);
 1058                         return (error);
 1059                 }
 1060                 newvp = NFSTOV(np);
 1061 
 1062                 /*
 1063                  * Flush the attribute cache when opening a leaf node
 1064                  * to ensure that fresh attributes are fetched in
 1065                  * nfs_open() if we are unable to fetch attributes
 1066                  * from the LOOKUP reply.
 1067                  */
 1068                 if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 1069                     !(np->n_flag & NMODIFIED)) {
 1070                         mtx_lock(&np->n_mtx);
 1071                         np->n_attrstamp = 0;
 1072                         mtx_unlock(&np->n_mtx);
 1073                 }
 1074         }
              /*
               * Load the attributes carried in the reply.  For v2 attrflag is
               * set unconditionally once nfsm_loadattr() has returned here.
               */
 1075         if (v3) {
 1076                 nfsm_postop_attr_va(newvp, attrflag, &vattr);
 1077                 nfsm_postop_attr_va(dvp, dattrflag, &dvattr);
 1078         } else {
 1079                 nfsm_loadattr(newvp, &vattr);
 1080                 attrflag = 1;
 1081         }
 1082         if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 1083                 cnp->cn_flags |= SAVENAME;
              /*
               * Enter the result in the name cache only when attributes for
               * the new vnode were obtained (and, for a directory, the
               * parent's post-op attributes as well), and not for the last
               * component of a DELETE.
               */
 1084         if ((cnp->cn_flags & MAKEENTRY) &&
 1085             (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
 1086             attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
 1087                 cache_enter_time(dvp, newvp, cnp, &vattr.va_ctime,
 1088                     newvp->v_type != VDIR ? NULL : &dvattr.va_ctime);
 1089         *vpp = newvp;
 1090         m_freem(mrep);
 1091 nfsmout:
              /*
               * Common exit.  On error any vnode obtained above is dropped;
               * ENOENT gets the extra handling below, every other error is
               * returned as is.
               */
 1092         if (error) {
 1093                 if (newvp != NULLVP) {
 1094                         vput(newvp);
 1095                         *vpp = NULLVP;
 1096                 }
 1097 
 1098                 if (error != ENOENT)
 1099                         goto done;
 1100 
 1101                 /* The requested file was not found. */
 1102                 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
 1103                     (flags & ISLASTCN)) {
 1104                         /*
 1105                          * XXX: UFS does a full VOP_ACCESS(dvp,
 1106                          * VWRITE) here instead of just checking
 1107                          * MNT_RDONLY.
 1108                          */
 1109                         if (mp->mnt_flag & MNT_RDONLY)
 1110                                 return (EROFS);
 1111                         cnp->cn_flags |= SAVENAME;
 1112                         return (EJUSTRETURN);
 1113                 }
 1114 
 1115                 if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE &&
 1116                     dattrflag) {
 1117                         /*
 1118                          * Cache the modification time of the parent
 1119                          * directory from the post-op attributes in
 1120                          * the name cache entry.  The negative cache
 1121                          * entry will be ignored once the directory
 1122                          * has changed.  Don't bother adding the entry
 1123                          * if the directory has already changed.
 1124                          */
 1125                         mtx_lock(&np->n_mtx);
 1126                         if (timespeccmp(&np->n_vattr.va_mtime,
 1127                             &vattr.va_mtime, ==)) {
 1128                                 mtx_unlock(&np->n_mtx);
 1129                                 cache_enter_time(dvp, NULL, cnp,
 1130                                     &vattr.va_mtime, NULL);
 1131                         } else
 1132                                 mtx_unlock(&np->n_mtx);
 1133                 }
 1134                 return (ENOENT);
 1135         }
 1136 done:
 1137         return (error);
 1138 }
 1139 
 1140 /*
 1141  * nfs read call.
 1142  * Just call nfs_bioread() to do the work.
 1143  */
 1144 static int
 1145 nfs_read(struct vop_read_args *ap)
 1146 {
 1147         struct vnode *vp = ap->a_vp;
 1148 
 1149         switch (vp->v_type) {
 1150         case VREG:
 1151                 return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
 1152         case VDIR:
 1153                 return (EISDIR);
 1154         default:
 1155                 return (EOPNOTSUPP);
 1156         }
 1157 }
 1158 
 1159 /*
 1160  * nfs readlink call
 1161  */
 1162 static int
 1163 nfs_readlink(struct vop_readlink_args *ap)
 1164 {
 1165         struct vnode *vp = ap->a_vp;
 1166 
 1167         if (vp->v_type != VLNK)
 1168                 return (EINVAL);
 1169         return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
 1170 }
 1171 
 1172 /*
 1173  * Do a readlink rpc.
 1174  * Called by nfs_doio() from below the buffer cache.
 1175  */
 1176 int
 1177 nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 1178 {
 1179         caddr_t bpos, dpos;
 1180         int error = 0, len, attrflag;
 1181         struct mbuf *mreq, *mrep, *md, *mb;
 1182         int v3 = NFS_ISV3(vp);
 1183 
 1184         nfsstats.rpccnt[NFSPROC_READLINK]++;
 1185         mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
 1186         mb = mreq;
 1187         bpos = mtod(mb, caddr_t);
 1188         nfsm_fhtom(vp, v3);
 1189         nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
 1190         if (v3)
 1191                 nfsm_postop_attr(vp, attrflag);
 1192         if (!error) {
 1193                 nfsm_strsiz(len, NFS_MAXPATHLEN);
 1194                 if (len == NFS_MAXPATHLEN) {
 1195                         struct nfsnode *np = VTONFS(vp);
 1196                         mtx_lock(&np->n_mtx);
 1197                         if (np->n_size && np->n_size < NFS_MAXPATHLEN)
 1198                                 len = np->n_size;
 1199                         mtx_unlock(&np->n_mtx);
 1200                 }
 1201                 nfsm_mtouio(uiop, len);
 1202         }
 1203         m_freem(mrep);
 1204 nfsmout:
 1205         return (error);
 1206 }
 1207 
 1208 /*
 1209  * nfs read rpc call
 1210  * Ditto above
 1211  */
 1212 int
 1213 nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 1214 {
 1215         u_int32_t *tl;
 1216         caddr_t bpos, dpos;
 1217         struct mbuf *mreq, *mrep, *md, *mb;
 1218         struct nfsmount *nmp;
 1219         int error = 0, len, retlen, tsiz, eof, attrflag;
 1220         int v3 = NFS_ISV3(vp);
 1221         int rsize;
 1222 
 1223 #ifndef nolint
 1224         eof = 0;
 1225 #endif
 1226         nmp = VFSTONFS(vp->v_mount);
 1227         tsiz = uiop->uio_resid;
 1228         mtx_lock(&nmp->nm_mtx);
 1229         if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
 1230                 mtx_unlock(&nmp->nm_mtx);
 1231                 return (EFBIG);
 1232         }
 1233         rsize = nmp->nm_rsize;
 1234         mtx_unlock(&nmp->nm_mtx);
 1235         while (tsiz > 0) {
 1236                 nfsstats.rpccnt[NFSPROC_READ]++;
 1237                 len = (tsiz > rsize) ? rsize : tsiz;
 1238                 mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
 1239                 mb = mreq;
 1240                 bpos = mtod(mb, caddr_t);
 1241                 nfsm_fhtom(vp, v3);
 1242                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3);
 1243                 if (v3) {
 1244                         txdr_hyper(uiop->uio_offset, tl);
 1245                         *(tl + 2) = txdr_unsigned(len);
 1246                 } else {
 1247                         *tl++ = txdr_unsigned(uiop->uio_offset);
 1248                         *tl++ = txdr_unsigned(len);
 1249                         *tl = 0;
 1250                 }
 1251                 nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred);
 1252                 if (v3) {
 1253                         nfsm_postop_attr(vp, attrflag);
 1254                         if (error) {
 1255                                 m_freem(mrep);
 1256                                 goto nfsmout;
 1257                         }
 1258                         tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
 1259                         eof = fxdr_unsigned(int, *(tl + 1));
 1260                 } else {
 1261                         nfsm_loadattr(vp, NULL);
 1262                 }
 1263                 nfsm_strsiz(retlen, rsize);
 1264                 nfsm_mtouio(uiop, retlen);
 1265                 m_freem(mrep);
 1266                 tsiz -= retlen;
 1267                 if (v3) {
 1268                         if (eof || retlen == 0) {
 1269                                 tsiz = 0;
 1270                         }
 1271                 } else if (retlen < len) {
 1272                         tsiz = 0;
 1273                 }
 1274         }
 1275 nfsmout:
 1276         return (error);
 1277 }
 1278 
 1279 /*
 1280  * nfs write call
 1281  */
 1282 int
 1283 nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
 1284              int *iomode, int *must_commit)
 1285 {
 1286         u_int32_t *tl;
 1287         int32_t backup;
 1288         caddr_t bpos, dpos;
 1289         struct mbuf *mreq, *mrep, *md, *mb;
 1290         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 1291         int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
 1292         int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
 1293         int wsize;
 1294         
 1295 #ifndef DIAGNOSTIC
 1296         if (uiop->uio_iovcnt != 1)
 1297                 panic("nfs: writerpc iovcnt > 1");
 1298 #endif
 1299         *must_commit = 0;
 1300         tsiz = uiop->uio_resid;
 1301         mtx_lock(&nmp->nm_mtx);
 1302         if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
 1303                 mtx_unlock(&nmp->nm_mtx);               
 1304                 return (EFBIG);
 1305         }
 1306         wsize = nmp->nm_wsize;
 1307         mtx_unlock(&nmp->nm_mtx);
 1308         while (tsiz > 0) {
 1309                 nfsstats.rpccnt[NFSPROC_WRITE]++;
 1310                 len = (tsiz > wsize) ? wsize : tsiz;
 1311                 mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
 1312                         NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
 1313                 mb = mreq;
 1314                 bpos = mtod(mb, caddr_t);
 1315                 nfsm_fhtom(vp, v3);
 1316                 if (v3) {
 1317                         tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
 1318                         txdr_hyper(uiop->uio_offset, tl);
 1319                         tl += 2;
 1320                         *tl++ = txdr_unsigned(len);
 1321                         *tl++ = txdr_unsigned(*iomode);
 1322                         *tl = txdr_unsigned(len);
 1323                 } else {
 1324                         u_int32_t x;
 1325 
 1326                         tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 1327                         /* Set both "begin" and "current" to non-garbage. */
 1328                         x = txdr_unsigned((u_int32_t)uiop->uio_offset);
 1329                         *tl++ = x;      /* "begin offset" */
 1330                         *tl++ = x;      /* "current offset" */
 1331                         x = txdr_unsigned(len);
 1332                         *tl++ = x;      /* total to this offset */
 1333                         *tl = x;        /* size of this write */
 1334                 }
 1335                 nfsm_uiotom(uiop, len);
 1336                 nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred);
 1337                 if (v3) {
 1338                         wccflag = NFSV3_WCCCHK;
 1339                         nfsm_wcc_data(vp, wccflag);
 1340                         if (!error) {
 1341                                 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED
 1342                                         + NFSX_V3WRITEVERF);
 1343                                 rlen = fxdr_unsigned(int, *tl++);
 1344                                 if (rlen == 0) {
 1345                                         error = NFSERR_IO;
 1346                                         m_freem(mrep);
 1347                                         break;
 1348                                 } else if (rlen < len) {
 1349                                         backup = len - rlen;
 1350                                         uiop->uio_iov->iov_base =
 1351                                             (char *)uiop->uio_iov->iov_base -
 1352                                             backup;
 1353                                         uiop->uio_iov->iov_len += backup;
 1354                                         uiop->uio_offset -= backup;
 1355                                         uiop->uio_resid += backup;
 1356                                         len = rlen;
 1357                                 }
 1358                                 commit = fxdr_unsigned(int, *tl++);
 1359 
 1360                                 /*
 1361                                  * Return the lowest committment level
 1362                                  * obtained by any of the RPCs.
 1363                                  */
 1364                                 if (committed == NFSV3WRITE_FILESYNC)
 1365                                         committed = commit;
 1366                                 else if (committed == NFSV3WRITE_DATASYNC &&
 1367                                         commit == NFSV3WRITE_UNSTABLE)
 1368                                         committed = commit;
 1369                                 mtx_lock(&nmp->nm_mtx);
 1370                                 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
 1371                                     bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 1372                                         NFSX_V3WRITEVERF);
 1373                                     nmp->nm_state |= NFSSTA_HASWRITEVERF;
 1374                                 } else if (bcmp((caddr_t)tl,
 1375                                     (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
 1376                                     *must_commit = 1;
 1377                                     bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 1378                                         NFSX_V3WRITEVERF);
 1379                                 }
 1380                                 mtx_unlock(&nmp->nm_mtx);
 1381                         }
 1382                 } else {
 1383                         nfsm_loadattr(vp, NULL);
 1384                 }
 1385                 if (wccflag) {
 1386                         mtx_lock(&(VTONFS(vp))->n_mtx);
 1387                         VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
 1388                         mtx_unlock(&(VTONFS(vp))->n_mtx);
 1389                 }
 1390                 m_freem(mrep);
 1391                 if (error)
 1392                         break;
 1393                 tsiz -= len;
 1394         }
 1395 nfsmout:
 1396         if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
 1397                 committed = NFSV3WRITE_FILESYNC;
 1398         *iomode = committed;
 1399         if (error)
 1400                 uiop->uio_resid = tsiz;
 1401         return (error);
 1402 }
 1403 
 1404 /*
 1405  * nfs mknod rpc
 1406  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 1407  * mode set to specify the file type and the size field for rdev.
       *
       * Only VCHR, VBLK, VFIFO and VSOCK nodes are accepted; any other va_type
       * returns EOPNOTSUPP before a request is built.  A failing VOP_GETATTR()
       * on dvp is likewise returned immediately.
       *
       * On success the new vnode is stored in *vpp.  On failure any vnode
       * picked up along the way is released with vput() and *vpp is not
       * written.  The exit code at nfsmout flags dvp NMODIFIED and, when
       * wccflag ended up zero, clears dvp's n_attrstamp.
 1408  */
 1409 static int
 1410 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 1411     struct vattr *vap)
 1412 {
 1413         struct nfsv2_sattr *sp;
 1414         u_int32_t *tl;
 1415         struct vnode *newvp = NULL;
 1416         struct nfsnode *np = NULL;
 1417         struct vattr vattr;
 1418         caddr_t bpos, dpos;
 1419         int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
 1420         struct mbuf *mreq, *mrep, *md, *mb;
 1421         u_int32_t rdev;
 1422         int v3 = NFS_ISV3(dvp);
 1423 
              /*
               * rdev is only consumed by the v2 branch below, where it is
               * sent in the sa_size field.
               */
 1424         if (vap->va_type == VCHR || vap->va_type == VBLK)
 1425                 rdev = txdr_unsigned(vap->va_rdev);
 1426         else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
 1427                 rdev = nfs_xdrneg1;
 1428         else {
 1429                 return (EOPNOTSUPP);
 1430         }
              /*
               * Only the error is used; vattr is not read again in this
               * function.
               */
 1431         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
 1432                 return (error);
 1433         }
 1434         nfsstats.rpccnt[NFSPROC_MKNOD]++;
 1435         mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
 1436                 + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
 1437         mb = mreq;
 1438         bpos = mtod(mb, caddr_t);
 1439         nfsm_fhtom(dvp, v3);
 1440         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 1441         if (v3) {
                      /*
                       * v3: node type, then the attributes, then major and
                       * minor numbers for character/block devices only.
                       */
 1442                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
 1443                 *tl++ = vtonfsv3_type(vap->va_type);
 1444                 nfsm_v3attrbuild(vap, FALSE);
 1445                 if (vap->va_type == VCHR || vap->va_type == VBLK) {
 1446                         tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 1447                         *tl++ = txdr_unsigned(umajor(vap->va_rdev));
 1448                         *tl = txdr_unsigned(uminor(vap->va_rdev));
 1449                 }
 1450         } else {
                      /*
                       * v2: uid and gid are sent as nfs_xdrneg1 and the
                       * device number computed above goes in sa_size.
                       */
 1451                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
 1452                 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
 1453                 sp->sa_uid = nfs_xdrneg1;
 1454                 sp->sa_gid = nfs_xdrneg1;
 1455                 sp->sa_size = rdev;
 1456                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 1457                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 1458         }
              /*
               * NOTE(review): no explicit goto targets nfsmout in this
               * function, so the nfsm_* macros presumably jump there on
               * failure -- confirm against their definitions.
               */
 1459         nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred);
 1460         if (!error) {
 1461                 nfsm_mtofh(dvp, newvp, v3, gotvp);
                      /*
                       * gotvp still zero: drop whatever newvp holds and
                       * find the new node by name instead.
                       */
 1462                 if (!gotvp) {
 1463                         if (newvp) {
 1464                                 vput(newvp);
 1465                                 newvp = NULL;
 1466                         }
 1467                         error = nfs_lookitup(dvp, cnp->cn_nameptr,
 1468                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
 1469                         if (!error)
 1470                                 newvp = NFSTOV(np);
 1471                 }
 1472         }
 1473         if (v3)
 1474                 nfsm_wcc_data(dvp, wccflag);
 1475         m_freem(mrep);
 1476 nfsmout:
 1477         if (error) {
 1478                 if (newvp)
 1479                         vput(newvp);
 1480         } else {
 1481                 *vpp = newvp;
 1482         }
              /*
               * The directory changed (or may have): flag it, and clear
               * n_attrstamp when wccflag is zero (presumably so the cached
               * attributes are refetched -- confirm).
               */
 1483         mtx_lock(&(VTONFS(dvp))->n_mtx);
 1484         VTONFS(dvp)->n_flag |= NMODIFIED;
 1485         if (!wccflag)
 1486                 VTONFS(dvp)->n_attrstamp = 0;
 1487         mtx_unlock(&(VTONFS(dvp))->n_mtx);
 1488         return (error);
 1489 }
 1490 
 1491 /*
 1492  * nfs mknod vop
 1493  * just call nfs_mknodrpc() to do the work.
       *
       * The directory vnode, result pointer, component name and attributes
       * are taken from ap and passed through unchanged; the return value of
       * nfs_mknodrpc() is returned as-is.
 1494  */
 1495 /* ARGSUSED */
 1496 static int
 1497 nfs_mknod(struct vop_mknod_args *ap)
 1498 {
 1499         return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
 1500 }
 1501 
 1502 static u_long create_verf;
 1503 /*
 1504  * nfs file create call
       *
       * create_verf (declared just above) is a file-scope counter that is
       * pre-incremented for every exclusive create and supplies the second
       * word of the create verifier; it also supplies the first word when no
       * interface address is used for that.
       *
       * VSOCK requests are handed to nfs_mknodrpc().  For everything else a
       * CREATE request is built and sent.  With NFSv3 and VA_EXCLUSIVE set an
       * exclusive create is tried first; if that fails with NFSERR_NOTSUPP
       * the request is rebuilt as an unchecked create and sent again.  After
       * a successful exclusive create the attributes are applied with
       * nfs_setattrrpc().
       *
       * On success the new vnode is stored in *ap->a_vpp.  The exit code
       * flags dvp NMODIFIED and, when wccflag ended up zero, clears dvp's
       * n_attrstamp.
 1505  */
 1506 static int
 1507 nfs_create(struct vop_create_args *ap)
 1508 {
 1509         struct vnode *dvp = ap->a_dvp;
 1510         struct vattr *vap = ap->a_vap;
 1511         struct componentname *cnp = ap->a_cnp;
 1512         struct nfsv2_sattr *sp;
 1513         u_int32_t *tl;
 1514         struct nfsnode *np = NULL;
 1515         struct vnode *newvp = NULL;
 1516         caddr_t bpos, dpos;
 1517         int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
 1518         struct mbuf *mreq, *mrep, *md, *mb;
 1519         struct vattr vattr;
 1520         int v3 = NFS_ISV3(dvp);
 1521 
 1522         /*
 1523          * Oops, not for me..
               * Sockets are created through the mknod rpc instead.
 1524          */
 1525         if (vap->va_type == VSOCK)
 1526                 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
 1527 
              /* Only the error is used; vattr is not read again here. */
 1528         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
 1529                 return (error);
 1530         }
              /* fmode is only consulted by the v3 code paths below. */
 1531         if (vap->va_vaflags & VA_EXCLUSIVE)
 1532                 fmode |= O_EXCL;
      /*
       * Re-entered from the error path below, with O_EXCL cleared, when an
       * exclusive v3 create fails with NFSERR_NOTSUPP.
       */
 1533 again:
 1534         nfsstats.rpccnt[NFSPROC_CREATE]++;
 1535         mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
 1536                 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
 1537         mb = mreq;
 1538         bpos = mtod(mb, caddr_t);
 1539         nfsm_fhtom(dvp, v3);
 1540         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 1541         if (v3) {
 1542                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
 1543                 if (fmode & O_EXCL) {
                              /*
                               * Exclusive create: send a two-word verifier
                               * and no attributes.  The first word is the
                               * address of the first entry on in_ifaddrhead
                               * when INET is configured and the list is not
                               * empty, otherwise the current create_verf;
                               * the second word is the incremented
                               * create_verf.
                               */
 1544                         *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
 1545                         tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF);
 1546 #ifdef INET
 1547                         if (!TAILQ_EMPTY(&in_ifaddrhead))
 1548                                 *tl++ = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr.s_addr;
 1549                         else
 1550 #endif
 1551                                 *tl++ = create_verf;
 1552                         *tl = ++create_verf;
 1553                 } else {
 1554                         *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
 1555                         nfsm_v3attrbuild(vap, FALSE);
 1556                 }
 1557         } else {
                      /* v2: uid/gid sent as nfs_xdrneg1, size as zero. */
 1558                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
 1559                 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
 1560                 sp->sa_uid = nfs_xdrneg1;
 1561                 sp->sa_gid = nfs_xdrneg1;
 1562                 sp->sa_size = 0;
 1563                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 1564                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 1565         }
              /*
               * NOTE(review): the only explicit goto in this function targets
               * "again"; the nfsm_* macros presumably jump to nfsmout on
               * failure -- confirm against their definitions.
               */
 1566         nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred);
 1567         if (!error) {
 1568                 nfsm_mtofh(dvp, newvp, v3, gotvp);
                      /*
                       * gotvp still zero: drop whatever newvp holds and
                       * find the new file by name instead.
                       */
 1569                 if (!gotvp) {
 1570                         if (newvp) {
 1571                                 vput(newvp);
 1572                                 newvp = NULL;
 1573                         }
 1574                         error = nfs_lookitup(dvp, cnp->cn_nameptr,
 1575                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
 1576                         if (!error)
 1577                                 newvp = NFSTOV(np);
 1578                 }
 1579         }
 1580         if (v3)
 1581                 nfsm_wcc_data(dvp, wccflag);
 1582         m_freem(mrep);
 1583 nfsmout:
 1584         if (error) {
                      /* Fall back to a non-exclusive create and resend. */
 1585                 if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
 1586                         fmode &= ~O_EXCL;
 1587                         goto again;
 1588                 }
 1589                 if (newvp)
 1590                         vput(newvp);
 1591         } else if (v3 && (fmode & O_EXCL)) {
 1592                 /*
 1593                  * We are normally called with only a partially initialized
 1594                  * VAP.  Since the NFSv3 spec says that server may use the
 1595                  * file attributes to store the verifier, the spec requires
 1596                  * us to do a SETATTR RPC. FreeBSD servers store the verifier
 1597                  * in atime, but we can't really assume that all servers will
 1598                  * so we ensure that our SETATTR sets both atime and mtime.
 1599                  */
 1600                 if (vap->va_mtime.tv_sec == VNOVAL)
 1601                         vfs_timestamp(&vap->va_mtime);
 1602                 if (vap->va_atime.tv_sec == VNOVAL)
 1603                         vap->va_atime = vap->va_mtime;
 1604                 error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_thread);
                      /* The create is reported as failed; release the vnode. */
 1605                 if (error)
 1606                         vput(newvp);
 1607         }
 1608         if (!error) {
 1609                 *ap->a_vpp = newvp;
 1610         }
 1611         mtx_lock(&(VTONFS(dvp))->n_mtx);
 1612         VTONFS(dvp)->n_flag |= NMODIFIED;
 1613         if (!wccflag)
 1614                 VTONFS(dvp)->n_attrstamp = 0;
 1615         mtx_unlock(&(VTONFS(dvp))->n_mtx);
 1616         return (error);
 1617 }
 1618 
 1619 /*
 1620  * nfs file remove call
 1621  * To try and make nfs semantics closer to ufs semantics, a file that has
 1622  * other processes using the vnode is renamed instead of removed and then
 1623  * removed later on the last close.
 1624  * - If v_usecount > 1
 1625  *        If a rename is not already in the works
 1626  *           call nfs_sillyrename() to set it up
 1627  *     else
 1628  *        do the remove rpc
       *
       * Further details visible in the code:
       * - Directories are refused with EPERM.
       * - The remove rpc is also done for a node that already has a
       *   sillyrename pending when a successful GETATTR reports va_nlink > 1.
       * - A node with a sillyrename pending that does not pass that test is
       *   left alone and 0 is returned.
       * - The rpc is skipped when flushing the buffers fails with EINTR or
       *   EIO; that error is returned instead.
       * - ENOENT from the rpc is reported as success.
       * - n_attrstamp of the removed node is cleared on every path.
 1629  */
 1630 static int
 1631 nfs_remove(struct vop_remove_args *ap)
 1632 {
 1633         struct vnode *vp = ap->a_vp;
 1634         struct vnode *dvp = ap->a_dvp;
 1635         struct componentname *cnp = ap->a_cnp;
 1636         struct nfsnode *np = VTONFS(vp);
 1637         int error = 0;
 1638         struct vattr vattr;
 1639 
      /*
       * NOTE(review): these sanity checks are compiled in when DIAGNOSTIC is
       * NOT defined (#ifndef) -- confirm that this is intended.
       */
 1640 #ifndef DIAGNOSTIC
 1641         if ((cnp->cn_flags & HASBUF) == 0)
 1642                 panic("nfs_remove: no name");
 1643         if (vrefcnt(vp) < 1)
 1644                 panic("nfs_remove: bad v_usecount");
 1645 #endif
 1646         if (vp->v_type == VDIR)
 1647                 error = EPERM;
 1648         else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
 1649             VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_thread) == 0 &&
 1650             vattr.va_nlink > 1)) {
 1651                 /*
 1652                  * Purge the name cache so that the chance of a lookup for
 1653                  * the name succeeding while the remove is in progress is
 1654                  * minimized. Without node locking it can still happen, such
 1655                  * that an I/O op returns ESTALE, but since you get this if
 1656                  * another host removes the file..
 1657                  */
 1658                 cache_purge(vp);
 1659                 /*
 1660                  * throw away biocache buffers, mainly to avoid
 1661                  * unnecessary delayed writes later.
 1662                  */
 1663                 error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1);
 1664                 /* Do the rpc */
 1665                 if (error != EINTR && error != EIO)
 1666                         error = nfs_removerpc(dvp, cnp->cn_nameptr,
 1667                                 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
 1668                 /*
 1669                  * Kludge City: If the first reply to the remove rpc is lost..
 1670                  *   the reply to the retransmitted request will be ENOENT
 1671                  *   since the file was in fact removed
 1672                  *   Therefore, we cheat and return success.
 1673                  */
 1674                 if (error == ENOENT)
 1675                         error = 0;
 1676         } else if (!np->n_sillyrename)
 1677                 error = nfs_sillyrename(dvp, vp, cnp);
              /* NOTE(review): written without taking np->n_mtx. */
 1678         np->n_attrstamp = 0;
 1679         return (error);
 1680 }
 1681 
 1682 /*
 1683  * nfs file remove rpc called from nfs_inactive
       *
       * Removes the name recorded in the sillyrename record sp from the
       * directory sp->s_dvp, using the credentials stored in sp and a NULL
       * thread pointer.  Returns 0 without sending anything when the
       * directory vnode has type VBAD; otherwise returns the result of
       * nfs_removerpc().
 1684  */
 1685 int
 1686 nfs_removeit(struct sillyrename *sp)
 1687 {
 1688         /*
 1689          * Make sure that the directory vnode is still valid.
 1690          * XXX we should lock sp->s_dvp here.
 1691          */
 1692         if (sp->s_dvp->v_type == VBAD)
 1693                 return (0);
 1694         return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 1695                 NULL));
 1696 }
 1697 
 1698 /*
 1699  * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
       *
       * Builds a REMOVE request from the file handle of dvp and the given
       * name, sends it on behalf of td/cred and, for NFSv3, processes the wcc
       * data for dvp from the reply.  Whatever the outcome, dvp is flagged
       * NMODIFIED and, when wccflag ended up zero, its n_attrstamp is
       * cleared.  Returns the value left in error.
 1700  */
 1701 static int
 1702 nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
 1703     struct ucred *cred, struct thread *td)
 1704 {
 1705         caddr_t bpos, dpos;
 1706         int error = 0, wccflag = NFSV3_WCCRATTR;
 1707         struct mbuf *mreq, *mrep, *md, *mb;
 1708         int v3 = NFS_ISV3(dvp);
 1709 
 1710         nfsstats.rpccnt[NFSPROC_REMOVE]++;
 1711         mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE,
 1712                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
 1713         mb = mreq;
 1714         bpos = mtod(mb, caddr_t);
 1715         nfsm_fhtom(dvp, v3);
 1716         nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
              /*
               * NOTE(review): no explicit goto targets nfsmout here, so the
               * nfsm_* macros presumably jump there on failure -- confirm.
               */
 1717         nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
 1718         if (v3)
 1719                 nfsm_wcc_data(dvp, wccflag);
 1720         m_freem(mrep);
 1721 nfsmout:
 1722         mtx_lock(&(VTONFS(dvp))->n_mtx);
 1723         VTONFS(dvp)->n_flag |= NMODIFIED;
 1724         if (!wccflag)
 1725                 VTONFS(dvp)->n_attrstamp = 0;
 1726         mtx_unlock(&(VTONFS(dvp))->n_mtx);
 1727         return (error);
 1728 }
 1729 
 1730 /*
 1731  * nfs file rename call
       *
       * Renames fvp (fcnp in fdvp) to tcnp in tdvp, replacing tvp if it
       * exists.  Returns EXDEV when the source, the target directory and the
       * existing target are not all on the same mount.  Dirty data of the
       * source and of an existing target is flushed first, and the rename is
       * abandoned if either flush fails.
       *
       * On every path the exit code releases tdvp (vrele() if it is the same
       * vnode as tvp, vput() otherwise), vput()s tvp when it is still set,
       * and vrele()s fdvp and fvp.  A final ENOENT is reported as success.
 1732  */
 1733 static int
 1734 nfs_rename(struct vop_rename_args *ap)
 1735 {
 1736         struct vnode *fvp = ap->a_fvp;
 1737         struct vnode *tvp = ap->a_tvp;
 1738         struct vnode *fdvp = ap->a_fdvp;
 1739         struct vnode *tdvp = ap->a_tdvp;
 1740         struct componentname *tcnp = ap->a_tcnp;
 1741         struct componentname *fcnp = ap->a_fcnp;
 1742         int error;
 1743 
      /*
       * NOTE(review): this check is compiled in when DIAGNOSTIC is NOT
       * defined (#ifndef) -- confirm that this is intended.
       */
 1744 #ifndef DIAGNOSTIC
 1745         if ((tcnp->cn_flags & HASBUF) == 0 ||
 1746             (fcnp->cn_flags & HASBUF) == 0)
 1747                 panic("nfs_rename: no name");
 1748 #endif
 1749         /* Check for cross-device rename */
 1750         if ((fvp->v_mount != tdvp->v_mount) ||
 1751             (tvp && (fvp->v_mount != tvp->v_mount))) {
 1752                 error = EXDEV;
 1753                 goto out;
 1754         }
 1755 
 1756         if (fvp == tvp) {
 1757                 nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
 1758                 error = 0;
 1759                 goto out;
 1760         }
              /* fvp is locked only for the duration of its fsync below. */
 1761         if ((error = vn_lock(fvp, LK_EXCLUSIVE, fcnp->cn_thread)) != 0)
 1762                 goto out;
 1763 
 1764         /*
 1765          * We have to flush B_DELWRI data prior to renaming
 1766          * the file.  If we don't, the delayed-write buffers
 1767          * can be flushed out later after the file has gone stale
 1768          * under NFSV3.  NFSV2 does not have this problem because
 1769          * ( as far as I can tell ) it flushes dirty buffers more
 1770          * often.
 1771          * 
 1772          * Skip the rename operation if the fsync fails, this can happen
 1773          * due to the server's volume being full, when we pushed out data
 1774          * that was written back to our cache earlier. Not checking for
 1775          * this condition can result in potential (silent) data loss.
 1776          */
 1777         error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
 1778         VOP_UNLOCK(fvp, 0, fcnp->cn_thread);
 1779         if (!error && tvp)
 1780                 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
 1781         if (error)
 1782                 goto out;
 1783 
 1784         /*
 1785          * If the tvp exists and is in use, sillyrename it before doing the
 1786          * rename of the new file over it.
 1787          * XXX Can't sillyrename a directory.
               *
               * nfs_sillyrename() is the last operand, so it is only called
               * when all the preceding tests hold.  When it returns 0 the
               * target is released here and tvp is cleared so that the exit
               * code does not release it a second time.
 1788          */
 1789         if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
 1790                 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
 1791                 vput(tvp);
 1792                 tvp = NULL;
 1793         }
 1794 
 1795         error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
 1796                 tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
 1797                 tcnp->cn_thread);
 1798 
              /*
               * A directory was moved: purge the name cache of the source
               * directory, and of the target directory too when an existing
               * directory was replaced.  This runs whether or not the rpc
               * succeeded.
               */
 1799         if (fvp->v_type == VDIR) {
 1800                 if (tvp != NULL && tvp->v_type == VDIR)
 1801                         cache_purge(tdvp);
 1802                 cache_purge(fdvp);
 1803         }
 1804 
 1805 out:
 1806         if (tdvp == tvp)
 1807                 vrele(tdvp);
 1808         else
 1809                 vput(tdvp);
 1810         if (tvp)
 1811                 vput(tvp);
 1812         vrele(fdvp);
 1813         vrele(fvp);
 1814         /*
 1815          * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
 1816          */
 1817         if (error == ENOENT)
 1818                 error = 0;
 1819         return (error);
 1820 }
 1821 
 1822 /*
 1823  * nfs file rename rpc called from nfs_remove() above
       *
       * Renames scnp's name to the name stored in the sillyrename record sp,
       * within the single directory sdvp (sdvp is passed as both the source
       * and the target directory), using the credentials and thread from
       * scnp.  Returns the result of nfs_renamerpc().
       *
       * NOTE(review): nfs_remove() does not call this function directly; it
       * calls nfs_sillyrename(), which is presumably the real caller --
       * confirm.
 1824  */
 1825 static int
 1826 nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
 1827     struct sillyrename *sp)
 1828 {
 1829 
 1830         return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp,
 1831             sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
 1832 }
 1833 
 1834 /*
 1835  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
       *
       * The request carries the file handle of fdvp with the source name,
       * followed by the file handle of tdvp with the target name.  The
       * protocol version is taken from fdvp.  For NFSv3 the wcc data of both
       * directories is processed from the reply, source directory first.
       *
       * Whatever the outcome, both directories are flagged NMODIFIED, and
       * n_attrstamp is cleared for each directory whose wcc flag ended up
       * zero.  Returns the value left in error.
 1836  */
 1837 static int
 1838 nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
 1839     struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred,
 1840     struct thread *td)
 1841 {
 1842         caddr_t bpos, dpos;
 1843         int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
 1844         struct mbuf *mreq, *mrep, *md, *mb;
 1845         int v3 = NFS_ISV3(fdvp);
 1846 
 1847         nfsstats.rpccnt[NFSPROC_RENAME]++;
 1848         mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME,
 1849                 (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
 1850                 nfsm_rndup(tnamelen));
 1851         mb = mreq;
 1852         bpos = mtod(mb, caddr_t);
 1853         nfsm_fhtom(fdvp, v3);
 1854         nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
 1855         nfsm_fhtom(tdvp, v3);
 1856         nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
              /*
               * NOTE(review): no explicit goto targets nfsmout here, so the
               * nfsm_* macros presumably jump there on failure -- confirm.
               */
 1857         nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
 1858         if (v3) {
 1859                 nfsm_wcc_data(fdvp, fwccflag);
 1860                 nfsm_wcc_data(tdvp, twccflag);
 1861         }
 1862         m_freem(mrep);
 1863 nfsmout:
 1864         mtx_lock(&(VTONFS(fdvp))->n_mtx);
 1865         VTONFS(fdvp)->n_flag |= NMODIFIED;
 1866         mtx_unlock(&(VTONFS(fdvp))->n_mtx);
 1867         mtx_lock(&(VTONFS(tdvp))->n_mtx);
 1868         VTONFS(tdvp)->n_flag |= NMODIFIED;
 1869         mtx_unlock(&(VTONFS(tdvp))->n_mtx);
              /*
               * NOTE(review): n_attrstamp is cleared here after n_mtx has
               * been dropped, whereas nfs_removerpc() in this file clears it
               * while holding the mutex.
               */
 1870         if (!fwccflag)
 1871                 VTONFS(fdvp)->n_attrstamp = 0;
 1872         if (!twccflag)
 1873                 VTONFS(tdvp)->n_attrstamp = 0;
 1874         return (error);
 1875 }
 1876 
 1877 /*
 1878  * nfs hard link create call
       *
       * Creates the name cnp in directory tdvp as a link to vp.  Returns
       * EXDEV when vp and tdvp are on different mounts.  The protocol version
       * is taken from vp.  For NFSv3 the post-op attributes of vp and the wcc
       * data of tdvp are processed from the reply.
       *
       * Whatever the outcome of the rpc, tdvp is flagged NMODIFIED;
       * n_attrstamp is cleared on vp when attrflag is zero and on tdvp when
       * wccflag is zero.  Note that attrflag starts at zero, so for NFSv2
       * vp's n_attrstamp is always cleared.
 1879  */
 1880 static int
 1881 nfs_link(struct vop_link_args *ap)
 1882 {
 1883         struct vnode *vp = ap->a_vp;
 1884         struct vnode *tdvp = ap->a_tdvp;
 1885         struct componentname *cnp = ap->a_cnp;
 1886         caddr_t bpos, dpos;
 1887         int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
 1888         struct mbuf *mreq, *mrep, *md, *mb;
 1889         int v3;
 1890 
 1891         if (vp->v_mount != tdvp->v_mount) {
 1892                 return (EXDEV);
 1893         }
 1894 
 1895         /*
 1896          * Push all writes to the server, so that the attribute cache
 1897          * doesn't get "out of sync" with the server.
 1898          * XXX There should be a better way!
               *
               * NOTE(review): the return value of VOP_FSYNC() is discarded;
               * the link is attempted even if the flush failed.
 1899          */
 1900         VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
 1901 
 1902         v3 = NFS_ISV3(vp);
 1903         nfsstats.rpccnt[NFSPROC_LINK]++;
 1904         mreq = nfsm_reqhead(vp, NFSPROC_LINK,
 1905                 NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 1906         mb = mreq;
 1907         bpos = mtod(mb, caddr_t);
              /* File handle of the existing file, then directory and name. */
 1908         nfsm_fhtom(vp, v3);
 1909         nfsm_fhtom(tdvp, v3);
 1910         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
              /*
               * NOTE(review): no explicit goto targets nfsmout here, so the
               * nfsm_* macros presumably jump there on failure -- confirm.
               */
 1911         nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred);
 1912         if (v3) {
 1913                 nfsm_postop_attr(vp, attrflag);
 1914                 nfsm_wcc_data(tdvp, wccflag);
 1915         }
 1916         m_freem(mrep);
 1917 nfsmout:
 1918         mtx_lock(&(VTONFS(tdvp))->n_mtx);
 1919         VTONFS(tdvp)->n_flag |= NMODIFIED;
 1920         mtx_unlock(&(VTONFS(tdvp))->n_mtx);
              /* Both n_attrstamp stores are done without holding n_mtx. */
 1921         if (!attrflag)
 1922                 VTONFS(vp)->n_attrstamp = 0;
 1923         if (!wccflag)
 1924                 VTONFS(tdvp)->n_attrstamp = 0;
 1925         return (error);
 1926 }
 1927 
 1928 /*
 1929  * nfs symbolic link create call
       *
       * Creates the symbolic link cnp in directory dvp pointing at
       * ap->a_target.  The request layout differs by version: NFSv3 sends
       * the attributes before the target string, NFSv2 sends the target
       * string followed by an nfsv2_sattr.
       *
       * On success the new vnode is stored in *ap->a_vpp, obtained either
       * from the v3 reply or through nfs_lookitup().  On failure any vnode
       * obtained is released with vput().  dvp is flagged NMODIFIED on the
       * way out and its n_attrstamp is cleared when wccflag ended up zero.
 1930  */
 1931 static int
 1932 nfs_symlink(struct vop_symlink_args *ap)
 1933 {
 1934         struct vnode *dvp = ap->a_dvp;
 1935         struct vattr *vap = ap->a_vap;
 1936         struct componentname *cnp = ap->a_cnp;
 1937         struct nfsv2_sattr *sp;
 1938         caddr_t bpos, dpos;
              /*
               * gotvp has no initializer; in this function it is only handed
               * to nfsm_mtofh() and never read directly.
               */
 1939         int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
 1940         struct mbuf *mreq, *mrep, *md, *mb;
 1941         struct vnode *newvp = NULL;
 1942         int v3 = NFS_ISV3(dvp);
 1943 
 1944         nfsstats.rpccnt[NFSPROC_SYMLINK]++;
 1945         slen = strlen(ap->a_target);
 1946         mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
 1947             nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
 1948         mb = mreq;
 1949         bpos = mtod(mb, caddr_t);
 1950         nfsm_fhtom(dvp, v3);
 1951         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 1952         if (v3) {
 1953                 nfsm_v3attrbuild(vap, FALSE);
 1954         }
 1955         nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
 1956         if (!v3) {
                      /* v2: uid, gid and size are all sent as nfs_xdrneg1. */
 1957                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
 1958                 sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
 1959                 sp->sa_uid = nfs_xdrneg1;
 1960                 sp->sa_gid = nfs_xdrneg1;
 1961                 sp->sa_size = nfs_xdrneg1;
 1962                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 1963                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 1964         }
 1965 
 1966         /*
 1967          * Issue the NFS request and get the rpc response.
 1968          *
 1969          * Only NFSv3 responses returning an error of 0 actually return
 1970          * a file handle that can be converted into newvp without having
 1971          * to do an extra lookup rpc.
 1972          */
 1973         nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred);
 1974         if (v3) {
 1975                 if (error == 0)
 1976                         nfsm_mtofh(dvp, newvp, v3, gotvp);
 1977                 nfsm_wcc_data(dvp, wccflag);
 1978         }
 1979 
 1980         /*
 1981          * out code jumps -> here, mrep is also freed.
 1982          */
 1983 
 1984         m_freem(mrep);
 1985 nfsmout:
 1986 
 1987         /*
 1988          * If we do not have an error and we could not extract the newvp from
 1989          * the response due to the request being NFSv2, we have to do a
 1990          * lookup in order to obtain a newvp to return.
               *
               * The test below is on newvp alone, so the lookup is also done
               * for a v3 reply that left newvp NULL.
 1991          */
 1992         if (error == 0 && newvp == NULL) {
 1993                 struct nfsnode *np = NULL;
 1994 
 1995                 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 1996                     cnp->cn_cred, cnp->cn_thread, &np);
 1997                 if (!error)
 1998                         newvp = NFSTOV(np);
 1999         }
 2000         if (error) {
 2001                 if (newvp)
 2002                         vput(newvp);
 2003         } else {
 2004                 *ap->a_vpp = newvp;
 2005         }
 2006         mtx_lock(&(VTONFS(dvp))->n_mtx);
 2007         VTONFS(dvp)->n_flag |= NMODIFIED;
 2008         mtx_unlock(&(VTONFS(dvp))->n_mtx);
              /* Done after n_mtx has been dropped. */
 2009         if (!wccflag)
 2010                 VTONFS(dvp)->n_attrstamp = 0;
 2011         return (error);
 2012 }
 2013 
 2014 /*
 2015  * nfs make dir call
       *
       * Creates directory cnp in dvp.  A failing VOP_GETATTR() on dvp is
       * returned before any request is built.  After the rpc, dvp is flagged
       * NMODIFIED and its n_attrstamp is cleared when wccflag ended up zero.
       *
       * If the rpc succeeded but produced no vnode, the new directory is
       * looked up by name; finding something that is not a directory is
       * reported as EEXIST.  On success the vnode is stored in *ap->a_vpp; on
       * failure any vnode obtained is released with vput().
 2016  */
 2017 static int
 2018 nfs_mkdir(struct vop_mkdir_args *ap)
 2019 {
 2020         struct vnode *dvp = ap->a_dvp;
 2021         struct vattr *vap = ap->a_vap;
 2022         struct componentname *cnp = ap->a_cnp;
 2023         struct nfsv2_sattr *sp;
 2024         int len;
 2025         struct nfsnode *np = NULL;
 2026         struct vnode *newvp = NULL;
 2027         caddr_t bpos, dpos;
 2028         int error = 0, wccflag = NFSV3_WCCRATTR;
 2029         int gotvp = 0;
 2030         struct mbuf *mreq, *mrep, *md, *mb;
 2031         struct vattr vattr;
 2032         int v3 = NFS_ISV3(dvp);
 2033 
              /* Only the error is used; vattr is not read again here. */
 2034         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_thread)) != 0) {
 2035                 return (error);
 2036         }
 2037         len = cnp->cn_namelen;
 2038         nfsstats.rpccnt[NFSPROC_MKDIR]++;
 2039         mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR,
 2040           NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
 2041         mb = mreq;
 2042         bpos = mtod(mb, caddr_t);
 2043         nfsm_fhtom(dvp, v3);
 2044         nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
 2045         if (v3) {
 2046                 nfsm_v3attrbuild(vap, FALSE);
 2047         } else {
                      /* v2: uid, gid and size are all sent as nfs_xdrneg1. */
 2048                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
 2049                 sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
 2050                 sp->sa_uid = nfs_xdrneg1;
 2051                 sp->sa_gid = nfs_xdrneg1;
 2052                 sp->sa_size = nfs_xdrneg1;
 2053                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 2054                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 2055         }
              /*
               * NOTE(review): no explicit goto targets nfsmout here, so the
               * nfsm_* macros presumably jump there on failure -- confirm.
               */
 2056         nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
 2057         if (!error)
 2058                 nfsm_mtofh(dvp, newvp, v3, gotvp);
 2059         if (v3)
 2060                 nfsm_wcc_data(dvp, wccflag);
 2061         m_freem(mrep);
 2062 nfsmout:
 2063         mtx_lock(&(VTONFS(dvp))->n_mtx);
 2064         VTONFS(dvp)->n_flag |= NMODIFIED;
 2065         mtx_unlock(&(VTONFS(dvp))->n_mtx);
              /* Done after n_mtx has been dropped. */
 2066         if (!wccflag)
 2067                 VTONFS(dvp)->n_attrstamp = 0;
              /*
               * gotvp is not consulted here; the fallback lookup is keyed on
               * newvp still being NULL.
               */
 2068         if (error == 0 && newvp == NULL) {
 2069                 error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
 2070                         cnp->cn_thread, &np);
 2071                 if (!error) {
 2072                         newvp = NFSTOV(np);
                              /*
                               * The name exists but is not a directory; the
                               * vnode is released by the error path below.
                               */
 2073                         if (newvp->v_type != VDIR)
 2074                                 error = EEXIST;
 2075                 }
 2076         }
 2077         if (error) {
 2078                 if (newvp)
 2079                         vput(newvp);
 2080         } else
 2081                 *ap->a_vpp = newvp;
 2082         return (error);
 2083 }
 2084 
 2085 /*
 2086  * nfs remove directory call
       *
       * Removes directory vp, named by cnp, from dvp.  Returns EINVAL when
       * dvp and vp are the same vnode.  After the rpc, whatever its outcome,
       * dvp is flagged NMODIFIED, its n_attrstamp is cleared when wccflag
       * ended up zero, and the name cache is purged for both dvp and vp.
       * ENOENT is reported as success.
 2087  */
 2088 static int
 2089 nfs_rmdir(struct vop_rmdir_args *ap)
 2090 {
 2091         struct vnode *vp = ap->a_vp;
 2092         struct vnode *dvp = ap->a_dvp;
 2093         struct componentname *cnp = ap->a_cnp;
 2094         caddr_t bpos, dpos;
 2095         int error = 0, wccflag = NFSV3_WCCRATTR;
 2096         struct mbuf *mreq, *mrep, *md, *mb;
 2097         int v3 = NFS_ISV3(dvp);
 2098 
 2099         if (dvp == vp)
 2100                 return (EINVAL);
 2101         nfsstats.rpccnt[NFSPROC_RMDIR]++;
 2102         mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR,
 2103                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 2104         mb = mreq;
 2105         bpos = mtod(mb, caddr_t);
 2106         nfsm_fhtom(dvp, v3);
 2107         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
              /*
               * NOTE(review): no explicit goto targets nfsmout here, so the
               * nfsm_* macros presumably jump there on failure -- confirm.
               */
 2108         nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
 2109         if (v3)
 2110                 nfsm_wcc_data(dvp, wccflag);
 2111         m_freem(mrep);
 2112 nfsmout:
 2113         mtx_lock(&(VTONFS(dvp))->n_mtx);
 2114         VTONFS(dvp)->n_flag |= NMODIFIED;
 2115         mtx_unlock(&(VTONFS(dvp))->n_mtx);
              /* Done after n_mtx has been dropped. */
 2116         if (!wccflag)
 2117                 VTONFS(dvp)->n_attrstamp = 0;
 2118         cache_purge(dvp);
 2119         cache_purge(vp);
 2120         /*
 2121          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 2122          */
 2123         if (error == ENOENT)
 2124                 error = 0;
 2125         return (error);
 2126 }
 2127 
 2128 /*
 2129  * nfs readdir call
       *
       * Returns EPERM when vp is not a directory.  If the request offset is
       * at or beyond the recorded end-of-directory offset, the node is not
       * flagged NMODIFIED, and freshly fetched attributes pass the mtime
       * comparison below, the call returns 0 without reading anything and
       * counts a direofcache hit.  Otherwise the read is handed to
       * nfs_bioread() and its result is returned; a successful read that
       * consumed nothing from the uio counts as a direofcache miss.
 2130  */
 2131 static int
 2132 nfs_readdir(struct vop_readdir_args *ap)
 2133 {
 2134         struct vnode *vp = ap->a_vp;
 2135         struct nfsnode *np = VTONFS(vp);
 2136         struct uio *uio = ap->a_uio;
 2137         int tresid, error = 0;
 2138         struct vattr vattr;
 2139         
 2140         if (vp->v_type != VDIR) 
 2141                 return(EPERM);
 2142 
 2143         /*
 2144          * First, check for hit on the EOF offset cache
               *
               * n_direofoffset and n_flag are read here before n_mtx is
               * taken; the mutex is held only around the n_mtime comparison.
               * NOTE(review): NFS_TIMESPEC_COMPARE() presumably evaluates to
               * zero when the two times are equal -- confirm against its
               * definition.
 2145          */
 2146         if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
 2147             (np->n_flag & NMODIFIED) == 0) {
 2148                 if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_td) == 0) {
 2149                         mtx_lock(&np->n_mtx);
 2150                         if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 2151                                 mtx_unlock(&np->n_mtx);
 2152                                 nfsstats.direofcache_hits++;
 2153                                 goto out;
 2154                         } else
 2155                                 mtx_unlock(&np->n_mtx);
 2156                 }
 2157         }
 2158 
 2159         /*
 2160          * Call nfs_bioread() to do the real work.
 2161          */
 2162         tresid = uio->uio_resid;
 2163         error = nfs_bioread(vp, uio, 0, ap->a_cred);
 2164 
              /* Nothing was transferred although the read succeeded. */
 2165         if (!error && uio->uio_resid == tresid) {
 2166                 nfsstats.direofcache_misses++;
 2167         }
 2168 out:
 2169         return (error);
 2170 }
 2171 
 2172 /*
 2173  * Readdir rpc call.
 2174  * Called from below the buffer cache by nfs_doio().
 2175  */
 2176 int
 2177 nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 2178 {
 2179         int len, left;
 2180         struct dirent *dp = NULL;
 2181         u_int32_t *tl;
 2182         caddr_t cp;
 2183         nfsuint64 *cookiep;
 2184         caddr_t bpos, dpos;
 2185         struct mbuf *mreq, *mrep, *md, *mb;
 2186         nfsuint64 cookie;
 2187         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2188         struct nfsnode *dnp = VTONFS(vp);
 2189         u_quad_t fileno;
 2190         int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
 2191         int attrflag;
 2192         int v3 = NFS_ISV3(vp);
 2193 
 2194 #ifndef DIAGNOSTIC
 2195         if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
 2196                 (uiop->uio_resid & (DIRBLKSIZ - 1)))
 2197                 panic("nfs readdirrpc bad uio");
 2198 #endif
 2199 
 2200         /*
 2201          * If there is no cookie, assume directory was stale.
 2202          */
 2203         nfs_dircookie_lock(dnp);
 2204         cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
 2205         if (cookiep) {
 2206                 cookie = *cookiep;
 2207                 nfs_dircookie_unlock(dnp);
 2208         } else {
 2209                 nfs_dircookie_unlock(dnp);              
 2210                 return (NFSERR_BAD_COOKIE);
 2211         }
 2212 
 2213         /*
 2214          * Loop around doing readdir rpc's of size nm_readdirsize
 2215          * truncated to a multiple of DIRBLKSIZ.
 2216          * The stopping criteria is EOF or buffer full.
 2217          */
 2218         while (more_dirs && bigenough) {
 2219                 nfsstats.rpccnt[NFSPROC_READDIR]++;
 2220                 mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
 2221                         NFSX_READDIR(v3));
 2222                 mb = mreq;
 2223                 bpos = mtod(mb, caddr_t);
 2224                 nfsm_fhtom(vp, v3);
 2225                 if (v3) {
 2226                         tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
 2227                         *tl++ = cookie.nfsuquad[0];
 2228                         *tl++ = cookie.nfsuquad[1];
 2229                         mtx_lock(&dnp->n_mtx);
 2230                         *tl++ = dnp->n_cookieverf.nfsuquad[0];
 2231                         *tl++ = dnp->n_cookieverf.nfsuquad[1];
 2232                         mtx_unlock(&dnp->n_mtx);
 2233                 } else {
 2234                         tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 2235                         *tl++ = cookie.nfsuquad[0];
 2236                 }
 2237                 *tl = txdr_unsigned(nmp->nm_readdirsize);
 2238                 nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
 2239                 if (v3) {
 2240                         nfsm_postop_attr(vp, attrflag);
 2241                         if (!error) {
 2242                                 tl = nfsm_dissect(u_int32_t *,
 2243                                     2 * NFSX_UNSIGNED);
 2244                                 mtx_lock(&dnp->n_mtx);
 2245                                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
 2246                                 dnp->n_cookieverf.nfsuquad[1] = *tl;
 2247                                 mtx_unlock(&dnp->n_mtx);
 2248                         } else {
 2249                                 m_freem(mrep);
 2250                                 goto nfsmout;
 2251                         }
 2252                 }
 2253                 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2254                 more_dirs = fxdr_unsigned(int, *tl);
 2255 
 2256                 /* loop thru the dir entries, doctoring them to 4bsd form */
 2257                 while (more_dirs && bigenough) {
 2258                         if (v3) {
 2259                                 tl = nfsm_dissect(u_int32_t *,
 2260                                     3 * NFSX_UNSIGNED);
 2261                                 fileno = fxdr_hyper(tl);
 2262                                 len = fxdr_unsigned(int, *(tl + 2));
 2263                         } else {
 2264                                 tl = nfsm_dissect(u_int32_t *,
 2265                                     2 * NFSX_UNSIGNED);
 2266                                 fileno = fxdr_unsigned(u_quad_t, *tl++);
 2267                                 len = fxdr_unsigned(int, *tl);
 2268                         }
 2269                         if (len <= 0 || len > NFS_MAXNAMLEN) {
 2270                                 error = EBADRPC;
 2271                                 m_freem(mrep);
 2272                                 goto nfsmout;
 2273                         }
 2274                         tlen = nfsm_rndup(len);
 2275                         if (tlen == len)
 2276                                 tlen += 4;      /* To ensure null termination */
 2277                         left = DIRBLKSIZ - blksiz;
 2278                         if ((tlen + DIRHDSIZ) > left) {
 2279                                 dp->d_reclen += left;
 2280                                 uiop->uio_iov->iov_base =
 2281                                     (char *)uiop->uio_iov->iov_base + left;
 2282                                 uiop->uio_iov->iov_len -= left;
 2283                                 uiop->uio_offset += left;
 2284                                 uiop->uio_resid -= left;
 2285                                 blksiz = 0;
 2286                         }
 2287                         if ((tlen + DIRHDSIZ) > uiop->uio_resid)
 2288                                 bigenough = 0;
 2289                         if (bigenough) {
 2290                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
 2291                                 dp->d_fileno = (int)fileno;
 2292                                 dp->d_namlen = len;
 2293                                 dp->d_reclen = tlen + DIRHDSIZ;
 2294                                 dp->d_type = DT_UNKNOWN;
 2295                                 blksiz += dp->d_reclen;
 2296                                 if (blksiz == DIRBLKSIZ)
 2297                                         blksiz = 0;
 2298                                 uiop->uio_offset += DIRHDSIZ;
 2299                                 uiop->uio_resid -= DIRHDSIZ;
 2300                                 uiop->uio_iov->iov_base =
 2301                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
 2302                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
 2303                                 nfsm_mtouio(uiop, len);
 2304                                 cp = uiop->uio_iov->iov_base;
 2305                                 tlen -= len;
 2306                                 *cp = '\0';     /* null terminate */
 2307                                 uiop->uio_iov->iov_base =
 2308                                     (char *)uiop->uio_iov->iov_base + tlen;
 2309                                 uiop->uio_iov->iov_len -= tlen;
 2310                                 uiop->uio_offset += tlen;
 2311                                 uiop->uio_resid -= tlen;
 2312                         } else
 2313                                 nfsm_adv(nfsm_rndup(len));
 2314                         if (v3) {
 2315                                 tl = nfsm_dissect(u_int32_t *,
 2316                                     3 * NFSX_UNSIGNED);
 2317                         } else {
 2318                                 tl = nfsm_dissect(u_int32_t *,
 2319                                     2 * NFSX_UNSIGNED);
 2320                         }
 2321                         if (bigenough) {
 2322                                 cookie.nfsuquad[0] = *tl++;
 2323                                 if (v3)
 2324                                         cookie.nfsuquad[1] = *tl++;
 2325                         } else if (v3)
 2326                                 tl += 2;
 2327                         else
 2328                                 tl++;
 2329                         more_dirs = fxdr_unsigned(int, *tl);
 2330                 }
 2331                 /*
 2332                  * If at end of rpc data, get the eof boolean
 2333                  */
 2334                 if (!more_dirs) {
 2335                         tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2336                         more_dirs = (fxdr_unsigned(int, *tl) == 0);
 2337                 }
 2338                 m_freem(mrep);
 2339         }
 2340         /*
 2341          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
 2342          * by increasing d_reclen for the last record.
 2343          */
 2344         if (blksiz > 0) {
 2345                 left = DIRBLKSIZ - blksiz;
 2346                 dp->d_reclen += left;
 2347                 uiop->uio_iov->iov_base =
 2348                     (char *)uiop->uio_iov->iov_base + left;
 2349                 uiop->uio_iov->iov_len -= left;
 2350                 uiop->uio_offset += left;
 2351                 uiop->uio_resid -= left;
 2352         }
 2353 
 2354         /*
 2355          * We are now either at the end of the directory or have filled the
 2356          * block.
 2357          */
 2358         if (bigenough)
 2359                 dnp->n_direofoffset = uiop->uio_offset;
 2360         else {
 2361                 if (uiop->uio_resid > 0)
 2362                         nfs_printf("EEK! readdirrpc resid > 0\n");
 2363                 nfs_dircookie_lock(dnp);
 2364                 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
 2365                 *cookiep = cookie;
 2366                 nfs_dircookie_unlock(dnp);
 2367         }
 2368 nfsmout:
 2369         return (error);
 2370 }
 2371 
 2372 /*
 2373  * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
 2374  */
 2375 int
 2376 nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 2377 {
 2378         int len, left;
 2379         struct dirent *dp;
 2380         u_int32_t *tl;
 2381         caddr_t cp;
 2382         struct vnode *newvp;
 2383         nfsuint64 *cookiep;
 2384         caddr_t bpos, dpos, dpossav1, dpossav2;
 2385         struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
 2386         struct nameidata nami, *ndp = &nami;
 2387         struct componentname *cnp = &ndp->ni_cnd;
 2388         nfsuint64 cookie;
 2389         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2390         struct nfsnode *dnp = VTONFS(vp), *np;
 2391         struct vattr vattr, dvattr;
 2392         nfsfh_t *fhp;
 2393         u_quad_t fileno;
 2394         int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
 2395         int attrflag, dattrflag, fhsize;
 2396 
 2397 #ifndef nolint
 2398         dp = NULL;
 2399 #endif
 2400 #ifndef DIAGNOSTIC
 2401         if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
 2402                 (uiop->uio_resid & (DIRBLKSIZ - 1)))
 2403                 panic("nfs readdirplusrpc bad uio");
 2404 #endif
 2405         ndp->ni_dvp = vp;
 2406         newvp = NULLVP;
 2407 
 2408         /*
 2409          * If there is no cookie, assume directory was stale.
 2410          */
 2411         nfs_dircookie_lock(dnp);
 2412         cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
 2413         if (cookiep) {
 2414                 cookie = *cookiep;
 2415                 nfs_dircookie_unlock(dnp);
 2416         } else {
 2417                 nfs_dircookie_unlock(dnp);
 2418                 return (NFSERR_BAD_COOKIE);
 2419         }
 2420         /*
 2421          * Loop around doing readdir rpc's of size nm_readdirsize
 2422          * truncated to a multiple of DIRBLKSIZ.
 2423          * The stopping criteria is EOF or buffer full.
 2424          */
 2425         while (more_dirs && bigenough) {
 2426                 nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
 2427                 mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
 2428                         NFSX_FH(1) + 6 * NFSX_UNSIGNED);
 2429                 mb = mreq;
 2430                 bpos = mtod(mb, caddr_t);
 2431                 nfsm_fhtom(vp, 1);
 2432                 tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
 2433                 *tl++ = cookie.nfsuquad[0];
 2434                 *tl++ = cookie.nfsuquad[1];
 2435                 mtx_lock(&dnp->n_mtx);
 2436                 *tl++ = dnp->n_cookieverf.nfsuquad[0];
 2437                 *tl++ = dnp->n_cookieverf.nfsuquad[1];
 2438                 mtx_unlock(&dnp->n_mtx);
 2439                 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
 2440                 *tl = txdr_unsigned(nmp->nm_rsize);
 2441                 nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
 2442                 nfsm_postop_attr_va(vp, dattrflag, &dvattr);
 2443                 if (error) {
 2444                         m_freem(mrep);
 2445                         goto nfsmout;
 2446                 }
 2447                 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
 2448                 mtx_lock(&dnp->n_mtx);
 2449                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
 2450                 dnp->n_cookieverf.nfsuquad[1] = *tl++;
 2451                 mtx_unlock(&dnp->n_mtx);
 2452                 more_dirs = fxdr_unsigned(int, *tl);
 2453 
 2454                 /* loop thru the dir entries, doctoring them to 4bsd form */
 2455                 while (more_dirs && bigenough) {
 2456                         tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
 2457                         fileno = fxdr_hyper(tl);
 2458                         len = fxdr_unsigned(int, *(tl + 2));
 2459                         if (len <= 0 || len > NFS_MAXNAMLEN) {
 2460                                 error = EBADRPC;
 2461                                 m_freem(mrep);
 2462                                 goto nfsmout;
 2463                         }
 2464                         tlen = nfsm_rndup(len);
 2465                         if (tlen == len)
 2466                                 tlen += 4;      /* To ensure null termination*/
 2467                         left = DIRBLKSIZ - blksiz;
 2468                         if ((tlen + DIRHDSIZ) > left) {
 2469                                 dp->d_reclen += left;
 2470                                 uiop->uio_iov->iov_base =
 2471                                     (char *)uiop->uio_iov->iov_base + left;
 2472                                 uiop->uio_iov->iov_len -= left;
 2473                                 uiop->uio_offset += left;
 2474                                 uiop->uio_resid -= left;
 2475                                 blksiz = 0;
 2476                         }
 2477                         if ((tlen + DIRHDSIZ) > uiop->uio_resid)
 2478                                 bigenough = 0;
 2479                         if (bigenough) {
 2480                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
 2481                                 dp->d_fileno = (int)fileno;
 2482                                 dp->d_namlen = len;
 2483                                 dp->d_reclen = tlen + DIRHDSIZ;
 2484                                 dp->d_type = DT_UNKNOWN;
 2485                                 blksiz += dp->d_reclen;
 2486                                 if (blksiz == DIRBLKSIZ)
 2487                                         blksiz = 0;
 2488                                 uiop->uio_offset += DIRHDSIZ;
 2489                                 uiop->uio_resid -= DIRHDSIZ;
 2490                                 uiop->uio_iov->iov_base =
 2491                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
 2492                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
 2493                                 cnp->cn_nameptr = uiop->uio_iov->iov_base;
 2494                                 cnp->cn_namelen = len;
 2495                                 nfsm_mtouio(uiop, len);
 2496                                 cp = uiop->uio_iov->iov_base;
 2497                                 tlen -= len;
 2498                                 *cp = '\0';
 2499                                 uiop->uio_iov->iov_base =
 2500                                     (char *)uiop->uio_iov->iov_base + tlen;
 2501                                 uiop->uio_iov->iov_len -= tlen;
 2502                                 uiop->uio_offset += tlen;
 2503                                 uiop->uio_resid -= tlen;
 2504                         } else
 2505                                 nfsm_adv(nfsm_rndup(len));
 2506                         tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
 2507                         if (bigenough) {
 2508                                 cookie.nfsuquad[0] = *tl++;
 2509                                 cookie.nfsuquad[1] = *tl++;
 2510                         } else
 2511                                 tl += 2;
 2512 
 2513                         /*
 2514                          * Since the attributes are before the file handle
 2515                          * (sigh), we must skip over the attributes and then
 2516                          * come back and get them.
 2517                          */
 2518                         attrflag = fxdr_unsigned(int, *tl);
 2519                         if (attrflag) {
 2520                             dpossav1 = dpos;
 2521                             mdsav1 = md;
 2522                             nfsm_adv(NFSX_V3FATTR);
 2523                             tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2524                             doit = fxdr_unsigned(int, *tl);
 2525                             /*
 2526                              * Skip loading the attrs for "..". There's a 
 2527                              * race between loading the attrs here and 
 2528                              * lookups that look for the directory currently
 2529                              * being read (in the parent). We try to acquire
 2530                              * the exclusive lock on ".." here, owning the 
 2531                              * lock on the directory being read. Lookup will
 2532                              * hold the lock on ".." and try to acquire the 
 2533                              * lock on the directory being read.
 2534                              * 
 2535                              * There are other ways of fixing this, one would
 2536                              * be to do a trylock on the ".." vnode and skip
 2537                              * loading the attrs on ".." if it happens to be 
 2538                              * locked by another process. But skipping the
 2539                              * attrload on ".." seems the easiest option.
 2540                              */
 2541                             if (strcmp(dp->d_name, "..") == 0) {
 2542                                     doit = 0;
 2543                                     /*
 2544                                      * We've already skipped over the attrs, 
 2545                                      * skip over the filehandle. And store d_type
 2546                                      * as VDIR.
 2547                                      */
 2548                                     tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2549                                     i = fxdr_unsigned(int, *tl);
 2550                                     nfsm_adv(nfsm_rndup(i));
 2551                                     dp->d_type = IFTODT(VTTOIF(VDIR));
 2552                             }       
 2553                             if (doit) {
 2554                                 nfsm_getfh(fhp, fhsize, 1);
 2555                                 if (NFS_CMPFH(dnp, fhp, fhsize)) {
 2556                                     VREF(vp);
 2557                                     newvp = vp;
 2558                                     np = dnp;
 2559                                 } else {
 2560                                     error = nfs_nget(vp->v_mount, fhp,
 2561                                         fhsize, &np, LK_EXCLUSIVE);
 2562                                     if (error)
 2563                                         doit = 0;
 2564                                     else
 2565                                         newvp = NFSTOV(np);
 2566                                 }
 2567                             }
 2568                             if (doit && bigenough) {
 2569                                 dpossav2 = dpos;
 2570                                 dpos = dpossav1;
 2571                                 mdsav2 = md;
 2572                                 md = mdsav1;
 2573                                 nfsm_loadattr(newvp, &vattr);
 2574                                 dpos = dpossav2;
 2575                                 md = mdsav2;
 2576                                 dp->d_type = IFTODT(VTTOIF(vattr.va_type));
 2577                                 ndp->ni_vp = newvp;
 2578                                 if (newvp->v_type != VDIR || dattrflag != 0)
 2579                                     cache_enter_time(ndp->ni_dvp, ndp->ni_vp,
 2580                                         cnp, &vattr.va_ctime,
 2581                                         newvp->v_type != VDIR ? NULL :
 2582                                         &dvattr.va_ctime);
 2583                             }
 2584                         } else {
 2585                             /* Just skip over the file handle */
 2586                             tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2587                             i = fxdr_unsigned(int, *tl);
 2588                             if (i) {
 2589                                     tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2590                                     fhsize = fxdr_unsigned(int, *tl);
 2591                                     nfsm_adv(nfsm_rndup(fhsize));
 2592                             }
 2593                         }
 2594                         if (newvp != NULLVP) {
 2595                             if (newvp == vp)
 2596                                 vrele(newvp);
 2597                             else
 2598                                 vput(newvp);
 2599                             newvp = NULLVP;
 2600                         }
 2601                         tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2602                         more_dirs = fxdr_unsigned(int, *tl);
 2603                 }
 2604                 /*
 2605                  * If at end of rpc data, get the eof boolean
 2606                  */
 2607                 if (!more_dirs) {
 2608                         tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2609                         more_dirs = (fxdr_unsigned(int, *tl) == 0);
 2610                 }
 2611                 m_freem(mrep);
 2612         }
 2613         /*
 2614          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
 2615          * by increasing d_reclen for the last record.
 2616          */
 2617         if (blksiz > 0) {
 2618                 left = DIRBLKSIZ - blksiz;
 2619                 dp->d_reclen += left;
 2620                 uiop->uio_iov->iov_base =
 2621                     (char *)uiop->uio_iov->iov_base + left;
 2622                 uiop->uio_iov->iov_len -= left;
 2623                 uiop->uio_offset += left;
 2624                 uiop->uio_resid -= left;
 2625         }
 2626 
 2627         /*
 2628          * We are now either at the end of the directory or have filled the
 2629          * block.
 2630          */
 2631         if (bigenough)
 2632                 dnp->n_direofoffset = uiop->uio_offset;
 2633         else {
 2634                 if (uiop->uio_resid > 0)
 2635                         nfs_printf("EEK! readdirplusrpc resid > 0\n");
 2636                 nfs_dircookie_lock(dnp);
 2637                 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
 2638                 *cookiep = cookie;
 2639                 nfs_dircookie_unlock(dnp);
 2640         }
 2641 nfsmout:
 2642         if (newvp != NULLVP) {
 2643                 if (newvp == vp)
 2644                         vrele(newvp);
 2645                 else
 2646                         vput(newvp);
 2647                 newvp = NULLVP;
 2648         }
 2649         return (error);
 2650 }
 2651 
 2652 /*
 2653  * Silly rename. To make the NFS filesystem that is stateless look a little
 2654  * more like the "ufs" a remove of an active vnode is translated to a rename
 2655  * to a funny looking filename that is removed by nfs_inactive on the
 2656  * nfsnode. There is the potential for another process on a different client
 2657  * to create the same funny name between the nfs_lookitup() fails and the
 2658  * nfs_rename() completes, but...
 2659  */
 2660 static int
 2661 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 2662 {
 2663         struct sillyrename *sp;
 2664         struct nfsnode *np;
 2665         int error;
 2666         short pid;
 2667         unsigned int lticks;
 2668 
 2669         cache_purge(dvp);
 2670         np = VTONFS(vp);
 2671 #ifndef DIAGNOSTIC
 2672         if (vp->v_type == VDIR)
 2673                 panic("nfs: sillyrename dir");
 2674 #endif
 2675         MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
 2676                 M_NFSREQ, M_WAITOK);
 2677         sp->s_cred = crhold(cnp->cn_cred);
 2678         sp->s_dvp = dvp;
 2679         sp->s_removeit = nfs_removeit;
 2680         VREF(dvp);
 2681 
 2682         /* 
 2683          * Fudge together a funny name.
 2684          * Changing the format of the funny name to accomodate more 
 2685          * sillynames per directory.
 2686          * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 
 2687          * CPU ticks since boot.
 2688          */
 2689         pid = cnp->cn_thread->td_proc->p_pid;
 2690         lticks = (unsigned int)ticks;
 2691         for ( ; ; ) {
 2692                 sp->s_namlen = sprintf(sp->s_name, 
 2693                                        ".nfs.%08x.%04x4.4", lticks, 
 2694                                        pid);
 2695                 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 2696                                  cnp->cn_thread, NULL))
 2697                         break;
 2698                 lticks++;
 2699         }
 2700         error = nfs_renameit(dvp, cnp, sp);
 2701         if (error)
 2702                 goto bad;
 2703         error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 2704                 cnp->cn_thread, &np);
 2705         np->n_sillyrename = sp;
 2706         return (0);
 2707 bad:
 2708         vrele(sp->s_dvp);
 2709         crfree(sp->s_cred);
 2710         free((caddr_t)sp, M_NFSREQ);
 2711         return (error);
 2712 }
 2713 
 2714 /*
 2715  * Look up a file name and optionally either update the file handle or
 2716  * allocate an nfsnode, depending on the value of npp.
 2717  * npp == NULL  --> just do the lookup
 2718  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
 2719  *                      handled too
 2720  * *npp != NULL --> update the file handle in the vnode
 2721  */
 2722 static int
 2723 nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
 2724     struct thread *td, struct nfsnode **npp)
 2725 {
 2726         struct vnode *newvp = NULL;
 2727         struct nfsnode *np, *dnp = VTONFS(dvp);
 2728         caddr_t bpos, dpos;
 2729         int error = 0, fhlen, attrflag;
 2730         struct mbuf *mreq, *mrep, *md, *mb;
 2731         nfsfh_t *nfhp;
 2732         int v3 = NFS_ISV3(dvp);
 2733 
 2734         nfsstats.rpccnt[NFSPROC_LOOKUP]++;
 2735         mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
 2736                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
 2737         mb = mreq;
 2738         bpos = mtod(mb, caddr_t);
 2739         nfsm_fhtom(dvp, v3);
 2740         nfsm_strtom(name, len, NFS_MAXNAMLEN);
 2741         nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
 2742         if (npp && !error) {
 2743                 nfsm_getfh(nfhp, fhlen, v3);
 2744                 if (*npp) {
 2745                     np = *npp;
 2746                     if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
 2747                         free((caddr_t)np->n_fhp, M_NFSBIGFH);
 2748                         np->n_fhp = &np->n_fh;
 2749                     } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
 2750                         np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK);
 2751                     bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
 2752                     np->n_fhsize = fhlen;
 2753                     newvp = NFSTOV(np);
 2754                 } else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
 2755                     VREF(dvp);
 2756                     newvp = dvp;
 2757                 } else {
 2758                     error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
 2759                     if (error) {
 2760                         m_freem(mrep);
 2761                         return (error);
 2762                     }
 2763                     newvp = NFSTOV(np);
 2764                 }
 2765                 if (v3) {
 2766                         nfsm_postop_attr(newvp, attrflag);
 2767                         if (!attrflag && *npp == NULL) {
 2768                                 m_freem(mrep);
 2769                                 if (newvp == dvp)
 2770                                         vrele(newvp);
 2771                                 else
 2772                                         vput(newvp);
 2773                                 return (ENOENT);
 2774                         }
 2775                 } else
 2776                         nfsm_loadattr(newvp, NULL);
 2777         }
 2778         m_freem(mrep);
 2779 nfsmout:
 2780         if (npp && *npp == NULL) {
 2781                 if (error) {
 2782                         if (newvp) {
 2783                                 if (newvp == dvp)
 2784                                         vrele(newvp);
 2785                                 else
 2786                                         vput(newvp);
 2787                         }
 2788                 } else
 2789                         *npp = np;
 2790         }
 2791         return (error);
 2792 }
 2793 
 2794 /*
 2795  * Nfs Version 3 commit rpc
 2796  */
 2797 int
 2798 nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
 2799            struct thread *td)
 2800 {
 2801         u_int32_t *tl;
 2802         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2803         caddr_t bpos, dpos;
 2804         int error = 0, wccflag = NFSV3_WCCRATTR;
 2805         struct mbuf *mreq, *mrep, *md, *mb;
 2806 
 2807         mtx_lock(&nmp->nm_mtx);
 2808         if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
 2809                 mtx_unlock(&nmp->nm_mtx);
 2810                 return (0);
 2811         }
 2812         mtx_unlock(&nmp->nm_mtx);
 2813         nfsstats.rpccnt[NFSPROC_COMMIT]++;
 2814         mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
 2815         mb = mreq;
 2816         bpos = mtod(mb, caddr_t);
 2817         nfsm_fhtom(vp, 1);
 2818         tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED);
 2819         txdr_hyper(offset, tl);
 2820         tl += 2;
 2821         *tl = txdr_unsigned(cnt);
 2822         nfsm_request(vp, NFSPROC_COMMIT, td, cred);
 2823         nfsm_wcc_data(vp, wccflag);
 2824         if (!error) {
 2825                 tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF);
 2826                 if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
 2827                         NFSX_V3WRITEVERF)) {
 2828                         bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 2829                                 NFSX_V3WRITEVERF);
 2830                         error = NFSERR_STALEWRITEVERF;
 2831                 }
 2832         }
 2833         m_freem(mrep);
 2834 nfsmout:
 2835         return (error);
 2836 }
 2837 
 2838 /*
 2839  * Strategy routine.
 2840  * For async requests when nfsiod(s) are running, queue the request by
 2841  * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
 2842  * request.
 2843  */
 2844 static int
 2845 nfs_strategy(struct vop_strategy_args *ap)
 2846 {
 2847         struct buf *bp = ap->a_bp;
 2848         struct ucred *cr;
 2849 
 2850         KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
 2851         KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp));
 2852 
 2853         if (bp->b_iocmd == BIO_READ)
 2854                 cr = bp->b_rcred;
 2855         else
 2856                 cr = bp->b_wcred;
 2857 
 2858         /*
 2859          * If the op is asynchronous and an i/o daemon is waiting
 2860          * queue the request, wake it up and wait for completion
 2861          * otherwise just do it ourselves.
 2862          */
 2863         if ((bp->b_flags & B_ASYNC) == 0 ||
 2864             nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
 2865                 (void)nfs_doio(ap->a_vp, bp, cr, curthread);
 2866         return (0);
 2867 }
 2868 
 2869 /*
 2870  * fsync vnode op. Just call nfs_flush() with commit == 1.
 2871  */
 2872 /* ARGSUSED */
 2873 static int
 2874 nfs_fsync(struct vop_fsync_args *ap)
 2875 {
 2876         return (nfs_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1));
 2877 }
 2878 
 2879 /*
 2880  * Flush all the blocks associated with a vnode.
 2881  *      Walk through the buffer pool and push any dirty pages
 2882  *      associated with the vnode.
 2883  */
 2884 static int
 2885 nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 2886     int commit)
 2887 {
 2888         struct nfsnode *np = VTONFS(vp);
 2889         struct buf *bp;
 2890         int i;
 2891         struct buf *nbp;
 2892         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2893         int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 2894         int passone = 1;
 2895         u_quad_t off, endoff, toff;
 2896         struct ucred* wcred = NULL;
 2897         struct buf **bvec = NULL;
 2898 #ifndef NFS_COMMITBVECSIZ
 2899 #define NFS_COMMITBVECSIZ       20
 2900 #endif
 2901         struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
 2902         int bvecsize = 0, bveccount;
 2903 
 2904         if (nmp->nm_flag & NFSMNT_INT)
 2905                 slpflag = PCATCH;
 2906         if (!commit)
 2907                 passone = 0;
 2908         /*
 2909          * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
 2910          * server, but has not been committed to stable storage on the server
 2911          * yet. On the first pass, the byte range is worked out and the commit
 2912          * rpc is done. On the second pass, nfs_writebp() is called to do the
 2913          * job.
 2914          */
 2915 again:
 2916         off = (u_quad_t)-1;
 2917         endoff = 0;
 2918         bvecpos = 0;
 2919         if (NFS_ISV3(vp) && commit) {
 2920                 s = splbio();
 2921                 if (bvec != NULL && bvec != bvec_on_stack)
 2922                         free(bvec, M_TEMP);
 2923                 /*
 2924                  * Count up how many buffers waiting for a commit.
 2925                  */
 2926                 bveccount = 0;
 2927                 VI_LOCK(vp);
 2928                 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
 2929                         if (BUF_REFCNT(bp) == 0 &&
 2930                             (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 2931                                 == (B_DELWRI | B_NEEDCOMMIT))
 2932                                 bveccount++;
 2933                 }
 2934                 /*
 2935                  * Allocate space to remember the list of bufs to commit.  It is
 2936                  * important to use M_NOWAIT here to avoid a race with nfs_write.
 2937                  * If we can't get memory (for whatever reason), we will end up
 2938                  * committing the buffers one-by-one in the loop below.
 2939                  */
 2940                 if (bveccount > NFS_COMMITBVECSIZ) {
 2941                         /*
 2942                          * Release the vnode interlock to avoid a lock
 2943                          * order reversal.
 2944                          */
 2945                         VI_UNLOCK(vp);
 2946                         bvec = (struct buf **)
 2947                                 malloc(bveccount * sizeof(struct buf *),
 2948                                        M_TEMP, M_NOWAIT);
 2949                         VI_LOCK(vp);
 2950                         if (bvec == NULL) {
 2951                                 bvec = bvec_on_stack;
 2952                                 bvecsize = NFS_COMMITBVECSIZ;
 2953                         } else
 2954                                 bvecsize = bveccount;
 2955                 } else {
 2956                         bvec = bvec_on_stack;
 2957                         bvecsize = NFS_COMMITBVECSIZ;
 2958                 }
 2959                 TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
 2960                         if (bvecpos >= bvecsize)
 2961                                 break;
 2962                         if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 2963                                 nbp = TAILQ_NEXT(bp, b_bobufs);
 2964                                 continue;
 2965                         }
 2966                         if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
 2967                             (B_DELWRI | B_NEEDCOMMIT)) {
 2968                                 BUF_UNLOCK(bp);
 2969                                 nbp = TAILQ_NEXT(bp, b_bobufs);
 2970                                 continue;
 2971                         }
 2972                         VI_UNLOCK(vp);
 2973                         bremfree(bp);
 2974                         /*
 2975                          * Work out if all buffers are using the same cred
 2976                          * so we can deal with them all with one commit.
 2977                          *
 2978                          * NOTE: we are not clearing B_DONE here, so we have
 2979                          * to do it later on in this routine if we intend to
 2980                          * initiate I/O on the bp.
 2981                          *
 2982                          * Note: to avoid loopback deadlocks, we do not
 2983                          * assign b_runningbufspace.
 2984                          */
 2985                         if (wcred == NULL)
 2986                                 wcred = bp->b_wcred;
 2987                         else if (wcred != bp->b_wcred)
 2988                                 wcred = NOCRED;
 2989                         vfs_busy_pages(bp, 1);
 2990 
 2991                         VI_LOCK(vp);
 2992                         /*
 2993                          * bp is protected by being locked, but nbp is not
 2994                          * and vfs_busy_pages() may sleep.  We have to
 2995                          * recalculate nbp.
 2996                          */
 2997                         nbp = TAILQ_NEXT(bp, b_bobufs);
 2998 
 2999                         /*
 3000                          * A list of these buffers is kept so that the
 3001                          * second loop knows which buffers have actually
 3002                          * been committed. This is necessary, since there
 3003                          * may be a race between the commit rpc and new
 3004                          * uncommitted writes on the file.
 3005                          */
 3006                         bvec[bvecpos++] = bp;
 3007                         toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 3008                                 bp->b_dirtyoff;
 3009                         if (toff < off)
 3010                                 off = toff;
 3011                         toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
 3012                         if (toff > endoff)
 3013                                 endoff = toff;
 3014                 }
 3015                 splx(s);
 3016                 VI_UNLOCK(vp);
 3017         }
 3018         if (bvecpos > 0) {
 3019                 /*
 3020                  * Commit data on the server, as required.
 3021                  * If all bufs are using the same wcred, then use that with
 3022                  * one call for all of them, otherwise commit each one
 3023                  * separately.
 3024                  */
 3025                 if (wcred != NOCRED)
 3026                         retv = nfs_commit(vp, off, (int)(endoff - off),
 3027                                           wcred, td);
 3028                 else {
 3029                         retv = 0;
 3030                         for (i = 0; i < bvecpos; i++) {
 3031                                 off_t off, size;
 3032                                 bp = bvec[i];
 3033                                 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 3034                                         bp->b_dirtyoff;
 3035                                 size = (u_quad_t)(bp->b_dirtyend
 3036                                                   - bp->b_dirtyoff);
 3037                                 retv = nfs_commit(vp, off, (int)size,
 3038                                                   bp->b_wcred, td);
 3039                                 if (retv) break;
 3040                         }
 3041                 }
 3042 
 3043                 if (retv == NFSERR_STALEWRITEVERF)
 3044                         nfs_clearcommit(vp->v_mount);
 3045 
 3046                 /*
 3047                  * Now, either mark the blocks I/O done or mark the
 3048                  * blocks dirty, depending on whether the commit
 3049                  * succeeded.
 3050                  */
 3051                 for (i = 0; i < bvecpos; i++) {
 3052                         bp = bvec[i];
 3053                         bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 3054                         if (retv) {
 3055                                 /*
 3056                                  * Error, leave B_DELWRI intact
 3057                                  */
 3058                                 vfs_unbusy_pages(bp);
 3059                                 brelse(bp);
 3060                         } else {
 3061                                 /*
 3062                                  * Success, remove B_DELWRI ( bundirty() ).
 3063                                  *
 3064                                  * b_dirtyoff/b_dirtyend seem to be NFS
 3065                                  * specific.  We should probably move that
 3066                                  * into bundirty(). XXX
 3067                                  */
 3068                                 s = splbio();
 3069                                 bufobj_wref(&vp->v_bufobj);
 3070                                 bp->b_flags |= B_ASYNC;
 3071                                 bundirty(bp);
 3072                                 bp->b_flags &= ~B_DONE;
 3073                                 bp->b_ioflags &= ~BIO_ERROR;
 3074                                 bp->b_dirtyoff = bp->b_dirtyend = 0;
 3075                                 splx(s);
 3076                                 bufdone(bp);
 3077                         }
 3078                 }
 3079         }
 3080 
 3081         /*
 3082          * Start/do any write(s) that are required.
 3083          */
 3084 loop:
 3085         s = splbio();
 3086         VI_LOCK(vp);
 3087         TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
 3088                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 3089                         if (waitfor != MNT_WAIT || passone)
 3090                                 continue;
 3091 
 3092                         error = BUF_TIMELOCK(bp,
 3093                             LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 3094                             VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
 3095                         splx(s);
 3096                         if (error == 0) {
 3097                                 BUF_UNLOCK(bp);
 3098                                 goto loop;
 3099                         }
 3100                         if (error == ENOLCK)
 3101                                 goto loop;
 3102                         if (nfs_sigintr(nmp, NULL, td)) {
 3103                                 error = EINTR;
 3104                                 goto done;
 3105                         }
 3106                         if (slpflag == PCATCH) {
 3107                                 slpflag = 0;
 3108                                 slptimeo = 2 * hz;
 3109                         }
 3110                         goto loop;
 3111                 }
 3112                 if ((bp->b_flags & B_DELWRI) == 0)
 3113                         panic("nfs_fsync: not dirty");
 3114                 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
 3115                         BUF_UNLOCK(bp);
 3116                         continue;
 3117                 }
 3118                 VI_UNLOCK(vp);
 3119                 bremfree(bp);
 3120                 if (passone || !commit)
 3121                     bp->b_flags |= B_ASYNC;
 3122                 else
 3123                     bp->b_flags |= B_ASYNC;
 3124                 splx(s);
 3125                 bwrite(bp);
 3126                 if (nfs_sigintr(nmp, NULL, td)) {
 3127                         error = EINTR;
 3128                         goto done;
 3129                 }
 3130                 goto loop;
 3131         }
 3132         splx(s);
 3133         if (passone) {
 3134                 passone = 0;
 3135                 VI_UNLOCK(vp);
 3136                 goto again;
 3137         }
 3138         if (waitfor == MNT_WAIT) {
 3139                 while (vp->v_bufobj.bo_numoutput) {
 3140                         error = bufobj_wwait(&vp->v_bufobj, slpflag, slptimeo);
 3141                         if (error) {
 3142                             VI_UNLOCK(vp);
 3143                             error = nfs_sigintr(nmp, NULL, td);
 3144                             if (error)
 3145                                 goto done;
 3146                             if (slpflag == PCATCH) {
 3147                                 slpflag = 0;
 3148                                 slptimeo = 2 * hz;
 3149                             }
 3150                             VI_LOCK(vp);
 3151                         }
 3152                 }
 3153                 if (vp->v_bufobj.bo_dirty.bv_cnt != 0 && commit) {
 3154                         VI_UNLOCK(vp);
 3155                         goto loop;
 3156                 }
 3157                 /*
 3158                  * Wait for all the async IO requests to drain
 3159                  */
 3160                 VI_UNLOCK(vp);
 3161                 mtx_lock(&np->n_mtx);
 3162                 while (np->n_directio_asyncwr > 0) {
 3163                         np->n_flag |= NFSYNCWAIT;
 3164                         error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr,
 3165                                            &np->n_mtx, slpflag | (PRIBIO + 1), 
 3166                                            "nfsfsync", 0);
 3167                         if (error) {
 3168                                 if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) {
 3169                                         mtx_unlock(&np->n_mtx);
 3170                                         error = EINTR;  
 3171                                         goto done;
 3172                                 }
 3173                         }
 3174                 }
 3175                 mtx_unlock(&np->n_mtx);
 3176         } else
 3177                 VI_UNLOCK(vp);
 3178         mtx_lock(&np->n_mtx);
 3179         if (np->n_flag & NWRITEERR) {
 3180                 error = np->n_error;
 3181                 np->n_flag &= ~NWRITEERR;
 3182         }
 3183         if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0 &&
 3184             vp->v_bufobj.bo_numoutput == 0 && np->n_directio_asyncwr == 0)
 3185                 np->n_flag &= ~NMODIFIED;
 3186         mtx_unlock(&np->n_mtx);
 3187 done:
 3188         if (bvec != NULL && bvec != bvec_on_stack)
 3189                 free(bvec, M_TEMP);
 3190         return (error);
 3191 }
 3192 
 3193 /*
 3194  * NFS advisory byte-level locks.
 3195  */
 3196 static int
 3197 nfs_advlock(struct vop_advlock_args *ap)
 3198 {
 3199         struct vnode *vp = ap->a_vp;
 3200         u_quad_t size;
 3201         int error;
 3202 
 3203         error = vn_lock(vp, LK_SHARED, curthread);
 3204         if (error)
 3205                 return (error);
 3206         if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 3207                 size = VTONFS(vp)->n_size;
 3208                 VOP_UNLOCK(vp, 0, curthread);
 3209                 error = lf_advlock(ap, &(vp->v_lockf), size);
 3210         } else {
 3211                 if (nfs_advlock_p)
 3212                         error = nfs_advlock_p(ap);
 3213                 else
 3214                         error = ENOLCK;
 3215         }
 3216 
 3217         return (error);
 3218 }
 3219 
 3220 /*
 3221  * NFS advisory byte-level locks.
 3222  */
 3223 static int
 3224 nfs_advlockasync(struct vop_advlockasync_args *ap)
 3225 {
 3226         struct vnode *vp = ap->a_vp;
 3227         u_quad_t size;
 3228         int error;
 3229         
 3230         error = vn_lock(vp, LK_SHARED, curthread);
 3231         if (error)
 3232                 return (error);
 3233         if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 3234                 size = VTONFS(vp)->n_size;
 3235                 VOP_UNLOCK(vp, 0, curthread);
 3236                 error = lf_advlockasync(ap, &(vp->v_lockf), size);
 3237         } else {
 3238                 VOP_UNLOCK(vp, 0, curthread);
 3239                 error = EOPNOTSUPP;
 3240         }
 3241         return (error);
 3242 }
 3243 
 3244 /*
 3245  * Print out the contents of an nfsnode.
 3246  */
 3247 static int
 3248 nfs_print(struct vop_print_args *ap)
 3249 {
 3250         struct vnode *vp = ap->a_vp;
 3251         struct nfsnode *np = VTONFS(vp);
 3252 
 3253         nfs_printf("\tfileid %ld fsid 0x%x",
 3254            np->n_vattr.va_fileid, np->n_vattr.va_fsid);
 3255         if (vp->v_type == VFIFO)
 3256                 fifo_printinfo(vp);
 3257         printf("\n");
 3258         return (0);
 3259 }
 3260 
 3261 /*
 3262  * This is the "real" nfs::bwrite(struct buf*).
 3263  * We set B_CACHE if this is a VMIO buffer.
 3264  */
 3265 int
 3266 nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
 3267 {
 3268         int s;
 3269         int oldflags = bp->b_flags;
 3270 #if 0
 3271         int retv = 1;
 3272         off_t off;
 3273 #endif
 3274 
 3275         if (BUF_REFCNT(bp) == 0)
 3276                 panic("bwrite: buffer is not locked???");
 3277 
 3278         if (bp->b_flags & B_INVAL) {
 3279                 brelse(bp);
 3280                 return(0);
 3281         }
 3282 
 3283         bp->b_flags |= B_CACHE;
 3284 
 3285         /*
 3286          * Undirty the bp.  We will redirty it later if the I/O fails.
 3287          */
 3288 
 3289         s = splbio();
 3290         bundirty(bp);
 3291         bp->b_flags &= ~B_DONE;
 3292         bp->b_ioflags &= ~BIO_ERROR;
 3293         bp->b_iocmd = BIO_WRITE;
 3294 
 3295         bufobj_wref(bp->b_bufobj);
 3296         curthread->td_ru.ru_oublock++;
 3297         splx(s);
 3298 
 3299         /*
 3300          * Note: to avoid loopback deadlocks, we do not
 3301          * assign b_runningbufspace.
 3302          */
 3303         vfs_busy_pages(bp, 1);
 3304 
 3305         BUF_KERNPROC(bp);
 3306         bp->b_iooffset = dbtob(bp->b_blkno);
 3307         bstrategy(bp);
 3308 
 3309         if( (oldflags & B_ASYNC) == 0) {
 3310                 int rtval = bufwait(bp);
 3311 
 3312                 if (oldflags & B_DELWRI) {
 3313                         s = splbio();
 3314                         reassignbuf(bp);
 3315                         splx(s);
 3316                 }
 3317                 brelse(bp);
 3318                 return (rtval);
 3319         }
 3320 
 3321         return (0);
 3322 }
 3323 
 3324 /*
 3325  * nfs special file access vnode op.
 3326  * Essentially just get vattr and then imitate iaccess() since the device is
 3327  * local to the client.
 3328  */
 3329 static int
 3330 nfsspec_access(struct vop_access_args *ap)
 3331 {
 3332         struct vattr *vap;
 3333         struct ucred *cred = ap->a_cred;
 3334         struct vnode *vp = ap->a_vp;
 3335         mode_t mode = ap->a_mode;
 3336         struct vattr vattr;
 3337         int error;
 3338 
 3339         /*
 3340          * Disallow write attempts on filesystems mounted read-only;
 3341          * unless the file is a socket, fifo, or a block or character
 3342          * device resident on the filesystem.
 3343          */
 3344         if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 3345                 switch (vp->v_type) {
 3346                 case VREG:
 3347                 case VDIR:
 3348                 case VLNK:
 3349                         return (EROFS);
 3350                 default:
 3351                         break;
 3352                 }
 3353         }
 3354         vap = &vattr;
 3355         error = VOP_GETATTR(vp, vap, cred, ap->a_td);
 3356         if (error)
 3357                 goto out;
 3358         error  = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
 3359                          mode, cred, NULL);
 3360 out:
 3361         return error;
 3362 }
 3363 
 3364 /*
 3365  * Read wrapper for fifos.
 3366  */
 3367 static int
 3368 nfsfifo_read(struct vop_read_args *ap)
 3369 {
 3370         struct nfsnode *np = VTONFS(ap->a_vp);
 3371         int error;
 3372 
 3373         /*
 3374          * Set access flag.
 3375          */
 3376         mtx_lock(&np->n_mtx);
 3377         np->n_flag |= NACC;
 3378         getnanotime(&np->n_atim);
 3379         mtx_unlock(&np->n_mtx);
 3380         error = fifo_specops.vop_read(ap);
 3381         return error;   
 3382 }
 3383 
 3384 /*
 3385  * Write wrapper for fifos.
 3386  */
 3387 static int
 3388 nfsfifo_write(struct vop_write_args *ap)
 3389 {
 3390         struct nfsnode *np = VTONFS(ap->a_vp);
 3391 
 3392         /*
 3393          * Set update flag.
 3394          */
 3395         mtx_lock(&np->n_mtx);
 3396         np->n_flag |= NUPD;
 3397         getnanotime(&np->n_mtim);
 3398         mtx_unlock(&np->n_mtx);
 3399         return(fifo_specops.vop_write(ap));
 3400 }
 3401 
 3402 /*
 3403  * Close wrapper for fifos.
 3404  *
 3405  * Update the times on the nfsnode then do fifo close.
 3406  */
 3407 static int
 3408 nfsfifo_close(struct vop_close_args *ap)
 3409 {
 3410         struct vnode *vp = ap->a_vp;
 3411         struct nfsnode *np = VTONFS(vp);
 3412         struct vattr vattr;
 3413         struct timespec ts;
 3414 
 3415         mtx_lock(&np->n_mtx);
 3416         if (np->n_flag & (NACC | NUPD)) {
 3417                 getnanotime(&ts);
 3418                 if (np->n_flag & NACC)
 3419                         np->n_atim = ts;
 3420                 if (np->n_flag & NUPD)
 3421                         np->n_mtim = ts;
 3422                 np->n_flag |= NCHG;
 3423                 if (vrefcnt(vp) == 1 &&
 3424                     (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 3425                         VATTR_NULL(&vattr);
 3426                         if (np->n_flag & NACC)
 3427                                 vattr.va_atime = np->n_atim;
 3428                         if (np->n_flag & NUPD)
 3429                                 vattr.va_mtime = np->n_mtim;
 3430                         mtx_unlock(&np->n_mtx);
 3431                         (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_td);
 3432                         goto out;
 3433                 }
 3434         }
 3435         mtx_unlock(&np->n_mtx);
 3436 out:
 3437         return (fifo_specops.vop_close(ap));
 3438 }
 3439 
 3440 /*
 3441  * Just call nfs_writebp() with the force argument set to 1.
 3442  *
 3443  * NOTE: B_DONE may or may not be set in a_bp on call.
 3444  */
 3445 static int
 3446 nfs_bwrite(struct buf *bp)
 3447 {
 3448 
 3449         return (nfs_writebp(bp, 1, curthread));
 3450 }
 3451 
 3452 struct buf_ops buf_ops_nfs = {
 3453         .bop_name       =       "buf_ops_nfs",
 3454         .bop_write      =       nfs_bwrite,
 3455         .bop_strategy   =       bufstrategy,
 3456         .bop_sync       =       bufsync,
 3457         .bop_bdflush    =       bufbdflush,
 3458 };
Cache object: 0789e402b1d5b781181809de3546957f
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/nfsclient/nfs_vnops.c

FreeBSD/Linux Kernel Cross Reference
sys/nfsclient/nfs_vnops.c