nfs_vnops.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/8.1/sys/nfsclient/nfs_vnops.c 209003 2010-06-10 20:13:03Z jhb $");
   37 
   38 /*
   39  * vnode op calls for Sun NFS version 2 and 3
   40  */
   41 
   42 #include "opt_inet.h"
   43 #include "opt_kdtrace.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/kernel.h>
   47 #include <sys/systm.h>
   48 #include <sys/resourcevar.h>
   49 #include <sys/proc.h>
   50 #include <sys/mount.h>
   51 #include <sys/bio.h>
   52 #include <sys/buf.h>
   53 #include <sys/jail.h>
   54 #include <sys/malloc.h>
   55 #include <sys/mbuf.h>
   56 #include <sys/namei.h>
   57 #include <sys/socket.h>
   58 #include <sys/vnode.h>
   59 #include <sys/dirent.h>
   60 #include <sys/fcntl.h>
   61 #include <sys/lockf.h>
   62 #include <sys/stat.h>
   63 #include <sys/sysctl.h>
   64 #include <sys/signalvar.h>
   65 
   66 #include <vm/vm.h>
   67 #include <vm/vm_object.h>
   68 #include <vm/vm_extern.h>
   69 #include <vm/vm_object.h>
   70 
   71 #include <fs/fifofs/fifo.h>
   72 
   73 #include <nfs/nfsproto.h>
   74 #include <nfsclient/nfs.h>
   75 #include <nfsclient/nfsnode.h>
   76 #include <nfsclient/nfsmount.h>
   77 #include <nfsclient/nfs_kdtrace.h>
   78 #include <nfsclient/nfs_lock.h>
   79 #include <nfs/xdr_subs.h>
   80 #include <nfsclient/nfsm_subs.h>
   81 
   82 #include <net/if.h>
   83 #include <netinet/in.h>
   84 #include <netinet/in_var.h>
   85 
   86 #include <machine/stdarg.h>
   87 
   88 #ifdef KDTRACE_HOOKS
   89 #include <sys/dtrace_bsd.h>
   90 
   91 dtrace_nfsclient_accesscache_flush_probe_func_t
   92     dtrace_nfsclient_accesscache_flush_done_probe;
   93 uint32_t nfsclient_accesscache_flush_done_id;
   94 
   95 dtrace_nfsclient_accesscache_get_probe_func_t
   96     dtrace_nfsclient_accesscache_get_hit_probe,
   97     dtrace_nfsclient_accesscache_get_miss_probe;
   98 uint32_t nfsclient_accesscache_get_hit_id;
   99 uint32_t nfsclient_accesscache_get_miss_id;
  100 
  101 dtrace_nfsclient_accesscache_load_probe_func_t
  102     dtrace_nfsclient_accesscache_load_done_probe;
  103 uint32_t nfsclient_accesscache_load_done_id;
#endif /* KDTRACE_HOOKS */
  105 
  106 /* Defs */
  107 #define TRUE    1
  108 #define FALSE   0
  109 
  110 /*
  111  * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
  112  * calls are not in getblk() and brelse() so that they would not be necessary
  113  * here.
  114  */
  115 #ifndef B_VMIO
  116 #define vfs_busy_pages(bp, f)
  117 #endif
  118 
  119 static vop_read_t       nfsfifo_read;
  120 static vop_write_t      nfsfifo_write;
  121 static vop_close_t      nfsfifo_close;
  122 static int      nfs_flush(struct vnode *, int, int);
  123 static int      nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *);
  124 static vop_lookup_t     nfs_lookup;
  125 static vop_create_t     nfs_create;
  126 static vop_mknod_t      nfs_mknod;
  127 static vop_open_t       nfs_open;
  128 static vop_close_t      nfs_close;
  129 static vop_access_t     nfs_access;
  130 static vop_getattr_t    nfs_getattr;
  131 static vop_setattr_t    nfs_setattr;
  132 static vop_read_t       nfs_read;
  133 static vop_fsync_t      nfs_fsync;
  134 static vop_remove_t     nfs_remove;
  135 static vop_link_t       nfs_link;
  136 static vop_rename_t     nfs_rename;
  137 static vop_mkdir_t      nfs_mkdir;
  138 static vop_rmdir_t      nfs_rmdir;
  139 static vop_symlink_t    nfs_symlink;
  140 static vop_readdir_t    nfs_readdir;
  141 static vop_strategy_t   nfs_strategy;
  142 static  int     nfs_lookitup(struct vnode *, const char *, int,
  143                     struct ucred *, struct thread *, struct nfsnode **);
  144 static  int     nfs_sillyrename(struct vnode *, struct vnode *,
  145                     struct componentname *);
  146 static vop_access_t     nfsspec_access;
  147 static vop_readlink_t   nfs_readlink;
  148 static vop_print_t      nfs_print;
  149 static vop_advlock_t    nfs_advlock;
  150 static vop_advlockasync_t nfs_advlockasync;
  151 
  152 /*
  153  * Global vfs data structures for nfs
  154  */
  155 struct vop_vector nfs_vnodeops = {
  156         .vop_default =          &default_vnodeops,
  157         .vop_access =           nfs_access,
  158         .vop_advlock =          nfs_advlock,
  159         .vop_advlockasync =     nfs_advlockasync,
  160         .vop_close =            nfs_close,
  161         .vop_create =           nfs_create,
  162         .vop_fsync =            nfs_fsync,
  163         .vop_getattr =          nfs_getattr,
  164         .vop_getpages =         nfs_getpages,
  165         .vop_putpages =         nfs_putpages,
  166         .vop_inactive =         nfs_inactive,
  167         .vop_link =             nfs_link,
  168         .vop_lookup =           nfs_lookup,
  169         .vop_mkdir =            nfs_mkdir,
  170         .vop_mknod =            nfs_mknod,
  171         .vop_open =             nfs_open,
  172         .vop_print =            nfs_print,
  173         .vop_read =             nfs_read,
  174         .vop_readdir =          nfs_readdir,
  175         .vop_readlink =         nfs_readlink,
  176         .vop_reclaim =          nfs_reclaim,
  177         .vop_remove =           nfs_remove,
  178         .vop_rename =           nfs_rename,
  179         .vop_rmdir =            nfs_rmdir,
  180         .vop_setattr =          nfs_setattr,
  181         .vop_strategy =         nfs_strategy,
  182         .vop_symlink =          nfs_symlink,
  183         .vop_write =            nfs_write,
  184 };
  185 
  186 struct vop_vector nfs_fifoops = {
  187         .vop_default =          &fifo_specops,
  188         .vop_access =           nfsspec_access,
  189         .vop_close =            nfsfifo_close,
  190         .vop_fsync =            nfs_fsync,
  191         .vop_getattr =          nfs_getattr,
  192         .vop_inactive =         nfs_inactive,
  193         .vop_print =            nfs_print,
  194         .vop_read =             nfsfifo_read,
  195         .vop_reclaim =          nfs_reclaim,
  196         .vop_setattr =          nfs_setattr,
  197         .vop_write =            nfsfifo_write,
  198 };
  199 
  200 static int      nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
  201                              struct componentname *cnp, struct vattr *vap);
  202 static int      nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
  203                               struct ucred *cred, struct thread *td);
  204 static int      nfs_renamerpc(struct vnode *fdvp, const char *fnameptr,
  205                               int fnamelen, struct vnode *tdvp,
  206                               const char *tnameptr, int tnamelen,
  207                               struct ucred *cred, struct thread *td);
  208 static int      nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
  209                              struct sillyrename *sp);
  210 
  211 /*
  212  * Global variables
  213  */
  214 struct mtx      nfs_iod_mtx;
  215 enum nfsiod_state nfs_iodwant[NFS_MAXASYNCDAEMON];
  216 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
  217 int              nfs_numasync = 0;
  218 vop_advlock_t   *nfs_advlock_p = nfs_dolock;
  219 vop_reclaim_t   *nfs_reclaim_p = NULL;
  220 #define DIRHDSIZ        (sizeof (struct dirent) - (MAXNAMLEN + 1))
  221 
  222 SYSCTL_DECL(_vfs_nfs);
  223 
  224 static int      nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
  225 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
  226            &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
  227 
  228 static int      nfs_prime_access_cache = 0;
  229 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
  230            &nfs_prime_access_cache, 0,
  231            "Prime NFS ACCESS cache when fetching attributes");
  232 
  233 static int      nfsv3_commit_on_close = 0;
  234 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW,
  235            &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
  236 
  237 static int      nfs_clean_pages_on_close = 1;
  238 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
  239            &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
  240 
  241 int nfs_directio_enable = 0;
  242 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
  243            &nfs_directio_enable, 0, "Enable NFS directio");
  244 
  245 /*
  246  * This sysctl allows other processes to mmap a file that has been opened
  247  * O_DIRECT by a process.  In general, having processes mmap the file while
  248  * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
  249  * this by default to prevent DoS attacks - to prevent a malicious user from
  250  * opening up files O_DIRECT preventing other users from mmap'ing these
  251  * files.  "Protected" environments where stricter consistency guarantees are
  252  * required can disable this knob.  The process that opened the file O_DIRECT
  253  * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
  254  * meaningful.
  255  */
  256 int nfs_directio_allow_mmap = 1;
  257 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
  258            &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
  259 
  260 #if 0
  261 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
  262            &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
  263 
  264 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD,
  265            &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
  266 #endif
  267 
  268 #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY          \
  269                          | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE     \
  270                          | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
  271 
  272 /*
  273  * SMP Locking Note :
  274  * The list of locks after the description of the lock is the ordering
  275  * of other locks acquired with the lock held.
  276  * np->n_mtx : Protects the fields in the nfsnode.
  277        VM Object Lock
  278        VI_MTX (acquired indirectly)
  279  * nmp->nm_mtx : Protects the fields in the nfsmount.
  280        rep->r_mtx
  281  * nfs_iod_mtx : Global lock, protects shared nfsiod state.
  282  * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
  283        nmp->nm_mtx
  284        rep->r_mtx
  285  * rep->r_mtx : Protects the fields in an nfsreq.
  286  */
  287 
  288 static int
  289 nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td,
  290     struct ucred *cred, uint32_t *retmode)
  291 {
  292         const int v3 = 1;
  293         u_int32_t *tl;
  294         int error = 0, attrflag, i, lrupos;
  295 
  296         struct mbuf *mreq, *mrep, *md, *mb;
  297         caddr_t bpos, dpos;
  298         u_int32_t rmode;
  299         struct nfsnode *np = VTONFS(vp);
  300 
  301         nfsstats.rpccnt[NFSPROC_ACCESS]++;
  302         mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
  303         mb = mreq;
  304         bpos = mtod(mb, caddr_t);
  305         nfsm_fhtom(vp, v3);
  306         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  307         *tl = txdr_unsigned(wmode);
  308         nfsm_request(vp, NFSPROC_ACCESS, td, cred);
  309         nfsm_postop_attr(vp, attrflag);
  310         if (!error) {
  311                 lrupos = 0;
  312                 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
  313                 rmode = fxdr_unsigned(u_int32_t, *tl);
  314                 mtx_lock(&np->n_mtx);
  315                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
  316                         if (np->n_accesscache[i].uid == cred->cr_uid) {
  317                                 np->n_accesscache[i].mode = rmode;
  318                                 np->n_accesscache[i].stamp = time_second;
  319                                 break;
  320                         }
  321                         if (i > 0 && np->n_accesscache[i].stamp <
  322                             np->n_accesscache[lrupos].stamp)
  323                                 lrupos = i;
  324                 }
  325                 if (i == NFS_ACCESSCACHESIZE) {
  326                         np->n_accesscache[lrupos].uid = cred->cr_uid;
  327                         np->n_accesscache[lrupos].mode = rmode;
  328                         np->n_accesscache[lrupos].stamp = time_second;
  329                 }
  330                 mtx_unlock(&np->n_mtx);
  331                 if (retmode != NULL)
  332                         *retmode = rmode;
  333                 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
  334         }
  335         m_freem(mrep);
  336 nfsmout:
  337 #ifdef KDTRACE_HOOKS
  338         if (error) {
  339                 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
  340                     error);
  341         }
  342 #endif
  343         return (error);
  344 }
  345 
  346 /*
  347  * nfs access vnode op.
  348  * For nfs version 2, just return ok. File accesses may fail later.
  349  * For nfs version 3, use the access rpc to check accessibility. If file modes
  350  * are changed on the server, accesses might still fail later.
  351  */
  352 static int
  353 nfs_access(struct vop_access_args *ap)
  354 {
  355         struct vnode *vp = ap->a_vp;
  356         int error = 0, i, gotahit;
  357         u_int32_t mode, rmode, wmode;
  358         int v3 = NFS_ISV3(vp);
  359         struct nfsnode *np = VTONFS(vp);
  360 
  361         /*
  362          * Disallow write attempts on filesystems mounted read-only;
  363          * unless the file is a socket, fifo, or a block or character
  364          * device resident on the filesystem.
  365          */
  366         if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
  367                 switch (vp->v_type) {
  368                 case VREG:
  369                 case VDIR:
  370                 case VLNK:
  371                         return (EROFS);
  372                 default:
  373                         break;
  374                 }
  375         }
  376         /*
  377          * For nfs v3, check to see if we have done this recently, and if
  378          * so return our cached result instead of making an ACCESS call.
  379          * If not, do an access rpc, otherwise you are stuck emulating
  380          * ufs_access() locally using the vattr. This may not be correct,
  381          * since the server may apply other access criteria such as
  382          * client uid-->server uid mapping that we do not know about.
  383          */
  384         if (v3) {
  385                 if (ap->a_accmode & VREAD)
  386                         mode = NFSV3ACCESS_READ;
  387                 else
  388                         mode = 0;
  389                 if (vp->v_type != VDIR) {
  390                         if (ap->a_accmode & VWRITE)
  391                                 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
  392                         if (ap->a_accmode & VEXEC)
  393                                 mode |= NFSV3ACCESS_EXECUTE;
  394                 } else {
  395                         if (ap->a_accmode & VWRITE)
  396                                 mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
  397                                          NFSV3ACCESS_DELETE);
  398                         if (ap->a_accmode & VEXEC)
  399                                 mode |= NFSV3ACCESS_LOOKUP;
  400                 }
  401                 /* XXX safety belt, only make blanket request if caching */
  402                 if (nfsaccess_cache_timeout > 0) {
  403                         wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
  404                                 NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
  405                                 NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
  406                 } else {
  407                         wmode = mode;
  408                 }
  409 
  410                 /*
  411                  * Does our cached result allow us to give a definite yes to
  412                  * this request?
  413                  */
  414                 gotahit = 0;
  415                 mtx_lock(&np->n_mtx);
  416                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
  417                         if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
  418                                 if (time_second < (np->n_accesscache[i].stamp +
  419                                     nfsaccess_cache_timeout) &&
  420                                     (np->n_accesscache[i].mode & mode) == mode) {
  421                                         nfsstats.accesscache_hits++;
  422                                         gotahit = 1;
  423                                 }
  424                                 break;
  425                         }
  426                 }
  427                 mtx_unlock(&np->n_mtx);
  428 #ifdef KDTRACE_HOOKS
  429                 if (gotahit)
  430                         KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
  431                             ap->a_cred->cr_uid, mode);
  432                 else
  433                         KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
  434                             ap->a_cred->cr_uid, mode);
  435 #endif
  436                 if (gotahit == 0) {
  437                         /*
  438                          * Either a no, or a don't know.  Go to the wire.
  439                          */
  440                         nfsstats.accesscache_misses++;
  441                         error = nfs3_access_otw(vp, wmode, ap->a_td, ap->a_cred,
  442                             &rmode);
  443                         if (!error) {
  444                                 if ((rmode & mode) != mode)
  445                                         error = EACCES;
  446                         }
  447                 }
  448                 return (error);
  449         } else {
  450                 if ((error = nfsspec_access(ap)) != 0) {
  451                         return (error);
  452                 }
  453                 /*
  454                  * Attempt to prevent a mapped root from accessing a file
  455                  * which it shouldn't.  We try to read a byte from the file
  456                  * if the user is root and the file is not zero length.
  457                  * After calling nfsspec_access, we should have the correct
  458                  * file size cached.
  459                  */
  460                 mtx_lock(&np->n_mtx);
  461                 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
  462                     && VTONFS(vp)->n_size > 0) {
  463                         struct iovec aiov;
  464                         struct uio auio;
  465                         char buf[1];
  466 
  467                         mtx_unlock(&np->n_mtx);
  468                         aiov.iov_base = buf;
  469                         aiov.iov_len = 1;
  470                         auio.uio_iov = &aiov;
  471                         auio.uio_iovcnt = 1;
  472                         auio.uio_offset = 0;
  473                         auio.uio_resid = 1;
  474                         auio.uio_segflg = UIO_SYSSPACE;
  475                         auio.uio_rw = UIO_READ;
  476                         auio.uio_td = ap->a_td;
  477 
  478                         if (vp->v_type == VREG)
  479                                 error = nfs_readrpc(vp, &auio, ap->a_cred);
  480                         else if (vp->v_type == VDIR) {
  481                                 char* bp;
  482                                 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
  483                                 aiov.iov_base = bp;
  484                                 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
  485                                 error = nfs_readdirrpc(vp, &auio, ap->a_cred);
  486                                 free(bp, M_TEMP);
  487                         } else if (vp->v_type == VLNK)
  488                                 error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
  489                         else
  490                                 error = EACCES;
  491                 } else
  492                         mtx_unlock(&np->n_mtx);
  493                 return (error);
  494         }
  495 }
  496 
  497 int nfs_otw_getattr_avoid = 0;
  498 
  499 /*
  500  * nfs open vnode op
  501  * Check to see if the type is ok
  502  * and that deletion is not in progress.
  503  * For paged in text files, you will need to flush the page cache
  504  * if consistency is lost.
  505  */
  506 /* ARGSUSED */
  507 static int
  508 nfs_open(struct vop_open_args *ap)
  509 {
  510         struct vnode *vp = ap->a_vp;
  511         struct nfsnode *np = VTONFS(vp);
  512         struct vattr vattr;
  513         int error;
  514         int fmode = ap->a_mode;
  515 
  516         if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
  517                 return (EOPNOTSUPP);
  518 
  519         /*
  520          * Get a valid lease. If cached data is stale, flush it.
  521          */
  522         mtx_lock(&np->n_mtx);
  523         if (np->n_flag & NMODIFIED) {
  524                 mtx_unlock(&np->n_mtx);                 
  525                 error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  526                 if (error == EINTR || error == EIO)
  527                         return (error);
  528                 np->n_attrstamp = 0;
  529                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
  530                 if (vp->v_type == VDIR)
  531                         np->n_direofoffset = 0;
  532                 error = VOP_GETATTR(vp, &vattr, ap->a_cred);
  533                 if (error)
  534                         return (error);
  535                 mtx_lock(&np->n_mtx);
  536                 np->n_mtime = vattr.va_mtime;
  537                 mtx_unlock(&np->n_mtx);
  538         } else {
  539                 struct thread *td = curthread;
  540 
  541                 if (np->n_ac_ts_syscalls != td->td_syscalls ||
  542                     np->n_ac_ts_tid != td->td_tid || 
  543                     td->td_proc == NULL ||
  544                     np->n_ac_ts_pid != td->td_proc->p_pid) {
  545                         np->n_attrstamp = 0;
  546                         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
  547                 }
  548                 mtx_unlock(&np->n_mtx);                                         
  549                 error = VOP_GETATTR(vp, &vattr, ap->a_cred);
  550                 if (error)
  551                         return (error);
  552                 mtx_lock(&np->n_mtx);
  553                 if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
  554                         if (vp->v_type == VDIR)
  555                                 np->n_direofoffset = 0;
  556                         mtx_unlock(&np->n_mtx);
  557                         error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  558                         if (error == EINTR || error == EIO) {
  559                                 return (error);
  560                         }
  561                         mtx_lock(&np->n_mtx);
  562                         np->n_mtime = vattr.va_mtime;
  563                 }
  564                 mtx_unlock(&np->n_mtx);
  565         }
  566         /*
  567          * If the object has >= 1 O_DIRECT active opens, we disable caching.
  568          */
  569         if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
  570                 if (np->n_directio_opens == 0) {
  571                         error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  572                         if (error)
  573                                 return (error);
  574                         mtx_lock(&np->n_mtx);
  575                         np->n_flag |= NNONCACHE;
  576                         mtx_unlock(&np->n_mtx);
  577                 }
  578                 np->n_directio_opens++;
  579         }
  580         vnode_create_vobject(vp, vattr.va_size, ap->a_td);
  581         return (0);
  582 }
  583 
  584 /*
  585  * nfs close vnode op
  586  * What an NFS client should do upon close after writing is a debatable issue.
  587  * Most NFS clients push delayed writes to the server upon close, basically for
  588  * two reasons:
  589  * 1 - So that any write errors may be reported back to the client process
  590  *     doing the close system call. By far the two most likely errors are
  591  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
  592  * 2 - To put a worst case upper bound on cache inconsistency between
  593  *     multiple clients for the file.
  594  * There is also a consistency problem for Version 2 of the protocol w.r.t.
  595  * not being able to tell if other clients are writing a file concurrently,
  596  * since there is no way of knowing if the changed modify time in the reply
  597  * is only due to the write for this client.
  598  * (NFS Version 3 provides weak cache consistency data in the reply that
  599  *  should be sufficient to detect and handle this case.)
  600  *
  601  * The current code does the following:
  602  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
 * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
 *                     or commit them (this satisfies 1 and 2 except for the
 *                     case where the server crashes after this close but
 *                     before the commit RPC, which is felt to be "good
 *                     enough").  Passing 1 as the last argument to
 *                     nfs_flush() forces a commit operation; the
 *                     vfs.nfs.nfsv3_commit_on_close sysctl selects this.
  610  */
  611 /* ARGSUSED */
  612 static int
  613 nfs_close(struct vop_close_args *ap)
  614 {
  615         struct vnode *vp = ap->a_vp;
  616         struct nfsnode *np = VTONFS(vp);
  617         int error = 0;
  618         int fmode = ap->a_fflag;
  619 
  620         if (vp->v_type == VREG) {
  621             /*
  622              * Examine and clean dirty pages, regardless of NMODIFIED.
  623              * This closes a major hole in close-to-open consistency.
  624              * We want to push out all dirty pages (and buffers) on
  625              * close, regardless of whether they were dirtied by
  626              * mmap'ed writes or via write().
  627              */
  628             if (nfs_clean_pages_on_close && vp->v_object) {
  629                 VM_OBJECT_LOCK(vp->v_object);
  630                 vm_object_page_clean(vp->v_object, 0, 0, 0);
  631                 VM_OBJECT_UNLOCK(vp->v_object);
  632             }
  633             mtx_lock(&np->n_mtx);
  634             if (np->n_flag & NMODIFIED) {
  635                 mtx_unlock(&np->n_mtx);
  636                 if (NFS_ISV3(vp)) {
  637                     /*
  638                      * Under NFSv3 we have dirty buffers to dispose of.  We
  639                      * must flush them to the NFS server.  We have the option
  640                      * of waiting all the way through the commit rpc or just
  641                      * waiting for the initial write.  The default is to only
  642                      * wait through the initial write so the data is in the
  643                      * server's cache, which is roughly similar to the state
  644                      * a standard disk subsystem leaves the file in on close().
  645                      *
  646                      * We cannot clear the NMODIFIED bit in np->n_flag due to
  647                      * potential races with other processes, and certainly
  648                      * cannot clear it if we don't commit.
  649                      */
  650                     int cm = nfsv3_commit_on_close ? 1 : 0;
  651                     error = nfs_flush(vp, MNT_WAIT, cm);
  652                     /* np->n_flag &= ~NMODIFIED; */
  653                 } else
  654                     error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  655                 mtx_lock(&np->n_mtx);
  656             }
  657             if (np->n_flag & NWRITEERR) {
  658                 np->n_flag &= ~NWRITEERR;
  659                 error = np->n_error;
  660             }
  661             mtx_unlock(&np->n_mtx);
  662         }
  663         if (nfs_directio_enable)
  664                 KASSERT((np->n_directio_asyncwr == 0),
  665                         ("nfs_close: dirty unflushed (%d) directio buffers\n",
  666                          np->n_directio_asyncwr));
  667         if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
  668                 mtx_lock(&np->n_mtx);
  669                 KASSERT((np->n_directio_opens > 0), 
  670                         ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
  671                 np->n_directio_opens--;
  672                 if (np->n_directio_opens == 0)
  673                         np->n_flag &= ~NNONCACHE;
  674                 mtx_unlock(&np->n_mtx);
  675         }
  676         return (error);
  677 }
  678 
  679 /*
  680  * nfs getattr call from vfs.
       *
       * Fills *ap->a_vap with the attributes of ap->a_vp.  The attribute
       * cache is tried first.  On a miss, when the mount is v3,
       * nfs_prime_access_cache is set and nfsaccess_cache_timeout is
       * positive, nfs3_access_otw() is called and the cache is tried again;
       * failing that a GETATTR request is issued.
       *
       * Returns 0 on success, otherwise the value left in 'error' by the
       * request.  *a_vap is written only on success: on failure the local
       * vattr has not been filled in, and copying it out would hand
       * uninitialized stack contents to the caller.
  681  */
  682 static int
  683 nfs_getattr(struct vop_getattr_args *ap)
  684 {
  685         struct vnode *vp = ap->a_vp;
  686         struct nfsnode *np = VTONFS(vp);
  687         struct thread *td = curthread;
  688         struct vattr *vap = ap->a_vap;
  689         struct vattr vattr;
  690         caddr_t bpos, dpos;
  691         int error = 0;
  692         struct mbuf *mreq, *mrep, *md, *mb;
  693         int v3 = NFS_ISV3(vp);
  694 
  695         /*
  696          * Update local times for special files.
  697          */
  698         mtx_lock(&np->n_mtx);
  699         if (np->n_flag & (NACC | NUPD))
  700                 np->n_flag |= NCHG;
  701         mtx_unlock(&np->n_mtx);
  702         /*
  703          * First look in the cache.
  704          */
  705         if (nfs_getattrcache(vp, &vattr) == 0)
  706                 goto nfsmout;
  707         if (v3 && nfs_prime_access_cache && nfsaccess_cache_timeout > 0) {
  708                 nfsstats.accesscache_misses++;
  709                 nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred, NULL);
  710                 if (nfs_getattrcache(vp, &vattr) == 0)
  711                         goto nfsmout;
  712         }
  713         nfsstats.rpccnt[NFSPROC_GETATTR]++;
  714         mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
  715         mb = mreq;
  716         bpos = mtod(mb, caddr_t);
  717         nfsm_fhtom(vp, v3);
  718         nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred);
  719         if (!error) {
  720                 nfsm_loadattr(vp, &vattr);
  721         }
  722         m_freem(mrep);
  723 nfsmout:
              /*
               * The cache-hit paths arrive here with error == 0 and vattr
               * filled in.  A failed request arrives with error != 0 and
               * vattr untouched, so bail out before the copy-out below.
               */
              if (error != 0)
                      return (error);
  724         vap->va_type = vattr.va_type;
  725         vap->va_mode = vattr.va_mode;
  726         vap->va_nlink = vattr.va_nlink;
  727         vap->va_uid = vattr.va_uid;
  728         vap->va_gid = vattr.va_gid;
  729         vap->va_fsid = vattr.va_fsid;
  730         vap->va_fileid = vattr.va_fileid;
  731         vap->va_size = vattr.va_size;
  732         vap->va_blocksize = vattr.va_blocksize;
  733         vap->va_atime = vattr.va_atime;
  734         vap->va_mtime = vattr.va_mtime;
  735         vap->va_ctime = vattr.va_ctime;
  736         vap->va_gen = vattr.va_gen;
  737         vap->va_flags = vattr.va_flags;
  738         vap->va_rdev = vattr.va_rdev;
  739         vap->va_bytes = vattr.va_bytes;
  740         vap->va_filerev = vattr.va_filerev;
  741 
  742         return (error);
  743 }
  744 
  745 /*
  746  * nfs setattr call.
       *
       * Validates the request in ap->a_vap against the vnode type and the
       * mount's read-only flag, handles a size change locally, and then
       * sends the change with nfs_setattrrpc().
       *
       * Returns 0 on success, otherwise an errno value: EOPNOTSUPP when
       * va_flags is set, EROFS for a change on a read-only mount, EISDIR
       * for a size change on a directory, or the error returned by the
       * buffer flush or by nfs_setattrrpc().
  747  */
  748 static int
  749 nfs_setattr(struct vop_setattr_args *ap)
  750 {
  751         struct vnode *vp = ap->a_vp;
  752         struct nfsnode *np = VTONFS(vp);
  753         struct vattr *vap = ap->a_vap;
  754         struct thread *td = curthread;
  755         int error = 0;
  756         u_quad_t tsize;
  757 
              /*
               * Unless 'nolint' is defined, pre-set tsize; its real value is
               * captured on the size-change path below.
               */
  758 #ifndef nolint
  759         tsize = (u_quad_t)0;
  760 #endif
  761 
  762         /*
  763          * Setting of flags is not supported.
  764          */
  765         if (vap->va_flags != VNOVAL)
  766                 return (EOPNOTSUPP);
  767 
  768         /*
  769          * Disallow write attempts if the filesystem is mounted read-only.
               *
               * NOTE(review): the va_flags term below can never be true here,
               * since a va_flags other than VNOVAL already returned above.
  770          */
  771         if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
  772             vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
  773             vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
  774             (vp->v_mount->mnt_flag & MNT_RDONLY)) {
  775                 error = EROFS;
  776                 goto out;
  777         }
  778         if (vap->va_size != VNOVAL) {
  779                 switch (vp->v_type) {
  780                 case VDIR:
  781                         return (EISDIR);
  782                 case VCHR:
  783                 case VBLK:
  784                 case VSOCK:
  785                 case VFIFO:
                              /*
                               * A size change on these types is dropped: if
                               * nothing else is being changed the call succeeds
                               * at once, otherwise va_size is reset so that only
                               * the other attributes are sent.
                               */
  786                         if (vap->va_mtime.tv_sec == VNOVAL &&
  787                             vap->va_atime.tv_sec == VNOVAL &&
  788                             vap->va_mode == (mode_t)VNOVAL &&
  789                             vap->va_uid == (uid_t)VNOVAL &&
  790                             vap->va_gid == (gid_t)VNOVAL)
  791                                 return (0);             
  792                         vap->va_size = VNOVAL;
  793                         break;
  794                 default:
  795                         /*
  796                          * Disallow write attempts if the filesystem is
  797                          * mounted read-only.
  798                          */
  799                         if (vp->v_mount->mnt_flag & MNT_RDONLY)
  800                                 return (EROFS);
  801                         /*
  802                          *  We run vnode_pager_setsize() early (why?),
  803                          * we must set np->n_size now to avoid vinvalbuf
  804                          * V_SAVE races that might setsize a lower
  805                          * value.
  806                          */
  807                         mtx_lock(&np->n_mtx);
  808                         tsize = np->n_size;
  809                         mtx_unlock(&np->n_mtx);
                              /*
                               * NOTE(review): the value returned by
                               * nfs_meta_setsize() is never tested; it is
                               * overwritten by nfs_vinvalbuf() in the NMODIFIED
                               * case and by nfs_setattrrpc() otherwise.
                               */
  810                         error = nfs_meta_setsize(vp, ap->a_cred, td,
  811                             vap->va_size);
  812                         mtx_lock(&np->n_mtx);
  813                         if (np->n_flag & NMODIFIED) {
                                  /*
                                   * NOTE(review): tsize is re-read after
                                   * nfs_meta_setsize(); per the comment further
                                   * down n_size already holds the new size by
                                   * now, so the rollbacks that use tsize may
                                   * restore the new size, not the old - confirm.
                                   */
  814                             tsize = np->n_size;
  815                             mtx_unlock(&np->n_mtx);
                                  /*
                                   * Truncation to zero discards the buffers;
                                   * any other size passes V_SAVE.
                                   */
  816                             if (vap->va_size == 0)
  817                                 error = nfs_vinvalbuf(vp, 0, td, 1);
  818                             else
  819                                 error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
  820                             if (error) {
  821                                 vnode_pager_setsize(vp, tsize);
  822                                 goto out;
  823                             }
  824                         } else
  825                             mtx_unlock(&np->n_mtx);
  826                         /*
  827                          * np->n_size has already been set to vap->va_size
  828                          * in nfs_meta_setsize(). We must set it again since
  829                          * nfs_loadattrcache() could be called through
  830                          * nfs_meta_setsize() and could modify np->n_size.
  831                          */
  832                         mtx_lock(&np->n_mtx);
  833                         np->n_vattr.va_size = np->n_size = vap->va_size;
  834                         mtx_unlock(&np->n_mtx);
  835                 };
  836         } else {
                      /*
                       * No size change.  When a timestamp is being set on a
                       * regular file with NMODIFIED set, the buffers are
                       * flushed with V_SAVE first.  Only EINTR and EIO from
                       * that flush abort the call; any other flush error is
                       * overwritten by the result of nfs_setattrrpc() below.
                       */
  837                 mtx_lock(&np->n_mtx);
  838                 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 
  839                     (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
  840                         mtx_unlock(&np->n_mtx);
  841                         if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 &&
  842                             (error == EINTR || error == EIO))
  843                                 return error;
  844                 } else
  845                         mtx_unlock(&np->n_mtx);
  846         }
  847         error = nfs_setattrrpc(vp, vap, ap->a_cred);
              /*
               * The request failed after a size change was applied locally:
               * put tsize back into the nfsnode and pass it to
               * vnode_pager_setsize().
               */
  848         if (error && vap->va_size != VNOVAL) {
  849                 mtx_lock(&np->n_mtx);
  850                 np->n_size = np->n_vattr.va_size = tsize;
  851                 vnode_pager_setsize(vp, tsize);
  852                 mtx_unlock(&np->n_mtx);
  853         }
  854 out:
  855         return (error);
  856 }
  857 
  858 /*
  859  * Do an nfs setattr rpc.
  860  */
  861 static int
  862 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred)
  863 {
  864         struct nfsv2_sattr *sp;
  865         struct nfsnode *np = VTONFS(vp);
  866         caddr_t bpos, dpos;
  867         u_int32_t *tl;
  868         int error = 0, i, wccflag = NFSV3_WCCRATTR;
  869         struct mbuf *mreq, *mrep, *md, *mb;
  870         int v3 = NFS_ISV3(vp);
  871 
  872         nfsstats.rpccnt[NFSPROC_SETATTR]++;
  873         mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
  874         mb = mreq;
  875         bpos = mtod(mb, caddr_t);
  876         nfsm_fhtom(vp, v3);
  877         if (v3) {
  878                 nfsm_v3attrbuild(vap, TRUE);
  879                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  880                 *tl = nfs_false;
  881         } else {
  882                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
  883                 if (vap->va_mode == (mode_t)VNOVAL)
  884                         sp->sa_mode = nfs_xdrneg1;
  885                 else
  886                         sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
  887                 if (vap->va_uid == (uid_t)VNOVAL)
  888                         sp->sa_uid = nfs_xdrneg1;
  889                 else
  890                         sp->sa_uid = txdr_unsigned(vap->va_uid);
  891                 if (vap->va_gid == (gid_t)VNOVAL)
  892                         sp->sa_gid = nfs_xdrneg1;
  893                 else
  894                         sp->sa_gid = txdr_unsigned(vap->va_gid);
  895                 sp->sa_size = txdr_unsigned(vap->va_size);
  896                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
  897                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
  898         }
  899         nfsm_request(vp, NFSPROC_SETATTR, curthread, cred);
  900         if (v3) {
  901                 mtx_lock(&np->n_mtx);
  902                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
  903                         np->n_accesscache[i].stamp = 0;
  904                 mtx_unlock(&np->n_mtx);
  905                 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
  906                 nfsm_wcc_data(vp, wccflag);
  907         } else
  908                 nfsm_loadattr(vp, NULL);
  909         m_freem(mrep);
  910 nfsmout:
  911         return (error);
  912 }
  913 
  914 /*
  915  * nfs lookup call, one step at a time...
  916  * First look in cache
  917  * If not found, unlock the directory nfsnode and do the rpc
       *
       * On success returns 0 with *ap->a_vpp set to the vnode found.
       * Other results visible below: EJUSTRETURN when the last component of
       * a CREATE or RENAME does not exist and the mount is writable, ENOENT
       * when the name is not found or the directory is doomed, EROFS,
       * ENOTDIR, EISDIR, or the error from VOP_ACCESS(), cache_lookup(),
       * nfs_nget() or the lookup request.
  918  */
  919 static int
  920 nfs_lookup(struct vop_lookup_args *ap)
  921 {
  922         struct componentname *cnp = ap->a_cnp;
  923         struct vnode *dvp = ap->a_dvp;
  924         struct vnode **vpp = ap->a_vpp;
  925         struct mount *mp = dvp->v_mount;
  926         struct vattr vattr;
  927         time_t dmtime;
  928         int flags = cnp->cn_flags;
  929         struct vnode *newvp;
  930         struct nfsmount *nmp;
  931         caddr_t bpos, dpos;
  932         struct mbuf *mreq, *mrep, *md, *mb;
  933         long len;
  934         nfsfh_t *fhp;
  935         struct nfsnode *np, *newnp;
  936         int error = 0, attrflag, fhsize, ltype;
  937         int v3 = NFS_ISV3(dvp);
  938         struct thread *td = cnp->cn_thread;
  939 
  940         *vpp = NULLVP;
              /* Removing or renaming the last component needs a writable mount. */
  941         if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
  942             (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
  943                 return (EROFS);
  944         if (dvp->v_type != VDIR)
  945                 return (ENOTDIR);
  946         nmp = VFSTONFS(mp);
  947         np = VTONFS(dvp);
              /* The caller must pass a VEXEC access check on the directory. */
  948         if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) {
  949                 *vpp = NULLVP;
  950                 return (error);
  951         }
              /*
               * Consult the name cache.  As handled below, -1 is a positive hit
               * (*vpp set), ENOENT a negative hit and any other positive value
               * an error; everything else falls through to the LOOKUP request.
               */
  952         error = cache_lookup(dvp, vpp, cnp);
  953         if (error > 0 && error != ENOENT)
  954                 return (error);
  955         if (error == -1) {
  956                 /*
  957                  * We only accept a positive hit in the cache if the
  958                  * change time of the file matches our cached copy.
  959                  * Otherwise, we discard the cache entry and fallback
  960                  * to doing a lookup RPC.
  961                  *
  962                  * To better handle stale file handles and attributes,
  963                  * clear the attribute cache of this node if it is a
  964                  * leaf component, part of an open() call, and not
  965                  * locally modified before fetching the attributes.
  966                  * This should allow stale file handles to be detected
  967                  * here where we can fall back to a LOOKUP RPC to
  968                  * recover rather than having nfs_open() detect the
  969                  * stale file handle and failing open(2) with ESTALE.
  970                  */
  971                 newvp = *vpp;
  972                 newnp = VTONFS(newvp);
  973                 if ((cnp->cn_flags & (ISLASTCN | ISOPEN)) ==
  974                     (ISLASTCN | ISOPEN) && !(newnp->n_flag & NMODIFIED)) {
  975                         mtx_lock(&newnp->n_mtx);
  976                         newnp->n_attrstamp = 0;
  977                         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
  978                         mtx_unlock(&newnp->n_mtx);
  979                 }
  980                 if (VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
  981                     vattr.va_ctime.tv_sec == newnp->n_ctime) {
  982                         nfsstats.lookupcache_hits++;
  983                         if (cnp->cn_nameiop != LOOKUP &&
  984                             (flags & ISLASTCN))
  985                                 cnp->cn_flags |= SAVENAME;
  986                         return (0);
  987                 }
                      /*
                       * Stale entry: purge it and release the vnode the cache
                       * handed back.  NOTE(review): newvp == dvp presumably
                       * means the entry named the directory itself, so only a
                       * reference (no second lock) is held - confirm.
                       */
  988                 cache_purge(newvp);
  989                 if (dvp != newvp)
  990                         vput(newvp);
  991                 else 
  992                         vrele(newvp);
  993                 *vpp = NULLVP;
  994         } else if (error == ENOENT) {
  995                 if (dvp->v_iflag & VI_DOOMED)
  996                         return (ENOENT);
  997                 /*
  998                  * We only accept a negative hit in the cache if the
  999                  * modification time of the parent directory matches
 1000                  * our cached copy.  Otherwise, we discard all of the
 1001                  * negative cache entries for this directory. We also
 1002                  * only trust -ve cache entries for less than
 1003                  * nm_negative_namecache_timeout seconds.
 1004                  */
 1005                 if ((u_int)(ticks - np->n_dmtime_ticks) <
 1006                     (nmp->nm_negnametimeo * hz) &&
 1007                     VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
 1008                     vattr.va_mtime.tv_sec == np->n_dmtime) {
 1009                         nfsstats.lookupcache_hits++;
 1010                         return (ENOENT);
 1011                 }
 1012                 cache_purge_negative(dvp);
 1013                 mtx_lock(&np->n_mtx);
 1014                 np->n_dmtime = 0;
 1015                 mtx_unlock(&np->n_mtx);
 1016         }
 1017 
 1018         /*
 1019          * Cache the modification time of the parent directory in case
 1020          * the lookup fails and results in adding the first negative
 1021          * name cache entry for the directory.  Since this is reading
 1022          * a single time_t, don't bother with locking.  The
 1023          * modification time may be a bit stale, but it must be read
 1024          * before performing the lookup RPC to prevent a race where
 1025          * another lookup updates the timestamp on the directory after
 1026          * the lookup RPC has been performed on the server but before
 1027          * n_dmtime is set at the end of this function.
 1028          */
 1029         dmtime = np->n_vattr.va_mtime.tv_sec;
 1030         error = 0;
 1031         newvp = NULLVP;
 1032         nfsstats.lookupcache_misses++;
 1033         nfsstats.rpccnt[NFSPROC_LOOKUP]++;
              /* Build and send the LOOKUP request for this component name. */
 1034         len = cnp->cn_namelen;
 1035         mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
 1036                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
 1037         mb = mreq;
 1038         bpos = mtod(mb, caddr_t);
 1039         nfsm_fhtom(dvp, v3);
 1040         nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
 1041         nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
              /*
               * Request failed: for v3 the post-op attributes of dvp are parsed
               * from the reply and the reply is freed, then the common error
               * handling at nfsmout takes over.
               */
 1042         if (error) {
 1043                 if (v3) {
 1044                         nfsm_postop_attr(dvp, attrflag);
 1045                         m_freem(mrep);
 1046                 }
 1047                 goto nfsmout;
 1048         }
 1049         nfsm_getfh(fhp, fhsize, v3);
 1050 
 1051         /*
 1052          * Handle RENAME case...
               *
               * A handle matching dvp's own node (NFS_CMPFH()) is refused with
               * EISDIR.  Otherwise the vnode is obtained with an exclusive lock;
               * from nfs_nget() on, np refers to the returned node rather than
               * to dvp's node.  No name cache entry is made on this path.
 1053          */
 1054         if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
 1055                 if (NFS_CMPFH(np, fhp, fhsize)) {
 1056                         m_freem(mrep);
 1057                         return (EISDIR);
 1058                 }
 1059                 error = nfs_nget(mp, fhp, fhsize, &np, LK_EXCLUSIVE);
 1060                 if (error) {
 1061                         m_freem(mrep);
 1062                         return (error);
 1063                 }
 1064                 newvp = NFSTOV(np);
 1065                 if (v3) {
 1066                         nfsm_postop_attr(newvp, attrflag);
 1067                         nfsm_postop_attr(dvp, attrflag);
 1068                 } else
 1069                         nfsm_loadattr(newvp, NULL);
 1070                 *vpp = newvp;
 1071                 m_freem(mrep);
 1072                 cnp->cn_flags |= SAVENAME;
 1073                 return (0);
 1074         }
 1075 
              /*
               * "..": dvp is unlocked while the other vnode is obtained.  The
               * mount is busied across that window; if that cannot be done
               * with MBF_NOWAIT, dvp is unlocked around a second vfs_busy()
               * call with a mount reference held.  After each relock dvp is
               * checked for VI_DOOMED and the lookup fails with ENOENT if it
               * is set.  NOTE(review): presumably this avoids taking the
               * parent's lock while holding the child's - confirm.
               */
 1076         if (flags & ISDOTDOT) {
 1077                 ltype = VOP_ISLOCKED(dvp);
 1078                 error = vfs_busy(mp, MBF_NOWAIT);
 1079                 if (error != 0) {
 1080                         vfs_ref(mp);
 1081                         VOP_UNLOCK(dvp, 0);
 1082                         error = vfs_busy(mp, 0);
 1083                         vn_lock(dvp, ltype | LK_RETRY);
 1084                         vfs_rel(mp);
 1085                         if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
 1086                                 vfs_unbusy(mp);
 1087                                 error = ENOENT;
 1088                         }
 1089                         if (error != 0) {
 1090                                 m_freem(mrep);
 1091                                 return (error);
 1092                         }
 1093                 }
 1094                 VOP_UNLOCK(dvp, 0);
 1095                 error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
 1096                 if (error == 0)
 1097                         newvp = NFSTOV(np);
 1098                 vfs_unbusy(mp);
 1099                 if (newvp != dvp)
 1100                         vn_lock(dvp, ltype | LK_RETRY);
 1101                 if (dvp->v_iflag & VI_DOOMED) {
 1102                         if (error == 0) {
 1103                                 if (newvp == dvp)
 1104                                         vrele(newvp);
 1105                                 else
 1106                                         vput(newvp);
 1107                         }
 1108                         error = ENOENT;
 1109                 }
 1110                 if (error) {
 1111                         m_freem(mrep);
 1112                         return (error);
 1113                 }
 1114         } else if (NFS_CMPFH(np, fhp, fhsize)) {
                      /* NFS_CMPFH() matched dvp's node: return dvp, referenced. */
 1115                 VREF(dvp);
 1116                 newvp = dvp;
 1117         } else {
 1118                 error = nfs_nget(mp, fhp, fhsize, &np, cnp->cn_lkflags);
 1119                 if (error) {
 1120                         m_freem(mrep);
 1121                         return (error);
 1122                 }
 1123                 newvp = NFSTOV(np);
 1124         }
              /* Parse the attributes in the reply (v3: new vnode, then dvp). */
 1125         if (v3) {
 1126                 nfsm_postop_attr(newvp, attrflag);
 1127                 nfsm_postop_attr(dvp, attrflag);
 1128         } else
 1129                 nfsm_loadattr(newvp, NULL);
 1130         if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 1131                 cnp->cn_flags |= SAVENAME;
              /*
               * Enter the name in the cache unless this is the last component
               * of a DELETE.  NOTE(review): np is the node nfs_nget() stored in
               * the nfs_nget() paths, and still dvp's node in the NFS_CMPFH()
               * path.
               */
 1132         if ((cnp->cn_flags & MAKEENTRY) &&
 1133             (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
 1134                 np->n_ctime = np->n_vattr.va_ctime.tv_sec;
 1135                 cache_enter(dvp, newvp, cnp);
 1136         }
 1137         *vpp = newvp;
 1138         m_freem(mrep);
      /* Common exit: with error == 0 control falls through to done. */
 1139 nfsmout:
 1140         if (error) {
                      /*
                       * NOTE(review): newvp can only be set here if something
                       * after the vnode was obtained jumped to nfsmout (presumably
                       * the attribute macros).  vput() is used even when newvp is
                       * dvp, and np may then no longer be dvp's node for the
                       * n_dmtime update below - confirm.
                       */
 1141                 if (newvp != NULLVP) {
 1142                         vput(newvp);
 1143                         *vpp = NULLVP;
 1144                 }
 1145 
 1146                 if (error != ENOENT)
 1147                         goto done;
 1148 
 1149                 /* The requested file was not found. */
 1150                 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
 1151                     (flags & ISLASTCN)) {
 1152                         /*
 1153                          * XXX: UFS does a full VOP_ACCESS(dvp,
 1154                          * VWRITE) here instead of just checking
 1155                          * MNT_RDONLY.
 1156                          */
 1157                         if (mp->mnt_flag & MNT_RDONLY)
 1158                                 return (EROFS);
 1159                         cnp->cn_flags |= SAVENAME;
 1160                         return (EJUSTRETURN);
 1161                 }
 1162 
 1163                 if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) {
 1164                         /*
 1165                          * Maintain n_dmtime as the modification time
 1166                          * of the parent directory when the oldest -ve
 1167                          * name cache entry for this directory was
 1168                          * added.  If a -ve cache entry has already
 1169                          * been added with a newer modification time
 1170                          * by a concurrent lookup, then don't bother
 1171                          * adding a cache entry.  The modification
 1172                          * time of the directory might have changed
 1173                          * due to the file this lookup failed to find
 1174                          * being created.  In that case a subsequent
 1175                          * lookup would incorrectly use the entry
 1176                          * added here instead of doing an extra
 1177                          * lookup.
 1178                          */
 1179                         mtx_lock(&np->n_mtx);
 1180                         if (np->n_dmtime <= dmtime) {
 1181                                 if (np->n_dmtime == 0) {
 1182                                         np->n_dmtime = dmtime;
 1183                                         np->n_dmtime_ticks = ticks;
 1184                                 }
 1185                                 mtx_unlock(&np->n_mtx);
 1186                                 cache_enter(dvp, NULL, cnp);
 1187                         } else
 1188                                 mtx_unlock(&np->n_mtx);
 1189                 }
 1190                 return (ENOENT);
 1191         }
 1192 done:
 1193         return (error);
 1194 }
 1195 
 1196 /*
 1197  * nfs read call.
 1198  * Regular files are handed to nfs_bioread(); a directory fails with
 1199  * EISDIR and any other vnode type with EOPNOTSUPP.
 1200  */
 1201 static int
 1202 nfs_read(struct vop_read_args *ap)
 1203 {
 1204         struct vnode *node = ap->a_vp;
 1205 
 1206         if (node->v_type == VDIR)
 1207                 return (EISDIR);
 1208         if (node->v_type != VREG)
 1209                 return (EOPNOTSUPP);
 1210 
 1211         /* Regular file: nfs_bioread() does the actual work. */
 1212         return (nfs_bioread(node, ap->a_uio, ap->a_ioflag, ap->a_cred));
 1213 }
 1214 
 1215 /*
 1216  * nfs readlink call: symlinks are read through nfs_bioread() with no
 1217  * I/O flags; any other vnode type is rejected with EINVAL.
 1218  */
 1219 static int
 1220 nfs_readlink(struct vop_readlink_args *ap)
 1221 {
 1222         struct vnode *lnk = ap->a_vp;
 1223 
 1224         return (lnk->v_type == VLNK ?
 1225             nfs_bioread(lnk, ap->a_uio, 0, ap->a_cred) : EINVAL);
 1226 }
 1227 
 1228 /*
 1229  * Do a readlink rpc.
 1230  * Called by nfs_doio() from below the buffer cache.
       *
       * Issues a READLINK request for vp with cred and copies the returned
       * link text into uiop.  Returns the value left in 'error' (0 on
       * success).
 1231  */
 1232 int
 1233 nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 1234 {
 1235         caddr_t bpos, dpos;
 1236         int error = 0, len, attrflag;
 1237         struct mbuf *mreq, *mrep, *md, *mb;
 1238         int v3 = NFS_ISV3(vp);
 1239 
 1240         nfsstats.rpccnt[NFSPROC_READLINK]++;
 1241         mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
 1242         mb = mreq;
 1243         bpos = mtod(mb, caddr_t);
 1244         nfsm_fhtom(vp, v3);
 1245         nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred);
              /* v3: post-op attributes are parsed before 'error' is examined. */
 1246         if (v3)
 1247                 nfsm_postop_attr(vp, attrflag);
 1248         if (!error) {
 1249                 nfsm_strsiz(len, NFS_MAXPATHLEN);
                      /*
                       * A reported length of exactly NFS_MAXPATHLEN is replaced
                       * by np->n_size when that is non-zero and smaller.
                       * NOTE(review): presumably a workaround for servers that
                       * report the maximum instead of the real length - confirm.
                       */
 1250                 if (len == NFS_MAXPATHLEN) {
 1251                         struct nfsnode *np = VTONFS(vp);
 1252                         mtx_lock(&np->n_mtx);
 1253                         if (np->n_size && np->n_size < NFS_MAXPATHLEN)
 1254                                 len = np->n_size;
 1255                         mtx_unlock(&np->n_mtx);
 1256                 }
 1257                 nfsm_mtouio(uiop, len);
 1258         }
 1259         m_freem(mrep);
      /* NOTE(review): presumably the nfsm_* macros jump here on failure. */
 1260 nfsmout:
 1261         return (error);
 1262 }
 1263 
 1264 /*
 1265  * nfs read rpc call
 1266  * Ditto above
       *
       * Reads uiop->uio_resid bytes starting at uiop->uio_offset from vp,
       * issuing READ requests of at most nm_rsize bytes each and copying
       * the returned data into uiop.  The loop ends early at end of file
       * (v3: eof flag set or an empty reply; v2: a reply shorter than
       * requested).
       *
       * Returns EFBIG if the range extends past nm_maxfilesize, otherwise
       * the value left in 'error' (0 on success).
 1267  */
 1268 int
 1269 nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 1270 {
 1271         u_int32_t *tl;
 1272         caddr_t bpos, dpos;
 1273         struct mbuf *mreq, *mrep, *md, *mb;
 1274         struct nfsmount *nmp;
 1275         int error = 0, len, retlen, tsiz, eof, attrflag;
 1276         int v3 = NFS_ISV3(vp);
 1277         int rsize;
 1278 
              /* eof is only assigned on the v3 path; pre-set it unless nolint. */
 1279 #ifndef nolint
 1280         eof = 0;
 1281 #endif
 1282         nmp = VFSTONFS(vp->v_mount);
 1283         tsiz = uiop->uio_resid;
              /* The file size limit and the read size are sampled under nm_mtx. */
 1284         mtx_lock(&nmp->nm_mtx);
 1285         if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
 1286                 mtx_unlock(&nmp->nm_mtx);
 1287                 return (EFBIG);
 1288         }
 1289         rsize = nmp->nm_rsize;
 1290         mtx_unlock(&nmp->nm_mtx);
              /* tsiz counts the bytes still to be requested. */
 1291         while (tsiz > 0) {
 1292                 nfsstats.rpccnt[NFSPROC_READ]++;
 1293                 len = (tsiz > rsize) ? rsize : tsiz;
 1294                 mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
 1295                 mb = mreq;
 1296                 bpos = mtod(mb, caddr_t);
 1297                 nfsm_fhtom(vp, v3);
                      /*
                       * Three argument words: v3 sends the offset via
                       * txdr_hyper() followed by the count in the third word;
                       * v2 sends offset, count and a zero third word.
                       */
 1298                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3);
 1299                 if (v3) {
 1300                         txdr_hyper(uiop->uio_offset, tl);
 1301                         *(tl + 2) = txdr_unsigned(len);
 1302                 } else {
 1303                         *tl++ = txdr_unsigned(uiop->uio_offset);
 1304                         *tl++ = txdr_unsigned(len);
 1305                         *tl = 0;
 1306                 }
 1307                 nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred);
 1308                 if (v3) {
                              /*
                               * Post-op attributes are parsed first, even when
                               * the request reported an error.
                               */
 1309                         nfsm_postop_attr(vp, attrflag);
 1310                         if (error) {
 1311                                 m_freem(mrep);
 1312                                 goto nfsmout;
 1313                         }
                              /* Of the two words dissected, the second is eof. */
 1314                         tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
 1315                         eof = fxdr_unsigned(int, *(tl + 1));
 1316                 } else {
 1317                         nfsm_loadattr(vp, NULL);
 1318                 }
                      /* Fetch the returned data length, then copy the data out. */
 1319                 nfsm_strsiz(retlen, rsize);
 1320                 nfsm_mtouio(uiop, retlen);
 1321                 m_freem(mrep);
 1322                 tsiz -= retlen;
                      /* Setting tsiz to 0 ends the loop at end of file. */
 1323                 if (v3) {
 1324                         if (eof || retlen == 0) {
 1325                                 tsiz = 0;
 1326                         }
 1327                 } else if (retlen < len) {
 1328                         tsiz = 0;
 1329                 }
 1330         }
 1331 nfsmout:
 1332         return (error);
 1333 }
 1334 
 1335 /*
 1336  * nfs write call
 1337  */
 1338 int
 1339 nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
 1340              int *iomode, int *must_commit)
 1341 {
 1342         u_int32_t *tl;
 1343         int32_t backup;
 1344         caddr_t bpos, dpos;
 1345         struct mbuf *mreq, *mrep, *md, *mb;
 1346         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 1347         int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
 1348         int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
 1349         int wsize;
 1350         
 1351 #ifndef DIAGNOSTIC
 1352         if (uiop->uio_iovcnt != 1)
 1353                 panic("nfs: writerpc iovcnt > 1");
 1354 #endif
 1355         *must_commit = 0;
 1356         tsiz = uiop->uio_resid;
 1357         mtx_lock(&nmp->nm_mtx);
 1358         if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) {
 1359                 mtx_unlock(&nmp->nm_mtx);               
 1360                 return (EFBIG);
 1361         }
 1362         wsize = nmp->nm_wsize;
 1363         mtx_unlock(&nmp->nm_mtx);
 1364         while (tsiz > 0) {
 1365                 nfsstats.rpccnt[NFSPROC_WRITE]++;
 1366                 len = (tsiz > wsize) ? wsize : tsiz;
 1367                 mreq = nfsm_reqhead(vp, NFSPROC_WRITE,
 1368                         NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
 1369                 mb = mreq;
 1370                 bpos = mtod(mb, caddr_t);
 1371                 nfsm_fhtom(vp, v3);
 1372                 if (v3) {
 1373                         tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
 1374                         txdr_hyper(uiop->uio_offset, tl);
 1375                         tl += 2;
 1376                         *tl++ = txdr_unsigned(len);
 1377                         *tl++ = txdr_unsigned(*iomode);
 1378                         *tl = txdr_unsigned(len);
 1379                 } else {
 1380                         u_int32_t x;
 1381 
 1382                         tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 1383                         /* Set both "begin" and "current" to non-garbage. */
 1384                         x = txdr_unsigned((u_int32_t)uiop->uio_offset);
 1385                         *tl++ = x;      /* "begin offset" */
 1386                         *tl++ = x;      /* "current offset" */
 1387                         x = txdr_unsigned(len);
 1388                         *tl++ = x;      /* total to this offset */
 1389                         *tl = x;        /* size of this write */
 1390                 }
 1391                 nfsm_uiotom(uiop, len);
 1392                 nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred);
 1393                 if (v3) {
 1394                         wccflag = NFSV3_WCCCHK;
 1395                         nfsm_wcc_data(vp, wccflag);
 1396                         if (!error) {
 1397                                 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED
 1398                                         + NFSX_V3WRITEVERF);
 1399                                 rlen = fxdr_unsigned(int, *tl++);
 1400                                 if (rlen == 0) {
 1401                                         error = NFSERR_IO;
 1402                                         m_freem(mrep);
 1403                                         break;
 1404                                 } else if (rlen < len) {
 1405                                         backup = len - rlen;
 1406                                         uiop->uio_iov->iov_base =
 1407                                             (char *)uiop->uio_iov->iov_base -
 1408                                             backup;
 1409                                         uiop->uio_iov->iov_len += backup;
 1410                                         uiop->uio_offset -= backup;
 1411                                         uiop->uio_resid += backup;
 1412                                         len = rlen;
 1413                                 }
 1414                                 commit = fxdr_unsigned(int, *tl++);
 1415 
 1416                                 /*
 1417                                  * Return the lowest commitment level
 1418                                  * obtained by any of the RPCs.
 1419                                  */
 1420                                 if (committed == NFSV3WRITE_FILESYNC)
 1421                                         committed = commit;
 1422                                 else if (committed == NFSV3WRITE_DATASYNC &&
 1423                                         commit == NFSV3WRITE_UNSTABLE)
 1424                                         committed = commit;
 1425                                 mtx_lock(&nmp->nm_mtx);
 1426                                 if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
 1427                                     bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 1428                                         NFSX_V3WRITEVERF);
 1429                                     nmp->nm_state |= NFSSTA_HASWRITEVERF;
 1430                                 } else if (bcmp((caddr_t)tl,
 1431                                     (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
 1432                                     *must_commit = 1;
 1433                                     bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 1434                                         NFSX_V3WRITEVERF);
 1435                                 }
 1436                                 mtx_unlock(&nmp->nm_mtx);
 1437                         }
 1438                 } else {
 1439                         nfsm_loadattr(vp, NULL);
 1440                 }
 1441                 if (wccflag) {
 1442                         mtx_lock(&(VTONFS(vp))->n_mtx);
 1443                         VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime;
 1444                         mtx_unlock(&(VTONFS(vp))->n_mtx);
 1445                 }
 1446                 m_freem(mrep);
 1447                 if (error)
 1448                         break;
 1449                 tsiz -= len;
 1450         }
 1451 nfsmout:
 1452         if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC)
 1453                 committed = NFSV3WRITE_FILESYNC;
 1454         *iomode = committed;
 1455         if (error)
 1456                 uiop->uio_resid = tsiz;
 1457         return (error);
 1458 }
 1459 
 1460 /*
 1461  * nfs mknod rpc
 1462  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 1463  * mode set to specify the file type and the size field for rdev.
 1464  */
 1465 static int
 1466 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 1467     struct vattr *vap)
 1468 {
 1469         struct nfsv2_sattr *sp;
 1470         u_int32_t *tl;
 1471         struct vnode *newvp = NULL;
 1472         struct nfsnode *np = NULL;
 1473         struct vattr vattr;
 1474         caddr_t bpos, dpos;
 1475         int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
 1476         struct mbuf *mreq, *mrep, *md, *mb;
 1477         u_int32_t rdev;
 1478         int v3 = NFS_ISV3(dvp);
 1479 
 1480         if (vap->va_type == VCHR || vap->va_type == VBLK)
 1481                 rdev = txdr_unsigned(vap->va_rdev);
 1482         else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
 1483                 rdev = nfs_xdrneg1;
 1484         else {
 1485                 return (EOPNOTSUPP);
 1486         }
 1487         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
 1488                 return (error);
 1489         nfsstats.rpccnt[NFSPROC_MKNOD]++;
 1490         mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
 1491                 + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
 1492         mb = mreq;
 1493         bpos = mtod(mb, caddr_t);
 1494         nfsm_fhtom(dvp, v3);
 1495         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 1496         if (v3) {
 1497                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
 1498                 *tl++ = vtonfsv3_type(vap->va_type);
 1499                 nfsm_v3attrbuild(vap, FALSE);
 1500                 if (vap->va_type == VCHR || vap->va_type == VBLK) {
 1501                         tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 1502                         *tl++ = txdr_unsigned(major(vap->va_rdev));
 1503                         *tl = txdr_unsigned(minor(vap->va_rdev));
 1504                 }
 1505         } else {
 1506                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
 1507                 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
 1508                 sp->sa_uid = nfs_xdrneg1;
 1509                 sp->sa_gid = nfs_xdrneg1;
 1510                 sp->sa_size = rdev;
 1511                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 1512                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 1513         }
 1514         nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred);
 1515         if (!error) {
 1516                 nfsm_mtofh(dvp, newvp, v3, gotvp);
 1517                 if (!gotvp) {
 1518                         if (newvp) {
 1519                                 vput(newvp);
 1520                                 newvp = NULL;
 1521                         }
 1522                         error = nfs_lookitup(dvp, cnp->cn_nameptr,
 1523                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
 1524                         if (!error)
 1525                                 newvp = NFSTOV(np);
 1526                 }
 1527         }
 1528         if (v3)
 1529                 nfsm_wcc_data(dvp, wccflag);
 1530         m_freem(mrep);
 1531 nfsmout:
 1532         if (error) {
 1533                 if (newvp)
 1534                         vput(newvp);
 1535         } else {
 1536                 if (cnp->cn_flags & MAKEENTRY)
 1537                         cache_enter(dvp, newvp, cnp);
 1538                 *vpp = newvp;
 1539         }
 1540         mtx_lock(&(VTONFS(dvp))->n_mtx);
 1541         VTONFS(dvp)->n_flag |= NMODIFIED;
 1542         if (!wccflag) {
 1543                 VTONFS(dvp)->n_attrstamp = 0;
 1544                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 1545         }
 1546         mtx_unlock(&(VTONFS(dvp))->n_mtx);
 1547         return (error);
 1548 }
 1549 
 1550 /*
 1551  * nfs mknod vop
 1552  * just call nfs_mknodrpc() to do the work.
 1553  */
 1554 /* ARGSUSED */
 1555 static int
 1556 nfs_mknod(struct vop_mknod_args *ap)
 1557 {
 1558         return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
 1559 }
 1560 
 1561 static u_long create_verf;
 1562 /*
 1563  * nfs file create call
 1564  */
 1565 static int
 1566 nfs_create(struct vop_create_args *ap)
 1567 {
 1568         struct vnode *dvp = ap->a_dvp;
 1569         struct vattr *vap = ap->a_vap;
 1570         struct componentname *cnp = ap->a_cnp;
 1571         struct nfsv2_sattr *sp;
 1572         u_int32_t *tl;
 1573         struct nfsnode *np = NULL;
 1574         struct vnode *newvp = NULL;
 1575         caddr_t bpos, dpos;
 1576         int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
 1577         struct mbuf *mreq, *mrep, *md, *mb;
 1578         struct vattr vattr;
 1579         int v3 = NFS_ISV3(dvp);
 1580 
 1581         /*
 1582          * Oops, not for me..
 1583          */
 1584         if (vap->va_type == VSOCK) {
 1585                 error = nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap);
 1586                 return (error);
 1587         }
 1588 
 1589         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) {
 1590                 return (error);
 1591         }
 1592         if (vap->va_vaflags & VA_EXCLUSIVE)
 1593                 fmode |= O_EXCL;
 1594 again:
 1595         nfsstats.rpccnt[NFSPROC_CREATE]++;
 1596         mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
 1597                 nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
 1598         mb = mreq;
 1599         bpos = mtod(mb, caddr_t);
 1600         nfsm_fhtom(dvp, v3);
 1601         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 1602         if (v3) {
 1603                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
 1604                 if (fmode & O_EXCL) {
 1605                         *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
 1606                         tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF);
 1607 #ifdef INET
 1608                         CURVNET_SET(CRED_TO_VNET(cnp->cn_cred));
 1609                         IN_IFADDR_RLOCK();
 1610                         if (!TAILQ_EMPTY(&V_in_ifaddrhead))
 1611                                 *tl++ = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr;
 1612                         else
 1613 #endif
 1614                                 *tl++ = create_verf;
 1615 #ifdef INET
 1616                         IN_IFADDR_RUNLOCK();
 1617                         CURVNET_RESTORE();
 1618 #endif
 1619                         *tl = ++create_verf;
 1620                 } else {
 1621                         *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
 1622                         nfsm_v3attrbuild(vap, FALSE);
 1623                 }
 1624         } else {
 1625                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
 1626                 sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
 1627                 sp->sa_uid = nfs_xdrneg1;
 1628                 sp->sa_gid = nfs_xdrneg1;
 1629                 sp->sa_size = 0;
 1630                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 1631                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 1632         }
 1633         nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred);
 1634         if (!error) {
 1635                 nfsm_mtofh(dvp, newvp, v3, gotvp);
 1636                 if (!gotvp) {
 1637                         if (newvp) {
 1638                                 vput(newvp);
 1639                                 newvp = NULL;
 1640                         }
 1641                         error = nfs_lookitup(dvp, cnp->cn_nameptr,
 1642                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np);
 1643                         if (!error)
 1644                                 newvp = NFSTOV(np);
 1645                 }
 1646         }
 1647         if (v3)
 1648                 nfsm_wcc_data(dvp, wccflag);
 1649         m_freem(mrep);
 1650 nfsmout:
 1651         if (error) {
 1652                 if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
 1653                         fmode &= ~O_EXCL;
 1654                         goto again;
 1655                 }
 1656                 if (newvp)
 1657                         vput(newvp);
 1658         } else if (v3 && (fmode & O_EXCL)) {
 1659                 /*
 1660                  * We are normally called with only a partially initialized
 1661                  * VAP.  Since the NFSv3 spec says that server may use the
 1662                  * file attributes to store the verifier, the spec requires
 1663                  * us to do a SETATTR RPC. FreeBSD servers store the verifier
 1664                  * in atime, but we can't really assume that all servers will
 1665                  * so we ensure that our SETATTR sets both atime and mtime.
 1666                  */
 1667                 if (vap->va_mtime.tv_sec == VNOVAL)
 1668                         vfs_timestamp(&vap->va_mtime);
 1669                 if (vap->va_atime.tv_sec == VNOVAL)
 1670                         vap->va_atime = vap->va_mtime;
 1671                 error = nfs_setattrrpc(newvp, vap, cnp->cn_cred);
 1672                 if (error)
 1673                         vput(newvp);
 1674         }
 1675         if (!error) {
 1676                 if (cnp->cn_flags & MAKEENTRY)
 1677                         cache_enter(dvp, newvp, cnp);
 1678                 *ap->a_vpp = newvp;
 1679         }
 1680         mtx_lock(&(VTONFS(dvp))->n_mtx);
 1681         VTONFS(dvp)->n_flag |= NMODIFIED;
 1682         if (!wccflag) {
 1683                 VTONFS(dvp)->n_attrstamp = 0;
 1684                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 1685         }
 1686         mtx_unlock(&(VTONFS(dvp))->n_mtx);
 1687         return (error);
 1688 }
 1689 
 1690 /*
 1691  * nfs file remove call
 1692  * To try and make nfs semantics closer to ufs semantics, a file that has
 1693  * other processes using the vnode is renamed instead of removed and then
 1694  * removed later on the last close.
 1695  * - If v_usecount > 1
 1696  *        If a rename is not already in the works
 1697  *           call nfs_sillyrename() to set it up
 1698  *     else
 1699  *        do the remove rpc
 1700  */
 1701 static int
 1702 nfs_remove(struct vop_remove_args *ap)
 1703 {
 1704         struct vnode *vp = ap->a_vp;
 1705         struct vnode *dvp = ap->a_dvp;
 1706         struct componentname *cnp = ap->a_cnp;
 1707         struct nfsnode *np = VTONFS(vp);
 1708         int error = 0;
 1709         struct vattr vattr;
 1710 
 1711 #ifndef DIAGNOSTIC
 1712         if ((cnp->cn_flags & HASBUF) == 0)
 1713                 panic("nfs_remove: no name");
 1714         if (vrefcnt(vp) < 1)
 1715                 panic("nfs_remove: bad v_usecount");
 1716 #endif
 1717         if (vp->v_type == VDIR)
 1718                 error = EPERM;
 1719         else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
 1720             !VOP_GETATTR(vp, &vattr, cnp->cn_cred) && vattr.va_nlink > 1)) {
 1721                 /*
 1722                  * Purge the name cache so that the chance of a lookup for
 1723                  * the name succeeding while the remove is in progress is
 1724                  * minimized. Without node locking it can still happen, such
 1725                  * that an I/O op returns ESTALE, but since you get this if
 1726                  * another host removes the file..
 1727                  */
 1728                 cache_purge(vp);
 1729                 /*
 1730                  * throw away biocache buffers, mainly to avoid
 1731                  * unnecessary delayed writes later.
 1732                  */
 1733                 error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1);
 1734                 /* Do the rpc */
 1735                 if (error != EINTR && error != EIO)
 1736                         error = nfs_removerpc(dvp, cnp->cn_nameptr,
 1737                                 cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
 1738                 /*
 1739                  * Kludge City: If the first reply to the remove rpc is lost..
 1740                  *   the reply to the retransmitted request will be ENOENT
 1741                  *   since the file was in fact removed
 1742                  *   Therefore, we cheat and return success.
 1743                  */
 1744                 if (error == ENOENT)
 1745                         error = 0;
 1746         } else if (!np->n_sillyrename)
 1747                 error = nfs_sillyrename(dvp, vp, cnp);
 1748         np->n_attrstamp = 0;
 1749         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 1750         return (error);
 1751 }
 1752 
 1753 /*
 1754  * nfs file remove rpc called from nfs_inactive
 1755  */
 1756 int
 1757 nfs_removeit(struct sillyrename *sp)
 1758 {
 1759         /*
 1760          * Make sure that the directory vnode is still valid.
 1761          * XXX we should lock sp->s_dvp here.
 1762          */
 1763         if (sp->s_dvp->v_type == VBAD)
 1764                 return (0);
 1765         return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 1766                 NULL));
 1767 }
 1768 
 1769 /*
 1770  * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
 1771  */
 1772 static int
 1773 nfs_removerpc(struct vnode *dvp, const char *name, int namelen,
 1774     struct ucred *cred, struct thread *td)
 1775 {
 1776         caddr_t bpos, dpos;
 1777         int error = 0, wccflag = NFSV3_WCCRATTR;
 1778         struct mbuf *mreq, *mrep, *md, *mb;
 1779         int v3 = NFS_ISV3(dvp);
 1780 
 1781         nfsstats.rpccnt[NFSPROC_REMOVE]++;
 1782         mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE,
 1783                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
 1784         mb = mreq;
 1785         bpos = mtod(mb, caddr_t);
 1786         nfsm_fhtom(dvp, v3);
 1787         nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
 1788         nfsm_request(dvp, NFSPROC_REMOVE, td, cred);
 1789         if (v3)
 1790                 nfsm_wcc_data(dvp, wccflag);
 1791         m_freem(mrep);
 1792 nfsmout:
 1793         mtx_lock(&(VTONFS(dvp))->n_mtx);
 1794         VTONFS(dvp)->n_flag |= NMODIFIED;
 1795         if (!wccflag) {
 1796                 VTONFS(dvp)->n_attrstamp = 0;
 1797                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 1798         }
 1799         mtx_unlock(&(VTONFS(dvp))->n_mtx);
 1800         return (error);
 1801 }
 1802 
 1803 /*
 1804  * nfs file rename call
 1805  */
 1806 static int
 1807 nfs_rename(struct vop_rename_args *ap)
 1808 {
 1809         struct vnode *fvp = ap->a_fvp;
 1810         struct vnode *tvp = ap->a_tvp;
 1811         struct vnode *fdvp = ap->a_fdvp;
 1812         struct vnode *tdvp = ap->a_tdvp;
 1813         struct componentname *tcnp = ap->a_tcnp;
 1814         struct componentname *fcnp = ap->a_fcnp;
 1815         int error;
 1816 
 1817 #ifndef DIAGNOSTIC
 1818         if ((tcnp->cn_flags & HASBUF) == 0 ||
 1819             (fcnp->cn_flags & HASBUF) == 0)
 1820                 panic("nfs_rename: no name");
 1821 #endif
 1822         /* Check for cross-device rename */
 1823         if ((fvp->v_mount != tdvp->v_mount) ||
 1824             (tvp && (fvp->v_mount != tvp->v_mount))) {
 1825                 error = EXDEV;
 1826                 goto out;
 1827         }
 1828 
 1829         if (fvp == tvp) {
 1830                 nfs_printf("nfs_rename: fvp == tvp (can't happen)\n");
 1831                 error = 0;
 1832                 goto out;
 1833         }
 1834         if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
 1835                 goto out;
 1836 
 1837         /*
 1838          * We have to flush B_DELWRI data prior to renaming
 1839          * the file.  If we don't, the delayed-write buffers
 1840          * can be flushed out later after the file has gone stale
 1841          * under NFSV3.  NFSV2 does not have this problem because
 1842          * ( as far as I can tell ) it flushes dirty buffers more
 1843          * often.
 1844          * 
 1845          * Skip the rename operation if the fsync fails, this can happen
 1846          * due to the server's volume being full, when we pushed out data
 1847          * that was written back to our cache earlier. Not checking for
 1848          * this condition can result in potential (silent) data loss.
 1849          */
 1850         error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
 1851         VOP_UNLOCK(fvp, 0);
 1852         if (!error && tvp)
 1853                 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
 1854         if (error)
 1855                 goto out;
 1856 
 1857         /*
 1858          * If the tvp exists and is in use, sillyrename it before doing the
 1859          * rename of the new file over it.
 1860          * XXX Can't sillyrename a directory.
 1861          */
 1862         if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
 1863                 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
 1864                 vput(tvp);
 1865                 tvp = NULL;
 1866         }
 1867 
 1868         error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
 1869                 tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
 1870                 tcnp->cn_thread);
 1871 
 1872         if (fvp->v_type == VDIR) {
 1873                 if (tvp != NULL && tvp->v_type == VDIR)
 1874                         cache_purge(tdvp);
 1875                 cache_purge(fdvp);
 1876         }
 1877 
 1878 out:
 1879         if (tdvp == tvp)
 1880                 vrele(tdvp);
 1881         else
 1882                 vput(tdvp);
 1883         if (tvp)
 1884                 vput(tvp);
 1885         vrele(fdvp);
 1886         vrele(fvp);
 1887         /*
 1888          * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
 1889          */
 1890         if (error == ENOENT)
 1891                 error = 0;
 1892         return (error);
 1893 }
 1894 
 1895 /*
 1896  * nfs file rename rpc called from nfs_remove() above
 1897  */
 1898 static int
 1899 nfs_renameit(struct vnode *sdvp, struct componentname *scnp,
 1900     struct sillyrename *sp)
 1901 {
 1902 
 1903         return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp,
 1904             sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread));
 1905 }
 1906 
 1907 /*
 1908  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
 1909  */
 1910 static int
 1911 nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen,
 1912     struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred,
 1913     struct thread *td)
 1914 {
 1915         caddr_t bpos, dpos;
 1916         int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
 1917         struct mbuf *mreq, *mrep, *md, *mb;
 1918         int v3 = NFS_ISV3(fdvp);
 1919 
 1920         nfsstats.rpccnt[NFSPROC_RENAME]++;
 1921         mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME,
 1922                 (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
 1923                 nfsm_rndup(tnamelen));
 1924         mb = mreq;
 1925         bpos = mtod(mb, caddr_t);
 1926         nfsm_fhtom(fdvp, v3);
 1927         nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
 1928         nfsm_fhtom(tdvp, v3);
 1929         nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
 1930         nfsm_request(fdvp, NFSPROC_RENAME, td, cred);
 1931         if (v3) {
 1932                 nfsm_wcc_data(fdvp, fwccflag);
 1933                 nfsm_wcc_data(tdvp, twccflag);
 1934         }
 1935         m_freem(mrep);
 1936 nfsmout:
 1937         mtx_lock(&(VTONFS(fdvp))->n_mtx);
 1938         VTONFS(fdvp)->n_flag |= NMODIFIED;
 1939         mtx_unlock(&(VTONFS(fdvp))->n_mtx);
 1940         mtx_lock(&(VTONFS(tdvp))->n_mtx);
 1941         VTONFS(tdvp)->n_flag |= NMODIFIED;
 1942         mtx_unlock(&(VTONFS(tdvp))->n_mtx);
 1943         if (!fwccflag) {
 1944                 VTONFS(fdvp)->n_attrstamp = 0;
 1945                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
 1946         }
 1947         if (!twccflag) {
 1948                 VTONFS(tdvp)->n_attrstamp = 0;
 1949                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 1950         }
 1951         return (error);
 1952 }
 1953 
 1954 /*
 1955  * nfs hard link create call
 1956  */
 1957 static int
 1958 nfs_link(struct vop_link_args *ap)
 1959 {
 1960         struct vnode *vp = ap->a_vp;
 1961         struct vnode *tdvp = ap->a_tdvp;
 1962         struct componentname *cnp = ap->a_cnp;
 1963         caddr_t bpos, dpos;
 1964         int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
 1965         struct mbuf *mreq, *mrep, *md, *mb;
 1966         int v3;
 1967 
 1968         if (vp->v_mount != tdvp->v_mount) {
 1969                 return (EXDEV);
 1970         }
 1971 
 1972         /*
 1973          * Push all writes to the server, so that the attribute cache
 1974          * doesn't get "out of sync" with the server.
 1975          * XXX There should be a better way!
 1976          */
 1977         VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
 1978 
 1979         v3 = NFS_ISV3(vp);
 1980         nfsstats.rpccnt[NFSPROC_LINK]++;
 1981         mreq = nfsm_reqhead(vp, NFSPROC_LINK,
 1982                 NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 1983         mb = mreq;
 1984         bpos = mtod(mb, caddr_t);
 1985         nfsm_fhtom(vp, v3);
 1986         nfsm_fhtom(tdvp, v3);
 1987         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 1988         nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred);
 1989         if (v3) {
 1990                 nfsm_postop_attr(vp, attrflag);
 1991                 nfsm_wcc_data(tdvp, wccflag);
 1992         }
 1993         m_freem(mrep);
 1994 nfsmout:
 1995         mtx_lock(&(VTONFS(tdvp))->n_mtx);
 1996         VTONFS(tdvp)->n_flag |= NMODIFIED;
 1997         mtx_unlock(&(VTONFS(tdvp))->n_mtx);
 1998         if (!attrflag) {
 1999                 VTONFS(vp)->n_attrstamp = 0;
 2000                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 2001         }
 2002         if (!wccflag) {
 2003                 VTONFS(tdvp)->n_attrstamp = 0;
 2004                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 2005         }
 2006         return (error);
 2007 }
 2008 
 2009 /*
 2010  * nfs symbolic link create call
 2011  */
 2012 static int
 2013 nfs_symlink(struct vop_symlink_args *ap)
 2014 {
 2015         struct vnode *dvp = ap->a_dvp;
 2016         struct vattr *vap = ap->a_vap;
 2017         struct componentname *cnp = ap->a_cnp;
 2018         struct nfsv2_sattr *sp;
 2019         caddr_t bpos, dpos;
 2020         int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
 2021         struct mbuf *mreq, *mrep, *md, *mb;
 2022         struct vnode *newvp = NULL;
 2023         int v3 = NFS_ISV3(dvp);
 2024 
 2025         nfsstats.rpccnt[NFSPROC_SYMLINK]++;
 2026         slen = strlen(ap->a_target);
 2027         mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
 2028             nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
 2029         mb = mreq;
 2030         bpos = mtod(mb, caddr_t);
 2031         nfsm_fhtom(dvp, v3);
 2032         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 2033         if (v3) {
 2034                 nfsm_v3attrbuild(vap, FALSE);
 2035         }
 2036         nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
 2037         if (!v3) {
 2038                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
 2039                 sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
 2040                 sp->sa_uid = nfs_xdrneg1;
 2041                 sp->sa_gid = nfs_xdrneg1;
 2042                 sp->sa_size = nfs_xdrneg1;
 2043                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 2044                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 2045         }
 2046 
 2047         /*
 2048          * Issue the NFS request and get the rpc response.
 2049          *
 2050          * Only NFSv3 responses returning an error of 0 actually return
 2051          * a file handle that can be converted into newvp without having
 2052          * to do an extra lookup rpc.
 2053          */
 2054         nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred);
 2055         if (v3) {
 2056                 if (error == 0)
 2057                         nfsm_mtofh(dvp, newvp, v3, gotvp);
 2058                 nfsm_wcc_data(dvp, wccflag);
 2059         }
 2060 
 2061         /*
 2062          * out code jumps -> here, mrep is also freed.
 2063          */
 2064 
 2065         m_freem(mrep);
 2066 nfsmout:
 2067 
 2068         /*
 2069          * If we do not have an error and we could not extract the newvp from
 2070          * the response due to the request being NFSv2, we have to do a
 2071          * lookup in order to obtain a newvp to return.
 2072          */
 2073         if (error == 0 && newvp == NULL) {
 2074                 struct nfsnode *np = NULL;
 2075 
 2076                 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2077                     cnp->cn_cred, cnp->cn_thread, &np);
 2078                 if (!error)
 2079                         newvp = NFSTOV(np);
 2080         }
 2081         if (error) {
 2082                 if (newvp)
 2083                         vput(newvp);
 2084         } else {
 2085                 *ap->a_vpp = newvp;
 2086         }
 2087         mtx_lock(&(VTONFS(dvp))->n_mtx);
 2088         VTONFS(dvp)->n_flag |= NMODIFIED;
 2089         mtx_unlock(&(VTONFS(dvp))->n_mtx);
 2090         if (!wccflag) {
 2091                 VTONFS(dvp)->n_attrstamp = 0;
 2092                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 2093         }
 2094         return (error);
 2095 }
 2096 
 2097 /*
 2098  * nfs make dir call
 2099  */
 2100 static int
 2101 nfs_mkdir(struct vop_mkdir_args *ap)
 2102 {
 2103         struct vnode *dvp = ap->a_dvp;
 2104         struct vattr *vap = ap->a_vap;
 2105         struct componentname *cnp = ap->a_cnp;
 2106         struct nfsv2_sattr *sp;
 2107         int len;
 2108         struct nfsnode *np = NULL;
 2109         struct vnode *newvp = NULL;
 2110         caddr_t bpos, dpos;
 2111         int error = 0, wccflag = NFSV3_WCCRATTR;
 2112         int gotvp = 0;
 2113         struct mbuf *mreq, *mrep, *md, *mb;
 2114         struct vattr vattr;
 2115         int v3 = NFS_ISV3(dvp);
 2116 
 2117         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
 2118                 return (error);
 2119         len = cnp->cn_namelen;
 2120         nfsstats.rpccnt[NFSPROC_MKDIR]++;
 2121         mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR,
 2122           NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
 2123         mb = mreq;
 2124         bpos = mtod(mb, caddr_t);
 2125         nfsm_fhtom(dvp, v3);
 2126         nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
 2127         if (v3) {
 2128                 nfsm_v3attrbuild(vap, FALSE);
 2129         } else {
 2130                 sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR);
 2131                 sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
 2132                 sp->sa_uid = nfs_xdrneg1;
 2133                 sp->sa_gid = nfs_xdrneg1;
 2134                 sp->sa_size = nfs_xdrneg1;
 2135                 txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 2136                 txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 2137         }
 2138         nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred);
 2139         if (!error)
 2140                 nfsm_mtofh(dvp, newvp, v3, gotvp);
 2141         if (v3)
 2142                 nfsm_wcc_data(dvp, wccflag);
 2143         m_freem(mrep);
 2144 nfsmout:
 2145         mtx_lock(&(VTONFS(dvp))->n_mtx);
 2146         VTONFS(dvp)->n_flag |= NMODIFIED;
 2147         mtx_unlock(&(VTONFS(dvp))->n_mtx);
 2148         if (!wccflag) {
 2149                 VTONFS(dvp)->n_attrstamp = 0;
 2150                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 2151         }
 2152         if (error == 0 && newvp == NULL) {
 2153                 error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
 2154                         cnp->cn_thread, &np);
 2155                 if (!error) {
 2156                         newvp = NFSTOV(np);
 2157                         if (newvp->v_type != VDIR)
 2158                                 error = EEXIST;
 2159                 }
 2160         }
 2161         if (error) {
 2162                 if (newvp)
 2163                         vput(newvp);
 2164         } else
 2165                 *ap->a_vpp = newvp;
 2166         return (error);
 2167 }
 2168 
 2169 /*
 2170  * nfs remove directory call
 2171  */
 2172 static int
 2173 nfs_rmdir(struct vop_rmdir_args *ap)
 2174 {
 2175         struct vnode *vp = ap->a_vp;
 2176         struct vnode *dvp = ap->a_dvp;
 2177         struct componentname *cnp = ap->a_cnp;
 2178         caddr_t bpos, dpos;
 2179         int error = 0, wccflag = NFSV3_WCCRATTR;
 2180         struct mbuf *mreq, *mrep, *md, *mb;
 2181         int v3 = NFS_ISV3(dvp);
 2182 
 2183         if (dvp == vp)
 2184                 return (EINVAL);
 2185         nfsstats.rpccnt[NFSPROC_RMDIR]++;
 2186         mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR,
 2187                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 2188         mb = mreq;
 2189         bpos = mtod(mb, caddr_t);
 2190         nfsm_fhtom(dvp, v3);
 2191         nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 2192         nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred);
 2193         if (v3)
 2194                 nfsm_wcc_data(dvp, wccflag);
 2195         m_freem(mrep);
 2196 nfsmout:
 2197         mtx_lock(&(VTONFS(dvp))->n_mtx);
 2198         VTONFS(dvp)->n_flag |= NMODIFIED;
 2199         mtx_unlock(&(VTONFS(dvp))->n_mtx);
 2200         if (!wccflag) {
 2201                 VTONFS(dvp)->n_attrstamp = 0;
 2202                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 2203         }
 2204         cache_purge(dvp);
 2205         cache_purge(vp);
 2206         /*
 2207          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 2208          */
 2209         if (error == ENOENT)
 2210                 error = 0;
 2211         return (error);
 2212 }
 2213 
 2214 /*
 2215  * nfs readdir call
 2216  */
 2217 static int
 2218 nfs_readdir(struct vop_readdir_args *ap)
 2219 {
 2220         struct vnode *vp = ap->a_vp;
 2221         struct nfsnode *np = VTONFS(vp);
 2222         struct uio *uio = ap->a_uio;
 2223         int tresid, error = 0;
 2224         struct vattr vattr;
 2225         
 2226         if (vp->v_type != VDIR) 
 2227                 return(EPERM);
 2228 
 2229         /*
 2230          * First, check for hit on the EOF offset cache
 2231          */
 2232         if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
 2233             (np->n_flag & NMODIFIED) == 0) {
 2234                 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
 2235                         mtx_lock(&np->n_mtx);
 2236                         if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 2237                                 mtx_unlock(&np->n_mtx);
 2238                                 nfsstats.direofcache_hits++;
 2239                                 goto out;
 2240                         } else
 2241                                 mtx_unlock(&np->n_mtx);
 2242                 }
 2243         }
 2244 
 2245         /*
 2246          * Call nfs_bioread() to do the real work.
 2247          */
 2248         tresid = uio->uio_resid;
 2249         error = nfs_bioread(vp, uio, 0, ap->a_cred);
 2250 
 2251         if (!error && uio->uio_resid == tresid) {
 2252                 nfsstats.direofcache_misses++;
 2253         }
 2254 out:
 2255         return (error);
 2256 }
 2257 
/*
 * Readdir rpc call.
 * Called from below the buffer cache by nfs_doio().
 *
 * Issues NFSPROC_READDIR requests for directory vp, starting at the cookie
 * stored for uiop->uio_offset, and converts the returned entries into
 * struct dirent records written through uiop.  Records are packed so that
 * none crosses a DIRBLKSIZ boundary; the last record of each block has its
 * d_reclen stretched to the end of the block.
 *
 * Returns NFSERR_BAD_COOKIE when no cookie is stored for the starting
 * offset, EBADRPC when an entry's name length is <= 0 or greater than
 * NFS_MAXNAMLEN, and otherwise the value left in 'error' (0 on success).
 * The nfsm_* helpers are macros defined elsewhere; they presumably set
 * 'error' and jump to nfsmout on failure -- confirm against their
 * definitions.
 */
int
nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
        int len, left;
        struct dirent *dp = NULL;
        u_int32_t *tl;
        caddr_t cp;
        nfsuint64 *cookiep;
        caddr_t bpos, dpos;
        struct mbuf *mreq, *mrep, *md, *mb;
        nfsuint64 cookie;
        struct nfsmount *nmp = VFSTONFS(vp->v_mount);
        struct nfsnode *dnp = VTONFS(vp);
        u_quad_t fileno;
        int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
        int attrflag;
        int v3 = NFS_ISV3(vp);

        /*
         * Require a single iovec with offset and residual both multiples of
         * DIRBLKSIZ.
         * NOTE(review): this check is compiled when DIAGNOSTIC is NOT
         * defined -- confirm that the inverted sense is intended.
         */
#ifndef DIAGNOSTIC
        if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
                (uiop->uio_resid & (DIRBLKSIZ - 1)))
                panic("nfs readdirrpc bad uio");
#endif

        /*
         * If there is no cookie, assume directory was stale.
         */
        nfs_dircookie_lock(dnp);
        cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
        if (cookiep) {
                /* Take a private copy so the lock can be dropped. */
                cookie = *cookiep;
                nfs_dircookie_unlock(dnp);
        } else {
                nfs_dircookie_unlock(dnp);
                return (NFSERR_BAD_COOKIE);
        }

        /*
         * Loop around doing readdir rpc's of size nm_readdirsize
         * truncated to a multiple of DIRBLKSIZ.
         * The stopping criteria is EOF or buffer full.
         */
        while (more_dirs && bigenough) {
                nfsstats.rpccnt[NFSPROC_READDIR]++;
                mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
                        NFSX_READDIR(v3));
                mb = mreq;
                bpos = mtod(mb, caddr_t);
                nfsm_fhtom(vp, v3);
                if (v3) {
                        /* v3: two cookie words, two verifier words, count. */
                        tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED);
                        *tl++ = cookie.nfsuquad[0];
                        *tl++ = cookie.nfsuquad[1];
                        mtx_lock(&dnp->n_mtx);
                        *tl++ = dnp->n_cookieverf.nfsuquad[0];
                        *tl++ = dnp->n_cookieverf.nfsuquad[1];
                        mtx_unlock(&dnp->n_mtx);
                } else {
                        /* v2: only the first cookie word is sent, then count. */
                        tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
                        *tl++ = cookie.nfsuquad[0];
                }
                *tl = txdr_unsigned(nmp->nm_readdirsize);
                nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred);
                if (v3) {
                        nfsm_postop_attr(vp, attrflag);
                        if (!error) {
                                /* Remember the cookie verifier from the reply. */
                                tl = nfsm_dissect(u_int32_t *,
                                    2 * NFSX_UNSIGNED);
                                mtx_lock(&dnp->n_mtx);
                                dnp->n_cookieverf.nfsuquad[0] = *tl++;
                                dnp->n_cookieverf.nfsuquad[1] = *tl;
                                mtx_unlock(&dnp->n_mtx);
                        } else {
                                m_freem(mrep);
                                goto nfsmout;
                        }
                }
                /* Flag word: non-zero when a directory entry follows. */
                tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
                more_dirs = fxdr_unsigned(int, *tl);

                /* loop thru the dir entries, doctoring them to 4bsd form */
                while (more_dirs && bigenough) {
                        /* Entry header: file id (64 bits in v3) and name length. */
                        if (v3) {
                                tl = nfsm_dissect(u_int32_t *,
                                    3 * NFSX_UNSIGNED);
                                fileno = fxdr_hyper(tl);
                                len = fxdr_unsigned(int, *(tl + 2));
                        } else {
                                tl = nfsm_dissect(u_int32_t *,
                                    2 * NFSX_UNSIGNED);
                                fileno = fxdr_unsigned(u_quad_t, *tl++);
                                len = fxdr_unsigned(int, *tl);
                        }
                        if (len <= 0 || len > NFS_MAXNAMLEN) {
                                error = EBADRPC;
                                m_freem(mrep);
                                goto nfsmout;
                        }
                        /*
                         * tlen is the space reserved for the name: the
                         * rounded-up length, plus 4 when rounding added
                         * nothing, so that a terminating NUL always fits.
                         */
                        tlen = nfsm_rndup(len);
                        if (tlen == len)
                                tlen += 4;      /* To ensure null termination */
                        left = DIRBLKSIZ - blksiz;
                        if ((tlen + DIRHDSIZ) > left) {
                                /*
                                 * The record does not fit in the rest of the
                                 * current block: stretch the previous record
                                 * to the block end and start a new block.
                                 */
                                dp->d_reclen += left;
                                uiop->uio_iov->iov_base =
                                    (char *)uiop->uio_iov->iov_base + left;
                                uiop->uio_iov->iov_len -= left;
                                uiop->uio_offset += left;
                                uiop->uio_resid -= left;
                                blksiz = 0;
                        }
                        /* Stop storing entries once the caller's buffer is full. */
                        if ((tlen + DIRHDSIZ) > uiop->uio_resid)
                                bigenough = 0;
                        if (bigenough) {
                                /* Fill in the dirent header in place. */
                                dp = (struct dirent *)uiop->uio_iov->iov_base;
                                dp->d_fileno = (int)fileno;
                                dp->d_namlen = len;
                                dp->d_reclen = tlen + DIRHDSIZ;
                                dp->d_type = DT_UNKNOWN;
                                blksiz += dp->d_reclen;
                                if (blksiz == DIRBLKSIZ)
                                        blksiz = 0;
                                uiop->uio_offset += DIRHDSIZ;
                                uiop->uio_resid -= DIRHDSIZ;
                                uiop->uio_iov->iov_base =
                                    (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
                                uiop->uio_iov->iov_len -= DIRHDSIZ;
                                /* Copy the name from the reply into the uio. */
                                nfsm_mtouio(uiop, len);
                                cp = uiop->uio_iov->iov_base;
                                tlen -= len;
                                *cp = '\0';     /* null terminate */
                                /* Step over the padding that follows the name. */
                                uiop->uio_iov->iov_base =
                                    (char *)uiop->uio_iov->iov_base + tlen;
                                uiop->uio_iov->iov_len -= tlen;
                                uiop->uio_offset += tlen;
                                uiop->uio_resid -= tlen;
                        } else
                                nfsm_adv(nfsm_rndup(len));
                        /* Cookie (one word in v2, two in v3) plus next-entry flag. */
                        if (v3) {
                                tl = nfsm_dissect(u_int32_t *,
                                    3 * NFSX_UNSIGNED);
                        } else {
                                tl = nfsm_dissect(u_int32_t *,
                                    2 * NFSX_UNSIGNED);
                        }
                        /*
                         * Only adopt the cookie of an entry that was actually
                         * stored; otherwise just step over it.
                         */
                        if (bigenough) {
                                cookie.nfsuquad[0] = *tl++;
                                if (v3)
                                        cookie.nfsuquad[1] = *tl++;
                        } else if (v3)
                                tl += 2;
                        else
                                tl++;
                        more_dirs = fxdr_unsigned(int, *tl);
                }
                /*
                 * If at end of rpc data, get the eof boolean
                 */
                if (!more_dirs) {
                        /* Keep looping over RPCs unless the eof word is set. */
                        tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
                        more_dirs = (fxdr_unsigned(int, *tl) == 0);
                }
                m_freem(mrep);
        }
        /*
         * Fill last record, iff any, out to a multiple of DIRBLKSIZ
         * by increasing d_reclen for the last record.
         */
        if (blksiz > 0) {
                left = DIRBLKSIZ - blksiz;
                dp->d_reclen += left;
                uiop->uio_iov->iov_base =
                    (char *)uiop->uio_iov->iov_base + left;
                uiop->uio_iov->iov_len -= left;
                uiop->uio_offset += left;
                uiop->uio_resid -= left;
        }

        /*
         * We are now either at the end of the directory or have filled the
         * block.
         */
        if (bigenough)
                /* Loop ended on the eof word: record the EOF offset. */
                dnp->n_direofoffset = uiop->uio_offset;
        else {
                /* Buffer filled: log if space is unexpectedly left over. */
                if (uiop->uio_resid > 0)
                        nfs_printf("EEK! readdirrpc resid > 0\n");
                /* Store the cookie under the offset that was reached. */
                nfs_dircookie_lock(dnp);
                cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
                *cookiep = cookie;
                nfs_dircookie_unlock(dnp);
        }
nfsmout:
        return (error);
}
 2457 
 2458 /*
 2459  * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
 2460  */
 2461 int
 2462 nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 2463 {
 2464         int len, left;
 2465         struct dirent *dp;
 2466         u_int32_t *tl;
 2467         caddr_t cp;
 2468         struct vnode *newvp;
 2469         nfsuint64 *cookiep;
 2470         caddr_t bpos, dpos, dpossav1, dpossav2;
 2471         struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2;
 2472         struct nameidata nami, *ndp = &nami;
 2473         struct componentname *cnp = &ndp->ni_cnd;
 2474         nfsuint64 cookie;
 2475         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2476         struct nfsnode *dnp = VTONFS(vp), *np;
 2477         nfsfh_t *fhp;
 2478         u_quad_t fileno;
 2479         int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
 2480         int attrflag, fhsize;
 2481 
 2482 #ifndef nolint
 2483         dp = NULL;
 2484 #endif
 2485 #ifndef DIAGNOSTIC
 2486         if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
 2487                 (uiop->uio_resid & (DIRBLKSIZ - 1)))
 2488                 panic("nfs readdirplusrpc bad uio");
 2489 #endif
 2490         ndp->ni_dvp = vp;
 2491         newvp = NULLVP;
 2492 
 2493         /*
 2494          * If there is no cookie, assume directory was stale.
 2495          */
 2496         nfs_dircookie_lock(dnp);
 2497         cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
 2498         if (cookiep) {
 2499                 cookie = *cookiep;
 2500                 nfs_dircookie_unlock(dnp);
 2501         } else {
 2502                 nfs_dircookie_unlock(dnp);
 2503                 return (NFSERR_BAD_COOKIE);
 2504         }
 2505         /*
 2506          * Loop around doing readdir rpc's of size nm_readdirsize
 2507          * truncated to a multiple of DIRBLKSIZ.
 2508          * The stopping criteria is EOF or buffer full.
 2509          */
 2510         while (more_dirs && bigenough) {
 2511                 nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
 2512                 mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
 2513                         NFSX_FH(1) + 6 * NFSX_UNSIGNED);
 2514                 mb = mreq;
 2515                 bpos = mtod(mb, caddr_t);
 2516                 nfsm_fhtom(vp, 1);
 2517                 tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED);
 2518                 *tl++ = cookie.nfsuquad[0];
 2519                 *tl++ = cookie.nfsuquad[1];
 2520                 mtx_lock(&dnp->n_mtx);
 2521                 *tl++ = dnp->n_cookieverf.nfsuquad[0];
 2522                 *tl++ = dnp->n_cookieverf.nfsuquad[1];
 2523                 mtx_unlock(&dnp->n_mtx);
 2524                 *tl++ = txdr_unsigned(nmp->nm_readdirsize);
 2525                 *tl = txdr_unsigned(nmp->nm_rsize);
 2526                 nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred);
 2527                 nfsm_postop_attr(vp, attrflag);
 2528                 if (error) {
 2529                         m_freem(mrep);
 2530                         goto nfsmout;
 2531                 }
 2532                 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
 2533                 mtx_lock(&dnp->n_mtx);
 2534                 dnp->n_cookieverf.nfsuquad[0] = *tl++;
 2535                 dnp->n_cookieverf.nfsuquad[1] = *tl++;
 2536                 mtx_unlock(&dnp->n_mtx);
 2537                 more_dirs = fxdr_unsigned(int, *tl);
 2538 
 2539                 /* loop thru the dir entries, doctoring them to 4bsd form */
 2540                 while (more_dirs && bigenough) {
 2541                         tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
 2542                         fileno = fxdr_hyper(tl);
 2543                         len = fxdr_unsigned(int, *(tl + 2));
 2544                         if (len <= 0 || len > NFS_MAXNAMLEN) {
 2545                                 error = EBADRPC;
 2546                                 m_freem(mrep);
 2547                                 goto nfsmout;
 2548                         }
 2549                         tlen = nfsm_rndup(len);
 2550                         if (tlen == len)
 2551                                 tlen += 4;      /* To ensure null termination*/
 2552                         left = DIRBLKSIZ - blksiz;
 2553                         if ((tlen + DIRHDSIZ) > left) {
 2554                                 dp->d_reclen += left;
 2555                                 uiop->uio_iov->iov_base =
 2556                                     (char *)uiop->uio_iov->iov_base + left;
 2557                                 uiop->uio_iov->iov_len -= left;
 2558                                 uiop->uio_offset += left;
 2559                                 uiop->uio_resid -= left;
 2560                                 blksiz = 0;
 2561                         }
 2562                         if ((tlen + DIRHDSIZ) > uiop->uio_resid)
 2563                                 bigenough = 0;
 2564                         if (bigenough) {
 2565                                 dp = (struct dirent *)uiop->uio_iov->iov_base;
 2566                                 dp->d_fileno = (int)fileno;
 2567                                 dp->d_namlen = len;
 2568                                 dp->d_reclen = tlen + DIRHDSIZ;
 2569                                 dp->d_type = DT_UNKNOWN;
 2570                                 blksiz += dp->d_reclen;
 2571                                 if (blksiz == DIRBLKSIZ)
 2572                                         blksiz = 0;
 2573                                 uiop->uio_offset += DIRHDSIZ;
 2574                                 uiop->uio_resid -= DIRHDSIZ;
 2575                                 uiop->uio_iov->iov_base =
 2576                                     (char *)uiop->uio_iov->iov_base + DIRHDSIZ;
 2577                                 uiop->uio_iov->iov_len -= DIRHDSIZ;
 2578                                 cnp->cn_nameptr = uiop->uio_iov->iov_base;
 2579                                 cnp->cn_namelen = len;
 2580                                 nfsm_mtouio(uiop, len);
 2581                                 cp = uiop->uio_iov->iov_base;
 2582                                 tlen -= len;
 2583                                 *cp = '\0';
 2584                                 uiop->uio_iov->iov_base =
 2585                                     (char *)uiop->uio_iov->iov_base + tlen;
 2586                                 uiop->uio_iov->iov_len -= tlen;
 2587                                 uiop->uio_offset += tlen;
 2588                                 uiop->uio_resid -= tlen;
 2589                         } else
 2590                                 nfsm_adv(nfsm_rndup(len));
 2591                         tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
 2592                         if (bigenough) {
 2593                                 cookie.nfsuquad[0] = *tl++;
 2594                                 cookie.nfsuquad[1] = *tl++;
 2595                         } else
 2596                                 tl += 2;
 2597 
 2598                         /*
 2599                          * Since the attributes are before the file handle
 2600                          * (sigh), we must skip over the attributes and then
 2601                          * come back and get them.
 2602                          */
 2603                         attrflag = fxdr_unsigned(int, *tl);
 2604                         if (attrflag) {
 2605                             dpossav1 = dpos;
 2606                             mdsav1 = md;
 2607                             nfsm_adv(NFSX_V3FATTR);
 2608                             tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2609                             doit = fxdr_unsigned(int, *tl);
 2610                             /*
 2611                              * Skip loading the attrs for "..". There's a 
 2612                              * race between loading the attrs here and 
 2613                              * lookups that look for the directory currently
 2614                              * being read (in the parent). We try to acquire
 2615                              * the exclusive lock on ".." here, owning the 
 2616                              * lock on the directory being read. Lookup will
 2617                              * hold the lock on ".." and try to acquire the 
 2618                              * lock on the directory being read.
 2619                              * 
 2620                              * There are other ways of fixing this, one would
 2621                              * be to do a trylock on the ".." vnode and skip
 2622                              * loading the attrs on ".." if it happens to be 
 2623                              * locked by another process. But skipping the
 2624                              * attrload on ".." seems the easiest option.
 2625                              */
 2626                             if (strcmp(dp->d_name, "..") == 0) {
 2627                                     doit = 0;
 2628                                     /*
 2629                                      * We've already skipped over the attrs, 
 2630                                      * skip over the filehandle. And store d_type
 2631                                      * as VDIR.
 2632                                      */
 2633                                     tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2634                                     i = fxdr_unsigned(int, *tl);
 2635                                     nfsm_adv(nfsm_rndup(i));
 2636                                     dp->d_type = IFTODT(VTTOIF(VDIR));
 2637                             }       
 2638                             if (doit) {
 2639                                 nfsm_getfh(fhp, fhsize, 1);
 2640                                 if (NFS_CMPFH(dnp, fhp, fhsize)) {
 2641                                     VREF(vp);
 2642                                     newvp = vp;
 2643                                     np = dnp;
 2644                                 } else {
 2645                                     error = nfs_nget(vp->v_mount, fhp,
 2646                                         fhsize, &np, LK_EXCLUSIVE);
 2647                                     if (error)
 2648                                         doit = 0;
 2649                                     else
 2650                                         newvp = NFSTOV(np);
 2651                                 }
 2652                             }
 2653                             if (doit && bigenough) {
 2654                                 dpossav2 = dpos;
 2655                                 dpos = dpossav1;
 2656                                 mdsav2 = md;
 2657                                 md = mdsav1;
 2658                                 nfsm_loadattr(newvp, NULL);
 2659                                 dpos = dpossav2;
 2660                                 md = mdsav2;
 2661                                 dp->d_type =
 2662                                     IFTODT(VTTOIF(np->n_vattr.va_type));
 2663                                 ndp->ni_vp = newvp;
 2664                                 /* Update n_ctime, so subsequent lookup doesn't purge entry */
 2665                                 np->n_ctime = np->n_vattr.va_ctime.tv_sec;
 2666                                 cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
 2667                             }
 2668                         } else {
 2669                             /* Just skip over the file handle */
 2670                             tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2671                             i = fxdr_unsigned(int, *tl);
 2672                             if (i) {
 2673                                     tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2674                                     fhsize = fxdr_unsigned(int, *tl);
 2675                                     nfsm_adv(nfsm_rndup(fhsize));
 2676                             }
 2677                         }
 2678                         if (newvp != NULLVP) {
 2679                             if (newvp == vp)
 2680                                 vrele(newvp);
 2681                             else
 2682                                 vput(newvp);
 2683                             newvp = NULLVP;
 2684                         }
 2685                         tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2686                         more_dirs = fxdr_unsigned(int, *tl);
 2687                 }
 2688                 /*
 2689                  * If at end of rpc data, get the eof boolean
 2690                  */
 2691                 if (!more_dirs) {
 2692                         tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
 2693                         more_dirs = (fxdr_unsigned(int, *tl) == 0);
 2694                 }
 2695                 m_freem(mrep);
 2696         }
 2697         /*
 2698          * Fill last record, iff any, out to a multiple of DIRBLKSIZ
 2699          * by increasing d_reclen for the last record.
 2700          */
 2701         if (blksiz > 0) {
 2702                 left = DIRBLKSIZ - blksiz;
 2703                 dp->d_reclen += left;
 2704                 uiop->uio_iov->iov_base =
 2705                     (char *)uiop->uio_iov->iov_base + left;
 2706                 uiop->uio_iov->iov_len -= left;
 2707                 uiop->uio_offset += left;
 2708                 uiop->uio_resid -= left;
 2709         }
 2710 
 2711         /*
 2712          * We are now either at the end of the directory or have filled the
 2713          * block.
 2714          */
 2715         if (bigenough)
 2716                 dnp->n_direofoffset = uiop->uio_offset;
 2717         else {
 2718                 if (uiop->uio_resid > 0)
 2719                         nfs_printf("EEK! readdirplusrpc resid > 0\n");
 2720                 nfs_dircookie_lock(dnp);
 2721                 cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
 2722                 *cookiep = cookie;
 2723                 nfs_dircookie_unlock(dnp);
 2724         }
 2725 nfsmout:
 2726         if (newvp != NULLVP) {
 2727                 if (newvp == vp)
 2728                         vrele(newvp);
 2729                 else
 2730                         vput(newvp);
 2731                 newvp = NULLVP;
 2732         }
 2733         return (error);
 2734 }
 2735 
 2736 /*
 2737  * Silly rename. To make the NFS filesystem that is stateless look a little
 2738  * more like the "ufs" a remove of an active vnode is translated to a rename
 2739  * to a funny looking filename that is removed by nfs_inactive on the
 2740  * nfsnode. There is the potential for another process on a different client
 2741  * to create the same funny name between the nfs_lookitup() fails and the
 2742  * nfs_rename() completes, but...
 2743  */
 2744 static int
 2745 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 2746 {
 2747         struct sillyrename *sp;
 2748         struct nfsnode *np;
 2749         int error;
 2750         short pid;
 2751         unsigned int lticks;
 2752 
 2753         cache_purge(dvp);
 2754         np = VTONFS(vp);
 2755 #ifndef DIAGNOSTIC
 2756         if (vp->v_type == VDIR)
 2757                 panic("nfs: sillyrename dir");
 2758 #endif
 2759         sp = malloc(sizeof (struct sillyrename),
 2760                 M_NFSREQ, M_WAITOK);
 2761         sp->s_cred = crhold(cnp->cn_cred);
 2762         sp->s_dvp = dvp;
 2763         sp->s_removeit = nfs_removeit;
 2764         VREF(dvp);
 2765 
 2766         /* 
 2767          * Fudge together a funny name.
 2768          * Changing the format of the funny name to accomodate more 
 2769          * sillynames per directory.
 2770          * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 
 2771          * CPU ticks since boot.
 2772          */
 2773         pid = cnp->cn_thread->td_proc->p_pid;
 2774         lticks = (unsigned int)ticks;
 2775         for ( ; ; ) {
 2776                 sp->s_namlen = sprintf(sp->s_name, 
 2777                                        ".nfs.%08x.%04x4.4", lticks, 
 2778                                        pid);
 2779                 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 2780                                  cnp->cn_thread, NULL))
 2781                         break;
 2782                 lticks++;
 2783         }
 2784         error = nfs_renameit(dvp, cnp, sp);
 2785         if (error)
 2786                 goto bad;
 2787         error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 2788                 cnp->cn_thread, &np);
 2789         np->n_sillyrename = sp;
 2790         return (0);
 2791 bad:
 2792         vrele(sp->s_dvp);
 2793         crfree(sp->s_cred);
 2794         free((caddr_t)sp, M_NFSREQ);
 2795         return (error);
 2796 }
 2797 
 2798 /*
 2799  * Look up a file name and optionally either update the file handle or
 2800  * allocate an nfsnode, depending on the value of npp.
 2801  * npp == NULL  --> just do the lookup
 2802  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
 2803  *                      handled too
 2804  * *npp != NULL --> update the file handle in the vnode
 2805  */
 2806 static int
 2807 nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
 2808     struct thread *td, struct nfsnode **npp)
 2809 {
 2810         struct vnode *newvp = NULL;
 2811         struct nfsnode *np, *dnp = VTONFS(dvp);
 2812         caddr_t bpos, dpos;
 2813         int error = 0, fhlen, attrflag;
 2814         struct mbuf *mreq, *mrep, *md, *mb;
 2815         nfsfh_t *nfhp;
 2816         int v3 = NFS_ISV3(dvp);
 2817 
 2818         nfsstats.rpccnt[NFSPROC_LOOKUP]++;
 2819         mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP,
 2820                 NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
 2821         mb = mreq;
 2822         bpos = mtod(mb, caddr_t);
 2823         nfsm_fhtom(dvp, v3);
 2824         nfsm_strtom(name, len, NFS_MAXNAMLEN);
 2825         nfsm_request(dvp, NFSPROC_LOOKUP, td, cred);
 2826         if (npp && !error) {
 2827                 nfsm_getfh(nfhp, fhlen, v3);
 2828                 if (*npp) {
 2829                     np = *npp;
 2830                     if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
 2831                         free((caddr_t)np->n_fhp, M_NFSBIGFH);
 2832                         np->n_fhp = &np->n_fh;
 2833                     } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
 2834                         np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK);
 2835                     bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
 2836                     np->n_fhsize = fhlen;
 2837                     newvp = NFSTOV(np);
 2838                 } else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
 2839                     VREF(dvp);
 2840                     newvp = dvp;
 2841                 } else {
 2842                     error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
 2843                     if (error) {
 2844                         m_freem(mrep);
 2845                         return (error);
 2846                     }
 2847                     newvp = NFSTOV(np);
 2848                 }
 2849                 if (v3) {
 2850                         nfsm_postop_attr(newvp, attrflag);
 2851                         if (!attrflag && *npp == NULL) {
 2852                                 m_freem(mrep);
 2853                                 if (newvp == dvp)
 2854                                         vrele(newvp);
 2855                                 else
 2856                                         vput(newvp);
 2857                                 return (ENOENT);
 2858                         }
 2859                 } else
 2860                         nfsm_loadattr(newvp, NULL);
 2861         }
 2862         m_freem(mrep);
 2863 nfsmout:
 2864         if (npp && *npp == NULL) {
 2865                 if (error) {
 2866                         if (newvp) {
 2867                                 if (newvp == dvp)
 2868                                         vrele(newvp);
 2869                                 else
 2870                                         vput(newvp);
 2871                         }
 2872                 } else
 2873                         *npp = np;
 2874         }
 2875         return (error);
 2876 }
 2877 
 2878 /*
 2879  * Nfs Version 3 commit rpc
 2880  */
 2881 int
 2882 nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
 2883            struct thread *td)
 2884 {
 2885         u_int32_t *tl;
 2886         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2887         caddr_t bpos, dpos;
 2888         int error = 0, wccflag = NFSV3_WCCRATTR;
 2889         struct mbuf *mreq, *mrep, *md, *mb;
 2890 
 2891         mtx_lock(&nmp->nm_mtx);
 2892         if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
 2893                 mtx_unlock(&nmp->nm_mtx);
 2894                 return (0);
 2895         }
 2896         mtx_unlock(&nmp->nm_mtx);
 2897         nfsstats.rpccnt[NFSPROC_COMMIT]++;
 2898         mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
 2899         mb = mreq;
 2900         bpos = mtod(mb, caddr_t);
 2901         nfsm_fhtom(vp, 1);
 2902         tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED);
 2903         txdr_hyper(offset, tl);
 2904         tl += 2;
 2905         *tl = txdr_unsigned(cnt);
 2906         nfsm_request(vp, NFSPROC_COMMIT, td, cred);
 2907         nfsm_wcc_data(vp, wccflag);
 2908         if (!error) {
 2909                 tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF);
 2910                 if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
 2911                         NFSX_V3WRITEVERF)) {
 2912                         bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 2913                                 NFSX_V3WRITEVERF);
 2914                         error = NFSERR_STALEWRITEVERF;
 2915                 }
 2916         }
 2917         m_freem(mrep);
 2918 nfsmout:
 2919         return (error);
 2920 }
 2921 
 2922 /*
 2923  * Strategy routine.
 2924  * For async requests when nfsiod(s) are running, queue the request by
 2925  * calling nfs_asyncio(), otherwise just all nfs_doio() to do the
 2926  * request.
 2927  */
 2928 static int
 2929 nfs_strategy(struct vop_strategy_args *ap)
 2930 {
 2931         struct buf *bp = ap->a_bp;
 2932         struct ucred *cr;
 2933 
 2934         KASSERT(!(bp->b_flags & B_DONE),
 2935             ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
 2936         BUF_ASSERT_HELD(bp);
 2937 
 2938         if (bp->b_iocmd == BIO_READ)
 2939                 cr = bp->b_rcred;
 2940         else
 2941                 cr = bp->b_wcred;
 2942 
 2943         /*
 2944          * If the op is asynchronous and an i/o daemon is waiting
 2945          * queue the request, wake it up and wait for completion
 2946          * otherwise just do it ourselves.
 2947          */
 2948         if ((bp->b_flags & B_ASYNC) == 0 ||
 2949             nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread))
 2950                 (void)nfs_doio(ap->a_vp, bp, cr, curthread);
 2951         return (0);
 2952 }
 2953 
 2954 /*
 2955  * fsync vnode op. Just call nfs_flush() with commit == 1.
 2956  */
 2957 /* ARGSUSED */
 2958 static int
 2959 nfs_fsync(struct vop_fsync_args *ap)
 2960 {
 2961 
 2962         return (nfs_flush(ap->a_vp, ap->a_waitfor, 1));
 2963 }
 2964 
 2965 /*
 2966  * Flush all the blocks associated with a vnode.
 2967  *      Walk through the buffer pool and push any dirty pages
 2968  *      associated with the vnode.
 2969  */
 2970 static int
 2971 nfs_flush(struct vnode *vp, int waitfor, int commit)
 2972 {
 2973         struct nfsnode *np = VTONFS(vp);
 2974         struct buf *bp;
 2975         int i;
 2976         struct buf *nbp;
 2977         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2978         int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 2979         int passone = 1;
 2980         u_quad_t off, endoff, toff;
 2981         struct ucred* wcred = NULL;
 2982         struct buf **bvec = NULL;
 2983         struct bufobj *bo;
 2984         struct thread *td = curthread;
 2985 #ifndef NFS_COMMITBVECSIZ
 2986 #define NFS_COMMITBVECSIZ       20
 2987 #endif
 2988         struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
 2989         int bvecsize = 0, bveccount;
 2990 
 2991         if (nmp->nm_flag & NFSMNT_INT)
 2992                 slpflag = NFS_PCATCH;
 2993         if (!commit)
 2994                 passone = 0;
 2995         bo = &vp->v_bufobj;
 2996         /*
 2997          * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
 2998          * server, but has not been committed to stable storage on the server
 2999          * yet. On the first pass, the byte range is worked out and the commit
 3000          * rpc is done. On the second pass, nfs_writebp() is called to do the
 3001          * job.
 3002          */
 3003 again:
 3004         off = (u_quad_t)-1;
 3005         endoff = 0;
 3006         bvecpos = 0;
 3007         if (NFS_ISV3(vp) && commit) {
 3008                 if (bvec != NULL && bvec != bvec_on_stack)
 3009                         free(bvec, M_TEMP);
 3010                 /*
 3011                  * Count up how many buffers waiting for a commit.
 3012                  */
 3013                 bveccount = 0;
 3014                 BO_LOCK(bo);
 3015                 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 3016                         if (!BUF_ISLOCKED(bp) &&
 3017                             (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 3018                                 == (B_DELWRI | B_NEEDCOMMIT))
 3019                                 bveccount++;
 3020                 }
 3021                 /*
 3022                  * Allocate space to remember the list of bufs to commit.  It is
 3023                  * important to use M_NOWAIT here to avoid a race with nfs_write.
 3024                  * If we can't get memory (for whatever reason), we will end up
 3025                  * committing the buffers one-by-one in the loop below.
 3026                  */
 3027                 if (bveccount > NFS_COMMITBVECSIZ) {
 3028                         /*
 3029                          * Release the vnode interlock to avoid a lock
 3030                          * order reversal.
 3031                          */
 3032                         BO_UNLOCK(bo);
 3033                         bvec = (struct buf **)
 3034                                 malloc(bveccount * sizeof(struct buf *),
 3035                                        M_TEMP, M_NOWAIT);
 3036                         BO_LOCK(bo);
 3037                         if (bvec == NULL) {
 3038                                 bvec = bvec_on_stack;
 3039                                 bvecsize = NFS_COMMITBVECSIZ;
 3040                         } else
 3041                                 bvecsize = bveccount;
 3042                 } else {
 3043                         bvec = bvec_on_stack;
 3044                         bvecsize = NFS_COMMITBVECSIZ;
 3045                 }
 3046                 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 3047                         if (bvecpos >= bvecsize)
 3048                                 break;
 3049                         if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 3050                                 nbp = TAILQ_NEXT(bp, b_bobufs);
 3051                                 continue;
 3052                         }
 3053                         if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
 3054                             (B_DELWRI | B_NEEDCOMMIT)) {
 3055                                 BUF_UNLOCK(bp);
 3056                                 nbp = TAILQ_NEXT(bp, b_bobufs);
 3057                                 continue;
 3058                         }
 3059                         BO_UNLOCK(bo);
 3060                         bremfree(bp);
 3061                         /*
 3062                          * Work out if all buffers are using the same cred
 3063                          * so we can deal with them all with one commit.
 3064                          *
 3065                          * NOTE: we are not clearing B_DONE here, so we have
 3066                          * to do it later on in this routine if we intend to
 3067                          * initiate I/O on the bp.
 3068                          *
 3069                          * Note: to avoid loopback deadlocks, we do not
 3070                          * assign b_runningbufspace.
 3071                          */
 3072                         if (wcred == NULL)
 3073                                 wcred = bp->b_wcred;
 3074                         else if (wcred != bp->b_wcred)
 3075                                 wcred = NOCRED;
 3076                         vfs_busy_pages(bp, 1);
 3077 
 3078                         BO_LOCK(bo);
 3079                         /*
 3080                          * bp is protected by being locked, but nbp is not
 3081                          * and vfs_busy_pages() may sleep.  We have to
 3082                          * recalculate nbp.
 3083                          */
 3084                         nbp = TAILQ_NEXT(bp, b_bobufs);
 3085 
 3086                         /*
 3087                          * A list of these buffers is kept so that the
 3088                          * second loop knows which buffers have actually
 3089                          * been committed. This is necessary, since there
 3090                          * may be a race between the commit rpc and new
 3091                          * uncommitted writes on the file.
 3092                          */
 3093                         bvec[bvecpos++] = bp;
 3094                         toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 3095                                 bp->b_dirtyoff;
 3096                         if (toff < off)
 3097                                 off = toff;
 3098                         toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
 3099                         if (toff > endoff)
 3100                                 endoff = toff;
 3101                 }
 3102                 BO_UNLOCK(bo);
 3103         }
 3104         if (bvecpos > 0) {
 3105                 /*
 3106                  * Commit data on the server, as required.
 3107                  * If all bufs are using the same wcred, then use that with
 3108                  * one call for all of them, otherwise commit each one
 3109                  * separately.
 3110                  */
 3111                 if (wcred != NOCRED)
 3112                         retv = nfs_commit(vp, off, (int)(endoff - off),
 3113                                           wcred, td);
 3114                 else {
 3115                         retv = 0;
 3116                         for (i = 0; i < bvecpos; i++) {
 3117                                 off_t off, size;
 3118                                 bp = bvec[i];
 3119                                 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 3120                                         bp->b_dirtyoff;
 3121                                 size = (u_quad_t)(bp->b_dirtyend
 3122                                                   - bp->b_dirtyoff);
 3123                                 retv = nfs_commit(vp, off, (int)size,
 3124                                                   bp->b_wcred, td);
 3125                                 if (retv) break;
 3126                         }
 3127                 }
 3128 
 3129                 if (retv == NFSERR_STALEWRITEVERF)
 3130                         nfs_clearcommit(vp->v_mount);
 3131 
 3132                 /*
 3133                  * Now, either mark the blocks I/O done or mark the
 3134                  * blocks dirty, depending on whether the commit
 3135                  * succeeded.
 3136                  */
 3137                 for (i = 0; i < bvecpos; i++) {
 3138                         bp = bvec[i];
 3139                         bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 3140                         if (retv) {
 3141                                 /*
 3142                                  * Error, leave B_DELWRI intact
 3143                                  */
 3144                                 vfs_unbusy_pages(bp);
 3145                                 brelse(bp);
 3146                         } else {
 3147                                 /*
 3148                                  * Success, remove B_DELWRI ( bundirty() ).
 3149                                  *
 3150                                  * b_dirtyoff/b_dirtyend seem to be NFS
 3151                                  * specific.  We should probably move that
 3152                                  * into bundirty(). XXX
 3153                                  */
 3154                                 bufobj_wref(bo);
 3155                                 bp->b_flags |= B_ASYNC;
 3156                                 bundirty(bp);
 3157                                 bp->b_flags &= ~B_DONE;
 3158                                 bp->b_ioflags &= ~BIO_ERROR;
 3159                                 bp->b_dirtyoff = bp->b_dirtyend = 0;
 3160                                 bufdone(bp);
 3161                         }
 3162                 }
 3163         }
 3164 
 3165         /*
 3166          * Start/do any write(s) that are required.
 3167          */
 3168 loop:
 3169         BO_LOCK(bo);
 3170         TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 3171                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 3172                         if (waitfor != MNT_WAIT || passone)
 3173                                 continue;
 3174 
 3175                         error = BUF_TIMELOCK(bp,
 3176                             LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 3177                             BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
 3178                         if (error == 0) {
 3179                                 BUF_UNLOCK(bp);
 3180                                 goto loop;
 3181                         }
 3182                         if (error == ENOLCK) {
 3183                                 error = 0;
 3184                                 goto loop;
 3185                         }
 3186                         if (nfs_sigintr(nmp, td)) {
 3187                                 error = EINTR;
 3188                                 goto done;
 3189                         }
 3190                         if (slpflag & PCATCH) {
 3191                                 slpflag = 0;
 3192                                 slptimeo = 2 * hz;
 3193                         }
 3194                         goto loop;
 3195                 }
 3196                 if ((bp->b_flags & B_DELWRI) == 0)
 3197                         panic("nfs_fsync: not dirty");
 3198                 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
 3199                         BUF_UNLOCK(bp);
 3200                         continue;
 3201                 }
 3202                 BO_UNLOCK(bo);
 3203                 bremfree(bp);
 3204                 if (passone || !commit)
 3205                     bp->b_flags |= B_ASYNC;
 3206                 else
 3207                     bp->b_flags |= B_ASYNC;
 3208                 bwrite(bp);
 3209                 if (nfs_sigintr(nmp, td)) {
 3210                         error = EINTR;
 3211                         goto done;
 3212                 }
 3213                 goto loop;
 3214         }
 3215         if (passone) {
 3216                 passone = 0;
 3217                 BO_UNLOCK(bo);
 3218                 goto again;
 3219         }
 3220         if (waitfor == MNT_WAIT) {
 3221                 while (bo->bo_numoutput) {
 3222                         error = bufobj_wwait(bo, slpflag, slptimeo);
 3223                         if (error) {
 3224                             BO_UNLOCK(bo);
 3225                             error = nfs_sigintr(nmp, td);
 3226                             if (error)
 3227                                 goto done;
 3228                             if (slpflag & PCATCH) {
 3229                                 slpflag = 0;
 3230                                 slptimeo = 2 * hz;
 3231                             }
 3232                             BO_LOCK(bo);
 3233                         }
 3234                 }
 3235                 if (bo->bo_dirty.bv_cnt != 0 && commit) {
 3236                         BO_UNLOCK(bo);
 3237                         goto loop;
 3238                 }
 3239                 /*
 3240                  * Wait for all the async IO requests to drain
 3241                  */
 3242                 BO_UNLOCK(bo);
 3243                 mtx_lock(&np->n_mtx);
 3244                 while (np->n_directio_asyncwr > 0) {
 3245                         np->n_flag |= NFSYNCWAIT;
 3246                         error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr,
 3247                                            &np->n_mtx, slpflag | (PRIBIO + 1), 
 3248                                            "nfsfsync", 0);
 3249                         if (error) {
 3250                                 if (nfs_sigintr(nmp, td)) {
 3251                                         mtx_unlock(&np->n_mtx);
 3252                                         error = EINTR;  
 3253                                         goto done;
 3254                                 }
 3255                         }
 3256                 }
 3257                 mtx_unlock(&np->n_mtx);
 3258         } else
 3259                 BO_UNLOCK(bo);
 3260         mtx_lock(&np->n_mtx);
 3261         if (np->n_flag & NWRITEERR) {
 3262                 error = np->n_error;
 3263                 np->n_flag &= ~NWRITEERR;
 3264         }
 3265         if (commit && bo->bo_dirty.bv_cnt == 0 &&
 3266             bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
 3267                 np->n_flag &= ~NMODIFIED;
 3268         mtx_unlock(&np->n_mtx);
 3269 done:
 3270         if (bvec != NULL && bvec != bvec_on_stack)
 3271                 free(bvec, M_TEMP);
 3272         return (error);
 3273 }
 3274 
 3275 /*
 3276  * NFS advisory byte-level locks.
 3277  */
 3278 static int
 3279 nfs_advlock(struct vop_advlock_args *ap)
 3280 {
 3281         struct vnode *vp = ap->a_vp;
 3282         u_quad_t size;
 3283         int error;
 3284 
 3285         error = vn_lock(vp, LK_SHARED);
 3286         if (error)
 3287                 return (error);
 3288         if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 3289                 size = VTONFS(vp)->n_size;
 3290                 VOP_UNLOCK(vp, 0);
 3291                 error = lf_advlock(ap, &(vp->v_lockf), size);
 3292         } else {
 3293                 if (nfs_advlock_p)
 3294                         error = nfs_advlock_p(ap);
 3295                 else
 3296                         error = ENOLCK;
 3297         }
 3298 
 3299         return (error);
 3300 }
 3301 
 3302 /*
 3303  * NFS advisory byte-level locks.
 3304  */
 3305 static int
 3306 nfs_advlockasync(struct vop_advlockasync_args *ap)
 3307 {
 3308         struct vnode *vp = ap->a_vp;
 3309         u_quad_t size;
 3310         int error;
 3311         
 3312         error = vn_lock(vp, LK_SHARED);
 3313         if (error)
 3314                 return (error);
 3315         if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 3316                 size = VTONFS(vp)->n_size;
 3317                 VOP_UNLOCK(vp, 0);
 3318                 error = lf_advlockasync(ap, &(vp->v_lockf), size);
 3319         } else {
 3320                 VOP_UNLOCK(vp, 0);
 3321                 error = EOPNOTSUPP;
 3322         }
 3323         return (error);
 3324 }
 3325 
 3326 /*
 3327  * Print out the contents of an nfsnode.
 3328  */
 3329 static int
 3330 nfs_print(struct vop_print_args *ap)
 3331 {
 3332         struct vnode *vp = ap->a_vp;
 3333         struct nfsnode *np = VTONFS(vp);
 3334 
 3335         nfs_printf("\tfileid %ld fsid 0x%x",
 3336            np->n_vattr.va_fileid, np->n_vattr.va_fsid);
 3337         if (vp->v_type == VFIFO)
 3338                 fifo_printinfo(vp);
 3339         printf("\n");
 3340         return (0);
 3341 }
 3342 
 3343 /*
 3344  * This is the "real" nfs::bwrite(struct buf*).
 3345  * We set B_CACHE if this is a VMIO buffer.
 3346  */
 3347 int
 3348 nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
 3349 {
 3350         int s;
 3351         int oldflags = bp->b_flags;
 3352 #if 0
 3353         int retv = 1;
 3354         off_t off;
 3355 #endif
 3356 
 3357         BUF_ASSERT_HELD(bp);
 3358 
 3359         if (bp->b_flags & B_INVAL) {
 3360                 brelse(bp);
 3361                 return(0);
 3362         }
 3363 
 3364         bp->b_flags |= B_CACHE;
 3365 
 3366         /*
 3367          * Undirty the bp.  We will redirty it later if the I/O fails.
 3368          */
 3369 
 3370         s = splbio();
 3371         bundirty(bp);
 3372         bp->b_flags &= ~B_DONE;
 3373         bp->b_ioflags &= ~BIO_ERROR;
 3374         bp->b_iocmd = BIO_WRITE;
 3375 
 3376         bufobj_wref(bp->b_bufobj);
 3377         curthread->td_ru.ru_oublock++;
 3378         splx(s);
 3379 
 3380         /*
 3381          * Note: to avoid loopback deadlocks, we do not
 3382          * assign b_runningbufspace.
 3383          */
 3384         vfs_busy_pages(bp, 1);
 3385 
 3386         BUF_KERNPROC(bp);
 3387         bp->b_iooffset = dbtob(bp->b_blkno);
 3388         bstrategy(bp);
 3389 
 3390         if( (oldflags & B_ASYNC) == 0) {
 3391                 int rtval = bufwait(bp);
 3392 
 3393                 if (oldflags & B_DELWRI) {
 3394                         s = splbio();
 3395                         reassignbuf(bp);
 3396                         splx(s);
 3397                 }
 3398                 brelse(bp);
 3399                 return (rtval);
 3400         }
 3401 
 3402         return (0);
 3403 }
 3404 
 3405 /*
 3406  * nfs special file access vnode op.
 3407  * Essentially just get vattr and then imitate iaccess() since the device is
 3408  * local to the client.
 3409  */
 3410 static int
 3411 nfsspec_access(struct vop_access_args *ap)
 3412 {
 3413         struct vattr *vap;
 3414         struct ucred *cred = ap->a_cred;
 3415         struct vnode *vp = ap->a_vp;
 3416         accmode_t accmode = ap->a_accmode;
 3417         struct vattr vattr;
 3418         int error;
 3419 
 3420         /*
 3421          * Disallow write attempts on filesystems mounted read-only;
 3422          * unless the file is a socket, fifo, or a block or character
 3423          * device resident on the filesystem.
 3424          */
 3425         if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 3426                 switch (vp->v_type) {
 3427                 case VREG:
 3428                 case VDIR:
 3429                 case VLNK:
 3430                         return (EROFS);
 3431                 default:
 3432                         break;
 3433                 }
 3434         }
 3435         vap = &vattr;
 3436         error = VOP_GETATTR(vp, vap, cred);
 3437         if (error)
 3438                 goto out;
 3439         error  = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
 3440                          accmode, cred, NULL);
 3441 out:
 3442         return error;
 3443 }
 3444 
 3445 /*
 3446  * Read wrapper for fifos.
 3447  */
 3448 static int
 3449 nfsfifo_read(struct vop_read_args *ap)
 3450 {
 3451         struct nfsnode *np = VTONFS(ap->a_vp);
 3452         int error;
 3453 
 3454         /*
 3455          * Set access flag.
 3456          */
 3457         mtx_lock(&np->n_mtx);
 3458         np->n_flag |= NACC;
 3459         getnanotime(&np->n_atim);
 3460         mtx_unlock(&np->n_mtx);
 3461         error = fifo_specops.vop_read(ap);
 3462         return error;   
 3463 }
 3464 
 3465 /*
 3466  * Write wrapper for fifos.
 3467  */
 3468 static int
 3469 nfsfifo_write(struct vop_write_args *ap)
 3470 {
 3471         struct nfsnode *np = VTONFS(ap->a_vp);
 3472 
 3473         /*
 3474          * Set update flag.
 3475          */
 3476         mtx_lock(&np->n_mtx);
 3477         np->n_flag |= NUPD;
 3478         getnanotime(&np->n_mtim);
 3479         mtx_unlock(&np->n_mtx);
 3480         return(fifo_specops.vop_write(ap));
 3481 }
 3482 
 3483 /*
 3484  * Close wrapper for fifos.
 3485  *
 3486  * Update the times on the nfsnode then do fifo close.
 3487  */
 3488 static int
 3489 nfsfifo_close(struct vop_close_args *ap)
 3490 {
 3491         struct vnode *vp = ap->a_vp;
 3492         struct nfsnode *np = VTONFS(vp);
 3493         struct vattr vattr;
 3494         struct timespec ts;
 3495 
 3496         mtx_lock(&np->n_mtx);
 3497         if (np->n_flag & (NACC | NUPD)) {
 3498                 getnanotime(&ts);
 3499                 if (np->n_flag & NACC)
 3500                         np->n_atim = ts;
 3501                 if (np->n_flag & NUPD)
 3502                         np->n_mtim = ts;
 3503                 np->n_flag |= NCHG;
 3504                 if (vrefcnt(vp) == 1 &&
 3505                     (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 3506                         VATTR_NULL(&vattr);
 3507                         if (np->n_flag & NACC)
 3508                                 vattr.va_atime = np->n_atim;
 3509                         if (np->n_flag & NUPD)
 3510                                 vattr.va_mtime = np->n_mtim;
 3511                         mtx_unlock(&np->n_mtx);
 3512                         (void)VOP_SETATTR(vp, &vattr, ap->a_cred);
 3513                         goto out;
 3514                 }
 3515         }
 3516         mtx_unlock(&np->n_mtx);
 3517 out:
 3518         return (fifo_specops.vop_close(ap));
 3519 }
 3520 
 3521 /*
 3522  * Just call nfs_writebp() with the force argument set to 1.
 3523  *
 3524  * NOTE: B_DONE may or may not be set in a_bp on call.
 3525  */
 3526 static int
 3527 nfs_bwrite(struct buf *bp)
 3528 {
 3529 
 3530         return (nfs_writebp(bp, 1, curthread));
 3531 }
 3532 
 3533 struct buf_ops buf_ops_nfs = {
 3534         .bop_name       =       "buf_ops_nfs",
 3535         .bop_write      =       nfs_bwrite,
 3536         .bop_strategy   =       bufstrategy,
 3537         .bop_sync       =       bufsync,
 3538         .bop_bdflush    =       bufbdflush,
 3539 };
Cache object: 00fe2c98f5a62e7e3b3fb60173fdfe86
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/nfsclient/nfs_vnops.c

FreeBSD/Linux Kernel Cross Reference
sys/nfsclient/nfs_vnops.c