The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/nfsclient/nfs_clvnops.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      from nfs_vnops.c        8.16 (Berkeley) 5/27/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/11.1/sys/fs/nfsclient/nfs_clvnops.c 317577 2017-04-29 00:46:51Z rmacklem $");
   37 
   38 /*
   39  * vnode op calls for Sun NFS version 2, 3 and 4
   40  */
   41 
   42 #include "opt_inet.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/kernel.h>
   46 #include <sys/systm.h>
   47 #include <sys/resourcevar.h>
   48 #include <sys/proc.h>
   49 #include <sys/mount.h>
   50 #include <sys/bio.h>
   51 #include <sys/buf.h>
   52 #include <sys/jail.h>
   53 #include <sys/malloc.h>
   54 #include <sys/mbuf.h>
   55 #include <sys/namei.h>
   56 #include <sys/socket.h>
   57 #include <sys/vnode.h>
   58 #include <sys/dirent.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/lockf.h>
   61 #include <sys/stat.h>
   62 #include <sys/sysctl.h>
   63 #include <sys/signalvar.h>
   64 
   65 #include <vm/vm.h>
   66 #include <vm/vm_extern.h>
   67 #include <vm/vm_object.h>
   68 
   69 #include <fs/nfs/nfsport.h>
   70 #include <fs/nfsclient/nfsnode.h>
   71 #include <fs/nfsclient/nfsmount.h>
   72 #include <fs/nfsclient/nfs.h>
   73 #include <fs/nfsclient/nfs_kdtrace.h>
   74 
   75 #include <net/if.h>
   76 #include <netinet/in.h>
   77 #include <netinet/in_var.h>
   78 
   79 #include <nfs/nfs_lock.h>
   80 
   81 #ifdef KDTRACE_HOOKS
   82 #include <sys/dtrace_bsd.h>
   83 
      /*
       * DTrace probe hooks and probe ids for the NFS client access cache
       * (flush done, get hit, get miss, load done).  Only built when
       * KDTRACE_HOOKS is defined.
       */
   84 dtrace_nfsclient_accesscache_flush_probe_func_t
   85                 dtrace_nfscl_accesscache_flush_done_probe;
   86 uint32_t        nfscl_accesscache_flush_done_id;
   87 
   88 dtrace_nfsclient_accesscache_get_probe_func_t
   89                 dtrace_nfscl_accesscache_get_hit_probe,
   90                 dtrace_nfscl_accesscache_get_miss_probe;
   91 uint32_t        nfscl_accesscache_get_hit_id;
   92 uint32_t        nfscl_accesscache_get_miss_id;
   93 
   94 dtrace_nfsclient_accesscache_load_probe_func_t
   95                 dtrace_nfscl_accesscache_load_done_probe;
   96 uint32_t        nfscl_accesscache_load_done_id;
   97 #endif /* KDTRACE_HOOKS */
   98 
   99 /* Defs */
  100 #define TRUE    1
  101 #define FALSE   0
  102 
      /* External declarations; the definitions are not in this part of the file. */
  103 extern struct nfsstatsv1 nfsstatsv1;
  104 extern int nfsrv_useacl;
  105 extern int nfscl_debuglevel;
  106 MALLOC_DECLARE(M_NEWNFSREQ);
  107 
      /*
       * Forward declarations of the file-local (static) vnode operation
       * handlers and helpers.  Most of the handlers are installed in the
       * vop_vector tables that follow.
       */
  108 static vop_read_t       nfsfifo_read;
  109 static vop_write_t      nfsfifo_write;
  110 static vop_close_t      nfsfifo_close;
  111 static int      nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
  112                     struct thread *);
  113 static vop_lookup_t     nfs_lookup;
  114 static vop_create_t     nfs_create;
  115 static vop_mknod_t      nfs_mknod;
  116 static vop_open_t       nfs_open;
  117 static vop_pathconf_t   nfs_pathconf;
  118 static vop_close_t      nfs_close;
  119 static vop_access_t     nfs_access;
  120 static vop_getattr_t    nfs_getattr;
  121 static vop_setattr_t    nfs_setattr;
  122 static vop_read_t       nfs_read;
  123 static vop_fsync_t      nfs_fsync;
  124 static vop_remove_t     nfs_remove;
  125 static vop_link_t       nfs_link;
  126 static vop_rename_t     nfs_rename;
  127 static vop_mkdir_t      nfs_mkdir;
  128 static vop_rmdir_t      nfs_rmdir;
  129 static vop_symlink_t    nfs_symlink;
  130 static vop_readdir_t    nfs_readdir;
  131 static vop_strategy_t   nfs_strategy;
  132 static  int     nfs_lookitup(struct vnode *, char *, int,
  133                     struct ucred *, struct thread *, struct nfsnode **);
  134 static  int     nfs_sillyrename(struct vnode *, struct vnode *,
  135                     struct componentname *);
  136 static vop_access_t     nfsspec_access;
  137 static vop_readlink_t   nfs_readlink;
  138 static vop_print_t      nfs_print;
  139 static vop_advlock_t    nfs_advlock;
  140 static vop_advlockasync_t nfs_advlockasync;
  141 static vop_getacl_t nfs_getacl;
  142 static vop_setacl_t nfs_setacl;
  143 static vop_set_text_t nfs_set_text;
  144 
  145 /*
  146  * Global vfs data structures for nfs
       *
       * newnfs_vnodeops maps vnode operations onto the NFS client handlers
       * (the static nfs_*() functions of this file plus the ncl_*() routines).
       * .vop_default names default_vnodeops; presumably that vector is
       * consulted for any operation not listed here -- confirm against the
       * VFS layer.
  147  */
  148 struct vop_vector newnfs_vnodeops = {
  149         .vop_default =          &default_vnodeops,
  150         .vop_access =           nfs_access,
  151         .vop_advlock =          nfs_advlock,
  152         .vop_advlockasync =     nfs_advlockasync,
  153         .vop_close =            nfs_close,
  154         .vop_create =           nfs_create,
  155         .vop_fsync =            nfs_fsync,
  156         .vop_getattr =          nfs_getattr,
  157         .vop_getpages =         ncl_getpages,
  158         .vop_putpages =         ncl_putpages,
  159         .vop_inactive =         ncl_inactive,
  160         .vop_link =             nfs_link,
  161         .vop_lookup =           nfs_lookup,
  162         .vop_mkdir =            nfs_mkdir,
  163         .vop_mknod =            nfs_mknod,
  164         .vop_open =             nfs_open,
  165         .vop_pathconf =         nfs_pathconf,
  166         .vop_print =            nfs_print,
  167         .vop_read =             nfs_read,
  168         .vop_readdir =          nfs_readdir,
  169         .vop_readlink =         nfs_readlink,
  170         .vop_reclaim =          ncl_reclaim,
  171         .vop_remove =           nfs_remove,
  172         .vop_rename =           nfs_rename,
  173         .vop_rmdir =            nfs_rmdir,
  174         .vop_setattr =          nfs_setattr,
  175         .vop_strategy =         nfs_strategy,
  176         .vop_symlink =          nfs_symlink,
  177         .vop_write =            ncl_write,
  178         .vop_getacl =           nfs_getacl,
  179         .vop_setacl =           nfs_setacl,
  180         .vop_set_text =         nfs_set_text,
  181 };
  182 
      /*
       * Vnode operations vector for fifo vnodes (per its name).  Access, close,
       * read and write use the nfsspec/nfsfifo handlers declared above; fsync,
       * getattr, setattr, print, inactive and reclaim reuse the handlers that
       * newnfs_vnodeops uses.  .vop_default names fifo_specops.
       */
  183 struct vop_vector newnfs_fifoops = {
  184         .vop_default =          &fifo_specops,
  185         .vop_access =           nfsspec_access,
  186         .vop_close =            nfsfifo_close,
  187         .vop_fsync =            nfs_fsync,
  188         .vop_getattr =          nfs_getattr,
  189         .vop_inactive =         ncl_inactive,
  190         .vop_print =            nfs_print,
  191         .vop_read =             nfsfifo_read,
  192         .vop_reclaim =          ncl_reclaim,
  193         .vop_setattr =          nfs_setattr,
  194         .vop_write =            nfsfifo_write,
  195 };
  196 
      /*
       * Prototypes for further file-local (static) helpers used by the mknod,
       * remove and rename code.
       */
  197 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
  198     struct componentname *cnp, struct vattr *vap);
  199 static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
  200     int namelen, struct ucred *cred, struct thread *td);
  201 static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
  202     char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
  203     char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
  204 static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
  205     struct componentname *scnp, struct sillyrename *sp);
  206 
  207 /*
  208  * Global variables
  209  */
      /*
       * sizeof(struct dirent) less MAXNAMLEN + 1 bytes (presumably the size of
       * the name buffer, leaving the fixed header -- confirm against dirent.h).
       */
  210 #define DIRHDSIZ        (sizeof (struct dirent) - (MAXNAMLEN + 1))
  211 
  212 SYSCTL_DECL(_vfs_nfs);
  213 
      /*
       * Lifetime of an access cache entry, compared against time_second.
       * nfs_access() treats an entry as usable while
       * time_second < stamp + nfsaccess_cache_timeout, and only requests the
       * full set of access bits from the server when this value is > 0.
       */
  214 static int      nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
  215 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
  216            &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
  217 
  218 static int      nfs_prime_access_cache = 0;
  219 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
  220            &nfs_prime_access_cache, 0,
  221            "Prime NFS ACCESS cache when fetching attributes");
  222 
  223 static int      newnfs_commit_on_close = 0;
  224 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
  225     &newnfs_commit_on_close, 0, "write+commit on close, else only write");
  226 
      /*
       * When non-zero, nfs_close() cleans the pages of a regular file's VM
       * object before looking at NMODIFIED.
       */
  227 static int      nfs_clean_pages_on_close = 1;
  228 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
  229            &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
  230 
      /*
       * When non-zero, nfs_open() honours O_DIRECT on regular files by
       * flushing cached data and marking the node NNONCACHE.
       */
  231 int newnfs_directio_enable = 0;
  232 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
  233            &newnfs_directio_enable, 0, "Enable NFS directio");
  234 
  235 int nfs_keep_dirty_on_error;
  236 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW,
  237     &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned");
  238 
  239 /*
  240  * This sysctl allows other processes to mmap a file that has been opened
  241  * O_DIRECT by a process.  In general, having processes mmap the file while
  242  * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
  243  * this by default to prevent DoS attacks - to prevent a malicious user from
  244  * opening up files O_DIRECT preventing other users from mmap'ing these
  245  * files.  "Protected" environments where stricter consistency guarantees are
  246  * required can disable this knob.  The process that opened the file O_DIRECT
  247  * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
  248  * meaningful.
  249  */
  250 int newnfs_directio_allow_mmap = 1;
  251 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
  252            &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
  253 
      /*
       * All six NFSACCESS_* permission bits OR'ed together.  nfs_access()
       * builds the same mask by hand for its blanket ACCESS request.
       */
  254 #define NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY                \
  255                          | NFSACCESS_EXTEND | NFSACCESS_EXECUTE \
  256                          | NFSACCESS_DELETE | NFSACCESS_LOOKUP)
  257 
  258 /*
  259  * SMP Locking Note :
  260  * The list of locks after the description of the lock is the ordering
  261  * of other locks acquired with the lock held.
  262  * np->n_mtx : Protects the fields in the nfsnode.
  263        VM Object Lock
  264        VI_MTX (acquired indirectly)
  265  * nmp->nm_mtx : Protects the fields in the nfsmount.
  266        rep->r_mtx
  267  * ncl_iod_mutex : Global lock, protects shared nfsiod state.
  268  * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
  269        nmp->nm_mtx
  270        rep->r_mtx
  271  * rep->r_mtx : Protects the fields in an nfsreq.
  272  */
  273 
      /*
       * Do an ACCESS RPC over the wire for vp and record the reply in the
       * per-nfsnode access cache.
       *
       * wmode is the NFSACCESS_* mask handed to nfsrpc_accessrpc().  On
       * success the mask returned by the RPC is cached under cred->cr_uid and,
       * when retmode is not NULL, also stored in *retmode.  If the RPC returned
       * attributes (attrflag set) they are loaded into the attribute cache
       * whether or not the RPC itself failed.
       *
       * Returns 0 on success, otherwise the RPC error (passed through
       * nfscl_maperr() on NFSv4 mounts).  *retmode is not written on error.
       */
  274 static int
  275 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td,
  276     struct ucred *cred, u_int32_t *retmode)
  277 {
  278         int error = 0, attrflag, i, lrupos;
  279         u_int32_t rmode;
  280         struct nfsnode *np = VTONFS(vp);
  281         struct nfsvattr nfsva;
  282 
  283         error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag,
  284             &rmode, NULL);
  285         if (attrflag)
  286                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
  287         if (!error) {
  288                 lrupos = 0;
  289                 mtx_lock(&np->n_mtx);
                      /*
                       * Refresh the entry for this uid if one exists.  While
                       * scanning, lrupos tracks the slot with the smallest
                       * stamp seen so far (slot 0 is the starting candidate,
                       * hence the i > 0 test).
                       */
  290                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
  291                         if (np->n_accesscache[i].uid == cred->cr_uid) {
  292                                 np->n_accesscache[i].mode = rmode;
  293                                 np->n_accesscache[i].stamp = time_second;
  294                                 break;
  295                         }
  296                         if (i > 0 && np->n_accesscache[i].stamp <
  297                             np->n_accesscache[lrupos].stamp)
  298                                 lrupos = i;
  299                 }
                      /* No entry for this uid: overwrite the oldest slot. */
  300                 if (i == NFS_ACCESSCACHESIZE) {
  301                         np->n_accesscache[lrupos].uid = cred->cr_uid;
  302                         np->n_accesscache[lrupos].mode = rmode;
  303                         np->n_accesscache[lrupos].stamp = time_second;
  304                 }
  305                 mtx_unlock(&np->n_mtx);
  306                 if (retmode != NULL)
  307                         *retmode = rmode;
  308                 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
  309         } else if (NFS_ISV4(vp)) {
  310                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
  311         }
  312 #ifdef KDTRACE_HOOKS
              /* Failed loads are reported to the probe too, with mode 0. */
  313         if (error != 0)
  314                 KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
  315                     error);
  316 #endif
  317         return (error);
  318 }
  319 
  320 /*
  321  * nfs access vnode op.
  322  * For nfs version 2, defer to nfsspec_access(); when uid 0 asks for VREAD on
       * an object whose cached size is non-zero, additionally try a small read
       * RPC.  File accesses may still fail later.
  323  * For nfs version 3 or 4, answer from the per-node access cache when an
       * unexpired entry for the uid grants every requested bit; otherwise use
       * the access rpc to check accessibility. If file modes
  324  * are changed on the server, accesses might still fail later.
       *
       * Before either path, write-type requests on a read-only mount fail with
       * EROFS for regular files, directories and symlinks.
       * Returns 0 when access is granted, otherwise an errno value (EACCES when
       * the server's reply lacks a requested bit).
  325  */
  326 static int
  327 nfs_access(struct vop_access_args *ap)
  328 {
  329         struct vnode *vp = ap->a_vp;
  330         int error = 0, i, gotahit;
  331         u_int32_t mode, wmode, rmode;
  332         int v34 = NFS_ISV34(vp);
  333         struct nfsnode *np = VTONFS(vp);
  334 
  335         /*
  336          * Disallow write attempts on filesystems mounted read-only;
  337          * unless the file is a socket, fifo, or a block or character
  338          * device resident on the filesystem.
  339          */
  340         if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS |
  341             VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL |
  342             VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
  343                 switch (vp->v_type) {
  344                 case VREG:
  345                 case VDIR:
  346                 case VLNK:
  347                         return (EROFS);
  348                 default:
  349                         break;
  350                 }
  351         }
  352         /*
  353          * For nfs v3 or v4, check to see if we have done this recently, and if
  354          * so return our cached result instead of making an ACCESS call.
  355          * If not, do an access rpc, otherwise you are stuck emulating
  356          * ufs_access() locally using the vattr. This may not be correct,
  357          * since the server may apply other access criteria such as
  358          * client uid-->server uid mapping that we do not know about.
  359          */
  360         if (v34) {
                      /*
                       * Translate the requested accmode bits into NFSACCESS_*
                       * bits.  VEXEC becomes EXECUTE for non-directories and
                       * LOOKUP for directories; VDELETE_CHILD is only mapped
                       * (to MODIFY) for directories.
                       */
  361                 if (ap->a_accmode & VREAD)
  362                         mode = NFSACCESS_READ;
  363                 else
  364                         mode = 0;
  365                 if (vp->v_type != VDIR) {
  366                         if (ap->a_accmode & VWRITE)
  367                                 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
  368                         if (ap->a_accmode & VAPPEND)
  369                                 mode |= NFSACCESS_EXTEND;
  370                         if (ap->a_accmode & VEXEC)
  371                                 mode |= NFSACCESS_EXECUTE;
  372                         if (ap->a_accmode & VDELETE)
  373                                 mode |= NFSACCESS_DELETE;
  374                 } else {
  375                         if (ap->a_accmode & VWRITE)
  376                                 mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
  377                         if (ap->a_accmode & VAPPEND)
  378                                 mode |= NFSACCESS_EXTEND;
  379                         if (ap->a_accmode & VEXEC)
  380                                 mode |= NFSACCESS_LOOKUP;
  381                         if (ap->a_accmode & VDELETE)
  382                                 mode |= NFSACCESS_DELETE;
  383                         if (ap->a_accmode & VDELETE_CHILD)
  384                                 mode |= NFSACCESS_MODIFY;
  385                 }
  386                 /* XXX safety belt, only make blanket request if caching */
                      /* The blanket mask is the same set of bits as NFSACCESS_ALL. */
  387                 if (nfsaccess_cache_timeout > 0) {
  388                         wmode = NFSACCESS_READ | NFSACCESS_MODIFY |
  389                                 NFSACCESS_EXTEND | NFSACCESS_EXECUTE |
  390                                 NFSACCESS_DELETE | NFSACCESS_LOOKUP;
  391                 } else {
  392                         wmode = mode;
  393                 }
  394 
  395                 /*
  396                  * Does our cached result allow us to give a definite yes to
  397                  * this request?
                       * Only the first entry matching the uid is examined; if it
                       * has expired or lacks a requested bit the loop still
                       * stops there and we fall through to the RPC.
  398                  */
  399                 gotahit = 0;
  400                 mtx_lock(&np->n_mtx);
  401                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
  402                         if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
  403                             if (time_second < (np->n_accesscache[i].stamp
  404                                 + nfsaccess_cache_timeout) &&
  405                                 (np->n_accesscache[i].mode & mode) == mode) {
  406                                 NFSINCRGLOBAL(nfsstatsv1.accesscache_hits);
  407                                 gotahit = 1;
  408                             }
  409                             break;
  410                         }
  411                 }
  412                 mtx_unlock(&np->n_mtx);
  413 #ifdef KDTRACE_HOOKS
  414                 if (gotahit != 0)
  415                         KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
  416                             ap->a_cred->cr_uid, mode);
  417                 else
  418                         KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
  419                             ap->a_cred->cr_uid, mode);
  420 #endif
  421                 if (gotahit == 0) {
  422                         /*
  423                          * Either a no, or a don't know.  Go to the wire.
  424                          */
  425                         NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
  426                         error = nfs34_access_otw(vp, wmode, ap->a_td,
  427                             ap->a_cred, &rmode);
                              /*
                               * The RPC succeeded but the server did not grant
                               * every bit this request needs.
                               */
  428                         if (!error &&
  429                             (rmode & mode) != mode)
  430                                 error = EACCES;
  431                 }
  432                 return (error);
  433         } else {
  434                 if ((error = nfsspec_access(ap)) != 0) {
  435                         return (error);
  436                 }
  437                 /*
  438                  * Attempt to prevent a mapped root from accessing a file
  439                  * which it shouldn't.  We try to read a byte from the file
  440                  * if the user is root and the file is not zero length.
  441                  * After calling nfsspec_access, we should have the correct
  442                  * file size cached.
                       * n_mtx is held only while n_size is sampled; it is dropped
                       * on both branches before any RPC is issued.
  443                  */
  444                 mtx_lock(&np->n_mtx);
  445                 if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
  446                     && VTONFS(vp)->n_size > 0) {
  447                         struct iovec aiov;
  448                         struct uio auio;
  449                         char buf[1];
  450 
  451                         mtx_unlock(&np->n_mtx);
                              /* Describe a one byte kernel-space read at offset 0. */
  452                         aiov.iov_base = buf;
  453                         aiov.iov_len = 1;
  454                         auio.uio_iov = &aiov;
  455                         auio.uio_iovcnt = 1;
  456                         auio.uio_offset = 0;
  457                         auio.uio_resid = 1;
  458                         auio.uio_segflg = UIO_SYSSPACE;
  459                         auio.uio_rw = UIO_READ;
  460                         auio.uio_td = ap->a_td;
  461 
  462                         if (vp->v_type == VREG)
  463                                 error = ncl_readrpc(vp, &auio, ap->a_cred);
  464                         else if (vp->v_type == VDIR) {
                                      /*
                                       * Directories are probed with an
                                       * NFS_DIRBLKSIZ readdir into a temporary
                                       * buffer instead of the one byte buffer.
                                       */
  465                                 char* bp;
  466                                 bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
  467                                 aiov.iov_base = bp;
  468                                 aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
  469                                 error = ncl_readdirrpc(vp, &auio, ap->a_cred,
  470                                     ap->a_td);
  471                                 free(bp, M_TEMP);
  472                         } else if (vp->v_type == VLNK)
  473                                 error = ncl_readlinkrpc(vp, &auio, ap->a_cred);
  474                         else
  475                                 error = EACCES;
  476                 } else
  477                         mtx_unlock(&np->n_mtx);
  478                 return (error);
  479         }
  480 }
  481 
  482 
  483 /*
  484  * nfs open vnode op
  485  * Check to see if the type is ok
  486  * and that deletion is not in progress.
  487  * For paged in text files, you will need to flush the page cache
  488  * if consistency is lost.
       *
       * Only VREG, VDIR and VLNK vnodes are accepted (EOPNOTSUPP otherwise).
       * For NFSv4 the Open is done first via nfsrpc_open(); the error returns
       * that follow undo it with nfsrpc_close().  Cached data is then flushed
       * and invalidated when the node is marked NMODIFIED, or when the freshly
       * fetched mtime (or, for NFSv4, change attribute) differs from the cached
       * one.  Returns 0 or an errno value; EBADF is returned when the vnode is
       * found doomed after a successful buffer invalidation.
       *
       * NOTE(review): the VI_DOOMED/EBADF returns do not call nfsrpc_close()
       * for NFSv4, unlike the other error returns -- confirm this is intended.
  489  */
  490 /* ARGSUSED */
  491 static int
  492 nfs_open(struct vop_open_args *ap)
  493 {
  494         struct vnode *vp = ap->a_vp;
  495         struct nfsnode *np = VTONFS(vp);
  496         struct vattr vattr;
  497         int error;
  498         int fmode = ap->a_mode;
  499         struct ucred *cred;
  500 
  501         if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
  502                 return (EOPNOTSUPP);
  503 
  504         /*
  505          * For NFSv4, we need to do the Open Op before cache validation,
  506          * so that we conform to RFC3530 Sec. 9.3.1.
  507          */
  508         if (NFS_ISV4(vp)) {
  509                 error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td);
  510                 if (error) {
  511                         error = nfscl_maperr(ap->a_td, error, (uid_t)0,
  512                             (gid_t)0);
  513                         return (error);
  514                 }
  515         }
  516 
  517         /*
  518          * Now, if this Open will be doing reading, re-validate/flush the
  519          * cache, so that Close/Open coherency is maintained.
               * Both branches below leave n_mtx held when they fall through to
               * the code that follows; every early return happens with it
               * released.
  520          */
  521         mtx_lock(&np->n_mtx);
  522         if (np->n_flag & NMODIFIED) {
  523                 mtx_unlock(&np->n_mtx);
  524                 error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  525                 if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
  526                         return (EBADF);
                      /*
                       * Only EINTR and EIO abort the open here; any other
                       * error from ncl_vinvalbuf() is not acted on.
                       */
  527                 if (error == EINTR || error == EIO) {
  528                         if (NFS_ISV4(vp))
  529                                 (void) nfsrpc_close(vp, 0, ap->a_td);
  530                         return (error);
  531                 }
  532                 mtx_lock(&np->n_mtx);
                      /*
                       * Zero the attribute stamp (presumably so that the
                       * VOP_GETATTR() below refetches from the server --
                       * confirm) and reset the cached directory EOF offset.
                       */
  533                 np->n_attrstamp = 0;
  534                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
  535                 if (vp->v_type == VDIR)
  536                         np->n_direofoffset = 0;
  537                 mtx_unlock(&np->n_mtx);
  538                 error = VOP_GETATTR(vp, &vattr, ap->a_cred);
  539                 if (error) {
  540                         if (NFS_ISV4(vp))
  541                                 (void) nfsrpc_close(vp, 0, ap->a_td);
  542                         return (error);
  543                 }
  544                 mtx_lock(&np->n_mtx);
  545                 np->n_mtime = vattr.va_mtime;
  546                 if (NFS_ISV4(vp))
  547                         np->n_change = vattr.va_filerev;
  548         } else {
  549                 mtx_unlock(&np->n_mtx);
  550                 error = VOP_GETATTR(vp, &vattr, ap->a_cred);
  551                 if (error) {
  552                         if (NFS_ISV4(vp))
  553                                 (void) nfsrpc_close(vp, 0, ap->a_td);
  554                         return (error);
  555                 }
  556                 mtx_lock(&np->n_mtx);
                      /*
                       * Not locally modified: invalidate cached data only if
                       * the change attribute (NFSv4) or the mtime differs from
                       * what was recorded in the nfsnode.
                       */
  557                 if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) ||
  558                     NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
  559                         if (vp->v_type == VDIR)
  560                                 np->n_direofoffset = 0;
  561                         mtx_unlock(&np->n_mtx);
  562                         error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  563                         if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
  564                                 return (EBADF);
  565                         if (error == EINTR || error == EIO) {
  566                                 if (NFS_ISV4(vp))
  567                                         (void) nfsrpc_close(vp, 0, ap->a_td);
  568                                 return (error);
  569                         }
  570                         mtx_lock(&np->n_mtx);
  571                         np->n_mtime = vattr.va_mtime;
  572                         if (NFS_ISV4(vp))
  573                                 np->n_change = vattr.va_filerev;
  574                 }
  575         }
  576 
  577         /*
  578          * If the object has >= 1 O_DIRECT active opens, we disable caching.
               * On the first such open n_mtx is dropped around the buffer
               * invalidation; here any error from ncl_vinvalbuf() fails the open.
  579          */
  580         if (newnfs_directio_enable && (fmode & O_DIRECT) &&
  581             (vp->v_type == VREG)) {
  582                 if (np->n_directio_opens == 0) {
  583                         mtx_unlock(&np->n_mtx);
  584                         error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  585                         if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
  586                                 return (EBADF);
  587                         if (error) {
  588                                 if (NFS_ISV4(vp))
  589                                         (void) nfsrpc_close(vp, 0, ap->a_td);
  590                                 return (error);
  591                         }
  592                         mtx_lock(&np->n_mtx);
  593                         np->n_flag |= NNONCACHE;
  594                 }
  595                 np->n_directio_opens++;
  596         }
  597 
  598         /* If opened for writing via NFSv4.1 or later, mark that for pNFS. */
  599         if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0)
  600                 np->n_flag |= NWRITEOPENED;
  601 
  602         /*
  603          * If this is an open for writing, capture a reference to the
  604          * credentials, so they can be used by ncl_putpages(). Using
  605          * these write credentials is preferable to the credentials of
  606          * whatever thread happens to be doing the VOP_PUTPAGES() since
  607          * the write RPCs are less likely to fail with EACCES.
               * The previously stored credential, if any, is remembered in
               * cred and released only after n_mtx has been dropped.
  608          */
  609         if ((fmode & FWRITE) != 0) {
  610                 cred = np->n_writecred;
  611                 np->n_writecred = crhold(ap->a_cred);
  612         } else
  613                 cred = NULL;
  614         mtx_unlock(&np->n_mtx);
  615 
  616         if (cred != NULL)
  617                 crfree(cred);
              /* vattr was filled in by VOP_GETATTR() on either branch above. */
  618         vnode_create_vobject(vp, vattr.va_size, ap->a_td);
  619         return (0);
  620 }
  621 
  622 /*
  623  * nfs close vnode op
  624  * What an NFS client should do upon close after writing is a debatable issue.
  625  * Most NFS clients push delayed writes to the server upon close, basically for
  626  * two reasons:
  627  * 1 - So that any write errors may be reported back to the client process
  628  *     doing the close system call. By far the two most likely errors are
  629  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
  630  * 2 - To put a worst case upper bound on cache inconsistency between
  631  *     multiple clients for the file.
  632  * There is also a consistency problem for Version 2 of the protocol w.r.t.
  633  * not being able to tell if other clients are writing a file concurrently,
  634  * since there is no way of knowing if the changed modify time in the reply
  635  * is only due to the write for this client.
  636  * (NFS Version 3 provides weak cache consistency data in the reply that
  637  *  should be sufficient to detect and handle this case.)
  638  *
  639  * The current code does the following:
  640  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
  641  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
  642  *                     or commit them (this satisfies 1 and 2 except for the
  643  *                     case where the server crashes after this close but
  644  *                     before the commit RPC, which is felt to be "good
  645  *                     enough". Changing the last argument to ncl_flush() to
  646  *                     a 1 would force a commit operation, if it is felt a
  647  *                     commit is necessary now.
  648  * for NFS Version 4 - flush the dirty buffers and commit them, if
  649  *                     nfscl_mustflush() says this is necessary.
  650  *                     It is necessary if there is no write delegation held,
  651  *                     in order to satisfy open/close coherency.
  652  *                     If the file isn't cached on local stable storage,
  653  *                     it may be necessary in order to detect "out of space"
  654  *                     errors from the server, if the write delegation
  655  *                     issued by the server doesn't allow the file to grow.
  656  */
  657 /* ARGSUSED */
  658 static int
  659 nfs_close(struct vop_close_args *ap)
  660 {
  661         struct vnode *vp = ap->a_vp;
  662         struct nfsnode *np = VTONFS(vp);
  663         struct nfsvattr nfsva;
  664         struct ucred *cred;
  665         int error = 0, ret, localcred = 0;
  666         int fmode = ap->a_fflag;
  667 
  668         if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF))
  669                 return (0);
  670         /*
  671          * During shutdown, a_cred isn't valid, so just use root.
  672          */
  673         if (ap->a_cred == NOCRED) {
  674                 cred = newnfs_getcred();
  675                 localcred = 1;
  676         } else {
  677                 cred = ap->a_cred;
  678         }
  679         if (vp->v_type == VREG) {
  680             /*
  681              * Examine and clean dirty pages, regardless of NMODIFIED.
  682              * This closes a major hole in close-to-open consistency.
  683              * We want to push out all dirty pages (and buffers) on
  684              * close, regardless of whether they were dirtied by
  685              * mmap'ed writes or via write().
  686              */
  687             if (nfs_clean_pages_on_close && vp->v_object) {
  688                 VM_OBJECT_WLOCK(vp->v_object);
  689                 vm_object_page_clean(vp->v_object, 0, 0, 0);
  690                 VM_OBJECT_WUNLOCK(vp->v_object);
  691             }
  692             mtx_lock(&np->n_mtx);
  693             if (np->n_flag & NMODIFIED) {
  694                 mtx_unlock(&np->n_mtx);
  695                 if (NFS_ISV3(vp)) {
  696                     /*
  697                      * Under NFSv3 we have dirty buffers to dispose of.  We
  698                      * must flush them to the NFS server.  We have the option
  699                      * of waiting all the way through the commit rpc or just
  700                      * waiting for the initial write.  The default is to only
  701                      * wait through the initial write so the data is in the
  702                      * server's cache, which is roughly similar to the state
  703                      * a standard disk subsystem leaves the file in on close().
  704                      *
  705                      * We cannot clear the NMODIFIED bit in np->n_flag due to
  706                      * potential races with other processes, and certainly
  707                      * cannot clear it if we don't commit.
  708                      * These races occur when there is no longer the old
  709                      * traditional vnode locking implemented for Vnode Ops.
  710                      */
  711                     int cm = newnfs_commit_on_close ? 1 : 0;
  712                     error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0);
  713                     /* np->n_flag &= ~NMODIFIED; */
  714                 } else if (NFS_ISV4(vp)) { 
  715                         if (nfscl_mustflush(vp) != 0) {
  716                                 int cm = newnfs_commit_on_close ? 1 : 0;
  717                                 error = ncl_flush(vp, MNT_WAIT, ap->a_td,
  718                                     cm, 0);
  719                                 /*
  720                                  * as above w.r.t races when clearing
  721                                  * NMODIFIED.
  722                                  * np->n_flag &= ~NMODIFIED;
  723                                  */
  724                         }
  725                 } else {
  726                         error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
  727                         if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
  728                                 return (EBADF);
  729                 }
  730                 mtx_lock(&np->n_mtx);
  731             }
  732             /* 
  733              * Invalidate the attribute cache in all cases.
  734              * An open is going to fetch fresh attrs any way, other procs
  735              * on this node that have file open will be forced to do an 
  736              * otw attr fetch, but this is safe.
  737              * --> A user found that their RPC count dropped by 20% when
  738              *     this was commented out and I can't see any requirement
  739              *     for it, so I've disabled it when negative lookups are
  740              *     enabled. (What does this have to do with negative lookup
  741              *     caching? Well nothing, except it was reported by the
  742              *     same user that needed negative lookup caching and I wanted
  743              *     there to be a way to disable it to see if it
  744              *     is the cause of some caching/coherency issue that might
  745              *     crop up.)
  746              */
  747             if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) {
  748                     np->n_attrstamp = 0;
  749                     KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
  750             }
  751             if (np->n_flag & NWRITEERR) {
  752                 np->n_flag &= ~NWRITEERR;
  753                 error = np->n_error;
  754             }
  755             mtx_unlock(&np->n_mtx);
  756         }
  757 
  758         if (NFS_ISV4(vp)) {
  759                 /*
  760                  * Get attributes so "change" is up to date.
  761                  */
  762                 if (error == 0 && nfscl_mustflush(vp) != 0 &&
  763                     vp->v_type == VREG &&
  764                     (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) {
  765                         ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva,
  766                             NULL);
  767                         if (!ret) {
  768                                 np->n_change = nfsva.na_filerev;
  769                                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL,
  770                                     NULL, 0, 0);
  771                         }
  772                 }
  773 
  774                 /*
  775                  * and do the close.
  776                  */
  777                 ret = nfsrpc_close(vp, 0, ap->a_td);
  778                 if (!error && ret)
  779                         error = ret;
  780                 if (error)
  781                         error = nfscl_maperr(ap->a_td, error, (uid_t)0,
  782                             (gid_t)0);
  783         }
  784         if (newnfs_directio_enable)
  785                 KASSERT((np->n_directio_asyncwr == 0),
  786                         ("nfs_close: dirty unflushed (%d) directio buffers\n",
  787                          np->n_directio_asyncwr));
  788         if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
  789                 mtx_lock(&np->n_mtx);
  790                 KASSERT((np->n_directio_opens > 0), 
  791                         ("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
  792                 np->n_directio_opens--;
  793                 if (np->n_directio_opens == 0)
  794                         np->n_flag &= ~NNONCACHE;
  795                 mtx_unlock(&np->n_mtx);
  796         }
  797         if (localcred)
  798                 NFSFREECRED(cred);
  799         return (error);
  800 }
  801 
  802 /*
  803  * nfs getattr call from vfs.
  804  */
  805 static int
  806 nfs_getattr(struct vop_getattr_args *ap)
  807 {
  808         struct vnode *vp = ap->a_vp;
  809         struct thread *td = curthread;  /* XXX */
  810         struct nfsnode *np = VTONFS(vp);
  811         int error = 0;
  812         struct nfsvattr nfsva;
  813         struct vattr *vap = ap->a_vap;
  814         struct vattr vattr;
  815 
  816         /*
  817          * Update local times for special files.
  818          */
  819         mtx_lock(&np->n_mtx);
  820         if (np->n_flag & (NACC | NUPD))
  821                 np->n_flag |= NCHG;
  822         mtx_unlock(&np->n_mtx);
  823         /*
  824          * First look in the cache.
  825          */
  826         if (ncl_getattrcache(vp, &vattr) == 0) {
  827                 vap->va_type = vattr.va_type;
  828                 vap->va_mode = vattr.va_mode;
  829                 vap->va_nlink = vattr.va_nlink;
  830                 vap->va_uid = vattr.va_uid;
  831                 vap->va_gid = vattr.va_gid;
  832                 vap->va_fsid = vattr.va_fsid;
  833                 vap->va_fileid = vattr.va_fileid;
  834                 vap->va_size = vattr.va_size;
  835                 vap->va_blocksize = vattr.va_blocksize;
  836                 vap->va_atime = vattr.va_atime;
  837                 vap->va_mtime = vattr.va_mtime;
  838                 vap->va_ctime = vattr.va_ctime;
  839                 vap->va_gen = vattr.va_gen;
  840                 vap->va_flags = vattr.va_flags;
  841                 vap->va_rdev = vattr.va_rdev;
  842                 vap->va_bytes = vattr.va_bytes;
  843                 vap->va_filerev = vattr.va_filerev;
  844                 /*
  845                  * Get the local modify time for the case of a write
  846                  * delegation.
  847                  */
  848                 nfscl_deleggetmodtime(vp, &vap->va_mtime);
  849                 return (0);
  850         }
  851 
  852         if (NFS_ISV34(vp) && nfs_prime_access_cache &&
  853             nfsaccess_cache_timeout > 0) {
  854                 NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
  855                 nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL);
  856                 if (ncl_getattrcache(vp, ap->a_vap) == 0) {
  857                         nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime);
  858                         return (0);
  859                 }
  860         }
  861         error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL);
  862         if (!error)
  863                 error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0);
  864         if (!error) {
  865                 /*
  866                  * Get the local modify time for the case of a write
  867                  * delegation.
  868                  */
  869                 nfscl_deleggetmodtime(vp, &vap->va_mtime);
  870         } else if (NFS_ISV4(vp)) {
  871                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
  872         }
  873         return (error);
  874 }
  875 
  876 /*
  877  * nfs setattr call.
  878  */
  879 static int
  880 nfs_setattr(struct vop_setattr_args *ap)
  881 {
  882         struct vnode *vp = ap->a_vp;
  883         struct nfsnode *np = VTONFS(vp);
  884         struct thread *td = curthread;  /* XXX */
  885         struct vattr *vap = ap->a_vap;
  886         int error = 0;
  887         u_quad_t tsize;
  888 
  889 #ifndef nolint
  890         tsize = (u_quad_t)0;
  891 #endif
  892 
  893         /*
  894          * Setting of flags and marking of atimes are not supported.
  895          */
  896         if (vap->va_flags != VNOVAL)
  897                 return (EOPNOTSUPP);
  898 
  899         /*
  900          * Disallow write attempts if the filesystem is mounted read-only.
  901          */
  902         if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
  903             vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
  904             vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
  905             (vp->v_mount->mnt_flag & MNT_RDONLY))
  906                 return (EROFS);
  907         if (vap->va_size != VNOVAL) {
  908                 switch (vp->v_type) {
  909                 case VDIR:
  910                         return (EISDIR);
  911                 case VCHR:
  912                 case VBLK:
  913                 case VSOCK:
  914                 case VFIFO:
  915                         if (vap->va_mtime.tv_sec == VNOVAL &&
  916                             vap->va_atime.tv_sec == VNOVAL &&
  917                             vap->va_mode == (mode_t)VNOVAL &&
  918                             vap->va_uid == (uid_t)VNOVAL &&
  919                             vap->va_gid == (gid_t)VNOVAL)
  920                                 return (0);             
  921                         vap->va_size = VNOVAL;
  922                         break;
  923                 default:
  924                         /*
  925                          * Disallow write attempts if the filesystem is
  926                          * mounted read-only.
  927                          */
  928                         if (vp->v_mount->mnt_flag & MNT_RDONLY)
  929                                 return (EROFS);
  930                         /*
  931                          *  We run vnode_pager_setsize() early (why?),
  932                          * we must set np->n_size now to avoid vinvalbuf
  933                          * V_SAVE races that might setsize a lower
  934                          * value.
  935                          */
  936                         mtx_lock(&np->n_mtx);
  937                         tsize = np->n_size;
  938                         mtx_unlock(&np->n_mtx);
  939                         error = ncl_meta_setsize(vp, ap->a_cred, td,
  940                             vap->va_size);
  941                         mtx_lock(&np->n_mtx);
  942                         if (np->n_flag & NMODIFIED) {
  943                             tsize = np->n_size;
  944                             mtx_unlock(&np->n_mtx);
  945                             error = ncl_vinvalbuf(vp, vap->va_size == 0 ?
  946                                 0 : V_SAVE, td, 1);
  947                             if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
  948                                     error = EBADF;
  949                             if (error != 0) {
  950                                     vnode_pager_setsize(vp, tsize);
  951                                     return (error);
  952                             }
  953                             /*
  954                              * Call nfscl_delegmodtime() to set the modify time
  955                              * locally, as required.
  956                              */
  957                             nfscl_delegmodtime(vp);
  958                         } else
  959                             mtx_unlock(&np->n_mtx);
  960                         /*
  961                          * np->n_size has already been set to vap->va_size
  962                          * in ncl_meta_setsize(). We must set it again since
  963                          * nfs_loadattrcache() could be called through
  964                          * ncl_meta_setsize() and could modify np->n_size.
  965                          */
  966                         mtx_lock(&np->n_mtx);
  967                         np->n_vattr.na_size = np->n_size = vap->va_size;
  968                         mtx_unlock(&np->n_mtx);
  969                 }
  970         } else {
  971                 mtx_lock(&np->n_mtx);
  972                 if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 
  973                     (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
  974                         mtx_unlock(&np->n_mtx);
  975                         error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
  976                         if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
  977                                 return (EBADF);
  978                         if (error == EINTR || error == EIO)
  979                                 return (error);
  980                 } else
  981                         mtx_unlock(&np->n_mtx);
  982         }
  983         error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
  984         if (error && vap->va_size != VNOVAL) {
  985                 mtx_lock(&np->n_mtx);
  986                 np->n_size = np->n_vattr.na_size = tsize;
  987                 vnode_pager_setsize(vp, tsize);
  988                 mtx_unlock(&np->n_mtx);
  989         }
  990         return (error);
  991 }
  992 
  993 /*
  994  * Do an nfs setattr rpc.
  995  */
  996 static int
  997 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
  998     struct thread *td)
  999 {
 1000         struct nfsnode *np = VTONFS(vp);
 1001         int error, ret, attrflag, i;
 1002         struct nfsvattr nfsva;
 1003 
 1004         if (NFS_ISV34(vp)) {
 1005                 mtx_lock(&np->n_mtx);
 1006                 for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
 1007                         np->n_accesscache[i].stamp = 0;
 1008                 np->n_flag |= NDELEGMOD;
 1009                 mtx_unlock(&np->n_mtx);
 1010                 KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
 1011         }
 1012         error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag,
 1013             NULL);
 1014         if (attrflag) {
 1015                 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 1016                 if (ret && !error)
 1017                         error = ret;
 1018         }
 1019         if (error && NFS_ISV4(vp))
 1020                 error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid);
 1021         return (error);
 1022 }
 1023 
 1024 /*
 1025  * nfs lookup call, one step at a time...
 1026  * First look in cache
 1027  * If not found, unlock the directory nfsnode and do the rpc
 1028  */
 1029 static int
 1030 nfs_lookup(struct vop_lookup_args *ap)
 1031 {
 1032         struct componentname *cnp = ap->a_cnp;
 1033         struct vnode *dvp = ap->a_dvp;
 1034         struct vnode **vpp = ap->a_vpp;
 1035         struct mount *mp = dvp->v_mount;
 1036         int flags = cnp->cn_flags;
 1037         struct vnode *newvp;
 1038         struct nfsmount *nmp;
 1039         struct nfsnode *np, *newnp;
 1040         int error = 0, attrflag, dattrflag, ltype, ncticks;
 1041         struct thread *td = cnp->cn_thread;
 1042         struct nfsfh *nfhp;
 1043         struct nfsvattr dnfsva, nfsva;
 1044         struct vattr vattr;
 1045         struct timespec nctime;
 1046         
 1047         *vpp = NULLVP;
 1048         if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
 1049             (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 1050                 return (EROFS);
 1051         if (dvp->v_type != VDIR)
 1052                 return (ENOTDIR);
 1053         nmp = VFSTONFS(mp);
 1054         np = VTONFS(dvp);
 1055 
 1056         /* For NFSv4, wait until any remove is done. */
 1057         mtx_lock(&np->n_mtx);
 1058         while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
 1059                 np->n_flag |= NREMOVEWANT;
 1060                 (void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
 1061         }
 1062         mtx_unlock(&np->n_mtx);
 1063 
 1064         if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
 1065                 return (error);
 1066         error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
 1067         if (error > 0 && error != ENOENT)
 1068                 return (error);
 1069         if (error == -1) {
 1070                 /*
 1071                  * Lookups of "." are special and always return the
 1072                  * current directory.  cache_lookup() already handles
 1073                  * associated locking bookkeeping, etc.
 1074                  */
 1075                 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
 1076                         /* XXX: Is this really correct? */
 1077                         if (cnp->cn_nameiop != LOOKUP &&
 1078                             (flags & ISLASTCN))
 1079                                 cnp->cn_flags |= SAVENAME;
 1080                         return (0);
 1081                 }
 1082 
 1083                 /*
 1084                  * We only accept a positive hit in the cache if the
 1085                  * change time of the file matches our cached copy.
 1086                  * Otherwise, we discard the cache entry and fallback
 1087                  * to doing a lookup RPC.  We also only trust cache
 1088                  * entries for less than nm_nametimeo seconds.
 1089                  *
 1090                  * To better handle stale file handles and attributes,
 1091                  * clear the attribute cache of this node if it is a
 1092                  * leaf component, part of an open() call, and not
 1093                  * locally modified before fetching the attributes.
 1094                  * This should allow stale file handles to be detected
 1095                  * here where we can fall back to a LOOKUP RPC to
 1096                  * recover rather than having nfs_open() detect the
 1097                  * stale file handle and failing open(2) with ESTALE.
 1098                  */
 1099                 newvp = *vpp;
 1100                 newnp = VTONFS(newvp);
 1101                 if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
 1102                     (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 1103                     !(newnp->n_flag & NMODIFIED)) {
 1104                         mtx_lock(&newnp->n_mtx);
 1105                         newnp->n_attrstamp = 0;
 1106                         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
 1107                         mtx_unlock(&newnp->n_mtx);
 1108                 }
 1109                 if (nfscl_nodeleg(newvp, 0) == 0 ||
 1110                     ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
 1111                     VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
 1112                     timespeccmp(&vattr.va_ctime, &nctime, ==))) {
 1113                         NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
 1114                         if (cnp->cn_nameiop != LOOKUP &&
 1115                             (flags & ISLASTCN))
 1116                                 cnp->cn_flags |= SAVENAME;
 1117                         return (0);
 1118                 }
 1119                 cache_purge(newvp);
 1120                 if (dvp != newvp)
 1121                         vput(newvp);
 1122                 else 
 1123                         vrele(newvp);
 1124                 *vpp = NULLVP;
 1125         } else if (error == ENOENT) {
 1126                 if (dvp->v_iflag & VI_DOOMED)
 1127                         return (ENOENT);
 1128                 /*
 1129                  * We only accept a negative hit in the cache if the
 1130                  * modification time of the parent directory matches
 1131                  * the cached copy in the name cache entry.
 1132                  * Otherwise, we discard all of the negative cache
 1133                  * entries for this directory.  We also only trust
 1134                  * negative cache entries for up to nm_negnametimeo
 1135                  * seconds.
 1136                  */
 1137                 if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
 1138                     VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
 1139                     timespeccmp(&vattr.va_mtime, &nctime, ==)) {
 1140                         NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
 1141                         return (ENOENT);
 1142                 }
 1143                 cache_purge_negative(dvp);
 1144         }
 1145 
 1146         error = 0;
 1147         newvp = NULLVP;
 1148         NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses);
 1149         error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 1150             cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 1151             NULL);
 1152         if (dattrflag)
 1153                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 1154         if (error) {
 1155                 if (newvp != NULLVP) {
 1156                         vput(newvp);
 1157                         *vpp = NULLVP;
 1158                 }
 1159 
 1160                 if (error != ENOENT) {
 1161                         if (NFS_ISV4(dvp))
 1162                                 error = nfscl_maperr(td, error, (uid_t)0,
 1163                                     (gid_t)0);
 1164                         return (error);
 1165                 }
 1166 
 1167                 /* The requested file was not found. */
 1168                 if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
 1169                     (flags & ISLASTCN)) {
 1170                         /*
 1171                          * XXX: UFS does a full VOP_ACCESS(dvp,
 1172                          * VWRITE) here instead of just checking
 1173                          * MNT_RDONLY.
 1174                          */
 1175                         if (mp->mnt_flag & MNT_RDONLY)
 1176                                 return (EROFS);
 1177                         cnp->cn_flags |= SAVENAME;
 1178                         return (EJUSTRETURN);
 1179                 }
 1180 
 1181                 if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) {
 1182                         /*
 1183                          * Cache the modification time of the parent
 1184                          * directory from the post-op attributes in
 1185                          * the name cache entry.  The negative cache
 1186                          * entry will be ignored once the directory
 1187                          * has changed.  Don't bother adding the entry
 1188                          * if the directory has already changed.
 1189                          */
 1190                         mtx_lock(&np->n_mtx);
 1191                         if (timespeccmp(&np->n_vattr.na_mtime,
 1192                             &dnfsva.na_mtime, ==)) {
 1193                                 mtx_unlock(&np->n_mtx);
 1194                                 cache_enter_time(dvp, NULL, cnp,
 1195                                     &dnfsva.na_mtime, NULL);
 1196                         } else
 1197                                 mtx_unlock(&np->n_mtx);
 1198                 }
 1199                 return (ENOENT);
 1200         }
 1201 
 1202         /*
 1203          * Handle RENAME case...
 1204          */
 1205         if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
 1206                 if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 1207                         FREE((caddr_t)nfhp, M_NFSFH);
 1208                         return (EISDIR);
 1209                 }
 1210                 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 1211                     LK_EXCLUSIVE);
 1212                 if (error)
 1213                         return (error);
 1214                 newvp = NFSTOV(np);
 1215                 if (attrflag)
 1216                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1217                             0, 1);
 1218                 *vpp = newvp;
 1219                 cnp->cn_flags |= SAVENAME;
 1220                 return (0);
 1221         }
 1222 
 1223         if (flags & ISDOTDOT) {
 1224                 ltype = NFSVOPISLOCKED(dvp);
 1225                 error = vfs_busy(mp, MBF_NOWAIT);
 1226                 if (error != 0) {
 1227                         vfs_ref(mp);
 1228                         NFSVOPUNLOCK(dvp, 0);
 1229                         error = vfs_busy(mp, 0);
 1230                         NFSVOPLOCK(dvp, ltype | LK_RETRY);
 1231                         vfs_rel(mp);
 1232                         if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
 1233                                 vfs_unbusy(mp);
 1234                                 error = ENOENT;
 1235                         }
 1236                         if (error != 0)
 1237                                 return (error);
 1238                 }
 1239                 NFSVOPUNLOCK(dvp, 0);
 1240                 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 1241                     cnp->cn_lkflags);
 1242                 if (error == 0)
 1243                         newvp = NFSTOV(np);
 1244                 vfs_unbusy(mp);
 1245                 if (newvp != dvp)
 1246                         NFSVOPLOCK(dvp, ltype | LK_RETRY);
 1247                 if (dvp->v_iflag & VI_DOOMED) {
 1248                         if (error == 0) {
 1249                                 if (newvp == dvp)
 1250                                         vrele(newvp);
 1251                                 else
 1252                                         vput(newvp);
 1253                         }
 1254                         error = ENOENT;
 1255                 }
 1256                 if (error != 0)
 1257                         return (error);
 1258                 if (attrflag)
 1259                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1260                             0, 1);
 1261         } else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 1262                 FREE((caddr_t)nfhp, M_NFSFH);
 1263                 VREF(dvp);
 1264                 newvp = dvp;
 1265                 if (attrflag)
 1266                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1267                             0, 1);
 1268         } else {
 1269                 error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 1270                     cnp->cn_lkflags);
 1271                 if (error)
 1272                         return (error);
 1273                 newvp = NFSTOV(np);
 1274                 if (attrflag)
 1275                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1276                             0, 1);
 1277                 else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 1278                     !(np->n_flag & NMODIFIED)) {                        
 1279                         /*
 1280                          * Flush the attribute cache when opening a
 1281                          * leaf node to ensure that fresh attributes
 1282                          * are fetched in nfs_open() since we did not
 1283                          * fetch attributes from the LOOKUP reply.
 1284                          */
 1285                         mtx_lock(&np->n_mtx);
 1286                         np->n_attrstamp = 0;
 1287                         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
 1288                         mtx_unlock(&np->n_mtx);
 1289                 }
 1290         }
 1291         if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 1292                 cnp->cn_flags |= SAVENAME;
 1293         if ((cnp->cn_flags & MAKEENTRY) &&
 1294             (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
 1295             attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
 1296                 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 1297                     newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime);
 1298         *vpp = newvp;
 1299         return (0);
 1300 }
 1301 
 1302 /*
 1303  * nfs read call.
 1304  * Just call ncl_bioread() to do the work.
 1305  */
 1306 static int
 1307 nfs_read(struct vop_read_args *ap)
 1308 {
 1309         struct vnode *vp = ap->a_vp;
 1310 
 1311         switch (vp->v_type) {
 1312         case VREG:
 1313                 return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
 1314         case VDIR:
 1315                 return (EISDIR);
 1316         default:
 1317                 return (EOPNOTSUPP);
 1318         }
 1319 }
 1320 
 1321 /*
 1322  * nfs readlink call
 1323  */
 1324 static int
 1325 nfs_readlink(struct vop_readlink_args *ap)
 1326 {
 1327         struct vnode *vp = ap->a_vp;
 1328 
 1329         if (vp->v_type != VLNK)
 1330                 return (EINVAL);
 1331         return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
 1332 }
 1333 
 1334 /*
 1335  * Do a readlink rpc.
 1336  * Called by ncl_doio() from below the buffer cache.
 1337  */
 1338 int
 1339 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 1340 {
 1341         int error, ret, attrflag;
 1342         struct nfsvattr nfsva;
 1343 
 1344         error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
 1345             &attrflag, NULL);
 1346         if (attrflag) {
 1347                 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 1348                 if (ret && !error)
 1349                         error = ret;
 1350         }
 1351         if (error && NFS_ISV4(vp))
 1352                 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 1353         return (error);
 1354 }
 1355 
 1356 /*
 1357  * nfs read rpc call
 1358  * Ditto above
 1359  */
 1360 int
 1361 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 1362 {
 1363         int error, ret, attrflag;
 1364         struct nfsvattr nfsva;
 1365         struct nfsmount *nmp;
 1366 
 1367         nmp = VFSTONFS(vnode_mount(vp));
 1368         error = EIO;
 1369         attrflag = 0;
 1370         if (NFSHASPNFS(nmp))
 1371                 error = nfscl_doiods(vp, uiop, NULL, NULL,
 1372                     NFSV4OPEN_ACCESSREAD, cred, uiop->uio_td);
 1373         NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error);
 1374         if (error != 0)
 1375                 error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva,
 1376                     &attrflag, NULL);
 1377         if (attrflag) {
 1378                 ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 1379                 if (ret && !error)
 1380                         error = ret;
 1381         }
 1382         if (error && NFS_ISV4(vp))
 1383                 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 1384         return (error);
 1385 }
 1386 
 1387 /*
 1388  * nfs write call
 1389  */
 1390 int
 1391 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
 1392     int *iomode, int *must_commit, int called_from_strategy)
 1393 {
 1394         struct nfsvattr nfsva;
 1395         int error, attrflag, ret;
 1396         struct nfsmount *nmp;
 1397 
 1398         nmp = VFSTONFS(vnode_mount(vp));
 1399         error = EIO;
 1400         attrflag = 0;
 1401         if (NFSHASPNFS(nmp))
 1402                 error = nfscl_doiods(vp, uiop, iomode, must_commit,
 1403                     NFSV4OPEN_ACCESSWRITE, cred, uiop->uio_td);
 1404         NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error);
 1405         if (error != 0)
 1406                 error = nfsrpc_write(vp, uiop, iomode, must_commit, cred,
 1407                     uiop->uio_td, &nfsva, &attrflag, NULL,
 1408                     called_from_strategy);
 1409         if (attrflag) {
 1410                 if (VTONFS(vp)->n_flag & ND_NFSV4)
 1411                         ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
 1412                             1);
 1413                 else
 1414                         ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
 1415                             1);
 1416                 if (ret && !error)
 1417                         error = ret;
 1418         }
 1419         if (DOINGASYNC(vp))
 1420                 *iomode = NFSWRITE_FILESYNC;
 1421         if (error && NFS_ISV4(vp))
 1422                 error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 1423         return (error);
 1424 }
 1425 
 1426 /*
 1427  * nfs mknod rpc
 1428  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
 1429  * mode set to specify the file type and the size field for rdev.
 1430  */
 1431 static int
 1432 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 1433     struct vattr *vap)
 1434 {
 1435         struct nfsvattr nfsva, dnfsva;
 1436         struct vnode *newvp = NULL;
 1437         struct nfsnode *np = NULL, *dnp;
 1438         struct nfsfh *nfhp;
 1439         struct vattr vattr;
 1440         int error = 0, attrflag, dattrflag;
 1441         u_int32_t rdev;
 1442 
 1443         if (vap->va_type == VCHR || vap->va_type == VBLK)
 1444                 rdev = vap->va_rdev;
 1445         else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
 1446                 rdev = 0xffffffff;
 1447         else
 1448                 return (EOPNOTSUPP);
 1449         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 1450                 return (error);
 1451         error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap,
 1452             rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva,
 1453             &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
 1454         if (!error) {
 1455                 if (!nfhp)
 1456                         (void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
 1457                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
 1458                             &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 1459                             NULL);
 1460                 if (nfhp)
 1461                         error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
 1462                             cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
 1463         }
 1464         if (dattrflag)
 1465                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 1466         if (!error) {
 1467                 newvp = NFSTOV(np);
 1468                 if (attrflag != 0) {
 1469                         error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1470                             0, 1);
 1471                         if (error != 0)
 1472                                 vput(newvp);
 1473                 }
 1474         }
 1475         if (!error) {
 1476                 *vpp = newvp;
 1477         } else if (NFS_ISV4(dvp)) {
 1478                 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
 1479                     vap->va_gid);
 1480         }
 1481         dnp = VTONFS(dvp);
 1482         mtx_lock(&dnp->n_mtx);
 1483         dnp->n_flag |= NMODIFIED;
 1484         if (!dattrflag) {
 1485                 dnp->n_attrstamp = 0;
 1486                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 1487         }
 1488         mtx_unlock(&dnp->n_mtx);
 1489         return (error);
 1490 }
 1491 
 1492 /*
 1493  * nfs mknod vop
 1494  * just call nfs_mknodrpc() to do the work.
 1495  */
 1496 /* ARGSUSED */
 1497 static int
 1498 nfs_mknod(struct vop_mknod_args *ap)
 1499 {
 1500         return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
 1501 }
 1502 
 1503 static struct mtx nfs_cverf_mtx;
 1504 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex",
 1505     MTX_DEF);
 1506 
 1507 static nfsquad_t
 1508 nfs_get_cverf(void)
 1509 {
 1510         static nfsquad_t cverf;
 1511         nfsquad_t ret;
 1512         static int cverf_initialized = 0;
 1513 
 1514         mtx_lock(&nfs_cverf_mtx);
 1515         if (cverf_initialized == 0) {
 1516                 cverf.lval[0] = arc4random();
 1517                 cverf.lval[1] = arc4random();
 1518                 cverf_initialized = 1;
 1519         } else
 1520                 cverf.qval++;
 1521         ret = cverf;
 1522         mtx_unlock(&nfs_cverf_mtx);
 1523 
 1524         return (ret);
 1525 }
 1526 
 1527 /*
 1528  * nfs file create call
 1529  */
 1530 static int
 1531 nfs_create(struct vop_create_args *ap)
 1532 {
 1533         struct vnode *dvp = ap->a_dvp;
 1534         struct vattr *vap = ap->a_vap;
 1535         struct componentname *cnp = ap->a_cnp;
 1536         struct nfsnode *np = NULL, *dnp;
 1537         struct vnode *newvp = NULL;
 1538         struct nfsmount *nmp;
 1539         struct nfsvattr dnfsva, nfsva;
 1540         struct nfsfh *nfhp;
 1541         nfsquad_t cverf;
 1542         int error = 0, attrflag, dattrflag, fmode = 0;
 1543         struct vattr vattr;
 1544 
 1545         /*
 1546          * Oops, not for me..
 1547          */
 1548         if (vap->va_type == VSOCK)
 1549                 return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
 1550 
 1551         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 1552                 return (error);
 1553         if (vap->va_vaflags & VA_EXCLUSIVE)
 1554                 fmode |= O_EXCL;
 1555         dnp = VTONFS(dvp);
 1556         nmp = VFSTONFS(vnode_mount(dvp));
 1557 again:
 1558         /* For NFSv4, wait until any remove is done. */
 1559         mtx_lock(&dnp->n_mtx);
 1560         while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) {
 1561                 dnp->n_flag |= NREMOVEWANT;
 1562                 (void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0);
 1563         }
 1564         mtx_unlock(&dnp->n_mtx);
 1565 
 1566         cverf = nfs_get_cverf();
 1567         error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 1568             vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva,
 1569             &nfhp, &attrflag, &dattrflag, NULL);
 1570         if (!error) {
 1571                 if (nfhp == NULL)
 1572                         (void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
 1573                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
 1574                             &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 1575                             NULL);
 1576                 if (nfhp != NULL)
 1577                         error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
 1578                             cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
 1579         }
 1580         if (dattrflag)
 1581                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 1582         if (!error) {
 1583                 newvp = NFSTOV(np);
 1584                 if (attrflag == 0)
 1585                         error = nfsrpc_getattr(newvp, cnp->cn_cred,
 1586                             cnp->cn_thread, &nfsva, NULL);
 1587                 if (error == 0)
 1588                         error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 1589                             0, 1);
 1590         }
 1591         if (error) {
 1592                 if (newvp != NULL) {
 1593                         vput(newvp);
 1594                         newvp = NULL;
 1595                 }
 1596                 if (NFS_ISV34(dvp) && (fmode & O_EXCL) &&
 1597                     error == NFSERR_NOTSUPP) {
 1598                         fmode &= ~O_EXCL;
 1599                         goto again;
 1600                 }
 1601         } else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
 1602                 if (nfscl_checksattr(vap, &nfsva)) {
 1603                         error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
 1604                             cnp->cn_thread, &nfsva, &attrflag, NULL);
 1605                         if (error && (vap->va_uid != (uid_t)VNOVAL ||
 1606                             vap->va_gid != (gid_t)VNOVAL)) {
 1607                                 /* try again without setting uid/gid */
 1608                                 vap->va_uid = (uid_t)VNOVAL;
 1609                                 vap->va_gid = (uid_t)VNOVAL;
 1610                                 error = nfsrpc_setattr(newvp, vap, NULL, 
 1611                                     cnp->cn_cred, cnp->cn_thread, &nfsva,
 1612                                     &attrflag, NULL);
 1613                         }
 1614                         if (attrflag)
 1615                                 (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
 1616                                     NULL, 0, 1);
 1617                         if (error != 0)
 1618                                 vput(newvp);
 1619                 }
 1620         }
 1621         if (!error) {
 1622                 if ((cnp->cn_flags & MAKEENTRY) && attrflag)
 1623                         cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 1624                             NULL);
 1625                 *ap->a_vpp = newvp;
 1626         } else if (NFS_ISV4(dvp)) {
 1627                 error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
 1628                     vap->va_gid);
 1629         }
 1630         mtx_lock(&dnp->n_mtx);
 1631         dnp->n_flag |= NMODIFIED;
 1632         if (!dattrflag) {
 1633                 dnp->n_attrstamp = 0;
 1634                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 1635         }
 1636         mtx_unlock(&dnp->n_mtx);
 1637         return (error);
 1638 }
 1639 
 1640 /*
 1641  * nfs file remove call
 1642  * To try and make nfs semantics closer to ufs semantics, a file that has
 1643  * other processes using the vnode is renamed instead of removed and then
 1644  * removed later on the last close.
 1645  * - If v_usecount > 1
 1646  *        If a rename is not already in the works
 1647  *           call nfs_sillyrename() to set it up
 1648  *     else
 1649  *        do the remove rpc
 1650  */
 1651 static int
 1652 nfs_remove(struct vop_remove_args *ap)
 1653 {
 1654         struct vnode *vp = ap->a_vp;
 1655         struct vnode *dvp = ap->a_dvp;
 1656         struct componentname *cnp = ap->a_cnp;
 1657         struct nfsnode *np = VTONFS(vp);
 1658         int error = 0;
 1659         struct vattr vattr;
 1660 
 1661         KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
 1662         KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
 1663         if (vp->v_type == VDIR)
 1664                 error = EPERM;
 1665         else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
 1666             VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
 1667             vattr.va_nlink > 1)) {
 1668                 /*
 1669                  * Purge the name cache so that the chance of a lookup for
 1670                  * the name succeeding while the remove is in progress is
 1671                  * minimized. Without node locking it can still happen, such
 1672                  * that an I/O op returns ESTALE, but since you get this if
 1673                  * another host removes the file..
 1674                  */
 1675                 cache_purge(vp);
 1676                 /*
 1677                  * throw away biocache buffers, mainly to avoid
 1678                  * unnecessary delayed writes later.
 1679                  */
 1680                 error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
 1681                 if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
 1682                         error = EBADF;
 1683                 else if (error != EINTR && error != EIO)
 1684                         /* Do the rpc */
 1685                         error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
 1686                             cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
 1687                 /*
 1688                  * Kludge City: If the first reply to the remove rpc is lost..
 1689                  *   the reply to the retransmitted request will be ENOENT
 1690                  *   since the file was in fact removed
 1691                  *   Therefore, we cheat and return success.
 1692                  */
 1693                 if (error == ENOENT)
 1694                         error = 0;
 1695         } else if (!np->n_sillyrename)
 1696                 error = nfs_sillyrename(dvp, vp, cnp);
 1697         mtx_lock(&np->n_mtx);
 1698         np->n_attrstamp = 0;
 1699         mtx_unlock(&np->n_mtx);
 1700         KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 1701         return (error);
 1702 }
 1703 
 1704 /*
 1705  * nfs file remove rpc called from nfs_inactive
 1706  */
 1707 int
 1708 ncl_removeit(struct sillyrename *sp, struct vnode *vp)
 1709 {
 1710         /*
 1711          * Make sure that the directory vnode is still valid.
 1712          * XXX we should lock sp->s_dvp here.
 1713          */
 1714         if (sp->s_dvp->v_type == VBAD)
 1715                 return (0);
 1716         return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
 1717             sp->s_cred, NULL));
 1718 }
 1719 
 1720 /*
 1721  * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
 1722  */
 1723 static int
 1724 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
 1725     int namelen, struct ucred *cred, struct thread *td)
 1726 {
 1727         struct nfsvattr dnfsva;
 1728         struct nfsnode *dnp = VTONFS(dvp);
 1729         int error = 0, dattrflag;
 1730 
 1731         mtx_lock(&dnp->n_mtx);
 1732         dnp->n_flag |= NREMOVEINPROG;
 1733         mtx_unlock(&dnp->n_mtx);
 1734         error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
 1735             &dattrflag, NULL);
 1736         mtx_lock(&dnp->n_mtx);
 1737         if ((dnp->n_flag & NREMOVEWANT)) {
 1738                 dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
 1739                 mtx_unlock(&dnp->n_mtx);
 1740                 wakeup((caddr_t)dnp);
 1741         } else {
 1742                 dnp->n_flag &= ~NREMOVEINPROG;
 1743                 mtx_unlock(&dnp->n_mtx);
 1744         }
 1745         if (dattrflag)
 1746                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 1747         mtx_lock(&dnp->n_mtx);
 1748         dnp->n_flag |= NMODIFIED;
 1749         if (!dattrflag) {
 1750                 dnp->n_attrstamp = 0;
 1751                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 1752         }
 1753         mtx_unlock(&dnp->n_mtx);
 1754         if (error && NFS_ISV4(dvp))
 1755                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 1756         return (error);
 1757 }
 1758 
 1759 /*
 1760  * nfs file rename call
 1761  */
 1762 static int
 1763 nfs_rename(struct vop_rename_args *ap)
 1764 {
 1765         struct vnode *fvp = ap->a_fvp;
 1766         struct vnode *tvp = ap->a_tvp;
 1767         struct vnode *fdvp = ap->a_fdvp;
 1768         struct vnode *tdvp = ap->a_tdvp;
 1769         struct componentname *tcnp = ap->a_tcnp;
 1770         struct componentname *fcnp = ap->a_fcnp;
 1771         struct nfsnode *fnp = VTONFS(ap->a_fvp);
 1772         struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
 1773         struct nfsv4node *newv4 = NULL;
 1774         int error;
 1775 
 1776         KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
 1777             (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
 1778         /* Check for cross-device rename */
 1779         if ((fvp->v_mount != tdvp->v_mount) ||
 1780             (tvp && (fvp->v_mount != tvp->v_mount))) {
 1781                 error = EXDEV;
 1782                 goto out;
 1783         }
 1784 
 1785         if (fvp == tvp) {
 1786                 printf("nfs_rename: fvp == tvp (can't happen)\n");
 1787                 error = 0;
 1788                 goto out;
 1789         }
 1790         if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0)
 1791                 goto out;
 1792 
 1793         /*
 1794          * We have to flush B_DELWRI data prior to renaming
 1795          * the file.  If we don't, the delayed-write buffers
 1796          * can be flushed out later after the file has gone stale
 1797          * under NFSV3.  NFSV2 does not have this problem because
 1798          * ( as far as I can tell ) it flushes dirty buffers more
 1799          * often.
 1800          * 
 1801          * Skip the rename operation if the fsync fails, this can happen
 1802          * due to the server's volume being full, when we pushed out data
 1803          * that was written back to our cache earlier. Not checking for
 1804          * this condition can result in potential (silent) data loss.
 1805          */
 1806         error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
 1807         NFSVOPUNLOCK(fvp, 0);
 1808         if (!error && tvp)
 1809                 error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
 1810         if (error)
 1811                 goto out;
 1812 
 1813         /*
 1814          * If the tvp exists and is in use, sillyrename it before doing the
 1815          * rename of the new file over it.
 1816          * XXX Can't sillyrename a directory.
 1817          */
 1818         if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
 1819                 tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
 1820                 vput(tvp);
 1821                 tvp = NULL;
 1822         }
 1823 
 1824         error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
 1825             tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
 1826             tcnp->cn_thread);
 1827 
 1828         if (error == 0 && NFS_ISV4(tdvp)) {
 1829                 /*
 1830                  * For NFSv4, check to see if it is the same name and
 1831                  * replace the name, if it is different.
 1832                  */
 1833                 MALLOC(newv4, struct nfsv4node *,
 1834                     sizeof (struct nfsv4node) +
 1835                     tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
 1836                     M_NFSV4NODE, M_WAITOK);
 1837                 mtx_lock(&tdnp->n_mtx);
 1838                 mtx_lock(&fnp->n_mtx);
 1839                 if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
 1840                     (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
 1841                       NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
 1842                       tcnp->cn_namelen) ||
 1843                       tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
 1844                       NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
 1845                         tdnp->n_fhp->nfh_len))) {
 1846 #ifdef notdef
 1847 { char nnn[100]; int nnnl;
 1848 nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
 1849 bcopy(tcnp->cn_nameptr, nnn, nnnl);
 1850 nnn[nnnl] = '\0';
 1851 printf("ren replace=%s\n",nnn);
 1852 }
 1853 #endif
 1854                         FREE((caddr_t)fnp->n_v4, M_NFSV4NODE);
 1855                         fnp->n_v4 = newv4;
 1856                         newv4 = NULL;
 1857                         fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
 1858                         fnp->n_v4->n4_namelen = tcnp->cn_namelen;
 1859                         NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
 1860                             tdnp->n_fhp->nfh_len);
 1861                         NFSBCOPY(tcnp->cn_nameptr,
 1862                             NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
 1863                 }
 1864                 mtx_unlock(&tdnp->n_mtx);
 1865                 mtx_unlock(&fnp->n_mtx);
 1866                 if (newv4 != NULL)
 1867                         FREE((caddr_t)newv4, M_NFSV4NODE);
 1868         }
 1869 
 1870         if (fvp->v_type == VDIR) {
 1871                 if (tvp != NULL && tvp->v_type == VDIR)
 1872                         cache_purge(tdvp);
 1873                 cache_purge(fdvp);
 1874         }
 1875 
 1876 out:
 1877         if (tdvp == tvp)
 1878                 vrele(tdvp);
 1879         else
 1880                 vput(tdvp);
 1881         if (tvp)
 1882                 vput(tvp);
 1883         vrele(fdvp);
 1884         vrele(fvp);
 1885         /*
 1886          * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
 1887          */
 1888         if (error == ENOENT)
 1889                 error = 0;
 1890         return (error);
 1891 }
 1892 
 1893 /*
 1894  * nfs file rename rpc called from nfs_remove() above
 1895  */
 1896 static int
 1897 nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
 1898     struct sillyrename *sp)
 1899 {
 1900 
 1901         return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
 1902             sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
 1903             scnp->cn_thread));
 1904 }
 1905 
 1906 /*
 1907  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
 1908  */
 1909 static int
 1910 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
 1911     int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
 1912     int tnamelen, struct ucred *cred, struct thread *td)
 1913 {
 1914         struct nfsvattr fnfsva, tnfsva;
 1915         struct nfsnode *fdnp = VTONFS(fdvp);
 1916         struct nfsnode *tdnp = VTONFS(tdvp);
 1917         int error = 0, fattrflag, tattrflag;
 1918 
 1919         error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
 1920             tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
 1921             &tattrflag, NULL, NULL);
 1922         mtx_lock(&fdnp->n_mtx);
 1923         fdnp->n_flag |= NMODIFIED;
 1924         if (fattrflag != 0) {
 1925                 mtx_unlock(&fdnp->n_mtx);
 1926                 (void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1);
 1927         } else {
 1928                 fdnp->n_attrstamp = 0;
 1929                 mtx_unlock(&fdnp->n_mtx);
 1930                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
 1931         }
 1932         mtx_lock(&tdnp->n_mtx);
 1933         tdnp->n_flag |= NMODIFIED;
 1934         if (tattrflag != 0) {
 1935                 mtx_unlock(&tdnp->n_mtx);
 1936                 (void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1);
 1937         } else {
 1938                 tdnp->n_attrstamp = 0;
 1939                 mtx_unlock(&tdnp->n_mtx);
 1940                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 1941         }
 1942         if (error && NFS_ISV4(fdvp))
 1943                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 1944         return (error);
 1945 }
 1946 
 1947 /*
 1948  * nfs hard link create call
 1949  */
 1950 static int
 1951 nfs_link(struct vop_link_args *ap)
 1952 {
 1953         struct vnode *vp = ap->a_vp;
 1954         struct vnode *tdvp = ap->a_tdvp;
 1955         struct componentname *cnp = ap->a_cnp;
 1956         struct nfsnode *np, *tdnp;
 1957         struct nfsvattr nfsva, dnfsva;
 1958         int error = 0, attrflag, dattrflag;
 1959 
 1960         /*
 1961          * Push all writes to the server, so that the attribute cache
 1962          * doesn't get "out of sync" with the server.
 1963          * XXX There should be a better way!
 1964          */
 1965         VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
 1966 
 1967         error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
 1968             cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag,
 1969             &dattrflag, NULL);
 1970         tdnp = VTONFS(tdvp);
 1971         mtx_lock(&tdnp->n_mtx);
 1972         tdnp->n_flag |= NMODIFIED;
 1973         if (dattrflag != 0) {
 1974                 mtx_unlock(&tdnp->n_mtx);
 1975                 (void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1);
 1976         } else {
 1977                 tdnp->n_attrstamp = 0;
 1978                 mtx_unlock(&tdnp->n_mtx);
 1979                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 1980         }
 1981         if (attrflag)
 1982                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 1983         else {
 1984                 np = VTONFS(vp);
 1985                 mtx_lock(&np->n_mtx);
 1986                 np->n_attrstamp = 0;
 1987                 mtx_unlock(&np->n_mtx);
 1988                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 1989         }
 1990         /*
 1991          * If negative lookup caching is enabled, I might as well
 1992          * add an entry for this node. Not necessary for correctness,
 1993          * but if negative caching is enabled, then the system
 1994          * must care about lookup caching hit rate, so...
 1995          */
 1996         if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 &&
 1997             (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
 1998                 cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL);
 1999         }
 2000         if (error && NFS_ISV4(vp))
 2001                 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
 2002                     (gid_t)0);
 2003         return (error);
 2004 }
 2005 
 2006 /*
 2007  * nfs symbolic link create call
 2008  */
 2009 static int
 2010 nfs_symlink(struct vop_symlink_args *ap)
 2011 {
 2012         struct vnode *dvp = ap->a_dvp;
 2013         struct vattr *vap = ap->a_vap;
 2014         struct componentname *cnp = ap->a_cnp;
 2015         struct nfsvattr nfsva, dnfsva;
 2016         struct nfsfh *nfhp;
 2017         struct nfsnode *np = NULL, *dnp;
 2018         struct vnode *newvp = NULL;
 2019         int error = 0, attrflag, dattrflag, ret;
 2020 
 2021         vap->va_type = VLNK;
 2022         error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2023             ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva,
 2024             &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
 2025         if (nfhp) {
 2026                 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
 2027                     &np, NULL, LK_EXCLUSIVE);
 2028                 if (!ret)
 2029                         newvp = NFSTOV(np);
 2030                 else if (!error)
 2031                         error = ret;
 2032         }
 2033         if (newvp != NULL) {
 2034                 if (attrflag)
 2035                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 2036                             0, 1);
 2037         } else if (!error) {
 2038                 /*
 2039                  * If we do not have an error and we could not extract the
 2040                  * newvp from the response due to the request being NFSv2, we
 2041                  * have to do a lookup in order to obtain a newvp to return.
 2042                  */
 2043                 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2044                     cnp->cn_cred, cnp->cn_thread, &np);
 2045                 if (!error)
 2046                         newvp = NFSTOV(np);
 2047         }
 2048         if (error) {
 2049                 if (newvp)
 2050                         vput(newvp);
 2051                 if (NFS_ISV4(dvp))
 2052                         error = nfscl_maperr(cnp->cn_thread, error,
 2053                             vap->va_uid, vap->va_gid);
 2054         } else {
 2055                 *ap->a_vpp = newvp;
 2056         }
 2057 
 2058         dnp = VTONFS(dvp);
 2059         mtx_lock(&dnp->n_mtx);
 2060         dnp->n_flag |= NMODIFIED;
 2061         if (dattrflag != 0) {
 2062                 mtx_unlock(&dnp->n_mtx);
 2063                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 2064         } else {
 2065                 dnp->n_attrstamp = 0;
 2066                 mtx_unlock(&dnp->n_mtx);
 2067                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 2068         }
 2069         /*
 2070          * If negative lookup caching is enabled, I might as well
 2071          * add an entry for this node. Not necessary for correctness,
 2072          * but if negative caching is enabled, then the system
 2073          * must care about lookup caching hit rate, so...
 2074          */
 2075         if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
 2076             (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
 2077                 cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL);
 2078         }
 2079         return (error);
 2080 }
 2081 
 2082 /*
 2083  * nfs make dir call
 2084  */
 2085 static int
 2086 nfs_mkdir(struct vop_mkdir_args *ap)
 2087 {
 2088         struct vnode *dvp = ap->a_dvp;
 2089         struct vattr *vap = ap->a_vap;
 2090         struct componentname *cnp = ap->a_cnp;
 2091         struct nfsnode *np = NULL, *dnp;
 2092         struct vnode *newvp = NULL;
 2093         struct vattr vattr;
 2094         struct nfsfh *nfhp;
 2095         struct nfsvattr nfsva, dnfsva;
 2096         int error = 0, attrflag, dattrflag, ret;
 2097 
 2098         if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
 2099                 return (error);
 2100         vap->va_type = VDIR;
 2101         error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2102             vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp,
 2103             &attrflag, &dattrflag, NULL);
 2104         dnp = VTONFS(dvp);
 2105         mtx_lock(&dnp->n_mtx);
 2106         dnp->n_flag |= NMODIFIED;
 2107         if (dattrflag != 0) {
 2108                 mtx_unlock(&dnp->n_mtx);
 2109                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 2110         } else {
 2111                 dnp->n_attrstamp = 0;
 2112                 mtx_unlock(&dnp->n_mtx);
 2113                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 2114         }
 2115         if (nfhp) {
 2116                 ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
 2117                     &np, NULL, LK_EXCLUSIVE);
 2118                 if (!ret) {
 2119                         newvp = NFSTOV(np);
 2120                         if (attrflag)
 2121                            (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
 2122                                 NULL, 0, 1);
 2123                 } else if (!error)
 2124                         error = ret;
 2125         }
 2126         if (!error && newvp == NULL) {
 2127                 error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2128                     cnp->cn_cred, cnp->cn_thread, &np);
 2129                 if (!error) {
 2130                         newvp = NFSTOV(np);
 2131                         if (newvp->v_type != VDIR)
 2132                                 error = EEXIST;
 2133                 }
 2134         }
 2135         if (error) {
 2136                 if (newvp)
 2137                         vput(newvp);
 2138                 if (NFS_ISV4(dvp))
 2139                         error = nfscl_maperr(cnp->cn_thread, error,
 2140                             vap->va_uid, vap->va_gid);
 2141         } else {
 2142                 /*
 2143                  * If negative lookup caching is enabled, I might as well
 2144                  * add an entry for this node. Not necessary for correctness,
 2145                  * but if negative caching is enabled, then the system
 2146                  * must care about lookup caching hit rate, so...
 2147                  */
 2148                 if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
 2149                     (cnp->cn_flags & MAKEENTRY) &&
 2150                     attrflag != 0 && dattrflag != 0)
 2151                         cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 2152                             &dnfsva.na_ctime);
 2153                 *ap->a_vpp = newvp;
 2154         }
 2155         return (error);
 2156 }
 2157 
 2158 /*
 2159  * nfs remove directory call
 2160  */
 2161 static int
 2162 nfs_rmdir(struct vop_rmdir_args *ap)
 2163 {
 2164         struct vnode *vp = ap->a_vp;
 2165         struct vnode *dvp = ap->a_dvp;
 2166         struct componentname *cnp = ap->a_cnp;
 2167         struct nfsnode *dnp;
 2168         struct nfsvattr dnfsva;
 2169         int error, dattrflag;
 2170 
 2171         if (dvp == vp)
 2172                 return (EINVAL);
 2173         error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 2174             cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL);
 2175         dnp = VTONFS(dvp);
 2176         mtx_lock(&dnp->n_mtx);
 2177         dnp->n_flag |= NMODIFIED;
 2178         if (dattrflag != 0) {
 2179                 mtx_unlock(&dnp->n_mtx);
 2180                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 2181         } else {
 2182                 dnp->n_attrstamp = 0;
 2183                 mtx_unlock(&dnp->n_mtx);
 2184                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 2185         }
 2186 
 2187         cache_purge(dvp);
 2188         cache_purge(vp);
 2189         if (error && NFS_ISV4(dvp))
 2190                 error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
 2191                     (gid_t)0);
 2192         /*
 2193          * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 2194          */
 2195         if (error == ENOENT)
 2196                 error = 0;
 2197         return (error);
 2198 }
 2199 
 2200 /*
 2201  * nfs readdir call
 2202  */
 2203 static int
 2204 nfs_readdir(struct vop_readdir_args *ap)
 2205 {
 2206         struct vnode *vp = ap->a_vp;
 2207         struct nfsnode *np = VTONFS(vp);
 2208         struct uio *uio = ap->a_uio;
 2209         ssize_t tresid, left;
 2210         int error = 0;
 2211         struct vattr vattr;
 2212         
 2213         if (ap->a_eofflag != NULL)
 2214                 *ap->a_eofflag = 0;
 2215         if (vp->v_type != VDIR) 
 2216                 return(EPERM);
 2217 
 2218         /*
 2219          * First, check for hit on the EOF offset cache
 2220          */
 2221         if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
 2222             (np->n_flag & NMODIFIED) == 0) {
 2223                 if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
 2224                         mtx_lock(&np->n_mtx);
 2225                         if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) ||
 2226                             !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 2227                                 mtx_unlock(&np->n_mtx);
 2228                                 NFSINCRGLOBAL(nfsstatsv1.direofcache_hits);
 2229                                 if (ap->a_eofflag != NULL)
 2230                                         *ap->a_eofflag = 1;
 2231                                 return (0);
 2232                         } else
 2233                                 mtx_unlock(&np->n_mtx);
 2234                 }
 2235         }
 2236 
 2237         /*
 2238          * NFS always guarantees that directory entries don't straddle
 2239          * DIRBLKSIZ boundaries.  As such, we need to limit the size
 2240          * to an exact multiple of DIRBLKSIZ, to avoid copying a partial
 2241          * directory entry.
 2242          */
 2243         left = uio->uio_resid % DIRBLKSIZ;
 2244         if (left == uio->uio_resid)
 2245                 return (EINVAL);
 2246         uio->uio_resid -= left;
 2247 
 2248         /*
 2249          * Call ncl_bioread() to do the real work.
 2250          */
 2251         tresid = uio->uio_resid;
 2252         error = ncl_bioread(vp, uio, 0, ap->a_cred);
 2253 
 2254         if (!error && uio->uio_resid == tresid) {
 2255                 NFSINCRGLOBAL(nfsstatsv1.direofcache_misses);
 2256                 if (ap->a_eofflag != NULL)
 2257                         *ap->a_eofflag = 1;
 2258         }
 2259         
 2260         /* Add the partial DIRBLKSIZ (left) back in. */
 2261         uio->uio_resid += left;
 2262         return (error);
 2263 }
 2264 
 2265 /*
 2266  * Readdir rpc call.
 2267  * Called from below the buffer cache by ncl_doio().
 2268  */
 2269 int
 2270 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
 2271     struct thread *td)
 2272 {
 2273         struct nfsvattr nfsva;
 2274         nfsuint64 *cookiep, cookie;
 2275         struct nfsnode *dnp = VTONFS(vp);
 2276         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2277         int error = 0, eof, attrflag;
 2278 
 2279         KASSERT(uiop->uio_iovcnt == 1 &&
 2280             (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
 2281             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 2282             ("nfs readdirrpc bad uio"));
 2283 
 2284         /*
 2285          * If there is no cookie, assume directory was stale.
 2286          */
 2287         ncl_dircookie_lock(dnp);
 2288         cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
 2289         if (cookiep) {
 2290                 cookie = *cookiep;
 2291                 ncl_dircookie_unlock(dnp);
 2292         } else {
 2293                 ncl_dircookie_unlock(dnp);              
 2294                 return (NFSERR_BAD_COOKIE);
 2295         }
 2296 
 2297         if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
 2298                 (void)ncl_fsinfo(nmp, vp, cred, td);
 2299 
 2300         error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva,
 2301             &attrflag, &eof, NULL);
 2302         if (attrflag)
 2303                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 2304 
 2305         if (!error) {
 2306                 /*
 2307                  * We are now either at the end of the directory or have filled
 2308                  * the block.
 2309                  */
 2310                 if (eof)
 2311                         dnp->n_direofoffset = uiop->uio_offset;
 2312                 else {
 2313                         if (uiop->uio_resid > 0)
 2314                                 printf("EEK! readdirrpc resid > 0\n");
 2315                         ncl_dircookie_lock(dnp);
 2316                         cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 2317                         *cookiep = cookie;
 2318                         ncl_dircookie_unlock(dnp);
 2319                 }
 2320         } else if (NFS_ISV4(vp)) {
 2321                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 2322         }
 2323         return (error);
 2324 }
 2325 
 2326 /*
 2327  * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc().
 2328  */
 2329 int
 2330 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
 2331     struct thread *td)
 2332 {
 2333         struct nfsvattr nfsva;
 2334         nfsuint64 *cookiep, cookie;
 2335         struct nfsnode *dnp = VTONFS(vp);
 2336         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2337         int error = 0, attrflag, eof;
 2338 
 2339         KASSERT(uiop->uio_iovcnt == 1 &&
 2340             (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
 2341             (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 2342             ("nfs readdirplusrpc bad uio"));
 2343 
 2344         /*
 2345          * If there is no cookie, assume directory was stale.
 2346          */
 2347         ncl_dircookie_lock(dnp);
 2348         cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
 2349         if (cookiep) {
 2350                 cookie = *cookiep;
 2351                 ncl_dircookie_unlock(dnp);
 2352         } else {
 2353                 ncl_dircookie_unlock(dnp);
 2354                 return (NFSERR_BAD_COOKIE);
 2355         }
 2356 
 2357         if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
 2358                 (void)ncl_fsinfo(nmp, vp, cred, td);
 2359         error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva,
 2360             &attrflag, &eof, NULL);
 2361         if (attrflag)
 2362                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 2363 
 2364         if (!error) {
 2365                 /*
 2366                  * We are now either at end of the directory or have filled the
 2367                  * the block.
 2368                  */
 2369                 if (eof)
 2370                         dnp->n_direofoffset = uiop->uio_offset;
 2371                 else {
 2372                         if (uiop->uio_resid > 0)
 2373                                 printf("EEK! readdirplusrpc resid > 0\n");
 2374                         ncl_dircookie_lock(dnp);
 2375                         cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 2376                         *cookiep = cookie;
 2377                         ncl_dircookie_unlock(dnp);
 2378                 }
 2379         } else if (NFS_ISV4(vp)) {
 2380                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 2381         }
 2382         return (error);
 2383 }
 2384 
 2385 /*
 2386  * Silly rename. To make the NFS filesystem that is stateless look a little
 2387  * more like the "ufs" a remove of an active vnode is translated to a rename
 2388  * to a funny looking filename that is removed by nfs_inactive on the
 2389  * nfsnode. There is the potential for another process on a different client
 2390  * to create the same funny name between the nfs_lookitup() fails and the
 2391  * nfs_rename() completes, but...
 2392  */
 2393 static int
 2394 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 2395 {
 2396         struct sillyrename *sp;
 2397         struct nfsnode *np;
 2398         int error;
 2399         short pid;
 2400         unsigned int lticks;
 2401 
 2402         cache_purge(dvp);
 2403         np = VTONFS(vp);
 2404         KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir"));
 2405         MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
 2406             M_NEWNFSREQ, M_WAITOK);
 2407         sp->s_cred = crhold(cnp->cn_cred);
 2408         sp->s_dvp = dvp;
 2409         VREF(dvp);
 2410 
 2411         /* 
 2412          * Fudge together a funny name.
 2413          * Changing the format of the funny name to accommodate more 
 2414          * sillynames per directory.
 2415          * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 
 2416          * CPU ticks since boot.
 2417          */
 2418         pid = cnp->cn_thread->td_proc->p_pid;
 2419         lticks = (unsigned int)ticks;
 2420         for ( ; ; ) {
 2421                 sp->s_namlen = sprintf(sp->s_name, 
 2422                                        ".nfs.%08x.%04x4.4", lticks, 
 2423                                        pid);
 2424                 if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 2425                                  cnp->cn_thread, NULL))
 2426                         break;
 2427                 lticks++;
 2428         }
 2429         error = nfs_renameit(dvp, vp, cnp, sp);
 2430         if (error)
 2431                 goto bad;
 2432         error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 2433                 cnp->cn_thread, &np);
 2434         np->n_sillyrename = sp;
 2435         return (0);
 2436 bad:
 2437         vrele(sp->s_dvp);
 2438         crfree(sp->s_cred);
 2439         free((caddr_t)sp, M_NEWNFSREQ);
 2440         return (error);
 2441 }
 2442 
 2443 /*
 2444  * Look up a file name and optionally either update the file handle or
 2445  * allocate an nfsnode, depending on the value of npp.
 2446  * npp == NULL  --> just do the lookup
 2447  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
 2448  *                      handled too
 2449  * *npp != NULL --> update the file handle in the vnode
 2450  */
 2451 static int
 2452 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
 2453     struct thread *td, struct nfsnode **npp)
 2454 {
 2455         struct vnode *newvp = NULL, *vp;
 2456         struct nfsnode *np, *dnp = VTONFS(dvp);
 2457         struct nfsfh *nfhp, *onfhp;
 2458         struct nfsvattr nfsva, dnfsva;
 2459         struct componentname cn;
 2460         int error = 0, attrflag, dattrflag;
 2461         u_int hash;
 2462 
 2463         error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
 2464             &nfhp, &attrflag, &dattrflag, NULL);
 2465         if (dattrflag)
 2466                 (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 2467         if (npp && !error) {
 2468                 if (*npp != NULL) {
 2469                     np = *npp;
 2470                     vp = NFSTOV(np);
 2471                     /*
 2472                      * For NFSv4, check to see if it is the same name and
 2473                      * replace the name, if it is different.
 2474                      */
 2475                     if (np->n_v4 != NULL && nfsva.na_type == VREG &&
 2476                         (np->n_v4->n4_namelen != len ||
 2477                          NFSBCMP(name, NFS4NODENAME(np->n_v4), len) ||
 2478                          dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
 2479                          NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
 2480                          dnp->n_fhp->nfh_len))) {
 2481 #ifdef notdef
 2482 { char nnn[100]; int nnnl;
 2483 nnnl = (len < 100) ? len : 99;
 2484 bcopy(name, nnn, nnnl);
 2485 nnn[nnnl] = '\0';
 2486 printf("replace=%s\n",nnn);
 2487 }
 2488 #endif
 2489                             FREE((caddr_t)np->n_v4, M_NFSV4NODE);
 2490                             MALLOC(np->n_v4, struct nfsv4node *,
 2491                                 sizeof (struct nfsv4node) +
 2492                                 dnp->n_fhp->nfh_len + len - 1,
 2493                                 M_NFSV4NODE, M_WAITOK);
 2494                             np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
 2495                             np->n_v4->n4_namelen = len;
 2496                             NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
 2497                                 dnp->n_fhp->nfh_len);
 2498                             NFSBCOPY(name, NFS4NODENAME(np->n_v4), len);
 2499                     }
 2500                     hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len,
 2501                         FNV1_32_INIT);
 2502                     onfhp = np->n_fhp;
 2503                     /*
 2504                      * Rehash node for new file handle.
 2505                      */
 2506                     vfs_hash_rehash(vp, hash);
 2507                     np->n_fhp = nfhp;
 2508                     if (onfhp != NULL)
 2509                         FREE((caddr_t)onfhp, M_NFSFH);
 2510                     newvp = NFSTOV(np);
 2511                 } else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
 2512                     FREE((caddr_t)nfhp, M_NFSFH);
 2513                     VREF(dvp);
 2514                     newvp = dvp;
 2515                 } else {
 2516                     cn.cn_nameptr = name;
 2517                     cn.cn_namelen = len;
 2518                     error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
 2519                         &np, NULL, LK_EXCLUSIVE);
 2520                     if (error)
 2521                         return (error);
 2522                     newvp = NFSTOV(np);
 2523                 }
 2524                 if (!attrflag && *npp == NULL) {
 2525                         if (newvp == dvp)
 2526                                 vrele(newvp);
 2527                         else
 2528                                 vput(newvp);
 2529                         return (ENOENT);
 2530                 }
 2531                 if (attrflag)
 2532                         (void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 2533                             0, 1);
 2534         }
 2535         if (npp && *npp == NULL) {
 2536                 if (error) {
 2537                         if (newvp) {
 2538                                 if (newvp == dvp)
 2539                                         vrele(newvp);
 2540                                 else
 2541                                         vput(newvp);
 2542                         }
 2543                 } else
 2544                         *npp = np;
 2545         }
 2546         if (error && NFS_ISV4(dvp))
 2547                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 2548         return (error);
 2549 }
 2550 
 2551 /*
 2552  * Nfs Version 3 and 4 commit rpc
 2553  */
 2554 int
 2555 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
 2556    struct thread *td)
 2557 {
 2558         struct nfsvattr nfsva;
 2559         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2560         int error, attrflag;
 2561 
 2562         mtx_lock(&nmp->nm_mtx);
 2563         if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
 2564                 mtx_unlock(&nmp->nm_mtx);
 2565                 return (0);
 2566         }
 2567         mtx_unlock(&nmp->nm_mtx);
 2568         error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva,
 2569             &attrflag, NULL);
 2570         if (attrflag != 0)
 2571                 (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL,
 2572                     0, 1);
 2573         if (error != 0 && NFS_ISV4(vp))
 2574                 error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 2575         return (error);
 2576 }
 2577 
 2578 /*
 2579  * Strategy routine.
 2580  * For async requests when nfsiod(s) are running, queue the request by
 2581  * calling ncl_asyncio(), otherwise just all ncl_doio() to do the
 2582  * request.
 2583  */
 2584 static int
 2585 nfs_strategy(struct vop_strategy_args *ap)
 2586 {
 2587         struct buf *bp;
 2588         struct vnode *vp;
 2589         struct ucred *cr;
 2590 
 2591         bp = ap->a_bp;
 2592         vp = ap->a_vp;
 2593         KASSERT(bp->b_vp == vp, ("missing b_getvp"));
 2594         KASSERT(!(bp->b_flags & B_DONE),
 2595             ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
 2596         BUF_ASSERT_HELD(bp);
 2597 
 2598         if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno)
 2599                 bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize /
 2600                     DEV_BSIZE);
 2601         if (bp->b_iocmd == BIO_READ)
 2602                 cr = bp->b_rcred;
 2603         else
 2604                 cr = bp->b_wcred;
 2605 
 2606         /*
 2607          * If the op is asynchronous and an i/o daemon is waiting
 2608          * queue the request, wake it up and wait for completion
 2609          * otherwise just do it ourselves.
 2610          */
 2611         if ((bp->b_flags & B_ASYNC) == 0 ||
 2612             ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread))
 2613                 (void) ncl_doio(vp, bp, cr, curthread, 1);
 2614         return (0);
 2615 }
 2616 
 2617 /*
 2618  * fsync vnode op. Just call ncl_flush() with commit == 1.
 2619  */
 2620 /* ARGSUSED */
 2621 static int
 2622 nfs_fsync(struct vop_fsync_args *ap)
 2623 {
 2624 
 2625         if (ap->a_vp->v_type != VREG) {
 2626                 /*
 2627                  * For NFS, metadata is changed synchronously on the server,
 2628                  * so there is nothing to flush. Also, ncl_flush() clears
 2629                  * the NMODIFIED flag and that shouldn't be done here for
 2630                  * directories.
 2631                  */
 2632                 return (0);
 2633         }
 2634         return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0));
 2635 }
 2636 
 2637 /*
 2638  * Flush all the blocks associated with a vnode.
 2639  *      Walk through the buffer pool and push any dirty pages
 2640  *      associated with the vnode.
 2641  * If the called_from_renewthread argument is TRUE, it has been called
 2642  * from the NFSv4 renew thread and, as such, cannot block indefinitely
 2643  * waiting for a buffer write to complete.
 2644  */
 2645 int
 2646 ncl_flush(struct vnode *vp, int waitfor, struct thread *td,
 2647     int commit, int called_from_renewthread)
 2648 {
 2649         struct nfsnode *np = VTONFS(vp);
 2650         struct buf *bp;
 2651         int i;
 2652         struct buf *nbp;
 2653         struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 2654         int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 2655         int passone = 1, trycnt = 0;
 2656         u_quad_t off, endoff, toff;
 2657         struct ucred* wcred = NULL;
 2658         struct buf **bvec = NULL;
 2659         struct bufobj *bo;
 2660 #ifndef NFS_COMMITBVECSIZ
 2661 #define NFS_COMMITBVECSIZ       20
 2662 #endif
 2663         struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
 2664         int bvecsize = 0, bveccount;
 2665 
 2666         if (called_from_renewthread != 0)
 2667                 slptimeo = hz;
 2668         if (nmp->nm_flag & NFSMNT_INT)
 2669                 slpflag = PCATCH;
 2670         if (!commit)
 2671                 passone = 0;
 2672         bo = &vp->v_bufobj;
 2673         /*
 2674          * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
 2675          * server, but has not been committed to stable storage on the server
 2676          * yet. On the first pass, the byte range is worked out and the commit
 2677          * rpc is done. On the second pass, ncl_writebp() is called to do the
 2678          * job.
 2679          */
 2680 again:
 2681         off = (u_quad_t)-1;
 2682         endoff = 0;
 2683         bvecpos = 0;
 2684         if (NFS_ISV34(vp) && commit) {
 2685                 if (bvec != NULL && bvec != bvec_on_stack)
 2686                         free(bvec, M_TEMP);
 2687                 /*
 2688                  * Count up how many buffers waiting for a commit.
 2689                  */
 2690                 bveccount = 0;
 2691                 BO_LOCK(bo);
 2692                 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 2693                         if (!BUF_ISLOCKED(bp) &&
 2694                             (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 2695                                 == (B_DELWRI | B_NEEDCOMMIT))
 2696                                 bveccount++;
 2697                 }
 2698                 /*
 2699                  * Allocate space to remember the list of bufs to commit.  It is
 2700                  * important to use M_NOWAIT here to avoid a race with nfs_write.
 2701                  * If we can't get memory (for whatever reason), we will end up
 2702                  * committing the buffers one-by-one in the loop below.
 2703                  */
 2704                 if (bveccount > NFS_COMMITBVECSIZ) {
 2705                         /*
 2706                          * Release the vnode interlock to avoid a lock
 2707                          * order reversal.
 2708                          */
 2709                         BO_UNLOCK(bo);
 2710                         bvec = (struct buf **)
 2711                                 malloc(bveccount * sizeof(struct buf *),
 2712                                        M_TEMP, M_NOWAIT);
 2713                         BO_LOCK(bo);
 2714                         if (bvec == NULL) {
 2715                                 bvec = bvec_on_stack;
 2716                                 bvecsize = NFS_COMMITBVECSIZ;
 2717                         } else
 2718                                 bvecsize = bveccount;
 2719                 } else {
 2720                         bvec = bvec_on_stack;
 2721                         bvecsize = NFS_COMMITBVECSIZ;
 2722                 }
 2723                 TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 2724                         if (bvecpos >= bvecsize)
 2725                                 break;
 2726                         if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 2727                                 nbp = TAILQ_NEXT(bp, b_bobufs);
 2728                                 continue;
 2729                         }
 2730                         if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
 2731                             (B_DELWRI | B_NEEDCOMMIT)) {
 2732                                 BUF_UNLOCK(bp);
 2733                                 nbp = TAILQ_NEXT(bp, b_bobufs);
 2734                                 continue;
 2735                         }
 2736                         BO_UNLOCK(bo);
 2737                         bremfree(bp);
 2738                         /*
 2739                          * Work out if all buffers are using the same cred
 2740                          * so we can deal with them all with one commit.
 2741                          *
 2742                          * NOTE: we are not clearing B_DONE here, so we have
 2743                          * to do it later on in this routine if we intend to
 2744                          * initiate I/O on the bp.
 2745                          *
 2746                          * Note: to avoid loopback deadlocks, we do not
 2747                          * assign b_runningbufspace.
 2748                          */
 2749                         if (wcred == NULL)
 2750                                 wcred = bp->b_wcred;
 2751                         else if (wcred != bp->b_wcred)
 2752                                 wcred = NOCRED;
 2753                         vfs_busy_pages(bp, 1);
 2754 
 2755                         BO_LOCK(bo);
 2756                         /*
 2757                          * bp is protected by being locked, but nbp is not
 2758                          * and vfs_busy_pages() may sleep.  We have to
 2759                          * recalculate nbp.
 2760                          */
 2761                         nbp = TAILQ_NEXT(bp, b_bobufs);
 2762 
 2763                         /*
 2764                          * A list of these buffers is kept so that the
 2765                          * second loop knows which buffers have actually
 2766                          * been committed. This is necessary, since there
 2767                          * may be a race between the commit rpc and new
 2768                          * uncommitted writes on the file.
 2769                          */
 2770                         bvec[bvecpos++] = bp;
 2771                         toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 2772                                 bp->b_dirtyoff;
 2773                         if (toff < off)
 2774                                 off = toff;
 2775                         toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
 2776                         if (toff > endoff)
 2777                                 endoff = toff;
 2778                 }
 2779                 BO_UNLOCK(bo);
 2780         }
 2781         if (bvecpos > 0) {
 2782                 /*
 2783                  * Commit data on the server, as required.
 2784                  * If all bufs are using the same wcred, then use that with
 2785                  * one call for all of them, otherwise commit each one
 2786                  * separately.
 2787                  */
 2788                 if (wcred != NOCRED)
 2789                         retv = ncl_commit(vp, off, (int)(endoff - off),
 2790                                           wcred, td);
 2791                 else {
 2792                         retv = 0;
 2793                         for (i = 0; i < bvecpos; i++) {
 2794                                 off_t off, size;
 2795                                 bp = bvec[i];
 2796                                 off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 2797                                         bp->b_dirtyoff;
 2798                                 size = (u_quad_t)(bp->b_dirtyend
 2799                                                   - bp->b_dirtyoff);
 2800                                 retv = ncl_commit(vp, off, (int)size,
 2801                                                   bp->b_wcred, td);
 2802                                 if (retv) break;
 2803                         }
 2804                 }
 2805 
 2806                 if (retv == NFSERR_STALEWRITEVERF)
 2807                         ncl_clearcommit(vp->v_mount);
 2808 
 2809                 /*
 2810                  * Now, either mark the blocks I/O done or mark the
 2811                  * blocks dirty, depending on whether the commit
 2812                  * succeeded.
 2813                  */
 2814                 for (i = 0; i < bvecpos; i++) {
 2815                         bp = bvec[i];
 2816                         bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 2817                         if (retv) {
 2818                                 /*
 2819                                  * Error, leave B_DELWRI intact
 2820                                  */
 2821                                 vfs_unbusy_pages(bp);
 2822                                 brelse(bp);
 2823                         } else {
 2824                                 /*
 2825                                  * Success, remove B_DELWRI ( bundirty() ).
 2826                                  *
 2827                                  * b_dirtyoff/b_dirtyend seem to be NFS
 2828                                  * specific.  We should probably move that
 2829                                  * into bundirty(). XXX
 2830                                  */
 2831                                 bufobj_wref(bo);
 2832                                 bp->b_flags |= B_ASYNC;
 2833                                 bundirty(bp);
 2834                                 bp->b_flags &= ~B_DONE;
 2835                                 bp->b_ioflags &= ~BIO_ERROR;
 2836                                 bp->b_dirtyoff = bp->b_dirtyend = 0;
 2837                                 bufdone(bp);
 2838                         }
 2839                 }
 2840         }
 2841 
 2842         /*
 2843          * Start/do any write(s) that are required.
 2844          */
 2845 loop:
 2846         BO_LOCK(bo);
 2847         TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 2848                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 2849                         if (waitfor != MNT_WAIT || passone)
 2850                                 continue;
 2851 
 2852                         error = BUF_TIMELOCK(bp,
 2853                             LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 2854                             BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
 2855                         if (error == 0) {
 2856                                 BUF_UNLOCK(bp);
 2857                                 goto loop;
 2858                         }
 2859                         if (error == ENOLCK) {
 2860                                 error = 0;
 2861                                 goto loop;
 2862                         }
 2863                         if (called_from_renewthread != 0) {
 2864                                 /*
 2865                                  * Return EIO so the flush will be retried
 2866                                  * later.
 2867                                  */
 2868                                 error = EIO;
 2869                                 goto done;
 2870                         }
 2871                         if (newnfs_sigintr(nmp, td)) {
 2872                                 error = EINTR;
 2873                                 goto done;
 2874                         }
 2875                         if (slpflag == PCATCH) {
 2876                                 slpflag = 0;
 2877                                 slptimeo = 2 * hz;
 2878                         }
 2879                         goto loop;
 2880                 }
 2881                 if ((bp->b_flags & B_DELWRI) == 0)
 2882                         panic("nfs_fsync: not dirty");
 2883                 if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
 2884                         BUF_UNLOCK(bp);
 2885                         continue;
 2886                 }
 2887                 BO_UNLOCK(bo);
 2888                 bremfree(bp);
 2889                 if (passone || !commit)
 2890                     bp->b_flags |= B_ASYNC;
 2891                 else
 2892                     bp->b_flags |= B_ASYNC;
 2893                 bwrite(bp);
 2894                 if (newnfs_sigintr(nmp, td)) {
 2895                         error = EINTR;
 2896                         goto done;
 2897                 }
 2898                 goto loop;
 2899         }
 2900         if (passone) {
 2901                 passone = 0;
 2902                 BO_UNLOCK(bo);
 2903                 goto again;
 2904         }
 2905         if (waitfor == MNT_WAIT) {
 2906                 while (bo->bo_numoutput) {
 2907                         error = bufobj_wwait(bo, slpflag, slptimeo);
 2908                         if (error) {
 2909                             BO_UNLOCK(bo);
 2910                             if (called_from_renewthread != 0) {
 2911                                 /*
 2912                                  * Return EIO so that the flush will be
 2913                                  * retried later.
 2914                                  */
 2915                                 error = EIO;
 2916                                 goto done;
 2917                             }
 2918                             error = newnfs_sigintr(nmp, td);
 2919                             if (error)
 2920                                 goto done;
 2921                             if (slpflag == PCATCH) {
 2922                                 slpflag = 0;
 2923                                 slptimeo = 2 * hz;
 2924                             }
 2925                             BO_LOCK(bo);
 2926                         }
 2927                 }
 2928                 if (bo->bo_dirty.bv_cnt != 0 && commit) {
 2929                         BO_UNLOCK(bo);
 2930                         goto loop;
 2931                 }
 2932                 /*
 2933                  * Wait for all the async IO requests to drain
 2934                  */
 2935                 BO_UNLOCK(bo);
 2936                 mtx_lock(&np->n_mtx);
 2937                 while (np->n_directio_asyncwr > 0) {
 2938                         np->n_flag |= NFSYNCWAIT;
 2939                         error = newnfs_msleep(td, &np->n_directio_asyncwr,
 2940                             &np->n_mtx, slpflag | (PRIBIO + 1), 
 2941                             "nfsfsync", 0);
 2942                         if (error) {
 2943                                 if (newnfs_sigintr(nmp, td)) {
 2944                                         mtx_unlock(&np->n_mtx);
 2945                                         error = EINTR;  
 2946                                         goto done;
 2947                                 }
 2948                         }
 2949                 }
 2950                 mtx_unlock(&np->n_mtx);
 2951         } else
 2952                 BO_UNLOCK(bo);
 2953         if (NFSHASPNFS(nmp)) {
 2954                 nfscl_layoutcommit(vp, td);
 2955                 /*
 2956                  * Invalidate the attribute cache, since writes to a DS
 2957                  * won't update the size attribute.
 2958                  */
 2959                 mtx_lock(&np->n_mtx);
 2960                 np->n_attrstamp = 0;
 2961         } else
 2962                 mtx_lock(&np->n_mtx);
 2963         if (np->n_flag & NWRITEERR) {
 2964                 error = np->n_error;
 2965                 np->n_flag &= ~NWRITEERR;
 2966         }
 2967         if (commit && bo->bo_dirty.bv_cnt == 0 &&
 2968             bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
 2969                 np->n_flag &= ~NMODIFIED;
 2970         mtx_unlock(&np->n_mtx);
 2971 done:
 2972         if (bvec != NULL && bvec != bvec_on_stack)
 2973                 free(bvec, M_TEMP);
 2974         if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
 2975             (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
 2976             np->n_directio_asyncwr != 0)) {
 2977                 if (trycnt++ < 5) {
 2978                         /* try, try again... */
 2979                         passone = 1;
 2980                         wcred = NULL;
 2981                         bvec = NULL;
 2982                         bvecsize = 0;
 2983                         goto again;
 2984                 }
 2985                 vn_printf(vp, "ncl_flush failed");
 2986                 error = called_from_renewthread != 0 ? EIO : EBUSY;
 2987         }
 2988         return (error);
 2989 }
 2990 
 2991 /*
 2992  * NFS advisory byte-level locks.
 2993  */
 2994 static int
 2995 nfs_advlock(struct vop_advlock_args *ap)
 2996 {
 2997         struct vnode *vp = ap->a_vp;
 2998         struct ucred *cred;
 2999         struct nfsnode *np = VTONFS(ap->a_vp);
 3000         struct proc *p = (struct proc *)ap->a_id;
 3001         struct thread *td = curthread;  /* XXX */
 3002         struct vattr va;
 3003         int ret, error = EOPNOTSUPP;
 3004         u_quad_t size;
 3005         
 3006         if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
 3007                 if (vp->v_type != VREG)
 3008                         return (EINVAL);
 3009                 if ((ap->a_flags & F_POSIX) != 0)
 3010                         cred = p->p_ucred;
 3011                 else
 3012                         cred = td->td_ucred;
 3013                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 3014                 if (vp->v_iflag & VI_DOOMED) {
 3015                         NFSVOPUNLOCK(vp, 0);
 3016                         return (EBADF);
 3017                 }
 3018 
 3019                 /*
 3020                  * If this is unlocking a write locked region, flush and
 3021                  * commit them before unlocking. This is required by
 3022                  * RFC3530 Sec. 9.3.2.
 3023                  */
 3024                 if (ap->a_op == F_UNLCK &&
 3025                     nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
 3026                     ap->a_flags))
 3027                         (void) ncl_flush(vp, MNT_WAIT, td, 1, 0);
 3028 
 3029                 /*
 3030                  * Loop around doing the lock op, while a blocking lock
 3031                  * must wait for the lock op to succeed.
 3032                  */
 3033                 do {
 3034                         ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
 3035                             ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
 3036                         if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 3037                             ap->a_op == F_SETLK) {
 3038                                 NFSVOPUNLOCK(vp, 0);
 3039                                 error = nfs_catnap(PZERO | PCATCH, ret,
 3040                                     "ncladvl");
 3041                                 if (error)
 3042                                         return (EINTR);
 3043                                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 3044                                 if (vp->v_iflag & VI_DOOMED) {
 3045                                         NFSVOPUNLOCK(vp, 0);
 3046                                         return (EBADF);
 3047                                 }
 3048                         }
 3049                 } while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 3050                      ap->a_op == F_SETLK);
 3051                 if (ret == NFSERR_DENIED) {
 3052                         NFSVOPUNLOCK(vp, 0);
 3053                         return (EAGAIN);
 3054                 } else if (ret == EINVAL || ret == EBADF || ret == EINTR) {
 3055                         NFSVOPUNLOCK(vp, 0);
 3056                         return (ret);
 3057                 } else if (ret != 0) {
 3058                         NFSVOPUNLOCK(vp, 0);
 3059                         return (EACCES);
 3060                 }
 3061 
 3062                 /*
 3063                  * Now, if we just got a lock, invalidate data in the buffer
 3064                  * cache, as required, so that the coherency conforms with
 3065                  * RFC3530 Sec. 9.3.2.
 3066                  */
 3067                 if (ap->a_op == F_SETLK) {
 3068                         if ((np->n_flag & NMODIFIED) == 0) {
 3069                                 np->n_attrstamp = 0;
 3070                                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 3071                                 ret = VOP_GETATTR(vp, &va, cred);
 3072                         }
 3073                         if ((np->n_flag & NMODIFIED) || ret ||
 3074                             np->n_change != va.va_filerev) {
 3075                                 (void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
 3076                                 if ((vp->v_iflag & VI_DOOMED) != 0) {
 3077                                         NFSVOPUNLOCK(vp, 0);
 3078                                         return (EBADF);
 3079                                 }
 3080                                 np->n_attrstamp = 0;
 3081                                 KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 3082                                 ret = VOP_GETATTR(vp, &va, cred);
 3083                                 if (!ret) {
 3084                                         np->n_mtime = va.va_mtime;
 3085                                         np->n_change = va.va_filerev;
 3086                                 }
 3087                         }
 3088                         /* Mark that a file lock has been acquired. */
 3089                         mtx_lock(&np->n_mtx);
 3090                         np->n_flag |= NHASBEENLOCKED;
 3091                         mtx_unlock(&np->n_mtx);
 3092                 }
 3093                 NFSVOPUNLOCK(vp, 0);
 3094                 return (0);
 3095         } else if (!NFS_ISV4(vp)) {
 3096                 error = NFSVOPLOCK(vp, LK_SHARED);
 3097                 if (error)
 3098                         return (error);
 3099                 if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 3100                         size = VTONFS(vp)->n_size;
 3101                         NFSVOPUNLOCK(vp, 0);
 3102                         error = lf_advlock(ap, &(vp->v_lockf), size);
 3103                 } else {
 3104                         if (nfs_advlock_p != NULL)
 3105                                 error = nfs_advlock_p(ap);
 3106                         else {
 3107                                 NFSVOPUNLOCK(vp, 0);
 3108                                 error = ENOLCK;
 3109                         }
 3110                 }
 3111                 if (error == 0 && ap->a_op == F_SETLK) {
 3112                         error = NFSVOPLOCK(vp, LK_SHARED);
 3113                         if (error == 0) {
 3114                                 /* Mark that a file lock has been acquired. */
 3115                                 mtx_lock(&np->n_mtx);
 3116                                 np->n_flag |= NHASBEENLOCKED;
 3117                                 mtx_unlock(&np->n_mtx);
 3118                                 NFSVOPUNLOCK(vp, 0);
 3119                         }
 3120                 }
 3121         }
 3122         return (error);
 3123 }
 3124 
 3125 /*
 3126  * NFS advisory byte-level locks.
 3127  */
 3128 static int
 3129 nfs_advlockasync(struct vop_advlockasync_args *ap)
 3130 {
 3131         struct vnode *vp = ap->a_vp;
 3132         u_quad_t size;
 3133         int error;
 3134         
 3135         if (NFS_ISV4(vp))
 3136                 return (EOPNOTSUPP);
 3137         error = NFSVOPLOCK(vp, LK_SHARED);
 3138         if (error)
 3139                 return (error);
 3140         if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 3141                 size = VTONFS(vp)->n_size;
 3142                 NFSVOPUNLOCK(vp, 0);
 3143                 error = lf_advlockasync(ap, &(vp->v_lockf), size);
 3144         } else {
 3145                 NFSVOPUNLOCK(vp, 0);
 3146                 error = EOPNOTSUPP;
 3147         }
 3148         return (error);
 3149 }
 3150 
 3151 /*
 3152  * Print out the contents of an nfsnode.
 3153  */
 3154 static int
 3155 nfs_print(struct vop_print_args *ap)
 3156 {
 3157         struct vnode *vp = ap->a_vp;
 3158         struct nfsnode *np = VTONFS(vp);
 3159 
 3160         printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid,
 3161             (uintmax_t)np->n_vattr.na_fsid);
 3162         if (vp->v_type == VFIFO)
 3163                 fifo_printinfo(vp);
 3164         printf("\n");
 3165         return (0);
 3166 }
 3167 
 3168 /*
 3169  * This is the "real" nfs::bwrite(struct buf*).
 3170  * We set B_CACHE if this is a VMIO buffer.
 3171  */
 3172 int
 3173 ncl_writebp(struct buf *bp, int force __unused, struct thread *td)
 3174 {
 3175         int oldflags, rtval;
 3176 
 3177         BUF_ASSERT_HELD(bp);
 3178 
 3179         if (bp->b_flags & B_INVAL) {
 3180                 brelse(bp);
 3181                 return (0);
 3182         }
 3183 
 3184         oldflags = bp->b_flags;
 3185         bp->b_flags |= B_CACHE;
 3186 
 3187         /*
 3188          * Undirty the bp.  We will redirty it later if the I/O fails.
 3189          */
 3190         bundirty(bp);
 3191         bp->b_flags &= ~B_DONE;
 3192         bp->b_ioflags &= ~BIO_ERROR;
 3193         bp->b_iocmd = BIO_WRITE;
 3194 
 3195         bufobj_wref(bp->b_bufobj);
 3196         curthread->td_ru.ru_oublock++;
 3197 
 3198         /*
 3199          * Note: to avoid loopback deadlocks, we do not
 3200          * assign b_runningbufspace.
 3201          */
 3202         vfs_busy_pages(bp, 1);
 3203 
 3204         BUF_KERNPROC(bp);
 3205         bp->b_iooffset = dbtob(bp->b_blkno);
 3206         bstrategy(bp);
 3207 
 3208         if ((oldflags & B_ASYNC) != 0)
 3209                 return (0);
 3210 
 3211         rtval = bufwait(bp);
 3212         if (oldflags & B_DELWRI)
 3213                 reassignbuf(bp);
 3214         brelse(bp);
 3215         return (rtval);
 3216 }
 3217 
 3218 /*
 3219  * nfs special file access vnode op.
 3220  * Essentially just get vattr and then imitate iaccess() since the device is
 3221  * local to the client.
 3222  */
 3223 static int
 3224 nfsspec_access(struct vop_access_args *ap)
 3225 {
 3226         struct vattr *vap;
 3227         struct ucred *cred = ap->a_cred;
 3228         struct vnode *vp = ap->a_vp;
 3229         accmode_t accmode = ap->a_accmode;
 3230         struct vattr vattr;
 3231         int error;
 3232 
 3233         /*
 3234          * Disallow write attempts on filesystems mounted read-only;
 3235          * unless the file is a socket, fifo, or a block or character
 3236          * device resident on the filesystem.
 3237          */
 3238         if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 3239                 switch (vp->v_type) {
 3240                 case VREG:
 3241                 case VDIR:
 3242                 case VLNK:
 3243                         return (EROFS);
 3244                 default:
 3245                         break;
 3246                 }
 3247         }
 3248         vap = &vattr;
 3249         error = VOP_GETATTR(vp, vap, cred);
 3250         if (error)
 3251                 goto out;
 3252         error  = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
 3253             accmode, cred, NULL);
 3254 out:
 3255         return error;
 3256 }
 3257 
 3258 /*
 3259  * Read wrapper for fifos.
 3260  */
 3261 static int
 3262 nfsfifo_read(struct vop_read_args *ap)
 3263 {
 3264         struct nfsnode *np = VTONFS(ap->a_vp);
 3265         int error;
 3266 
 3267         /*
 3268          * Set access flag.
 3269          */
 3270         mtx_lock(&np->n_mtx);
 3271         np->n_flag |= NACC;
 3272         vfs_timestamp(&np->n_atim);
 3273         mtx_unlock(&np->n_mtx);
 3274         error = fifo_specops.vop_read(ap);
 3275         return error;   
 3276 }
 3277 
 3278 /*
 3279  * Write wrapper for fifos.
 3280  */
 3281 static int
 3282 nfsfifo_write(struct vop_write_args *ap)
 3283 {
 3284         struct nfsnode *np = VTONFS(ap->a_vp);
 3285 
 3286         /*
 3287          * Set update flag.
 3288          */
 3289         mtx_lock(&np->n_mtx);
 3290         np->n_flag |= NUPD;
 3291         vfs_timestamp(&np->n_mtim);
 3292         mtx_unlock(&np->n_mtx);
 3293         return(fifo_specops.vop_write(ap));
 3294 }
 3295 
 3296 /*
 3297  * Close wrapper for fifos.
 3298  *
 3299  * Update the times on the nfsnode then do fifo close.
 3300  */
 3301 static int
 3302 nfsfifo_close(struct vop_close_args *ap)
 3303 {
 3304         struct vnode *vp = ap->a_vp;
 3305         struct nfsnode *np = VTONFS(vp);
 3306         struct vattr vattr;
 3307         struct timespec ts;
 3308 
 3309         mtx_lock(&np->n_mtx);
 3310         if (np->n_flag & (NACC | NUPD)) {
 3311                 vfs_timestamp(&ts);
 3312                 if (np->n_flag & NACC)
 3313                         np->n_atim = ts;
 3314                 if (np->n_flag & NUPD)
 3315                         np->n_mtim = ts;
 3316                 np->n_flag |= NCHG;
 3317                 if (vrefcnt(vp) == 1 &&
 3318                     (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 3319                         VATTR_NULL(&vattr);
 3320                         if (np->n_flag & NACC)
 3321                                 vattr.va_atime = np->n_atim;
 3322                         if (np->n_flag & NUPD)
 3323                                 vattr.va_mtime = np->n_mtim;
 3324                         mtx_unlock(&np->n_mtx);
 3325                         (void)VOP_SETATTR(vp, &vattr, ap->a_cred);
 3326                         goto out;
 3327                 }
 3328         }
 3329         mtx_unlock(&np->n_mtx);
 3330 out:
 3331         return (fifo_specops.vop_close(ap));
 3332 }
 3333 
 3334 /*
 3335  * Just call ncl_writebp() with the force argument set to 1.
 3336  *
 3337  * NOTE: B_DONE may or may not be set in a_bp on call.
 3338  */
 3339 static int
 3340 nfs_bwrite(struct buf *bp)
 3341 {
 3342 
 3343         return (ncl_writebp(bp, 1, curthread));
 3344 }
 3345 
 3346 struct buf_ops buf_ops_newnfs = {
 3347         .bop_name       =       "buf_ops_nfs",
 3348         .bop_write      =       nfs_bwrite,
 3349         .bop_strategy   =       bufstrategy,
 3350         .bop_sync       =       bufsync,
 3351         .bop_bdflush    =       bufbdflush,
 3352 };
 3353 
 3354 static int
 3355 nfs_getacl(struct vop_getacl_args *ap)
 3356 {
 3357         int error;
 3358 
 3359         if (ap->a_type != ACL_TYPE_NFS4)
 3360                 return (EOPNOTSUPP);
 3361         error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
 3362             NULL);
 3363         if (error > NFSERR_STALE) {
 3364                 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
 3365                 error = EPERM;
 3366         }
 3367         return (error);
 3368 }
 3369 
 3370 static int
 3371 nfs_setacl(struct vop_setacl_args *ap)
 3372 {
 3373         int error;
 3374 
 3375         if (ap->a_type != ACL_TYPE_NFS4)
 3376                 return (EOPNOTSUPP);
 3377         error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
 3378             NULL);
 3379         if (error > NFSERR_STALE) {
 3380                 (void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
 3381                 error = EPERM;
 3382         }
 3383         return (error);
 3384 }
 3385 
 3386 static int
 3387 nfs_set_text(struct vop_set_text_args *ap)
 3388 {
 3389         struct vnode *vp = ap->a_vp;
 3390         struct nfsnode *np;
 3391 
 3392         /*
 3393          * If the text file has been mmap'd, flush any dirty pages to the
 3394          * buffer cache and then...
 3395          * Make sure all writes are pushed to the NFS server.  If this is not
 3396          * done, the modify time of the file can change while the text
 3397          * file is being executed.  This will cause the process that is
 3398          * executing the text file to be terminated.
 3399          */
 3400         if (vp->v_object != NULL) {
 3401                 VM_OBJECT_WLOCK(vp->v_object);
 3402                 vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 3403                 VM_OBJECT_WUNLOCK(vp->v_object);
 3404         }
 3405 
 3406         /* Now, flush the buffer cache. */
 3407         ncl_flush(vp, MNT_WAIT, curthread, 0, 0);
 3408 
 3409         /* And, finally, make sure that n_mtime is up to date. */
 3410         np = VTONFS(vp);
 3411         mtx_lock(&np->n_mtx);
 3412         np->n_mtime = np->n_vattr.na_mtime;
 3413         mtx_unlock(&np->n_mtx);
 3414 
 3415         vp->v_vflag |= VV_TEXT;
 3416         return (0);
 3417 }
 3418 
 3419 /*
 3420  * Return POSIX pathconf information applicable to nfs filesystems.
 3421  */
 3422 static int
 3423 nfs_pathconf(struct vop_pathconf_args *ap)
 3424 {
 3425         struct nfsv3_pathconf pc;
 3426         struct nfsvattr nfsva;
 3427         struct vnode *vp = ap->a_vp;
 3428         struct thread *td = curthread;
 3429         int attrflag, error;
 3430 
 3431         if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX ||
 3432             ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
 3433             ap->a_name == _PC_NO_TRUNC)) ||
 3434             (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) {
 3435                 /*
 3436                  * Since only the above 4 a_names are returned by the NFSv3
 3437                  * Pathconf RPC, there is no point in doing it for others.
 3438                  * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can
 3439                  * be used for _PC_NFS4_ACL as well.
 3440                  */
 3441                 error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
 3442                     &attrflag, NULL);
 3443                 if (attrflag != 0)
 3444                         (void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
 3445                             1);
 3446                 if (error != 0)
 3447                         return (error);
 3448         } else {
 3449                 /*
 3450                  * For NFSv2 (or NFSv3 when not one of the above 4 a_names),
 3451                  * just fake them.
 3452                  */
 3453                 pc.pc_linkmax = LINK_MAX;
 3454                 pc.pc_namemax = NFS_MAXNAMLEN;
 3455                 pc.pc_notrunc = 1;
 3456                 pc.pc_chownrestricted = 1;
 3457                 pc.pc_caseinsensitive = 0;
 3458                 pc.pc_casepreserving = 1;
 3459                 error = 0;
 3460         }
 3461         switch (ap->a_name) {
 3462         case _PC_LINK_MAX:
 3463                 *ap->a_retval = pc.pc_linkmax;
 3464                 break;
 3465         case _PC_NAME_MAX:
 3466                 *ap->a_retval = pc.pc_namemax;
 3467                 break;
 3468         case _PC_PATH_MAX:
 3469                 *ap->a_retval = PATH_MAX;
 3470                 break;
 3471         case _PC_PIPE_BUF:
 3472                 *ap->a_retval = PIPE_BUF;
 3473                 break;
 3474         case _PC_CHOWN_RESTRICTED:
 3475                 *ap->a_retval = pc.pc_chownrestricted;
 3476                 break;
 3477         case _PC_NO_TRUNC:
 3478                 *ap->a_retval = pc.pc_notrunc;
 3479                 break;
 3480         case _PC_ACL_EXTENDED:
 3481                 *ap->a_retval = 0;
 3482                 break;
 3483         case _PC_ACL_NFS4:
 3484                 if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 &&
 3485                     NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL))
 3486                         *ap->a_retval = 1;
 3487                 else
 3488                         *ap->a_retval = 0;
 3489                 break;
 3490         case _PC_ACL_PATH_MAX:
 3491                 if (NFS_ISV4(vp))
 3492                         *ap->a_retval = ACL_MAX_ENTRIES;
 3493                 else
 3494                         *ap->a_retval = 3;
 3495                 break;
 3496         case _PC_MAC_PRESENT:
 3497                 *ap->a_retval = 0;
 3498                 break;
 3499         case _PC_ASYNC_IO:
 3500                 /* _PC_ASYNC_IO should have been handled by upper layers. */
 3501                 KASSERT(0, ("_PC_ASYNC_IO should not get here"));
 3502                 error = EINVAL;
 3503                 break;
 3504         case _PC_PRIO_IO:
 3505                 *ap->a_retval = 0;
 3506                 break;
 3507         case _PC_SYNC_IO:
 3508                 *ap->a_retval = 0;
 3509                 break;
 3510         case _PC_ALLOC_SIZE_MIN:
 3511                 *ap->a_retval = vp->v_mount->mnt_stat.f_bsize;
 3512                 break;
 3513         case _PC_FILESIZEBITS:
 3514                 if (NFS_ISV34(vp))
 3515                         *ap->a_retval = 64;
 3516                 else
 3517                         *ap->a_retval = 32;
 3518                 break;
 3519         case _PC_REC_INCR_XFER_SIZE:
 3520                 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 3521                 break;
 3522         case _PC_REC_MAX_XFER_SIZE:
 3523                 *ap->a_retval = -1; /* means ``unlimited'' */
 3524                 break;
 3525         case _PC_REC_MIN_XFER_SIZE:
 3526                 *ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 3527                 break;
 3528         case _PC_REC_XFER_ALIGN:
 3529                 *ap->a_retval = PAGE_SIZE;
 3530                 break;
 3531         case _PC_SYMLINK_MAX:
 3532                 *ap->a_retval = NFS_MAXPATHLEN;
 3533                 break;
 3534 
 3535         default:
 3536                 error = EINVAL;
 3537                 break;
 3538         }
 3539         return (error);
 3540 }
 3541 

Cache object: 53c3fdd20b9dbf286bc0ad49148d1d9c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.