The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/nfsserver/nfs_nfsdport.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include <sys/capability.h>
   38 
   39 /*
   40  * Functions that perform the vfs operations required by the routines in
   41  * nfsd_serv.c. It is hoped that this change will make the server more
   42  * portable.
   43  */
   44 
   45 #include <fs/nfs/nfsport.h>
   46 #include <sys/hash.h>
   47 #include <sys/sysctl.h>
   48 #include <nlm/nlm_prot.h>
   49 #include <nlm/nlm.h>
   50 
   51 FEATURE(nfsd, "NFSv4 server");
   52 
   53 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
   54 extern int nfsrv_useacl;
   55 extern int newnfs_numnfsd;
   56 extern struct mount nfsv4root_mnt;
   57 extern struct nfsrv_stablefirst nfsrv_stablefirst;
   58 extern void (*nfsd_call_servertimer)(void);
   59 extern SVCPOOL  *nfsrvd_pool;
   60 extern struct nfsv4lock nfsd_suspend_lock;
   61 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
   62 NFSDLOCKMUTEX;
   63 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
   64 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
   65 struct mtx nfsrc_udpmtx;
   66 struct mtx nfs_v4root_mutex;
   67 struct nfsrvfh nfs_rootfh, nfs_pubfh;
   68 int nfs_pubfhset = 0, nfs_rootfhset = 0;
   69 struct proc *nfsd_master_proc = NULL;
   70 static pid_t nfsd_master_pid = (pid_t)-1;
   71 static char nfsd_master_comm[MAXCOMLEN + 1];
   72 static struct timeval nfsd_master_start;
   73 static uint32_t nfsv4_sysid = 0;
   74 
   75 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
   76     struct ucred *);
   77 
   78 int nfsrv_enable_crossmntpt = 1;
   79 static int nfs_commit_blks;
   80 static int nfs_commit_miss;
   81 extern int nfsrv_issuedelegs;
   82 extern int nfsrv_dolocallocks;
   83 extern int nfsd_enable_stringtouid;
   84 
   85 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "New NFS server");
   86 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
   87     &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
   88 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
   89     0, "");
   90 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
   91     0, "");
   92 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
   93     &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
   94 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
   95     &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
   96 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW,
   97     &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names");
   98 
   99 #define MAX_REORDERED_RPC       16
  100 #define NUM_HEURISTIC           1031
  101 #define NHUSE_INIT              64
  102 #define NHUSE_INC               16
  103 #define NHUSE_MAX               2048
  104 
  105 static struct nfsheur {
  106         struct vnode *nh_vp;    /* vp to match (unreferenced pointer) */
  107         off_t nh_nextoff;       /* next offset for sequential detection */
  108         int nh_use;             /* use count for selection */
  109         int nh_seqcount;        /* heuristic */
  110 } nfsheur[NUM_HEURISTIC];
  111 
  112 
  113 /*
  114  * Heuristic to detect sequential operation.
  115  */
  116 static struct nfsheur *
  117 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
  118 {
  119         struct nfsheur *nh;
  120         int hi, try;
  121 
  122         /* Locate best candidate. */
  123         try = 32;
  124         hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
  125         nh = &nfsheur[hi];
  126         while (try--) {
  127                 if (nfsheur[hi].nh_vp == vp) {
  128                         nh = &nfsheur[hi];
  129                         break;
  130                 }
  131                 if (nfsheur[hi].nh_use > 0)
  132                         --nfsheur[hi].nh_use;
  133                 hi = (hi + 1) % NUM_HEURISTIC;
  134                 if (nfsheur[hi].nh_use < nh->nh_use)
  135                         nh = &nfsheur[hi];
  136         }
  137 
  138         /* Initialize hint if this is a new file. */
  139         if (nh->nh_vp != vp) {
  140                 nh->nh_vp = vp;
  141                 nh->nh_nextoff = uio->uio_offset;
  142                 nh->nh_use = NHUSE_INIT;
  143                 if (uio->uio_offset == 0)
  144                         nh->nh_seqcount = 4;
  145                 else
  146                         nh->nh_seqcount = 1;
  147         }
  148 
  149         /* Calculate heuristic. */
  150         if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
  151             uio->uio_offset == nh->nh_nextoff) {
  152                 /* See comments in vfs_vnops.c:sequential_heuristic(). */
  153                 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
  154                 if (nh->nh_seqcount > IO_SEQMAX)
  155                         nh->nh_seqcount = IO_SEQMAX;
  156         } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
  157             imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
  158                 /* Probably a reordered RPC, leave seqcount alone. */
  159         } else if (nh->nh_seqcount > 1) {
  160                 nh->nh_seqcount /= 2;
  161         } else {
  162                 nh->nh_seqcount = 0;
  163         }
  164         nh->nh_use += NHUSE_INC;
  165         if (nh->nh_use > NHUSE_MAX)
  166                 nh->nh_use = NHUSE_MAX;
  167         return (nh);
  168 }
  169 
  170 /*
  171  * Get attributes into nfsvattr structure.
  172  */
  173 int
  174 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
  175     struct thread *p, int vpislocked)
  176 {
  177         int error, lockedit = 0;
  178 
  179         if (vpislocked == 0) {
  180                 /*
  181                  * When vpislocked == 0, the vnode is either exclusively
  182                  * locked by this thread or not locked by this thread.
  183                  * As such, shared lock it, if not exclusively locked.
  184                  */
  185                 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
  186                         lockedit = 1;
  187                         NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
  188                 }
  189         }
  190         error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
  191         if (lockedit != 0)
  192                 NFSVOPUNLOCK(vp, 0);
  193 
  194         NFSEXITCODE(error);
  195         return (error);
  196 }
  197 
  198 /*
  199  * Get a file handle for a vnode.
  200  */
  201 int
  202 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
  203 {
  204         int error;
  205 
  206         NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
  207         fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
  208         error = VOP_VPTOFH(vp, &fhp->fh_fid);
  209 
  210         NFSEXITCODE(error);
  211         return (error);
  212 }
  213 
  214 /*
  215  * Perform access checking for vnodes obtained from file handles that would
  216  * refer to files already opened by a Unix client. You cannot just use
  217  * vn_writechk() and VOP_ACCESSX() for two reasons.
  218  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
  219  *     case.
  220  * 2 - The owner is to be given access irrespective of mode bits for some
  221  *     operations, so that processes that chmod after opening a file don't
  222  *     break.
  223  */
  224 int
  225 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
  226     struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
  227     u_int32_t *supportedtypep)
  228 {
  229         struct vattr vattr;
  230         int error = 0, getret = 0;
  231 
  232         if (vpislocked == 0) {
  233                 if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
  234                         error = EPERM;
  235                         goto out;
  236                 }
  237         }
  238         if (accmode & VWRITE) {
  239                 /* Just vn_writechk() changed to check rdonly */
  240                 /*
  241                  * Disallow write attempts on read-only file systems;
  242                  * unless the file is a socket or a block or character
  243                  * device resident on the file system.
  244                  */
  245                 if (NFSVNO_EXRDONLY(exp) ||
  246                     (vp->v_mount->mnt_flag & MNT_RDONLY)) {
  247                         switch (vp->v_type) {
  248                         case VREG:
  249                         case VDIR:
  250                         case VLNK:
  251                                 error = EROFS;
  252                         default:
  253                                 break;
  254                         }
  255                 }
  256                 /*
  257                  * If there's shared text associated with
  258                  * the inode, try to free it up once.  If
  259                  * we fail, we can't allow writing.
  260                  */
  261                 if (VOP_IS_TEXT(vp) && error == 0)
  262                         error = ETXTBSY;
  263         }
  264         if (error != 0) {
  265                 if (vpislocked == 0)
  266                         NFSVOPUNLOCK(vp, 0);
  267                 goto out;
  268         }
  269 
  270         /*
  271          * Should the override still be applied when ACLs are enabled?
  272          */
  273         error = VOP_ACCESSX(vp, accmode, cred, p);
  274         if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
  275                 /*
  276                  * Try again with VEXPLICIT_DENY, to see if the test for
  277                  * deletion is supported.
  278                  */
  279                 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
  280                 if (error == 0) {
  281                         if (vp->v_type == VDIR) {
  282                                 accmode &= ~(VDELETE | VDELETE_CHILD);
  283                                 accmode |= VWRITE;
  284                                 error = VOP_ACCESSX(vp, accmode, cred, p);
  285                         } else if (supportedtypep != NULL) {
  286                                 *supportedtypep &= ~NFSACCESS_DELETE;
  287                         }
  288                 }
  289         }
  290 
  291         /*
  292          * Allow certain operations for the owner (reads and writes
  293          * on files that are already open).
  294          */
  295         if (override != NFSACCCHK_NOOVERRIDE &&
  296             (error == EPERM || error == EACCES)) {
  297                 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
  298                         error = 0;
  299                 else if (override & NFSACCCHK_ALLOWOWNER) {
  300                         getret = VOP_GETATTR(vp, &vattr, cred);
  301                         if (getret == 0 && cred->cr_uid == vattr.va_uid)
  302                                 error = 0;
  303                 }
  304         }
  305         if (vpislocked == 0)
  306                 NFSVOPUNLOCK(vp, 0);
  307 
  308 out:
  309         NFSEXITCODE(error);
  310         return (error);
  311 }
  312 
  313 /*
  314  * Set attribute(s) vnop.
  315  */
  316 int
  317 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
  318     struct thread *p, struct nfsexstuff *exp)
  319 {
  320         int error;
  321 
  322         error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
  323         NFSEXITCODE(error);
  324         return (error);
  325 }
  326 
  327 /*
  328  * Set up nameidata for a lookup() call and do it.
  329  */
  330 int
  331 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
  332     struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
  333     struct vnode **retdirp)
  334 {
  335         struct componentname *cnp = &ndp->ni_cnd;
  336         int i;
  337         struct iovec aiov;
  338         struct uio auio;
  339         int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
  340         int error = 0, crossmnt;
  341         char *cp;
  342 
  343         *retdirp = NULL;
  344         cnp->cn_nameptr = cnp->cn_pnbuf;
  345         ndp->ni_strictrelative = 0;
  346         /*
  347          * Extract and set starting directory.
  348          */
  349         if (dp->v_type != VDIR) {
  350                 if (islocked)
  351                         vput(dp);
  352                 else
  353                         vrele(dp);
  354                 nfsvno_relpathbuf(ndp);
  355                 error = ENOTDIR;
  356                 goto out1;
  357         }
  358         if (islocked)
  359                 NFSVOPUNLOCK(dp, 0);
  360         VREF(dp);
  361         *retdirp = dp;
  362         if (NFSVNO_EXRDONLY(exp))
  363                 cnp->cn_flags |= RDONLY;
  364         ndp->ni_segflg = UIO_SYSSPACE;
  365         crossmnt = 1;
  366 
  367         if (nd->nd_flag & ND_PUBLOOKUP) {
  368                 ndp->ni_loopcnt = 0;
  369                 if (cnp->cn_pnbuf[0] == '/') {
  370                         vrele(dp);
  371                         /*
  372                          * Check for degenerate pathnames here, since lookup()
  373                          * panics on them.
  374                          */
  375                         for (i = 1; i < ndp->ni_pathlen; i++)
  376                                 if (cnp->cn_pnbuf[i] != '/')
  377                                         break;
  378                         if (i == ndp->ni_pathlen) {
  379                                 error = NFSERR_ACCES;
  380                                 goto out;
  381                         }
  382                         dp = rootvnode;
  383                         VREF(dp);
  384                 }
  385         } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
  386             (nd->nd_flag & ND_NFSV4) == 0) {
  387                 /*
  388                  * Only cross mount points for NFSv4 when doing a
  389                  * mount while traversing the file system above
  390                  * the mount point, unless nfsrv_enable_crossmntpt is set.
  391                  */
  392                 cnp->cn_flags |= NOCROSSMOUNT;
  393                 crossmnt = 0;
  394         }
  395 
  396         /*
  397          * Initialize for scan, set ni_startdir and bump ref on dp again
  398          * becuase lookup() will dereference ni_startdir.
  399          */
  400 
  401         cnp->cn_thread = p;
  402         ndp->ni_startdir = dp;
  403         ndp->ni_rootdir = rootvnode;
  404         ndp->ni_topdir = NULL;
  405 
  406         if (!lockleaf)
  407                 cnp->cn_flags |= LOCKLEAF;
  408         for (;;) {
  409                 cnp->cn_nameptr = cnp->cn_pnbuf;
  410                 /*
  411                  * Call lookup() to do the real work.  If an error occurs,
  412                  * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
  413                  * we do not have to dereference anything before returning.
  414                  * In either case ni_startdir will be dereferenced and NULLed
  415                  * out.
  416                  */
  417                 error = lookup(ndp);
  418                 if (error)
  419                         break;
  420 
  421                 /*
  422                  * Check for encountering a symbolic link.  Trivial
  423                  * termination occurs if no symlink encountered.
  424                  */
  425                 if ((cnp->cn_flags & ISSYMLINK) == 0) {
  426                         if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
  427                                 nfsvno_relpathbuf(ndp);
  428                         if (ndp->ni_vp && !lockleaf)
  429                                 NFSVOPUNLOCK(ndp->ni_vp, 0);
  430                         break;
  431                 }
  432 
  433                 /*
  434                  * Validate symlink
  435                  */
  436                 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
  437                         NFSVOPUNLOCK(ndp->ni_dvp, 0);
  438                 if (!(nd->nd_flag & ND_PUBLOOKUP)) {
  439                         error = EINVAL;
  440                         goto badlink2;
  441                 }
  442 
  443                 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
  444                         error = ELOOP;
  445                         goto badlink2;
  446                 }
  447                 if (ndp->ni_pathlen > 1)
  448                         cp = uma_zalloc(namei_zone, M_WAITOK);
  449                 else
  450                         cp = cnp->cn_pnbuf;
  451                 aiov.iov_base = cp;
  452                 aiov.iov_len = MAXPATHLEN;
  453                 auio.uio_iov = &aiov;
  454                 auio.uio_iovcnt = 1;
  455                 auio.uio_offset = 0;
  456                 auio.uio_rw = UIO_READ;
  457                 auio.uio_segflg = UIO_SYSSPACE;
  458                 auio.uio_td = NULL;
  459                 auio.uio_resid = MAXPATHLEN;
  460                 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
  461                 if (error) {
  462                 badlink1:
  463                         if (ndp->ni_pathlen > 1)
  464                                 uma_zfree(namei_zone, cp);
  465                 badlink2:
  466                         vrele(ndp->ni_dvp);
  467                         vput(ndp->ni_vp);
  468                         break;
  469                 }
  470                 linklen = MAXPATHLEN - auio.uio_resid;
  471                 if (linklen == 0) {
  472                         error = ENOENT;
  473                         goto badlink1;
  474                 }
  475                 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
  476                         error = ENAMETOOLONG;
  477                         goto badlink1;
  478                 }
  479 
  480                 /*
  481                  * Adjust or replace path
  482                  */
  483                 if (ndp->ni_pathlen > 1) {
  484                         NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
  485                         uma_zfree(namei_zone, cnp->cn_pnbuf);
  486                         cnp->cn_pnbuf = cp;
  487                 } else
  488                         cnp->cn_pnbuf[linklen] = '\0';
  489                 ndp->ni_pathlen += linklen;
  490 
  491                 /*
  492                  * Cleanup refs for next loop and check if root directory
  493                  * should replace current directory.  Normally ni_dvp
  494                  * becomes the new base directory and is cleaned up when
  495                  * we loop.  Explicitly null pointers after invalidation
  496                  * to clarify operation.
  497                  */
  498                 vput(ndp->ni_vp);
  499                 ndp->ni_vp = NULL;
  500 
  501                 if (cnp->cn_pnbuf[0] == '/') {
  502                         vrele(ndp->ni_dvp);
  503                         ndp->ni_dvp = ndp->ni_rootdir;
  504                         VREF(ndp->ni_dvp);
  505                 }
  506                 ndp->ni_startdir = ndp->ni_dvp;
  507                 ndp->ni_dvp = NULL;
  508         }
  509         if (!lockleaf)
  510                 cnp->cn_flags &= ~LOCKLEAF;
  511 
  512 out:
  513         if (error) {
  514                 uma_zfree(namei_zone, cnp->cn_pnbuf);
  515                 ndp->ni_vp = NULL;
  516                 ndp->ni_dvp = NULL;
  517                 ndp->ni_startdir = NULL;
  518                 cnp->cn_flags &= ~HASBUF;
  519         } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
  520                 ndp->ni_dvp = NULL;
  521         }
  522 
  523 out1:
  524         NFSEXITCODE2(error, nd);
  525         return (error);
  526 }
  527 
  528 /*
  529  * Set up a pathname buffer and return a pointer to it and, optionally
  530  * set a hash pointer.
  531  */
  532 void
  533 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
  534 {
  535         struct componentname *cnp = &ndp->ni_cnd;
  536 
  537         cnp->cn_flags |= (NOMACCHECK | HASBUF);
  538         cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
  539         if (hashpp != NULL)
  540                 *hashpp = NULL;
  541         *bufpp = cnp->cn_pnbuf;
  542 }
  543 
  544 /*
  545  * Release the above path buffer, if not released by nfsvno_namei().
  546  */
  547 void
  548 nfsvno_relpathbuf(struct nameidata *ndp)
  549 {
  550 
  551         if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
  552                 panic("nfsrelpath");
  553         uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
  554         ndp->ni_cnd.cn_flags &= ~HASBUF;
  555 }
  556 
  557 /*
  558  * Readlink vnode op into an mbuf list.
  559  */
  560 int
  561 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
  562     struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
  563 {
  564         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
  565         struct iovec *ivp = iv;
  566         struct uio io, *uiop = &io;
  567         struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
  568         int i, len, tlen, error = 0;
  569 
  570         len = 0;
  571         i = 0;
  572         while (len < NFS_MAXPATHLEN) {
  573                 NFSMGET(mp);
  574                 MCLGET(mp, M_WAIT);
  575                 mp->m_len = NFSMSIZ(mp);
  576                 if (len == 0) {
  577                         mp3 = mp2 = mp;
  578                 } else {
  579                         mp2->m_next = mp;
  580                         mp2 = mp;
  581                 }
  582                 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
  583                         mp->m_len = NFS_MAXPATHLEN - len;
  584                         len = NFS_MAXPATHLEN;
  585                 } else {
  586                         len += mp->m_len;
  587                 }
  588                 ivp->iov_base = mtod(mp, caddr_t);
  589                 ivp->iov_len = mp->m_len;
  590                 i++;
  591                 ivp++;
  592         }
  593         uiop->uio_iov = iv;
  594         uiop->uio_iovcnt = i;
  595         uiop->uio_offset = 0;
  596         uiop->uio_resid = len;
  597         uiop->uio_rw = UIO_READ;
  598         uiop->uio_segflg = UIO_SYSSPACE;
  599         uiop->uio_td = NULL;
  600         error = VOP_READLINK(vp, uiop, cred);
  601         if (error) {
  602                 m_freem(mp3);
  603                 *lenp = 0;
  604                 goto out;
  605         }
  606         if (uiop->uio_resid > 0) {
  607                 len -= uiop->uio_resid;
  608                 tlen = NFSM_RNDUP(len);
  609                 nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
  610         }
  611         *lenp = len;
  612         *mpp = mp3;
  613         *mpendp = mp;
  614 
  615 out:
  616         NFSEXITCODE(error);
  617         return (error);
  618 }
  619 
  620 /*
  621  * Read vnode op call into mbuf list.
  622  */
  623 int
  624 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
  625     struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
  626 {
  627         struct mbuf *m;
  628         int i;
  629         struct iovec *iv;
  630         struct iovec *iv2;
  631         int error = 0, len, left, siz, tlen, ioflag = 0;
  632         struct mbuf *m2 = NULL, *m3;
  633         struct uio io, *uiop = &io;
  634         struct nfsheur *nh;
  635 
  636         len = left = NFSM_RNDUP(cnt);
  637         m3 = NULL;
  638         /*
  639          * Generate the mbuf list with the uio_iov ref. to it.
  640          */
  641         i = 0;
  642         while (left > 0) {
  643                 NFSMGET(m);
  644                 MCLGET(m, M_WAIT);
  645                 m->m_len = 0;
  646                 siz = min(M_TRAILINGSPACE(m), left);
  647                 left -= siz;
  648                 i++;
  649                 if (m3)
  650                         m2->m_next = m;
  651                 else
  652                         m3 = m;
  653                 m2 = m;
  654         }
  655         MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
  656             M_TEMP, M_WAITOK);
  657         uiop->uio_iov = iv2 = iv;
  658         m = m3;
  659         left = len;
  660         i = 0;
  661         while (left > 0) {
  662                 if (m == NULL)
  663                         panic("nfsvno_read iov");
  664                 siz = min(M_TRAILINGSPACE(m), left);
  665                 if (siz > 0) {
  666                         iv->iov_base = mtod(m, caddr_t) + m->m_len;
  667                         iv->iov_len = siz;
  668                         m->m_len += siz;
  669                         left -= siz;
  670                         iv++;
  671                         i++;
  672                 }
  673                 m = m->m_next;
  674         }
  675         uiop->uio_iovcnt = i;
  676         uiop->uio_offset = off;
  677         uiop->uio_resid = len;
  678         uiop->uio_rw = UIO_READ;
  679         uiop->uio_segflg = UIO_SYSSPACE;
  680         uiop->uio_td = NULL;
  681         nh = nfsrv_sequential_heuristic(uiop, vp);
  682         ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
  683         error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
  684         FREE((caddr_t)iv2, M_TEMP);
  685         if (error) {
  686                 m_freem(m3);
  687                 *mpp = NULL;
  688                 goto out;
  689         }
  690         nh->nh_nextoff = uiop->uio_offset;
  691         tlen = len - uiop->uio_resid;
  692         cnt = cnt < tlen ? cnt : tlen;
  693         tlen = NFSM_RNDUP(cnt);
  694         if (tlen == 0) {
  695                 m_freem(m3);
  696                 m3 = NULL;
  697         } else if (len != tlen || tlen != cnt)
  698                 nfsrv_adj(m3, len - tlen, tlen - cnt);
  699         *mpp = m3;
  700         *mpendp = m2;
  701 
  702 out:
  703         NFSEXITCODE(error);
  704         return (error);
  705 }
  706 
  707 /*
  708  * Write vnode op from an mbuf list.
  709  */
  710 int
  711 nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
  712     struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
  713 {
  714         struct iovec *ivp;
  715         int i, len;
  716         struct iovec *iv;
  717         int ioflags, error;
  718         struct uio io, *uiop = &io;
  719         struct nfsheur *nh;
  720 
  721         MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
  722             M_WAITOK);
  723         uiop->uio_iov = iv = ivp;
  724         uiop->uio_iovcnt = cnt;
  725         i = mtod(mp, caddr_t) + mp->m_len - cp;
  726         len = retlen;
  727         while (len > 0) {
  728                 if (mp == NULL)
  729                         panic("nfsvno_write");
  730                 if (i > 0) {
  731                         i = min(i, len);
  732                         ivp->iov_base = cp;
  733                         ivp->iov_len = i;
  734                         ivp++;
  735                         len -= i;
  736                 }
  737                 mp = mp->m_next;
  738                 if (mp) {
  739                         i = mp->m_len;
  740                         cp = mtod(mp, caddr_t);
  741                 }
  742         }
  743 
  744         if (stable == NFSWRITE_UNSTABLE)
  745                 ioflags = IO_NODELOCKED;
  746         else
  747                 ioflags = (IO_SYNC | IO_NODELOCKED);
  748         uiop->uio_resid = retlen;
  749         uiop->uio_rw = UIO_WRITE;
  750         uiop->uio_segflg = UIO_SYSSPACE;
  751         NFSUIOPROC(uiop, p);
  752         uiop->uio_offset = off;
  753         nh = nfsrv_sequential_heuristic(uiop, vp);
  754         ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
  755         error = VOP_WRITE(vp, uiop, ioflags, cred);
  756         if (error == 0)
  757                 nh->nh_nextoff = uiop->uio_offset;
  758         FREE((caddr_t)iv, M_TEMP);
  759 
  760         NFSEXITCODE(error);
  761         return (error);
  762 }
  763 
  764 /*
  765  * Common code for creating a regular file (plus special files for V2).
       *
       * The starting error is nd->nd_repstat.  When that is zero and the lookup
       * in ndp found no existing object (ni_vp == NULL), the object is created
       * according to nvap->na_type:
       *   VREG, VSOCK       - VOP_CREATE(); for an exclusive create the flag is
       *                       cleared and cverf[0]/cverf[1] are stored in the
       *                       access time with VOP_SETATTR().
       *   VCHR, VBLK, VFIFO - VOP_MKNOD() with rdev, after a PRIV_VFS_MKNOD_DEV
       *                       check for everything except a fifo.
       *   anything else     - fails with ENXIO.
       * Otherwise the lookup is cleaned up and, if there is still no error and
       * na_size is set, the existing object's size is set after a VWRITE access
       * check.
       *
       * Every path releases ni_startdir, the path name buffer and ni_dvp.
       * *vpp is set to ndp->ni_vp, except on the paths that "goto out" (failed
       * privilege check, failed VOP_MKNOD(), unsupported type), which leave it
       * untouched.  Returns 0 or an error number.
  766  */
  767 int
  768 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
  769     struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
  770     int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
  771 {
  772         u_quad_t tempsize;
  773         int error;
  774 
  775         error = nd->nd_repstat;
  776         if (!error && ndp->ni_vp == NULL) {
  777                 if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
  778                         vrele(ndp->ni_startdir);
  779                         error = VOP_CREATE(ndp->ni_dvp,
  780                             &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
  781                         vput(ndp->ni_dvp);
  782                         nfsvno_relpathbuf(ndp);
  783                         if (!error) {
  784                                 if (*exclusive_flagp) {
  785                                         *exclusive_flagp = 0;
  786                                         NFSVNO_ATTRINIT(nvap);
  787                                         nvap->na_atime.tv_sec = cverf[0];
  788                                         nvap->na_atime.tv_nsec = cverf[1];
  789                                         error = VOP_SETATTR(ndp->ni_vp,
  790                                             &nvap->na_vattr, nd->nd_cred);
  791                                         if (error != 0) {
                                                      /*
                                                       * The new vnode is dropped and
                                                       * ni_vp cleared, so *vpp ends up
                                                       * NULL; any setattr failure is
                                                       * reported as NFSERR_NOTSUPP.
                                                       */
  792                                                 vput(ndp->ni_vp);
  793                                                 ndp->ni_vp = NULL;
  794                                                 error = NFSERR_NOTSUPP;
  795                                         }
  796                                 }
  797                         }
  798                 /*
  799                  * NFS V2 Only. nfsrvd_mknod() does this for V3.
  800                  * (This implies, just get out on an error.)
  801                  */
  802                 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
  803                         nvap->na_type == VFIFO) {
                              /* A VCHR with rdev 0xffffffff is created as a fifo. */
  804                         if (nvap->na_type == VCHR && rdev == 0xffffffff)
  805                                 nvap->na_type = VFIFO;
  806                         if (nvap->na_type != VFIFO &&
  807                             (error = priv_check_cred(nd->nd_cred,
  808                              PRIV_VFS_MKNOD_DEV, 0))) {
  809                                 vrele(ndp->ni_startdir);
  810                                 nfsvno_relpathbuf(ndp);
  811                                 vput(ndp->ni_dvp);
  812                                 goto out;
  813                         }
  814                         nvap->na_rdev = rdev;
  815                         error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
  816                             &ndp->ni_cnd, &nvap->na_vattr);
  817                         vput(ndp->ni_dvp);
  818                         nfsvno_relpathbuf(ndp);
  819                         vrele(ndp->ni_startdir);
  820                         if (error)
  821                                 goto out;
  822                 } else {
  823                         vrele(ndp->ni_startdir);
  824                         nfsvno_relpathbuf(ndp);
  825                         vput(ndp->ni_dvp);
  826                         error = ENXIO;
  827                         goto out;
  828                 }
  829                 *vpp = ndp->ni_vp;
  830         } else {
  831                 /*
  832                  * Handle cases where error is already set and/or
  833                  * the file exists.
  834                  * 1 - clean up the lookup
  835                  * 2 - iff !error and na_size set, truncate it
  836                  */
  837                 vrele(ndp->ni_startdir);
  838                 nfsvno_relpathbuf(ndp);
  839                 *vpp = ndp->ni_vp;
                      /*
                       * When the parent directory is the object itself, vrele() is
                       * used on it instead of vput().
                       * NOTE(review): presumably because the lock is still needed
                       * through *vpp - confirm.
                       */
  840                 if (ndp->ni_dvp == *vpp)
  841                         vrele(ndp->ni_dvp);
  842                 else
  843                         vput(ndp->ni_dvp);
  844                 if (!error && nvap->na_size != VNOVAL) {
  845                         error = nfsvno_accchk(*vpp, VWRITE,
  846                             nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
  847                             NFSACCCHK_VPISLOCKED, NULL);
  848                         if (!error) {
                                      /* Reset nvap so that only the size is set. */
  849                                 tempsize = nvap->na_size;
  850                                 NFSVNO_ATTRINIT(nvap);
  851                                 nvap->na_size = tempsize;
  852                                 error = VOP_SETATTR(*vpp,
  853                                     &nvap->na_vattr, nd->nd_cred);
  854                         }
  855                 }
                      /* On error the vnode stored in *vpp is released here. */
  856                 if (error)
  857                         vput(*vpp);
  858         }
  859 
  860 out:
  861         NFSEXITCODE(error);
  862         return (error);
  863 }
  864 
  865 /*
  866  * Do a mknod vnode op: create a device node, fifo or socket from the
  867  * lookup results in ndp.  EEXIST if the name exists, NFSERR_BADTYPE for
  868  * any other type.  The lookup is cleaned up on every path.
  869  */
  870 int
  871 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
  872     struct thread *p)
  873 {
  874         enum vtype type = nvap->na_type;
  875         int error;
  876 
  877         if (ndp->ni_vp != NULL) {
  878                 vrele(ndp->ni_startdir);
  879                 nfsvno_relpathbuf(ndp);
  880                 vput(ndp->ni_dvp);
  881                 vrele(ndp->ni_vp);
  882                 error = EEXIST;
  883                 goto out;
  884         }
  885         switch (type) {
  886         case VSOCK:
  887                 vrele(ndp->ni_startdir);
  888                 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
  889                     &ndp->ni_cnd, &nvap->na_vattr);
  890                 vput(ndp->ni_dvp);
  891                 nfsvno_relpathbuf(ndp);
  892                 break;
  893         case VCHR:
  894         case VBLK:
  895         case VFIFO:
  896                 /* Device nodes need privilege; a fifo does not. */
  897                 error = 0;
  898                 if (type != VFIFO)
  899                         error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0);
  900                 if (error != 0) {
  901                         vrele(ndp->ni_startdir);
  902                         nfsvno_relpathbuf(ndp);
  903                         vput(ndp->ni_dvp);
  904                         break;
  905                 }
  906                 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
  907                     &ndp->ni_cnd, &nvap->na_vattr);
  908                 vput(ndp->ni_dvp);
  909                 nfsvno_relpathbuf(ndp);
  910                 vrele(ndp->ni_startdir);
  911                 break;
  912         default:
  913                 vrele(ndp->ni_startdir);
  914                 nfsvno_relpathbuf(ndp);
  915                 vput(ndp->ni_dvp);
  916                 error = NFSERR_BADTYPE;
  917                 break;
  918         }
  919 out:
  920         NFSEXITCODE(error);
  921         return (error);
  922 }
  923 
  924 /*
  925  * Mkdir vnode op.  Fails with EEXIST when the name already exists; the
  926  * parent directory vnode and path buffer are released either way.
  927  */
  928 int
  929 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
  930     struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
  931 {
  932         struct vnode *dvp = ndp->ni_dvp;
  933         int error;
  934 
  935         if (ndp->ni_vp == NULL) {
  936                 error = VOP_MKDIR(dvp, &ndp->ni_vp, &ndp->ni_cnd,
  937                     &nvap->na_vattr);
  938                 vput(dvp);
  939                 nfsvno_relpathbuf(ndp);
  940         } else {
  941                 if (dvp != ndp->ni_vp)
  942                         vput(dvp);
  943                 else
  944                         vrele(dvp);
  945                 vrele(ndp->ni_vp);
  946                 nfsvno_relpathbuf(ndp);
  947                 error = EEXIST;
  948         }
  949         NFSEXITCODE(error);
  950         return (error);
  951 }
  952 
  953 /*
  954  * symlink vnode op: create a symbolic link with contents pathcp at the
  955  * name looked up in ndp.  Fails with EEXIST if the name already exists.
  956  * For NFSv2 (not_v2 == 0) the new vnode is released here on success;
  957  * otherwise it is left in ndp->ni_vp for the caller.
  958  */
  959 int
  960 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
  961     int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
  962     struct nfsexstuff *exp)
  963 {
  964         int error;
  965 
  966         if (ndp->ni_vp != NULL) {
  967                 /* Name is taken: drop everything the lookup returned. */
  968                 vrele(ndp->ni_startdir);
  969                 nfsvno_relpathbuf(ndp);
  970                 if (ndp->ni_dvp != ndp->ni_vp)
  971                         vput(ndp->ni_dvp);
  972                 else
  973                         vrele(ndp->ni_dvp);
  974                 vrele(ndp->ni_vp);
  975                 error = EEXIST;
  976         } else {
  977                 error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
  978                     &nvap->na_vattr, pathcp);
  979                 vput(ndp->ni_dvp);
  980                 vrele(ndp->ni_startdir);
  981                 nfsvno_relpathbuf(ndp);
  982                 /*
  983                  * VOP_SYMLINK() returns ni_vp, so no further lookup is
  984                  * done.  The vnode is simply vput for v2.
  985                  */
  986                 if (error == 0 && not_v2 == 0)
  987                         vput(ndp->ni_vp);
  988         }
  989 
  990         NFSEXITCODE(error);
  991         return (error);
  992 }
  993 
  994 /*
  995  * Parse symbolic link arguments.  On success *pathcpp is an M_TEMP buffer
  996  * of *lenp + 1 bytes filled in by nfsrv_mtostr(); the caller must free it.
  997  * On failure nothing is left allocated, *pathcpp is NULL and *lenp is 0.
  998  */
  999 int
 1000 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
 1001     struct thread *p, char **pathcpp, int *lenp)
 1002 {
 1003         struct nfsv2_sattr *v2attrp;
 1004         u_int32_t *lenwordp;
 1005         char *linkbuf = NULL;
 1006         int error = 0, linklen;
 1007 
 1008         *lenp = 0;
 1009         *pathcpp = NULL;
 1010         if ((nd->nd_flag & ND_NFSV3) != 0) {
 1011                 error = nfsrv_sattr(nd, nvap, NULL, NULL, p);
 1012                 if (error != 0)
 1013                         goto nfsmout;
 1014         }
 1015         NFSM_DISSECT(lenwordp, u_int32_t *, NFSX_UNSIGNED);
 1016         linklen = fxdr_unsigned(int, *lenwordp);
 1017         if (linklen <= 0 || linklen > NFS_MAXPATHLEN) {
 1018                 error = EBADRPC;
 1019                 goto nfsmout;
 1020         }
 1021         MALLOC(linkbuf, caddr_t, linklen + 1, M_TEMP, M_WAITOK);
 1022         error = nfsrv_mtostr(nd, linkbuf, linklen);
 1023         if (error != 0)
 1024                 goto nfsmout;
 1025         if ((nd->nd_flag & ND_NFSV2) != 0) {
 1026                 NFSM_DISSECT(v2attrp, struct nfsv2_sattr *, NFSX_V2SATTR);
 1027                 nvap->na_mode = fxdr_unsigned(u_int16_t, v2attrp->sa_mode);
 1028         }
 1029         *lenp = linklen;
 1030         *pathcpp = linkbuf;
 1031         NFSEXITCODE2(0, nd);
 1032         return (0);
 1033 nfsmout:
 1034         if (linkbuf != NULL)
 1035                 free(linkbuf, M_TEMP);
 1036         NFSEXITCODE2(error, nd);
 1037         return (error);
 1038 }
 1039 
 1040 /*
 1041  * Remove a non-directory object (a directory yields NFSERR_ISDIR).  Both
 1042  * vnodes from the lookup are released whatever the outcome.
 1043  */
 1044 int
 1045 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
 1046     struct thread *p, struct nfsexstuff *exp)
 1047 {
 1048         struct vnode *dvp = ndp->ni_dvp, *target = ndp->ni_vp;
 1049         int error = 0;
 1050 
 1051         if (target->v_type == VDIR)
 1052                 error = NFSERR_ISDIR;
 1053         else if (is_v4 != 0)
 1054                 error = nfsrv_checkremove(target, 1, p);
 1055         if (error == 0)
 1056                 error = VOP_REMOVE(dvp, target, &ndp->ni_cnd);
 1057         if (dvp != target)
 1058                 vput(dvp);
 1059         else
 1060                 vrele(dvp);
 1061         vput(target);
 1062         if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
 1063                 nfsvno_relpathbuf(ndp);
 1064         NFSEXITCODE(error);
 1065         return (error);
 1066 }
 1067 
 1068 /*
 1069  * Remove a directory.
 1070  * Refuses with ENOTDIR when the object is not a directory, with EINVAL
 1071  * for an attempt to remove "." and with EBUSY for the root of a mounted
 1072  * file system.  Both vnodes from the lookup are released in every case.
 1073  */
 1074 int
 1075 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
 1076     struct thread *p, struct nfsexstuff *exp)
 1077 {
 1078         struct vnode *dvp = ndp->ni_dvp, *dirvp = ndp->ni_vp;
 1079         int error;
 1080 
 1081         if (dirvp->v_type != VDIR) {
 1082                 error = ENOTDIR;
 1083         } else if (dvp == dirvp) {
 1084                 /* No rmdir "." please. */
 1085                 error = EINVAL;
 1086         } else if ((dirvp->v_vflag & VV_ROOT) != 0) {
 1087                 /* The root of a mounted filesystem cannot be deleted. */
 1088                 error = EBUSY;
 1089         } else {
 1090                 error = VOP_RMDIR(dvp, dirvp, &ndp->ni_cnd);
 1091         }
 1092 
 1093         /*
 1094          * Clean up the lookup.  When parent and object are the same vnode
 1095          * the parent is released with vrele() rather than vput().
 1096          */
 1097         if (dvp != dirvp)
 1098                 vput(dvp);
 1099         else
 1100                 vrele(dvp);
 1101         vput(dirvp);
 1102         if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
 1103                 nfsvno_relpathbuf(ndp);
 1104 
 1105         NFSEXITCODE(error);
 1106         return (error);
 1107 }
 1108 
 1109 /*
 1110  * Rename vnode op.
       *
       * fromndp and tondp hold the lookup results for the source and the
       * destination.  ndstat is an error found earlier by the caller: when it
       * is non-zero only the source side is released and ndstat is returned.
       * ndflag supplies the ND_NFSV2/ND_NFSV4 protocol bits, which select
       * between alternative error numbers and the delegation handling.
       *
       * If none of the sanity checks fails the vnodes are handed to
       * VOP_RENAME(); nothing releases them here afterwards (presumably
       * VOP_RENAME() does that itself - confirm).  If a check fails they are
       * released here.  Source and destination being the same vnode is not an
       * error: it is flagged internally with -1 and reported as 0.
       * Returns 0 or an error number.
 1111  */
 1112 int
 1113 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
 1114     u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
 1115 {
 1116         struct vnode *fvp, *tvp, *tdvp;
 1117         int error = 0;
 1118 
 1119         fvp = fromndp->ni_vp;
              /* Earlier failure: tondp is not touched at all on this path. */
 1120         if (ndstat) {
 1121                 vrele(fromndp->ni_dvp);
 1122                 vrele(fvp);
 1123                 error = ndstat;
 1124                 goto out1;
 1125         }
 1126         tdvp = tondp->ni_dvp;
 1127         tvp = tondp->ni_vp;
 1128         if (tvp != NULL) {
                      /* A directory may only replace a directory and vice versa. */
 1129                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 1130                         error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
 1131                         goto out;
 1132                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 1133                         error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
 1134                         goto out;
 1135                 }
                      /* The destination directory has something mounted on it. */
 1136                 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
 1137                         error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 1138                         goto out;
 1139                 }
 1140 
 1141                 /*
 1142                  * A rename to '.' or '..' results in a prematurely
 1143                  * unlocked vnode on FreeBSD5, so I'm just going to fail that
 1144                  * here.
 1145                  */
 1146                 if ((tondp->ni_cnd.cn_namelen == 1 &&
 1147                      tondp->ni_cnd.cn_nameptr[0] == '.') ||
 1148                     (tondp->ni_cnd.cn_namelen == 2 &&
 1149                      tondp->ni_cnd.cn_nameptr[0] == '.' &&
 1150                      tondp->ni_cnd.cn_nameptr[1] == '.')) {
 1151                         error = EINVAL;
 1152                         goto out;
 1153                 }
 1154         }
              /* The source directory has something mounted on it. */
 1155         if (fvp->v_type == VDIR && fvp->v_mountedhere) {
 1156                 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 1157                 goto out;
 1158         }
              /* Source and destination directory are on different mounts. */
 1159         if (fvp->v_mount != tdvp->v_mount) {
 1160                 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 1161                 goto out;
 1162         }
              /* The source is the destination's parent directory. */
 1163         if (fvp == tdvp) {
 1164                 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
 1165                 goto out;
 1166         }
 1167         if (fvp == tvp) {
 1168                 /*
 1169                  * If source and destination are the same, there is nothing to
 1170                  * do. Set error to -1 to indicate this.
 1171                  */
 1172                 error = -1;
 1173                 goto out;
 1174         }
 1175         if (ndflag & ND_NFSV4) {
                      /*
                       * fvp is locked only for the duration of the
                       * nfsrv_checkremove() call; failing to get the lock is
                       * reported as EPERM.
                       */
 1176                 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
 1177                         error = nfsrv_checkremove(fvp, 0, p);
 1178                         NFSVOPUNLOCK(fvp, 0);
 1179                 } else
 1180                         error = EPERM;
 1181                 if (tvp && !error)
 1182                         error = nfsrv_checkremove(tvp, 1, p);
 1183         } else {
 1184                 /*
 1185                  * For NFSv2 and NFSv3, try to get rid of the delegation, so
 1186                  * that the NFSv4 client won't be confused by the rename.
 1187                  * Since nfsd_recalldelegation() can only be called on an
 1188                  * unlocked vnode at this point and fvp is the file that will
 1189                  * still exist after the rename, just do fvp.
 1190                  */
 1191                 nfsd_recalldelegation(fvp, p);
 1192         }
 1193 out:
 1194         if (!error) {
 1195                 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
 1196                     &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
 1197                     &tondp->ni_cnd);
 1198         } else {
                      /*
                       * A check failed, or there is nothing to do: release the
                       * vnodes of both sides here and turn the -1 "same file"
                       * marker back into success.
                       */
 1199                 if (tdvp == tvp)
 1200                         vrele(tdvp);
 1201                 else
 1202                         vput(tdvp);
 1203                 if (tvp)
 1204                         vput(tvp);
 1205                 vrele(fromndp->ni_dvp);
 1206                 vrele(fvp);
 1207                 if (error == -1)
 1208                         error = 0;
 1209         }
 1210         vrele(tondp->ni_startdir);
 1211         nfsvno_relpathbuf(tondp);
 1212 out1:
 1213         vrele(fromndp->ni_startdir);
 1214         nfsvno_relpathbuf(fromndp);
 1215         NFSEXITCODE(error);
 1216         return (error);
 1217 }
 1218 
 1219 /*
 1220  * Link vnode op.
       *
       * Links vp under the name looked up in ndp.  Fails with EEXIST if the
       * name already exists and with EXDEV if vp and the target directory are
       * on different mounts.  vp is locked exclusively only around the
       * VOP_LINK() call; if it turns out to be VI_DOOMED once locked, EPERM is
       * returned instead.  The directory vnode, any existing ni_vp and the
       * path name buffer are released on every path; vp itself is only
       * unlocked, not released.  Returns 0 or an error number.
 1221  */
 1222 int
 1223 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
 1224     struct thread *p, struct nfsexstuff *exp)
 1225 {
 1226         struct vnode *xp;
 1227         int error = 0;
 1228 
 1229         xp = ndp->ni_vp;
 1230         if (xp != NULL) {
 1231                 error = EEXIST;
 1232         } else {
                      /* xp is reused for the directory to compare the mounts. */
 1233                 xp = ndp->ni_dvp;
 1234                 if (vp->v_mount != xp->v_mount)
 1235                         error = EXDEV;
 1236         }
 1237         if (!error) {
 1238                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 1239                 if ((vp->v_iflag & VI_DOOMED) == 0)
 1240                         error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
 1241                 else
 1242                         error = EPERM;
                      /*
                       * When the directory is vp itself it is only vrele()d;
                       * the NFSVOPUNLOCK() below takes care of the lock.
                       */
 1243                 if (ndp->ni_dvp == vp)
 1244                         vrele(ndp->ni_dvp);
 1245                 else
 1246                         vput(ndp->ni_dvp);
 1247                 NFSVOPUNLOCK(vp, 0);
 1248         } else {
 1249                 if (ndp->ni_dvp == ndp->ni_vp)
 1250                         vrele(ndp->ni_dvp);
 1251                 else
 1252                         vput(ndp->ni_dvp);
 1253                 if (ndp->ni_vp)
 1254                         vrele(ndp->ni_vp);
 1255         }
 1256         nfsvno_relpathbuf(ndp);
 1257         NFSEXITCODE(error);
 1258         return (error);
 1259 }
 1260 
 1261 /*
 1262  * Do the fsync() appropriate for the commit.
 1263  * A cnt of 0 asks for a flush from off to the end of file (RFC 1813
 1264  * 3.3.21; the same is true for NFSv4: RFC 3530 Sec. 14.2.3).  When cnt is
 1265  * zero, negative or larger than MAX_COMMIT_COUNT the whole file is
 1266  * synced; otherwise only the blocks covering the range are written.
 1267  * Returns 0 or the error from VOP_FSYNC().
 1268  */
 1269 int
 1270 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
 1271     struct thread *td)
 1272 {
 1273         int error = 0;
 1274 
 1275         /*
 1276          * At this time VOP_FSYNC does not accept offset and byte count
 1277          * parameters, so the open-ended case syncs the whole file.  A
 1278          * negative cnt is treated the same way: the ranged code below
 1279          * cannot honor it (its buffer loop only runs while cnt > 0) and
 1280          * could return success without having written the data.
 1281          */
 1282         if (cnt <= 0 || cnt > MAX_COMMIT_COUNT) {
 1283                 if (vp->v_object &&
 1284                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 1285                         VM_OBJECT_LOCK(vp->v_object);
 1286                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 1287                         VM_OBJECT_UNLOCK(vp->v_object);
 1288                 }
 1289                 error = VOP_FSYNC(vp, MNT_WAIT, td);
 1290         } else {
 1291                 /*
 1292                  * Locate and synchronously write any buffers that fall
 1293                  * into the requested range.  Note:  we are assuming that
 1294                  * f_iosize is a power of 2.
 1295                  */
 1296                 int iosize = vp->v_mount->mnt_stat.f_iosize;
 1297                 int iomask = iosize - 1;
 1298                 struct bufobj *bo;
 1299                 daddr_t lblkno;
 1300 
 1301                 /* Align to iosize boundary, super-align to page boundary. */
 1302                 if (off & iomask) {
 1303                         cnt += off & iomask;
 1304                         off &= ~(u_quad_t)iomask;
 1305                 }
 1306                 if (off & PAGE_MASK) {
 1307                         cnt += off & PAGE_MASK;
 1308                         off &= ~(u_quad_t)PAGE_MASK;
 1309                 }
 1310                 lblkno = off / iosize;
 1311 
 1312                 if (vp->v_object &&
 1313                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 1314                         VM_OBJECT_LOCK(vp->v_object);
 1315                         vm_object_page_clean(vp->v_object, off, off + cnt,
 1316                             OBJPC_SYNC);
 1317                         VM_OBJECT_UNLOCK(vp->v_object);
 1318                 }
 1319 
 1320                 bo = &vp->v_bufobj;
 1321                 BO_LOCK(bo);
 1322                 while (cnt > 0) {
 1323                         struct buf *bp;
 1324 
 1325                         /*
 1326                          * If we have a buffer and it is marked B_DELWRI we
 1327                          * have to lock and write it.  Otherwise the prior
 1328                          * write is assumed to have already been committed.
 1329                          * gbincore() can return invalid buffers now so we
 1330                          * have to check that bit as well (though B_DELWRI
 1331                          * should not be set if B_INVAL is set there could be
 1332                          * a race here since we haven't locked the buffer).
 1333                          */
 1334                         if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 1335                                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 1336                                     LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
 1337                                         BO_LOCK(bo);
 1338                                         continue; /* retry */
 1339                                 }
 1340                                 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
 1341                                     B_DELWRI) {
 1342                                         bremfree(bp);
 1343                                         bp->b_flags &= ~B_ASYNC;
 1344                                         bwrite(bp);
 1345                                         ++nfs_commit_miss;
 1346                                 } else
 1347                                         BUF_UNLOCK(bp);
 1348                                 BO_LOCK(bo);
 1349                         }
 1350                         ++nfs_commit_blks;
 1351                         if (cnt < iosize)
 1352                                 break;
 1353                         cnt -= iosize;
 1354                         ++lblkno;
 1355                 }
 1356                 BO_UNLOCK(bo);
 1357         }
 1358         NFSEXITCODE(error);
 1359         return (error);
 1360 }
 1361 
 1362 /*
 1363  * Statfs vnode op; negative f_bavail and f_ffree are reported as 0.
 1364  */
 1365 int
 1366 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
 1367 {
 1368         int error;
 1369 
 1370         error = VFS_STATFS(vp->v_mount, sf);
 1371         if (error != 0)
 1372                 goto out;
 1373 
 1374         /*
 1375          * NFS carries these values as unsigned on the wire, so a
 1376          * negative value cannot be represented and would show up as a
 1377          * very large positive one for clients like Solaris10.
 1378          */
 1379         if (sf->f_ffree < 0)
 1380                 sf->f_ffree = 0;
 1381         if (sf->f_bavail < 0)
 1382                 sf->f_bavail = 0;
 1383 out:
 1384         NFSEXITCODE(error);
 1385         return (error);
 1386 }
 1387 
 1388 /*
 1389  * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
 1390  * must handle nfsrv_opencheck() calls after any other access checks.
       *
       * The outcome is reported through nd->nd_repstat; there is no return
       * value.  *vpp receives ndp->ni_vp when the create/open path is taken
       * and NULL when nd_repstat is already non-zero before it.
       *
       * When the lookup found nothing (ni_vp == NULL), nfsrv_opencheck() is
       * called first and the file is then created with VOP_CREATE().  An
       * exclusive create stores cverf[0]/cverf[1] in the access time and sets
       * NFSATTRBIT_TIMEACCESS in attrbitp; any other create goes through
       * nfsrv_fixattr().  When the object already exists, a requested size
       * change on a regular file is applied after a VWRITE access check and
       * nfsrv_opencheck().
 1391  */
 1392 void
 1393 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
 1394     nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
 1395     int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
 1396     NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
 1397     struct nfsexstuff *exp, struct vnode **vpp)
 1398 {
 1399         struct vnode *vp = NULL;
 1400         u_quad_t tempsize;
 1401         struct nfsexstuff nes;
 1402 
 1403         if (ndp->ni_vp == NULL)
 1404                 nd->nd_repstat = nfsrv_opencheck(clientid,
 1405                     stateidp, stp, NULL, nd, p, nd->nd_repstat);
 1406         if (!nd->nd_repstat) {
 1407                 if (ndp->ni_vp == NULL) {
 1408                         vrele(ndp->ni_startdir);
 1409                         nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
 1410                             &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
 1411                         vput(ndp->ni_dvp);
 1412                         nfsvno_relpathbuf(ndp);
 1413                         if (!nd->nd_repstat) {
 1414                                 if (*exclusive_flagp) {
 1415                                         *exclusive_flagp = 0;
 1416                                         NFSVNO_ATTRINIT(nvap);
 1417                                         nvap->na_atime.tv_sec = cverf[0];
 1418                                         nvap->na_atime.tv_nsec = cverf[1];
 1419                                         nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
 1420                                             &nvap->na_vattr, cred);
 1421                                         if (nd->nd_repstat != 0) {
                                                      /*
                                                       * The new vnode is dropped, so
                                                       * *vpp ends up NULL.
                                                       */
 1422                                                 vput(ndp->ni_vp);
 1423                                                 ndp->ni_vp = NULL;
 1424                                                 nd->nd_repstat = NFSERR_NOTSUPP;
 1425                                         } else
 1426                                                 NFSSETBIT_ATTRBIT(attrbitp,
 1427                                                     NFSATTRBIT_TIMEACCESS);
 1428                                 } else {
 1429                                         nfsrv_fixattr(nd, ndp->ni_vp, nvap,
 1430                                             aclp, p, attrbitp, exp);
 1431                                 }
 1432                         }
 1433                         vp = ndp->ni_vp;
 1434                 } else {
 1435                         if (ndp->ni_startdir)
 1436                                 vrele(ndp->ni_startdir);
 1437                         nfsvno_relpathbuf(ndp);
 1438                         vp = ndp->ni_vp;
                              /* The parent directory is only released for a create. */
 1439                         if (create == NFSV4OPEN_CREATE) {
 1440                                 if (ndp->ni_dvp == vp)
 1441                                         vrele(ndp->ni_dvp);
 1442                                 else
 1443                                         vput(ndp->ni_dvp);
 1444                         }
 1445                         if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
 1446                                 if (ndp->ni_cnd.cn_flags & RDONLY)
 1447                                         NFSVNO_SETEXRDONLY(&nes);
 1448                                 else
 1449                                         NFSVNO_EXINIT(&nes);
                                      /*
                                       * The access check result is passed on to
                                       * nfsrv_opencheck() as its last argument.
                                       */
 1450                                 nd->nd_repstat = nfsvno_accchk(vp, 
 1451                                     VWRITE, cred, &nes, p,
 1452                                     NFSACCCHK_NOOVERRIDE,
 1453                                     NFSACCCHK_VPISLOCKED, NULL);
 1454                                 nd->nd_repstat = nfsrv_opencheck(clientid,
 1455                                     stateidp, stp, vp, nd, p, nd->nd_repstat);
 1456                                 if (!nd->nd_repstat) {
                                              /* Reset nvap so that only the size is set. */
 1457                                         tempsize = nvap->na_size;
 1458                                         NFSVNO_ATTRINIT(nvap);
 1459                                         nvap->na_size = tempsize;
 1460                                         nd->nd_repstat = VOP_SETATTR(vp,
 1461                                             &nvap->na_vattr, cred);
 1462                                 }
 1463                         } else if (vp->v_type == VREG) {
 1464                                 nd->nd_repstat = nfsrv_opencheck(clientid,
 1465                                     stateidp, stp, vp, nd, p, nd->nd_repstat);
 1466                         }
 1467                 }
 1468         } else {
                      /*
                       * nd_repstat is already set: just clean up.  The vnodes are
                       * only released here when ni_startdir is set and this is a
                       * create.
                       */
 1469                 if (ndp->ni_cnd.cn_flags & HASBUF)
 1470                         nfsvno_relpathbuf(ndp);
 1471                 if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
 1472                         vrele(ndp->ni_startdir);
 1473                         if (ndp->ni_dvp == ndp->ni_vp)
 1474                                 vrele(ndp->ni_dvp);
 1475                         else
 1476                                 vput(ndp->ni_dvp);
 1477                         if (ndp->ni_vp)
 1478                                 vput(ndp->ni_vp);
 1479                 }
 1480         }
 1481         *vpp = vp;
 1482 
 1483         NFSEXITCODE2(0, nd);
 1484 }
 1485 
 1486 /*
 1487  * Sets the file's mtime to the current clock time and refetches its
 1488  * attributes into nvap, giving the caller the updated va_filerev and
 1489  * va_Xtime values.  Failures of those two calls are ignored.
 1490  * Returns ESTALE if the vnode is VI_DOOMED after upgrading its lock.
 1491  */
 1492 int
 1493 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
 1494     struct ucred *cred, struct thread *p)
 1495 {
 1496         struct vattr newattrs;
 1497 
 1498         VATTR_NULL(&newattrs);
 1499         vfs_timestamp(&newattrs.va_mtime);
 1500         if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
 1501                 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 1502                 if ((vp->v_iflag & VI_DOOMED) != 0)
 1503                         return (ESTALE);
 1504         }
 1505         (void) VOP_SETATTR(vp, &newattrs, cred);
 1506         (void) nfsvno_getattr(vp, nvap, cred, p, 1);
 1507         return (0);
 1508 }
 1509 
 1510 /*
 1511  * Glue routine: hands nvap's vattr to nfsv4_fillattr(), returns its result.
 1512  */
 1513 int
 1514 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
 1515     struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
 1516     struct ucred *cred, struct thread *p, int isdgram, int reterr,
 1517     int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
 1518 {
 1519         int ret;
 1520 
 1521         ret = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp,
 1522             rderror, attrbitp, cred, p, isdgram, reterr,
 1523             supports_nfsv4acls, at_root, mounted_on_fileno);
 1524         NFSEXITCODE2(0, nd);
 1525         return (ret);
 1526 }
 1527 
 1528 /* Since the Readdir vnode ops vary, put the entire functions in here. */
 1529 /*
 1530  * nfs readdir service
 1531  * - mallocs what it thinks is enough to read
 1532  *      count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
 1533  * - calls VOP_READDIR()
 1534  * - loops around building the reply
 1535  *      if the output generated exceeds count break out of loop
 1536  *      The NFSM_CLGET macro is used here so that the reply will be packed
 1537  *      tightly in mbuf clusters.
 1538  * - it trims out records with d_fileno == 0
 1539  *      this doesn't matter for Unix clients, but they might confuse clients
 1540  *      for other OSes.
 1541  * - it trims out records with d_type == DT_WHT
 1542  *      these cannot be seen through NFS (unless we extend the protocol)
 1543  *     The alternate call nfsrvd_readdirplus() does lookups as well.
 1544  * PS: The NFS protocol spec. does not clarify what the "count" byte
 1545  *      argument is a count of.. just name strings and file id's or the
 1546  *      entire reply rpc or ...
 1547  *      I tried just file name and id sizes and it confused the Sun client,
 1548  *      so I am using the full rpc size now. The "paranoia.." comment refers
 1549  *      to including the status longwords that are not a part of the dir.
 1550  *      "entry" structures, but are in the rpc.
       *
       * Arguments, as used by the code below:
       *      nd      - request/reply descriptor; nd_flag selects the NFSv2 or
       *                NFSv3 wire format, nd_repstat carries the NFS status,
       *                and the reply is appended to it via NFSM_BUILD.
       *      isdgram - not referenced in this function.
       *      vp      - directory vnode; released with vput() on every path
       *                after the initial nd_repstat check.
       *      p       - thread, passed to nfsvno_getattr()/nfsvno_accchk().
       *      exp     - export information, passed to nfsvno_accchk().
       *
       * Return value: 0 on every path that reaches "out" (NFS-level failures
       * are reported through nd->nd_repstat, not the return value); the
       * value of "error" on the "nfsmout" path.
 1551  */
 1552 int
 1553 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
 1554     struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
 1555 {
 1556         struct dirent *dp;
 1557         u_int32_t *tl;
 1558         int dirlen;
 1559         char *cpos, *cend, *rbuf;
 1560         struct nfsvattr at;
 1561         int nlen, error = 0, getret = 1;
 1562         int siz, cnt, fullsiz, eofflag, ncookies;
 1563         u_int64_t off, toff, verf;
 1564         u_long *cookies = NULL, *cookiep;
 1565         struct uio io;
 1566         struct iovec iv;
 1567         int not_zfs;
 1568 
              /*
               * A status was already set before this routine was entered:
               * emit the post-op attributes and return.  getret is still 1
               * and "at" has not been filled in at this point; vp is not
               * released on this path.
               */
 1569         if (nd->nd_repstat) {
 1570                 nfsrv_postopattr(nd, getret, &at);
 1571                 goto out;
 1572         }
              /*
               * Decode the arguments.  NFSv2: one word of offset, then the
               * count.  Otherwise: 64-bit offset, 64-bit cookie verifier,
               * then the count.  In both cases tl is left pointing at the
               * count word.  Note that verf is only read by the disabled
               * (#if 0) verifier check below.
               */
 1573         if (nd->nd_flag & ND_NFSV2) {
 1574                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1575                 off = fxdr_unsigned(u_quad_t, *tl++);
 1576         } else {
 1577                 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 1578                 off = fxdr_hyper(tl);
 1579                 tl += 2;
 1580                 verf = fxdr_hyper(tl);
 1581                 tl += 2;
 1582         }
              /* toff: entries whose cookie is <= this are skipped (non-ZFS). */
 1583         toff = off;
 1584         cnt = fxdr_unsigned(int, *tl);
              /* Clamp a negative or oversized count to the server maximum. */
 1585         if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
 1586                 cnt = NFS_SRVMAXDATA(nd);
              /*
               * Round the read size up to a multiple of DIRBLKSIZ (the mask
               * form assumes DIRBLKSIZ is a power of two) and remember it in
               * fullsiz so a retry can restore it.
               */
 1587         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 1588         fullsiz = siz;
 1589         if (nd->nd_flag & ND_NFSV3) {
 1590                 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
 1591                     p, 1);
 1592 #if 0
 1593                 /*
 1594                  * va_filerev is not sufficient as a cookie verifier,
 1595                  * since it is not supposed to change when entries are
 1596                  * removed/added unless that offset cookies returned to
 1597                  * the client are no longer valid.
 1598                  */
 1599                 if (!nd->nd_repstat && toff && verf != at.na_filerev)
 1600                         nd->nd_repstat = NFSERR_BAD_COOKIE;
 1601 #endif
 1602         }
              /*
               * Validation chain: each check only runs if no earlier one has
               * already set nd_repstat.
               */
 1603         if (!nd->nd_repstat && vp->v_type != VDIR)
 1604                 nd->nd_repstat = NFSERR_NOTDIR;
 1605         if (nd->nd_repstat == 0 && cnt == 0) {
 1606                 if (nd->nd_flag & ND_NFSV2)
 1607                         /* NFSv2 does not have NFSERR_TOOSMALL */
 1608                         nd->nd_repstat = EPERM;
 1609                 else
 1610                         nd->nd_repstat = NFSERR_TOOSMALL;
 1611         }
 1612         if (!nd->nd_repstat)
 1613                 nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 1614                     nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 1615                     NFSACCCHK_VPISLOCKED, NULL);
 1616         if (nd->nd_repstat) {
 1617                 vput(vp);
 1618                 if (nd->nd_flag & ND_NFSV3)
 1619                         nfsrv_postopattr(nd, getret, &at);
 1620                 goto out;
 1621         }
              /* not_zfs is 0 only when the file system type name is "zfs". */
 1622         not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
 1623         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
              /*
               * Read (or re-read, when every record of the previous chunk was
               * skipped) one chunk of directory entries starting at "off".
               * rbuf is reused across passes; a cookie array left over from
               * the previous pass is freed first.
               */
 1624 again:
 1625         eofflag = 0;
 1626         if (cookies) {
 1627                 free((caddr_t)cookies, M_TEMP);
 1628                 cookies = NULL;
 1629         }
 1630 
 1631         iv.iov_base = rbuf;
 1632         iv.iov_len = siz;
 1633         io.uio_iov = &iv;
 1634         io.uio_iovcnt = 1;
 1635         io.uio_offset = (off_t)off;
 1636         io.uio_resid = siz;
 1637         io.uio_segflg = UIO_SYSSPACE;
 1638         io.uio_rw = UIO_READ;
 1639         io.uio_td = NULL;
 1640         nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
 1641             &cookies);
              /* off now follows the data read; siz becomes the bytes returned. */
 1642         off = (u_int64_t)io.uio_offset;
 1643         if (io.uio_resid)
 1644                 siz -= io.uio_resid;
 1645 
              /* A successful read that produced no cookie array is an error. */
 1646         if (!cookies && !nd->nd_repstat)
 1647                 nd->nd_repstat = NFSERR_PERM;
              /* Refresh the attributes for the NFSv3 post-op attribute field. */
 1648         if (nd->nd_flag & ND_NFSV3) {
 1649                 getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
 1650                 if (!nd->nd_repstat)
 1651                         nd->nd_repstat = getret;
 1652         }
 1653 
 1654         /*
 1655          * Handles the failed cases. nd->nd_repstat == 0 past here.
 1656          */
 1657         if (nd->nd_repstat) {
 1658                 vput(vp);
 1659                 free((caddr_t)rbuf, M_TEMP);
 1660                 if (cookies)
 1661                         free((caddr_t)cookies, M_TEMP);
 1662                 if (nd->nd_flag & ND_NFSV3)
 1663                         nfsrv_postopattr(nd, getret, &at);
 1664                 goto out;
 1665         }
 1666         /*
 1667          * If nothing read, return eof
 1668          * rpc reply
 1669          */
 1670         if (siz == 0) {
 1671                 vput(vp);
                      /*
                       * Empty reply.  NFSv2: just the two trailing words.
                       * Otherwise: post-op attributes, the 64-bit verifier
                       * (at.na_filerev), then the two trailing words, which
                       * are written as false followed by true.
                       */
 1672                 if (nd->nd_flag & ND_NFSV2) {
 1673                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1674                 } else {
 1675                         nfsrv_postopattr(nd, getret, &at);
 1676                         NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 1677                         txdr_hyper(at.na_filerev, tl);
 1678                         tl += 2;
 1679                 }
 1680                 *tl++ = newnfs_false;
 1681                 *tl = newnfs_true;
 1682                 FREE((caddr_t)rbuf, M_TEMP);
                      /* cookies is non-NULL here (NULL was turned into an error). */
 1683                 FREE((caddr_t)cookies, M_TEMP);
 1684                 goto out;
 1685         }
 1686 
 1687         /*
 1688          * Check for degenerate cases of nothing useful read.
 1689          * If so go try again
 1690          */
 1691         cpos = rbuf;
 1692         cend = rbuf + siz;
 1693         dp = (struct dirent *)cpos;
 1694         cookiep = cookies;
 1695 
 1696         /*
 1697          * For some reason FreeBSD's ufs_readdir() chooses to back the
 1698          * directory offset up to a block boundary, so it is necessary to
 1699          * skip over the records that precede the requested offset. This
 1700          * requires the assumption that file offset cookies monotonically
 1701          * increase.
 1702          * Since the offset cookies don't monotonically increase for ZFS,
 1703          * this is not done when ZFS is the file system.
 1704          */
 1705         while (cpos < cend && ncookies > 0 &&
 1706             (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 1707              (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
 1708                 cpos += dp->d_reclen;
 1709                 dp = (struct dirent *)cpos;
 1710                 cookiep++;
 1711                 ncookies--;
 1712         }
              /*
               * Every record in this chunk was skipped: restore the full read
               * size, move the skip threshold up to the new offset and read
               * the next chunk.
               */
 1713         if (cpos >= cend || ncookies == 0) {
 1714                 siz = fullsiz;
 1715                 toff = off;
 1716                 goto again;
 1717         }
              /* The vnode is not touched again; the rest works on rbuf/cookies. */
 1718         vput(vp);
 1719 
 1720         /*
 1721          * dirlen is the size of the reply, including all XDR and must
 1722          * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
 1723          * if the XDR should be included in "count", but to be safe, we do.
 1724          * (Include the two booleans at the end of the reply in dirlen now.)
 1725          */
 1726         if (nd->nd_flag & ND_NFSV3) {
 1727                 nfsrv_postopattr(nd, getret, &at);
 1728                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1729                 txdr_hyper(at.na_filerev, tl);
 1730                 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
 1731         } else {
 1732                 dirlen = 2 * NFSX_UNSIGNED;
 1733         }
 1734 
 1735         /* Loop through the records and build reply */
 1736         while (cpos < cend && ncookies > 0) {
 1737                 nlen = dp->d_namlen;
                      /*
                       * Records with d_fileno == 0, whiteouts and names longer
                       * than NFS_MAXNAMLEN are stepped over without being
                       * added to the reply.
                       */
 1738                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
 1739                         nlen <= NFS_MAXNAMLEN) {
                              /*
                               * Charge this entry to dirlen before building
                               * it.  Three words (NFSv3) or two (NFSv2) are
                               * built before the name and two or one after
                               * it; the remaining word presumably accounts
                               * for the length word written by nfsm_strtom()
                               * -- confirm.
                               */
 1740                         if (nd->nd_flag & ND_NFSV3)
 1741                                 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
 1742                         else
 1743                                 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
                              /* Entry does not fit: stop and report not-eof. */
 1744                         if (dirlen > cnt) {
 1745                                 eofflag = 0;
 1746                                 break;
 1747                         }
 1748 
 1749                         /*
 1750                          * Build the directory record xdr from
 1751                          * the dirent entry.
 1752                          */
 1753                         if (nd->nd_flag & ND_NFSV3) {
 1754                                 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 1755                                 *tl++ = newnfs_true;
                                      /* Word ahead of the file number is written as 0. */
 1756                                 *tl++ = 0;
 1757                         } else {
 1758                                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1759                                 *tl++ = newnfs_true;
 1760                         }
 1761                         *tl = txdr_unsigned(dp->d_fileno);
 1762                         (void) nfsm_strtom(nd, dp->d_name, nlen);
                              /* Cookie; for NFSv3 it is preceded by a zero word. */
 1763                         if (nd->nd_flag & ND_NFSV3) {
 1764                                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1765                                 *tl++ = 0;
 1766                         } else
 1767                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 1768                         *tl = txdr_unsigned(*cookiep);
 1769                 }
 1770                 cpos += dp->d_reclen;
 1771                 dp = (struct dirent *)cpos;
 1772                 cookiep++;
 1773                 ncookies--;
 1774         }
              /* Unconsumed records remain in the buffer, so this is not eof. */
 1775         if (cpos < cend)
 1776                 eofflag = 0;
              /*
               * Trailing two words: the first is always false, the second
               * reflects eofflag.
               */
 1777         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1778         *tl++ = newnfs_false;
 1779         if (eofflag)
 1780                 *tl = newnfs_true;
 1781         else
 1782                 *tl = newnfs_false;
 1783         FREE((caddr_t)rbuf, M_TEMP);
 1784         FREE((caddr_t)cookies, M_TEMP);
 1785 
 1786 out:
 1787         NFSEXITCODE2(0, nd);
 1788         return (0);
              /*
               * No visible statement jumps here or assigns "error".
               * NOTE(review): presumably NFSM_DISSECT sets error and branches
               * to nfsmout on a malformed request -- confirm against the macro
               * definition.  Only vp is released on this path.
               */
 1789 nfsmout:
 1790         vput(vp);
 1791         NFSEXITCODE2(error, nd);
 1792         return (error);
 1793 }
 1794 
 1795 /*
 1796  * Readdirplus for V3 and Readdir for V4.
 1797  */
 1798 int
 1799 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
 1800     struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
 1801 {
 1802         struct dirent *dp;
 1803         u_int32_t *tl;
 1804         int dirlen;
 1805         char *cpos, *cend, *rbuf;
 1806         struct vnode *nvp;
 1807         fhandle_t nfh;
 1808         struct nfsvattr nva, at, *nvap = &nva;
 1809         struct mbuf *mb0, *mb1;
 1810         struct nfsreferral *refp;
 1811         int nlen, r, error = 0, getret = 1, usevget = 1;
 1812         int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
 1813         caddr_t bpos0, bpos1;
 1814         u_int64_t off, toff, verf;
 1815         u_long *cookies = NULL, *cookiep;
 1816         nfsattrbit_t attrbits, rderrbits, savbits;
 1817         struct uio io;
 1818         struct iovec iv;
 1819         struct componentname cn;
 1820         int at_root, needs_unbusy, not_zfs, supports_nfsv4acls;
 1821         struct mount *mp, *new_mp;
 1822         uint64_t mounted_on_fileno;
 1823 
 1824         if (nd->nd_repstat) {
 1825                 nfsrv_postopattr(nd, getret, &at);
 1826                 goto out;
 1827         }
 1828         NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 1829         off = fxdr_hyper(tl);
 1830         toff = off;
 1831         tl += 2;
 1832         verf = fxdr_hyper(tl);
 1833         tl += 2;
 1834         siz = fxdr_unsigned(int, *tl++);
 1835         cnt = fxdr_unsigned(int, *tl);
 1836 
 1837         /*
 1838          * Use the server's maximum data transfer size as the upper bound
 1839          * on reply datalen.
 1840          */
 1841         if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
 1842                 cnt = NFS_SRVMAXDATA(nd);
 1843 
 1844         /*
 1845          * siz is a "hint" of how much directory information (name, fileid,
 1846          * cookie) should be in the reply. At least one client "hints" 0,
 1847          * so I set it to cnt for that case. I also round it up to the
 1848          * next multiple of DIRBLKSIZ.
 1849          */
 1850         if (siz <= 0)
 1851                 siz = cnt;
 1852         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 1853 
 1854         if (nd->nd_flag & ND_NFSV4) {
 1855                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 1856                 if (error)
 1857                         goto nfsmout;
 1858                 NFSSET_ATTRBIT(&savbits, &attrbits);
 1859                 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
 1860                 NFSZERO_ATTRBIT(&rderrbits);
 1861                 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
 1862         } else {
 1863                 NFSZERO_ATTRBIT(&attrbits);
 1864         }
 1865         fullsiz = siz;
 1866         nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
 1867         if (!nd->nd_repstat) {
 1868             if (off && verf != at.na_filerev) {
 1869                 /*
 1870                  * va_filerev is not sufficient as a cookie verifier,
 1871                  * since it is not supposed to change when entries are
 1872                  * removed/added unless that offset cookies returned to
 1873                  * the client are no longer valid.
 1874                  */
 1875 #if 0
 1876                 if (nd->nd_flag & ND_NFSV4) {
 1877                         nd->nd_repstat = NFSERR_NOTSAME;
 1878                 } else {
 1879                         nd->nd_repstat = NFSERR_BAD_COOKIE;
 1880                 }
 1881 #endif
 1882             } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
 1883                 nd->nd_repstat = NFSERR_BAD_COOKIE;
 1884             }
 1885         }
 1886         if (!nd->nd_repstat && vp->v_type != VDIR)
 1887                 nd->nd_repstat = NFSERR_NOTDIR;
 1888         if (!nd->nd_repstat && cnt == 0)
 1889                 nd->nd_repstat = NFSERR_TOOSMALL;
 1890         if (!nd->nd_repstat)
 1891                 nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 1892                     nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 1893                     NFSACCCHK_VPISLOCKED, NULL);
 1894         if (nd->nd_repstat) {
 1895                 vput(vp);
 1896                 if (nd->nd_flag & ND_NFSV3)
 1897                         nfsrv_postopattr(nd, getret, &at);
 1898                 goto out;
 1899         }
 1900         not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs");
 1901 
 1902         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 1903 again:
 1904         eofflag = 0;
 1905         if (cookies) {
 1906                 free((caddr_t)cookies, M_TEMP);
 1907                 cookies = NULL;
 1908         }
 1909 
 1910         iv.iov_base = rbuf;
 1911         iv.iov_len = siz;
 1912         io.uio_iov = &iv;
 1913         io.uio_iovcnt = 1;
 1914         io.uio_offset = (off_t)off;
 1915         io.uio_resid = siz;
 1916         io.uio_segflg = UIO_SYSSPACE;
 1917         io.uio_rw = UIO_READ;
 1918         io.uio_td = NULL;
 1919         nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
 1920             &cookies);
 1921         off = (u_int64_t)io.uio_offset;
 1922         if (io.uio_resid)
 1923                 siz -= io.uio_resid;
 1924 
 1925         getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
 1926 
 1927         if (!cookies && !nd->nd_repstat)
 1928                 nd->nd_repstat = NFSERR_PERM;
 1929         if (!nd->nd_repstat)
 1930                 nd->nd_repstat = getret;
 1931         if (nd->nd_repstat) {
 1932                 vput(vp);
 1933                 if (cookies)
 1934                         free((caddr_t)cookies, M_TEMP);
 1935                 free((caddr_t)rbuf, M_TEMP);
 1936                 if (nd->nd_flag & ND_NFSV3)
 1937                         nfsrv_postopattr(nd, getret, &at);
 1938                 goto out;
 1939         }
 1940         /*
 1941          * If nothing read, return eof
 1942          * rpc reply
 1943          */
 1944         if (siz == 0) {
 1945                 vput(vp);
 1946                 if (nd->nd_flag & ND_NFSV3)
 1947                         nfsrv_postopattr(nd, getret, &at);
 1948                 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 1949                 txdr_hyper(at.na_filerev, tl);
 1950                 tl += 2;
 1951                 *tl++ = newnfs_false;
 1952                 *tl = newnfs_true;
 1953                 free((caddr_t)cookies, M_TEMP);
 1954                 free((caddr_t)rbuf, M_TEMP);
 1955                 goto out;
 1956         }
 1957 
 1958         /*
 1959          * Check for degenerate cases of nothing useful read.
 1960          * If so go try again
 1961          */
 1962         cpos = rbuf;
 1963         cend = rbuf + siz;
 1964         dp = (struct dirent *)cpos;
 1965         cookiep = cookies;
 1966 
 1967         /*
 1968          * For some reason FreeBSD's ufs_readdir() chooses to back the
 1969          * directory offset up to a block boundary, so it is necessary to
 1970          * skip over the records that precede the requested offset. This
 1971          * requires the assumption that file offset cookies monotonically
 1972          * increase.
 1973          * Since the offset cookies don't monotonically increase for ZFS,
 1974          * this is not done when ZFS is the file system.
 1975          */
 1976         while (cpos < cend && ncookies > 0 &&
 1977           (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 1978            (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff) ||
 1979            ((nd->nd_flag & ND_NFSV4) &&
 1980             ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
 1981              (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
 1982                 cpos += dp->d_reclen;
 1983                 dp = (struct dirent *)cpos;
 1984                 cookiep++;
 1985                 ncookies--;
 1986         }
 1987         if (cpos >= cend || ncookies == 0) {
 1988                 siz = fullsiz;
 1989                 toff = off;
 1990                 goto again;
 1991         }
 1992 
 1993         /*
 1994          * Busy the file system so that the mount point won't go away
 1995          * and, as such, VFS_VGET() can be used safely.
 1996          */
 1997         mp = vp->v_mount;
 1998         vfs_ref(mp);
 1999         NFSVOPUNLOCK(vp, 0);
 2000         nd->nd_repstat = vfs_busy(mp, 0);
 2001         vfs_rel(mp);
 2002         if (nd->nd_repstat != 0) {
 2003                 vrele(vp);
 2004                 free(cookies, M_TEMP);
 2005                 free(rbuf, M_TEMP);
 2006                 if (nd->nd_flag & ND_NFSV3)
 2007                         nfsrv_postopattr(nd, getret, &at);
 2008                 goto out;
 2009         }
 2010 
 2011         /*
 2012          * Check to see if entries in this directory can be safely acquired
 2013          * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
 2014          * ZFS snapshot directories need VOP_LOOKUP(), so that any
 2015          * automount of the snapshot directory that is required will
 2016          * be done.
 2017          * This needs to be done here for NFSv4, since NFSv4 never does
 2018          * a VFS_VGET() for "." or "..".
 2019          */
 2020         if (not_zfs == 0) {
 2021                 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
 2022                 if (r == EOPNOTSUPP) {
 2023                         usevget = 0;
 2024                         cn.cn_nameiop = LOOKUP;
 2025                         cn.cn_lkflags = LK_SHARED | LK_RETRY;
 2026                         cn.cn_cred = nd->nd_cred;
 2027                         cn.cn_thread = p;
 2028                 } else if (r == 0)
 2029                         vput(nvp);
 2030         }
 2031 
 2032         /*
 2033          * Save this position, in case there is an error before one entry
 2034          * is created.
 2035          */
 2036         mb0 = nd->nd_mb;
 2037         bpos0 = nd->nd_bpos;
 2038 
 2039         /*
 2040          * Fill in the first part of the reply.
 2041          * dirlen is the reply length in bytes and cannot exceed cnt.
 2042          * (Include the two booleans at the end of the reply in dirlen now,
 2043          *  so we recognize when we have exceeded cnt.)
 2044          */
 2045         if (nd->nd_flag & ND_NFSV3) {
 2046                 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
 2047                 nfsrv_postopattr(nd, getret, &at);
 2048         } else {
 2049                 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
 2050         }
 2051         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 2052         txdr_hyper(at.na_filerev, tl);
 2053 
 2054         /*
 2055          * Save this position, in case there is an empty reply needed.
 2056          */
 2057         mb1 = nd->nd_mb;
 2058         bpos1 = nd->nd_bpos;
 2059 
 2060         /* Loop through the records and build reply */
 2061         entrycnt = 0;
 2062         while (cpos < cend && ncookies > 0 && dirlen < cnt) {
 2063                 nlen = dp->d_namlen;
 2064                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
 2065                     nlen <= NFS_MAXNAMLEN &&
 2066                     ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
 2067                      (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
 2068                       || (nlen == 1 && dp->d_name[0] != '.'))) {
 2069                         /*
 2070                          * Save the current position in the reply, in case
 2071                          * this entry exceeds cnt.
 2072                          */
 2073                         mb1 = nd->nd_mb;
 2074                         bpos1 = nd->nd_bpos;
 2075         
 2076                         /*
 2077                          * For readdir_and_lookup get the vnode using
 2078                          * the file number.
 2079                          */
 2080                         nvp = NULL;
 2081                         refp = NULL;
 2082                         r = 0;
 2083                         at_root = 0;
 2084                         needs_unbusy = 0;
 2085                         new_mp = mp;
 2086                         mounted_on_fileno = (uint64_t)dp->d_fileno;
 2087                         if ((nd->nd_flag & ND_NFSV3) ||
 2088                             NFSNONZERO_ATTRBIT(&savbits)) {
 2089                                 if (nd->nd_flag & ND_NFSV4)
 2090                                         refp = nfsv4root_getreferral(NULL,
 2091                                             vp, dp->d_fileno);
 2092                                 if (refp == NULL) {
 2093                                         if (usevget)
 2094                                                 r = VFS_VGET(mp, dp->d_fileno,
 2095                                                     LK_SHARED, &nvp);
 2096                                         else
 2097                                                 r = EOPNOTSUPP;
 2098                                         if (r == EOPNOTSUPP) {
 2099                                                 if (usevget) {
 2100                                                         usevget = 0;
 2101                                                         cn.cn_nameiop = LOOKUP;
 2102                                                         cn.cn_lkflags =
 2103                                                             LK_SHARED |
 2104                                                             LK_RETRY;
 2105                                                         cn.cn_cred =
 2106                                                             nd->nd_cred;
 2107                                                         cn.cn_thread = p;
 2108                                                 }
 2109                                                 cn.cn_nameptr = dp->d_name;
 2110                                                 cn.cn_namelen = nlen;
 2111                                                 cn.cn_flags = ISLASTCN |
 2112                                                     NOFOLLOW | LOCKLEAF |
 2113                                                     MPSAFE;
 2114                                                 if (nlen == 2 &&
 2115                                                     dp->d_name[0] == '.' &&
 2116                                                     dp->d_name[1] == '.')
 2117                                                         cn.cn_flags |=
 2118                                                             ISDOTDOT;
 2119                                                 if (NFSVOPLOCK(vp, LK_SHARED)
 2120                                                     != 0) {
 2121                                                         nd->nd_repstat = EPERM;
 2122                                                         break;
 2123                                                 }
 2124                                                 if ((vp->v_vflag & VV_ROOT) != 0
 2125                                                     && (cn.cn_flags & ISDOTDOT)
 2126                                                     != 0) {
 2127                                                         vref(vp);
 2128                                                         nvp = vp;
 2129                                                         r = 0;
 2130                                                 } else {
 2131                                                         r = VOP_LOOKUP(vp, &nvp,
 2132                                                             &cn);
 2133                                                         if (vp != nvp)
 2134                                                                 NFSVOPUNLOCK(vp,
 2135                                                                     0);
 2136                                                 }
 2137                                         }
 2138 
 2139                                         /*
 2140                                          * For NFSv4, check to see if nvp is
 2141                                          * a mount point and get the mount
 2142                                          * point vnode, as required.
 2143                                          */
 2144                                         if (r == 0 &&
 2145                                             nfsrv_enable_crossmntpt != 0 &&
 2146                                             (nd->nd_flag & ND_NFSV4) != 0 &&
 2147                                             nvp->v_type == VDIR &&
 2148                                             nvp->v_mountedhere != NULL) {
 2149                                                 new_mp = nvp->v_mountedhere;
 2150                                                 r = vfs_busy(new_mp, 0);
 2151                                                 vput(nvp);
 2152                                                 nvp = NULL;
 2153                                                 if (r == 0) {
 2154                                                         r = VFS_ROOT(new_mp,
 2155                                                             LK_SHARED, &nvp);
 2156                                                         needs_unbusy = 1;
 2157                                                         if (r == 0)
 2158                                                                 at_root = 1;
 2159                                                 }
 2160                                         }
 2161                                 }
 2162                                 if (!r) {
 2163                                     if (refp == NULL &&
 2164                                         ((nd->nd_flag & ND_NFSV3) ||
 2165                                          NFSNONZERO_ATTRBIT(&attrbits))) {
 2166                                         r = nfsvno_getfh(nvp, &nfh, p);
 2167                                         if (!r)
 2168                                             r = nfsvno_getattr(nvp, nvap,
 2169                                                 nd->nd_cred, p, 1);
 2170                                         if (r == 0 && not_zfs == 0 &&
 2171                                             nfsrv_enable_crossmntpt != 0 &&
 2172                                             (nd->nd_flag & ND_NFSV4) != 0 &&
 2173                                             nvp->v_type == VDIR &&
 2174                                             vp->v_mount != nvp->v_mount) {
 2175                                             /*
 2176                                              * For a ZFS snapshot, there is a
 2177                                              * pseudo mount that does not set
 2178                                              * v_mountedhere, so it needs to
 2179                                              * be detected via a different
 2180                                              * mount structure.
 2181                                              */
 2182                                             at_root = 1;
 2183                                             if (new_mp == mp)
 2184                                                 new_mp = nvp->v_mount;
 2185                                         }
 2186                                     }
 2187                                 } else {
 2188                                     nvp = NULL;
 2189                                 }
 2190                                 if (r) {
 2191                                         if (!NFSISSET_ATTRBIT(&attrbits,
 2192                                             NFSATTRBIT_RDATTRERROR)) {
 2193                                                 if (nvp != NULL)
 2194                                                         vput(nvp);
 2195                                                 if (needs_unbusy != 0)
 2196                                                         vfs_unbusy(new_mp);
 2197                                                 nd->nd_repstat = r;
 2198                                                 break;
 2199                                         }
 2200                                 }
 2201                         }
 2202 
 2203                         /*
 2204                          * Build the directory record xdr
 2205                          */
 2206                         if (nd->nd_flag & ND_NFSV3) {
 2207                                 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 2208                                 *tl++ = newnfs_true;
 2209                                 *tl++ = 0;
 2210                                 *tl = txdr_unsigned(dp->d_fileno);
 2211                                 dirlen += nfsm_strtom(nd, dp->d_name, nlen);
 2212                                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2213                                 *tl++ = 0;
 2214                                 *tl = txdr_unsigned(*cookiep);
 2215                                 nfsrv_postopattr(nd, 0, nvap);
 2216                                 dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
 2217                                 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
 2218                                 if (nvp != NULL)
 2219                                         vput(nvp);
 2220                         } else {
 2221                                 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 2222                                 *tl++ = newnfs_true;
 2223                                 *tl++ = 0;
 2224                                 *tl = txdr_unsigned(*cookiep);
 2225                                 dirlen += nfsm_strtom(nd, dp->d_name, nlen);
 2226                                 if (nvp != NULL) {
 2227                                         supports_nfsv4acls =
 2228                                             nfs_supportsnfsv4acls(nvp);
 2229                                         NFSVOPUNLOCK(nvp, 0);
 2230                                 } else
 2231                                         supports_nfsv4acls = 0;
 2232                                 if (refp != NULL) {
 2233                                         dirlen += nfsrv_putreferralattr(nd,
 2234                                             &savbits, refp, 0,
 2235                                             &nd->nd_repstat);
 2236                                         if (nd->nd_repstat) {
 2237                                                 if (nvp != NULL)
 2238                                                         vrele(nvp);
 2239                                                 if (needs_unbusy != 0)
 2240                                                         vfs_unbusy(new_mp);
 2241                                                 break;
 2242                                         }
 2243                                 } else if (r) {
 2244                                         dirlen += nfsvno_fillattr(nd, new_mp,
 2245                                             nvp, nvap, &nfh, r, &rderrbits,
 2246                                             nd->nd_cred, p, isdgram, 0,
 2247                                             supports_nfsv4acls, at_root,
 2248                                             mounted_on_fileno);
 2249                                 } else {
 2250                                         dirlen += nfsvno_fillattr(nd, new_mp,
 2251                                             nvp, nvap, &nfh, r, &attrbits,
 2252                                             nd->nd_cred, p, isdgram, 0,
 2253                                             supports_nfsv4acls, at_root,
 2254                                             mounted_on_fileno);
 2255                                 }
 2256                                 if (nvp != NULL)
 2257                                         vrele(nvp);
 2258                                 dirlen += (3 * NFSX_UNSIGNED);
 2259                         }
 2260                         if (needs_unbusy != 0)
 2261                                 vfs_unbusy(new_mp);
 2262                         if (dirlen <= cnt)
 2263                                 entrycnt++;
 2264                 }
 2265                 cpos += dp->d_reclen;
 2266                 dp = (struct dirent *)cpos;
 2267                 cookiep++;
 2268                 ncookies--;
 2269         }
 2270         vrele(vp);
 2271         vfs_unbusy(mp);
 2272 
 2273         /*
 2274          * If dirlen > cnt, we must strip off the last entry. If that
 2275          * results in an empty reply, report NFSERR_TOOSMALL.
 2276          */
 2277         if (dirlen > cnt || nd->nd_repstat) {
 2278                 if (!nd->nd_repstat && entrycnt == 0)
 2279                         nd->nd_repstat = NFSERR_TOOSMALL;
 2280                 if (nd->nd_repstat) {
 2281                         newnfs_trimtrailing(nd, mb0, bpos0);
 2282                         if (nd->nd_flag & ND_NFSV3)
 2283                                 nfsrv_postopattr(nd, getret, &at);
 2284                 } else
 2285                         newnfs_trimtrailing(nd, mb1, bpos1);
 2286                 eofflag = 0;
 2287         } else if (cpos < cend)
 2288                 eofflag = 0;
 2289         if (!nd->nd_repstat) {
 2290                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2291                 *tl++ = newnfs_false;
 2292                 if (eofflag)
 2293                         *tl = newnfs_true;
 2294                 else
 2295                         *tl = newnfs_false;
 2296         }
 2297         FREE((caddr_t)cookies, M_TEMP);
 2298         FREE((caddr_t)rbuf, M_TEMP);
 2299 
 2300 out:
 2301         NFSEXITCODE2(0, nd);
 2302         return (0);
 2303 nfsmout:
 2304         vput(vp);
 2305         NFSEXITCODE2(error, nd);
 2306         return (error);
 2307 }
 2308 
 2309 /*
 2310  * Get the settable attributes out of the mbuf list.
 2311  * (Return 0 or EBADRPC)
 2312  */
 2313 int
 2314 nfsrv_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
 2315     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
 2316 {
 2317         u_int32_t *tl;
 2318         struct nfsv2_sattr *sp;
 2319         int error = 0, toclient = 0;
 2320 
 2321         switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
 2322         case ND_NFSV2:
 2323                 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 2324                 /*
 2325                  * Some old clients didn't fill in the high order 16bits.
 2326                  * --> check the low order 2 bytes for 0xffff
 2327                  */
 2328                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
 2329                         nvap->na_mode = nfstov_mode(sp->sa_mode);
 2330                 if (sp->sa_uid != newnfs_xdrneg1)
 2331                         nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
 2332                 if (sp->sa_gid != newnfs_xdrneg1)
 2333                         nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
 2334                 if (sp->sa_size != newnfs_xdrneg1)
 2335                         nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
 2336                 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
 2337 #ifdef notyet
 2338                         fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
 2339 #else
 2340                         nvap->na_atime.tv_sec =
 2341                                 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
 2342                         nvap->na_atime.tv_nsec = 0;
 2343 #endif
 2344                 }
 2345                 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
 2346                         fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
 2347                 break;
 2348         case ND_NFSV3:
 2349                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2350                 if (*tl == newnfs_true) {
 2351                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2352                         nvap->na_mode = nfstov_mode(*tl);
 2353                 }
 2354                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2355                 if (*tl == newnfs_true) {
 2356                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2357                         nvap->na_uid = fxdr_unsigned(uid_t, *tl);
 2358                 }
 2359                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2360                 if (*tl == newnfs_true) {
 2361                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2362                         nvap->na_gid = fxdr_unsigned(gid_t, *tl);
 2363                 }
 2364                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2365                 if (*tl == newnfs_true) {
 2366                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2367                         nvap->na_size = fxdr_hyper(tl);
 2368                 }
 2369                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2370                 switch (fxdr_unsigned(int, *tl)) {
 2371                 case NFSV3SATTRTIME_TOCLIENT:
 2372                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2373                         fxdr_nfsv3time(tl, &nvap->na_atime);
 2374                         toclient = 1;
 2375                         break;
 2376                 case NFSV3SATTRTIME_TOSERVER:
 2377                         vfs_timestamp(&nvap->na_atime);
 2378                         nvap->na_vaflags |= VA_UTIMES_NULL;
 2379                         break;
 2380                 };
 2381                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2382                 switch (fxdr_unsigned(int, *tl)) {
 2383                 case NFSV3SATTRTIME_TOCLIENT:
 2384                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2385                         fxdr_nfsv3time(tl, &nvap->na_mtime);
 2386                         nvap->na_vaflags &= ~VA_UTIMES_NULL;
 2387                         break;
 2388                 case NFSV3SATTRTIME_TOSERVER:
 2389                         vfs_timestamp(&nvap->na_mtime);
 2390                         if (!toclient)
 2391                                 nvap->na_vaflags |= VA_UTIMES_NULL;
 2392                         break;
 2393                 };
 2394                 break;
 2395         case ND_NFSV4:
 2396                 error = nfsv4_sattr(nd, nvap, attrbitp, aclp, p);
 2397         };
 2398 nfsmout:
 2399         NFSEXITCODE2(error, nd);
 2400         return (error);
 2401 }
 2402 
 2403 /*
 2404  * Handle the setable attributes for V4.
 2405  * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
 2406  */
 2407 int
 2408 nfsv4_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
 2409     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
 2410 {
 2411         u_int32_t *tl;
 2412         int attrsum = 0;
 2413         int i, j;
 2414         int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
 2415         int toclient = 0;
 2416         u_char *cp, namestr[NFSV4_SMALLSTR + 1];
 2417         uid_t uid;
 2418         gid_t gid;
 2419 
 2420         error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
 2421         if (error)
 2422                 goto nfsmout;
 2423         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2424         attrsize = fxdr_unsigned(int, *tl);
 2425 
 2426         /*
 2427          * Loop around getting the setable attributes. If an unsupported
 2428          * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
 2429          */
 2430         if (retnotsup) {
 2431                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2432                 bitpos = NFSATTRBIT_MAX;
 2433         } else {
 2434                 bitpos = 0;
 2435         }
 2436         for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
 2437             if (attrsum > attrsize) {
 2438                 error = NFSERR_BADXDR;
 2439                 goto nfsmout;
 2440             }
 2441             if (NFSISSET_ATTRBIT(attrbitp, bitpos))
 2442                 switch (bitpos) {
 2443                 case NFSATTRBIT_SIZE:
 2444                         NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
 2445                         nvap->na_size = fxdr_hyper(tl);
 2446                         attrsum += NFSX_HYPER;
 2447                         break;
 2448                 case NFSATTRBIT_ACL:
 2449                         error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
 2450                             p);
 2451                         if (error)
 2452                                 goto nfsmout;
 2453                         if (aceerr && !nd->nd_repstat)
 2454                                 nd->nd_repstat = aceerr;
 2455                         attrsum += aclsize;
 2456                         break;
 2457                 case NFSATTRBIT_ARCHIVE:
 2458                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2459                         if (!nd->nd_repstat)
 2460                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2461                         attrsum += NFSX_UNSIGNED;
 2462                         break;
 2463                 case NFSATTRBIT_HIDDEN:
 2464                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2465                         if (!nd->nd_repstat)
 2466                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2467                         attrsum += NFSX_UNSIGNED;
 2468                         break;
 2469                 case NFSATTRBIT_MIMETYPE:
 2470                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2471                         i = fxdr_unsigned(int, *tl);
 2472                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 2473                         if (error)
 2474                                 goto nfsmout;
 2475                         if (!nd->nd_repstat)
 2476                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2477                         attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
 2478                         break;
 2479                 case NFSATTRBIT_MODE:
 2480                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2481                         nvap->na_mode = nfstov_mode(*tl);
 2482                         attrsum += NFSX_UNSIGNED;
 2483                         break;
 2484                 case NFSATTRBIT_OWNER:
 2485                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2486                         j = fxdr_unsigned(int, *tl);
 2487                         if (j < 0) {
 2488                                 error = NFSERR_BADXDR;
 2489                                 goto nfsmout;
 2490                         }
 2491                         if (j > NFSV4_SMALLSTR)
 2492                                 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
 2493                         else
 2494                                 cp = namestr;
 2495                         error = nfsrv_mtostr(nd, cp, j);
 2496                         if (error) {
 2497                                 if (j > NFSV4_SMALLSTR)
 2498                                         free(cp, M_NFSSTRING);
 2499                                 goto nfsmout;
 2500                         }
 2501                         if (!nd->nd_repstat) {
 2502                                 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid,
 2503                                     p);
 2504                                 if (!nd->nd_repstat)
 2505                                         nvap->na_uid = uid;
 2506                         }
 2507                         if (j > NFSV4_SMALLSTR)
 2508                                 free(cp, M_NFSSTRING);
 2509                         attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 2510                         break;
 2511                 case NFSATTRBIT_OWNERGROUP:
 2512                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2513                         j = fxdr_unsigned(int, *tl);
 2514                         if (j < 0) {
 2515                                 error = NFSERR_BADXDR;
 2516                                 goto nfsmout;
 2517                         }
 2518                         if (j > NFSV4_SMALLSTR)
 2519                                 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
 2520                         else
 2521                                 cp = namestr;
 2522                         error = nfsrv_mtostr(nd, cp, j);
 2523                         if (error) {
 2524                                 if (j > NFSV4_SMALLSTR)
 2525                                         free(cp, M_NFSSTRING);
 2526                                 goto nfsmout;
 2527                         }
 2528                         if (!nd->nd_repstat) {
 2529                                 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid,
 2530                                     p);
 2531                                 if (!nd->nd_repstat)
 2532                                         nvap->na_gid = gid;
 2533                         }
 2534                         if (j > NFSV4_SMALLSTR)
 2535                                 free(cp, M_NFSSTRING);
 2536                         attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 2537                         break;
 2538                 case NFSATTRBIT_SYSTEM:
 2539                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2540                         if (!nd->nd_repstat)
 2541                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2542                         attrsum += NFSX_UNSIGNED;
 2543                         break;
 2544                 case NFSATTRBIT_TIMEACCESSSET:
 2545                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2546                         attrsum += NFSX_UNSIGNED;
 2547                         if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 2548                             NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 2549                             fxdr_nfsv4time(tl, &nvap->na_atime);
 2550                             toclient = 1;
 2551                             attrsum += NFSX_V4TIME;
 2552                         } else {
 2553                             vfs_timestamp(&nvap->na_atime);
 2554                             nvap->na_vaflags |= VA_UTIMES_NULL;
 2555                         }
 2556                         break;
 2557                 case NFSATTRBIT_TIMEBACKUP:
 2558                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 2559                         if (!nd->nd_repstat)
 2560                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2561                         attrsum += NFSX_V4TIME;
 2562                         break;
 2563                 case NFSATTRBIT_TIMECREATE:
 2564                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 2565                         if (!nd->nd_repstat)
 2566                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2567                         attrsum += NFSX_V4TIME;
 2568                         break;
 2569                 case NFSATTRBIT_TIMEMODIFYSET:
 2570                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2571                         attrsum += NFSX_UNSIGNED;
 2572                         if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 2573                             NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 2574                             fxdr_nfsv4time(tl, &nvap->na_mtime);
 2575                             nvap->na_vaflags &= ~VA_UTIMES_NULL;
 2576                             attrsum += NFSX_V4TIME;
 2577                         } else {
 2578                             vfs_timestamp(&nvap->na_mtime);
 2579                             if (!toclient)
 2580                                 nvap->na_vaflags |= VA_UTIMES_NULL;
 2581                         }
 2582                         break;
 2583                 default:
 2584                         nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2585                         /*
 2586                          * set bitpos so we drop out of the loop.
 2587                          */
 2588                         bitpos = NFSATTRBIT_MAX;
 2589                         break;
 2590                 };
 2591         }
 2592 
 2593         /*
 2594          * some clients pad the attrlist, so we need to skip over the
 2595          * padding.
 2596          */
 2597         if (attrsum > attrsize) {
 2598                 error = NFSERR_BADXDR;
 2599         } else {
 2600                 attrsize = NFSM_RNDUP(attrsize);
 2601                 if (attrsum < attrsize)
 2602                         error = nfsm_advance(nd, attrsize - attrsum, -1);
 2603         }
 2604 nfsmout:
 2605         NFSEXITCODE2(error, nd);
 2606         return (error);
 2607 }
 2608 
 2609 /*
 2610  * Check/setup export credentials.
 2611  */
 2612 int
 2613 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
 2614     struct ucred *credanon)
 2615 {
 2616         int error = 0;
 2617 
 2618         /*
 2619          * Check/setup credentials.
 2620          */
 2621         if (nd->nd_flag & ND_GSS)
 2622                 exp->nes_exflag &= ~MNT_EXPORTANON;
 2623 
 2624         /*
 2625          * Check to see if the operation is allowed for this security flavor.
 2626          * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
 2627          * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
 2628          * Also, allow Secinfo, so that it can acquire the correct flavor(s).
 2629          */
 2630         if (nfsvno_testexp(nd, exp) &&
 2631             nd->nd_procnum != NFSV4OP_SECINFO &&
 2632             nd->nd_procnum != NFSPROC_FSINFO) {
 2633                 if (nd->nd_flag & ND_NFSV4)
 2634                         error = NFSERR_WRONGSEC;
 2635                 else
 2636                         error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
 2637                 goto out;
 2638         }
 2639 
 2640         /*
 2641          * Check to see if the file system is exported V4 only.
 2642          */
 2643         if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
 2644                 error = NFSERR_PROGNOTV4;
 2645                 goto out;
 2646         }
 2647 
 2648         /*
 2649          * Now, map the user credentials.
 2650          * (Note that ND_AUTHNONE will only be set for an NFSv3
 2651          *  Fsinfo RPC. If set for anything else, this code might need
 2652          *  to change.)
 2653          */
 2654         if (NFSVNO_EXPORTED(exp) &&
 2655             ((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) ||
 2656              NFSVNO_EXPORTANON(exp) ||
 2657              (nd->nd_flag & ND_AUTHNONE))) {
 2658                 nd->nd_cred->cr_uid = credanon->cr_uid;
 2659                 nd->nd_cred->cr_gid = credanon->cr_gid;
 2660                 crsetgroups(nd->nd_cred, credanon->cr_ngroups,
 2661                     credanon->cr_groups);
 2662         }
 2663 
 2664 out:
 2665         NFSEXITCODE2(error, nd);
 2666         return (error);
 2667 }
 2668 
 2669 /*
 2670  * Check exports.
 2671  */
 2672 int
 2673 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
 2674     struct ucred **credp)
 2675 {
 2676         int i, error, *secflavors;
 2677 
 2678         error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
 2679             &exp->nes_numsecflavor, &secflavors);
 2680         if (error) {
 2681                 if (nfs_rootfhset) {
 2682                         exp->nes_exflag = 0;
 2683                         exp->nes_numsecflavor = 0;
 2684                         error = 0;
 2685                 }
 2686         } else {
 2687                 /* Copy the security flavors. */
 2688                 for (i = 0; i < exp->nes_numsecflavor; i++)
 2689                         exp->nes_secflavors[i] = secflavors[i];
 2690         }
 2691         NFSEXITCODE(error);
 2692         return (error);
 2693 }
 2694 
 2695 /*
 2696  * Get a vnode for a file handle and export stuff.
 2697  */
 2698 int
 2699 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
 2700     int lktype, struct vnode **vpp, struct nfsexstuff *exp,
 2701     struct ucred **credp)
 2702 {
 2703         int i, error, *secflavors;
 2704 
 2705         *credp = NULL;
 2706         exp->nes_numsecflavor = 0;
 2707         if (VFS_NEEDSGIANT(mp))
 2708                 error = ESTALE;
 2709         else
 2710                 error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
 2711         if (error != 0)
 2712                 /* Make sure the server replies ESTALE to the client. */
 2713                 error = ESTALE;
 2714         if (nam && !error) {
 2715                 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
 2716                     &exp->nes_numsecflavor, &secflavors);
 2717                 if (error) {
 2718                         if (nfs_rootfhset) {
 2719                                 exp->nes_exflag = 0;
 2720                                 exp->nes_numsecflavor = 0;
 2721                                 error = 0;
 2722                         } else {
 2723                                 vput(*vpp);
 2724                         }
 2725                 } else {
 2726                         /* Copy the security flavors. */
 2727                         for (i = 0; i < exp->nes_numsecflavor; i++)
 2728                                 exp->nes_secflavors[i] = secflavors[i];
 2729                 }
 2730         }
 2731         NFSEXITCODE(error);
 2732         return (error);
 2733 }
 2734 
 2735 /*
 2736  * nfsd_fhtovp() - convert a fh to a vnode ptr
 2737  *      - look up fsid in mount list (if not found ret error)
 2738  *      - get vp and export rights by calling nfsvno_fhtovp()
 2739  *      - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
 2740  *        for AUTH_SYS
 2741  *      - if mpp != NULL, return the mount point so that it can
 2742  *        be used for vn_finished_write() by the caller
 2743  */
 2744 void
 2745 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
 2746     struct vnode **vpp, struct nfsexstuff *exp,
 2747     struct mount **mpp, int startwrite, struct thread *p)
 2748 {
 2749         struct mount *mp;
 2750         struct ucred *credanon;
 2751         fhandle_t *fhp;
 2752 
 2753         fhp = (fhandle_t *)nfp->nfsrvfh_data;
 2754         /*
 2755          * Check for the special case of the nfsv4root_fh.
 2756          */
 2757         mp = vfs_busyfs(&fhp->fh_fsid);
 2758         if (mpp != NULL)
 2759                 *mpp = mp;
 2760         if (mp == NULL) {
 2761                 *vpp = NULL;
 2762                 nd->nd_repstat = ESTALE;
 2763                 goto out;
 2764         }
 2765 
 2766         if (startwrite) {
 2767                 vn_start_write(NULL, mpp, V_WAIT);
 2768                 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
 2769                         lktype = LK_EXCLUSIVE;
 2770         }
 2771         nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
 2772             &credanon);
 2773         vfs_unbusy(mp);
 2774 
 2775         /*
 2776          * For NFSv4 without a pseudo root fs, unexported file handles
 2777          * can be returned, so that Lookup works everywhere.
 2778          */
 2779         if (!nd->nd_repstat && exp->nes_exflag == 0 &&
 2780             !(nd->nd_flag & ND_NFSV4)) {
 2781                 vput(*vpp);
 2782                 nd->nd_repstat = EACCES;
 2783         }
 2784 
 2785         /*
 2786          * Personally, I've never seen any point in requiring a
 2787          * reserved port#, since only in the rare case where the
 2788          * clients are all boxes with secure system priviledges,
 2789          * does it provide any enhanced security, but... some people
 2790          * believe it to be useful and keep putting this code back in.
 2791          * (There is also some "security checker" out there that
 2792          *  complains if the nfs server doesn't enforce this.)
 2793          * However, note the following:
 2794          * RFC3530 (NFSv4) specifies that a reserved port# not be
 2795          *      required.
 2796          * RFC2623 recommends that, if a reserved port# is checked for,
 2797          *      that there be a way to turn that off--> ifdef'd.
 2798          */
 2799 #ifdef NFS_REQRSVPORT
 2800         if (!nd->nd_repstat) {
 2801                 struct sockaddr_in *saddr;
 2802                 struct sockaddr_in6 *saddr6;
 2803 
 2804                 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
 2805                 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
 2806                 if (!(nd->nd_flag & ND_NFSV4) &&
 2807                     ((saddr->sin_family == AF_INET &&
 2808                       ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
 2809                      (saddr6->sin6_family == AF_INET6 &&
 2810                       ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
 2811                         vput(*vpp);
 2812                         nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
 2813                 }
 2814         }
 2815 #endif  /* NFS_REQRSVPORT */
 2816 
 2817         /*
 2818          * Check/setup credentials.
 2819          */
 2820         if (!nd->nd_repstat) {
 2821                 nd->nd_saveduid = nd->nd_cred->cr_uid;
 2822                 nd->nd_repstat = nfsd_excred(nd, exp, credanon);
 2823                 if (nd->nd_repstat)
 2824                         vput(*vpp);
 2825         }
 2826         if (credanon != NULL)
 2827                 crfree(credanon);
 2828         if (nd->nd_repstat) {
 2829                 if (startwrite)
 2830                         vn_finished_write(mp);
 2831                 *vpp = NULL;
 2832                 if (mpp != NULL)
 2833                         *mpp = NULL;
 2834         }
 2835 
 2836 out:
 2837         NFSEXITCODE2(0, nd);
 2838 }
 2839 
 2840 /*
 2841  * glue for fp.
 2842  */
 2843 int
 2844 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
 2845 {
 2846         struct filedesc *fdp;
 2847         struct file *fp;
 2848         int error = 0;
 2849 
 2850         fdp = p->td_proc->p_fd;
 2851         if (fd >= fdp->fd_nfiles ||
 2852             (fp = fdp->fd_ofiles[fd]) == NULL) {
 2853                 error = EBADF;
 2854                 goto out;
 2855         }
 2856         *fpp = fp;
 2857 
 2858 out:
 2859         NFSEXITCODE(error);
 2860         return (error);
 2861 }
 2862 
 2863 /*
 2864  * Called from nfssvc() to update the exports list. Just call
 2865  * vfs_export(). This has to be done, since the v4 root fake fs isn't
 2866  * in the mount list.
 2867  */
 2868 int
 2869 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
 2870 {
 2871         struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
 2872         int error = 0;
 2873         struct nameidata nd;
 2874         fhandle_t fh;
 2875 
 2876         error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
 2877         if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
 2878                 nfs_rootfhset = 0;
 2879         else if (error == 0) {
 2880                 if (nfsexargp->fspec == NULL) {
 2881                         error = EPERM;
 2882                         goto out;
 2883                 }
 2884                 /*
 2885                  * If fspec != NULL, this is the v4root path.
 2886                  */
 2887                 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE,
 2888                     nfsexargp->fspec, p);
 2889                 if ((error = namei(&nd)) != 0)
 2890                         goto out;
 2891                 error = nfsvno_getfh(nd.ni_vp, &fh, p);
 2892                 vrele(nd.ni_vp);
 2893                 if (!error) {
 2894                         nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
 2895                         NFSBCOPY((caddr_t)&fh,
 2896                             nfs_rootfh.nfsrvfh_data,
 2897                             sizeof (fhandle_t));
 2898                         nfs_rootfhset = 1;
 2899                 }
 2900         }
 2901 
 2902 out:
 2903         NFSEXITCODE(error);
 2904         return (error);
 2905 }
 2906 
 2907 /*
 2908  * This function needs to test to see if the system is near its limit
 2909  * for memory allocation via malloc() or mget() and return True iff
 2910  * either of these resources are near their limit.
 2911  * XXX (For now, this is just a stub.)
 2912  */
 2913 int nfsrv_testmalloclimit = 0;
 2914 int
 2915 nfsrv_mallocmget_limit(void)
 2916 {
 2917         static int printmesg = 0;
 2918         static int testval = 1;
 2919 
 2920         if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) {
 2921                 if ((printmesg++ % 100) == 0)
 2922                         printf("nfsd: malloc/mget near limit\n");
 2923                 return (1);
 2924         }
 2925         return (0);
 2926 }
 2927 
 2928 /*
 2929  * BSD specific initialization of a mount point.
 2930  */
 2931 void
 2932 nfsd_mntinit(void)
 2933 {
 2934         static int inited = 0;
 2935 
 2936         if (inited)
 2937                 return;
 2938         inited = 1;
 2939         nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
 2940         TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
 2941         TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
 2942         nfsv4root_mnt.mnt_export = NULL;
 2943         TAILQ_INIT(&nfsv4root_opt);
 2944         TAILQ_INIT(&nfsv4root_newopt);
 2945         nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
 2946         nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
 2947         nfsv4root_mnt.mnt_nvnodelistsize = 0;
 2948         nfsv4root_mnt.mnt_activevnodelistsize = 0;
 2949 }
 2950 
 2951 /*
 2952  * Get a vnode for a file handle, without checking exports, etc.
 2953  */
 2954 struct vnode *
 2955 nfsvno_getvp(fhandle_t *fhp)
 2956 {
 2957         struct mount *mp;
 2958         struct vnode *vp;
 2959         int error;
 2960 
 2961         mp = vfs_busyfs(&fhp->fh_fsid);
 2962         if (mp == NULL)
 2963                 return (NULL);
 2964         error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
 2965         vfs_unbusy(mp);
 2966         if (error)
 2967                 return (NULL);
 2968         return (vp);
 2969 }
 2970 
 2971 /*
 2972  * Do a local VOP_ADVLOCK().
 2973  */
 2974 int
 2975 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
 2976     u_int64_t end, struct thread *td)
 2977 {
 2978         int error = 0;
 2979         struct flock fl;
 2980         u_int64_t tlen;
 2981 
 2982         if (nfsrv_dolocallocks == 0)
 2983                 goto out;
 2984         ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");
 2985 
 2986         fl.l_whence = SEEK_SET;
 2987         fl.l_type = ftype;
 2988         fl.l_start = (off_t)first;
 2989         if (end == NFS64BITSSET) {
 2990                 fl.l_len = 0;
 2991         } else {
 2992                 tlen = end - first;
 2993                 fl.l_len = (off_t)tlen;
 2994         }
 2995         /*
 2996          * For FreeBSD8, the l_pid and l_sysid must be set to the same
 2997          * values for all calls, so that all locks will be held by the
 2998          * nfsd server. (The nfsd server handles conflicts between the
 2999          * various clients.)
 3000          * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
 3001          * bytes, so it can't be put in l_sysid.
 3002          */
 3003         if (nfsv4_sysid == 0)
 3004                 nfsv4_sysid = nlm_acquire_next_sysid();
 3005         fl.l_pid = (pid_t)0;
 3006         fl.l_sysid = (int)nfsv4_sysid;
 3007 
 3008         if (ftype == F_UNLCK)
 3009                 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
 3010                     (F_POSIX | F_REMOTE));
 3011         else
 3012                 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
 3013                     (F_POSIX | F_REMOTE));
 3014 
 3015 out:
 3016         NFSEXITCODE(error);
 3017         return (error);
 3018 }
 3019 
 3020 /*
 3021  * Check the nfsv4 root exports.
 3022  */
 3023 int
 3024 nfsvno_v4rootexport(struct nfsrv_descript *nd)
 3025 {
 3026         struct ucred *credanon;
 3027         int exflags, error = 0, numsecflavor, *secflavors, i;
 3028 
 3029         error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
 3030             &credanon, &numsecflavor, &secflavors);
 3031         if (error) {
 3032                 error = NFSERR_PROGUNAVAIL;
 3033                 goto out;
 3034         }
 3035         if (credanon != NULL)
 3036                 crfree(credanon);
 3037         for (i = 0; i < numsecflavor; i++) {
 3038                 if (secflavors[i] == AUTH_SYS)
 3039                         nd->nd_flag |= ND_EXAUTHSYS;
 3040                 else if (secflavors[i] == RPCSEC_GSS_KRB5)
 3041                         nd->nd_flag |= ND_EXGSS;
 3042                 else if (secflavors[i] == RPCSEC_GSS_KRB5I)
 3043                         nd->nd_flag |= ND_EXGSSINTEGRITY;
 3044                 else if (secflavors[i] == RPCSEC_GSS_KRB5P)
 3045                         nd->nd_flag |= ND_EXGSSPRIVACY;
 3046         }
 3047 
 3048 out:
 3049         NFSEXITCODE(error);
 3050         return (error);
 3051 }
 3052 
 3053 /*
 3054  * Nfs server psuedo system call for the nfsd's
 3055  */
 3056 /*
 3057  * MPSAFE
 3058  */
 3059 static int
 3060 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
 3061 {
 3062         struct file *fp;
 3063         struct nfsd_addsock_args sockarg;
 3064         struct nfsd_nfsd_args nfsdarg;
 3065         int error;
 3066 
 3067         if (uap->flag & NFSSVC_NFSDADDSOCK) {
 3068                 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
 3069                 if (error)
 3070                         goto out;
 3071                 /*
 3072                  * Since we don't know what rights might be required,
 3073                  * pretend that we need them all. It is better to be too
 3074                  * careful than too reckless.
 3075                  */
 3076                 if ((error = fget(td, sockarg.sock, CAP_SOCK_ALL, &fp)) != 0)
 3077                         goto out;
 3078                 if (fp->f_type != DTYPE_SOCKET) {
 3079                         fdrop(fp, td);
 3080                         error = EPERM;
 3081                         goto out;
 3082                 }
 3083                 error = nfsrvd_addsock(fp);
 3084                 fdrop(fp, td);
 3085         } else if (uap->flag & NFSSVC_NFSDNFSD) {
 3086                 if (uap->argp == NULL) {
 3087                         error = EINVAL;
 3088                         goto out;
 3089                 }
 3090                 error = copyin(uap->argp, (caddr_t)&nfsdarg,
 3091                     sizeof (nfsdarg));
 3092                 if (error)
 3093                         goto out;
 3094                 error = nfsrvd_nfsd(td, &nfsdarg);
 3095         } else {
 3096                 error = nfssvc_srvcall(td, uap, td->td_ucred);
 3097         }
 3098 
 3099 out:
 3100         NFSEXITCODE(error);
 3101         return (error);
 3102 }
 3103 
 3104 static int
 3105 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
 3106 {
 3107         struct nfsex_args export;
 3108         struct file *fp = NULL;
 3109         int stablefd, len;
 3110         struct nfsd_clid adminrevoke;
 3111         struct nfsd_dumplist dumplist;
 3112         struct nfsd_dumpclients *dumpclients;
 3113         struct nfsd_dumplocklist dumplocklist;
 3114         struct nfsd_dumplocks *dumplocks;
 3115         struct nameidata nd;
 3116         vnode_t vp;
 3117         int error = EINVAL, igotlock;
 3118         struct proc *procp;
 3119         static int suspend_nfsd = 0;
 3120 
 3121         if (uap->flag & NFSSVC_PUBLICFH) {
 3122                 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
 3123                     sizeof (fhandle_t));
 3124                 error = copyin(uap->argp,
 3125                     &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
 3126                 if (!error)
 3127                         nfs_pubfhset = 1;
 3128         } else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
 3129                 error = copyin(uap->argp,(caddr_t)&export,
 3130                     sizeof (struct nfsex_args));
 3131                 if (!error)
 3132                         error = nfsrv_v4rootexport(&export, cred, p);
 3133         } else if (uap->flag & NFSSVC_NOPUBLICFH) {
 3134                 nfs_pubfhset = 0;
 3135                 error = 0;
 3136         } else if (uap->flag & NFSSVC_STABLERESTART) {
 3137                 error = copyin(uap->argp, (caddr_t)&stablefd,
 3138                     sizeof (int));
 3139                 if (!error)
 3140                         error = fp_getfvp(p, stablefd, &fp, &vp);
 3141                 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
 3142                         error = EBADF;
 3143                 if (!error && newnfs_numnfsd != 0)
 3144                         error = EPERM;
 3145                 if (!error) {
 3146                         nfsrv_stablefirst.nsf_fp = fp;
 3147                         nfsrv_setupstable(p);
 3148                 }
 3149         } else if (uap->flag & NFSSVC_ADMINREVOKE) {
 3150                 error = copyin(uap->argp, (caddr_t)&adminrevoke,
 3151                     sizeof (struct nfsd_clid));
 3152                 if (!error)
 3153                         error = nfsrv_adminrevoke(&adminrevoke, p);
 3154         } else if (uap->flag & NFSSVC_DUMPCLIENTS) {
 3155                 error = copyin(uap->argp, (caddr_t)&dumplist,
 3156                     sizeof (struct nfsd_dumplist));
 3157                 if (!error && (dumplist.ndl_size < 1 ||
 3158                         dumplist.ndl_size > NFSRV_MAXDUMPLIST))
 3159                         error = EPERM;
 3160                 if (!error) {
 3161                     len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
 3162                     dumpclients = (struct nfsd_dumpclients *)malloc(len,
 3163                         M_TEMP, M_WAITOK);
 3164                     nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
 3165                     error = copyout(dumpclients,
 3166                         CAST_USER_ADDR_T(dumplist.ndl_list), len);
 3167                     free((caddr_t)dumpclients, M_TEMP);
 3168                 }
 3169         } else if (uap->flag & NFSSVC_DUMPLOCKS) {
 3170                 error = copyin(uap->argp, (caddr_t)&dumplocklist,
 3171                     sizeof (struct nfsd_dumplocklist));
 3172                 if (!error && (dumplocklist.ndllck_size < 1 ||
 3173                         dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
 3174                         error = EPERM;
 3175                 if (!error)
 3176                         error = nfsrv_lookupfilename(&nd,
 3177                                 dumplocklist.ndllck_fname, p);
 3178                 if (!error) {
 3179                         len = sizeof (struct nfsd_dumplocks) *
 3180                                 dumplocklist.ndllck_size;
 3181                         dumplocks = (struct nfsd_dumplocks *)malloc(len,
 3182                                 M_TEMP, M_WAITOK);
 3183                         nfsrv_dumplocks(nd.ni_vp, dumplocks,
 3184                             dumplocklist.ndllck_size, p);
 3185                         vput(nd.ni_vp);
 3186                         error = copyout(dumplocks,
 3187                             CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
 3188                         free((caddr_t)dumplocks, M_TEMP);
 3189                 }
 3190         } else if (uap->flag & NFSSVC_BACKUPSTABLE) {
 3191                 procp = p->td_proc;
 3192                 PROC_LOCK(procp);
 3193                 nfsd_master_pid = procp->p_pid;
 3194                 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
 3195                 nfsd_master_start = procp->p_stats->p_start;
 3196                 nfsd_master_proc = procp;
 3197                 PROC_UNLOCK(procp);
 3198         } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
 3199                 NFSLOCKV4ROOTMUTEX();
 3200                 if (suspend_nfsd == 0) {
 3201                         /* Lock out all nfsd threads */
 3202                         do {
 3203                                 igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
 3204                                     NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 3205                         } while (igotlock == 0 && suspend_nfsd == 0);
 3206                         suspend_nfsd = 1;
 3207                 }
 3208                 NFSUNLOCKV4ROOTMUTEX();
 3209                 error = 0;
 3210         } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
 3211                 NFSLOCKV4ROOTMUTEX();
 3212                 if (suspend_nfsd != 0) {
 3213                         nfsv4_unlock(&nfsd_suspend_lock, 0);
 3214                         suspend_nfsd = 0;
 3215                 }
 3216                 NFSUNLOCKV4ROOTMUTEX();
 3217                 error = 0;
 3218         }
 3219 
 3220         NFSEXITCODE(error);
 3221         return (error);
 3222 }
 3223 
 3224 /*
 3225  * Check exports.
 3226  * Returns 0 if ok, 1 otherwise.
 3227  */
 3228 int
 3229 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
 3230 {
 3231         int i;
 3232 
 3233         /*
 3234          * This seems odd, but allow the case where the security flavor
 3235          * list is empty. This happens when NFSv4 is traversing non-exported
 3236          * file systems. Exported file systems should always have a non-empty
 3237          * security flavor list.
 3238          */
 3239         if (exp->nes_numsecflavor == 0)
 3240                 return (0);
 3241 
 3242         for (i = 0; i < exp->nes_numsecflavor; i++) {
 3243                 /*
 3244                  * The tests for privacy and integrity must be first,
 3245                  * since ND_GSS is set for everything but AUTH_SYS.
 3246                  */
 3247                 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
 3248                     (nd->nd_flag & ND_GSSPRIVACY))
 3249                         return (0);
 3250                 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
 3251                     (nd->nd_flag & ND_GSSINTEGRITY))
 3252                         return (0);
 3253                 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
 3254                     (nd->nd_flag & ND_GSS))
 3255                         return (0);
 3256                 if (exp->nes_secflavors[i] == AUTH_SYS &&
 3257                     (nd->nd_flag & ND_GSS) == 0)
 3258                         return (0);
 3259         }
 3260         return (1);
 3261 }
 3262 
 3263 /*
 3264  * Calculate a hash value for the fid in a file handle.
 3265  */
 3266 uint32_t
 3267 nfsrv_hashfh(fhandle_t *fhp)
 3268 {
 3269         uint32_t hashval;
 3270 
 3271         hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
 3272         return (hashval);
 3273 }
 3274 
 3275 /*
 3276  * Signal the userland master nfsd to backup the stable restart file.
 3277  */
 3278 void
 3279 nfsrv_backupstable(void)
 3280 {
 3281         struct proc *procp;
 3282 
 3283         if (nfsd_master_proc != NULL) {
 3284                 procp = pfind(nfsd_master_pid);
 3285                 /* Try to make sure it is the correct process. */
 3286                 if (procp == nfsd_master_proc &&
 3287                     procp->p_stats->p_start.tv_sec ==
 3288                     nfsd_master_start.tv_sec &&
 3289                     procp->p_stats->p_start.tv_usec ==
 3290                     nfsd_master_start.tv_usec &&
 3291                     strcmp(procp->p_comm, nfsd_master_comm) == 0)
 3292                         kern_psignal(procp, SIGUSR2);
 3293                 else
 3294                         nfsd_master_proc = NULL;
 3295 
 3296                 if (procp != NULL)
 3297                         PROC_UNLOCK(procp);
 3298         }
 3299 }
 3300 
 3301 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
 3302 
 3303 /*
 3304  * Called once to initialize data structures...
 3305  */
 3306 static int
 3307 nfsd_modevent(module_t mod, int type, void *data)
 3308 {
 3309         int error = 0, i;
 3310         static int loaded = 0;
 3311 
 3312         switch (type) {
 3313         case MOD_LOAD:
 3314                 if (loaded)
 3315                         goto out;
 3316                 newnfs_portinit();
 3317                 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 3318                         snprintf(nfsrchash_table[i].lock_name,
 3319                             sizeof(nfsrchash_table[i].lock_name), "nfsrc_tcp%d",
 3320                             i);
 3321                         mtx_init(&nfsrchash_table[i].mtx,
 3322                             nfsrchash_table[i].lock_name, NULL, MTX_DEF);
 3323                         snprintf(nfsrcahash_table[i].lock_name,
 3324                             sizeof(nfsrcahash_table[i].lock_name), "nfsrc_tcpa%d",
 3325                             i);
 3326                         mtx_init(&nfsrcahash_table[i].mtx,
 3327                             nfsrcahash_table[i].lock_name, NULL, MTX_DEF);
 3328                 }
 3329                 mtx_init(&nfsrc_udpmtx, "nfs_udpcache_mutex", NULL, MTX_DEF);
 3330                 mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
 3331                 mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
 3332                     MTX_DEF);
 3333                 lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
 3334                 nfsrvd_initcache();
 3335                 nfsd_init();
 3336                 NFSD_LOCK();
 3337                 nfsrvd_init(0);
 3338                 NFSD_UNLOCK();
 3339                 nfsd_mntinit();
 3340 #ifdef VV_DISABLEDELEG
 3341                 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
 3342                 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
 3343 #endif
 3344                 nfsd_call_servertimer = nfsrv_servertimer;
 3345                 nfsd_call_nfsd = nfssvc_nfsd;
 3346                 loaded = 1;
 3347                 break;
 3348 
 3349         case MOD_UNLOAD:
 3350                 if (newnfs_numnfsd != 0) {
 3351                         error = EBUSY;
 3352                         break;
 3353                 }
 3354 
 3355 #ifdef VV_DISABLEDELEG
 3356                 vn_deleg_ops.vndeleg_recall = NULL;
 3357                 vn_deleg_ops.vndeleg_disable = NULL;
 3358 #endif
 3359                 nfsd_call_servertimer = NULL;
 3360                 nfsd_call_nfsd = NULL;
 3361 
 3362                 /* Clean out all NFSv4 state. */
 3363                 nfsrv_throwawayallstate(curthread);
 3364 
 3365                 /* Clean the NFS server reply cache */
 3366                 nfsrvd_cleancache();
 3367 
 3368                 /* Free up the krpc server pool. */
 3369                 if (nfsrvd_pool != NULL)
 3370                         svcpool_destroy(nfsrvd_pool);
 3371 
 3372                 /* and get rid of the locks */
 3373                 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 3374                         mtx_destroy(&nfsrchash_table[i].mtx);
 3375                         mtx_destroy(&nfsrcahash_table[i].mtx);
 3376                 }
 3377                 mtx_destroy(&nfsrc_udpmtx);
 3378                 mtx_destroy(&nfs_v4root_mutex);
 3379                 mtx_destroy(&nfsv4root_mnt.mnt_mtx);
 3380                 lockdestroy(&nfsv4root_mnt.mnt_explock);
 3381                 loaded = 0;
 3382                 break;
 3383         default:
 3384                 error = EOPNOTSUPP;
 3385                 break;
 3386         }
 3387 
 3388 out:
 3389         NFSEXITCODE(error);
 3390         return (error);
 3391 }
 3392 static moduledata_t nfsd_mod = {
 3393         "nfsd",
 3394         nfsd_modevent,
 3395         NULL,
 3396 };
 3397 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
 3398 
 3399 /* So that loader and kldload(2) can find us, wherever we are.. */
 3400 MODULE_VERSION(nfsd, 1);
 3401 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
 3402 MODULE_DEPEND(nfsd, nfslock, 1, 1, 1);
 3403 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
 3404 MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
 3405 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
 3406 

Cache object: 696bdbecf4120664efc4fbc4ef7e3b45


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.