The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/nfsserver/nfs_nfsdport.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1989, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * This code is derived from software contributed to Berkeley by
    8  * Rick Macklem at The University of Guelph.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  */
   35 
   36 #include <sys/cdefs.h>
   37 __FBSDID("$FreeBSD$");
   38 
   39 #include <sys/capsicum.h>
   40 #include <sys/extattr.h>
   41 
   42 /*
   43  * Functions that perform the vfs operations required by the routines in
   44  * nfsd_serv.c. It is hoped that this change will make the server more
   45  * portable.
   46  */
   47 
   48 #include <fs/nfs/nfsport.h>
   49 #include <sys/hash.h>
   50 #include <sys/sysctl.h>
   51 #include <nlm/nlm_prot.h>
   52 #include <nlm/nlm.h>
   53 
   54 FEATURE(nfsd, "NFSv4 server");
   55 
      /*
       * Symbols declared extern; no definition for them appears in this
       * declaration block.
       */
   56 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
   57 extern int nfsrv_useacl;
   58 extern int newnfs_numnfsd;
   59 extern struct mount nfsv4root_mnt;
   60 extern struct nfsrv_stablefirst nfsrv_stablefirst;
   61 extern void (*nfsd_call_servertimer)(void);
   62 extern SVCPOOL  *nfsrvd_pool;
   63 extern struct nfsv4lock nfsd_suspend_lock;
   64 extern struct nfsclienthashhead *nfsclienthash;
   65 extern struct nfslockhashhead *nfslockhash;
   66 extern struct nfssessionhash *nfssessionhash;
   67 extern int nfsrv_sessionhashsize;
   68 extern struct nfsstatsv1 nfsstatsv1;
   69 extern struct nfslayouthash *nfslayouthash;
   70 extern int nfsrv_layouthashsize;
   71 extern struct mtx nfsrv_dslock_mtx;
   72 extern int nfs_pnfsiothreads;
   73 extern struct nfsdontlisthead nfsrv_dontlisthead;
   74 extern volatile int nfsrv_dontlistlen;
   75 extern volatile int nfsrv_devidcnt;
   76 extern int nfsrv_maxpnfsmirror;
      /*
       * Non-extern file-scope objects.  NFSDLOCKMUTEX and NFSSTATESPINLOCK
       * are macros whose expansion is not visible here.
       */
   77 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
   78 NFSDLOCKMUTEX;
   79 NFSSTATESPINLOCK;
   80 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
   81 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
   82 struct mtx nfsrc_udpmtx;
   83 struct mtx nfs_v4root_mutex;
   84 struct mtx nfsrv_dontlistlock_mtx;
   85 struct mtx nfsrv_recalllock_mtx;
   86 struct nfsrvfh nfs_rootfh, nfs_pubfh;
   87 int nfs_pubfhset = 0, nfs_rootfhset = 0;
   88 struct proc *nfsd_master_proc = NULL;
      /* Exported read/write as the vfs.nfsd "debuglevel" sysctl. */
   89 int nfsd_debuglevel = 0;
      /* File-local (static) state. */
   90 static pid_t nfsd_master_pid = (pid_t)-1;
   91 static char nfsd_master_comm[MAXCOMLEN + 1];
   92 static struct timeval nfsd_master_start;
   93 static uint32_t nfsv4_sysid = 0;
   94 static fhandle_t zerofh;
   95 
      /* Forward declaration of a file-local (static) function. */
   96 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
   97     struct ucred *);
   98 
      /*
       * Tunables and counters.  nfsrv_enable_crossmntpt, nfs_commit_blks and
       * nfs_commit_miss are defined here; the extern ones are only declared.
       * All but nfsrv_devidhead are published as read/write sysctls further
       * down in this file.
       */
   99 int nfsrv_enable_crossmntpt = 1;
  100 static int nfs_commit_blks;
  101 static int nfs_commit_miss;
  102 extern int nfsrv_issuedelegs;
  103 extern int nfsrv_dolocallocks;
  104 extern int nfsd_enable_stringtouid;
  105 extern struct nfsdevicehead nfsrv_devidhead;
  106 
      /* Forward declarations of the file-local (static) pNFS/DS helpers. */
  107 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
  108     NFSPROC_T *);
  109 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **,
  110     int *, char *, fhandle_t *);
  111 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *,
  112     NFSPROC_T *);
  113 static int nfsrv_proxyds(struct nfsrv_descript *, struct vnode *, off_t, int,
  114     struct ucred *, struct thread *, int, struct mbuf **, char *,
  115     struct mbuf **, struct nfsvattr *, struct acl *);
  116 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *);
  117 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *,
  118     NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **);
  119 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
  120     NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **,
  121     char *, int *);
  122 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
  123     struct vnode *, struct nfsmount **, int, struct acl *, int *);
  124 static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
  125     struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *);
  126 static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
  127     struct vnode *, struct nfsmount *, struct nfsvattr *);
  128 static int nfsrv_putfhname(fhandle_t *, char *);
  129 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
  130     struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
  131 static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *,
  132     struct vnode *, NFSPROC_T *);
  133 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
  134 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
  135     NFSPROC_T *);
  136 static int nfsrv_pnfsstatfs(struct statfs *, struct mount *);
  137 
      /* Non-static prototype (external linkage). */
  138 int nfs_pnfsio(task_fn_t *, void *);
  139 
      /*
       * The "nfsd" sysctl node under _vfs and the integer knobs attached to
       * it.  Every entry below is registered CTLFLAG_RW.
       */
  140 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW, 0, "NFS server");
  141 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
  142     &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
      /* commit_blks and commit_miss are registered with empty descriptions. */
  143 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
  144     0, "");
  145 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
  146     0, "");
  147 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
  148     &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
  149 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
  150     &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
  151 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
  152     0, "Debug level for NFS server");
  153 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW,
  154     &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names");
      /* File-local knob, initialized to 1 (enabled). */
  155 static int nfsrv_pnfsgetdsattr = 1;
  156 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW,
  157     &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC");
  158 
  159 /*
  160  * nfsrv_dsdirsize can only be increased and only when the nfsd threads are
  161  * not running.
  162  * The dsN subdirectories for the increased values must have been created
  163  * on all DS servers before this increase is done.
  164  */
  165 u_int   nfsrv_dsdirsize = 20;
  166 static int
  167 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS)
  168 {
  169         int error, newdsdirsize;
  170 
  171         newdsdirsize = nfsrv_dsdirsize;
  172         error = sysctl_handle_int(oidp, &newdsdirsize, 0, req);
  173         if (error != 0 || req->newptr == NULL)
  174                 return (error);
  175         if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 ||
  176             newnfs_numnfsd != 0)
  177                 return (EINVAL);
  178         nfsrv_dsdirsize = newdsdirsize;
  179         return (0);
  180 }
  181 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize, CTLTYPE_UINT | CTLFLAG_RW, 0,
  182     sizeof(nfsrv_dsdirsize), sysctl_dsdirsize, "IU",
  183     "Number of dsN subdirs on the DS servers");
  184 
      /*
       * Constants used by nfsrv_sequential_heuristic():
       *   MAX_REORDERED_RPC - multiplier for the offset window within which a
       *                       non-sequential request leaves nh_seqcount alone
       *   NUM_HEURISTIC     - number of entries in nfsheur[]
       *   NHUSE_INIT        - nh_use given to a newly claimed entry
       *   NHUSE_INC         - amount added to nh_use on every call
       *   NHUSE_MAX         - upper bound applied to nh_use
       */
  185 #define MAX_REORDERED_RPC       16
  186 #define NUM_HEURISTIC           1031
  187 #define NHUSE_INIT              64
  188 #define NHUSE_INC               16
  189 #define NHUSE_MAX               2048
  190 
      /* Fixed-size table of per-vnode sequential-access hints. */
  191 static struct nfsheur {
  192         struct vnode *nh_vp;    /* vp to match (unreferenced pointer) */
  193         off_t nh_nextoff;       /* next offset for sequential detection */
  194         int nh_use;             /* use count for selection */
  195         int nh_seqcount;        /* heuristic */
  196 } nfsheur[NUM_HEURISTIC];
  197 
  198 
  199 /*
  200  * Heuristic to detect sequential operation.
  201  */
  202 static struct nfsheur *
  203 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
  204 {
  205         struct nfsheur *nh;
  206         int hi, try;
  207 
  208         /* Locate best candidate. */
  209         try = 32;
  210         hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
  211         nh = &nfsheur[hi];
  212         while (try--) {
  213                 if (nfsheur[hi].nh_vp == vp) {
  214                         nh = &nfsheur[hi];
  215                         break;
  216                 }
  217                 if (nfsheur[hi].nh_use > 0)
  218                         --nfsheur[hi].nh_use;
  219                 hi = (hi + 1) % NUM_HEURISTIC;
  220                 if (nfsheur[hi].nh_use < nh->nh_use)
  221                         nh = &nfsheur[hi];
  222         }
  223 
  224         /* Initialize hint if this is a new file. */
  225         if (nh->nh_vp != vp) {
  226                 nh->nh_vp = vp;
  227                 nh->nh_nextoff = uio->uio_offset;
  228                 nh->nh_use = NHUSE_INIT;
  229                 if (uio->uio_offset == 0)
  230                         nh->nh_seqcount = 4;
  231                 else
  232                         nh->nh_seqcount = 1;
  233         }
  234 
  235         /* Calculate heuristic. */
  236         if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
  237             uio->uio_offset == nh->nh_nextoff) {
  238                 /* See comments in vfs_vnops.c:sequential_heuristic(). */
  239                 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
  240                 if (nh->nh_seqcount > IO_SEQMAX)
  241                         nh->nh_seqcount = IO_SEQMAX;
  242         } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
  243             imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
  244                 /* Probably a reordered RPC, leave seqcount alone. */
  245         } else if (nh->nh_seqcount > 1) {
  246                 nh->nh_seqcount /= 2;
  247         } else {
  248                 nh->nh_seqcount = 0;
  249         }
  250         nh->nh_use += NHUSE_INC;
  251         if (nh->nh_use > NHUSE_MAX)
  252                 nh->nh_use = NHUSE_MAX;
  253         return (nh);
  254 }
  255 
  256 /*
  257  * Get attributes into nfsvattr structure.
  258  */
  259 int
  260 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap,
  261     struct nfsrv_descript *nd, struct thread *p, int vpislocked,
  262     nfsattrbit_t *attrbitp)
  263 {
  264         int error, gotattr, lockedit = 0;
  265         struct nfsvattr na;
  266 
  267         if (vpislocked == 0) {
  268                 /*
  269                  * When vpislocked == 0, the vnode is either exclusively
  270                  * locked by this thread or not locked by this thread.
  271                  * As such, shared lock it, if not exclusively locked.
  272                  */
  273                 if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
  274                         lockedit = 1;
  275                         NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
  276                 }
  277         }
  278 
  279         /*
  280          * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed
  281          * attributes, as required.
  282          * This needs to be done for regular files if:
  283          * - non-NFSv4 RPCs or
  284          * - when attrbitp == NULL or
  285          * - an NFSv4 RPC with any of the above attributes in attrbitp.
  286          * A return of 0 for nfsrv_proxyds() indicates that it has acquired
  287          * these attributes.  nfsrv_proxyds() will return an error if the
  288          * server is not a pNFS one.
  289          */
  290         gotattr = 0;
  291         if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL ||
  292             (nd->nd_flag & ND_NFSV4) == 0 ||
  293             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) ||
  294             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) ||
  295             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) ||
  296             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) ||
  297             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) {
  298                 error = nfsrv_proxyds(nd, vp, 0, 0, nd->nd_cred, p,
  299                     NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL);
  300                 if (error == 0)
  301                         gotattr = 1;
  302         }
  303 
  304         error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred);
  305         if (lockedit != 0)
  306                 NFSVOPUNLOCK(vp, 0);
  307 
  308         /*
  309          * If we got the Change, Size and Modify Time from the DS,
  310          * replace them.
  311          */
  312         if (gotattr != 0) {
  313                 nvap->na_atime = na.na_atime;
  314                 nvap->na_mtime = na.na_mtime;
  315                 nvap->na_filerev = na.na_filerev;
  316                 nvap->na_size = na.na_size;
  317                 nvap->na_bytes = na.na_bytes;
  318         }
  319         NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr,
  320             error, (uintmax_t)na.na_filerev);
  321 
  322         NFSEXITCODE(error);
  323         return (error);
  324 }
  325 
  326 /*
  327  * Get a file handle for a vnode.
  328  */
  329 int
  330 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
  331 {
  332         int error;
  333 
  334         NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
  335         fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
  336         error = VOP_VPTOFH(vp, &fhp->fh_fid);
  337 
  338         NFSEXITCODE(error);
  339         return (error);
  340 }
  341 
  342 /*
  343  * Perform access checking for vnodes obtained from file handles that would
  344  * refer to files already opened by a Unix client. You cannot just use
  345  * vn_writechk() and VOP_ACCESSX() for two reasons.
  346  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
  347  *     case.
  348  * 2 - The owner is to be given access irrespective of mode bits for some
  349  *     operations, so that processes that chmod after opening a file don't
  350  *     break.
  351  */
  352 int
  353 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
  354     struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
  355     u_int32_t *supportedtypep)
  356 {
  357         struct vattr vattr;
  358         int error = 0, getret = 0;
  359 
  360         if (vpislocked == 0) {
  361                 if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
  362                         error = EPERM;
  363                         goto out;
  364                 }
  365         }
  366         if (accmode & VWRITE) {
  367                 /* Just vn_writechk() changed to check rdonly */
  368                 /*
  369                  * Disallow write attempts on read-only file systems;
  370                  * unless the file is a socket or a block or character
  371                  * device resident on the file system.
  372                  */
  373                 if (NFSVNO_EXRDONLY(exp) ||
  374                     (vp->v_mount->mnt_flag & MNT_RDONLY)) {
  375                         switch (vp->v_type) {
  376                         case VREG:
  377                         case VDIR:
  378                         case VLNK:
  379                                 error = EROFS;
  380                         default:
  381                                 break;
  382                         }
  383                 }
  384                 /*
  385                  * If there's shared text associated with
  386                  * the inode, try to free it up once.  If
  387                  * we fail, we can't allow writing.
  388                  */
  389                 if (VOP_IS_TEXT(vp) && error == 0)
  390                         error = ETXTBSY;
  391         }
  392         if (error != 0) {
  393                 if (vpislocked == 0)
  394                         NFSVOPUNLOCK(vp, 0);
  395                 goto out;
  396         }
  397 
  398         /*
  399          * Should the override still be applied when ACLs are enabled?
  400          */
  401         error = VOP_ACCESSX(vp, accmode, cred, p);
  402         if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
  403                 /*
  404                  * Try again with VEXPLICIT_DENY, to see if the test for
  405                  * deletion is supported.
  406                  */
  407                 error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
  408                 if (error == 0) {
  409                         if (vp->v_type == VDIR) {
  410                                 accmode &= ~(VDELETE | VDELETE_CHILD);
  411                                 accmode |= VWRITE;
  412                                 error = VOP_ACCESSX(vp, accmode, cred, p);
  413                         } else if (supportedtypep != NULL) {
  414                                 *supportedtypep &= ~NFSACCESS_DELETE;
  415                         }
  416                 }
  417         }
  418 
  419         /*
  420          * Allow certain operations for the owner (reads and writes
  421          * on files that are already open).
  422          */
  423         if (override != NFSACCCHK_NOOVERRIDE &&
  424             (error == EPERM || error == EACCES)) {
  425                 if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
  426                         error = 0;
  427                 else if (override & NFSACCCHK_ALLOWOWNER) {
  428                         getret = VOP_GETATTR(vp, &vattr, cred);
  429                         if (getret == 0 && cred->cr_uid == vattr.va_uid)
  430                                 error = 0;
  431                 }
  432         }
  433         if (vpislocked == 0)
  434                 NFSVOPUNLOCK(vp, 0);
  435 
  436 out:
  437         NFSEXITCODE(error);
  438         return (error);
  439 }
  440 
  441 /*
  442  * Set attribute(s) vnop.
  443  */
  444 int
  445 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
  446     struct thread *p, struct nfsexstuff *exp)
  447 {
  448         u_quad_t savsize = 0;
  449         int error, savedit;
  450 
  451         /*
  452          * If this is an exported file system and a pNFS service is running,
  453          * don't VOP_SETATTR() of size for the MDS file system.
  454          */
  455         savedit = 0;
  456         error = 0;
  457         if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 &&
  458             nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL &&
  459             nvap->na_vattr.va_size > 0) {
  460                 savsize = nvap->na_vattr.va_size;
  461                 nvap->na_vattr.va_size = VNOVAL;
  462                 if (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
  463                     nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
  464                     nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
  465                     nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
  466                     nvap->na_vattr.va_mtime.tv_sec != VNOVAL)
  467                         savedit = 1;
  468                 else
  469                         savedit = 2;
  470         }
  471         if (savedit != 2)
  472                 error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
  473         if (savedit != 0)
  474                 nvap->na_vattr.va_size = savsize;
  475         if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
  476             nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
  477             nvap->na_vattr.va_size != VNOVAL ||
  478             nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
  479             nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
  480             nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) {
  481                 /* For a pNFS server, set the attributes on the DS file. */
  482                 error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETATTR,
  483                     NULL, NULL, NULL, nvap, NULL);
  484                 if (error == ENOENT)
  485                         error = 0;
  486         }
  487         NFSEXITCODE(error);
  488         return (error);
  489 }
  490 
  491 /*
  492  * Set up nameidata for a lookup() call and do it.
       *
       * dp is the starting directory.  If it is not a directory it is released
       * (vput() when islocked, vrele() otherwise), the path buffer is freed and
       * ENOTDIR is returned with *retdirp left NULL.  Otherwise dp is unlocked
       * when islocked, an extra reference is taken and dp is returned in
       * *retdirp.
       *
       * Symbolic links are followed only for ND_PUBLOOKUP requests, at most
       * MAXSYMLINKS times (ELOOP beyond that); any other request that reaches
       * a symlink fails with EINVAL.
       *
       * On an error that reaches the "out" label the path buffer is released
       * and ni_vp, ni_dvp and ni_startdir are set to NULL.  On success ni_dvp
       * is set to NULL unless WANTPARENT or LOCKPARENT was requested.
  493  */
  494 int
  495 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
  496     struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
  497     struct vnode **retdirp)
  498 {
  499         struct componentname *cnp = &ndp->ni_cnd;
  500         int i;
  501         struct iovec aiov;
  502         struct uio auio;
  503         int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
  504         int error = 0;
  505         char *cp;
  506 
  507         *retdirp = NULL;
  508         cnp->cn_nameptr = cnp->cn_pnbuf;
  509         ndp->ni_lcf = 0;
  510         /*
  511          * Extract and set starting directory.
  512          */
  513         if (dp->v_type != VDIR) {
  514                 if (islocked)
  515                         vput(dp);
  516                 else
  517                         vrele(dp);
  518                 nfsvno_relpathbuf(ndp);
  519                 error = ENOTDIR;
  520                 goto out1;
  521         }
  522         if (islocked)
  523                 NFSVOPUNLOCK(dp, 0);
  524         VREF(dp);
  525         *retdirp = dp;
  526         if (NFSVNO_EXRDONLY(exp))
  527                 cnp->cn_flags |= RDONLY;
  528         ndp->ni_segflg = UIO_SYSSPACE;
  529 
  530         if (nd->nd_flag & ND_PUBLOOKUP) {
  531                 ndp->ni_loopcnt = 0;
                      /* An absolute public-lookup path restarts at rootvnode. */
  532                 if (cnp->cn_pnbuf[0] == '/') {
  533                         vrele(dp);
  534                         /*
  535                          * Check for degenerate pathnames here, since lookup()
  536                          * panics on them.
  537                          */
  538                         for (i = 1; i < ndp->ni_pathlen; i++)
  539                                 if (cnp->cn_pnbuf[i] != '/')
  540                                         break;
  541                         if (i == ndp->ni_pathlen) {
  542                                 error = NFSERR_ACCES;
  543                                 goto out;
  544                         }
  545                         dp = rootvnode;
  546                         VREF(dp);
  547                 }
  548         } else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
  549             (nd->nd_flag & ND_NFSV4) == 0) {
  550                 /*
  551                  * Only cross mount points for NFSv4 when doing a
  552                  * mount while traversing the file system above
  553                  * the mount point, unless nfsrv_enable_crossmntpt is set.
  554                  */
  555                 cnp->cn_flags |= NOCROSSMOUNT;
  556         }
  557 
  558         /*
  559          * Initialize for scan, set ni_startdir and bump ref on dp again
  560          * because lookup() will dereference ni_startdir.
  561          */
  562 
  563         cnp->cn_thread = p;
  564         ndp->ni_startdir = dp;
  565         ndp->ni_rootdir = rootvnode;
  566         ndp->ni_topdir = NULL;
  567 
              /*
               * Always look up with LOCKLEAF set.  If the caller did not ask
               * for it, the leaf is unlocked on success below and the flag is
               * cleared again after the loop.
               */
  568         if (!lockleaf)
  569                 cnp->cn_flags |= LOCKLEAF;
  570         for (;;) {
  571                 cnp->cn_nameptr = cnp->cn_pnbuf;
  572                 /*
  573                  * Call lookup() to do the real work.  If an error occurs,
  574                  * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
  575                  * we do not have to dereference anything before returning.
  576                  * In either case ni_startdir will be dereferenced and NULLed
  577                  * out.
  578                  */
  579                 error = lookup(ndp);
  580                 if (error)
  581                         break;
  582 
  583                 /*
  584                  * Check for encountering a symbolic link.  Trivial
  585                  * termination occurs if no symlink encountered.
  586                  */
  587                 if ((cnp->cn_flags & ISSYMLINK) == 0) {
  588                         if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
  589                                 nfsvno_relpathbuf(ndp);
  590                         if (ndp->ni_vp && !lockleaf)
  591                                 NFSVOPUNLOCK(ndp->ni_vp, 0);
  592                         break;
  593                 }
  594 
  595                 /*
  596                  * Validate symlink
  597                  */
  598                 if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
  599                         NFSVOPUNLOCK(ndp->ni_dvp, 0);
  600                 if (!(nd->nd_flag & ND_PUBLOOKUP)) {
  601                         error = EINVAL;
  602                         goto badlink2;
  603                 }
  604 
  605                 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
  606                         error = ELOOP;
  607                         goto badlink2;
  608                 }
                      /*
                       * When more path follows the link, read the link into a
                       * fresh buffer so the remainder can be appended to it;
                       * otherwise read it straight into cn_pnbuf.
                       */
  609                 if (ndp->ni_pathlen > 1)
  610                         cp = uma_zalloc(namei_zone, M_WAITOK);
  611                 else
  612                         cp = cnp->cn_pnbuf;
  613                 aiov.iov_base = cp;
  614                 aiov.iov_len = MAXPATHLEN;
  615                 auio.uio_iov = &aiov;
  616                 auio.uio_iovcnt = 1;
  617                 auio.uio_offset = 0;
  618                 auio.uio_rw = UIO_READ;
  619                 auio.uio_segflg = UIO_SYSSPACE;
  620                 auio.uio_td = NULL;
  621                 auio.uio_resid = MAXPATHLEN;
  622                 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
  623                 if (error) {
                              /*
                               * badlink1 frees the temporary link buffer, if
                               * one was allocated.  badlink2 then does vrele()
                               * on the parent and vput() on the link vnode.
                               * Both labels are also reached by goto from
                               * outside this if body.
                               */
  624                 badlink1:
  625                         if (ndp->ni_pathlen > 1)
  626                                 uma_zfree(namei_zone, cp);
  627                 badlink2:
  628                         vrele(ndp->ni_dvp);
  629                         vput(ndp->ni_vp);
  630                         break;
  631                 }
  632                 linklen = MAXPATHLEN - auio.uio_resid;
  633                 if (linklen == 0) {
  634                         error = ENOENT;
  635                         goto badlink1;
  636                 }
  637                 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
  638                         error = ENAMETOOLONG;
  639                         goto badlink1;
  640                 }
  641 
  642                 /*
  643                  * Adjust or replace path
  644                  */
  645                 if (ndp->ni_pathlen > 1) {
                              /* Append the unparsed remainder after the link text. */
  646                         NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
  647                         uma_zfree(namei_zone, cnp->cn_pnbuf);
  648                         cnp->cn_pnbuf = cp;
  649                 } else
  650                         cnp->cn_pnbuf[linklen] = '\0';
  651                 ndp->ni_pathlen += linklen;
  652 
  653                 /*
  654                  * Cleanup refs for next loop and check if root directory
  655                  * should replace current directory.  Normally ni_dvp
  656                  * becomes the new base directory and is cleaned up when
  657                  * we loop.  Explicitly null pointers after invalidation
  658                  * to clarify operation.
  659                  */
  660                 vput(ndp->ni_vp);
  661                 ndp->ni_vp = NULL;
  662 
  663                 if (cnp->cn_pnbuf[0] == '/') {
  664                         vrele(ndp->ni_dvp);
  665                         ndp->ni_dvp = ndp->ni_rootdir;
  666                         VREF(ndp->ni_dvp);
  667                 }
  668                 ndp->ni_startdir = ndp->ni_dvp;
  669                 ndp->ni_dvp = NULL;
  670         }
  671         if (!lockleaf)
  672                 cnp->cn_flags &= ~LOCKLEAF;
  673 
  674 out:
  675         if (error) {
  676                 nfsvno_relpathbuf(ndp);
  677                 ndp->ni_vp = NULL;
  678                 ndp->ni_dvp = NULL;
  679                 ndp->ni_startdir = NULL;
  680         } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
  681                 ndp->ni_dvp = NULL;
  682         }
  683 
              /* out1: the ENOTDIR path lands here with the buffer already freed. */
  684 out1:
  685         NFSEXITCODE2(error, nd);
  686         return (error);
  687 }
  688 
  689 /*
  690  * Set up a pathname buffer and return a pointer to it and, optionally
  691  * set a hash pointer.
  692  */
  693 void
  694 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
  695 {
  696         struct componentname *cnp = &ndp->ni_cnd;
  697 
  698         cnp->cn_flags |= (NOMACCHECK | HASBUF);
  699         cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
  700         if (hashpp != NULL)
  701                 *hashpp = NULL;
  702         *bufpp = cnp->cn_pnbuf;
  703 }
  704 
  705 /*
  706  * Release the above path buffer, if not released by nfsvno_namei().
  707  */
  708 void
  709 nfsvno_relpathbuf(struct nameidata *ndp)
  710 {
  711 
  712         if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
  713                 panic("nfsrelpath");
  714         uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
  715         ndp->ni_cnd.cn_flags &= ~HASBUF;
  716 }
  717 
  718 /*
  719  * Readlink vnode op into an mbuf list.
  720  */
  721 int
  722 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
  723     struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
  724 {
  725         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
  726         struct iovec *ivp = iv;
  727         struct uio io, *uiop = &io;
  728         struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
  729         int i, len, tlen, error = 0;
  730 
  731         len = 0;
  732         i = 0;
  733         while (len < NFS_MAXPATHLEN) {
  734                 NFSMGET(mp);
  735                 MCLGET(mp, M_WAITOK);
  736                 mp->m_len = M_SIZE(mp);
  737                 if (len == 0) {
  738                         mp3 = mp2 = mp;
  739                 } else {
  740                         mp2->m_next = mp;
  741                         mp2 = mp;
  742                 }
  743                 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
  744                         mp->m_len = NFS_MAXPATHLEN - len;
  745                         len = NFS_MAXPATHLEN;
  746                 } else {
  747                         len += mp->m_len;
  748                 }
  749                 ivp->iov_base = mtod(mp, caddr_t);
  750                 ivp->iov_len = mp->m_len;
  751                 i++;
  752                 ivp++;
  753         }
  754         uiop->uio_iov = iv;
  755         uiop->uio_iovcnt = i;
  756         uiop->uio_offset = 0;
  757         uiop->uio_resid = len;
  758         uiop->uio_rw = UIO_READ;
  759         uiop->uio_segflg = UIO_SYSSPACE;
  760         uiop->uio_td = NULL;
  761         error = VOP_READLINK(vp, uiop, cred);
  762         if (error) {
  763                 m_freem(mp3);
  764                 *lenp = 0;
  765                 goto out;
  766         }
  767         if (uiop->uio_resid > 0) {
  768                 len -= uiop->uio_resid;
  769                 tlen = NFSM_RNDUP(len);
  770                 nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
  771         }
  772         *lenp = len;
  773         *mpp = mp3;
  774         *mpendp = mp;
  775 
  776 out:
  777         NFSEXITCODE(error);
  778         return (error);
  779 }
  780 
  781 /*
  782  * Read vnode op call into mbuf list.
  783  */
  784 int
  785 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
  786     struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
  787 {
  788         struct mbuf *m;
  789         int i;
  790         struct iovec *iv;
  791         struct iovec *iv2;
  792         int error = 0, len, left, siz, tlen, ioflag = 0;
  793         struct mbuf *m2 = NULL, *m3;
  794         struct uio io, *uiop = &io;
  795         struct nfsheur *nh;
  796 
  797         /*
  798          * Attempt to read from a DS file. A return of ENOENT implies
  799          * there is no DS file to read.
  800          */
  801         error = nfsrv_proxyds(NULL, vp, off, cnt, cred, p, NFSPROC_READDS, mpp,
  802             NULL, mpendp, NULL, NULL);
  803         if (error != ENOENT)
  804                 return (error);
  805 
  806         len = left = NFSM_RNDUP(cnt);
  807         m3 = NULL;
  808         /*
  809          * Generate the mbuf list with the uio_iov ref. to it.
  810          */
  811         i = 0;
  812         while (left > 0) {
  813                 NFSMGET(m);
  814                 MCLGET(m, M_WAITOK);
  815                 m->m_len = 0;
  816                 siz = min(M_TRAILINGSPACE(m), left);
  817                 left -= siz;
  818                 i++;
  819                 if (m3)
  820                         m2->m_next = m;
  821                 else
  822                         m3 = m;
  823                 m2 = m;
  824         }
  825         iv = malloc(i * sizeof (struct iovec),
  826             M_TEMP, M_WAITOK);
  827         uiop->uio_iov = iv2 = iv;
  828         m = m3;
  829         left = len;
  830         i = 0;
  831         while (left > 0) {
  832                 if (m == NULL)
  833                         panic("nfsvno_read iov");
  834                 siz = min(M_TRAILINGSPACE(m), left);
  835                 if (siz > 0) {
  836                         iv->iov_base = mtod(m, caddr_t) + m->m_len;
  837                         iv->iov_len = siz;
  838                         m->m_len += siz;
  839                         left -= siz;
  840                         iv++;
  841                         i++;
  842                 }
  843                 m = m->m_next;
  844         }
  845         uiop->uio_iovcnt = i;
  846         uiop->uio_offset = off;
  847         uiop->uio_resid = len;
  848         uiop->uio_rw = UIO_READ;
  849         uiop->uio_segflg = UIO_SYSSPACE;
  850         uiop->uio_td = NULL;
  851         nh = nfsrv_sequential_heuristic(uiop, vp);
  852         ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
  853         /* XXX KDM make this more systematic? */
  854         nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid;
  855         error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
  856         free(iv2, M_TEMP);
  857         if (error) {
  858                 m_freem(m3);
  859                 *mpp = NULL;
  860                 goto out;
  861         }
  862         nh->nh_nextoff = uiop->uio_offset;
  863         tlen = len - uiop->uio_resid;
  864         cnt = cnt < tlen ? cnt : tlen;
  865         tlen = NFSM_RNDUP(cnt);
  866         if (tlen == 0) {
  867                 m_freem(m3);
  868                 m3 = NULL;
  869         } else if (len != tlen || tlen != cnt)
  870                 nfsrv_adj(m3, len - tlen, tlen - cnt);
  871         *mpp = m3;
  872         *mpendp = m2;
  873 
  874 out:
  875         NFSEXITCODE(error);
  876         return (error);
  877 }
  878 
  879 /*
  880  * Write vnode op from an mbuf list.
  881  */
  882 int
  883 nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int *stable,
  884     struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
  885 {
  886         struct iovec *ivp;
  887         int i, len;
  888         struct iovec *iv;
  889         int ioflags, error;
  890         struct uio io, *uiop = &io;
  891         struct nfsheur *nh;
  892 
  893         /*
  894          * Attempt to write to a DS file. A return of ENOENT implies
  895          * there is no DS file to write.
  896          */
  897         error = nfsrv_proxyds(NULL, vp, off, retlen, cred, p, NFSPROC_WRITEDS,
  898             &mp, cp, NULL, NULL, NULL);
  899         if (error != ENOENT) {
  900                 *stable = NFSWRITE_FILESYNC;
  901                 return (error);
  902         }
  903 
  904         ivp = malloc(cnt * sizeof (struct iovec), M_TEMP,
  905             M_WAITOK);
  906         uiop->uio_iov = iv = ivp;
  907         uiop->uio_iovcnt = cnt;
  908         i = mtod(mp, caddr_t) + mp->m_len - cp;
  909         len = retlen;
  910         while (len > 0) {
  911                 if (mp == NULL)
  912                         panic("nfsvno_write");
  913                 if (i > 0) {
  914                         i = min(i, len);
  915                         ivp->iov_base = cp;
  916                         ivp->iov_len = i;
  917                         ivp++;
  918                         len -= i;
  919                 }
  920                 mp = mp->m_next;
  921                 if (mp) {
  922                         i = mp->m_len;
  923                         cp = mtod(mp, caddr_t);
  924                 }
  925         }
  926 
  927         if (*stable == NFSWRITE_UNSTABLE)
  928                 ioflags = IO_NODELOCKED;
  929         else
  930                 ioflags = (IO_SYNC | IO_NODELOCKED);
  931         uiop->uio_resid = retlen;
  932         uiop->uio_rw = UIO_WRITE;
  933         uiop->uio_segflg = UIO_SYSSPACE;
  934         NFSUIOPROC(uiop, p);
  935         uiop->uio_offset = off;
  936         nh = nfsrv_sequential_heuristic(uiop, vp);
  937         ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
  938         /* XXX KDM make this more systematic? */
  939         nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
  940         error = VOP_WRITE(vp, uiop, ioflags, cred);
  941         if (error == 0)
  942                 nh->nh_nextoff = uiop->uio_offset;
  943         free(iv, M_TEMP);
  944 
  945         NFSEXITCODE(error);
  946         return (error);
  947 }
  948 
  949 /*
  950  * Common code for creating a regular file (plus special files for V2).
  951  */
  952 int
  953 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
  954     struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
  955     int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
  956 {
  957         u_quad_t tempsize;
  958         int error;
  959 
  960         error = nd->nd_repstat;
  961         if (!error && ndp->ni_vp == NULL) {
  962                 if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
  963                         vrele(ndp->ni_startdir);
  964                         error = VOP_CREATE(ndp->ni_dvp,
  965                             &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
  966                         /* For a pNFS server, create the data file on a DS. */
  967                         if (error == 0 && nvap->na_type == VREG) {
  968                                 /*
  969                                  * Create a data file on a DS for a pNFS server.
  970                                  * This function just returns if not
  971                                  * running a pNFS DS or the creation fails.
  972                                  */
  973                                 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
  974                                     nd->nd_cred, p);
  975                         }
  976                         vput(ndp->ni_dvp);
  977                         nfsvno_relpathbuf(ndp);
  978                         if (!error) {
  979                                 if (*exclusive_flagp) {
  980                                         *exclusive_flagp = 0;
  981                                         NFSVNO_ATTRINIT(nvap);
  982                                         nvap->na_atime.tv_sec = cverf[0];
  983                                         nvap->na_atime.tv_nsec = cverf[1];
  984                                         error = VOP_SETATTR(ndp->ni_vp,
  985                                             &nvap->na_vattr, nd->nd_cred);
  986                                         if (error != 0) {
  987                                                 vput(ndp->ni_vp);
  988                                                 ndp->ni_vp = NULL;
  989                                                 error = NFSERR_NOTSUPP;
  990                                         }
  991                                 }
  992                         }
  993                 /*
  994                  * NFS V2 Only. nfsrvd_mknod() does this for V3.
  995                  * (This implies, just get out on an error.)
  996                  */
  997                 } else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
  998                         nvap->na_type == VFIFO) {
  999                         if (nvap->na_type == VCHR && rdev == 0xffffffff)
 1000                                 nvap->na_type = VFIFO;
 1001                         if (nvap->na_type != VFIFO &&
 1002                             (error = priv_check_cred(nd->nd_cred,
 1003                              PRIV_VFS_MKNOD_DEV, 0))) {
 1004                                 vrele(ndp->ni_startdir);
 1005                                 nfsvno_relpathbuf(ndp);
 1006                                 vput(ndp->ni_dvp);
 1007                                 goto out;
 1008                         }
 1009                         nvap->na_rdev = rdev;
 1010                         error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
 1011                             &ndp->ni_cnd, &nvap->na_vattr);
 1012                         vput(ndp->ni_dvp);
 1013                         nfsvno_relpathbuf(ndp);
 1014                         vrele(ndp->ni_startdir);
 1015                         if (error)
 1016                                 goto out;
 1017                 } else {
 1018                         vrele(ndp->ni_startdir);
 1019                         nfsvno_relpathbuf(ndp);
 1020                         vput(ndp->ni_dvp);
 1021                         error = ENXIO;
 1022                         goto out;
 1023                 }
 1024                 *vpp = ndp->ni_vp;
 1025         } else {
 1026                 /*
 1027                  * Handle cases where error is already set and/or
 1028                  * the file exists.
 1029                  * 1 - clean up the lookup
 1030                  * 2 - iff !error and na_size set, truncate it
 1031                  */
 1032                 vrele(ndp->ni_startdir);
 1033                 nfsvno_relpathbuf(ndp);
 1034                 *vpp = ndp->ni_vp;
 1035                 if (ndp->ni_dvp == *vpp)
 1036                         vrele(ndp->ni_dvp);
 1037                 else
 1038                         vput(ndp->ni_dvp);
 1039                 if (!error && nvap->na_size != VNOVAL) {
 1040                         error = nfsvno_accchk(*vpp, VWRITE,
 1041                             nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 1042                             NFSACCCHK_VPISLOCKED, NULL);
 1043                         if (!error) {
 1044                                 tempsize = nvap->na_size;
 1045                                 NFSVNO_ATTRINIT(nvap);
 1046                                 nvap->na_size = tempsize;
 1047                                 error = nfsvno_setattr(*vpp, nvap,
 1048                                     nd->nd_cred, p, exp);
 1049                         }
 1050                 }
 1051                 if (error)
 1052                         vput(*vpp);
 1053         }
 1054 
 1055 out:
 1056         NFSEXITCODE(error);
 1057         return (error);
 1058 }
 1059 
 1060 /*
 1061  * Do a mknod vnode op.
 1062  */
 1063 int
 1064 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
 1065     struct thread *p)
 1066 {
 1067         int error = 0;
 1068         enum vtype vtyp;
 1069 
 1070         vtyp = nvap->na_type;
 1071         /*
 1072          * Iff doesn't exist, create it.
 1073          */
 1074         if (ndp->ni_vp) {
 1075                 vrele(ndp->ni_startdir);
 1076                 nfsvno_relpathbuf(ndp);
 1077                 vput(ndp->ni_dvp);
 1078                 vrele(ndp->ni_vp);
 1079                 error = EEXIST;
 1080                 goto out;
 1081         }
 1082         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
 1083                 vrele(ndp->ni_startdir);
 1084                 nfsvno_relpathbuf(ndp);
 1085                 vput(ndp->ni_dvp);
 1086                 error = NFSERR_BADTYPE;
 1087                 goto out;
 1088         }
 1089         if (vtyp == VSOCK) {
 1090                 vrele(ndp->ni_startdir);
 1091                 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
 1092                     &ndp->ni_cnd, &nvap->na_vattr);
 1093                 vput(ndp->ni_dvp);
 1094                 nfsvno_relpathbuf(ndp);
 1095         } else {
 1096                 if (nvap->na_type != VFIFO &&
 1097                     (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
 1098                         vrele(ndp->ni_startdir);
 1099                         nfsvno_relpathbuf(ndp);
 1100                         vput(ndp->ni_dvp);
 1101                         goto out;
 1102                 }
 1103                 error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
 1104                     &ndp->ni_cnd, &nvap->na_vattr);
 1105                 vput(ndp->ni_dvp);
 1106                 nfsvno_relpathbuf(ndp);
 1107                 vrele(ndp->ni_startdir);
 1108                 /*
 1109                  * Since VOP_MKNOD returns the ni_vp, I can't
 1110                  * see any reason to do the lookup.
 1111                  */
 1112         }
 1113 
 1114 out:
 1115         NFSEXITCODE(error);
 1116         return (error);
 1117 }
 1118 
 1119 /*
 1120  * Mkdir vnode op.
 1121  */
 1122 int
 1123 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
 1124     struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
 1125 {
 1126         int error = 0;
 1127 
 1128         if (ndp->ni_vp != NULL) {
 1129                 if (ndp->ni_dvp == ndp->ni_vp)
 1130                         vrele(ndp->ni_dvp);
 1131                 else
 1132                         vput(ndp->ni_dvp);
 1133                 vrele(ndp->ni_vp);
 1134                 nfsvno_relpathbuf(ndp);
 1135                 error = EEXIST;
 1136                 goto out;
 1137         }
 1138         error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
 1139             &nvap->na_vattr);
 1140         vput(ndp->ni_dvp);
 1141         nfsvno_relpathbuf(ndp);
 1142 
 1143 out:
 1144         NFSEXITCODE(error);
 1145         return (error);
 1146 }
 1147 
 1148 /*
 1149  * symlink vnode op.
 1150  */
 1151 int
 1152 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
 1153     int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
 1154     struct nfsexstuff *exp)
 1155 {
 1156         int error = 0;
 1157 
 1158         if (ndp->ni_vp) {
 1159                 vrele(ndp->ni_startdir);
 1160                 nfsvno_relpathbuf(ndp);
 1161                 if (ndp->ni_dvp == ndp->ni_vp)
 1162                         vrele(ndp->ni_dvp);
 1163                 else
 1164                         vput(ndp->ni_dvp);
 1165                 vrele(ndp->ni_vp);
 1166                 error = EEXIST;
 1167                 goto out;
 1168         }
 1169 
 1170         error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
 1171             &nvap->na_vattr, pathcp);
 1172         vput(ndp->ni_dvp);
 1173         vrele(ndp->ni_startdir);
 1174         nfsvno_relpathbuf(ndp);
 1175         /*
 1176          * Although FreeBSD still had the lookup code in
 1177          * it for 7/current, there doesn't seem to be any
 1178          * point, since VOP_SYMLINK() returns the ni_vp.
 1179          * Just vput it for v2.
 1180          */
 1181         if (!not_v2 && !error)
 1182                 vput(ndp->ni_vp);
 1183 
 1184 out:
 1185         NFSEXITCODE(error);
 1186         return (error);
 1187 }
 1188 
 1189 /*
 1190  * Parse symbolic link arguments.
 1191  * This function has an ugly side effect. It will malloc() an area for
 1192  * the symlink and set iov_base to point to it, only if it succeeds.
 1193  * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
 1194  * be FREE'd later.
 1195  */
 1196 int
 1197 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
 1198     struct thread *p, char **pathcpp, int *lenp)
 1199 {
 1200         u_int32_t *tl;
 1201         char *pathcp = NULL;
 1202         int error = 0, len;
 1203         struct nfsv2_sattr *sp;
 1204 
 1205         *pathcpp = NULL;
 1206         *lenp = 0;
 1207         if ((nd->nd_flag & ND_NFSV3) &&
 1208             (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p)))
 1209                 goto nfsmout;
 1210         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 1211         len = fxdr_unsigned(int, *tl);
 1212         if (len > NFS_MAXPATHLEN || len <= 0) {
 1213                 error = EBADRPC;
 1214                 goto nfsmout;
 1215         }
 1216         pathcp = malloc(len + 1, M_TEMP, M_WAITOK);
 1217         error = nfsrv_mtostr(nd, pathcp, len);
 1218         if (error)
 1219                 goto nfsmout;
 1220         if (nd->nd_flag & ND_NFSV2) {
 1221                 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 1222                 nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
 1223         }
 1224         *pathcpp = pathcp;
 1225         *lenp = len;
 1226         NFSEXITCODE2(0, nd);
 1227         return (0);
 1228 nfsmout:
 1229         if (pathcp)
 1230                 free(pathcp, M_TEMP);
 1231         NFSEXITCODE2(error, nd);
 1232         return (error);
 1233 }
 1234 
 1235 /*
 1236  * Remove a non-directory object.
 1237  */
 1238 int
 1239 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
 1240     struct thread *p, struct nfsexstuff *exp)
 1241 {
 1242         struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS];
 1243         int error = 0, mirrorcnt;
 1244         char fname[PNFS_FILENAME_LEN + 1];
 1245         fhandle_t fh;
 1246 
 1247         vp = ndp->ni_vp;
 1248         dsdvp[0] = NULL;
 1249         if (vp->v_type == VDIR)
 1250                 error = NFSERR_ISDIR;
 1251         else if (is_v4)
 1252                 error = nfsrv_checkremove(vp, 1, p);
 1253         if (error == 0)
 1254                 nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh);
 1255         if (!error)
 1256                 error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
 1257         if (error == 0 && dsdvp[0] != NULL)
 1258                 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
 1259         if (ndp->ni_dvp == vp)
 1260                 vrele(ndp->ni_dvp);
 1261         else
 1262                 vput(ndp->ni_dvp);
 1263         vput(vp);
 1264         if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
 1265                 nfsvno_relpathbuf(ndp);
 1266         NFSEXITCODE(error);
 1267         return (error);
 1268 }
 1269 
 1270 /*
 1271  * Remove a directory.
 1272  */
 1273 int
 1274 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
 1275     struct thread *p, struct nfsexstuff *exp)
 1276 {
 1277         struct vnode *vp;
 1278         int error = 0;
 1279 
 1280         vp = ndp->ni_vp;
 1281         if (vp->v_type != VDIR) {
 1282                 error = ENOTDIR;
 1283                 goto out;
 1284         }
 1285         /*
 1286          * No rmdir "." please.
 1287          */
 1288         if (ndp->ni_dvp == vp) {
 1289                 error = EINVAL;
 1290                 goto out;
 1291         }
 1292         /*
 1293          * The root of a mounted filesystem cannot be deleted.
 1294          */
 1295         if (vp->v_vflag & VV_ROOT)
 1296                 error = EBUSY;
 1297 out:
 1298         if (!error)
 1299                 error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
 1300         if (ndp->ni_dvp == vp)
 1301                 vrele(ndp->ni_dvp);
 1302         else
 1303                 vput(ndp->ni_dvp);
 1304         vput(vp);
 1305         if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
 1306                 nfsvno_relpathbuf(ndp);
 1307         NFSEXITCODE(error);
 1308         return (error);
 1309 }
 1310 
 1311 /*
 1312  * Rename vnode op.
 1313  */
 1314 int
 1315 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
 1316     u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
 1317 {
 1318         struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS];
 1319         int error = 0, mirrorcnt;
 1320         char fname[PNFS_FILENAME_LEN + 1];
 1321         fhandle_t fh;
 1322 
 1323         dsdvp[0] = NULL;
 1324         fvp = fromndp->ni_vp;
 1325         if (ndstat) {
 1326                 vrele(fromndp->ni_dvp);
 1327                 vrele(fvp);
 1328                 error = ndstat;
 1329                 goto out1;
 1330         }
 1331         tdvp = tondp->ni_dvp;
 1332         tvp = tondp->ni_vp;
 1333         if (tvp != NULL) {
 1334                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 1335                         error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
 1336                         goto out;
 1337                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 1338                         error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
 1339                         goto out;
 1340                 }
 1341                 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
 1342                         error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 1343                         goto out;
 1344                 }
 1345 
 1346                 /*
 1347                  * A rename to '.' or '..' results in a prematurely
 1348                  * unlocked vnode on FreeBSD5, so I'm just going to fail that
 1349                  * here.
 1350                  */
 1351                 if ((tondp->ni_cnd.cn_namelen == 1 &&
 1352                      tondp->ni_cnd.cn_nameptr[0] == '.') ||
 1353                     (tondp->ni_cnd.cn_namelen == 2 &&
 1354                      tondp->ni_cnd.cn_nameptr[0] == '.' &&
 1355                      tondp->ni_cnd.cn_nameptr[1] == '.')) {
 1356                         error = EINVAL;
 1357                         goto out;
 1358                 }
 1359         }
 1360         if (fvp->v_type == VDIR && fvp->v_mountedhere) {
 1361                 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 1362                 goto out;
 1363         }
 1364         if (fvp->v_mount != tdvp->v_mount) {
 1365                 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 1366                 goto out;
 1367         }
 1368         if (fvp == tdvp) {
 1369                 error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
 1370                 goto out;
 1371         }
 1372         if (fvp == tvp) {
 1373                 /*
 1374                  * If source and destination are the same, there is nothing to
 1375                  * do. Set error to -1 to indicate this.
 1376                  */
 1377                 error = -1;
 1378                 goto out;
 1379         }
 1380         if (ndflag & ND_NFSV4) {
 1381                 if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
 1382                         error = nfsrv_checkremove(fvp, 0, p);
 1383                         NFSVOPUNLOCK(fvp, 0);
 1384                 } else
 1385                         error = EPERM;
 1386                 if (tvp && !error)
 1387                         error = nfsrv_checkremove(tvp, 1, p);
 1388         } else {
 1389                 /*
 1390                  * For NFSv2 and NFSv3, try to get rid of the delegation, so
 1391                  * that the NFSv4 client won't be confused by the rename.
 1392                  * Since nfsd_recalldelegation() can only be called on an
 1393                  * unlocked vnode at this point and fvp is the file that will
 1394                  * still exist after the rename, just do fvp.
 1395                  */
 1396                 nfsd_recalldelegation(fvp, p);
 1397         }
 1398         if (error == 0 && tvp != NULL) {
 1399                 nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh);
 1400                 NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
 1401                     " dsdvp=%p\n", dsdvp[0]);
 1402         }
 1403 out:
 1404         if (!error) {
 1405                 error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
 1406                     &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
 1407                     &tondp->ni_cnd);
 1408         } else {
 1409                 if (tdvp == tvp)
 1410                         vrele(tdvp);
 1411                 else
 1412                         vput(tdvp);
 1413                 if (tvp)
 1414                         vput(tvp);
 1415                 vrele(fromndp->ni_dvp);
 1416                 vrele(fvp);
 1417                 if (error == -1)
 1418                         error = 0;
 1419         }
 1420 
 1421         /*
 1422          * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and
 1423          * if the rename succeeded, the DS file for the tvp needs to be
 1424          * removed.
 1425          */
 1426         if (error == 0 && dsdvp[0] != NULL) {
 1427                 nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
 1428                 NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
 1429         }
 1430 
 1431         vrele(tondp->ni_startdir);
 1432         nfsvno_relpathbuf(tondp);
 1433 out1:
 1434         vrele(fromndp->ni_startdir);
 1435         nfsvno_relpathbuf(fromndp);
 1436         NFSEXITCODE(error);
 1437         return (error);
 1438 }
 1439 
 1440 /*
 1441  * Link vnode op.
 1442  */
 1443 int
 1444 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
 1445     struct thread *p, struct nfsexstuff *exp)
 1446 {
 1447         struct vnode *xp;
 1448         int error = 0;
 1449 
 1450         xp = ndp->ni_vp;
 1451         if (xp != NULL) {
 1452                 error = EEXIST;
 1453         } else {
 1454                 xp = ndp->ni_dvp;
 1455                 if (vp->v_mount != xp->v_mount)
 1456                         error = EXDEV;
 1457         }
 1458         if (!error) {
 1459                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 1460                 if ((vp->v_iflag & VI_DOOMED) == 0)
 1461                         error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
 1462                 else
 1463                         error = EPERM;
 1464                 if (ndp->ni_dvp == vp)
 1465                         vrele(ndp->ni_dvp);
 1466                 else
 1467                         vput(ndp->ni_dvp);
 1468                 NFSVOPUNLOCK(vp, 0);
 1469         } else {
 1470                 if (ndp->ni_dvp == ndp->ni_vp)
 1471                         vrele(ndp->ni_dvp);
 1472                 else
 1473                         vput(ndp->ni_dvp);
 1474                 if (ndp->ni_vp)
 1475                         vrele(ndp->ni_vp);
 1476         }
 1477         nfsvno_relpathbuf(ndp);
 1478         NFSEXITCODE(error);
 1479         return (error);
 1480 }
 1481 
 1482 /*
 1483  * Do the fsync() appropriate for the commit.
 1484  */
 1485 int
 1486 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
 1487     struct thread *td)
 1488 {
 1489         int error = 0;
 1490 
 1491         /*
 1492          * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
 1493          * file is done.  At this time VOP_FSYNC does not accept offset and
 1494          * byte count parameters so call VOP_FSYNC the whole file for now.
 1495          * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
 1496          * File systems that do not use the buffer cache (as indicated
 1497          * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
 1498          */
 1499         if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
 1500             (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
 1501                 /*
 1502                  * Give up and do the whole thing
 1503                  */
 1504                 if (vp->v_object &&
 1505                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 1506                         VM_OBJECT_WLOCK(vp->v_object);
 1507                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 1508                         VM_OBJECT_WUNLOCK(vp->v_object);
 1509                 }
 1510                 error = VOP_FSYNC(vp, MNT_WAIT, td);
 1511         } else {
 1512                 /*
 1513                  * Locate and synchronously write any buffers that fall
 1514                  * into the requested range.  Note:  we are assuming that
 1515                  * f_iosize is a power of 2.
 1516                  */
 1517                 int iosize = vp->v_mount->mnt_stat.f_iosize;
 1518                 int iomask = iosize - 1;
 1519                 struct bufobj *bo;
 1520                 daddr_t lblkno;
 1521 
 1522                 /*
 1523                  * Align to iosize boundary, super-align to page boundary.
 1524                  */
 1525                 if (off & iomask) {
 1526                         cnt += off & iomask;
 1527                         off &= ~(u_quad_t)iomask;
 1528                 }
 1529                 if (off & PAGE_MASK) {
 1530                         cnt += off & PAGE_MASK;
 1531                         off &= ~(u_quad_t)PAGE_MASK;
 1532                 }
 1533                 lblkno = off / iosize;
 1534 
 1535                 if (vp->v_object &&
 1536                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 1537                         VM_OBJECT_WLOCK(vp->v_object);
 1538                         vm_object_page_clean(vp->v_object, off, off + cnt,
 1539                             OBJPC_SYNC);
 1540                         VM_OBJECT_WUNLOCK(vp->v_object);
 1541                 }
 1542 
 1543                 bo = &vp->v_bufobj;
 1544                 BO_LOCK(bo);
 1545                 while (cnt > 0) {
 1546                         struct buf *bp;
 1547 
 1548                         /*
 1549                          * If we have a buffer and it is marked B_DELWRI we
 1550                          * have to lock and write it.  Otherwise the prior
 1551                          * write is assumed to have already been committed.
 1552                          *
 1553                          * gbincore() can return invalid buffers now so we
 1554                          * have to check that bit as well (though B_DELWRI
 1555                          * should not be set if B_INVAL is set there could be
 1556                          * a race here since we haven't locked the buffer).
 1557                          */
 1558                         if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 1559                                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 1560                                     LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
 1561                                         BO_LOCK(bo);
 1562                                         continue; /* retry */
 1563                                 }
 1564                                 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
 1565                                     B_DELWRI) {
 1566                                         bremfree(bp);
 1567                                         bp->b_flags &= ~B_ASYNC;
 1568                                         bwrite(bp);
 1569                                         ++nfs_commit_miss;
 1570                                 } else
 1571                                         BUF_UNLOCK(bp);
 1572                                 BO_LOCK(bo);
 1573                         }
 1574                         ++nfs_commit_blks;
 1575                         if (cnt < iosize)
 1576                                 break;
 1577                         cnt -= iosize;
 1578                         ++lblkno;
 1579                 }
 1580                 BO_UNLOCK(bo);
 1581         }
 1582         NFSEXITCODE(error);
 1583         return (error);
 1584 }
 1585 
 1586 /*
 1587  * Statfs vnode op.
 1588  */
 1589 int
 1590 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
 1591 {
 1592         struct statfs *tsf;
 1593         int error;
 1594 
 1595         tsf = NULL;
 1596         if (nfsrv_devidcnt > 0) {
 1597                 /* For a pNFS service, get the DS numbers. */
 1598                 tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO);
 1599                 error = nfsrv_pnfsstatfs(tsf, vp->v_mount);
 1600                 if (error != 0) {
 1601                         free(tsf, M_TEMP);
 1602                         tsf = NULL;
 1603                 }
 1604         }
 1605         error = VFS_STATFS(vp->v_mount, sf);
 1606         if (error == 0) {
 1607                 if (tsf != NULL) {
 1608                         sf->f_blocks = tsf->f_blocks;
 1609                         sf->f_bavail = tsf->f_bavail;
 1610                         sf->f_bfree = tsf->f_bfree;
 1611                         sf->f_bsize = tsf->f_bsize;
 1612                 }
 1613                 /*
 1614                  * Since NFS handles these values as unsigned on the
 1615                  * wire, there is no way to represent negative values,
 1616                  * so set them to 0. Without this, they will appear
 1617                  * to be very large positive values for clients like
 1618                  * Solaris10.
 1619                  */
 1620                 if (sf->f_bavail < 0)
 1621                         sf->f_bavail = 0;
 1622                 if (sf->f_ffree < 0)
 1623                         sf->f_ffree = 0;
 1624         }
 1625         free(tsf, M_TEMP);
 1626         NFSEXITCODE(error);
 1627         return (error);
 1628 }
 1629 
 1630 /*
 1631  * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
 1632  * must handle nfsrv_opencheck() calls after any other access checks.
 1633  */
 1634 void
 1635 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
 1636     nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
 1637     int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
 1638     NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
 1639     struct nfsexstuff *exp, struct vnode **vpp)
 1640 {
 1641         struct vnode *vp = NULL;
 1642         u_quad_t tempsize;
 1643         struct nfsexstuff nes;
 1644 
 1645         if (ndp->ni_vp == NULL)
 1646                 nd->nd_repstat = nfsrv_opencheck(clientid,
 1647                     stateidp, stp, NULL, nd, p, nd->nd_repstat);
 1648         if (!nd->nd_repstat) {
 1649                 if (ndp->ni_vp == NULL) {
 1650                         vrele(ndp->ni_startdir);
 1651                         nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
 1652                             &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
 1653                         /* For a pNFS server, create the data file on a DS. */
 1654                         if (nd->nd_repstat == 0) {
 1655                                 /*
 1656                                  * Create a data file on a DS for a pNFS server.
 1657                                  * This function just returns if not
 1658                                  * running a pNFS DS or the creation fails.
 1659                                  */
 1660                                 nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
 1661                                     cred, p);
 1662                         }
 1663                         vput(ndp->ni_dvp);
 1664                         nfsvno_relpathbuf(ndp);
 1665                         if (!nd->nd_repstat) {
 1666                                 if (*exclusive_flagp) {
 1667                                         *exclusive_flagp = 0;
 1668                                         NFSVNO_ATTRINIT(nvap);
 1669                                         nvap->na_atime.tv_sec = cverf[0];
 1670                                         nvap->na_atime.tv_nsec = cverf[1];
 1671                                         nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
 1672                                             &nvap->na_vattr, cred);
 1673                                         if (nd->nd_repstat != 0) {
 1674                                                 vput(ndp->ni_vp);
 1675                                                 ndp->ni_vp = NULL;
 1676                                                 nd->nd_repstat = NFSERR_NOTSUPP;
 1677                                         } else
 1678                                                 NFSSETBIT_ATTRBIT(attrbitp,
 1679                                                     NFSATTRBIT_TIMEACCESS);
 1680                                 } else {
 1681                                         nfsrv_fixattr(nd, ndp->ni_vp, nvap,
 1682                                             aclp, p, attrbitp, exp);
 1683                                 }
 1684                         }
 1685                         vp = ndp->ni_vp;
 1686                 } else {
 1687                         if (ndp->ni_startdir)
 1688                                 vrele(ndp->ni_startdir);
 1689                         nfsvno_relpathbuf(ndp);
 1690                         vp = ndp->ni_vp;
 1691                         if (create == NFSV4OPEN_CREATE) {
 1692                                 if (ndp->ni_dvp == vp)
 1693                                         vrele(ndp->ni_dvp);
 1694                                 else
 1695                                         vput(ndp->ni_dvp);
 1696                         }
 1697                         if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
 1698                                 if (ndp->ni_cnd.cn_flags & RDONLY)
 1699                                         NFSVNO_SETEXRDONLY(&nes);
 1700                                 else
 1701                                         NFSVNO_EXINIT(&nes);
 1702                                 nd->nd_repstat = nfsvno_accchk(vp, 
 1703                                     VWRITE, cred, &nes, p,
 1704                                     NFSACCCHK_NOOVERRIDE,
 1705                                     NFSACCCHK_VPISLOCKED, NULL);
 1706                                 nd->nd_repstat = nfsrv_opencheck(clientid,
 1707                                     stateidp, stp, vp, nd, p, nd->nd_repstat);
 1708                                 if (!nd->nd_repstat) {
 1709                                         tempsize = nvap->na_size;
 1710                                         NFSVNO_ATTRINIT(nvap);
 1711                                         nvap->na_size = tempsize;
 1712                                         nd->nd_repstat = nfsvno_setattr(vp,
 1713                                             nvap, cred, p, exp);
 1714                                 }
 1715                         } else if (vp->v_type == VREG) {
 1716                                 nd->nd_repstat = nfsrv_opencheck(clientid,
 1717                                     stateidp, stp, vp, nd, p, nd->nd_repstat);
 1718                         }
 1719                 }
 1720         } else {
 1721                 if (ndp->ni_cnd.cn_flags & HASBUF)
 1722                         nfsvno_relpathbuf(ndp);
 1723                 if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
 1724                         vrele(ndp->ni_startdir);
 1725                         if (ndp->ni_dvp == ndp->ni_vp)
 1726                                 vrele(ndp->ni_dvp);
 1727                         else
 1728                                 vput(ndp->ni_dvp);
 1729                         if (ndp->ni_vp)
 1730                                 vput(ndp->ni_vp);
 1731                 }
 1732         }
 1733         *vpp = vp;
 1734 
 1735         NFSEXITCODE2(0, nd);
 1736 }
 1737 
 1738 /*
 1739  * Updates the file rev and sets the mtime and ctime
 1740  * to the current clock time, returning the va_filerev and va_Xtime
 1741  * values.
 1742  * Return ESTALE to indicate the vnode is VI_DOOMED.
 1743  */
 1744 int
 1745 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
 1746     struct nfsrv_descript *nd, struct thread *p)
 1747 {
 1748         struct vattr va;
 1749 
 1750         VATTR_NULL(&va);
 1751         vfs_timestamp(&va.va_mtime);
 1752         if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
 1753                 NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 1754                 if ((vp->v_iflag & VI_DOOMED) != 0)
 1755                         return (ESTALE);
 1756         }
 1757         (void) VOP_SETATTR(vp, &va, nd->nd_cred);
 1758         (void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL);
 1759         return (0);
 1760 }
 1761 
 1762 /*
 1763  * Glue routine to nfsv4_fillattr().
 1764  */
 1765 int
 1766 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
 1767     struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
 1768     struct ucred *cred, struct thread *p, int isdgram, int reterr,
 1769     int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
 1770 {
 1771         struct statfs *sf;
 1772         int error;
 1773 
 1774         sf = NULL;
 1775         if (nfsrv_devidcnt > 0 &&
 1776             (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) ||
 1777              NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) ||
 1778              NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) {
 1779                 sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO);
 1780                 error = nfsrv_pnfsstatfs(sf, mp);
 1781                 if (error != 0) {
 1782                         free(sf, M_TEMP);
 1783                         sf = NULL;
 1784                 }
 1785         }
 1786         error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
 1787             attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
 1788             mounted_on_fileno, sf);
 1789         free(sf, M_TEMP);
 1790         NFSEXITCODE2(0, nd);
 1791         return (error);
 1792 }
 1793 
 1794 /* Since the Readdir vnode ops vary, put the entire functions in here. */
 1795 /*
 1796  * nfs readdir service
 1797  * - mallocs what it thinks is enough to read
 1798  *      count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
 1799  * - calls VOP_READDIR()
 1800  * - loops around building the reply
 1801  *      if the output generated exceeds count break out of loop
 1802  *      The NFSM_CLGET macro is used here so that the reply will be packed
 1803  *      tightly in mbuf clusters.
 1804  * - it trims out records with d_fileno == 0
 1805  *      this doesn't matter for Unix clients, but they might confuse clients
 1806  *      for other os'.
 1807  * - it trims out records with d_type == DT_WHT
 1808  *      these cannot be seen through NFS (unless we extend the protocol)
 1809  *     The alternate call nfsrvd_readdirplus() does lookups as well.
 1810  * PS: The NFS protocol spec. does not clarify what the "count" byte
 1811  *      argument is a count of.. just name strings and file id's or the
 1812  *      entire reply rpc or ...
 1813  *      I tried just file name and id sizes and it confused the Sun client,
 1814  *      so I am using the full rpc size now. The "paranoia.." comment refers
 1815  *      to including the status longwords that are not a part of the dir.
 1816  *      "entry" structures, but are in the rpc.
 1817  */
 1818 int
 1819 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
 1820     struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
 1821 {
 1822         struct dirent *dp;
 1823         u_int32_t *tl;
 1824         int dirlen;
 1825         char *cpos, *cend, *rbuf;
 1826         struct nfsvattr at;
 1827         int nlen, error = 0, getret = 1;
 1828         int siz, cnt, fullsiz, eofflag, ncookies;
 1829         u_int64_t off, toff, verf __unused;
 1830         u_long *cookies = NULL, *cookiep;
 1831         struct uio io;
 1832         struct iovec iv;
 1833         int is_ufs;
 1834 
 1835         if (nd->nd_repstat) {
 1836                 nfsrv_postopattr(nd, getret, &at);
 1837                 goto out;
 1838         }
 1839         if (nd->nd_flag & ND_NFSV2) {
 1840                 NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1841                 off = fxdr_unsigned(u_quad_t, *tl++);
 1842         } else {
 1843                 NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 1844                 off = fxdr_hyper(tl);
 1845                 tl += 2;
 1846                 verf = fxdr_hyper(tl);
 1847                 tl += 2;
 1848         }
 1849         toff = off;
 1850         cnt = fxdr_unsigned(int, *tl);
 1851         if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
 1852                 cnt = NFS_SRVMAXDATA(nd);
 1853         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 1854         fullsiz = siz;
 1855         if (nd->nd_flag & ND_NFSV3) {
 1856                 nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1,
 1857                     NULL);
 1858 #if 0
 1859                 /*
 1860                  * va_filerev is not sufficient as a cookie verifier,
 1861                  * since it is not supposed to change when entries are
 1862                  * removed/added unless that offset cookies returned to
 1863                  * the client are no longer valid.
 1864                  */
 1865                 if (!nd->nd_repstat && toff && verf != at.na_filerev)
 1866                         nd->nd_repstat = NFSERR_BAD_COOKIE;
 1867 #endif
 1868         }
 1869         if (!nd->nd_repstat && vp->v_type != VDIR)
 1870                 nd->nd_repstat = NFSERR_NOTDIR;
 1871         if (nd->nd_repstat == 0 && cnt == 0) {
 1872                 if (nd->nd_flag & ND_NFSV2)
 1873                         /* NFSv2 does not have NFSERR_TOOSMALL */
 1874                         nd->nd_repstat = EPERM;
 1875                 else
 1876                         nd->nd_repstat = NFSERR_TOOSMALL;
 1877         }
 1878         if (!nd->nd_repstat)
 1879                 nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 1880                     nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 1881                     NFSACCCHK_VPISLOCKED, NULL);
 1882         if (nd->nd_repstat) {
 1883                 vput(vp);
 1884                 if (nd->nd_flag & ND_NFSV3)
 1885                         nfsrv_postopattr(nd, getret, &at);
 1886                 goto out;
 1887         }
 1888         is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
 1889         rbuf = malloc(siz, M_TEMP, M_WAITOK);
 1890 again:
 1891         eofflag = 0;
 1892         if (cookies) {
 1893                 free(cookies, M_TEMP);
 1894                 cookies = NULL;
 1895         }
 1896 
 1897         iv.iov_base = rbuf;
 1898         iv.iov_len = siz;
 1899         io.uio_iov = &iv;
 1900         io.uio_iovcnt = 1;
 1901         io.uio_offset = (off_t)off;
 1902         io.uio_resid = siz;
 1903         io.uio_segflg = UIO_SYSSPACE;
 1904         io.uio_rw = UIO_READ;
 1905         io.uio_td = NULL;
 1906         nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
 1907             &cookies);
 1908         off = (u_int64_t)io.uio_offset;
 1909         if (io.uio_resid)
 1910                 siz -= io.uio_resid;
 1911 
 1912         if (!cookies && !nd->nd_repstat)
 1913                 nd->nd_repstat = NFSERR_PERM;
 1914         if (nd->nd_flag & ND_NFSV3) {
 1915                 getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 1916                 if (!nd->nd_repstat)
 1917                         nd->nd_repstat = getret;
 1918         }
 1919 
 1920         /*
 1921          * Handles the failed cases. nd->nd_repstat == 0 past here.
 1922          */
 1923         if (nd->nd_repstat) {
 1924                 vput(vp);
 1925                 free(rbuf, M_TEMP);
 1926                 if (cookies)
 1927                         free(cookies, M_TEMP);
 1928                 if (nd->nd_flag & ND_NFSV3)
 1929                         nfsrv_postopattr(nd, getret, &at);
 1930                 goto out;
 1931         }
 1932         /*
 1933          * If nothing read, return eof
 1934          * rpc reply
 1935          */
 1936         if (siz == 0) {
 1937                 vput(vp);
 1938                 if (nd->nd_flag & ND_NFSV2) {
 1939                         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1940                 } else {
 1941                         nfsrv_postopattr(nd, getret, &at);
 1942                         NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 1943                         txdr_hyper(at.na_filerev, tl);
 1944                         tl += 2;
 1945                 }
 1946                 *tl++ = newnfs_false;
 1947                 *tl = newnfs_true;
 1948                 free(rbuf, M_TEMP);
 1949                 free(cookies, M_TEMP);
 1950                 goto out;
 1951         }
 1952 
 1953         /*
 1954          * Check for degenerate cases of nothing useful read.
 1955          * If so go try again
 1956          */
 1957         cpos = rbuf;
 1958         cend = rbuf + siz;
 1959         dp = (struct dirent *)cpos;
 1960         cookiep = cookies;
 1961 
 1962         /*
 1963          * For some reason FreeBSD's ufs_readdir() chooses to back the
 1964          * directory offset up to a block boundary, so it is necessary to
 1965          * skip over the records that precede the requested offset. This
 1966          * requires the assumption that file offset cookies monotonically
 1967          * increase.
 1968          */
 1969         while (cpos < cend && ncookies > 0 &&
 1970             (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 1971              (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
 1972                 cpos += dp->d_reclen;
 1973                 dp = (struct dirent *)cpos;
 1974                 cookiep++;
 1975                 ncookies--;
 1976         }
 1977         if (cpos >= cend || ncookies == 0) {
 1978                 siz = fullsiz;
 1979                 toff = off;
 1980                 goto again;
 1981         }
 1982         vput(vp);
 1983 
 1984         /*
 1985          * dirlen is the size of the reply, including all XDR and must
 1986          * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
 1987          * if the XDR should be included in "count", but to be safe, we do.
 1988          * (Include the two booleans at the end of the reply in dirlen now.)
 1989          */
 1990         if (nd->nd_flag & ND_NFSV3) {
 1991                 nfsrv_postopattr(nd, getret, &at);
 1992                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1993                 txdr_hyper(at.na_filerev, tl);
 1994                 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
 1995         } else {
 1996                 dirlen = 2 * NFSX_UNSIGNED;
 1997         }
 1998 
 1999         /* Loop through the records and build reply */
 2000         while (cpos < cend && ncookies > 0) {
 2001                 nlen = dp->d_namlen;
 2002                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
 2003                         nlen <= NFS_MAXNAMLEN) {
 2004                         if (nd->nd_flag & ND_NFSV3)
 2005                                 dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
 2006                         else
 2007                                 dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
 2008                         if (dirlen > cnt) {
 2009                                 eofflag = 0;
 2010                                 break;
 2011                         }
 2012 
 2013                         /*
 2014                          * Build the directory record xdr from
 2015                          * the dirent entry.
 2016                          */
 2017                         if (nd->nd_flag & ND_NFSV3) {
 2018                                 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 2019                                 *tl++ = newnfs_true;
 2020                                 *tl++ = 0;
 2021                         } else {
 2022                                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2023                                 *tl++ = newnfs_true;
 2024                         }
 2025                         *tl = txdr_unsigned(dp->d_fileno);
 2026                         (void) nfsm_strtom(nd, dp->d_name, nlen);
 2027                         if (nd->nd_flag & ND_NFSV3) {
 2028                                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2029                                 txdr_hyper(*cookiep, tl);
 2030                         } else {
 2031                                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 2032                                 *tl = txdr_unsigned(*cookiep);
 2033                         }
 2034                 }
 2035                 cpos += dp->d_reclen;
 2036                 dp = (struct dirent *)cpos;
 2037                 cookiep++;
 2038                 ncookies--;
 2039         }
 2040         if (cpos < cend)
 2041                 eofflag = 0;
 2042         NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2043         *tl++ = newnfs_false;
 2044         if (eofflag)
 2045                 *tl = newnfs_true;
 2046         else
 2047                 *tl = newnfs_false;
 2048         free(rbuf, M_TEMP);
 2049         free(cookies, M_TEMP);
 2050 
 2051 out:
 2052         NFSEXITCODE2(0, nd);
 2053         return (0);
 2054 nfsmout:
 2055         vput(vp);
 2056         NFSEXITCODE2(error, nd);
 2057         return (error);
 2058 }
 2059 
 2060 /*
 2061  * Readdirplus for V3 and Readdir for V4.
 2062  */
 2063 int
 2064 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
 2065     struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
 2066 {
 2067         struct dirent *dp;
 2068         u_int32_t *tl;
 2069         int dirlen;
 2070         char *cpos, *cend, *rbuf;
 2071         struct vnode *nvp;
 2072         fhandle_t nfh;
 2073         struct nfsvattr nva, at, *nvap = &nva;
 2074         struct mbuf *mb0, *mb1;
 2075         struct nfsreferral *refp;
 2076         int nlen, r, error = 0, getret = 1, usevget = 1;
 2077         int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
 2078         caddr_t bpos0, bpos1;
 2079         u_int64_t off, toff, verf;
 2080         u_long *cookies = NULL, *cookiep;
 2081         nfsattrbit_t attrbits, rderrbits, savbits;
 2082         struct uio io;
 2083         struct iovec iv;
 2084         struct componentname cn;
 2085         int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
 2086         struct mount *mp, *new_mp;
 2087         uint64_t mounted_on_fileno;
 2088 
 2089         if (nd->nd_repstat) {
 2090                 nfsrv_postopattr(nd, getret, &at);
 2091                 goto out;
 2092         }
 2093         NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 2094         off = fxdr_hyper(tl);
 2095         toff = off;
 2096         tl += 2;
 2097         verf = fxdr_hyper(tl);
 2098         tl += 2;
 2099         siz = fxdr_unsigned(int, *tl++);
 2100         cnt = fxdr_unsigned(int, *tl);
 2101 
 2102         /*
 2103          * Use the server's maximum data transfer size as the upper bound
 2104          * on reply datalen.
 2105          */
 2106         if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
 2107                 cnt = NFS_SRVMAXDATA(nd);
 2108 
 2109         /*
 2110          * siz is a "hint" of how much directory information (name, fileid,
 2111          * cookie) should be in the reply. At least one client "hints" 0,
 2112          * so I set it to cnt for that case. I also round it up to the
 2113          * next multiple of DIRBLKSIZ.
 2114          * Since the size of a Readdirplus directory entry reply will always
 2115          * be greater than a directory entry returned by VOP_READDIR(), it
 2116          * does not make sense to read more than NFS_SRVMAXDATA() via
 2117          * VOP_READDIR().
 2118          */
 2119         if (siz <= 0)
 2120                 siz = cnt;
 2121         else if (siz > NFS_SRVMAXDATA(nd))
 2122                 siz = NFS_SRVMAXDATA(nd);
 2123         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 2124 
 2125         if (nd->nd_flag & ND_NFSV4) {
 2126                 error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 2127                 if (error)
 2128                         goto nfsmout;
 2129                 NFSSET_ATTRBIT(&savbits, &attrbits);
 2130                 NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd);
 2131                 NFSZERO_ATTRBIT(&rderrbits);
 2132                 NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
 2133         } else {
 2134                 NFSZERO_ATTRBIT(&attrbits);
 2135         }
 2136         fullsiz = siz;
 2137         nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 2138 #if 0
 2139         if (!nd->nd_repstat) {
 2140             if (off && verf != at.na_filerev) {
 2141                 /*
 2142                  * va_filerev is not sufficient as a cookie verifier,
 2143                  * since it is not supposed to change when entries are
 2144                  * removed/added unless that offset cookies returned to
 2145                  * the client are no longer valid.
 2146                  */
 2147                 if (nd->nd_flag & ND_NFSV4) {
 2148                         nd->nd_repstat = NFSERR_NOTSAME;
 2149                 } else {
 2150                         nd->nd_repstat = NFSERR_BAD_COOKIE;
 2151                 }
 2152             }
 2153         }
 2154 #endif
 2155         if (!nd->nd_repstat && vp->v_type != VDIR)
 2156                 nd->nd_repstat = NFSERR_NOTDIR;
 2157         if (!nd->nd_repstat && cnt == 0)
 2158                 nd->nd_repstat = NFSERR_TOOSMALL;
 2159         if (!nd->nd_repstat)
 2160                 nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 2161                     nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 2162                     NFSACCCHK_VPISLOCKED, NULL);
 2163         if (nd->nd_repstat) {
 2164                 vput(vp);
 2165                 if (nd->nd_flag & ND_NFSV3)
 2166                         nfsrv_postopattr(nd, getret, &at);
 2167                 goto out;
 2168         }
 2169         is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
 2170         is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;
 2171 
 2172         rbuf = malloc(siz, M_TEMP, M_WAITOK);
 2173 again:
 2174         eofflag = 0;
 2175         if (cookies) {
 2176                 free(cookies, M_TEMP);
 2177                 cookies = NULL;
 2178         }
 2179 
 2180         iv.iov_base = rbuf;
 2181         iv.iov_len = siz;
 2182         io.uio_iov = &iv;
 2183         io.uio_iovcnt = 1;
 2184         io.uio_offset = (off_t)off;
 2185         io.uio_resid = siz;
 2186         io.uio_segflg = UIO_SYSSPACE;
 2187         io.uio_rw = UIO_READ;
 2188         io.uio_td = NULL;
 2189         nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
 2190             &cookies);
 2191         off = (u_int64_t)io.uio_offset;
 2192         if (io.uio_resid)
 2193                 siz -= io.uio_resid;
 2194 
 2195         getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 2196 
 2197         if (!cookies && !nd->nd_repstat)
 2198                 nd->nd_repstat = NFSERR_PERM;
 2199         if (!nd->nd_repstat)
 2200                 nd->nd_repstat = getret;
 2201         if (nd->nd_repstat) {
 2202                 vput(vp);
 2203                 if (cookies)
 2204                         free(cookies, M_TEMP);
 2205                 free(rbuf, M_TEMP);
 2206                 if (nd->nd_flag & ND_NFSV3)
 2207                         nfsrv_postopattr(nd, getret, &at);
 2208                 goto out;
 2209         }
 2210         /*
 2211          * If nothing read, return eof
 2212          * rpc reply
 2213          */
 2214         if (siz == 0) {
 2215                 vput(vp);
 2216                 if (nd->nd_flag & ND_NFSV3)
 2217                         nfsrv_postopattr(nd, getret, &at);
 2218                 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 2219                 txdr_hyper(at.na_filerev, tl);
 2220                 tl += 2;
 2221                 *tl++ = newnfs_false;
 2222                 *tl = newnfs_true;
 2223                 free(cookies, M_TEMP);
 2224                 free(rbuf, M_TEMP);
 2225                 goto out;
 2226         }
 2227 
 2228         /*
 2229          * Check for degenerate cases of nothing useful read.
 2230          * If so go try again
 2231          */
 2232         cpos = rbuf;
 2233         cend = rbuf + siz;
 2234         dp = (struct dirent *)cpos;
 2235         cookiep = cookies;
 2236 
 2237         /*
 2238          * For some reason FreeBSD's ufs_readdir() chooses to back the
 2239          * directory offset up to a block boundary, so it is necessary to
 2240          * skip over the records that precede the requested offset. This
 2241          * requires the assumption that file offset cookies monotonically
 2242          * increase.
 2243          */
 2244         while (cpos < cend && ncookies > 0 &&
 2245           (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 2246            (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
 2247            ((nd->nd_flag & ND_NFSV4) &&
 2248             ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
 2249              (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
 2250                 cpos += dp->d_reclen;
 2251                 dp = (struct dirent *)cpos;
 2252                 cookiep++;
 2253                 ncookies--;
 2254         }
 2255         if (cpos >= cend || ncookies == 0) {
 2256                 siz = fullsiz;
 2257                 toff = off;
 2258                 goto again;
 2259         }
 2260 
 2261         /*
 2262          * Busy the file system so that the mount point won't go away
 2263          * and, as such, VFS_VGET() can be used safely.
 2264          */
 2265         mp = vp->v_mount;
 2266         vfs_ref(mp);
 2267         NFSVOPUNLOCK(vp, 0);
 2268         nd->nd_repstat = vfs_busy(mp, 0);
 2269         vfs_rel(mp);
 2270         if (nd->nd_repstat != 0) {
 2271                 vrele(vp);
 2272                 free(cookies, M_TEMP);
 2273                 free(rbuf, M_TEMP);
 2274                 if (nd->nd_flag & ND_NFSV3)
 2275                         nfsrv_postopattr(nd, getret, &at);
 2276                 goto out;
 2277         }
 2278 
 2279         /*
 2280          * Check to see if entries in this directory can be safely acquired
 2281          * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
 2282          * ZFS snapshot directories need VOP_LOOKUP(), so that any
 2283          * automount of the snapshot directory that is required will
 2284          * be done.
 2285          * This needs to be done here for NFSv4, since NFSv4 never does
 2286          * a VFS_VGET() for "." or "..".
 2287          */
 2288         if (is_zfs == 1) {
 2289                 r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
 2290                 if (r == EOPNOTSUPP) {
 2291                         usevget = 0;
 2292                         cn.cn_nameiop = LOOKUP;
 2293                         cn.cn_lkflags = LK_SHARED | LK_RETRY;
 2294                         cn.cn_cred = nd->nd_cred;
 2295                         cn.cn_thread = p;
 2296                 } else if (r == 0)
 2297                         vput(nvp);
 2298         }
 2299 
 2300         /*
 2301          * Save this position, in case there is an error before one entry
 2302          * is created.
 2303          */
 2304         mb0 = nd->nd_mb;
 2305         bpos0 = nd->nd_bpos;
 2306 
 2307         /*
 2308          * Fill in the first part of the reply.
 2309          * dirlen is the reply length in bytes and cannot exceed cnt.
 2310          * (Include the two booleans at the end of the reply in dirlen now,
 2311          *  so we recognize when we have exceeded cnt.)
 2312          */
 2313         if (nd->nd_flag & ND_NFSV3) {
 2314                 dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
 2315                 nfsrv_postopattr(nd, getret, &at);
 2316         } else {
 2317                 dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
 2318         }
 2319         NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 2320         txdr_hyper(at.na_filerev, tl);
 2321 
 2322         /*
 2323          * Save this position, in case there is an empty reply needed.
 2324          */
 2325         mb1 = nd->nd_mb;
 2326         bpos1 = nd->nd_bpos;
 2327 
 2328         /* Loop through the records and build reply */
 2329         entrycnt = 0;
 2330         while (cpos < cend && ncookies > 0 && dirlen < cnt) {
 2331                 nlen = dp->d_namlen;
 2332                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
 2333                     nlen <= NFS_MAXNAMLEN &&
 2334                     ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
 2335                      (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
 2336                       || (nlen == 1 && dp->d_name[0] != '.'))) {
 2337                         /*
 2338                          * Save the current position in the reply, in case
 2339                          * this entry exceeds cnt.
 2340                          */
 2341                         mb1 = nd->nd_mb;
 2342                         bpos1 = nd->nd_bpos;
 2343         
 2344                         /*
 2345                          * For readdir_and_lookup get the vnode using
 2346                          * the file number.
 2347                          */
 2348                         nvp = NULL;
 2349                         refp = NULL;
 2350                         r = 0;
 2351                         at_root = 0;
 2352                         needs_unbusy = 0;
 2353                         new_mp = mp;
 2354                         mounted_on_fileno = (uint64_t)dp->d_fileno;
 2355                         if ((nd->nd_flag & ND_NFSV3) ||
 2356                             NFSNONZERO_ATTRBIT(&savbits)) {
 2357                                 if (nd->nd_flag & ND_NFSV4)
 2358                                         refp = nfsv4root_getreferral(NULL,
 2359                                             vp, dp->d_fileno);
 2360                                 if (refp == NULL) {
 2361                                         if (usevget)
 2362                                                 r = VFS_VGET(mp, dp->d_fileno,
 2363                                                     LK_SHARED, &nvp);
 2364                                         else
 2365                                                 r = EOPNOTSUPP;
 2366                                         if (r == EOPNOTSUPP) {
 2367                                                 if (usevget) {
 2368                                                         usevget = 0;
 2369                                                         cn.cn_nameiop = LOOKUP;
 2370                                                         cn.cn_lkflags =
 2371                                                             LK_SHARED |
 2372                                                             LK_RETRY;
 2373                                                         cn.cn_cred =
 2374                                                             nd->nd_cred;
 2375                                                         cn.cn_thread = p;
 2376                                                 }
 2377                                                 cn.cn_nameptr = dp->d_name;
 2378                                                 cn.cn_namelen = nlen;
 2379                                                 cn.cn_flags = ISLASTCN |
 2380                                                     NOFOLLOW | LOCKLEAF;
 2381                                                 if (nlen == 2 &&
 2382                                                     dp->d_name[0] == '.' &&
 2383                                                     dp->d_name[1] == '.')
 2384                                                         cn.cn_flags |=
 2385                                                             ISDOTDOT;
 2386                                                 if (NFSVOPLOCK(vp, LK_SHARED)
 2387                                                     != 0) {
 2388                                                         nd->nd_repstat = EPERM;
 2389                                                         break;
 2390                                                 }
 2391                                                 if ((vp->v_vflag & VV_ROOT) != 0
 2392                                                     && (cn.cn_flags & ISDOTDOT)
 2393                                                     != 0) {
 2394                                                         vref(vp);
 2395                                                         nvp = vp;
 2396                                                         r = 0;
 2397                                                 } else {
 2398                                                         r = VOP_LOOKUP(vp, &nvp,
 2399                                                             &cn);
 2400                                                         if (vp != nvp)
 2401                                                                 NFSVOPUNLOCK(vp,
 2402                                                                     0);
 2403                                                 }
 2404                                         }
 2405 
 2406                                         /*
 2407                                          * For NFSv4, check to see if nvp is
 2408                                          * a mount point and get the mount
 2409                                          * point vnode, as required.
 2410                                          */
 2411                                         if (r == 0 &&
 2412                                             nfsrv_enable_crossmntpt != 0 &&
 2413                                             (nd->nd_flag & ND_NFSV4) != 0 &&
 2414                                             nvp->v_type == VDIR &&
 2415                                             nvp->v_mountedhere != NULL) {
 2416                                                 new_mp = nvp->v_mountedhere;
 2417                                                 r = vfs_busy(new_mp, 0);
 2418                                                 vput(nvp);
 2419                                                 nvp = NULL;
 2420                                                 if (r == 0) {
 2421                                                         r = VFS_ROOT(new_mp,
 2422                                                             LK_SHARED, &nvp);
 2423                                                         needs_unbusy = 1;
 2424                                                         if (r == 0)
 2425                                                                 at_root = 1;
 2426                                                 }
 2427                                         }
 2428                                 }
 2429 
 2430                                 /*
 2431                                  * If we failed to look up the entry, then it
 2432                                  * has become invalid, most likely removed.
 2433                                  */
 2434                                 if (r != 0) {
 2435                                         if (needs_unbusy)
 2436                                                 vfs_unbusy(new_mp);
 2437                                         goto invalid;
 2438                                 }
 2439                                 KASSERT(refp != NULL || nvp != NULL,
 2440                                     ("%s: undetected lookup error", __func__));
 2441 
 2442                                 if (refp == NULL &&
 2443                                     ((nd->nd_flag & ND_NFSV3) ||
 2444                                      NFSNONZERO_ATTRBIT(&attrbits))) {
 2445                                         r = nfsvno_getfh(nvp, &nfh, p);
 2446                                         if (!r)
 2447                                             r = nfsvno_getattr(nvp, nvap, nd, p,
 2448                                                 1, &attrbits);
 2449                                         if (r == 0 && is_zfs == 1 &&
 2450                                             nfsrv_enable_crossmntpt != 0 &&
 2451                                             (nd->nd_flag & ND_NFSV4) != 0 &&
 2452                                             nvp->v_type == VDIR &&
 2453                                             vp->v_mount != nvp->v_mount) {
 2454                                             /*
 2455                                              * For a ZFS snapshot, there is a
 2456                                              * pseudo mount that does not set
 2457                                              * v_mountedhere, so it needs to
 2458                                              * be detected via a different
 2459                                              * mount structure.
 2460                                              */
 2461                                             at_root = 1;
 2462                                             if (new_mp == mp)
 2463                                                 new_mp = nvp->v_mount;
 2464                                         }
 2465                                 }
 2466 
 2467                                 /*
 2468                                  * If we failed to get attributes of the entry,
 2469                                  * then just skip it for NFSv3 (the traditional
 2470                                  * behavior in the old NFS server).
 2471                                  * For NFSv4 the behavior is controlled by
 2472                                  * RDATTRERROR: we either ignore the error or
 2473                                  * fail the request.
 2474                                  * Note that RDATTRERROR is never set for NFSv3.
 2475                                  */
 2476                                 if (r != 0) {
 2477                                         if (!NFSISSET_ATTRBIT(&attrbits,
 2478                                             NFSATTRBIT_RDATTRERROR)) {
 2479                                                 vput(nvp);
 2480                                                 if (needs_unbusy != 0)
 2481                                                         vfs_unbusy(new_mp);
 2482                                                 if ((nd->nd_flag & ND_NFSV3))
 2483                                                         goto invalid;
 2484                                                 nd->nd_repstat = r;
 2485                                                 break;
 2486                                         }
 2487                                 }
 2488                         }
 2489 
 2490                         /*
 2491                          * Build the directory record xdr
 2492                          */
 2493                         if (nd->nd_flag & ND_NFSV3) {
 2494                                 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 2495                                 *tl++ = newnfs_true;
 2496                                 *tl++ = 0;
 2497                                 *tl = txdr_unsigned(dp->d_fileno);
 2498                                 dirlen += nfsm_strtom(nd, dp->d_name, nlen);
 2499                                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2500                                 txdr_hyper(*cookiep, tl);
 2501                                 nfsrv_postopattr(nd, 0, nvap);
 2502                                 dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
 2503                                 dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
 2504                                 if (nvp != NULL)
 2505                                         vput(nvp);
 2506                         } else {
 2507                                 NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 2508                                 *tl++ = newnfs_true;
 2509                                 txdr_hyper(*cookiep, tl);
 2510                                 dirlen += nfsm_strtom(nd, dp->d_name, nlen);
 2511                                 if (nvp != NULL) {
 2512                                         supports_nfsv4acls =
 2513                                             nfs_supportsnfsv4acls(nvp);
 2514                                         NFSVOPUNLOCK(nvp, 0);
 2515                                 } else
 2516                                         supports_nfsv4acls = 0;
 2517                                 if (refp != NULL) {
 2518                                         dirlen += nfsrv_putreferralattr(nd,
 2519                                             &savbits, refp, 0,
 2520                                             &nd->nd_repstat);
 2521                                         if (nd->nd_repstat) {
 2522                                                 if (nvp != NULL)
 2523                                                         vrele(nvp);
 2524                                                 if (needs_unbusy != 0)
 2525                                                         vfs_unbusy(new_mp);
 2526                                                 break;
 2527                                         }
 2528                                 } else if (r) {
 2529                                         dirlen += nfsvno_fillattr(nd, new_mp,
 2530                                             nvp, nvap, &nfh, r, &rderrbits,
 2531                                             nd->nd_cred, p, isdgram, 0,
 2532                                             supports_nfsv4acls, at_root,
 2533                                             mounted_on_fileno);
 2534                                 } else {
 2535                                         dirlen += nfsvno_fillattr(nd, new_mp,
 2536                                             nvp, nvap, &nfh, r, &attrbits,
 2537                                             nd->nd_cred, p, isdgram, 0,
 2538                                             supports_nfsv4acls, at_root,
 2539                                             mounted_on_fileno);
 2540                                 }
 2541                                 if (nvp != NULL)
 2542                                         vrele(nvp);
 2543                                 dirlen += (3 * NFSX_UNSIGNED);
 2544                         }
 2545                         if (needs_unbusy != 0)
 2546                                 vfs_unbusy(new_mp);
 2547                         if (dirlen <= cnt)
 2548                                 entrycnt++;
 2549                 }
 2550 invalid:
 2551                 cpos += dp->d_reclen;
 2552                 dp = (struct dirent *)cpos;
 2553                 cookiep++;
 2554                 ncookies--;
 2555         }
 2556         vrele(vp);
 2557         vfs_unbusy(mp);
 2558 
 2559         /*
 2560          * If dirlen > cnt, we must strip off the last entry. If that
 2561          * results in an empty reply, report NFSERR_TOOSMALL.
 2562          */
 2563         if (dirlen > cnt || nd->nd_repstat) {
 2564                 if (!nd->nd_repstat && entrycnt == 0)
 2565                         nd->nd_repstat = NFSERR_TOOSMALL;
 2566                 if (nd->nd_repstat) {
 2567                         newnfs_trimtrailing(nd, mb0, bpos0);
 2568                         if (nd->nd_flag & ND_NFSV3)
 2569                                 nfsrv_postopattr(nd, getret, &at);
 2570                 } else
 2571                         newnfs_trimtrailing(nd, mb1, bpos1);
 2572                 eofflag = 0;
 2573         } else if (cpos < cend)
 2574                 eofflag = 0;
 2575         if (!nd->nd_repstat) {
 2576                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2577                 *tl++ = newnfs_false;
 2578                 if (eofflag)
 2579                         *tl = newnfs_true;
 2580                 else
 2581                         *tl = newnfs_false;
 2582         }
 2583         free(cookies, M_TEMP);
 2584         free(rbuf, M_TEMP);
 2585 
 2586 out:
 2587         NFSEXITCODE2(0, nd);
 2588         return (0);
 2589 nfsmout:
 2590         vput(vp);
 2591         NFSEXITCODE2(error, nd);
 2592         return (error);
 2593 }
 2594 
 2595 /*
 2596  * Get the settable attributes out of the mbuf list.
 2597  * (Return 0 or EBADRPC)
 2598  */
 2599 int
 2600 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 2601     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
 2602 {
 2603         u_int32_t *tl;
 2604         struct nfsv2_sattr *sp;
 2605         int error = 0, toclient = 0;
 2606 
 2607         switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
 2608         case ND_NFSV2:
 2609                 NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 2610                 /*
 2611                  * Some old clients didn't fill in the high order 16bits.
 2612                  * --> check the low order 2 bytes for 0xffff
 2613                  */
 2614                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
 2615                         nvap->na_mode = nfstov_mode(sp->sa_mode);
 2616                 if (sp->sa_uid != newnfs_xdrneg1)
 2617                         nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
 2618                 if (sp->sa_gid != newnfs_xdrneg1)
 2619                         nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
 2620                 if (sp->sa_size != newnfs_xdrneg1)
 2621                         nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
 2622                 if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
 2623 #ifdef notyet
 2624                         fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
 2625 #else
 2626                         nvap->na_atime.tv_sec =
 2627                                 fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
 2628                         nvap->na_atime.tv_nsec = 0;
 2629 #endif
 2630                 }
 2631                 if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
 2632                         fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
 2633                 break;
 2634         case ND_NFSV3:
 2635                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2636                 if (*tl == newnfs_true) {
 2637                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2638                         nvap->na_mode = nfstov_mode(*tl);
 2639                 }
 2640                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2641                 if (*tl == newnfs_true) {
 2642                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2643                         nvap->na_uid = fxdr_unsigned(uid_t, *tl);
 2644                 }
 2645                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2646                 if (*tl == newnfs_true) {
 2647                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2648                         nvap->na_gid = fxdr_unsigned(gid_t, *tl);
 2649                 }
 2650                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2651                 if (*tl == newnfs_true) {
 2652                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2653                         nvap->na_size = fxdr_hyper(tl);
 2654                 }
 2655                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2656                 switch (fxdr_unsigned(int, *tl)) {
 2657                 case NFSV3SATTRTIME_TOCLIENT:
 2658                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2659                         fxdr_nfsv3time(tl, &nvap->na_atime);
 2660                         toclient = 1;
 2661                         break;
 2662                 case NFSV3SATTRTIME_TOSERVER:
 2663                         vfs_timestamp(&nvap->na_atime);
 2664                         nvap->na_vaflags |= VA_UTIMES_NULL;
 2665                         break;
 2666                 }
 2667                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2668                 switch (fxdr_unsigned(int, *tl)) {
 2669                 case NFSV3SATTRTIME_TOCLIENT:
 2670                         NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2671                         fxdr_nfsv3time(tl, &nvap->na_mtime);
 2672                         nvap->na_vaflags &= ~VA_UTIMES_NULL;
 2673                         break;
 2674                 case NFSV3SATTRTIME_TOSERVER:
 2675                         vfs_timestamp(&nvap->na_mtime);
 2676                         if (!toclient)
 2677                                 nvap->na_vaflags |= VA_UTIMES_NULL;
 2678                         break;
 2679                 }
 2680                 break;
 2681         case ND_NFSV4:
 2682                 error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p);
 2683         }
 2684 nfsmout:
 2685         NFSEXITCODE2(error, nd);
 2686         return (error);
 2687 }
 2688 
 2689 /*
 2690  * Handle the settable attributes for V4.
 2691  * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
 2692  */
 2693 int
 2694 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
 2695     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
 2696 {
 2697         u_int32_t *tl;
 2698         int attrsum = 0;
 2699         int i, j;
 2700         int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
 2701         int moderet, toclient = 0;
 2702         u_char *cp, namestr[NFSV4_SMALLSTR + 1];
 2703         uid_t uid;
 2704         gid_t gid;
 2705         u_short mode, mask;             /* Same type as va_mode. */
 2706         struct vattr va;
 2707 
 2708         error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
 2709         if (error)
 2710                 goto nfsmout;
 2711         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2712         attrsize = fxdr_unsigned(int, *tl);
 2713 
 2714         /*
 2715          * Loop around getting the setable attributes. If an unsupported
 2716          * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
 2717          */
 2718         if (retnotsup) {
 2719                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2720                 bitpos = NFSATTRBIT_MAX;
 2721         } else {
 2722                 bitpos = 0;
 2723         }
 2724         moderet = 0;
 2725         for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
 2726             if (attrsum > attrsize) {
 2727                 error = NFSERR_BADXDR;
 2728                 goto nfsmout;
 2729             }
 2730             if (NFSISSET_ATTRBIT(attrbitp, bitpos))
 2731                 switch (bitpos) {
 2732                 case NFSATTRBIT_SIZE:
 2733                         NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
 2734                      if (vp != NULL && vp->v_type != VREG) {
 2735                             error = (vp->v_type == VDIR) ? NFSERR_ISDIR :
 2736                                 NFSERR_INVAL;
 2737                             goto nfsmout;
 2738                         }
 2739                         nvap->na_size = fxdr_hyper(tl);
 2740                         attrsum += NFSX_HYPER;
 2741                         break;
 2742                 case NFSATTRBIT_ACL:
 2743                         error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
 2744                             p);
 2745                         if (error)
 2746                                 goto nfsmout;
 2747                         if (aceerr && !nd->nd_repstat)
 2748                                 nd->nd_repstat = aceerr;
 2749                         attrsum += aclsize;
 2750                         break;
 2751                 case NFSATTRBIT_ARCHIVE:
 2752                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2753                         if (!nd->nd_repstat)
 2754                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2755                         attrsum += NFSX_UNSIGNED;
 2756                         break;
 2757                 case NFSATTRBIT_HIDDEN:
 2758                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2759                         if (!nd->nd_repstat)
 2760                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2761                         attrsum += NFSX_UNSIGNED;
 2762                         break;
 2763                 case NFSATTRBIT_MIMETYPE:
 2764                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2765                         i = fxdr_unsigned(int, *tl);
 2766                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 2767                         if (error)
 2768                                 goto nfsmout;
 2769                         if (!nd->nd_repstat)
 2770                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2771                         attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
 2772                         break;
 2773                 case NFSATTRBIT_MODE:
 2774                         moderet = NFSERR_INVAL; /* Can't do MODESETMASKED. */
 2775                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2776                         nvap->na_mode = nfstov_mode(*tl);
 2777                         attrsum += NFSX_UNSIGNED;
 2778                         break;
 2779                 case NFSATTRBIT_OWNER:
 2780                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2781                         j = fxdr_unsigned(int, *tl);
 2782                         if (j < 0) {
 2783                                 error = NFSERR_BADXDR;
 2784                                 goto nfsmout;
 2785                         }
 2786                         if (j > NFSV4_SMALLSTR)
 2787                                 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
 2788                         else
 2789                                 cp = namestr;
 2790                         error = nfsrv_mtostr(nd, cp, j);
 2791                         if (error) {
 2792                                 if (j > NFSV4_SMALLSTR)
 2793                                         free(cp, M_NFSSTRING);
 2794                                 goto nfsmout;
 2795                         }
 2796                         if (!nd->nd_repstat) {
 2797                                 nd->nd_repstat = nfsv4_strtouid(nd, cp, j, &uid,
 2798                                     p);
 2799                                 if (!nd->nd_repstat)
 2800                                         nvap->na_uid = uid;
 2801                         }
 2802                         if (j > NFSV4_SMALLSTR)
 2803                                 free(cp, M_NFSSTRING);
 2804                         attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 2805                         break;
 2806                 case NFSATTRBIT_OWNERGROUP:
 2807                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2808                         j = fxdr_unsigned(int, *tl);
 2809                         if (j < 0) {
 2810                                 error = NFSERR_BADXDR;
 2811                                 goto nfsmout;
 2812                         }
 2813                         if (j > NFSV4_SMALLSTR)
 2814                                 cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
 2815                         else
 2816                                 cp = namestr;
 2817                         error = nfsrv_mtostr(nd, cp, j);
 2818                         if (error) {
 2819                                 if (j > NFSV4_SMALLSTR)
 2820                                         free(cp, M_NFSSTRING);
 2821                                 goto nfsmout;
 2822                         }
 2823                         if (!nd->nd_repstat) {
 2824                                 nd->nd_repstat = nfsv4_strtogid(nd, cp, j, &gid,
 2825                                     p);
 2826                                 if (!nd->nd_repstat)
 2827                                         nvap->na_gid = gid;
 2828                         }
 2829                         if (j > NFSV4_SMALLSTR)
 2830                                 free(cp, M_NFSSTRING);
 2831                         attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 2832                         break;
 2833                 case NFSATTRBIT_SYSTEM:
 2834                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2835                         if (!nd->nd_repstat)
 2836                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2837                         attrsum += NFSX_UNSIGNED;
 2838                         break;
 2839                 case NFSATTRBIT_TIMEACCESSSET:
 2840                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2841                         attrsum += NFSX_UNSIGNED;
 2842                         if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 2843                             NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 2844                             fxdr_nfsv4time(tl, &nvap->na_atime);
 2845                             toclient = 1;
 2846                             attrsum += NFSX_V4TIME;
 2847                         } else {
 2848                             vfs_timestamp(&nvap->na_atime);
 2849                             nvap->na_vaflags |= VA_UTIMES_NULL;
 2850                         }
 2851                         break;
 2852                 case NFSATTRBIT_TIMEBACKUP:
 2853                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 2854                         if (!nd->nd_repstat)
 2855                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2856                         attrsum += NFSX_V4TIME;
 2857                         break;
 2858                 case NFSATTRBIT_TIMECREATE:
 2859                         NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 2860                         if (!nd->nd_repstat)
 2861                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2862                         attrsum += NFSX_V4TIME;
 2863                         break;
 2864                 case NFSATTRBIT_TIMEMODIFYSET:
 2865                         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 2866                         attrsum += NFSX_UNSIGNED;
 2867                         if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 2868                             NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 2869                             fxdr_nfsv4time(tl, &nvap->na_mtime);
 2870                             nvap->na_vaflags &= ~VA_UTIMES_NULL;
 2871                             attrsum += NFSX_V4TIME;
 2872                         } else {
 2873                             vfs_timestamp(&nvap->na_mtime);
 2874                             if (!toclient)
 2875                                 nvap->na_vaflags |= VA_UTIMES_NULL;
 2876                         }
 2877                         break;
 2878                 case NFSATTRBIT_MODESETMASKED:
 2879                         NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 2880                         mode = fxdr_unsigned(u_short, *tl++);
 2881                         mask = fxdr_unsigned(u_short, *tl);
 2882                         /*
 2883                          * vp == NULL implies an Open/Create operation.
 2884                          * This attribute can only be used for Setattr and
 2885                          * only for NFSv4.1 or higher.
 2886                          * If moderet != 0, a mode attribute has also been
 2887                          * specified and this attribute cannot be done in the
 2888                          * same Setattr operation.
 2889                          */
 2890                         if ((nd->nd_flag & ND_NFSV41) == 0)
 2891                                 nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2892                         else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 ||
 2893                             vp == NULL)
 2894                                 nd->nd_repstat = NFSERR_INVAL;
 2895                         else if (moderet == 0)
 2896                                 moderet = VOP_GETATTR(vp, &va, nd->nd_cred);
 2897                         if (moderet == 0)
 2898                                 nvap->na_mode = (mode & mask) |
 2899                                     (va.va_mode & ~mask);
 2900                         else
 2901                                 nd->nd_repstat = moderet;
 2902                         attrsum += 2 * NFSX_UNSIGNED;
 2903                         break;
 2904                 default:
 2905                         nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 2906                         /*
 2907                          * set bitpos so we drop out of the loop.
 2908                          */
 2909                         bitpos = NFSATTRBIT_MAX;
 2910                         break;
 2911                 }
 2912         }
 2913 
 2914         /*
 2915          * some clients pad the attrlist, so we need to skip over the
 2916          * padding.
 2917          */
 2918         if (attrsum > attrsize) {
 2919                 error = NFSERR_BADXDR;
 2920         } else {
 2921                 attrsize = NFSM_RNDUP(attrsize);
 2922                 if (attrsum < attrsize)
 2923                         error = nfsm_advance(nd, attrsize - attrsum, -1);
 2924         }
 2925 nfsmout:
 2926         NFSEXITCODE2(error, nd);
 2927         return (error);
 2928 }
 2929 
 2930 /*
 2931  * Check/setup export credentials.
 2932  */
 2933 int
 2934 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
 2935     struct ucred *credanon)
 2936 {
 2937         int error = 0;
 2938 
 2939         /*
 2940          * Check/setup credentials.
 2941          */
 2942         if (nd->nd_flag & ND_GSS)
 2943                 exp->nes_exflag &= ~MNT_EXPORTANON;
 2944 
 2945         /*
 2946          * Check to see if the operation is allowed for this security flavor.
 2947          * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
 2948          * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
 2949          * Also, allow Secinfo, so that it can acquire the correct flavor(s).
 2950          */
 2951         if (nfsvno_testexp(nd, exp) &&
 2952             nd->nd_procnum != NFSV4OP_SECINFO &&
 2953             nd->nd_procnum != NFSPROC_FSINFO) {
 2954                 if (nd->nd_flag & ND_NFSV4)
 2955                         error = NFSERR_WRONGSEC;
 2956                 else
 2957                         error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
 2958                 goto out;
 2959         }
 2960 
 2961         /*
 2962          * Check to see if the file system is exported V4 only.
 2963          */
 2964         if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
 2965                 error = NFSERR_PROGNOTV4;
 2966                 goto out;
 2967         }
 2968 
 2969         /*
 2970          * Now, map the user credentials.
 2971          * (Note that ND_AUTHNONE will only be set for an NFSv3
 2972          *  Fsinfo RPC. If set for anything else, this code might need
 2973          *  to change.)
 2974          */
 2975         if (NFSVNO_EXPORTED(exp)) {
 2976                 if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) ||
 2977                      NFSVNO_EXPORTANON(exp) ||
 2978                      (nd->nd_flag & ND_AUTHNONE) != 0) {
 2979                         nd->nd_cred->cr_uid = credanon->cr_uid;
 2980                         nd->nd_cred->cr_gid = credanon->cr_gid;
 2981                         crsetgroups(nd->nd_cred, credanon->cr_ngroups,
 2982                             credanon->cr_groups);
 2983                 } else if ((nd->nd_flag & ND_GSS) == 0) {
 2984                         /*
 2985                          * If using AUTH_SYS, call nfsrv_getgrpscred() to see
 2986                          * if there is a replacement credential with a group
 2987                          * list set up by "nfsuserd -manage-gids".
 2988                          * If there is no replacement, nfsrv_getgrpscred()
 2989                          * simply returns its argument.
 2990                          */
 2991                         nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
 2992                 }
 2993         }
 2994 
 2995 out:
 2996         NFSEXITCODE2(error, nd);
 2997         return (error);
 2998 }
 2999 
 3000 /*
 3001  * Check exports.
 3002  */
 3003 int
 3004 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
 3005     struct ucred **credp)
 3006 {
 3007         int i, error, *secflavors;
 3008 
 3009         error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
 3010             &exp->nes_numsecflavor, &secflavors);
 3011         if (error) {
 3012                 if (nfs_rootfhset) {
 3013                         exp->nes_exflag = 0;
 3014                         exp->nes_numsecflavor = 0;
 3015                         error = 0;
 3016                 }
 3017         } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
 3018             MAXSECFLAVORS) {
 3019                 printf("nfsvno_checkexp: numsecflavors out of range\n");
 3020                 exp->nes_numsecflavor = 0;
 3021                 error = EACCES;
 3022         } else {
 3023                 /* Copy the security flavors. */
 3024                 for (i = 0; i < exp->nes_numsecflavor; i++)
 3025                         exp->nes_secflavors[i] = secflavors[i];
 3026         }
 3027         NFSEXITCODE(error);
 3028         return (error);
 3029 }
 3030 
 3031 /*
 3032  * Get a vnode for a file handle and export stuff.
 3033  */
 3034 int
 3035 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
 3036     int lktype, struct vnode **vpp, struct nfsexstuff *exp,
 3037     struct ucred **credp)
 3038 {
 3039         int i, error, *secflavors;
 3040 
 3041         *credp = NULL;
 3042         exp->nes_numsecflavor = 0;
 3043         error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
 3044         if (error != 0)
 3045                 /* Make sure the server replies ESTALE to the client. */
 3046                 error = ESTALE;
 3047         if (nam && !error) {
 3048                 error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
 3049                     &exp->nes_numsecflavor, &secflavors);
 3050                 if (error) {
 3051                         if (nfs_rootfhset) {
 3052                                 exp->nes_exflag = 0;
 3053                                 exp->nes_numsecflavor = 0;
 3054                                 error = 0;
 3055                         } else {
 3056                                 vput(*vpp);
 3057                         }
 3058                 } else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
 3059                     MAXSECFLAVORS) {
 3060                         printf("nfsvno_fhtovp: numsecflavors out of range\n");
 3061                         exp->nes_numsecflavor = 0;
 3062                         error = EACCES;
 3063                         vput(*vpp);
 3064                 } else {
 3065                         /* Copy the security flavors. */
 3066                         for (i = 0; i < exp->nes_numsecflavor; i++)
 3067                                 exp->nes_secflavors[i] = secflavors[i];
 3068                 }
 3069         }
 3070         NFSEXITCODE(error);
 3071         return (error);
 3072 }
 3073 
 3074 /*
 3075  * nfsd_fhtovp() - convert a fh to a vnode ptr
 3076  *      - look up fsid in mount list (if not found ret error)
 3077  *      - get vp and export rights by calling nfsvno_fhtovp()
 3078  *      - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
 3079  *        for AUTH_SYS
 3080  *      - if mpp != NULL, return the mount point so that it can
 3081  *        be used for vn_finished_write() by the caller
 3082  */
 3083 void
 3084 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
 3085     struct vnode **vpp, struct nfsexstuff *exp,
 3086     struct mount **mpp, int startwrite, struct thread *p)
 3087 {
 3088         struct mount *mp, *mpw;
 3089         struct ucred *credanon;
 3090         fhandle_t *fhp;
 3091         int error;
 3092 
 3093         if (mpp != NULL)
 3094                 *mpp = NULL;
 3095         *vpp = NULL;
 3096         fhp = (fhandle_t *)nfp->nfsrvfh_data;
 3097         mp = vfs_busyfs(&fhp->fh_fsid);
 3098         if (mp == NULL) {
 3099                 nd->nd_repstat = ESTALE;
 3100                 goto out;
 3101         }
 3102 
 3103         if (startwrite) {
 3104                 mpw = mp;
 3105                 error = vn_start_write(NULL, &mpw, V_WAIT);
 3106                 if (error != 0) {
 3107                         mpw = NULL;
 3108                         vfs_unbusy(mp);
 3109                         nd->nd_repstat = ESTALE;
 3110                         goto out;
 3111                 }
 3112                 if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
 3113                         lktype = LK_EXCLUSIVE;
 3114         } else
 3115                 mpw = NULL;
 3116 
 3117         nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
 3118             &credanon);
 3119         vfs_unbusy(mp);
 3120 
 3121         /*
 3122          * For NFSv4 without a pseudo root fs, unexported file handles
 3123          * can be returned, so that Lookup works everywhere.
 3124          */
 3125         if (!nd->nd_repstat && exp->nes_exflag == 0 &&
 3126             !(nd->nd_flag & ND_NFSV4)) {
 3127                 vput(*vpp);
 3128                 *vpp = NULL;
 3129                 nd->nd_repstat = EACCES;
 3130         }
 3131 
 3132         /*
 3133          * Personally, I've never seen any point in requiring a
 3134          * reserved port#, since only in the rare case where the
 3135          * clients are all boxes with secure system privileges,
 3136          * does it provide any enhanced security, but... some people
 3137          * believe it to be useful and keep putting this code back in.
 3138          * (There is also some "security checker" out there that
 3139          *  complains if the nfs server doesn't enforce this.)
 3140          * However, note the following:
 3141          * RFC3530 (NFSv4) specifies that a reserved port# not be
 3142          *      required.
 3143          * RFC2623 recommends that, if a reserved port# is checked for,
 3144          *      that there be a way to turn that off--> ifdef'd.
 3145          */
 3146 #ifdef NFS_REQRSVPORT
 3147         if (!nd->nd_repstat) {
 3148                 struct sockaddr_in *saddr;
 3149                 struct sockaddr_in6 *saddr6;
 3150 
 3151                 saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
 3152                 saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
 3153                 if (!(nd->nd_flag & ND_NFSV4) &&
 3154                     ((saddr->sin_family == AF_INET &&
 3155                       ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
 3156                      (saddr6->sin6_family == AF_INET6 &&
 3157                       ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
 3158                         vput(*vpp);
 3159                         nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
 3160                 }
 3161         }
 3162 #endif  /* NFS_REQRSVPORT */
 3163 
 3164         /*
 3165          * Check/setup credentials.
 3166          */
 3167         if (!nd->nd_repstat) {
 3168                 nd->nd_saveduid = nd->nd_cred->cr_uid;
 3169                 nd->nd_repstat = nfsd_excred(nd, exp, credanon);
 3170                 if (nd->nd_repstat)
 3171                         vput(*vpp);
 3172         }
 3173         if (credanon != NULL)
 3174                 crfree(credanon);
 3175         if (nd->nd_repstat) {
 3176                 vn_finished_write(mpw);
 3177                 *vpp = NULL;
 3178         } else if (mpp != NULL) {
 3179                 *mpp = mpw;
 3180         }
 3181 
 3182 out:
 3183         NFSEXITCODE2(0, nd);
 3184 }
 3185 
 3186 /*
 3187  * glue for fp.
 3188  */
 3189 static int
 3190 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
 3191 {
 3192         struct filedesc *fdp;
 3193         struct file *fp;
 3194         int error = 0;
 3195 
 3196         fdp = p->td_proc->p_fd;
 3197         if (fd < 0 || fd >= fdp->fd_nfiles ||
 3198             (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
 3199                 error = EBADF;
 3200                 goto out;
 3201         }
 3202         *fpp = fp;
 3203 
 3204 out:
 3205         NFSEXITCODE(error);
 3206         return (error);
 3207 }
 3208 
 3209 /*
 3210  * Called from nfssvc() to update the exports list. Just call
 3211  * vfs_export(). This has to be done, since the v4 root fake fs isn't
 3212  * in the mount list.
 3213  */
 3214 int
 3215 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
 3216 {
 3217         struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
 3218         int error = 0;
 3219         struct nameidata nd;
 3220         fhandle_t fh;
 3221 
 3222         error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
 3223         if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
 3224                 nfs_rootfhset = 0;
 3225         else if (error == 0) {
 3226                 if (nfsexargp->fspec == NULL) {
 3227                         error = EPERM;
 3228                         goto out;
 3229                 }
 3230                 /*
 3231                  * If fspec != NULL, this is the v4root path.
 3232                  */
 3233                 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
 3234                     nfsexargp->fspec, p);
 3235                 if ((error = namei(&nd)) != 0)
 3236                         goto out;
 3237                 error = nfsvno_getfh(nd.ni_vp, &fh, p);
 3238                 vrele(nd.ni_vp);
 3239                 if (!error) {
 3240                         nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
 3241                         NFSBCOPY((caddr_t)&fh,
 3242                             nfs_rootfh.nfsrvfh_data,
 3243                             sizeof (fhandle_t));
 3244                         nfs_rootfhset = 1;
 3245                 }
 3246         }
 3247 
 3248 out:
 3249         NFSEXITCODE(error);
 3250         return (error);
 3251 }
 3252 
 3253 /*
 3254  * This function needs to test to see if the system is near its limit
 3255  * for memory allocation via malloc() or mget() and return True iff
 3256  * either of these resources are near their limit.
 3257  * XXX (For now, this is just a stub.)
 3258  */
 3259 int nfsrv_testmalloclimit = 0;
 3260 int
 3261 nfsrv_mallocmget_limit(void)
 3262 {
 3263         static int printmesg = 0;
 3264         static int testval = 1;
 3265 
 3266         if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) {
 3267                 if ((printmesg++ % 100) == 0)
 3268                         printf("nfsd: malloc/mget near limit\n");
 3269                 return (1);
 3270         }
 3271         return (0);
 3272 }
 3273 
 3274 /*
 3275  * BSD specific initialization of a mount point.
 3276  */
 3277 void
 3278 nfsd_mntinit(void)
 3279 {
 3280         static int inited = 0;
 3281 
 3282         if (inited)
 3283                 return;
 3284         inited = 1;
 3285         nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
 3286         TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
 3287         TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
 3288         nfsv4root_mnt.mnt_export = NULL;
 3289         TAILQ_INIT(&nfsv4root_opt);
 3290         TAILQ_INIT(&nfsv4root_newopt);
 3291         nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
 3292         nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
 3293         nfsv4root_mnt.mnt_nvnodelistsize = 0;
 3294         nfsv4root_mnt.mnt_activevnodelistsize = 0;
 3295 }
 3296 
 3297 /*
 3298  * Get a vnode for a file handle, without checking exports, etc.
 3299  */
 3300 struct vnode *
 3301 nfsvno_getvp(fhandle_t *fhp)
 3302 {
 3303         struct mount *mp;
 3304         struct vnode *vp;
 3305         int error;
 3306 
 3307         mp = vfs_busyfs(&fhp->fh_fsid);
 3308         if (mp == NULL)
 3309                 return (NULL);
 3310         error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
 3311         vfs_unbusy(mp);
 3312         if (error)
 3313                 return (NULL);
 3314         return (vp);
 3315 }
 3316 
 3317 /*
 3318  * Do a local VOP_ADVLOCK().
 3319  */
 3320 int
 3321 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
 3322     u_int64_t end, struct thread *td)
 3323 {
 3324         int error = 0;
 3325         struct flock fl;
 3326         u_int64_t tlen;
 3327 
 3328         if (nfsrv_dolocallocks == 0)
 3329                 goto out;
 3330         ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");
 3331 
 3332         fl.l_whence = SEEK_SET;
 3333         fl.l_type = ftype;
 3334         fl.l_start = (off_t)first;
 3335         if (end == NFS64BITSSET) {
 3336                 fl.l_len = 0;
 3337         } else {
 3338                 tlen = end - first;
 3339                 fl.l_len = (off_t)tlen;
 3340         }
 3341         /*
 3342          * For FreeBSD8, the l_pid and l_sysid must be set to the same
 3343          * values for all calls, so that all locks will be held by the
 3344          * nfsd server. (The nfsd server handles conflicts between the
 3345          * various clients.)
 3346          * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
 3347          * bytes, so it can't be put in l_sysid.
 3348          */
 3349         if (nfsv4_sysid == 0)
 3350                 nfsv4_sysid = nlm_acquire_next_sysid();
 3351         fl.l_pid = (pid_t)0;
 3352         fl.l_sysid = (int)nfsv4_sysid;
 3353 
 3354         if (ftype == F_UNLCK)
 3355                 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
 3356                     (F_POSIX | F_REMOTE));
 3357         else
 3358                 error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
 3359                     (F_POSIX | F_REMOTE));
 3360 
 3361 out:
 3362         NFSEXITCODE(error);
 3363         return (error);
 3364 }
 3365 
 3366 /*
 3367  * Check the nfsv4 root exports.
 3368  */
 3369 int
 3370 nfsvno_v4rootexport(struct nfsrv_descript *nd)
 3371 {
 3372         struct ucred *credanon;
 3373         int exflags, error = 0, numsecflavor, *secflavors, i;
 3374 
 3375         error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
 3376             &credanon, &numsecflavor, &secflavors);
 3377         if (error) {
 3378                 error = NFSERR_PROGUNAVAIL;
 3379                 goto out;
 3380         }
 3381         if (credanon != NULL)
 3382                 crfree(credanon);
 3383         for (i = 0; i < numsecflavor; i++) {
 3384                 if (secflavors[i] == AUTH_SYS)
 3385                         nd->nd_flag |= ND_EXAUTHSYS;
 3386                 else if (secflavors[i] == RPCSEC_GSS_KRB5)
 3387                         nd->nd_flag |= ND_EXGSS;
 3388                 else if (secflavors[i] == RPCSEC_GSS_KRB5I)
 3389                         nd->nd_flag |= ND_EXGSSINTEGRITY;
 3390                 else if (secflavors[i] == RPCSEC_GSS_KRB5P)
 3391                         nd->nd_flag |= ND_EXGSSPRIVACY;
 3392         }
 3393 
 3394 out:
 3395         NFSEXITCODE(error);
 3396         return (error);
 3397 }
 3398 
 3399 /*
 3400  * Nfs server pseudo system call for the nfsd's
 3401  */
 3402 /*
 3403  * MPSAFE
 3404  */
 3405 static int
 3406 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
 3407 {
 3408         struct file *fp;
 3409         struct nfsd_addsock_args sockarg;
 3410         struct nfsd_nfsd_args nfsdarg;
 3411         struct nfsd_nfsd_oargs onfsdarg;
 3412         struct nfsd_pnfsd_args pnfsdarg;
 3413         struct vnode *vp, *nvp, *curdvp;
 3414         struct pnfsdsfile *pf;
 3415         struct nfsdevice *ds, *fds;
 3416         cap_rights_t rights;
 3417         int buflen, error, ret;
 3418         char *buf, *cp, *cp2, *cp3;
 3419         char fname[PNFS_FILENAME_LEN + 1];
 3420 
 3421         if (uap->flag & NFSSVC_NFSDADDSOCK) {
 3422                 error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
 3423                 if (error)
 3424                         goto out;
 3425                 /*
 3426                  * Since we don't know what rights might be required,
 3427                  * pretend that we need them all. It is better to be too
 3428                  * careful than too reckless.
 3429                  */
 3430                 error = fget(td, sockarg.sock,
 3431                     cap_rights_init(&rights, CAP_SOCK_SERVER), &fp);
 3432                 if (error != 0)
 3433                         goto out;
 3434                 if (fp->f_type != DTYPE_SOCKET) {
 3435                         fdrop(fp, td);
 3436                         error = EPERM;
 3437                         goto out;
 3438                 }
 3439                 error = nfsrvd_addsock(fp);
 3440                 fdrop(fp, td);
 3441         } else if (uap->flag & NFSSVC_NFSDNFSD) {
 3442                 if (uap->argp == NULL) {
 3443                         error = EINVAL;
 3444                         goto out;
 3445                 }
 3446                 if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) {
 3447                         error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg));
 3448                         if (error == 0) {
 3449                                 nfsdarg.principal = onfsdarg.principal;
 3450                                 nfsdarg.minthreads = onfsdarg.minthreads;
 3451                                 nfsdarg.maxthreads = onfsdarg.maxthreads;
 3452                                 nfsdarg.version = 1;
 3453                                 nfsdarg.addr = NULL;
 3454                                 nfsdarg.addrlen = 0;
 3455                                 nfsdarg.dnshost = NULL;
 3456                                 nfsdarg.dnshostlen = 0;
 3457                                 nfsdarg.dspath = NULL;
 3458                                 nfsdarg.dspathlen = 0;
 3459                                 nfsdarg.mdspath = NULL;
 3460                                 nfsdarg.mdspathlen = 0;
 3461                                 nfsdarg.mirrorcnt = 1;
 3462                         }
 3463                 } else
 3464                         error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg));
 3465                 if (error)
 3466                         goto out;
 3467                 if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 &&
 3468                     nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 &&
 3469                     nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 &&
 3470                     nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 &&
 3471                     nfsdarg.mirrorcnt >= 1 &&
 3472                     nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS &&
 3473                     nfsdarg.addr != NULL && nfsdarg.dnshost != NULL &&
 3474                     nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) {
 3475                         NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d"
 3476                             " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen,
 3477                             nfsdarg.dspathlen, nfsdarg.dnshostlen,
 3478                             nfsdarg.mdspathlen, nfsdarg.mirrorcnt);
 3479                         cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK);
 3480                         error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen);
 3481                         if (error != 0) {
 3482                                 free(cp, M_TEMP);
 3483                                 goto out;
 3484                         }
 3485                         cp[nfsdarg.addrlen] = '\0';     /* Ensure nul term. */
 3486                         nfsdarg.addr = cp;
 3487                         cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK);
 3488                         error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen);
 3489                         if (error != 0) {
 3490                                 free(nfsdarg.addr, M_TEMP);
 3491                                 free(cp, M_TEMP);
 3492                                 goto out;
 3493                         }
 3494                         cp[nfsdarg.dnshostlen] = '\0';  /* Ensure nul term. */
 3495                         nfsdarg.dnshost = cp;
 3496                         cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK);
 3497                         error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen);
 3498                         if (error != 0) {
 3499                                 free(nfsdarg.addr, M_TEMP);
 3500                                 free(nfsdarg.dnshost, M_TEMP);
 3501                                 free(cp, M_TEMP);
 3502                                 goto out;
 3503                         }
 3504                         cp[nfsdarg.dspathlen] = '\0';   /* Ensure nul term. */
 3505                         nfsdarg.dspath = cp;
 3506                         cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK);
 3507                         error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen);
 3508                         if (error != 0) {
 3509                                 free(nfsdarg.addr, M_TEMP);
 3510                                 free(nfsdarg.dnshost, M_TEMP);
 3511                                 free(nfsdarg.dspath, M_TEMP);
 3512                                 free(cp, M_TEMP);
 3513                                 goto out;
 3514                         }
 3515                         cp[nfsdarg.mdspathlen] = '\0';  /* Ensure nul term. */
 3516                         nfsdarg.mdspath = cp;
 3517                 } else {
 3518                         nfsdarg.addr = NULL;
 3519                         nfsdarg.addrlen = 0;
 3520                         nfsdarg.dnshost = NULL;
 3521                         nfsdarg.dnshostlen = 0;
 3522                         nfsdarg.dspath = NULL;
 3523                         nfsdarg.dspathlen = 0;
 3524                         nfsdarg.mdspath = NULL;
 3525                         nfsdarg.mdspathlen = 0;
 3526                         nfsdarg.mirrorcnt = 1;
 3527                 }
 3528                 error = nfsrvd_nfsd(td, &nfsdarg);
 3529                 free(nfsdarg.addr, M_TEMP);
 3530                 free(nfsdarg.dnshost, M_TEMP);
 3531                 free(nfsdarg.dspath, M_TEMP);
 3532                 free(nfsdarg.mdspath, M_TEMP);
 3533         } else if (uap->flag & NFSSVC_PNFSDS) {
 3534                 error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg));
 3535                 if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER ||
 3536                     pnfsdarg.op == PNFSDOP_FORCEDELDS)) {
 3537                         cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 3538                         error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1,
 3539                             NULL);
 3540                         if (error == 0)
 3541                                 error = nfsrv_deldsserver(pnfsdarg.op, cp, td);
 3542                         free(cp, M_TEMP);
 3543                 } else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) {
 3544                         cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 3545                         buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS;
 3546                         buf = malloc(buflen, M_TEMP, M_WAITOK);
 3547                         error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1,
 3548                             NULL);
 3549                         NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error);
 3550                         if (error == 0 && pnfsdarg.dspath != NULL) {
 3551                                 cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 3552                                 error = copyinstr(pnfsdarg.dspath, cp2,
 3553                                     PATH_MAX + 1, NULL);
 3554                                 NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n",
 3555                                     error);
 3556                         } else
 3557                                 cp2 = NULL;
 3558                         if (error == 0 && pnfsdarg.curdspath != NULL) {
 3559                                 cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 3560                                 error = copyinstr(pnfsdarg.curdspath, cp3,
 3561                                     PATH_MAX + 1, NULL);
 3562                                 NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n",
 3563                                     error);
 3564                         } else
 3565                                 cp3 = NULL;
 3566                         curdvp = NULL;
 3567                         fds = NULL;
 3568                         if (error == 0)
 3569                                 error = nfsrv_mdscopymr(cp, cp2, cp3, buf,
 3570                                     &buflen, fname, td, &vp, &nvp, &pf, &ds,
 3571                                     &fds);
 3572                         NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error);
 3573                         if (error == 0) {
 3574                                 if (pf->dsf_dir >= nfsrv_dsdirsize) {
 3575                                         printf("copymr: dsdir out of range\n");
 3576                                         pf->dsf_dir = 0;
 3577                                 }
 3578                                 NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen);
 3579                                 error = nfsrv_copymr(vp, nvp,
 3580                                     ds->nfsdev_dsdir[pf->dsf_dir], ds, pf,
 3581                                     (struct pnfsdsfile *)buf,
 3582                                     buflen / sizeof(*pf), td->td_ucred, td);
 3583                                 vput(vp);
 3584                                 vput(nvp);
 3585                                 if (fds != NULL && error == 0) {
 3586                                         curdvp = fds->nfsdev_dsdir[pf->dsf_dir];
 3587                                         ret = vn_lock(curdvp, LK_EXCLUSIVE);
 3588                                         if (ret == 0) {
 3589                                                 nfsrv_dsremove(curdvp, fname,
 3590                                                     td->td_ucred, td);
 3591                                                 NFSVOPUNLOCK(curdvp, 0);
 3592                                         }
 3593                                 }
 3594                                 NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error);
 3595                         }
 3596                         free(cp, M_TEMP);
 3597                         free(cp2, M_TEMP);
 3598                         free(cp3, M_TEMP);
 3599                         free(buf, M_TEMP);
 3600                 }
 3601         } else {
 3602                 error = nfssvc_srvcall(td, uap, td->td_ucred);
 3603         }
 3604 
 3605 out:
 3606         NFSEXITCODE(error);
 3607         return (error);
 3608 }
 3609 
 3610 static int
 3611 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
 3612 {
 3613         struct nfsex_args export;
 3614         struct file *fp = NULL;
 3615         int stablefd, len;
 3616         struct nfsd_clid adminrevoke;
 3617         struct nfsd_dumplist dumplist;
 3618         struct nfsd_dumpclients *dumpclients;
 3619         struct nfsd_dumplocklist dumplocklist;
 3620         struct nfsd_dumplocks *dumplocks;
 3621         struct nameidata nd;
 3622         vnode_t vp;
 3623         int error = EINVAL, igotlock;
 3624         struct proc *procp;
 3625         static int suspend_nfsd = 0;
 3626 
 3627         if (uap->flag & NFSSVC_PUBLICFH) {
 3628                 NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
 3629                     sizeof (fhandle_t));
 3630                 error = copyin(uap->argp,
 3631                     &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
 3632                 if (!error)
 3633                         nfs_pubfhset = 1;
 3634         } else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
 3635                 error = copyin(uap->argp,(caddr_t)&export,
 3636                     sizeof (struct nfsex_args));
 3637                 if (!error)
 3638                         error = nfsrv_v4rootexport(&export, cred, p);
 3639         } else if (uap->flag & NFSSVC_NOPUBLICFH) {
 3640                 nfs_pubfhset = 0;
 3641                 error = 0;
 3642         } else if (uap->flag & NFSSVC_STABLERESTART) {
 3643                 error = copyin(uap->argp, (caddr_t)&stablefd,
 3644                     sizeof (int));
 3645                 if (!error)
 3646                         error = fp_getfvp(p, stablefd, &fp, &vp);
 3647                 if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
 3648                         error = EBADF;
 3649                 if (!error && newnfs_numnfsd != 0)
 3650                         error = EPERM;
 3651                 if (!error) {
 3652                         nfsrv_stablefirst.nsf_fp = fp;
 3653                         nfsrv_setupstable(p);
 3654                 }
 3655         } else if (uap->flag & NFSSVC_ADMINREVOKE) {
 3656                 error = copyin(uap->argp, (caddr_t)&adminrevoke,
 3657                     sizeof (struct nfsd_clid));
 3658                 if (!error)
 3659                         error = nfsrv_adminrevoke(&adminrevoke, p);
 3660         } else if (uap->flag & NFSSVC_DUMPCLIENTS) {
 3661                 error = copyin(uap->argp, (caddr_t)&dumplist,
 3662                     sizeof (struct nfsd_dumplist));
 3663                 if (!error && (dumplist.ndl_size < 1 ||
 3664                         dumplist.ndl_size > NFSRV_MAXDUMPLIST))
 3665                         error = EPERM;
 3666                 if (!error) {
 3667                     len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
 3668                     dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
 3669                     nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
 3670                     error = copyout(dumpclients,
 3671                         CAST_USER_ADDR_T(dumplist.ndl_list), len);
 3672                     free(dumpclients, M_TEMP);
 3673                 }
 3674         } else if (uap->flag & NFSSVC_DUMPLOCKS) {
 3675                 error = copyin(uap->argp, (caddr_t)&dumplocklist,
 3676                     sizeof (struct nfsd_dumplocklist));
 3677                 if (!error && (dumplocklist.ndllck_size < 1 ||
 3678                         dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
 3679                         error = EPERM;
 3680                 if (!error)
 3681                         error = nfsrv_lookupfilename(&nd,
 3682                                 dumplocklist.ndllck_fname, p);
 3683                 if (!error) {
 3684                         len = sizeof (struct nfsd_dumplocks) *
 3685                                 dumplocklist.ndllck_size;
 3686                         dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
 3687                         nfsrv_dumplocks(nd.ni_vp, dumplocks,
 3688                             dumplocklist.ndllck_size, p);
 3689                         vput(nd.ni_vp);
 3690                         error = copyout(dumplocks,
 3691                             CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
 3692                         free(dumplocks, M_TEMP);
 3693                 }
 3694         } else if (uap->flag & NFSSVC_BACKUPSTABLE) {
 3695                 procp = p->td_proc;
 3696                 PROC_LOCK(procp);
 3697                 nfsd_master_pid = procp->p_pid;
 3698                 bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
 3699                 nfsd_master_start = procp->p_stats->p_start;
 3700                 nfsd_master_proc = procp;
 3701                 PROC_UNLOCK(procp);
 3702         } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
 3703                 NFSLOCKV4ROOTMUTEX();
 3704                 if (suspend_nfsd == 0) {
 3705                         /* Lock out all nfsd threads */
 3706                         do {
 3707                                 igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
 3708                                     NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 3709                         } while (igotlock == 0 && suspend_nfsd == 0);
 3710                         suspend_nfsd = 1;
 3711                 }
 3712                 NFSUNLOCKV4ROOTMUTEX();
 3713                 error = 0;
 3714         } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
 3715                 NFSLOCKV4ROOTMUTEX();
 3716                 if (suspend_nfsd != 0) {
 3717                         nfsv4_unlock(&nfsd_suspend_lock, 0);
 3718                         suspend_nfsd = 0;
 3719                 }
 3720                 NFSUNLOCKV4ROOTMUTEX();
 3721                 error = 0;
 3722         }
 3723 
 3724         NFSEXITCODE(error);
 3725         return (error);
 3726 }
 3727 
 3728 /*
 3729  * Check exports.
 3730  * Returns 0 if ok, 1 otherwise.
 3731  */
 3732 int
 3733 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
 3734 {
 3735         int i;
 3736 
 3737         /*
 3738          * This seems odd, but allow the case where the security flavor
 3739          * list is empty. This happens when NFSv4 is traversing non-exported
 3740          * file systems. Exported file systems should always have a non-empty
 3741          * security flavor list.
 3742          */
 3743         if (exp->nes_numsecflavor == 0)
 3744                 return (0);
 3745 
 3746         for (i = 0; i < exp->nes_numsecflavor; i++) {
 3747                 /*
 3748                  * The tests for privacy and integrity must be first,
 3749                  * since ND_GSS is set for everything but AUTH_SYS.
 3750                  */
 3751                 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
 3752                     (nd->nd_flag & ND_GSSPRIVACY))
 3753                         return (0);
 3754                 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
 3755                     (nd->nd_flag & ND_GSSINTEGRITY))
 3756                         return (0);
 3757                 if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
 3758                     (nd->nd_flag & ND_GSS))
 3759                         return (0);
 3760                 if (exp->nes_secflavors[i] == AUTH_SYS &&
 3761                     (nd->nd_flag & ND_GSS) == 0)
 3762                         return (0);
 3763         }
 3764         return (1);
 3765 }
 3766 
 3767 /*
 3768  * Calculate a hash value for the fid in a file handle.
 3769  */
 3770 uint32_t
 3771 nfsrv_hashfh(fhandle_t *fhp)
 3772 {
 3773         uint32_t hashval;
 3774 
 3775         hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
 3776         return (hashval);
 3777 }
 3778 
 3779 /*
 3780  * Calculate a hash value for the sessionid.
 3781  */
 3782 uint32_t
 3783 nfsrv_hashsessionid(uint8_t *sessionid)
 3784 {
 3785         uint32_t hashval;
 3786 
 3787         hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0);
 3788         return (hashval);
 3789 }
 3790 
 3791 /*
 3792  * Signal the userland master nfsd to backup the stable restart file.
 3793  */
 3794 void
 3795 nfsrv_backupstable(void)
 3796 {
 3797         struct proc *procp;
 3798 
 3799         if (nfsd_master_proc != NULL) {
 3800                 procp = pfind(nfsd_master_pid);
 3801                 /* Try to make sure it is the correct process. */
 3802                 if (procp == nfsd_master_proc &&
 3803                     procp->p_stats->p_start.tv_sec ==
 3804                     nfsd_master_start.tv_sec &&
 3805                     procp->p_stats->p_start.tv_usec ==
 3806                     nfsd_master_start.tv_usec &&
 3807                     strcmp(procp->p_comm, nfsd_master_comm) == 0)
 3808                         kern_psignal(procp, SIGUSR2);
 3809                 else
 3810                         nfsd_master_proc = NULL;
 3811 
 3812                 if (procp != NULL)
 3813                         PROC_UNLOCK(procp);
 3814         }
 3815 }
 3816 
 3817 /*
 3818  * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror.
 3819  * The arguments are in a structure, so that they can be passed through
 3820  * taskqueue for a kernel process to execute this function.
 3821  */
 3822 struct nfsrvdscreate {
 3823         int                     done;
 3824         int                     inprog;
 3825         struct task             tsk;
 3826         struct ucred            *tcred;
 3827         struct vnode            *dvp;
 3828         NFSPROC_T               *p;
 3829         struct pnfsdsfile       *pf;
 3830         int                     err;
 3831         fhandle_t               fh;
 3832         struct vattr            va;
 3833         struct vattr            createva;
 3834 };
 3835 
 3836 int
 3837 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
 3838     fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
 3839     char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp)
 3840 {
 3841         struct vnode *nvp;
 3842         struct nameidata named;
 3843         struct vattr va;
 3844         char *bufp;
 3845         u_long *hashp;
 3846         struct nfsnode *np;
 3847         struct nfsmount *nmp;
 3848         int error;
 3849 
 3850         NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
 3851             LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
 3852         nfsvno_setpathbuf(&named, &bufp, &hashp);
 3853         named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
 3854         named.ni_cnd.cn_thread = p;
 3855         named.ni_cnd.cn_nameptr = bufp;
 3856         if (fnamep != NULL) {
 3857                 strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1);
 3858                 named.ni_cnd.cn_namelen = strlen(bufp);
 3859         } else
 3860                 named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);
 3861         NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp);
 3862 
 3863         /* Create the date file in the DS mount. */
 3864         error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
 3865         if (error == 0) {
 3866                 error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
 3867                 NFSVOPUNLOCK(dvp, 0);
 3868                 if (error == 0) {
 3869                         /* Set the ownership of the file. */
 3870                         error = VOP_SETATTR(nvp, nvap, tcred);
 3871                         NFSD_DEBUG(4, "nfsrv_dscreate:"
 3872                             " setattr-uid=%d\n", error);
 3873                         if (error != 0)
 3874                                 vput(nvp);
 3875                 }
 3876                 if (error != 0)
 3877                         printf("pNFS: pnfscreate failed=%d\n", error);
 3878         } else
 3879                 printf("pNFS: pnfscreate vnlock=%d\n", error);
 3880         if (error == 0) {
 3881                 np = VTONFS(nvp);
 3882                 nmp = VFSTONFS(nvp->v_mount);
 3883                 if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
 3884                     != 0 || nmp->nm_nam->sa_len > sizeof(
 3885                     struct sockaddr_in6) ||
 3886                     np->n_fhp->nfh_len != NFSX_MYFH) {
 3887                         printf("Bad DS file: fstype=%s salen=%d"
 3888                             " fhlen=%d\n",
 3889                             nvp->v_mount->mnt_vfc->vfc_name,
 3890                             nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
 3891                         error = ENOENT;
 3892                 }
 3893 
 3894                 /* Set extattrs for the DS on the MDS file. */
 3895                 if (error == 0) {
 3896                         if (dsa != NULL) {
 3897                                 error = VOP_GETATTR(nvp, &va, tcred);
 3898                                 if (error == 0) {
 3899                                         dsa->dsa_filerev = va.va_filerev;
 3900                                         dsa->dsa_size = va.va_size;
 3901                                         dsa->dsa_atime = va.va_atime;
 3902                                         dsa->dsa_mtime = va.va_mtime;
 3903                                         dsa->dsa_bytes = va.va_bytes;
 3904                                 }
 3905                         }
 3906                         if (error == 0) {
 3907                                 NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
 3908                                     NFSX_MYFH);
 3909                                 NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
 3910                                     nmp->nm_nam->sa_len);
 3911                                 NFSBCOPY(named.ni_cnd.cn_nameptr,
 3912                                     pf->dsf_filename,
 3913                                     sizeof(pf->dsf_filename));
 3914                         }
 3915                 } else
 3916                         printf("pNFS: pnfscreate can't get DS"
 3917                             " attr=%d\n", error);
 3918                 if (nvpp != NULL && error == 0)
 3919                         *nvpp = nvp;
 3920                 else
 3921                         vput(nvp);
 3922         }
 3923         nfsvno_relpathbuf(&named);
 3924         return (error);
 3925 }
 3926 
 3927 /*
 3928  * Start up the thread that will execute nfsrv_dscreate().
 3929  */
 3930 static void
 3931 start_dscreate(void *arg, int pending)
 3932 {
 3933         struct nfsrvdscreate *dsc;
 3934 
 3935         dsc = (struct nfsrvdscreate *)arg;
 3936         dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh,
 3937             dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL);
 3938         dsc->done = 1;
 3939         NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err);
 3940 }
 3941 
 3942 /*
 3943  * Create a pNFS data file on the Data Server(s).
 3944  */
 3945 static void
 3946 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
 3947     NFSPROC_T *p)
 3948 {
 3949         struct nfsrvdscreate *dsc, *tdsc;
 3950         struct nfsdevice *ds, *tds, *fds;
 3951         struct mount *mp;
 3952         struct pnfsdsfile *pf, *tpf;
 3953         struct pnfsdsattr dsattr;
 3954         struct vattr va;
 3955         struct vnode *dvp[NFSDEV_MAXMIRRORS];
 3956         struct nfsmount *nmp;
 3957         fhandle_t fh;
 3958         uid_t vauid;
 3959         gid_t vagid;
 3960         u_short vamode;
 3961         struct ucred *tcred;
 3962         int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret;
 3963         int failpos, timo;
 3964 
 3965         /* Get a DS server directory in a round-robin order. */
 3966         mirrorcnt = 1;
 3967         mp = vp->v_mount;
 3968         ds = fds = NULL;
 3969         NFSDDSLOCK();
 3970         /*
 3971          * Search for the first entry that handles this MDS fs, but use the
 3972          * first entry for all MDS fs's otherwise.
 3973          */
 3974         TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) {
 3975                 if (tds->nfsdev_nmp != NULL) {
 3976                         if (tds->nfsdev_mdsisset == 0 && ds == NULL)
 3977                                 ds = tds;
 3978                         else if (tds->nfsdev_mdsisset != 0 && fsidcmp(
 3979                             &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) {
 3980                                 ds = fds = tds;
 3981                                 break;
 3982                         }
 3983                 }
 3984         }
 3985         if (ds == NULL) {
 3986                 NFSDDSUNLOCK();
 3987                 NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n");
 3988                 return;
 3989         }
 3990         i = dsdir[0] = ds->nfsdev_nextdir;
 3991         ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize;
 3992         dvp[0] = ds->nfsdev_dsdir[i];
 3993         tds = TAILQ_NEXT(ds, nfsdev_list);
 3994         if (nfsrv_maxpnfsmirror > 1 && tds != NULL) {
 3995                 TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) {
 3996                         if (tds->nfsdev_nmp != NULL &&
 3997                             ((tds->nfsdev_mdsisset == 0 && fds == NULL) ||
 3998                              (tds->nfsdev_mdsisset != 0 && fds != NULL &&
 3999                               fsidcmp(&mp->mnt_stat.f_fsid,
 4000                               &tds->nfsdev_mdsfsid) == 0))) {
 4001                                 dsdir[mirrorcnt] = i;
 4002                                 dvp[mirrorcnt] = tds->nfsdev_dsdir[i];
 4003                                 mirrorcnt++;
 4004                                 if (mirrorcnt >= nfsrv_maxpnfsmirror)
 4005                                         break;
 4006                         }
 4007                 }
 4008         }
 4009         /* Put at end of list to implement round-robin usage. */
 4010         TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
 4011         TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
 4012         NFSDDSUNLOCK();
 4013         dsc = NULL;
 4014         if (mirrorcnt > 1)
 4015                 tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP,
 4016                     M_WAITOK | M_ZERO);
 4017         tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK |
 4018             M_ZERO);
 4019 
 4020         error = nfsvno_getfh(vp, &fh, p);
 4021         if (error == 0)
 4022                 error = VOP_GETATTR(vp, &va, cred);
 4023         if (error == 0) {
 4024                 /* Set the attributes for "vp" to Setattr the DS vp. */
 4025                 vauid = va.va_uid;
 4026                 vagid = va.va_gid;
 4027                 vamode = va.va_mode;
 4028                 VATTR_NULL(&va);
 4029                 va.va_uid = vauid;
 4030                 va.va_gid = vagid;
 4031                 va.va_mode = vamode;
 4032                 va.va_size = 0;
 4033         } else
 4034                 printf("pNFS: pnfscreate getfh+attr=%d\n", error);
 4035 
 4036         NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid,
 4037             cred->cr_gid);
 4038         /* Make data file name based on FH. */
 4039         tcred = newnfs_getcred();
 4040 
 4041         /*
 4042          * Create the file on each DS mirror, using kernel process(es) for the
 4043          * additional mirrors.
 4044          */
 4045         failpos = -1;
 4046         for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) {
 4047                 tpf->dsf_dir = dsdir[i];
 4048                 tdsc->tcred = tcred;
 4049                 tdsc->p = p;
 4050                 tdsc->pf = tpf;
 4051                 tdsc->createva = *vap;
 4052                 NFSBCOPY(&fh, &tdsc->fh, sizeof(fh));
 4053                 tdsc->va = va;
 4054                 tdsc->dvp = dvp[i];
 4055                 tdsc->done = 0;
 4056                 tdsc->inprog = 0;
 4057                 tdsc->err = 0;
 4058                 ret = EIO;
 4059                 if (nfs_pnfsiothreads != 0) {
 4060                         ret = nfs_pnfsio(start_dscreate, tdsc);
 4061                         NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret);
 4062                 }
 4063                 if (ret != 0) {
 4064                         ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL,
 4065                             NULL, tcred, p, NULL);
 4066                         if (ret != 0) {
 4067                                 KASSERT(error == 0, ("nfsrv_dscreate err=%d",
 4068                                     error));
 4069                                 if (failpos == -1 && nfsds_failerr(ret))
 4070                                         failpos = i;
 4071                                 else
 4072                                         error = ret;
 4073                         }
 4074                 }
 4075         }
 4076         if (error == 0) {
 4077                 tpf->dsf_dir = dsdir[mirrorcnt - 1];
 4078                 error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf,
 4079                     &dsattr, NULL, tcred, p, NULL);
 4080                 if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) {
 4081                         failpos = mirrorcnt - 1;
 4082                         error = 0;
 4083                 }
 4084         }
 4085         timo = hz / 50;         /* Wait for 20msec. */
 4086         if (timo < 1)
 4087                 timo = 1;
 4088         /* Wait for kernel task(s) to complete. */
 4089         for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) {
 4090                 while (tdsc->inprog != 0 && tdsc->done == 0)
 4091                         tsleep(&tdsc->tsk, PVFS, "srvdcr", timo);
 4092                 if (tdsc->err != 0) {
 4093                         if (failpos == -1 && nfsds_failerr(tdsc->err))
 4094                                 failpos = i;
 4095                         else if (error == 0)
 4096                                 error = tdsc->err;
 4097                 }
 4098         }
 4099 
 4100         /*
 4101          * If failpos has been set, that mirror has failed, so it needs
 4102          * to be disabled.
 4103          */
 4104         if (failpos >= 0) {
 4105                 nmp = VFSTONFS(dvp[failpos]->v_mount);
 4106                 NFSLOCKMNT(nmp);
 4107                 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 4108                      NFSMNTP_CANCELRPCS)) == 0) {
 4109                         nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 4110                         NFSUNLOCKMNT(nmp);
 4111                         ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
 4112                         NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos,
 4113                             ds);
 4114                         if (ds != NULL)
 4115                                 nfsrv_killrpcs(nmp);
 4116                         NFSLOCKMNT(nmp);
 4117                         nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 4118                         wakeup(nmp);
 4119                 }
 4120                 NFSUNLOCKMNT(nmp);
 4121         }
 4122 
 4123         NFSFREECRED(tcred);
 4124         if (error == 0) {
 4125                 ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp");
 4126 
 4127                 NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n",
 4128                     mirrorcnt, nfsrv_maxpnfsmirror);
 4129                 /*
 4130                  * For all mirrors that couldn't be created, fill in the
 4131                  * *pf structure, but with an IP address == 0.0.0.0.
 4132                  */
 4133                 tpf = pf + mirrorcnt;
 4134                 for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) {
 4135                         *tpf = *pf;
 4136                         tpf->dsf_sin.sin_family = AF_INET;
 4137                         tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in);
 4138                         tpf->dsf_sin.sin_addr.s_addr = 0;
 4139                         tpf->dsf_sin.sin_port = 0;
 4140                 }
 4141 
 4142                 error = vn_extattr_set(vp, IO_NODELOCKED,
 4143                     EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
 4144                     sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
 4145                 if (error == 0)
 4146                         error = vn_extattr_set(vp, IO_NODELOCKED,
 4147                             EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
 4148                             sizeof(dsattr), (char *)&dsattr, p);
 4149                 if (error != 0)
 4150                         printf("pNFS: pnfscreate setextattr=%d\n",
 4151                             error);
 4152         } else
 4153                 printf("pNFS: pnfscreate=%d\n", error);
 4154         free(pf, M_TEMP);
 4155         free(dsc, M_TEMP);
 4156 }
 4157 
 4158 /*
 4159  * Get the information needed to remove the pNFS Data Server file from the
 4160  * Metadata file.  Upon success, ddvp is set non-NULL to the locked
 4161  * DS directory vnode.  The caller must unlock *ddvp when done with it.
 4162  */
 4163 static void
 4164 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp,
 4165     int *mirrorcntp, char *fname, fhandle_t *fhp)
 4166 {
 4167         struct vattr va;
 4168         struct ucred *tcred;
 4169         char *buf;
 4170         int buflen, error;
 4171 
 4172         dvpp[0] = NULL;
 4173         /* If not an exported regular file or not a pNFS server, just return. */
 4174         if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
 4175             nfsrv_devidcnt == 0)
 4176                 return;
 4177 
 4178         /* Check to see if this is the last hard link. */
 4179         tcred = newnfs_getcred();
 4180         error = VOP_GETATTR(vp, &va, tcred);
 4181         NFSFREECRED(tcred);
 4182         if (error != 0) {
 4183                 printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error);
 4184                 return;
 4185         }
 4186         if (va.va_nlink > 1)
 4187                 return;
 4188 
 4189         error = nfsvno_getfh(vp, fhp, p);
 4190         if (error != 0) {
 4191                 printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error);
 4192                 return;
 4193         }
 4194 
 4195         buflen = 1024;
 4196         buf = malloc(buflen, M_TEMP, M_WAITOK);
 4197         /* Get the directory vnode for the DS mount and the file handle. */
 4198         error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp,
 4199             NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL);
 4200         free(buf, M_TEMP);
 4201         if (error != 0)
 4202                 printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error);
 4203 }
 4204 
 4205 /*
 4206  * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
 4207  * The arguments are in a structure, so that they can be passed through
 4208  * taskqueue for a kernel process to execute this function.
 4209  */
 4210 struct nfsrvdsremove {
 4211         int                     done;
 4212         int                     inprog;
 4213         struct task             tsk;
 4214         struct ucred            *tcred;
 4215         struct vnode            *dvp;
 4216         NFSPROC_T               *p;
 4217         int                     err;
 4218         char                    fname[PNFS_FILENAME_LEN + 1];
 4219 };
 4220 
 4221 static int
 4222 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred,
 4223     NFSPROC_T *p)
 4224 {
 4225         struct nameidata named;
 4226         struct vnode *nvp;
 4227         char *bufp;
 4228         u_long *hashp;
 4229         int error;
 4230 
 4231         error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
 4232         if (error != 0)
 4233                 return (error);
 4234         named.ni_cnd.cn_nameiop = DELETE;
 4235         named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
 4236         named.ni_cnd.cn_cred = tcred;
 4237         named.ni_cnd.cn_thread = p;
 4238         named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME;
 4239         nfsvno_setpathbuf(&named, &bufp, &hashp);
 4240         named.ni_cnd.cn_nameptr = bufp;
 4241         named.ni_cnd.cn_namelen = strlen(fname);
 4242         strlcpy(bufp, fname, NAME_MAX);
 4243         NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp);
 4244         error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
 4245         NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error);
 4246         if (error == 0) {
 4247                 error = VOP_REMOVE(dvp, nvp, &named.ni_cnd);
 4248                 vput(nvp);
 4249         }
 4250         NFSVOPUNLOCK(dvp, 0);
 4251         nfsvno_relpathbuf(&named);
 4252         if (error != 0)
 4253                 printf("pNFS: nfsrv_pnfsremove failed=%d\n", error);
 4254         return (error);
 4255 }
 4256 
 4257 /*
 4258  * Start up the thread that will execute nfsrv_dsremove().
 4259  */
 4260 static void
 4261 start_dsremove(void *arg, int pending)
 4262 {
 4263         struct nfsrvdsremove *dsrm;
 4264 
 4265         dsrm = (struct nfsrvdsremove *)arg;
 4266         dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred,
 4267             dsrm->p);
 4268         dsrm->done = 1;
 4269         NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err);
 4270 }
 4271 
 4272 /*
 4273  * Remove a pNFS data file from a Data Server.
 4274  * nfsrv_pnfsremovesetup() must have been called before the MDS file was
 4275  * removed to set up the dvp and fill in the FH.
 4276  */
 4277 static void
 4278 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp,
 4279     NFSPROC_T *p)
 4280 {
 4281         struct ucred *tcred;
 4282         struct nfsrvdsremove *dsrm, *tdsrm;
 4283         struct nfsdevice *ds;
 4284         struct nfsmount *nmp;
 4285         int failpos, i, ret, timo;
 4286 
 4287         tcred = newnfs_getcred();
 4288         dsrm = NULL;
 4289         if (mirrorcnt > 1)
 4290                 dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK);
 4291         /*
 4292          * Remove the file on each DS mirror, using kernel process(es) for the
 4293          * additional mirrors.
 4294          */
 4295         failpos = -1;
 4296         for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
 4297                 tdsrm->tcred = tcred;
 4298                 tdsrm->p = p;
 4299                 tdsrm->dvp = dvp[i];
 4300                 strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1);
 4301                 tdsrm->inprog = 0;
 4302                 tdsrm->done = 0;
 4303                 tdsrm->err = 0;
 4304                 ret = EIO;
 4305                 if (nfs_pnfsiothreads != 0) {
 4306                         ret = nfs_pnfsio(start_dsremove, tdsrm);
 4307                         NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret);
 4308                 }
 4309                 if (ret != 0) {
 4310                         ret = nfsrv_dsremove(dvp[i], fname, tcred, p);
 4311                         if (failpos == -1 && nfsds_failerr(ret))
 4312                                 failpos = i;
 4313                 }
 4314         }
 4315         ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p);
 4316         if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret))
 4317                 failpos = mirrorcnt - 1;
 4318         timo = hz / 50;         /* Wait for 20msec. */
 4319         if (timo < 1)
 4320                 timo = 1;
 4321         /* Wait for kernel task(s) to complete. */
 4322         for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
 4323                 while (tdsrm->inprog != 0 && tdsrm->done == 0)
 4324                         tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo);
 4325                 if (failpos == -1 && nfsds_failerr(tdsrm->err))
 4326                         failpos = i;
 4327         }
 4328 
 4329         /*
 4330          * If failpos has been set, that mirror has failed, so it needs
 4331          * to be disabled.
 4332          */
 4333         if (failpos >= 0) {
 4334                 nmp = VFSTONFS(dvp[failpos]->v_mount);
 4335                 NFSLOCKMNT(nmp);
 4336                 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 4337                      NFSMNTP_CANCELRPCS)) == 0) {
 4338                         nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 4339                         NFSUNLOCKMNT(nmp);
 4340                         ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
 4341                         NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos,
 4342                             ds);
 4343                         if (ds != NULL)
 4344                                 nfsrv_killrpcs(nmp);
 4345                         NFSLOCKMNT(nmp);
 4346                         nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 4347                         wakeup(nmp);
 4348                 }
 4349                 NFSUNLOCKMNT(nmp);
 4350         }
 4351 
 4352         /* Get rid all layouts for the file. */
 4353         nfsrv_freefilelayouts(fhp);
 4354 
 4355         NFSFREECRED(tcred);
 4356         free(dsrm, M_TEMP);
 4357 }
 4358 
 4359 /*
 4360  * Generate a file name based on the file handle and put it in *bufp.
 4361  * Return the number of bytes generated.
 4362  */
 4363 static int
 4364 nfsrv_putfhname(fhandle_t *fhp, char *bufp)
 4365 {
 4366         int i;
 4367         uint8_t *cp;
 4368         const uint8_t *hexdigits = "0123456789abcdef";
 4369 
 4370         cp = (uint8_t *)fhp;
 4371         for (i = 0; i < sizeof(*fhp); i++) {
 4372                 bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf];
 4373                 bufp[2 * i + 1] = hexdigits[*cp++ & 0xf];
 4374         }
 4375         bufp[2 * i] = '\0';
 4376         return (2 * i);
 4377 }
 4378 
 4379 /*
 4380  * Update the Metadata file's attributes from the DS file when a Read/Write
 4381  * layout is returned.
 4382  * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN
 4383  * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file.
 4384  */
 4385 int
 4386 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
 4387 {
 4388         struct ucred *tcred;
 4389         int error;
 4390 
 4391         /* Do this as root so that it won't fail with EACCES. */
 4392         tcred = newnfs_getcred();
 4393         error = nfsrv_proxyds(NULL, vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN,
 4394             NULL, NULL, NULL, nap, NULL);
 4395         NFSFREECRED(tcred);
 4396         return (error);
 4397 }
 4398 
 4399 /*
 4400  * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file.
 4401  */
 4402 static int
 4403 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred,
 4404     NFSPROC_T *p)
 4405 {
 4406         int error;
 4407 
 4408         error = nfsrv_proxyds(NULL, vp, 0, 0, cred, p, NFSPROC_SETACL,
 4409             NULL, NULL, NULL, NULL, aclp);
 4410         return (error);
 4411 }
 4412 
      /*
       * Perform the operation selected by ioproc against the DS data file(s)
       * for the MDS file vp:
       * NFSPROC_READDS  - nfsrv_readdsrpc() against the first mirror, using
       *                   off, cnt, mpp and mpp2.
       * NFSPROC_WRITEDS - nfsrv_writedsrpc() against all mirrors, using off,
       *                   cnt, mpp and cp.
       * NFSPROC_SETATTR - nfsrv_setattrdsrpc() against all mirrors, using nap.
       * NFSPROC_SETACL  - nfsrv_setacldsrpc() against all mirrors, using aclp.
       * anything else   - nfsrv_getattrdsrpc() against the last mirror, passing
       *                   nap.  For NFSPROC_GETATTR, the attributes stored in
       *                   the "pnfsd.dsattr" extended attribute are tried first.
       * nd is only handed to nfsrv_checkdsattr() for the NFSPROC_GETATTR case.
       * If an RPC reports a failed mirror (failpos >= 0), that DS is disabled
       * via nfsrv_deldsnmp() and, for the single mirror Read/Getattr cases, the
       * operation is retried while trycnt is less than the original mirror
       * count.
       * Returns 0 or an errno.  ENOENT is returned when vp is not a regular
       * file, is not exported, no DS is configured (nfsrv_devidcnt == 0) or
       * the DS location cannot be acquired.
       */
 4413 static int
 4414 nfsrv_proxyds(struct nfsrv_descript *nd, struct vnode *vp, off_t off, int cnt,
 4415     struct ucred *cred, struct thread *p, int ioproc, struct mbuf **mpp,
 4416     char *cp, struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp)
 4417 {
 4418         struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp;
 4419         fhandle_t fh[NFSDEV_MAXMIRRORS];
 4420         struct vnode *dvp[NFSDEV_MAXMIRRORS];
 4421         struct nfsdevice *ds;
 4422         struct pnfsdsattr dsattr;
 4423         struct opnfsdsattr odsattr;
 4424         char *buf;
 4425         int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt;
 4426 
 4427         NFSD_DEBUG(4, "in nfsrv_proxyds\n");
 4428         /*
 4429          * If not a regular file, not exported or not a pNFS server,
 4430          * just return ENOENT.
 4431          */
 4432         if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
 4433             nfsrv_devidcnt == 0)
 4434                 return (ENOENT);
 4435 
              /* Scratch buffer used for both extended attributes read below. */
 4436         buflen = 1024;
 4437         buf = malloc(buflen, M_TEMP, M_WAITOK);
 4438         error = 0;
 4439 
 4440         /*
 4441          * For Getattr, get the Change attribute (va_filerev) and size (va_size)
 4442          * from the MetaData file's extended attribute.
 4443          */
 4444         if (ioproc == NFSPROC_GETATTR) {
 4445                 error = vn_extattr_get(vp, IO_NODELOCKED,
 4446                     EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf,
 4447                     p);
 4448                 if (error == 0) {
                              /*
                               * The size of the attribute read selects between
                               * the struct opnfsdsattr layout (which has no
                               * dsa_bytes field used here) and the struct
                               * pnfsdsattr layout.  Any other size is ENXIO.
                               */
 4449                         if (buflen == sizeof(odsattr)) {
 4450                                 NFSBCOPY(buf, &odsattr, buflen);
 4451                                 nap->na_filerev = odsattr.dsa_filerev;
 4452                                 nap->na_size = odsattr.dsa_size;
 4453                                 nap->na_atime = odsattr.dsa_atime;
 4454                                 nap->na_mtime = odsattr.dsa_mtime;
 4455                                 /*
 4456                                  * Fake na_bytes by rounding up na_size.
 4457                                  * Since we don't know the block size, just
 4458                                  * use BLKDEV_IOSIZE.
 4459                                  */
 4460                                 nap->na_bytes = (odsattr.dsa_size +
 4461                                     BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1);
 4462                         } else if (buflen == sizeof(dsattr)) {
 4463                                 NFSBCOPY(buf, &dsattr, buflen);
 4464                                 nap->na_filerev = dsattr.dsa_filerev;
 4465                                 nap->na_size = dsattr.dsa_size;
 4466                                 nap->na_atime = dsattr.dsa_atime;
 4467                                 nap->na_mtime = dsattr.dsa_mtime;
 4468                                 nap->na_bytes = dsattr.dsa_bytes;
 4469                         } else
 4470                                 error = ENXIO;
 4471                 }
 4472                 if (error == 0) {
 4473                         /*
 4474                          * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr()
 4475                          * returns 0, just return now.  nfsrv_checkdsattr()
 4476                          * returns 0 if there is no Read/Write layout
 4477                          * plus either an Open/Write_access or Write
 4478                          * delegation issued to a client for the file.
 4479                          */
 4480                         if (nfsrv_pnfsgetdsattr == 0 ||
 4481                             nfsrv_checkdsattr(nd, vp, p) == 0) {
 4482                                 free(buf, M_TEMP);
 4483                                 return (error);
 4484                         }
 4485                 }
 4486 
 4487                 /*
 4488                  * Clear ENOATTR so the code below will attempt to do a
 4489                  * nfsrv_getattrdsrpc() to get the attributes and (re)create
 4490                  * the extended attribute.
 4491                  */
 4492                 if (error == ENOATTR)
 4493                         error = 0;
 4494         }
 4495 
 4496         origmircnt = -1;
 4497         trycnt = 0;
 4498 tryagain:
 4499         if (error == 0) {
 4500                 buflen = 1024;
 4501                 if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) ==
 4502                     LK_EXCLUSIVE)
 4503                         printf("nfsrv_proxyds: Readds vp exclusively locked\n");
                      /* Get the DS directory vnodes (locked LK_SHARED) and FHs. */
 4504                 error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen,
 4505                     &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL,
 4506                     NULL, NULL);
 4507                 if (error == 0) {
 4508                         for (i = 0; i < mirrorcnt; i++)
 4509                                 nmp[i] = VFSTONFS(dvp[i]->v_mount);
 4510                 } else
                              /*
                               * NOTE(review): this message is printed when
                               * nfsrv_dsgetsockmnt() fails and the one below
                               * when the earlier "pnfsd.dsattr" get failed, so
                               * the two texts look swapped -- confirm.
                               */
 4511                         printf("pNFS: proxy getextattr sockaddr=%d\n", error);
 4512         } else
 4513                 printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error);
 4514         if (error == 0) {
 4515                 failpos = -1;
                      /* Remember the mirror count of the first pass to bound retries. */
 4516                 if (origmircnt == -1)
 4517                         origmircnt = mirrorcnt;
 4518                 /*
 4519                  * If failpos is set to a mirror#, then that mirror has
 4520                  * failed and will be disabled. For Read and Getattr, the
 4521                  * function only tries one mirror, so if that mirror has
 4522                  * failed, it will need to be retried. As such, increment
 4523                  * trycnt for these cases.
 4524                  * For Write, Setattr and Setacl, the function tries all
 4525                  * mirrors and will not return an error for the case where
 4526                  * one mirror has failed. For these cases, the functioning
 4527                  * mirror(s) will have been modified, so a retry isn't
 4528                  * necessary. These functions will set failpos for the
 4529                  * failed mirror#.
 4530                  */
 4531                 if (ioproc == NFSPROC_READDS) {
 4532                         error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0],
 4533                             mpp, mpp2);
 4534                         if (nfsds_failerr(error) && mirrorcnt > 1) {
 4535                                 /*
 4536                                  * Setting failpos will cause the mirror
 4537                                  * to be disabled and then a retry of this
 4538                                  * read is required.
 4539                                  */
 4540                                 failpos = 0;
 4541                                 error = 0;
 4542                                 trycnt++;
 4543                         }
 4544                 } else if (ioproc == NFSPROC_WRITEDS)
 4545                         error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp,
 4546                             &nmp[0], mirrorcnt, mpp, cp, &failpos);
 4547                 else if (ioproc == NFSPROC_SETATTR)
 4548                         error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0],
 4549                             mirrorcnt, nap, &failpos);
 4550                 else if (ioproc == NFSPROC_SETACL)
 4551                         error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0],
 4552                             mirrorcnt, aclp, &failpos);
 4553                 else {
                              /* Getattr (and any other ioproc) uses the last mirror. */
 4554                         error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
 4555                             vp, nmp[mirrorcnt - 1], nap);
 4556                         if (nfsds_failerr(error) && mirrorcnt > 1) {
 4557                                 /*
 4558                                  * Setting failpos will cause the mirror
 4559                                  * to be disabled and then a retry of this
 4560                                  * getattr is required.
 4561                                  */
 4562                                 failpos = mirrorcnt - 1;
 4563                                 error = 0;
 4564                                 trycnt++;
 4565                         }
 4566                 }
 4567                 ds = NULL;
 4568                 if (failpos >= 0) {
 4569                         failnmp = nmp[failpos];
 4570                         NFSLOCKMNT(failnmp);
                              /*
                               * Skip the disable if the mount already has
                               * NFSMNTP_FORCEDISM or NFSMNTP_CANCELRPCS set.
                               * Otherwise set NFSMNTP_CANCELRPCS while the
                               * mount lock is dropped for the disable, then
                               * clear it and wakeup() any waiters.
                               */
 4571                         if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM |
 4572                              NFSMNTP_CANCELRPCS)) == 0) {
 4573                                 failnmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 4574                                 NFSUNLOCKMNT(failnmp);
 4575                                 ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER,
 4576                                     failnmp, p);
 4577                                 NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n",
 4578                                     failpos, ds);
 4579                                 if (ds != NULL)
 4580                                         nfsrv_killrpcs(failnmp);
 4581                                 NFSLOCKMNT(failnmp);
 4582                                 failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 4583                                 wakeup(failnmp);
 4584                         }
 4585                         NFSUNLOCKMNT(failnmp);
 4586                 }
                      /* Release the DS directory vnode locks acquired above. */
 4587                 for (i = 0; i < mirrorcnt; i++)
 4588                         NFSVOPUNLOCK(dvp[i], 0);
 4589                 NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error,
 4590                     trycnt);
 4591                 /* Try the Read/Getattr again if a mirror was deleted. */
 4592                 if (ds != NULL && trycnt > 0 && trycnt < origmircnt)
 4593                         goto tryagain;
 4594         } else {
 4595                 /* Return ENOENT for any Extended Attribute error. */
 4596                 error = ENOENT;
 4597         }
 4598         free(buf, M_TEMP);
 4599         NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error);
 4600         return (error);
 4601 }
 4602 
 4603 /*
 4604  * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended
 4605  * attribute.
       * vp - The MDS file's vnode, which must be locked.
       * lktype - If non-zero, the lock type used for each DS directory vnode,
       *          which is left locked upon a successful return.  (LK_EXCLUSIVE
       *          is used instead when the stored file handle is all zeros.)
       * buf, buflenp - Buffer and in/out length handed to vn_extattr_get().
       *          The length read must be a multiple of
       *          sizeof(struct pnfsdsfile), one entry per mirror.
       * mirrorcntp - Set to the number of mirrors found upon success.
       * dvpp, fhp, devid - If non-NULL, arrays filled in with the DS directory
       *          vnode, file handle and deviceid for each mirror found.
       * fnamep - If non-NULL, gets the file name of the first mirror found.
       * nvpp, dsdirp - If nvpp is non-NULL, *nvpp gets the vnode returned by
       *          nfsrv_pnfslookupds() for the first mirror looked up and
       *          *dsdirp gets its directory index.  dsdirp is dereferenced
       *          without a NULL check in that case.
 4606  * newnmpp - If it points to a non-NULL nmp, that is the destination and needs
 4607  *           to be checked.  If it points to a NULL nmp, then it returns
 4608  *           a suitable destination.
 4609  * curnmp - If non-NULL, it is the source mount for the copy.
       * ippos - If non-NULL, set to the mirror index that matches curnmp or,
       *          when curnmp is NULL, of an entry with an AF_INET address
       *          of 0 (-1 if there is none).
       * Returns 0 or an errno: ENOATTR for a malformed attribute, ENXIO when
       * curnmp is not in the mirror list, EEXIST when the given destination
       * is already a mirror, ENOENT when no DS was found, or the error from
       * the extended attribute get, vnode lock or DS Lookup.
       * Upon an error return, the DS directory vnodes already returned via
       * dvpp are unlocked and *nvpp is vput()'d.
 4610  */
 4611 int
 4612 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
 4613     int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp,
 4614     char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp,
 4615     struct nfsmount *curnmp, int *ippos, int *dsdirp)
 4616 {
 4617         struct vnode *dvp, *nvp, **tdvpp;
 4618         struct mount *mp;
 4619         struct nfsmount *nmp, *newnmp;
 4620         struct sockaddr *sad;
 4621         struct sockaddr_in *sin;
 4622         struct nfsdevice *ds, *tds, *fndds;
 4623         struct pnfsdsfile *pf;
 4624         uint32_t dsdir;
 4625         int error, fhiszero, fnd, gotone, i, mirrorcnt;
 4626 
 4627         ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp");
              /* Set the output arguments to their "nothing found" defaults. */
 4628         *mirrorcntp = 1;
 4629         tdvpp = dvpp;
 4630         if (nvpp != NULL)
 4631                 *nvpp = NULL;
 4632         if (dvpp != NULL)
 4633                 *dvpp = NULL;
 4634         if (ippos != NULL)
 4635                 *ippos = -1;
 4636         if (newnmpp != NULL)
 4637                 newnmp = *newnmpp;
 4638         else
 4639                 newnmp = NULL;
 4640         mp = vp->v_mount;
 4641         error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
 4642             "pnfsd.dsfile", buflenp, buf, p);
              /*
               * The attribute is an array of struct pnfsdsfile, one per mirror.
               * Reject it unless it holds a whole number of entries between 1
               * and NFSDEV_MAXMIRRORS.
               */
 4643         mirrorcnt = *buflenp / sizeof(*pf);
 4644         if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS ||
 4645             *buflenp != sizeof(*pf) * mirrorcnt))
 4646                 error = ENOATTR;
 4647 
 4648         pf = (struct pnfsdsfile *)buf;
 4649         /* If curnmp != NULL, check for a match in the mirror list. */
 4650         if (curnmp != NULL && error == 0) {
 4651                 fnd = 0;
 4652                 for (i = 0; i < mirrorcnt; i++, pf++) {
 4653                         sad = (struct sockaddr *)&pf->dsf_sin;
 4654                         if (nfsaddr2_match(sad, curnmp->nm_nam)) {
 4655                                 if (ippos != NULL)
 4656                                         *ippos = i;
 4657                                 fnd = 1;
 4658                                 break;
 4659                         }
 4660                 }
 4661                 if (fnd == 0)
 4662                         error = ENXIO;
 4663         }
 4664 
              /* Now walk the mirror entries again, from the start of buf. */
 4665         gotone = 0;
 4666         pf = (struct pnfsdsfile *)buf;
 4667         NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt,
 4668             error);
 4669         for (i = 0; i < mirrorcnt && error == 0; i++, pf++) {
 4670                 fhiszero = 0;
 4671                 sad = (struct sockaddr *)&pf->dsf_sin;
 4672                 sin = &pf->dsf_sin;
 4673                 dsdir = pf->dsf_dir;
                      /*
                       * dsdir indexes nfsdev_dsdir[] below, so it must be less
                       * than nfsrv_dsdirsize.  A destination that already
                       * matches one of the mirrors is rejected with EEXIST.
                       */
 4674                 if (dsdir >= nfsrv_dsdirsize) {
 4675                         printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir);
 4676                         error = ENOATTR;
 4677                 } else if (nvpp != NULL && newnmp != NULL &&
 4678                     nfsaddr2_match(sad, newnmp->nm_nam))
 4679                         error = EEXIST;
 4680                 if (error == 0) {
 4681                         if (ippos != NULL && curnmp == NULL &&
 4682                             sad->sa_family == AF_INET &&
 4683                             sin->sin_addr.s_addr == 0)
 4684                                 *ippos = i;
 4685                         if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0)
 4686                                 fhiszero = 1;
 4687                         /* Use the socket address to find the mount point. */
 4688                         fndds = NULL;
 4689                         NFSDDSLOCK();
 4690                         /* Find a match for the IP address. */
 4691                         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 4692                                 if (ds->nfsdev_nmp != NULL) {
 4693                                         dvp = ds->nfsdev_dvp;
 4694                                         nmp = VFSTONFS(dvp->v_mount);
 4695                                         if (nmp != ds->nfsdev_nmp)
 4696                                                 printf("different2 nmp %p %p\n",
 4697                                                     nmp, ds->nfsdev_nmp);
 4698                                         if (nfsaddr2_match(sad, nmp->nm_nam)) {
 4699                                                 fndds = ds;
 4700                                                 break;
 4701                                         }
 4702                                 }
 4703                         }
 4704                         if (fndds != NULL && newnmpp != NULL &&
 4705                             newnmp == NULL) {
 4706                                 /* Search for a place to make a mirror copy. */
 4707                                 TAILQ_FOREACH(tds, &nfsrv_devidhead,
 4708                                     nfsdev_list) {
                                              /*
                                               * A candidate must have a mount,
                                               * differ from fndds and agree with
                                               * fndds on nfsdev_mdsisset; when it
                                               * is set, its nfsdev_mdsfsid must
                                               * also match this MDS file system.
                                               */
 4709                                         if (tds->nfsdev_nmp != NULL &&
 4710                                             fndds != tds &&
 4711                                             ((tds->nfsdev_mdsisset == 0 &&
 4712                                               fndds->nfsdev_mdsisset == 0) ||
 4713                                              (tds->nfsdev_mdsisset != 0 &&
 4714                                               fndds->nfsdev_mdsisset != 0 &&
 4715                                               fsidcmp(&tds->nfsdev_mdsfsid,
 4716                                               &mp->mnt_stat.f_fsid) == 0))) {
 4717                                                 *newnmpp = tds->nfsdev_nmp;
 4718                                                 break;
 4719                                         }
 4720                                 }
 4721                                 if (tds != NULL) {
 4722                                         /*
 4723                                          * Move this entry to the end of the
 4724                                          * list, so it won't be selected as
 4725                                          * easily the next time.
 4726                                          */
 4727                                         TAILQ_REMOVE(&nfsrv_devidhead, tds,
 4728                                             nfsdev_list);
 4729                                         TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds,
 4730                                             nfsdev_list);
 4731                                 }
 4732                         }
 4733                         NFSDDSUNLOCK();
 4734                         if (fndds != NULL) {
 4735                                 dvp = fndds->nfsdev_dsdir[dsdir];
                                      /*
                                       * Lock dvp if the caller asked for it or
                                       * if a Lookup is needed (zero FH or the
                                       * first nvpp request).
                                       */
 4736                                 if (lktype != 0 || fhiszero != 0 ||
 4737                                     (nvpp != NULL && *nvpp == NULL)) {
 4738                                         if (fhiszero != 0)
 4739                                                 error = vn_lock(dvp,
 4740                                                     LK_EXCLUSIVE);
 4741                                         else if (lktype != 0)
 4742                                                 error = vn_lock(dvp, lktype);
 4743                                         else
 4744                                                 error = vn_lock(dvp, LK_SHARED);
 4745                                         /*
 4746                                          * If the file handle is all 0's, try to
 4747                                          * do a Lookup against the DS to acquire
 4748                                          * it.
 4749                                          * If dvpp == NULL or the Lookup fails,
 4750                                          * unlock dvp after the call.
 4751                                          */
 4752                                         if (error == 0 && (fhiszero != 0 ||
 4753                                             (nvpp != NULL && *nvpp == NULL))) {
 4754                                                 error = nfsrv_pnfslookupds(vp,
 4755                                                     dvp, pf, &nvp, p);
 4756                                                 if (error == 0) {
 4757                                                         if (fhiszero != 0)
 4758                                                                 nfsrv_pnfssetfh(
 4759                                                                     vp, pf,
 4760                                                                     devid,
 4761                                                                     fnamep,
 4762                                                                     nvp, p);
 4763                                                         if (nvpp != NULL &&
 4764                                                             *nvpp == NULL) {
 4765                                                                 *nvpp = nvp;
 4766                                                                 *dsdirp = dsdir;
 4767                                                         } else
 4768                                                                 vput(nvp);
 4769                                                 }
                                                      /*
                                                       * NOTE(review): the test here is
                                                       * on lktype, not dvpp as the
                                                       * comment above says -- confirm
                                                       * which is intended.
                                                       */
 4770                                                 if (error != 0 || lktype == 0)
 4771                                                         NFSVOPUNLOCK(dvp, 0);
 4772                                         }
 4773                                 }
 4774                                 if (error == 0) {
                                              /* Record this mirror in the output arrays. */
 4775                                         gotone++;
 4776                                         NFSD_DEBUG(4, "gotone=%d\n", gotone);
 4777                                         if (devid != NULL) {
 4778                                                 NFSBCOPY(fndds->nfsdev_deviceid,
 4779                                                     devid, NFSX_V4DEVICEID);
 4780                                                 devid += NFSX_V4DEVICEID;
 4781                                         }
 4782                                         if (dvpp != NULL)
 4783                                                 *tdvpp++ = dvp;
 4784                                         if (fhp != NULL)
 4785                                                 NFSBCOPY(&pf->dsf_fh, fhp++,
 4786                                                     NFSX_MYFH);
 4787                                         if (fnamep != NULL && gotone == 1)
 4788                                                 strlcpy(fnamep,
 4789                                                     pf->dsf_filename,
 4790                                                     sizeof(pf->dsf_filename));
 4791                                 } else
 4792                                         NFSD_DEBUG(4, "nfsrv_dsgetsockmnt "
 4793                                             "err=%d\n", error);
 4794                         }
 4795                 }
 4796         }
 4797         if (error == 0 && gotone == 0)
 4798                 error = ENOENT;
 4799 
 4800         NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone,
 4801             error);
 4802         if (error == 0)
 4803                 *mirrorcntp = gotone;
 4804         else {
 4805                 if (gotone > 0 && dvpp != NULL) {
 4806                         /*
 4807                          * If the error didn't occur on the first one and
 4808                          * dvpp != NULL, the one(s) prior to the failure will
 4809                          * have locked dvp's that need to be unlocked.
 4810                          */
 4811                         for (i = 0; i < gotone; i++) {
 4812                                 NFSVOPUNLOCK(*dvpp, 0);
 4813                                 *dvpp++ = NULL;
 4814                         }
 4815                 }
 4816                 /*
 4817                  * If it found the vnode to be copied from before a failure,
 4818                  * it needs to be vput()'d.
 4819                  */
 4820                 if (nvpp != NULL && *nvpp != NULL) {
 4821                         vput(*nvpp);
 4822                         *nvpp = NULL;
 4823                 }
 4824         }
 4825         return (error);
 4826 }
 4827 
 4828 /*
 4829  * Set the extended attribute for the Change attribute.
 4830  */
 4831 static int
 4832 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
 4833 {
 4834         struct pnfsdsattr dsattr;
 4835         int error;
 4836 
 4837         ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
 4838         dsattr.dsa_filerev = nap->na_filerev;
 4839         dsattr.dsa_size = nap->na_size;
 4840         dsattr.dsa_atime = nap->na_atime;
 4841         dsattr.dsa_mtime = nap->na_mtime;
 4842         dsattr.dsa_bytes = nap->na_bytes;
 4843         error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
 4844             "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p);
 4845         if (error != 0)
 4846                 printf("pNFS: setextattr=%d\n", error);
 4847         return (error);
 4848 }
 4849 
      /*
       * Do a Read RPC against a DS data file, using the MDS->DS proxy stateid.
       * On success with a non-zero amount of data, the reply mbuf chain is
       * trimmed in place so that it holds only the read data plus XDR padding;
       * that chain is returned via *mpp, with *mpendp set to its last mbuf.
       * *mpp is set to NULL up front and is only replaced on that path;
       * *mpendp is not written on any other path.
       * Returns 0, the error from newnfs_request(), the reply's nd_repstat or
       * ENOENT when the reply mbuf chain is inconsistent.
       * NOTE(review): NFSM_DISSECT() and NFSM_STRSIZ() are not defined in this
       * part of the file; they presumably set error and jump to nfsmout when
       * the reply is malformed - confirm.
       */
 4850 static int
 4851 nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
 4852     NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
 4853 {
 4854         uint32_t *tl;
 4855         struct nfsrv_descript *nd;
 4856         nfsv4stateid_t st;
 4857         struct mbuf *m, *m2;
 4858         int error = 0, retlen, tlen, trimlen;
 4859 
 4860         NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
 4861         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 4862         *mpp = NULL;
 4863         /*
 4864          * Use a stateid where other is an alternating 01010 pattern and
 4865          * seqid is 0xffffffff.  This value is not defined as special by
 4866          * the RFC and is used by the FreeBSD NFS server to indicate an
 4867          * MDS->DS proxy operation.
 4868          */
 4869         st.other[0] = 0x55555555;
 4870         st.other[1] = 0x55555555;
 4871         st.other[2] = 0x55555555;
 4872         st.seqid = 0xffffffff;
 4873         nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
 4874             NULL, NULL, 0, 0);
 4875         nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
              /* Read arguments: the 64-bit offset followed by the 32-bit count. */
 4876         NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
 4877         txdr_hyper(off, tl);
 4878         *(tl + 2) = txdr_unsigned(len);
 4879         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 4880             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 4881         if (error != 0) {
                      /*
                       * NOTE(review): nd_mrep is not freed on this path;
                       * presumably no reply chain exists when newnfs_request()
                       * fails - confirm.
                       */
 4882                 free(nd, M_TEMP);
 4883                 return (error);
 4884         }
 4885         if (nd->nd_repstat == 0) {
                      /* Step over one 32-bit reply word; its value is not used. */
 4886                 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
                      /*
                       * Get the length of the returned data into retlen.
                       * NOTE(review): len is presumably the upper bound the
                       * macro checks the length against - confirm.
                       */
 4887                 NFSM_STRSIZ(retlen, len);
 4888                 if (retlen > 0) {
 4889                         /* Trim off the pre-data XDR from the mbuf chain. */
                              /*
                               * Walk from the head of the reply to nd_md.  When
                               * the mbuf just before nd_md is found, the chain
                               * is cut there, everything in front is freed and
                               * nd_md becomes the new head of nd_mrep.
                               */
 4890                         m = nd->nd_mrep;
 4891                         while (m != NULL && m != nd->nd_md) {
 4892                                 if (m->m_next == nd->nd_md) {
 4893                                         m->m_next = NULL;
 4894                                         m_freem(nd->nd_mrep);
 4895                                         nd->nd_mrep = m = nd->nd_md;
 4896                                 } else
 4897                                         m = m->m_next;
 4898                         }
                              /* nd_md was not found anywhere in the reply chain. */
 4899                         if (m == NULL) {
 4900                                 printf("nfsrv_readdsrpc: busted mbuf list\n");
 4901                                 error = ENOENT;
 4902                                 goto nfsmout;
 4903                         }
 4904         
 4905                         /*
 4906                          * Now, adjust first mbuf so that any XDR before the
 4907                          * read data is skipped over.
 4908                          */
                              /*
                               * trimlen is the offset of the current dissect
                               * position (nd_dpos) within the first mbuf.
                               * NOTE(review): NFSM_DATAP() presumably advances
                               * the mbuf's data pointer by trimlen - confirm.
                               */
 4909                         trimlen = nd->nd_dpos - mtod(m, char *);
 4910                         if (trimlen > 0) {
 4911                                 m->m_len -= trimlen;
 4912                                 NFSM_DATAP(m, trimlen);
 4913                         }
 4914         
 4915                         /*
 4916                          * Truncate the mbuf chain at retlen bytes of data,
 4917                          * plus XDR padding that brings the length up to a
 4918                          * multiple of 4.
 4919                          */
                              /*
                               * tlen counts the bytes still to be kept.  The
                               * loop stops at the mbuf that holds the last of
                               * them, shortens it and frees whatever follows,
                               * leaving m pointing at the last mbuf kept.
                               */
 4920                         tlen = NFSM_RNDUP(retlen);
 4921                         do {
 4922                                 if (m->m_len >= tlen) {
 4923                                         m->m_len = tlen;
 4924                                         tlen = 0;
 4925                                         m2 = m->m_next;
 4926                                         m->m_next = NULL;
 4927                                         m_freem(m2);
 4928                                         break;
 4929                                 }
 4930                                 tlen -= m->m_len;
 4931                                 m = m->m_next;
 4932                         } while (m != NULL);
                              /* The chain ended before tlen bytes were found. */
 4933                         if (tlen > 0) {
 4934                                 printf("nfsrv_readdsrpc: busted mbuf list\n");
 4935                                 error = ENOENT;
 4936                                 goto nfsmout;
 4937                         }
                              /*
                               * Hand the trimmed chain to the caller and clear
                               * nd_mrep so that it is not freed below.
                               */
 4938                         *mpp = nd->nd_mrep;
 4939                         *mpendp = m;
 4940                         nd->nd_mrep = NULL;
 4941                 }
 4942         } else
 4943                 error = nd->nd_repstat;
 4944 nfsmout:
 4945         /* If nd->nd_mrep is already NULL, this is a no-op. */
 4946         m_freem(nd->nd_mrep);
 4947         free(nd, M_TEMP);
 4948         NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error);
 4949         return (error);
 4950 }
 4951 
 4952 /*
 4953  * Do a write RPC on a DS data file, using this structure for the arguments,
 4954  * so that this function can be executed by a separate kernel process.
 4955  */
      /*
       * nfsrv_writedsrpc() fills in one of these per mirror (except the last
       * mirror) and passes it to nfs_pnfsio() with start_writedsdorpc() as the
       * function to run.
       * NOTE(review): inprog and tsk are never written by the code in this part
       * of the file other than inprog being cleared; presumably nfs_pnfsio()
       * sets inprog and queues tsk - confirm.
       */
 4956 struct nfsrvwritedsdorpc {
 4957         int                     done;   /* set to 1 by start_writedsdorpc() once err is stored */
 4958         int                     inprog; /* cleared before dispatch; waiter sleeps while != 0 and !done */
 4959         struct task             tsk;    /* its address is the tsleep() channel of the waiter */
 4960         fhandle_t               fh;     /* private copy of the DS file handle */
 4961         off_t                   off;    /* file offset of the write */
 4962         int                     len;    /* number of bytes to write */
 4963         struct nfsmount         *nmp;   /* DS mount the RPC is sent to */
 4964         struct ucred            *cred;  /* credentials passed to nfsrv_writedsdorpc() */
 4965         NFSPROC_T               *p;     /* passed through to nfsrv_writedsdorpc() */
 4966         struct mbuf             *m;     /* m_copym() copy of the data to write */
 4967         int                     err;    /* return value of nfsrv_writedsdorpc() */
 4968 };
 4969 
      /*
       * Do a Write RPC with a trailing Getattr against a single DS.
       * The data to be written is the mbuf chain "m", which is linked onto the
       * end of the request and therefore must not be NULL.  All writes are done
       * FileSync.
       * When the RPC and both operations succeed, the Getattr reply is parsed
       * by nfsv4_loadattr() with "nap" as its attribute argument;
       * start_writedsdorpc() and the inline fallback in nfsrv_writedsrpc()
       * pass a NULL nap.
       * Returns 0, the error from newnfs_request(), the reply's nd_repstat,
       * NFSERR_IO if the DS did not report a FileSync commit, or an error from
       * parsing the reply.
       * NOTE(review): "m" is never freed here; presumably it is released
       * together with the request by newnfs_request() - confirm.
       */
 4970 static int
 4971 nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
 4972     struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
 4973 {
 4974         uint32_t *tl;
 4975         struct nfsrv_descript *nd;
 4976         nfsattrbit_t attrbits;
 4977         nfsv4stateid_t st;
              /* retlen is only reported through NFSD_DEBUG(). */
 4978         int commit, error, retlen;
 4979 
 4980         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 4981         nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
 4982             sizeof(fhandle_t), NULL, NULL, 0, 0);
 4983 
 4984         /*
 4985          * Use a stateid where other is an alternating 01010 pattern and
 4986          * seqid is 0xffffffff.  This value is not defined as special by
 4987          * the RFC and is used by the FreeBSD NFS server to indicate an
 4988          * MDS->DS proxy operation.
 4989          */
 4990         st.other[0] = 0x55555555;
 4991         st.other[1] = 0x55555555;
 4992         st.other[2] = 0x55555555;
 4993         st.seqid = 0xffffffff;
 4994         nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
              /* Write arguments: 64-bit offset, stable-how and the byte count. */
 4995         NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
 4996         txdr_hyper(off, tl);
 4997         tl += 2;
 4998         /*
 4999          * Do all writes FileSync, since the server doesn't hold onto dirty
 5000          * buffers.  Since clients should be accessing the DS servers directly
 5001          * using the pNFS layouts, this just needs to work correctly as a
 5002          * fallback.
 5003          */
 5004         *tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
 5005         *tl = txdr_unsigned(len);
 5006         NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);
 5007 
 5008         /* Put data in mbuf chain. */
 5009         nd->nd_mb->m_next = m;
 5010 
 5011         /* Set nd_mb and nd_bpos to end of data. */
 5012         while (m->m_next != NULL)
 5013                 m = m->m_next;
 5014         nd->nd_mb = m;
 5015         nd->nd_bpos = mtod(m, char *) + m->m_len;
 5016         NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);
 5017 
 5018         /* Do a Getattr for the attributes that change upon writing. */
 5019         NFSZERO_ATTRBIT(&attrbits);
 5020         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
 5021         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 5022         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
 5023         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
 5024         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
 5025         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 5026         *tl = txdr_unsigned(NFSV4OP_GETATTR);
 5027         (void) nfsrv_putattrbit(nd, &attrbits);
 5028         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
 5029             cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 5030         if (error != 0) {
                      /*
                       * NOTE(review): nd_mrep is not freed on this path;
                       * presumably no reply chain exists when newnfs_request()
                       * fails - confirm.
                       */
 5031                 free(nd, M_TEMP);
 5032                 return (error);
 5033         }
 5034         NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
 5035         /* Get rid of weak cache consistency data for now. */
              /*
               * This is only done for an NFSv4 reply that has ND_V4WCCATTR set
               * and has more data to parse.
               */
 5036         if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
 5037             (ND_NFSV4 | ND_V4WCCATTR)) {
 5038                 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
 5039                     NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 5040                 NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
 5041                 if (error != 0)
 5042                         goto nfsmout;
 5043                 /*
 5044                  * Get rid of Op# and status for next op.
 5045                  */
                      /* A non-zero status means there is nothing more to parse. */
 5046                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 5047                 if (*++tl != 0)
 5048                         nd->nd_flag |= ND_NOMOREDATA;
 5049         }
 5050         if (nd->nd_repstat == 0) {
                      /* Write result: byte count, commit level and the verifier. */
 5051                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
 5052                 retlen = fxdr_unsigned(int, *tl++);
 5053                 commit = fxdr_unsigned(int, *tl);
                      /* Anything weaker than FileSync is treated as an I/O error. */
 5054                 if (commit != NFSWRITE_FILESYNC)
 5055                         error = NFSERR_IO;
 5056                 NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
 5057                     retlen, commit, error);
 5058         } else
 5059                 error = nd->nd_repstat;
 5060         /* We have no use for the Write Verifier since we use FileSync. */
 5061 
 5062         /*
 5063          * Get the Change, Size, Access Time and Modify Time attributes and set
 5064          * on the Metadata file, so its attributes will be what the file's
 5065          * would be if it had been written.
 5066          */
              /*
               * Here the attributes are only loaded via nfsv4_loadattr(); storing
               * them on the metadata file is left to the caller.
               * NOTE(review): the two words dissected first are presumably the
               * Getattr Op# and status - confirm.
               */
 5067         if (error == 0) {
 5068                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 5069                 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
 5070                     NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 5071         }
 5072         NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
 5073 nfsmout:
 5074         m_freem(nd->nd_mrep);
 5075         free(nd, M_TEMP);
 5076         NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
 5077         return (error);
 5078 }
 5079 
 5080 /*
 5081  * Start up the thread that will execute nfsrv_writedsdorpc().
 5082  */
 5083 static void
 5084 start_writedsdorpc(void *arg, int pending)
 5085 {
 5086         struct nfsrvwritedsdorpc *drpc;
 5087 
 5088         drpc = (struct nfsrvwritedsdorpc *)arg;
 5089         drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
 5090             drpc->len, NULL, drpc->m, drpc->cred, drpc->p);
 5091         drpc->done = 1;
 5092         NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err);
 5093 }
 5094 
 5095 static int
 5096 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
 5097     NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
 5098     struct mbuf **mpp, char *cp, int *failposp)
 5099 {
 5100         struct nfsrvwritedsdorpc *drpc, *tdrpc;
 5101         struct nfsvattr na;
 5102         struct mbuf *m;
 5103         int error, i, offs, ret, timo;
 5104 
 5105         NFSD_DEBUG(4, "in nfsrv_writedsrpc\n");
 5106         KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain"));
 5107         drpc = NULL;
 5108         if (mirrorcnt > 1)
 5109                 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
 5110                     M_WAITOK);
 5111 
 5112         /* Calculate offset in mbuf chain that data starts. */
 5113         offs = cp - mtod(*mpp, char *);
 5114         NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len);
 5115 
 5116         /*
 5117          * Do the write RPC for every DS, using a separate kernel process
 5118          * for every DS except the last one.
 5119          */
 5120         error = 0;
 5121         for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 5122                 tdrpc->done = 0;
 5123                 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
 5124                 tdrpc->off = off;
 5125                 tdrpc->len = len;
 5126                 tdrpc->nmp = *nmpp;
 5127                 tdrpc->cred = cred;
 5128                 tdrpc->p = p;
 5129                 tdrpc->inprog = 0;
 5130                 tdrpc->err = 0;
 5131                 tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK);
 5132                 ret = EIO;
 5133                 if (nfs_pnfsiothreads != 0) {
 5134                         ret = nfs_pnfsio(start_writedsdorpc, tdrpc);
 5135                         NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n",
 5136                             ret);
 5137                 }
 5138                 if (ret != 0) {
 5139                         ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL,
 5140                             tdrpc->m, cred, p);
 5141                         if (nfsds_failerr(ret) && *failposp == -1)
 5142                                 *failposp = i;
 5143                         else if (error == 0 && ret != 0)
 5144                                 error = ret;
 5145                 }
 5146                 nmpp++;
 5147                 fhp++;
 5148         }
 5149         m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK);
 5150         ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p);
 5151         if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
 5152                 *failposp = mirrorcnt - 1;
 5153         else if (error == 0 && ret != 0)
 5154                 error = ret;
 5155         if (error == 0)
 5156                 error = nfsrv_setextattr(vp, &na, p);
 5157         NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error);
 5158         tdrpc = drpc;
 5159         timo = hz / 50;         /* Wait for 20msec. */
 5160         if (timo < 1)
 5161                 timo = 1;
 5162         for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 5163                 /* Wait for RPCs on separate threads to complete. */
 5164                 while (tdrpc->inprog != 0 && tdrpc->done == 0)
 5165                         tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo);
 5166                 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
 5167                         *failposp = i;
 5168                 else if (error == 0 && tdrpc->err != 0)
 5169                         error = tdrpc->err;
 5170         }
 5171         free(drpc, M_TEMP);
 5172         return (error);
 5173 }
 5174 
      /*
       * Do a Setattr RPC with a trailing Getattr against a single DS.
       * The attributes in nap->na_vattr are sent using the MDS->DS proxy
       * stateid and the Getattr reply is parsed by nfsv4_loadattr() with
       * "dsnap" as its attribute argument.
       * Returns 0, the error from newnfs_request(), the reply's nd_repstat or
       * an error from parsing the reply.
       */
 5175 static int
 5176 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
 5177     struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap,
 5178     struct nfsvattr *dsnap)
 5179 {
 5180         uint32_t *tl;
 5181         struct nfsrv_descript *nd;
 5182         nfsv4stateid_t st;
 5183         nfsattrbit_t attrbits;
 5184         int error;
 5185 
 5186         NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n");
 5187         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 5188         /*
 5189          * Use a stateid where other is an alternating 01010 pattern and
 5190          * seqid is 0xffffffff.  This value is not defined as special by
 5191          * the RFC and is used by the FreeBSD NFS server to indicate an
 5192          * MDS->DS proxy operation.
 5193          */
 5194         st.other[0] = 0x55555555;
 5195         st.other[1] = 0x55555555;
 5196         st.other[2] = 0x55555555;
 5197         st.seqid = 0xffffffff;
 5198         nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp),
 5199             NULL, NULL, 0, 0);
 5200         nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 5201         nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0);
 5202 
 5203         /* Do a Getattr for the attributes that change due to writing. */
 5204         NFSZERO_ATTRBIT(&attrbits);
 5205         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
 5206         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 5207         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
 5208         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
 5209         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
 5210         NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 5211         *tl = txdr_unsigned(NFSV4OP_GETATTR);
 5212         (void) nfsrv_putattrbit(nd, &attrbits);
 5213         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 5214             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 5215         if (error != 0) {
                      /*
                       * NOTE(review): nd_mrep is not freed on this path;
                       * presumably no reply chain exists when newnfs_request()
                       * fails - confirm.
                       */
 5216                 free(nd, M_TEMP);
 5217                 return (error);
 5218         }
 5219         NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n",
 5220             nd->nd_repstat);
 5221         /* Get rid of weak cache consistency data for now. */
              /*
               * This is only done for an NFSv4 reply that has ND_V4WCCATTR set
               * and has more data to parse.
               */
 5222         if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
 5223             (ND_NFSV4 | ND_V4WCCATTR)) {
 5224                 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
 5225                     NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 5226                 NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error);
 5227                 if (error != 0)
 5228                         goto nfsmout;
 5229                 /*
 5230                  * Get rid of Op# and status for next op.
 5231                  */
                      /* A non-zero status means there is nothing more to parse. */
 5232                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 5233                 if (*++tl != 0)
 5234                         nd->nd_flag |= ND_NOMOREDATA;
 5235         }
              /*
               * Parse the attribute bitmap in the Setattr reply.  attrbits is
               * reused as scratch space here; the result is not looked at.
               */
 5236         error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 5237         if (error != 0)
 5238                 goto nfsmout;
 5239         if (nd->nd_repstat != 0)
 5240                 error = nd->nd_repstat;
 5241         /*
 5242          * Get the Change, Size, Access Time and Modify Time attributes and set
 5243          * on the Metadata file, so its attributes will be what the file's
 5244          * would be if it had been written.
 5245          */
              /*
               * Here the attributes are only loaded via nfsv4_loadattr(); storing
               * them on the metadata file is left to the caller.
               * NOTE(review): the two words dissected first are presumably the
               * Getattr Op# and status - confirm.
               */
 5246         if (error == 0) {
 5247                 NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 5248                 error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
 5249                     NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 5250         }
 5251         NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error);
 5252 nfsmout:
 5253         m_freem(nd->nd_mrep);
 5254         free(nd, M_TEMP);
 5255         NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error);
 5256         return (error);
 5257 }
 5258 
      /*
       * Arguments and result for one nfsrv_setattrdsdorpc() call that is run
       * via nfs_pnfsio().  nfsrv_setattrdsrpc() fills in one of these per
       * mirror (except the last mirror) and passes it to nfs_pnfsio() with
       * start_setattrdsdorpc() as the function to run.
       * NOTE(review): inprog and tsk are never written by the code in this part
       * of the file other than inprog being cleared; presumably nfs_pnfsio()
       * sets inprog and queues tsk - confirm.
       */
 5259 struct nfsrvsetattrdsdorpc {
 5260         int                     done;   /* set to 1 by start_setattrdsdorpc() once err is stored */
 5261         int                     inprog; /* cleared before dispatch; waiter sleeps while != 0 and !done */
 5262         struct task             tsk;    /* its address is the tsleep() channel of the waiter */
 5263         fhandle_t               fh;     /* private copy of the DS file handle */
 5264         struct nfsmount         *nmp;   /* DS mount the RPC is sent to */
 5265         struct vnode            *vp;    /* vnode passed through to nfsrv_setattrdsdorpc() */
 5266         struct ucred            *cred;  /* credentials passed to nfsrv_setattrdsdorpc() */
 5267         NFSPROC_T               *p;     /* passed through to nfsrv_setattrdsdorpc() */
 5268         struct nfsvattr         na;     /* private copy of the attributes to set */
 5269         struct nfsvattr         dsna;   /* receives the attributes from the Getattr reply */
 5270         int                     err;    /* return value of nfsrv_setattrdsdorpc() */
 5271 };
 5272 
 5273 /*
 5274  * Start up the thread that will execute nfsrv_setattrdsdorpc().
 5275  */
 5276 static void
 5277 start_setattrdsdorpc(void *arg, int pending)
 5278 {
 5279         struct nfsrvsetattrdsdorpc *drpc;
 5280 
 5281         drpc = (struct nfsrvsetattrdsdorpc *)arg;
 5282         drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p,
 5283             drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna);
 5284         drpc->done = 1;
 5285 }
 5286 
 5287 static int
 5288 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
 5289     struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
 5290     struct nfsvattr *nap, int *failposp)
 5291 {
 5292         struct nfsrvsetattrdsdorpc *drpc, *tdrpc;
 5293         struct nfsvattr na;
 5294         int error, i, ret, timo;
 5295 
 5296         NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n");
 5297         drpc = NULL;
 5298         if (mirrorcnt > 1)
 5299                 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
 5300                     M_WAITOK);
 5301 
 5302         /*
 5303          * Do the setattr RPC for every DS, using a separate kernel process
 5304          * for every DS except the last one.
 5305          */
 5306         error = 0;
 5307         for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 5308                 tdrpc->done = 0;
 5309                 tdrpc->inprog = 0;
 5310                 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
 5311                 tdrpc->nmp = *nmpp;
 5312                 tdrpc->vp = vp;
 5313                 tdrpc->cred = cred;
 5314                 tdrpc->p = p;
 5315                 tdrpc->na = *nap;
 5316                 tdrpc->err = 0;
 5317                 ret = EIO;
 5318                 if (nfs_pnfsiothreads != 0) {
 5319                         ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc);
 5320                         NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n",
 5321                             ret);
 5322                 }
 5323                 if (ret != 0) {
 5324                         ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap,
 5325                             &na);
 5326                         if (nfsds_failerr(ret) && *failposp == -1)
 5327                                 *failposp = i;
 5328                         else if (error == 0 && ret != 0)
 5329                                 error = ret;
 5330                 }
 5331                 nmpp++;
 5332                 fhp++;
 5333         }
 5334         ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na);
 5335         if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
 5336                 *failposp = mirrorcnt - 1;
 5337         else if (error == 0 && ret != 0)
 5338                 error = ret;
 5339         if (error == 0)
 5340                 error = nfsrv_setextattr(vp, &na, p);
 5341         NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error);
 5342         tdrpc = drpc;
 5343         timo = hz / 50;         /* Wait for 20msec. */
 5344         if (timo < 1)
 5345                 timo = 1;
 5346         for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 5347                 /* Wait for RPCs on separate threads to complete. */
 5348                 while (tdrpc->inprog != 0 && tdrpc->done == 0)
 5349                         tsleep(&tdrpc->tsk, PVFS, "srvsads", timo);
 5350                 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
 5351                         *failposp = i;
 5352                 else if (error == 0 && tdrpc->err != 0)
 5353                         error = tdrpc->err;
 5354         }
 5355         free(drpc, M_TEMP);
 5356         return (error);
 5357 }
 5358 
 5359 /*
 5360  * Do a Setattr of an NFSv4 ACL on the DS file.
 5361  */
 5362 static int
 5363 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
 5364     struct vnode *vp, struct nfsmount *nmp, struct acl *aclp)
 5365 {
 5366         struct nfsrv_descript *nd;
 5367         nfsv4stateid_t st;
 5368         nfsattrbit_t attrbits;
 5369         int error;
 5370 
 5371         NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n");
 5372         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 5373         /*
 5374          * Use a stateid where other is an alternating 01010 pattern and
 5375          * seqid is 0xffffffff.  This value is not defined as special by
 5376          * the RFC and is used by the FreeBSD NFS server to indicate an
 5377          * MDS->DS proxy operation.
 5378          */
 5379         st.other[0] = 0x55555555;
 5380         st.other[1] = 0x55555555;
 5381         st.other[2] = 0x55555555;
 5382         st.seqid = 0xffffffff;
 5383         nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp),
 5384             NULL, NULL, 0, 0);
 5385         nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 5386         NFSZERO_ATTRBIT(&attrbits);
 5387         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
 5388         /*
 5389          * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(),
 5390          * so passing in the metadata "vp" will be ok, since it is of
 5391          * the same type (VREG).
 5392          */
 5393         nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL,
 5394             NULL, 0, 0, 0, 0, 0, NULL);
 5395         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 5396             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 5397         if (error != 0) {
 5398                 free(nd, M_TEMP);
 5399                 return (error);
 5400         }
 5401         NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n",
 5402             nd->nd_repstat);
 5403         error = nd->nd_repstat;
 5404         m_freem(nd->nd_mrep);
 5405         free(nd, M_TEMP);
 5406         return (error);
 5407 }
 5408 
      /*
       * Arguments and result for one nfsrv_setacldsdorpc() call that is run
       * via nfs_pnfsio().  nfsrv_setacldsrpc() fills in one of these per
       * mirror (except the last mirror) and passes it to nfs_pnfsio() with
       * start_setacldsdorpc() as the function to run.
       * NOTE(review): inprog and tsk are never written by the code in this part
       * of the file other than inprog being cleared; presumably nfs_pnfsio()
       * sets inprog and queues tsk - confirm.
       */
 5409 struct nfsrvsetacldsdorpc {
 5410         int                     done;   /* set to 1 by start_setacldsdorpc() once err is stored */
 5411         int                     inprog; /* cleared before dispatch; waiter sleeps while != 0 and !done */
 5412         struct task             tsk;    /* its address is the tsleep() channel of the waiter */
 5413         fhandle_t               fh;     /* private copy of the DS file handle */
 5414         struct nfsmount         *nmp;   /* DS mount the RPC is sent to */
 5415         struct vnode            *vp;    /* vnode passed through to nfsrv_setacldsdorpc() */
 5416         struct ucred            *cred;  /* credentials passed to nfsrv_setacldsdorpc() */
 5417         NFSPROC_T               *p;     /* passed through to nfsrv_setacldsdorpc() */
 5418         struct acl              *aclp;  /* the ACL to set; shared with the caller, not copied */
 5419         int                     err;    /* return value of nfsrv_setacldsdorpc() */
 5420 };
 5421 
 5422 /*
 5423  * Start up the thread that will execute nfsrv_setacldsdorpc().
 5424  */
 5425 static void
 5426 start_setacldsdorpc(void *arg, int pending)
 5427 {
 5428         struct nfsrvsetacldsdorpc *drpc;
 5429 
 5430         drpc = (struct nfsrvsetacldsdorpc *)arg;
 5431         drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p,
 5432             drpc->vp, drpc->nmp, drpc->aclp);
 5433         drpc->done = 1;
 5434 }
 5435 
 5436 static int
 5437 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
 5438     struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp,
 5439     int *failposp)
 5440 {
 5441         struct nfsrvsetacldsdorpc *drpc, *tdrpc;
 5442         int error, i, ret, timo;
 5443 
 5444         NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n");
 5445         drpc = NULL;
 5446         if (mirrorcnt > 1)
 5447                 tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
 5448                     M_WAITOK);
 5449 
 5450         /*
 5451          * Do the setattr RPC for every DS, using a separate kernel process
 5452          * for every DS except the last one.
 5453          */
 5454         error = 0;
 5455         for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 5456                 tdrpc->done = 0;
 5457                 tdrpc->inprog = 0;
 5458                 NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
 5459                 tdrpc->nmp = *nmpp;
 5460                 tdrpc->vp = vp;
 5461                 tdrpc->cred = cred;
 5462                 tdrpc->p = p;
 5463                 tdrpc->aclp = aclp;
 5464                 tdrpc->err = 0;
 5465                 ret = EIO;
 5466                 if (nfs_pnfsiothreads != 0) {
 5467                         ret = nfs_pnfsio(start_setacldsdorpc, tdrpc);
 5468                         NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n",
 5469                             ret);
 5470                 }
 5471                 if (ret != 0) {
 5472                         ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp,
 5473                             aclp);
 5474                         if (nfsds_failerr(ret) && *failposp == -1)
 5475                                 *failposp = i;
 5476                         else if (error == 0 && ret != 0)
 5477                                 error = ret;
 5478                 }
 5479                 nmpp++;
 5480                 fhp++;
 5481         }
 5482         ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp);
 5483         if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
 5484                 *failposp = mirrorcnt - 1;
 5485         else if (error == 0 && ret != 0)
 5486                 error = ret;
 5487         NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error);
 5488         tdrpc = drpc;
 5489         timo = hz / 50;         /* Wait for 20msec. */
 5490         if (timo < 1)
 5491                 timo = 1;
 5492         for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 5493                 /* Wait for RPCs on separate threads to complete. */
 5494                 while (tdrpc->inprog != 0 && tdrpc->done == 0)
 5495                         tsleep(&tdrpc->tsk, PVFS, "srvacds", timo);
 5496                 if (nfsds_failerr(tdrpc->err) && *failposp == -1)
 5497                         *failposp = i;
 5498                 else if (error == 0 && tdrpc->err != 0)
 5499                         error = tdrpc->err;
 5500         }
 5501         free(drpc, M_TEMP);
 5502         return (error);
 5503 }
 5504 
 5505 /*
 5506  * Getattr call to the DS for the attributes that change due to writing.
 5507  */
 5508 static int
 5509 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
 5510     struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap)
 5511 {
 5512         struct nfsrv_descript *nd;
 5513         int error;
 5514         nfsattrbit_t attrbits;
 5515         
 5516         NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n");
 5517         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 5518         nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp,
 5519             sizeof(fhandle_t), NULL, NULL, 0, 0);
 5520         NFSZERO_ATTRBIT(&attrbits);
 5521         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
 5522         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 5523         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
 5524         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
 5525         NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
 5526         (void) nfsrv_putattrbit(nd, &attrbits);
 5527         error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 5528             NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 5529         if (error != 0) {
 5530                 free(nd, M_TEMP);
 5531                 return (error);
 5532         }
 5533         NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n",
 5534             nd->nd_repstat);
 5535         if (nd->nd_repstat == 0) {
 5536                 error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 5537                     NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
 5538                     NULL, NULL);
 5539                 /*
 5540                  * We can only save the updated values in the extended
 5541                  * attribute if the vp is exclusively locked.
 5542                  * This should happen when any of the following operations
 5543                  * occur on the vnode:
 5544                  *    Close, Delegreturn, LayoutCommit, LayoutReturn
 5545                  * As such, the updated extended attribute should get saved
 5546                  * before nfsrv_checkdsattr() returns 0 and allows the cached
 5547                  * attributes to be returned without calling this function.
 5548                  */
 5549                 if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
 5550                         error = nfsrv_setextattr(vp, nap, p);
 5551                         NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n",
 5552                             error);
 5553                 }
 5554         } else
 5555                 error = nd->nd_repstat;
 5556         m_freem(nd->nd_mrep);
 5557         free(nd, M_TEMP);
 5558         NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error);
 5559         return (error);
 5560 }
 5561 
 5562 /*
 5563  * Get the device id and file handle for a DS file.
 5564  */
 5565 int
 5566 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp,
 5567     fhandle_t *fhp, char *devid)
 5568 {
 5569         int buflen, error;
 5570         char *buf;
 5571 
 5572         buflen = 1024;
 5573         buf = malloc(buflen, M_TEMP, M_WAITOK);
 5574         error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL,
 5575             fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL);
 5576         free(buf, M_TEMP);
 5577         return (error);
 5578 }
 5579 
 5580 /*
 5581  * Do a Lookup against the DS for the filename.
 5582  */
 5583 static int
 5584 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf,
 5585     struct vnode **nvpp, NFSPROC_T *p)
 5586 {
 5587         struct nameidata named;
 5588         struct ucred *tcred;
 5589         char *bufp;
 5590         u_long *hashp;
 5591         struct vnode *nvp;
 5592         int error;
 5593 
 5594         tcred = newnfs_getcred();
 5595         named.ni_cnd.cn_nameiop = LOOKUP;
 5596         named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY;
 5597         named.ni_cnd.cn_cred = tcred;
 5598         named.ni_cnd.cn_thread = p;
 5599         named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME;
 5600         nfsvno_setpathbuf(&named, &bufp, &hashp);
 5601         named.ni_cnd.cn_nameptr = bufp;
 5602         named.ni_cnd.cn_namelen = strlen(pf->dsf_filename);
 5603         strlcpy(bufp, pf->dsf_filename, NAME_MAX);
 5604         NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp);
 5605         error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
 5606         NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error);
 5607         NFSFREECRED(tcred);
 5608         nfsvno_relpathbuf(&named);
 5609         if (error == 0)
 5610                 *nvpp = nvp;
 5611         NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error);
 5612         return (error);
 5613 }
 5614 
 5615 /*
 5616  * Set the file handle to the correct one.
 5617  */
 5618 static void
 5619 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid,
 5620     char *fnamep, struct vnode *nvp, NFSPROC_T *p)
 5621 {
 5622         struct nfsnode *np;
 5623         int ret;
 5624 
 5625         np = VTONFS(nvp);
 5626         NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH);
 5627         /*
 5628          * We can only do a vn_set_extattr() if the vnode is exclusively
 5629          * locked and vn_start_write() has been done.  If devid != NULL or
 5630          * fnamep != NULL or the vnode is shared locked, vn_start_write()
 5631          * may not have been done.
 5632          * If not done now, it will be done on a future call.
 5633          */
 5634         if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) ==
 5635             LK_EXCLUSIVE)
 5636                 ret = vn_extattr_set(vp, IO_NODELOCKED,
 5637                     EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf),
 5638                     (char *)pf, p);
 5639         NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret);
 5640 }
 5641 
 5642 /*
 5643  * Cause RPCs waiting on "nmp" to fail.  This is called for a DS mount point
 5644  * when the DS has failed.
 5645  */
 5646 void
 5647 nfsrv_killrpcs(struct nfsmount *nmp)
 5648 {
 5649 
 5650         /*
 5651          * Call newnfs_nmcancelreqs() to cause
 5652          * any RPCs in progress on the mount point to
 5653          * fail.
 5654          * This will cause any process waiting for an
 5655          * RPC to complete while holding a vnode lock
 5656          * on the mounted-on vnode (such as "df" or
 5657          * a non-forced "umount") to fail.
 5658          * This will unlock the mounted-on vnode so
 5659          * a forced dismount can succeed.
 5660          * The NFSMNTP_CANCELRPCS flag should be set when this function is
 5661          * called.
 5662          */
 5663         newnfs_nmcancelreqs(nmp);
 5664 }
 5665 
 5666 /*
 5667  * Sum up the statfs info for each of the DSs, so that the client will
 5668  * receive the total for all DSs.
 5669  */
 5670 static int
 5671 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp)
 5672 {
 5673         struct statfs *tsf;
 5674         struct nfsdevice *ds;
 5675         struct vnode **dvpp, **tdvpp, *dvp;
 5676         uint64_t tot;
 5677         int cnt, error = 0, i;
 5678 
 5679         if (nfsrv_devidcnt <= 0)
 5680                 return (ENXIO);
 5681         dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
 5682         tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
 5683 
 5684         /* Get an array of the dvps for the DSs. */
 5685         tdvpp = dvpp;
 5686         i = 0;
 5687         NFSDDSLOCK();
 5688         /* First, search for matches for same file system. */
 5689         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 5690                 if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 &&
 5691                     fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) {
 5692                         if (++i > nfsrv_devidcnt)
 5693                                 break;
 5694                         *tdvpp++ = ds->nfsdev_dvp;
 5695                 }
 5696         }
 5697         /*
 5698          * If no matches for same file system, total all servers not assigned
 5699          * to a file system.
 5700          */
 5701         if (i == 0) {
 5702                 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 5703                         if (ds->nfsdev_nmp != NULL &&
 5704                             ds->nfsdev_mdsisset == 0) {
 5705                                 if (++i > nfsrv_devidcnt)
 5706                                         break;
 5707                                 *tdvpp++ = ds->nfsdev_dvp;
 5708                         }
 5709                 }
 5710         }
 5711         NFSDDSUNLOCK();
 5712         cnt = i;
 5713 
 5714         /* Do a VFS_STATFS() for each of the DSs and sum them up. */
 5715         tdvpp = dvpp;
 5716         for (i = 0; i < cnt && error == 0; i++) {
 5717                 dvp = *tdvpp++;
 5718                 error = VFS_STATFS(dvp->v_mount, tsf);
 5719                 if (error == 0) {
 5720                         if (sf->f_bsize == 0) {
 5721                                 if (tsf->f_bsize > 0)
 5722                                         sf->f_bsize = tsf->f_bsize;
 5723                                 else
 5724                                         sf->f_bsize = 8192;
 5725                         }
 5726                         if (tsf->f_blocks > 0) {
 5727                                 if (sf->f_bsize != tsf->f_bsize) {
 5728                                         tot = tsf->f_blocks * tsf->f_bsize;
 5729                                         sf->f_blocks += (tot / sf->f_bsize);
 5730                                 } else
 5731                                         sf->f_blocks += tsf->f_blocks;
 5732                         }
 5733                         if (tsf->f_bfree > 0) {
 5734                                 if (sf->f_bsize != tsf->f_bsize) {
 5735                                         tot = tsf->f_bfree * tsf->f_bsize;
 5736                                         sf->f_bfree += (tot / sf->f_bsize);
 5737                                 } else
 5738                                         sf->f_bfree += tsf->f_bfree;
 5739                         }
 5740                         if (tsf->f_bavail > 0) {
 5741                                 if (sf->f_bsize != tsf->f_bsize) {
 5742                                         tot = tsf->f_bavail * tsf->f_bsize;
 5743                                         sf->f_bavail += (tot / sf->f_bsize);
 5744                                 } else
 5745                                         sf->f_bavail += tsf->f_bavail;
 5746                         }
 5747                 }
 5748         }
 5749         free(tsf, M_TEMP);
 5750         free(dvpp, M_TEMP);
 5751         return (error);
 5752 }
 5753 
 5754 /*
 5755  * Set an NFSv4 acl.
 5756  */
 5757 int
 5758 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p)
 5759 {
 5760         int error;
 5761 
 5762         if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) {
 5763                 error = NFSERR_ATTRNOTSUPP;
 5764                 goto out;
 5765         }
 5766         /*
 5767          * With NFSv4 ACLs, chmod(2) may need to add additional entries.
 5768          * Make sure it has enough room for that - splitting every entry
 5769          * into two and appending "canonical six" entries at the end.
 5770          * Cribbed out of kern/vfs_acl.c - Rick M.
 5771          */
 5772         if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) {
 5773                 error = NFSERR_ATTRNOTSUPP;
 5774                 goto out;
 5775         }
 5776         error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
 5777         if (error == 0) {
 5778                 error = nfsrv_dssetacl(vp, aclp, cred, p);
 5779                 if (error == ENOENT)
 5780                         error = 0;
 5781         }
 5782 
 5783 out:
 5784         NFSEXITCODE(error);
 5785         return (error);
 5786 }
 5787 
 5788 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
 5789 
 5790 /*
 5791  * Called once to initialize data structures...
 5792  */
 5793 static int
 5794 nfsd_modevent(module_t mod, int type, void *data)
 5795 {
 5796         int error = 0, i;
 5797         static int loaded = 0;
 5798 
 5799         switch (type) {
 5800         case MOD_LOAD:
 5801                 if (loaded)
 5802                         goto out;
 5803                 newnfs_portinit();
 5804                 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 5805                         mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL,
 5806                             MTX_DEF);
 5807                         mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL,
 5808                             MTX_DEF);
 5809                 }
 5810                 mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF);
 5811                 mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF);
 5812                 mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF);
 5813                 mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF);
 5814                 mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF);
 5815                 lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
 5816                 nfsrvd_initcache();
 5817                 nfsd_init();
 5818                 NFSD_LOCK();
 5819                 nfsrvd_init(0);
 5820                 NFSD_UNLOCK();
 5821                 nfsd_mntinit();
 5822 #ifdef VV_DISABLEDELEG
 5823                 vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
 5824                 vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
 5825 #endif
 5826                 nfsd_call_servertimer = nfsrv_servertimer;
 5827                 nfsd_call_nfsd = nfssvc_nfsd;
 5828                 loaded = 1;
 5829                 break;
 5830 
 5831         case MOD_UNLOAD:
 5832                 if (newnfs_numnfsd != 0) {
 5833                         error = EBUSY;
 5834                         break;
 5835                 }
 5836 
 5837 #ifdef VV_DISABLEDELEG
 5838                 vn_deleg_ops.vndeleg_recall = NULL;
 5839                 vn_deleg_ops.vndeleg_disable = NULL;
 5840 #endif
 5841                 nfsd_call_servertimer = NULL;
 5842                 nfsd_call_nfsd = NULL;
 5843 
 5844                 /* Clean out all NFSv4 state. */
 5845                 nfsrv_throwawayallstate(curthread);
 5846 
 5847                 /* Clean the NFS server reply cache */
 5848                 nfsrvd_cleancache();
 5849 
 5850                 /* Free up the krpc server pool. */
 5851                 if (nfsrvd_pool != NULL)
 5852                         svcpool_destroy(nfsrvd_pool);
 5853 
 5854                 /* and get rid of the locks */
 5855                 for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 5856                         mtx_destroy(&nfsrchash_table[i].mtx);
 5857                         mtx_destroy(&nfsrcahash_table[i].mtx);
 5858                 }
 5859                 mtx_destroy(&nfsrc_udpmtx);
 5860                 mtx_destroy(&nfs_v4root_mutex);
 5861                 mtx_destroy(&nfsv4root_mnt.mnt_mtx);
 5862                 mtx_destroy(&nfsrv_dontlistlock_mtx);
 5863                 mtx_destroy(&nfsrv_recalllock_mtx);
 5864                 for (i = 0; i < nfsrv_sessionhashsize; i++)
 5865                         mtx_destroy(&nfssessionhash[i].mtx);
 5866                 if (nfslayouthash != NULL) {
 5867                         for (i = 0; i < nfsrv_layouthashsize; i++)
 5868                                 mtx_destroy(&nfslayouthash[i].mtx);
 5869                         free(nfslayouthash, M_NFSDSESSION);
 5870                 }
 5871                 lockdestroy(&nfsv4root_mnt.mnt_explock);
 5872                 free(nfsclienthash, M_NFSDCLIENT);
 5873                 free(nfslockhash, M_NFSDLOCKFILE);
 5874                 free(nfssessionhash, M_NFSDSESSION);
 5875                 loaded = 0;
 5876                 break;
 5877         default:
 5878                 error = EOPNOTSUPP;
 5879                 break;
 5880         }
 5881 
 5882 out:
 5883         NFSEXITCODE(error);
 5884         return (error);
 5885 }
 5886 static moduledata_t nfsd_mod = {
 5887         "nfsd",
 5888         nfsd_modevent,
 5889         NULL,
 5890 };
 5891 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
 5892 
 5893 /* So that loader and kldload(2) can find us, wherever we are.. */
 5894 MODULE_VERSION(nfsd, 1);
 5895 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
 5896 MODULE_DEPEND(nfsd, nfslock, 1, 1, 1);
 5897 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
 5898 MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
 5899 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
 5900 

Cache object: 02e6180cbb8ed9c00effec1247ad7cd4


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.