The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/nfsserver/nfs_serv.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/6.4/sys/nfsserver/nfs_serv.c 173045 2007-10-26 22:06:55Z jhb $");
   37 
   38 /*
   39  * nfs version 2 and 3 server calls to vnode ops
   40  * - these routines generally have 3 phases
   41  *   1 - break down and validate rpc request in mbuf list
   42  *   2 - do the vnode ops for the request
   43  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
   44  *   3 - build the rpc reply in an mbuf list
   45  *   nb:
   46  *      - do not mix the phases, since the nfsm_?? macros can return failures
   47  *        on a bad rpc or similar and do not do any vrele() or vput()'s
   48  *
   49  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
   50  *      error number iff error != 0 whereas
   51  *      returning an error from the server function implies a fatal error
   52  *      such as a badly constructed rpc request that should be dropped without
   53  *      a reply.
   54  *      For nfsm_reply(), the case where error == EBADRPC is treated
   55  *      specially; after constructing a reply, it does an immediate
   56  *      `goto nfsmout' to avoid getting any V3 post-op status appended.
   57  *
   58  * Other notes:
   59  *      Warning: always pay careful attention to resource cleanup on return
   60  *      and note that nfsm_*() macros can terminate a procedure on certain
   61  *      errors.
   62  *
   63  *      lookup() and namei()
   64  *      may return garbage in various structural fields/return elements
   65  *      if an error is returned, and may garbage up nd.ni_dvp even if no
   66  *      error is returned and you did not request LOCKPARENT or WANTPARENT.
   67  *
   68  *      We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name
   69  *      buffer has been freed or not.
   70  */
   71 
   72 #include <sys/param.h>
   73 #include <sys/systm.h>
   74 #include <sys/proc.h>
   75 #include <sys/namei.h>
   76 #include <sys/unistd.h>
   77 #include <sys/vnode.h>
   78 #include <sys/mount.h>
   79 #include <sys/socket.h>
   80 #include <sys/socketvar.h>
   81 #include <sys/malloc.h>
   82 #include <sys/mbuf.h>
   83 #include <sys/dirent.h>
   84 #include <sys/stat.h>
   85 #include <sys/kernel.h>
   86 #include <sys/sysctl.h>
   87 #include <sys/bio.h>
   88 #include <sys/buf.h>
   89 
   90 #include <vm/vm.h>
   91 #include <vm/vm_extern.h>
   92 #include <vm/vm_object.h>
   93 
   94 #include <nfs/nfsproto.h>
   95 #include <nfs/rpcv2.h>
   96 #include <nfsserver/nfs.h>
   97 #include <nfs/xdr_subs.h>
   98 #include <nfsserver/nfsm_subs.h>
   99 
  100 #ifdef NFSRV_DEBUG
      /*
       * Debug build: callers pass one fully parenthesised printf argument
       * list, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__)), which is
       * pasted directly after "printf".
       */
  101 #define nfsdbprintf(info)       printf info
  102 #else
      /* Non-debug build: the trace call expands to nothing. */
  103 #define nfsdbprintf(info)
  104 #endif
  105 
      /*
       * NOTE(review): not referenced in the part of the file visible here;
       * presumably an upper bound used by the commit path -- confirm.
       */
  106 #define MAX_COMMIT_COUNT        (1024 * 1024)
  107 
      /*
       * NUM_HEURISTIC is the number of slots in the nfsheur[] table below.
       * NOTE(review): the NHUSE_* values look like the initial value, increment
       * and cap for nh_use; their users are outside this view -- confirm.
       */
  108 #define NUM_HEURISTIC           1017
  109 #define NHUSE_INIT              64
  110 #define NHUSE_INC               16
  111 #define NHUSE_MAX               2048
  112 
      /*
       * Per-vnode heuristic table, statically sized to NUM_HEURISTIC entries.
       * nh_vp is only compared against, never dereferenced through a held
       * reference (see the field comment).
       */
  113 static struct nfsheur {
  114         struct vnode *nh_vp;    /* vp to match (unreferenced pointer) */
  115         off_t nh_nextr;         /* next offset for sequential detection */
  116         int nh_use;             /* use count for selection */
  117         int nh_seqcount;        /* heuristic */
  118 } nfsheur[NUM_HEURISTIC];
  119 
  120 /* Global vars */
  121 
      /*
       * Write-gathering delays.  The default one is derived from
       * NFS_GATHERDELAY; the v3 one defaults to 0.
       * NOTE(review): units are not visible here (the "* 1000" suggests a
       * conversion from a coarser unit) -- confirm against the users.
       */
  122 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
  123 int nfsrvw_procrastinate_v3 = 0;
  124 
      /* NOTE(review): no use of nfsver is visible in this part of the file. */
  125 static struct timeval   nfsver = { 0 };
  126 
      /* Root of the vfs.nfsrv sysctl subtree; the entries below hang off it. */
  127 SYSCTL_NODE(_vfs, OID_AUTO, nfsrv, CTLFLAG_RW, 0, "NFS server");
  128 
      /* Read/write integer knobs and counters exported under vfs.nfsrv. */
  129 static int nfs_async;
  130 static int nfs_commit_blks;
  131 static int nfs_commit_miss;
  132 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
  133 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
  134 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
  135 
      /* Server statistics structure, exported whole as an opaque struct. */
  136 struct nfsrvstats nfsrvstats;
  137 SYSCTL_STRUCT(_vfs_nfsrv, NFS_NFSRVSTATS, nfsrvstats, CTLFLAG_RW,
  138         &nfsrvstats, nfsrvstats, "S,nfsrvstats");
  139 
      /*
       * Forward declarations of file-local helpers defined later in the file.
       * The _withgiant variant is the one called below while Giant is held.
       */
  140 static int      nfsrv_access_withgiant(struct vnode *vp, int flags,
  141                     struct ucred *cred, int rdonly, struct thread *td,
  142                     int override);
  143 static int      nfsrv_access(struct vnode *, int, struct ucred *, int,
  144                     struct thread *, int);
  145 static void     nfsrvw_coalesce(struct nfsrv_descript *,
  146                     struct nfsrv_descript *);
  147 
  148 /*
  149  * Clear nameidata fields that are tested in nfsmout cleanup code prior
  150  * to using first nfsm macro (that might jump to the cleanup code).
  151  */
  152 
  153 static __inline void
  154 ndclear(struct nameidata *nd)
  155 {
  156 
              /*
               * Put the vnode pointers and the component-name flags into a
               * known "nothing held" state, so that cleanup code reached before
               * the lookup has run finds nothing to release.
               */
  157         nd->ni_startdir = NULL;
  158         nd->ni_dvp = NULL;
  159         nd->ni_vp = NULL;
  160         nd->ni_cnd.cn_flags = 0;
  161 }
  162 
  163 /*
  164  * nfs v3 access service
       *
       * Decodes the file handle and the requested access mask, clears every
       * requested bit whose check through nfsrv_access_withgiant() fails, and
       * replies with post-op attributes followed by the surviving mask.
       * Panics if called for a request that does not have ND_NFSV3 set.
       *
       * Entered and left with the NFSD lock held (NFSD_LOCK_ASSERT()); the
       * lock is dropped and Giant taken around the vnode operations.  Every
       * handled failure is reported in the reply and error is reset to 0; any
       * nfsm_*() macro may instead bail out to nfsmout (see the notes at the
       * top of the file).
  165  */
  166 int
  167 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  168     struct thread *td, struct mbuf **mrq)
  169 {
  170         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  171         struct sockaddr *nam = nfsd->nd_nam;
  172         caddr_t dpos = nfsd->nd_dpos;
  173         struct ucred *cred = nfsd->nd_cr;
  174         struct vnode *vp = NULL;
  175         nfsfh_t nfh;
  176         fhandle_t *fhp;
  177         u_int32_t *tl;
  178         caddr_t bpos;
  179         int error = 0, rdonly, getret;
  180         struct mbuf *mb, *mreq;
  181         struct vattr vattr, *vap = &vattr;
  182         u_long testmode, nfsmode;
  183         int v3 = (nfsd->nd_flag & ND_NFSV3);
  184 
  185         NFSD_LOCK_ASSERT();
  186 
  187         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  188         if (!v3)
  189                 panic("nfsrv3_access: v3 proc called on a v2 connection");
              /* Phase 1: pull the file handle and the access mask off the request. */
  190         fhp = &nfh.fh_generic;
  191         nfsm_srvmtofh(fhp);
  192         tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  193         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
  194         if (error) {
                      /* Bad handle: reply with the error and "no attributes". */
  195                 nfsm_reply(NFSX_UNSIGNED);
  196                 nfsm_srvpostop_attr(1, NULL);
  197                 error = 0;
  198                 goto nfsmout;
  199         }
  200         nfsmode = fxdr_unsigned(u_int32_t, *tl);
              /* Phase 2: vnode operations run under Giant, not the NFSD lock. */
  201         NFSD_UNLOCK();
  202         mtx_lock(&Giant);       /* VFS */
  203         if ((nfsmode & NFSV3ACCESS_READ) &&
  204                 nfsrv_access_withgiant(vp, VREAD, cred, rdonly, td, 0))
  205                 nfsmode &= ~NFSV3ACCESS_READ;
              /*
               * Write-class bits are checked together with a single VWRITE
               * test; for directories the group also includes DELETE.
               */
  206         if (vp->v_type == VDIR)
  207                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
  208                         NFSV3ACCESS_DELETE);
  209         else
  210                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
  211         if ((nfsmode & testmode) &&
  212                 nfsrv_access_withgiant(vp, VWRITE, cred, rdonly, td, 0))
  213                 nfsmode &= ~testmode;
              /* VEXEC answers LOOKUP on a directory and EXECUTE on anything else. */
  214         if (vp->v_type == VDIR)
  215                 testmode = NFSV3ACCESS_LOOKUP;
  216         else
  217                 testmode = NFSV3ACCESS_EXECUTE;
  218         if ((nfsmode & testmode) &&
  219                 nfsrv_access_withgiant(vp, VEXEC, cred, rdonly, td, 0))
  220                 nfsmode &= ~testmode;
  221         getret = VOP_GETATTR(vp, vap, cred, td);
  222         vput(vp);
  223         mtx_unlock(&Giant);     /* VFS */
              /* vp is released; clear it so the nfsmout cleanup skips it. */
  224         vp = NULL;
  225         NFSD_LOCK();
              /* Phase 3: post-op attributes, then the granted access bits. */
  226         nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
  227         nfsm_srvpostop_attr(getret, vap);
  228         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  229         *tl = txdr_unsigned(nfsmode);
  230 nfsmout:
  231         NFSD_LOCK_ASSERT();
              /* Only reached with vp set if a macro bailed out while it was held. */
  232         if (vp) {
  233                 NFSD_UNLOCK();
  234                 mtx_lock(&Giant);       /* VFS */
  235                 vput(vp);
  236                 mtx_unlock(&Giant);     /* VFS */
  237                 NFSD_LOCK();
  238         }
  239         return(error);
  240 }
  241 
  242 /*
  243  * nfs getattr service
       *
       * Translates the request's file handle to a vnode, fetches its
       * attributes with VOP_GETATTR() and, on success, appends them to the
       * reply as an nfs_fattr sized for the protocol version in use.
       *
       * Entered and left with the NFSD lock held; the lock is dropped and
       * Giant taken around the vnode operations.  Handled failures are
       * reported through nfsm_reply() and error is then reset to 0.
  244  */
  245 int
  246 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  247     struct thread *td, struct mbuf **mrq)
  248 {
  249         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  250         struct sockaddr *nam = nfsd->nd_nam;
  251         caddr_t dpos = nfsd->nd_dpos;
  252         struct ucred *cred = nfsd->nd_cr;
  253         struct nfs_fattr *fp;
  254         struct vattr va;
  255         struct vattr *vap = &va;
  256         struct vnode *vp = NULL;
  257         nfsfh_t nfh;
  258         fhandle_t *fhp;
  259         caddr_t bpos;
  260         int error = 0, rdonly;
  261         struct mbuf *mb, *mreq;
  262 
  263         NFSD_LOCK_ASSERT();
  264 
  265         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  266         fhp = &nfh.fh_generic;
  267         nfsm_srvmtofh(fhp);
  268         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
  269         if (error) {
                      /* Bad handle: the reply carries only the error status. */
  270                 nfsm_reply(0);
  271                 error = 0;
  272                 goto nfsmout;
  273         }
              /* Vnode operations run under Giant, not the NFSD lock. */
  274         NFSD_UNLOCK();
  275         mtx_lock(&Giant);       /* VFS */
  276         error = VOP_GETATTR(vp, vap, cred, td);
  277         vput(vp);
  278         mtx_unlock(&Giant);     /* VFS */
              /* vp is released; clear it so the nfsmout cleanup skips it. */
  279         vp = NULL;
  280         NFSD_LOCK();
  281         nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
  282         if (error) {
                      /* The failure is already in the reply; do not drop the rpc. */
  283                 error = 0;
  284                 goto nfsmout;
  285         }
  286         fp = nfsm_build(struct nfs_fattr *,
  287             NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
  288         nfsm_srvfillattr(vap, fp);
  289         /* fall through */
  290 
  291 nfsmout:
  292         NFSD_LOCK_ASSERT();
              /* Only reached with vp set if a macro bailed out while it was held. */
  293         if (vp) {
  294                 NFSD_UNLOCK();
  295                 mtx_lock(&Giant);       /* VFS */
  296                 vput(vp);
  297                 mtx_unlock(&Giant);     /* VFS */
  298                 NFSD_LOCK();
  299         }
  300         return(error);
  301 }
  302 
  303 /*
  304  * nfs setattr service
       *
       * Decodes the new attributes (v3 sattr plus optional ctime guard, or
       * the v2 sattr structure), applies them with VOP_SETATTR() and replies
       * with wcc data (v3) or the resulting attributes (v2).
       *
       * A write is started on the mount with vn_start_write() before the
       * attributes are decoded and is finished at nfsmout on every path.
       * Entered and left with the NFSD lock held; the lock is dropped and
       * Giant taken around VFS calls.
       *
       * Labels: "out" builds the normal reply (including error replies),
       * "nfsmout" is the common cleanup that any nfsm_*() macro may jump to.
  305  */
  306 int
  307 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  308     struct thread *td, struct mbuf **mrq)
  309 {
  310         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  311         struct sockaddr *nam = nfsd->nd_nam;
  312         caddr_t dpos = nfsd->nd_dpos;
  313         struct ucred *cred = nfsd->nd_cr;
  314         struct vattr va, preat;
  315         struct vattr *vap = &va;
  316         struct nfsv2_sattr *sp;
  317         struct nfs_fattr *fp;
  318         struct vnode *vp = NULL;
  319         nfsfh_t nfh;
  320         fhandle_t *fhp;
  321         u_int32_t *tl;
  322         caddr_t bpos;
  323         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
  324         int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
  325         struct mbuf *mb, *mreq;
  326         struct timespec guard;
  327         struct mount *mp = NULL;
  328 
  329         NFSD_LOCK_ASSERT();
  330 
  331         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  332         fhp = &nfh.fh_generic;
  333         nfsm_srvmtofh(fhp);
              /* No mount for this fsid: answer ESTALE through the normal reply. */
  334         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
  335                 error = ESTALE;
  336                 goto out;
  337         }
  338         NFSD_UNLOCK();
  339         mtx_lock(&Giant);       /* VFS */
  340         (void) vn_start_write(NULL, &mp, V_WAIT);
  341         vfs_rel(mp);            /* The write holds a ref. */
  342         mtx_unlock(&Giant);     /* VFS */
  343         NFSD_LOCK();
              /* Start from "nothing to change", then fill in what the client sent. */
  344         VATTR_NULL(vap);
  345         if (v3) {
  346                 nfsm_srvsattr(vap);
                      /* Optional guard: a ctime the file must still have. */
  347                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  348                 gcheck = fxdr_unsigned(int, *tl);
  349                 if (gcheck) {
  350                         tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
  351                         fxdr_nfsv3time(tl, &guard);
  352                 }
  353         } else {
  354                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
  355                 /*
  356                  * Nah nah nah nah na nah
  357                  * There is a bug in the Sun client that puts 0xffff in the mode
  358                  * field of sattr when it should put in 0xffffffff. The u_short
  359                  * doesn't sign extend.
  360                  * --> check the low order 2 bytes for 0xffff
  361                  */
  362                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
  363                         vap->va_mode = nfstov_mode(sp->sa_mode);
                      /* For the remaining v2 fields, nfsrv_nfs_xdrneg1 means "leave unchanged". */
  364                 if (sp->sa_uid != nfsrv_nfs_xdrneg1)
  365                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
  366                 if (sp->sa_gid != nfsrv_nfs_xdrneg1)
  367                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
  368                 if (sp->sa_size != nfsrv_nfs_xdrneg1)
  369                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
  370                 if (sp->sa_atime.nfsv2_sec != nfsrv_nfs_xdrneg1) {
  371 #ifdef notyet
  372                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
  373 #else
  374                         vap->va_atime.tv_sec =
  375                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
  376                         vap->va_atime.tv_nsec = 0;
  377 #endif
  378                 }
  379                 if (sp->sa_mtime.nfsv2_sec != nfsrv_nfs_xdrneg1)
  380                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
  381 
  382         }
  383 
  384         /*
  385          * Now that we have all the fields, lets do it.
  386          */
  387         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
  388         if (error) {
  389                 nfsm_reply(2 * NFSX_UNSIGNED);
  390                 if (v3)
  391                         nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  392                 error = 0;
  393                 goto nfsmout;
  394         }
  395 
  396         /*
  397          * vp now an active resource, pay careful attention to cleanup
  398          */
  399         if (v3) {
  400                 NFSD_UNLOCK();
  401                 mtx_lock(&Giant);       /* VFS */
                      /*
                       * Fetch pre-op attributes for the wcc data and, when a
                       * guard was supplied, require ctime to match it exactly.
                       */
  402                 error = preat_ret = VOP_GETATTR(vp, &preat, cred, td);
  403                 if (!error && gcheck &&
  404                         (preat.va_ctime.tv_sec != guard.tv_sec ||
  405                          preat.va_ctime.tv_nsec != guard.tv_nsec))
  406                         error = NFSERR_NOT_SYNC;
  407                 if (error) {
  408                         vput(vp);
  409                         mtx_unlock(&Giant);     /* VFS */
  410                         vp = NULL;
  411                         NFSD_LOCK();
  412                         nfsm_reply(NFSX_WCCDATA(v3));
  413                         if (v3)
  414                                 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  415                         error = 0;
  416                         goto nfsmout;
  417                 }
  418                 mtx_unlock(&Giant);     /* VFS */
  419                 NFSD_LOCK();
  420         }
  421         NFSD_LOCK_ASSERT();
  422 
  423         /*
  424          * If the size is being changed write access is required, otherwise
  425          * just check for a read only filesystem.
  426          */
              /* va_size still at its VATTR_NULL() value means no size change. */
  427         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
  428                 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
  429                         error = EROFS;
  430                         goto out;
  431                 }
  432         } else {
  433                 if (vp->v_type == VDIR) {
  434                         error = EISDIR;
  435                         goto out;
  436                 } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
  437                         td, 0)) != 0)
  438                         goto out;
  439         }
  440         NFSD_UNLOCK();
  441         mtx_lock(&Giant);       /* VFS */
  442         error = VOP_SETATTR(vp, vap, cred, td);
              /* Re-read into vap so the reply reports the post-op attributes. */
  443         postat_ret = VOP_GETATTR(vp, vap, cred, td);
  444         mtx_unlock(&Giant);     /* VFS */
  445         NFSD_LOCK();
  446         if (!error)
  447                 error = postat_ret;
  448 out:
  449         NFSD_LOCK_ASSERT();
  450         if (vp != NULL) {
  451                 NFSD_UNLOCK();
  452                 mtx_lock(&Giant);       /* VFS */
  453                 vput(vp);
  454                 mtx_unlock(&Giant);     /* VFS */
  455                 NFSD_LOCK();
  456         }
  457 
              /* Cleared before any macro below can jump to nfsmout. */
  458         vp = NULL;
  459         nfsm_reply(NFSX_WCCORFATTR(v3));
  460         if (v3) {
  461                 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  462         } else if (!error) {
  463                 /* v2 non-error case. */
  464                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
  465                 nfsm_srvfillattr(vap, fp);
  466         }
  467         error = 0;
  468         /* fall through */
  469 
  470 nfsmout:
  471         NFSD_LOCK_ASSERT();
              /*
               * Common exit: drop vp if a macro bailed out while it was held,
               * and finish the write.  mp is still NULL here when the file
               * handle decode or vfs_getvfs() failed.
               */
  472         NFSD_UNLOCK();
  473         mtx_lock(&Giant);       /* VFS */
  474         if (vp)
  475                 vput(vp);
  476         vn_finished_write(mp);
  477         mtx_unlock(&Giant);     /* VFS */
  478         NFSD_LOCK();
  479         return(error);
  480 }
  481 
  482 /*
  483  * nfs lookup rpc
       *
       * Resolves one name relative to the directory file handle with
       * nfs_namei() and replies with the file handle and attributes of the
       * result (plus directory post-op attributes for v3).
       *
       * For the public file handle, a directory result may be replaced by
       * the configured index file (nfs_pub.np_index), and the final vnode
       * must live on the publicly exported mount or the reply is EPERM.
       *
       * Resources tracked for cleanup: ndp->ni_vp (locked), ndp->ni_startdir
       * and dirp (referenced).  ndp points at nd, or at ind once an index
       * file has been substituted; the name buffer always belongs to nd.
       *
       * Entered and left with the NFSD lock held; Giant is held instead
       * around VFS calls.
  484  */
  485 int
  486 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  487     struct thread *td, struct mbuf **mrq)
  488 {
  489         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  490         struct sockaddr *nam = nfsd->nd_nam;
  491         caddr_t dpos = nfsd->nd_dpos;
  492         struct ucred *cred = nfsd->nd_cr;
  493         struct nfs_fattr *fp;
  494         struct nameidata nd, ind, *ndp = &nd;
  495         struct vnode *vp, *dirp = NULL;
  496         nfsfh_t nfh;
  497         fhandle_t *fhp;
  498         caddr_t bpos;
  499         int error = 0, len, dirattr_ret = 1;
  500         int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
  501         struct mbuf *mb, *mreq;
  502         struct vattr va, dirattr, *vap = &va;
  503 
  504         NFSD_LOCK_ASSERT();
  505 
  506         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Must precede the first nfsm macro, which may jump to nfsmout. */
  507         ndclear(&nd);
  508 
  509         fhp = &nfh.fh_generic;
  510         nfsm_srvmtofh(fhp);
  511         nfsm_srvnamesiz(len);
  512 
  513         pubflag = nfs_ispublicfh(fhp);
  514 
  515         nd.ni_cnd.cn_cred = cred;
  516         nd.ni_cnd.cn_nameiop = LOOKUP;
  517         nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART;
  518         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
  519                 &dirp, v3, &dirattr, &dirattr_ret, td, pubflag);
  520 
  521         /*
  522          * namei failure, only dirp to cleanup.  Clear out garbage from
  523          * structure in case macros jump to nfsmout.
  524          */
  525 
  526         NFSD_UNLOCK();
  527         mtx_lock(&Giant);       /* VFS */
  528         if (error) {
  529                 if (dirp) {
  530                         vrele(dirp);
  531                         dirp = NULL;
  532                 }
  533                 mtx_unlock(&Giant);     /* VFS */
  534                 NFSD_LOCK();
  535                 nfsm_reply(NFSX_POSTOPATTR(v3));
  536                 if (v3)
  537                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  538                 error = 0;
  539                 goto nfsmout;
  540         }
  541 
  542         /*
  543          * Locate index file for public filehandle
  544          *
  545          * error is 0 on entry and 0 on exit from this block.
  546          */
  547 
  548         if (pubflag) {
  549                 if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) {
  550                         /*
  551                          * Setup call to lookup() to see if we can find
  552                          * the index file. Arguably, this doesn't belong
  553                          * in a kernel.. Ugh.  If an error occurs, do not
  554                          * try to install an index file and then clear the
  555                          * error.
  556                          *
  557                          * When we replace nd with ind and redirect ndp,
  558                          * maintenance of ni_startdir and ni_vp shift to
  559                          * ind and we have to clean them up in the old nd.
  560                          * However, the cnd resource continues to be maintained
  561                          * via the original nd.  Confused?  You aren't alone!
  562                          */
                              /*
                               * NOTE(review): nd.ni_vp is unlocked here and is
                               * not explicitly relocked if lookup() fails, yet
                               * it is later released with vput(); confirm what
                               * lock state lookup() leaves it in on error.
                               */
  563                         ind = nd;
  564                         VOP_UNLOCK(nd.ni_vp, 0, td);
  565                         ind.ni_pathlen = strlen(nfs_pub.np_index);
  566                         ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf =
  567                             nfs_pub.np_index;
  568                         ind.ni_startdir = nd.ni_vp;
  569                         VREF(ind.ni_startdir);
  570 
  571                         error = lookup(&ind);
  572                         ind.ni_dvp = NULL;
                              /* Undo any extra Giant acquisition flagged by lookup(). */
  573                         if (ind.ni_cnd.cn_flags & GIANTHELD) {
  574                                 mtx_unlock(&Giant);
  575                                 ind.ni_cnd.cn_flags &= ~GIANTHELD;
  576                         }
  577 
  578                         if (error == 0) {
  579                                 /*
  580                                  * Found an index file. Get rid of
  581                                  * the old references.  transfer nd.ni_vp'
  582                                  */
  583                                 if (dirp)
  584                                         vrele(dirp);
  585                                 dirp = nd.ni_vp;
  586                                 nd.ni_vp = NULL;
  587                                 vrele(nd.ni_startdir);
  588                                 nd.ni_startdir = NULL;
  589                                 ndp = &ind;
  590                         }
  591                         error = 0;
  592                 }
  593                 /*
  594                  * If the public filehandle was used, check that this lookup
  595                  * didn't result in a filehandle outside the publicly exported
  596                  * filesystem.  We clear the poor vp here to avoid lockups due
  597                  * to NFS I/O.
  598                  */
  599 
                      /*
                       * Release through ndp, not nd: once an index file has been
                       * substituted, nd.ni_vp is already NULL and the vnode that
                       * was tested (and must be dropped) is ind.ni_vp.
                       */
  600                 if (ndp->ni_vp->v_mount != nfs_pub.np_mount) {
  601                         vput(ndp->ni_vp);
  602                         ndp->ni_vp = NULL;
  603                         error = EPERM;
  604                 }
  605         }
  606 
  607         /*
  608          * Resources at this point:
  609          *      ndp->ni_vp      may not be NULL
  610          */
  611 
  612         if (error) {
                      /* dirp and ndp->ni_startdir are released at nfsmout. */
  613                 mtx_unlock(&Giant);     /* VFS */
  614                 NFSD_LOCK();
  615                 nfsm_reply(NFSX_POSTOPATTR(v3));
  616                 if (v3)
  617                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  618                 error = 0;
  619                 goto nfsmout;
  620         }
  621 
  622         /*
  623          * Get underlying attribute, then release remaining resources ( for
  624          * the same potential blocking reason ) and reply.
  625          */
  626         vp = ndp->ni_vp;
              /* Build the reply file handle: fsid of the mount plus the vnode's fid. */
  627         bzero((caddr_t)fhp, sizeof(nfh));
  628         fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
  629         error = VFS_VPTOFH(vp, &fhp->fh_fid);
  630         if (!error)
  631                 error = VOP_GETATTR(vp, vap, cred, td);
  632 
  633         vput(vp);
  634         vrele(ndp->ni_startdir);
  635         vrele(dirp);
              /* Everything is released; clear so the nfsmout cleanup is a no-op. */
  636         ndp->ni_vp = NULL;
  637         ndp->ni_startdir = NULL;
  638         dirp = NULL;
  639         mtx_unlock(&Giant);     /* VFS */
  640         NFSD_LOCK();
  641         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
  642         if (error) {
  643                 if (v3)
  644                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  645                 error = 0;
  646                 goto nfsmout;
  647         }
  648         nfsm_srvfhtom(fhp, v3);
  649         if (v3) {
  650                 nfsm_srvpostop_attr(0, vap);
  651                 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  652         } else {
  653                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
  654                 nfsm_srvfillattr(vap, fp);
  655         }
  656 
  657 nfsmout:
  658         NFSD_LOCK_ASSERT();
              /* Release whatever is still held, through ndp (nd or ind). */
  659         if (ndp->ni_vp || dirp || ndp->ni_startdir) {
  660                 NFSD_UNLOCK();
  661                 mtx_lock(&Giant);       /* VFS */
  662                 if (ndp->ni_vp)
  663                         vput(ndp->ni_vp);
  664                 if (dirp)
  665                         vrele(dirp);
  666                 if (ndp->ni_startdir)
  667                         vrele(ndp->ni_startdir);
  668                 mtx_unlock(&Giant);     /* VFS */
  669                 NFSD_LOCK();
  670         }
              /* The name buffer is always owned by the original nd. */
  671         NDFREE(&nd, NDF_ONLY_PNBUF);
  672         return (error);
  673 }
  674 
  675 /*
  676  * nfs readlink service
       *
       * Pre-allocates an mbuf chain able to hold NFS_MAXPATHLEN bytes, reads
       * the symbolic link into it with VOP_READLINK(), and hands the chain to
       * the reply after the length word.  A vnode that is not a symlink yields
       * EINVAL (v3) or ENXIO (v2).
       *
       * Entered and left with the NFSD lock held; the lock is dropped while
       * the mbufs are allocated, and Giant is held around vnode operations.
  677  */
  678 int
  679 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  680     struct thread *td, struct mbuf **mrq)
  681 {
  682         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  683         struct sockaddr *nam = nfsd->nd_nam;
  684         caddr_t dpos = nfsd->nd_dpos;
  685         struct ucred *cred = nfsd->nd_cr;
              /* One iovec per mbuf in the chain built below. */
  686         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
  687         struct iovec *ivp = iv;
  688         struct mbuf *mp;
  689         u_int32_t *tl;
  690         caddr_t bpos;
  691         int error = 0, rdonly, i, tlen, len, getret;
  692         int v3 = (nfsd->nd_flag & ND_NFSV3);
  693         struct mbuf *mb, *mp3, *nmp, *mreq;
  694         struct vnode *vp = NULL;
  695         struct vattr attr;
  696         nfsfh_t nfh;
  697         fhandle_t *fhp;
  698         struct uio io, *uiop = &io;
  699 
  700         NFSD_LOCK_ASSERT();
  701 
  702         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  703 #ifndef nolint
  704         mp = NULL;
  705 #endif
              /* mp3 is the head of the data chain; nfsmout frees it if still set. */
  706         mp3 = NULL;
  707         fhp = &nfh.fh_generic;
  708         nfsm_srvmtofh(fhp);
  709         len = 0;
  710         i = 0;
  711         NFSD_UNLOCK();
              /*
               * Chain cluster mbufs until they total exactly NFS_MAXPATHLEN
               * bytes (the last one is trimmed), recording each as an iovec.
               */
  712         while (len < NFS_MAXPATHLEN) {
  713                 MGET(nmp, M_TRYWAIT, MT_DATA);
  714                 MCLGET(nmp, M_TRYWAIT);
  715                 nmp->m_len = NFSMSIZ(nmp);
  716                 if (len == 0)
  717                         mp3 = mp = nmp;
  718                 else {
  719                         mp->m_next = nmp;
  720                         mp = nmp;
  721                 }
  722                 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
  723                         mp->m_len = NFS_MAXPATHLEN - len;
  724                         len = NFS_MAXPATHLEN;
  725                 } else
  726                         len += mp->m_len;
  727                 ivp->iov_base = mtod(mp, caddr_t);
  728                 ivp->iov_len = mp->m_len;
  729                 i++;
  730                 ivp++;
  731         }
              /* Describe the chain as a kernel-space read of len bytes at offset 0. */
  732         uiop->uio_iov = iv;
  733         uiop->uio_iovcnt = i;
  734         uiop->uio_offset = 0;
  735         uiop->uio_resid = len;
  736         uiop->uio_rw = UIO_READ;
  737         uiop->uio_segflg = UIO_SYSSPACE;
  738         uiop->uio_td = NULL;
  739         NFSD_LOCK();
  740         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
  741         if (error) {
  742                 nfsm_reply(2 * NFSX_UNSIGNED);
  743                 if (v3)
  744                         nfsm_srvpostop_attr(1, NULL);
  745                 error = 0;
  746                 goto nfsmout;
  747         }
  748         NFSD_UNLOCK();
  749         mtx_lock(&Giant);       /* VFS */
  750         if (vp->v_type != VLNK) {
  751                 if (v3)
  752                         error = EINVAL;
  753                 else
  754                         error = ENXIO;
  755         } else 
  756                 error = VOP_READLINK(vp, uiop, cred);
              /* Attributes are fetched even on failure, for the v3 post-op data. */
  757         getret = VOP_GETATTR(vp, &attr, cred, td);
  758         vput(vp);
  759         mtx_unlock(&Giant);     /* VFS */
              /* vp is released; clear it so the nfsmout cleanup skips it. */
  760         vp = NULL;
  761         NFSD_LOCK();
  762         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
  763         if (v3)
  764                 nfsm_srvpostop_attr(getret, &attr);
  765         if (error) {
  766                 error = 0;
  767                 goto nfsmout;
  768         }
              /*
               * Short read: shrink len to the bytes actually read and adjust
               * the chain to the XDR-rounded length.
               * NOTE(review): exact nfsm_adj() argument semantics are defined
               * outside this file -- confirm.
               */
  769         if (uiop->uio_resid > 0) {
  770                 len -= uiop->uio_resid;
  771                 tlen = nfsm_rndup(len);
  772                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
  773         }
  774         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  775         *tl = txdr_unsigned(len);
              /* Hand the data chain to the reply; clear mp3 so it is not freed below. */
  776         mb->m_next = mp3;
  777         mp3 = NULL;
  778 nfsmout:
  779         NFSD_LOCK_ASSERT();
              /* Still owning the chain means it never made it into the reply. */
  780         if (mp3)
  781                 m_freem(mp3);
  782         if (vp) {
  783                 NFSD_UNLOCK();
  784                 mtx_lock(&Giant);       /* VFS */
  785                 vput(vp);
  786                 mtx_unlock(&Giant);     /* VFS */
  787                 NFSD_LOCK();
  788         }
  789         return(error);
  790 }
  791 
  792 /*
  793  * nfs read service
       *
       * Decodes the file handle, the starting offset (64-bit for v3, 32-bit
       * for v2) and the requested length from the request, maps the handle
       * to a vnode, and reads the data straight into mbufs chained onto the
       * reply (mb).  Only regular files may be read; VREAD access is
       * required, with VEXEC accepted as a fallback.
       *
       * Locking: the NFSD lock is asserted on entry and again at nfsmout.
       * It is dropped, and Giant taken, around the VOP_* calls and vput().
       *
       * Returns `error'.  Every failure path visible here builds an error
       * reply and then clears error to 0 before jumping to nfsmout; the
       * nfsm_* macros may also jump to nfsmout on their own.
  794  */
  795 int
  796 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  797     struct thread *td, struct mbuf **mrq)
  798 {
  799         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  800         struct sockaddr *nam = nfsd->nd_nam;
  801         caddr_t dpos = nfsd->nd_dpos;
  802         struct ucred *cred = nfsd->nd_cr;
  803         struct iovec *iv;
  804         struct iovec *iv2;
  805         struct mbuf *m;
  806         struct nfs_fattr *fp;
  807         u_int32_t *tl;
  808         int i;
  809         caddr_t bpos;
  810         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
  811         int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
  812         struct mbuf *mb, *mreq;
  813         struct mbuf *m2;
  814         struct vnode *vp = NULL;
  815         nfsfh_t nfh;
  816         fhandle_t *fhp;
  817         struct uio io, *uiop = &io;
  818         struct vattr va, *vap = &va;
  819         struct nfsheur *nh;
  820         off_t off;
  821         int ioflag = 0;
  822 
  823         NFSD_LOCK_ASSERT();
  824 
  825         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  826         fhp = &nfh.fh_generic;
  827         nfsm_srvmtofh(fhp);
              /* v3 carries a 64-bit offset, v2 a single 32-bit word. */
  828         if (v3) {
  829                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
  830                 off = fxdr_hyper(tl);
  831         } else {
  832                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  833                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
  834         }
  835         nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
  836 
  837         /*
  838          * Reference vp.  If an error occurs, vp will be invalid, but we
  839          * have to NULL it just in case.  The macros might goto nfsmout
  840          * as well.
  841          */
  842 
  843         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
  844         if (error) {
  845                 vp = NULL;
  846                 nfsm_reply(2 * NFSX_UNSIGNED);
  847                 if (v3)
  848                         nfsm_srvpostop_attr(1, NULL);
  849                 error = 0;
  850                 goto nfsmout;
  851         }
  852 
              /* Only regular files can be read. */
  853         if (vp->v_type != VREG) {
  854                 if (v3)
  855                         error = EINVAL;
  856                 else
  857                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
  858         }
  859         NFSD_UNLOCK();
  860         mtx_lock(&Giant);       /* VFS */
              /*
               * Require VREAD; if that is refused, retry with VEXEC and
               * use that result instead.
               */
  861         if (!error) {
  862                 if ((error = nfsrv_access_withgiant(vp, VREAD, cred, rdonly,
  863                     td, 1)) != 0)
  864                         error = nfsrv_access_withgiant(vp, VEXEC, cred,
  865                             rdonly, td, 1);
  866         }
              /*
               * Attributes are fetched even when error is already set so
               * that the v3 error reply below can carry them.
               */
  867         getret = VOP_GETATTR(vp, vap, cred, td);
  868         if (!error)
  869                 error = getret;
  870         if (error) {
  871                 vput(vp);
  872                 mtx_unlock(&Giant);     /* VFS */
  873                 vp = NULL;
  874                 NFSD_LOCK();
  875                 nfsm_reply(NFSX_POSTOPATTR(v3));
  876                 if (v3)
  877                         nfsm_srvpostop_attr(getret, vap);
  878                 error = 0;
  879                 goto nfsmout;
  880         }
  881         mtx_unlock(&Giant);     /* VFS */
  882         NFSD_LOCK();
  883 
  884         /*
  885          * Calculate byte count to read
               * (clamped so the read never extends past va_size).
  886          */
  887 
  888         if (off >= vap->va_size)
  889                 cnt = 0;
  890         else if ((off + reqlen) > vap->va_size)
  891                 cnt = vap->va_size - off;
  892         else
  893                 cnt = reqlen;
  894 
  895         /*
  896          * Calculate seqcount for heuristic
  897          */
  898 
  899         {
  900                 int hi;
  901                 int try = 32;
  902 
  903                 /*
  904                  * Locate best candidate
                       *
                       * Start at a slot derived from the vnode address and
                       * probe up to 32 slots.  Slots passed over have
                       * nh_use decremented, and the slot with the lowest
                       * nh_use seen so far is remembered in nh.
  905                  */
  906 
  907                 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
  908                 nh = &nfsheur[hi];
  909 
  910                 while (try--) {
  911                         if (nfsheur[hi].nh_vp == vp) {
  912                                 nh = &nfsheur[hi];
  913                                 break;
  914                         }
  915                         if (nfsheur[hi].nh_use > 0)
  916                                 --nfsheur[hi].nh_use;
  917                         hi = (hi + 1) % NUM_HEURISTIC;
  918                         if (nfsheur[hi].nh_use < nh->nh_use)
  919                                 nh = &nfsheur[hi];
  920                 }
  921 
                      /* No slot matched this vnode: take over the chosen one. */
  922                 if (nh->nh_vp != vp) {
  923                         nh->nh_vp = vp;
  924                         nh->nh_nextr = off;
  925                         nh->nh_use = NHUSE_INIT;
  926                         if (off == 0)
  927                                 nh->nh_seqcount = 4;
  928                         else
  929                                 nh->nh_seqcount = 1;
  930                 }
  931 
  932                 /*
  933                  * Calculate heuristic
                       *
                       * A read at offset 0 (with a non-zero seqcount) or at
                       * the offset where the previous read ended raises
                       * seqcount, capped at IO_SEQMAX.  Any other offset
                       * drops it to 1, or to 0 if it was already <= 1.
  934                  */
  935 
  936                 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
  937                         if (++nh->nh_seqcount > IO_SEQMAX)
  938                                 nh->nh_seqcount = IO_SEQMAX;
  939                 } else if (nh->nh_seqcount > 1) {
  940                         nh->nh_seqcount = 1;
  941                 } else {
  942                         nh->nh_seqcount = 0;
  943                 }
  944                 nh->nh_use += NHUSE_INC;
  945                 if (nh->nh_use > NHUSE_MAX)
  946                         nh->nh_use = NHUSE_MAX;
  947                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
  948         }
  949 
  950         nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
              /*
               * Reserve the fixed part of the reply.  fp marks where the
               * attributes go and tl is left pointing just past them; both
               * are filled in after the read completes.
               */
  951         if (v3) {
  952                 tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
  953                 *tl++ = nfsrv_nfs_true;
  954                 fp = (struct nfs_fattr *)tl;
  955                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
  956         } else {
  957                 tl = nfsm_build(u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
  958                 fp = (struct nfs_fattr *)tl;
  959                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
  960         }
  961         len = left = nfsm_rndup(cnt);
  962         NFSD_UNLOCK();
  963         if (cnt > 0) {
  964                 /*
  965                  * Generate the mbuf list with the uio_iov ref. to it.
                       *
                       * Pass 1: count the iovecs needed, appending cluster
                       * mbufs to the reply until `left' bytes of trailing
                       * space are available.
  966                  */
  967                 i = 0;
  968                 m = m2 = mb;
  969                 while (left > 0) {
  970                         siz = min(M_TRAILINGSPACE(m), left);
  971                         if (siz > 0) {
  972                                 left -= siz;
  973                                 i++;
  974                         }
  975                         if (left > 0) {
  976                                 MGET(m, M_TRYWAIT, MT_DATA);
  977                                 MCLGET(m, M_TRYWAIT);
  978                                 m->m_len = 0;
  979                                 m2->m_next = m;
  980                                 m2 = m;
  981                         }
  982                 }
  983                 MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
  984                        M_TEMP, M_WAITOK);
  985                 uiop->uio_iov = iv2 = iv;
                      /*
                       * Pass 2: walk the same chain again, pointing one iovec
                       * at the free space of each mbuf and extending m_len to
                       * cover it.  iv2 keeps the array base for FREE().
                       */
  986                 m = mb;
  987                 left = len;
  988                 i = 0;
  989                 while (left > 0) {
  990                         if (m == NULL)
  991                                 panic("nfsrv_read iov");
  992                         siz = min(M_TRAILINGSPACE(m), left);
  993                         if (siz > 0) {
  994                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
  995                                 iv->iov_len = siz;
  996                                 m->m_len += siz;
  997                                 left -= siz;
  998                                 iv++;
  999                                 i++;
 1000                         }
 1001                         m = m->m_next;
 1002                 }
 1003                 uiop->uio_iovcnt = i;
 1004                 uiop->uio_offset = off;
 1005                 uiop->uio_resid = len;
 1006                 uiop->uio_rw = UIO_READ;
 1007                 uiop->uio_segflg = UIO_SYSSPACE;
                      /*
                       * io is an uninitialized stack object, so every field
                       * handed to VOP_READ must be set explicitly.  Use NULL
                       * for uio_td, matching the readlink and write paths in
                       * this file.
                       */
                      uiop->uio_td = NULL;
 1008                 mtx_lock(&Giant);       /* VFS */
 1009                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
                      /* Record where this read ended for the next heuristic check. */
 1010                 off = uiop->uio_offset;
 1011                 nh->nh_nextr = off;
 1012                 FREE((caddr_t)iv2, M_TEMP);
 1013                 if (error || (getret = VOP_GETATTR(vp, vap, cred, td))) {
 1014                         if (!error)
 1015                                 error = getret;
                              /* Discard the partly built reply and start an error reply. */
 1016                         m_freem(mreq);
 1017                         vput(vp);
 1018                         mtx_unlock(&Giant);     /* VFS */
 1019                         NFSD_LOCK();
 1020                         vp = NULL;
 1021                         nfsm_reply(NFSX_POSTOPATTR(v3));
 1022                         if (v3)
 1023                                 nfsm_srvpostop_attr(getret, vap);
 1024                         error = 0;
 1025                         goto nfsmout;
 1026                 }
 1027         } else {
                      /* Nothing to read; take Giant so both branches leave it held. */
 1028                 uiop->uio_resid = 0;
 1029                 mtx_lock(&Giant);       /* VFS */
 1030         }
 1031         mtx_assert(&Giant, MA_OWNED);   /* VFS */
 1032         vput(vp);
 1033         mtx_unlock(&Giant);     /* VFS */
 1034         vp = NULL;
 1035         NFSD_LOCK();
 1036         nfsm_srvfillattr(vap, fp);
              /*
               * Reduce cnt to what was actually transferred and adjust the
               * mbuf chain if the padded length changed.
               * NOTE(review): nfsm_adj presumably trims the unused tail and
               * pads to XDR alignment - confirm against its definition.
               */
 1037         tlen = len - uiop->uio_resid;
 1038         cnt = cnt < tlen ? cnt : tlen;
 1039         tlen = nfsm_rndup(cnt);
 1040         if (len != tlen || tlen != cnt)
 1041                 nfsm_adj(mb, len - tlen, tlen - cnt);
              /*
               * v3 stores the byte count, then a flag that is true when
               * fewer bytes than requested were returned; the final word
               * (the only one for v2) is the byte count.
               */
 1042         if (v3) {
 1043                 *tl++ = txdr_unsigned(cnt);
 1044                 if (cnt < reqlen)
 1045                         *tl++ = nfsrv_nfs_true;
 1046                 else
 1047                         *tl++ = nfsrv_nfs_false;
 1048         }
 1049         *tl = txdr_unsigned(cnt);
 1050 nfsmout:
 1051         NFSD_LOCK_ASSERT();
              /* Release a vnode still held by a path that jumped here early. */
 1052         if (vp) {
 1053                 NFSD_UNLOCK();
 1054                 mtx_lock(&Giant);       /* VFS */
 1055                 vput(vp);
 1056                 mtx_unlock(&Giant);     /* VFS */
 1057                 NFSD_LOCK();
 1058         }
 1059         return(error);
 1060 }
 1061 
 1062 /*
 1063  * nfs write service
       *
       * Decodes the file handle, offset, (v3 only) stability level and data
       * length from the request, trims the request mbuf chain so that only
       * the write data has a non-zero m_len, and hands those mbufs to
       * VOP_WRITE through an iovec array.  The target must be a regular
       * file and the caller must pass the VWRITE access check.
       *
       * A write reference on the mount is taken with vn_start_write() right
       * after the handle is decoded and released with vn_finished_write()
       * at nfsmout.
       *
       * Locking: the NFSD lock is asserted on entry, at ereply and at
       * nfsmout.  It is dropped, and Giant taken, around VFS operations.
       *
       * Returns `error'.  Every failure path visible here builds an error
       * reply and clears error to 0; the nfsm_* macros may jump to nfsmout
       * themselves.
 1064  */
 1065 int
 1066 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1067     struct thread *td, struct mbuf **mrq)
 1068 {
 1069         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1070         struct sockaddr *nam = nfsd->nd_nam;
 1071         caddr_t dpos = nfsd->nd_dpos;
 1072         struct ucred *cred = nfsd->nd_cr;
 1073         struct iovec *ivp;
 1074         int i, cnt;
 1075         struct mbuf *mp;
 1076         struct nfs_fattr *fp;
 1077         struct iovec *iv;
 1078         struct vattr va, forat;
 1079         struct vattr *vap = &va;
 1080         u_int32_t *tl;
 1081         caddr_t bpos;
 1082         int error = 0, rdonly, len, forat_ret = 1;
 1083         int ioflags, aftat_ret = 1, retlen = 0, zeroing, adjust;
 1084         int stable = NFSV3WRITE_FILESYNC;
 1085         int v3 = (nfsd->nd_flag & ND_NFSV3);
 1086         struct mbuf *mb, *mreq;
 1087         struct vnode *vp = NULL;
 1088         nfsfh_t nfh;
 1089         fhandle_t *fhp;
 1090         struct uio io, *uiop = &io;
 1091         off_t off;
 1092         struct mount *mntp = NULL;
 1093 
 1094         NFSD_LOCK_ASSERT();
 1095 
 1096         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* No request mbufs: nothing to do and no reply to build. */
 1097         if (mrep == NULL) {
 1098                 *mrq = NULL;
 1099                 error = 0;
 1100                 goto nfsmout;
 1101         }
 1102         fhp = &nfh.fh_generic;
 1103         nfsm_srvmtofh(fhp);
              /* Find the mount from the handle's fsid; unknown fsid => ESTALE. */
 1104         if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 1105                 error = ESTALE;
 1106                 goto ereply;
 1107         }
 1108         NFSD_UNLOCK();
 1109         mtx_lock(&Giant);       /* VFS */
              /* The return value of vn_start_write() is deliberately ignored. */
 1110         (void) vn_start_write(NULL, &mntp, V_WAIT);
 1111         vfs_rel(mntp);          /* The write holds a ref. */
 1112         mtx_unlock(&Giant);     /* VFS */
 1113         NFSD_LOCK();
              /*
               * Pull the write arguments.  Afterwards tl points at the data
               * length word in both protocol versions.
               */
 1114         if (v3) {
 1115                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 1116                 off = fxdr_hyper(tl);
                      /* Step over the 64-bit offset and one further word. */
 1117                 tl += 3;
 1118                 stable = fxdr_unsigned(int, *tl++);
 1119         } else {
 1120                 tl = nfsm_dissect_nonblock(u_int32_t *, 4 * NFSX_UNSIGNED);
                      /* The first word is skipped; the second is the offset. */
 1121                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
 1122                 tl += 2;
                      /* v2 has no stability field; nfs_async selects UNSTABLE. */
 1123                 if (nfs_async)
 1124                         stable = NFSV3WRITE_UNSTABLE;
 1125         }
 1126         retlen = len = fxdr_unsigned(int32_t, *tl);
 1127         cnt = i = 0;
 1128 
 1129         /*
 1130          * For NFS Version 2, it is not obvious what a write of zero length
 1131          * should do, but I might as well be consistent with Version 3,
 1132          * which is to return ok so long as there are no permission problems.
 1133          */
 1134         if (len > 0) {
                  /*
                   * Trim the chain down to the write data.  Mbufs ahead of md
                   * get m_len 0; md itself is advanced to dpos; once `len'
                   * bytes have been accounted for, the excess in the current
                   * mbuf is cut off and later mbufs are zeroed again.
                   * i totals the data bytes found and cnt counts the mbufs
                   * that still hold data (used below as the iovec count).
                   */
 1135             zeroing = 1;
 1136             mp = mrep;
 1137             while (mp) {
 1138                 if (mp == md) {
 1139                         zeroing = 0;
 1140                         adjust = dpos - mtod(mp, caddr_t);
 1141                         mp->m_len -= adjust;
 1142                         if (mp->m_len > 0 && adjust > 0)
 1143                                 mp->m_data += adjust;
 1144                 }
 1145                 if (zeroing)
 1146                         mp->m_len = 0;
 1147                 else if (mp->m_len > 0) {
 1148                         i += mp->m_len;
 1149                         if (i > len) {
 1150                                 mp->m_len -= (i - len);
 1151                                 zeroing = 1;
 1152                         }
 1153                         if (mp->m_len > 0)
 1154                                 cnt++;
 1155                 }
 1156                 mp = mp->m_next;
 1157             }
 1158         }
              /*
               * Reject an oversized or negative length, or a request that
               * carries fewer data bytes than it claims.
               */
 1159         if (len > NFS_MAXDATA || len < 0 || i < len) {
 1160                 error = EIO;
 1161                 nfsm_reply(2 * NFSX_UNSIGNED);
 1162                 if (v3)
 1163                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1164                 error = 0;
 1165                 goto nfsmout;
 1166         }
 1167         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
 1168         if (error) {
 1169                 vp = NULL;
 1170                 nfsm_reply(2 * NFSX_UNSIGNED);
 1171                 if (v3)
 1172                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1173                 error = 0;
 1174                 goto nfsmout;
 1175         }
              /* v3 only: capture the attributes as they are before the write. */
 1176         if (v3) {
 1177                 NFSD_UNLOCK();
 1178                 mtx_lock(&Giant);       /* VFS */
 1179                 forat_ret = VOP_GETATTR(vp, &forat, cred, td);
 1180                 mtx_unlock(&Giant);     /* VFS */
 1181                 NFSD_LOCK();
 1182         }
              /* Only regular files can be written. */
 1183         if (vp->v_type != VREG) {
 1184                 if (v3)
 1185                         error = EINVAL;
 1186                 else
 1187                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
 1188         }
 1189         if (!error)
 1190                 error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
 1191         if (error) {
 1192                 NFSD_UNLOCK();
 1193                 mtx_lock(&Giant);       /* VFS */
 1194                 vput(vp);
 1195                 mtx_unlock(&Giant);     /* VFS */
 1196                 NFSD_LOCK();
 1197                 vp = NULL;
 1198                 nfsm_reply(NFSX_WCCDATA(v3));
 1199                 if (v3)
 1200                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1201                 error = 0;
 1202                 goto nfsmout;
 1203         }
 1204 
 1205         NFSD_UNLOCK();
 1206         if (len > 0) {
                  /*
                   * One iovec per request mbuf that still holds data; cnt
                   * was computed by the trimming loop above.  iv keeps the
                   * array base for FREE() while ivp walks it.
                   */
 1207             MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
 1208                 M_WAITOK);
 1209             uiop->uio_iov = iv = ivp;
 1210             uiop->uio_iovcnt = cnt;
 1211             mp = mrep;
 1212             while (mp) {
 1213                 if (mp->m_len > 0) {
 1214                         ivp->iov_base = mtod(mp, caddr_t);
 1215                         ivp->iov_len = mp->m_len;
 1216                         ivp++;
 1217                 }
 1218                 mp = mp->m_next;
 1219             }
 1220 
 1221             /*
 1222              * XXX
 1223              * The IO_METASYNC flag indicates that all metadata (and not just
 1224              * enough to ensure data integrity) must be written to stable storage
 1225              * synchronously.
 1226              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
 1227              */
 1228             if (stable == NFSV3WRITE_UNSTABLE)
 1229                 ioflags = IO_NODELOCKED;
 1230             else if (stable == NFSV3WRITE_DATASYNC)
 1231                 ioflags = (IO_SYNC | IO_NODELOCKED);
 1232             else
 1233                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
 1234             uiop->uio_resid = len;
 1235             uiop->uio_rw = UIO_WRITE;
 1236             uiop->uio_segflg = UIO_SYSSPACE;
 1237             uiop->uio_td = NULL;
 1238             uiop->uio_offset = off;
 1239             mtx_lock(&Giant);   /* VFS */
 1240             error = VOP_WRITE(vp, uiop, ioflags, cred);
 1241             /* XXXRW: unlocked write. */
 1242             nfsrvstats.srvvop_writes++;
 1243             FREE((caddr_t)iv, M_TEMP);
 1244         } else
                  /* Zero-length write: just take Giant so both branches hold it. */
 1245             mtx_lock(&Giant);   /* VFS */
 1246         mtx_assert(&Giant, MA_OWNED);   /* VFS */
              /* Attributes after the write, for the reply. */
 1247         aftat_ret = VOP_GETATTR(vp, vap, cred, td);
 1248         vput(vp);
 1249         mtx_unlock(&Giant);     /* VFS */
 1250         NFSD_LOCK();
 1251         vp = NULL;
 1252         if (!error)
 1253                 error = aftat_ret;
              /*
               * Reached by falling through after the write, or directly with
               * error = ESTALE when the mount lookup failed.
               */
 1254 ereply:
 1255         NFSD_LOCK_ASSERT();
 1256         nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
 1257                 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
 1258         if (v3) {
 1259                 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1260                 if (error) {
 1261                         error = 0;
 1262                         goto nfsmout;
 1263                 }
                      /*
                       * Four result words: the byte count as requested
                       * (retlen), the stability level reported to the client,
                       * and the two halves of nfsver.
                       */
 1264                 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 1265                 *tl++ = txdr_unsigned(retlen);
 1266                 /*
 1267                  * If nfs_async is set, then pretend the write was FILESYNC.
 1268                  */
 1269                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
 1270                         *tl++ = txdr_unsigned(stable);
 1271                 else
 1272                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
 1273                 /*
 1274                  * Actually, there is no need to txdr these fields,
 1275                  * but it may make the values more human readable,
 1276                  * for debugging purposes.
 1277                  */
                      /* nfsver is seeded from boottime the first time through. */
 1278                 if (nfsver.tv_sec == 0)
 1279                         nfsver = boottime;
 1280                 *tl++ = txdr_unsigned(nfsver.tv_sec);
 1281                 *tl = txdr_unsigned(nfsver.tv_usec);
 1282         } else if (!error) {
 1283                 /* v2 non-error case. */
 1284                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 1285                 nfsm_srvfillattr(vap, fp);
 1286         }
 1287         error = 0;
 1288 nfsmout:
 1289         NFSD_LOCK_ASSERT();
              /*
               * Common exit: under Giant, release any vnode still held and
               * call vn_finished_write() on mntp (which is still NULL if the
               * request was abandoned before the mount lookup).
               */
 1290         NFSD_UNLOCK();
 1291         mtx_lock(&Giant);       /* VFS */
 1292         if (vp)
 1293                 vput(vp);
 1294         vn_finished_write(mntp);
 1295         mtx_unlock(&Giant);     /* VFS */
 1296         NFSD_LOCK();
 1297         return(error);
 1298 }
 1299 
 1300 /*
 1301  * For write gathering we need to know whether two pending requests carry
 1302  * credentials with equivalent privileges.  NFS looks only at the effective
 1303  * uid and the group list of a BSD ucred, so those are the only fields
 1304  * compared here.  Returns 1 when they match and 0 when they differ.
 1305  */
 1306 static int
 1307 nfsrv_samecred(struct ucred *cr1, struct ucred *cr2)
 1308 {
 1309         int n = 0;
 1310 
 1311         /* A different uid or group count settles it immediately. */
 1312         if (cr1->cr_uid != cr2->cr_uid ||
 1313             cr1->cr_ngroups != cr2->cr_ngroups)
 1314                 return (0);
 1315         /* Advance while the group entries agree. */
 1316         while (n < cr1->cr_ngroups &&
 1317             cr1->cr_groups[n] == cr2->cr_groups[n])
 1318                 n++;
 1319         return (n >= cr1->cr_ngroups);
 1320 }
 1321 
 1322 /*
 1323  * NFS write service with write gathering support. Called when
 1324  * nfsrvw_procrastinate > 0.
 1325  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
 1326  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
 1327  * Jan. 1994.
 1328  */
 1329 int
 1330 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
 1331     struct thread *td, struct mbuf **mrq)
 1332 {
 1333         struct iovec *ivp;
 1334         struct mbuf *mp;
 1335         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
 1336         struct nfs_fattr *fp;
 1337         int i;
 1338         struct iovec *iov;
 1339         struct nfsrvw_delayhash *wpp;
 1340         struct ucred *cred;
 1341         struct vattr va, forat;
 1342         u_int32_t *tl;
 1343         caddr_t bpos, dpos;
 1344         int error = 0, rdonly, len, forat_ret = 1;
 1345         int ioflags, aftat_ret = 1, s, adjust, v3, zeroing;
 1346         struct mbuf *mb, *mreq, *mrep, *md;
 1347         struct vnode *vp = NULL;
 1348         struct uio io, *uiop = &io;
 1349         u_quad_t cur_usec;
 1350         struct mount *mntp = NULL;
 1351 
 1352         NFSD_LOCK_ASSERT();
 1353 
 1354         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1355 #ifndef nolint
 1356         i = 0;
 1357         len = 0;
 1358 #endif
 1359         *mrq = NULL;
 1360         if (*ndp) {
 1361             nfsd = *ndp;
 1362             *ndp = NULL;
 1363             mrep = nfsd->nd_mrep;
 1364             md = nfsd->nd_md;
 1365             dpos = nfsd->nd_dpos;
 1366             cred = nfsd->nd_cr;
 1367             v3 = (nfsd->nd_flag & ND_NFSV3);
 1368             LIST_INIT(&nfsd->nd_coalesce);
 1369             nfsd->nd_mreq = NULL;
 1370             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
 1371             cur_usec = nfs_curusec();
 1372             nfsd->nd_time = cur_usec +
 1373                 (v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
 1374 
 1375             /*
 1376              * Now, get the write header..
 1377              */
 1378             nfsm_srvmtofh(&nfsd->nd_fh);
 1379             if (v3) {
 1380                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 1381                 nfsd->nd_off = fxdr_hyper(tl);
 1382                 tl += 3;
 1383                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
 1384             } else {
 1385                 tl = nfsm_dissect_nonblock(u_int32_t *, 4 * NFSX_UNSIGNED);
 1386                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
 1387                 tl += 2;
 1388                 if (nfs_async)
 1389                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
 1390             }
 1391             len = fxdr_unsigned(int32_t, *tl);
 1392             nfsd->nd_len = len;
 1393             nfsd->nd_eoff = nfsd->nd_off + len;
 1394 
 1395             /*
 1396              * Trim the header out of the mbuf list and trim off any trailing
 1397              * junk so that the mbuf list has only the write data.
 1398              */
 1399             zeroing = 1;
 1400             i = 0;
 1401             mp = mrep;
 1402             while (mp) {
 1403                 if (mp == md) {
 1404                     zeroing = 0;
 1405                     adjust = dpos - mtod(mp, caddr_t);
 1406                     mp->m_len -= adjust;
 1407                     if (mp->m_len > 0 && adjust > 0)
 1408                         mp->m_data += adjust;
 1409                 }
 1410                 if (zeroing)
 1411                     mp->m_len = 0;
 1412                 else {
 1413                     i += mp->m_len;
 1414                     if (i > len) {
 1415                         mp->m_len -= (i - len);
 1416                         zeroing = 1;
 1417                     }
 1418                 }
 1419                 mp = mp->m_next;
 1420             }
 1421             if (len > NFS_MAXDATA || len < 0  || i < len) {
 1422 nfsmout:
 1423                 NFSD_LOCK_ASSERT();
 1424                 m_freem(mrep);
 1425                 error = EIO;
 1426                 nfsm_writereply(2 * NFSX_UNSIGNED);
 1427                 if (v3)
 1428                     nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
 1429                 nfsd->nd_mreq = mreq;
 1430                 nfsd->nd_mrep = NULL;
 1431                 nfsd->nd_time = 0;
 1432             }
 1433 
 1434             /*
 1435              * Add this entry to the hash and time queues.
 1436              */
 1437             s = splsoftclock();
 1438             owp = NULL;
 1439             wp = LIST_FIRST(&slp->ns_tq);
 1440             while (wp && wp->nd_time < nfsd->nd_time) {
 1441                 owp = wp;
 1442                 wp = LIST_NEXT(wp, nd_tq);
 1443             }
 1444             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
 1445             if (owp) {
 1446                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
 1447             } else {
 1448                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
 1449             }
 1450             if (nfsd->nd_mrep) {
 1451                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
 1452                 owp = NULL;
 1453                 wp = LIST_FIRST(wpp);
 1454                 while (wp &&
 1455                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh, NFSX_V3FH)){
 1456                     owp = wp;
 1457                     wp = LIST_NEXT(wp, nd_hash);
 1458                 }
 1459                 while (wp && wp->nd_off < nfsd->nd_off &&
 1460                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh, NFSX_V3FH)) {
 1461                     owp = wp;
 1462                     wp = LIST_NEXT(wp, nd_hash);
 1463                 }
 1464                 if (owp) {
 1465                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
 1466 
 1467                     /*
 1468                      * Search the hash list for overlapping entries and
 1469                      * coalesce.
 1470                      */
 1471                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
 1472                         wp = LIST_NEXT(nfsd, nd_hash);
 1473                         if (nfsrv_samecred(owp->nd_cr, nfsd->nd_cr))
 1474                             nfsrvw_coalesce(owp, nfsd);
 1475                     }
 1476                 } else {
 1477                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
 1478                 }
 1479             }
 1480             splx(s);
 1481         }
 1482 
 1483         /*
 1484          * Now, do VOP_WRITE()s for any one(s) that need to be done now
 1485          * and generate the associated reply mbuf list(s).
 1486          */
 1487 loop1:
 1488         cur_usec = nfs_curusec();
 1489         s = splsoftclock();
 1490         for (nfsd = LIST_FIRST(&slp->ns_tq); nfsd; nfsd = owp) {
 1491                 owp = LIST_NEXT(nfsd, nd_tq);
 1492                 if (nfsd->nd_time > cur_usec)
 1493                     break;
 1494                 if (nfsd->nd_mreq)
 1495                     continue;
 1496                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
 1497                 LIST_REMOVE(nfsd, nd_tq);
 1498                 LIST_REMOVE(nfsd, nd_hash);
 1499                 splx(s);
 1500                 mrep = nfsd->nd_mrep;
 1501                 nfsd->nd_mrep = NULL;
 1502                 cred = nfsd->nd_cr;
 1503                 v3 = (nfsd->nd_flag & ND_NFSV3);
 1504                 forat_ret = aftat_ret = 1;
 1505                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp,
 1506                     nfsd->nd_nam, &rdonly, TRUE);
 1507                 if (!error) {
 1508                     if (v3) {
 1509                         NFSD_UNLOCK();
 1510                         mtx_lock(&Giant);       /* VFS */
 1511                         forat_ret = VOP_GETATTR(vp, &forat, cred, td);
 1512                         mtx_unlock(&Giant);     /* VFS */
 1513                         NFSD_LOCK();
 1514                     }
 1515                     if (vp->v_type != VREG) {
 1516                         if (v3)
 1517                             error = EINVAL;
 1518                         else
 1519                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
 1520                     }
 1521                 } else {
 1522                     vp = NULL;
 1523                 }
 1524                 NFSD_UNLOCK();
 1525                 mtx_lock(&Giant);       /* VFS */
 1526                 if (!error)
 1527                     error = nfsrv_access_withgiant(vp, VWRITE, cred, rdonly,
 1528                         td, 1);
 1529                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
 1530                     ioflags = IO_NODELOCKED;
 1531                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
 1532                     ioflags = (IO_SYNC | IO_NODELOCKED);
 1533                 else
 1534                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
 1535                 uiop->uio_rw = UIO_WRITE;
 1536                 uiop->uio_segflg = UIO_SYSSPACE;
 1537                 uiop->uio_td = NULL;
 1538                 uiop->uio_offset = nfsd->nd_off;
 1539                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
 1540                 if (uiop->uio_resid > 0) {
 1541                     mp = mrep;
 1542                     i = 0;
 1543                     while (mp) {
 1544                         if (mp->m_len > 0)
 1545                             i++;
 1546                         mp = mp->m_next;
 1547                     }
 1548                     uiop->uio_iovcnt = i;
 1549                     MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
 1550                         M_TEMP, M_WAITOK);
 1551                     uiop->uio_iov = ivp = iov;
 1552                     mp = mrep;
 1553                     while (mp) {
 1554                         if (mp->m_len > 0) {
 1555                             ivp->iov_base = mtod(mp, caddr_t);
 1556                             ivp->iov_len = mp->m_len;
 1557                             ivp++;
 1558                         }
 1559                         mp = mp->m_next;
 1560                     }
 1561                     if (!error) {
 1562                         if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
 1563                             VOP_UNLOCK(vp, 0, td);
 1564                             error = vn_start_write(NULL, &mntp, V_WAIT);
 1565                             vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1566                         }
 1567                     }
 1568                     if (!error) {
 1569                         error = VOP_WRITE(vp, uiop, ioflags, cred);
 1570                         /* XXXRW: unlocked write. */
 1571                         nfsrvstats.srvvop_writes++;
 1572                         vn_finished_write(mntp);
 1573                     }
 1574                     FREE((caddr_t)iov, M_TEMP);
 1575                 }
 1576                 m_freem(mrep);
 1577                 if (vp) {
 1578                     aftat_ret = VOP_GETATTR(vp, &va, cred, td);
 1579                     vput(vp);
 1580                     vp = NULL;
 1581                 }
 1582                 mtx_unlock(&Giant);     /* VFS */
 1583                 NFSD_LOCK();
 1584 
 1585                 /*
 1586                  * Loop around generating replies for all write rpcs that have
 1587                  * now been completed.
 1588                  */
 1589                 swp = nfsd;
 1590                 do {
 1591                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
 1592                     if (error) {
 1593                         nfsm_writereply(NFSX_WCCDATA(v3));
 1594                         if (v3) {
 1595                             nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
 1596                         }
 1597                     } else {
 1598                         nfsm_writereply(NFSX_PREOPATTR(v3) +
 1599                             NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
 1600                             NFSX_WRITEVERF(v3));
 1601                         if (v3) {
 1602                             nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
 1603                             tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 1604                             *tl++ = txdr_unsigned(nfsd->nd_len);
 1605                             *tl++ = txdr_unsigned(swp->nd_stable);
 1606                             /*
 1607                              * Actually, there is no need to txdr these fields,
 1608                              * but it may make the values more human readable,
 1609                              * for debugging purposes.
 1610                              */
 1611                             if (nfsver.tv_sec == 0)
 1612                                     nfsver = boottime;
 1613                             *tl++ = txdr_unsigned(nfsver.tv_sec);
 1614                             *tl = txdr_unsigned(nfsver.tv_usec);
 1615                         } else {
 1616                             fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 1617                             nfsm_srvfillattr(&va, fp);
 1618                         }
 1619                     }
 1620                     nfsd->nd_mreq = mreq;
 1621                     if (nfsd->nd_mrep)
 1622                         panic("nfsrv_write: nd_mrep not free");
 1623 
 1624                     /*
 1625                      * Done. Put it at the head of the timer queue so that
 1626                      * the final phase can return the reply.
 1627                      */
 1628                     s = splsoftclock();
 1629                     if (nfsd != swp) {
 1630                         nfsd->nd_time = 0;
 1631                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
 1632                     }
 1633                     nfsd = LIST_FIRST(&swp->nd_coalesce);
 1634                     if (nfsd) {
 1635                         LIST_REMOVE(nfsd, nd_tq);
 1636                     }
 1637                     splx(s);
 1638                 } while (nfsd);
 1639                 s = splsoftclock();
 1640                 swp->nd_time = 0;
 1641                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
 1642                 splx(s);
 1643                 goto loop1;
 1644         }
 1645         splx(s);
 1646 
 1647         /*
 1648          * Search for a reply to return.
 1649          */
 1650         s = splsoftclock();
 1651         LIST_FOREACH(nfsd, &slp->ns_tq, nd_tq)
 1652                 if (nfsd->nd_mreq) {
 1653                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
 1654                     LIST_REMOVE(nfsd, nd_tq);
 1655                     *mrq = nfsd->nd_mreq;
 1656                     *ndp = nfsd;
 1657                     break;
 1658                 }
 1659         splx(s);
 1660         return (0);
 1661 }
 1662 
 1663 /*
 1664  * Coalesce the write request nfsd into owp. To do this we must:
 1665  * - remove nfsd from the queues
 1666  * - merge nfsd->nd_mrep into owp->nd_mrep
 1667  * - update the nd_eoff and nd_stable for owp
 1668  * - put nfsd on owp's nd_coalesce list
 1669  * NB: Must be called at splsoftclock().
 1670  */
 1671 static void
 1672 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
 1673 {
 1674         int overlap;
 1675         struct mbuf *mp;
 1676         struct nfsrv_descript *p;
 1677 
 1678         NFSD_LOCK_ASSERT();
 1679 
 1680         NFS_DPF(WG, ("C%03x-%03x",
 1681                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
 1682         LIST_REMOVE(nfsd, nd_hash);
 1683         LIST_REMOVE(nfsd, nd_tq);
 1684         if (owp->nd_eoff < nfsd->nd_eoff) {
 1685             overlap = owp->nd_eoff - nfsd->nd_off;
 1686             if (overlap < 0)
 1687                 panic("nfsrv_coalesce: bad off");
 1688             if (overlap > 0)
 1689                 m_adj(nfsd->nd_mrep, overlap);
 1690             mp = owp->nd_mrep;
 1691             while (mp->m_next)
 1692                 mp = mp->m_next;
 1693             mp->m_next = nfsd->nd_mrep;
 1694             owp->nd_eoff = nfsd->nd_eoff;
 1695         } else
 1696             m_freem(nfsd->nd_mrep);
 1697         nfsd->nd_mrep = NULL;
 1698         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
 1699             owp->nd_stable = NFSV3WRITE_FILESYNC;
 1700         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
 1701             owp->nd_stable == NFSV3WRITE_UNSTABLE)
 1702             owp->nd_stable = NFSV3WRITE_DATASYNC;
 1703         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
 1704 
 1705         /*
 1706          * If nfsd had anything else coalesced into it, transfer them
 1707          * to owp, otherwise their replies will never get sent.
 1708          */
 1709         for (p = LIST_FIRST(&nfsd->nd_coalesce); p;
 1710              p = LIST_FIRST(&nfsd->nd_coalesce)) {
 1711             LIST_REMOVE(p, nd_tq);
 1712             LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
 1713         }
 1714 }
 1715 
/*
 * nfs create service
 *
 * Handles both the v2 and v3 CREATE procedures.  The directory file
 * handle and name are decoded from the request and looked up with
 * nfs_namei().  If the name does not exist the node is created with
 * VOP_CREATE() (regular files and sockets) or, for the v2-only device and
 * fifo case, VOP_MKNOD().  If the name already exists and the request
 * carries a size, the existing file is resized via VOP_SETATTR() after a
 * write-access check.
 *
 * Locking: entered and left with the NFSD lock held (NFSD_LOCK_ASSERT()).
 * The NFSD lock is dropped and Giant is taken around the VFS calls.
 *
 * Returns the value of "error" at nfsmout.  Both reply paths visible here
 * reset it to 0 after building the reply; the nfsm_*() macros may jump to
 * nfsmout on their own (see the comment ahead of the nfs_namei() call).
 */
int
nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
    struct thread *td, struct mbuf **mrq)
{
        /*
         * NOTE(review): several of these locals (mrep, md, dpos, bpos, mb,
         * mreq, ...) are never named in a plain statement below; presumably
         * the nfsm_*() macros reference them by name -- confirm before
         * renaming or removing any of them.
         */
        struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
        struct sockaddr *nam = nfsd->nd_nam;
        caddr_t dpos = nfsd->nd_dpos;
        struct ucred *cred = nfsd->nd_cr;
        struct nfs_fattr *fp;
        struct vattr va, dirfor, diraft;
        struct vattr *vap = &va;
        struct nfsv2_sattr *sp;
        u_int32_t *tl;
        struct nameidata nd;
        caddr_t bpos;
        /* dirfor_ret/diraft_ret start non-zero, i.e. "attributes not valid". */
        int error = 0, rdev, len, tsize, dirfor_ret = 1, diraft_ret = 1;
        int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
        caddr_t cp;
        struct mbuf *mb, *mreq;
        struct vnode *dirp = NULL;
        nfsfh_t nfh;
        fhandle_t *fhp;
        u_quad_t tempsize;
        u_char cverf[NFSX_V3CREATEVERF];
        struct mount *mp = NULL;

        NFSD_LOCK_ASSERT();

        nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
#ifndef nolint
        /* rdev is only assigned in the v2 VCHR/VBLK/VFIFO case below. */
        rdev = 0;
#endif
        ndclear(&nd);

        /* Decode the directory file handle and find its mount point. */
        fhp = &nfh.fh_generic;
        nfsm_srvmtofh(fhp);
        if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
                error = ESTALE;
                goto ereply_locked;
        }
        /*
         * Announce the upcoming write to the filesystem.  The result of
         * vn_start_write() is deliberately ignored (cast to void); it is
         * paired with the vn_finished_write(mp) at nfsmout.
         */
        NFSD_UNLOCK();
        mtx_lock(&Giant);       /* VFS */
        (void) vn_start_write(NULL, &mp, V_WAIT);
        vfs_rel(mp);            /* The write holds a ref. */
        mtx_unlock(&Giant);     /* VFS */
        NFSD_LOCK();
        nfsm_srvnamesiz(len);

        nd.ni_cnd.cn_cred = cred;
        nd.ni_cnd.cn_nameiop = CREATE;
        nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;

        /*
         * Call namei and do initial cleanup to get a few things
         * out of the way.  If we get an initial error we cleanup
         * and return here to avoid special-casing the invalid nd
         * structure through the rest of the case.  dirp may be
         * set even if an error occurs, but the nd structure will not
         * be valid at all if an error occurs so we have to invalidate it
         * prior to calling nfsm_reply ( which might goto nfsmout ).
         */
        error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
                &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
        /* dirp is only used for the v3 post-op attributes; drop it for v2. */
        if (dirp && !v3) {
                NFSD_UNLOCK();
                mtx_lock(&Giant);       /* VFS */
                vrele(dirp);
                mtx_unlock(&Giant);     /* VFS */
                NFSD_LOCK();
                dirp = NULL;
        }
        if (error) {
                nfsm_reply(NFSX_WCCDATA(v3));
                if (v3)
                        nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
                error = 0;
                goto nfsmout;
        }

        /*
         * No error.  Continue.  State:
         *
         *      startdir        is valid ( we release this immediately )
         *      dirp            may be valid
         *      nd.ni_vp        may be valid
         *      nd.ni_dvp       is valid
         *
         * The error state is set through the code and we may also do some
         * opportunistic releasing of vnodes to avoid holding locks through
         * NFS I/O.  The cleanup at the end is a catch-all
         */

        /* Decode the requested attributes into *vap. */
        VATTR_NULL(vap);
        if (v3) {
                tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
                how = fxdr_unsigned(int, *tl);
                /*
                 * NOTE(review): there is no default case, so a create mode
                 * other than the three below proceeds with *vap left as
                 * VATTR_NULL() set it -- confirm this is intended.
                 */
                switch (how) {
                case NFSV3CREATE_GUARDED:
                        if (nd.ni_vp) {
                                error = EEXIST;
                                break;
                        }
                        /* fall through */
                case NFSV3CREATE_UNCHECKED:
                        nfsm_srvsattr(vap);
                        break;
                case NFSV3CREATE_EXCLUSIVE:
                        /* Save the client's verifier; no attributes follow. */
                        cp = nfsm_dissect_nonblock(caddr_t, NFSX_V3CREATEVERF);
                        bcopy(cp, cverf, NFSX_V3CREATEVERF);
                        exclusive_flag = 1;
                        break;
                };
                vap->va_type = VREG;
        } else {
                sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
                vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
                if (vap->va_type == VNON)
                        vap->va_type = VREG;
                vap->va_mode = nfstov_mode(sp->sa_mode);
                /* The v2 sa_size field is a size or a device number by type. */
                switch (vap->va_type) {
                case VREG:
                        tsize = fxdr_unsigned(int32_t, sp->sa_size);
                        if (tsize != -1)
                                vap->va_size = (u_quad_t)tsize;
                        break;
                case VCHR:
                case VBLK:
                case VFIFO:
                        rdev = fxdr_unsigned(long, sp->sa_size);
                        break;
                default:
                        break;
                };
        }

        /*
         * Iff doesn't exist, create it
         * otherwise just truncate to 0 length
         *   should I set the mode too ?
         *
         * The only possible error we can have at this point is EEXIST.
         * nd.ni_vp will also be non-NULL in that case.
         */
        NFSD_UNLOCK();
        mtx_lock(&Giant);       /* VFS */
        if (nd.ni_vp == NULL) {
                if (vap->va_mode == (mode_t)VNOVAL)
                        vap->va_mode = 0;
                if (vap->va_type == VREG || vap->va_type == VSOCK) {
                        error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
                        if (error)
                                NDFREE(&nd, NDF_ONLY_PNBUF);
                        else {
                                if (exclusive_flag) {
                                        /*
                                         * Record the verifier in the new
                                         * file's atime.  Clearing the flag
                                         * skips the verifier comparison
                                         * further down for this request.
                                         *
                                         * NOTE(review): this copies
                                         * NFSX_V3CREATEVERF raw bytes over
                                         * the start of va_atime; confirm the
                                         * layout assumption holds where
                                         * va_atime is larger than the
                                         * verifier.
                                         */
                                        exclusive_flag = 0;
                                        VATTR_NULL(vap);
                                        bcopy(cverf, (caddr_t)&vap->va_atime,
                                                NFSX_V3CREATEVERF);
                                        error = VOP_SETATTR(nd.ni_vp, vap, cred,
                                                td);
                                }
                        }
                } else if (vap->va_type == VCHR || vap->va_type == VBLK ||
                    vap->va_type == VFIFO) {
                        /*
                         * NFSv2-specific code for creating device nodes
                         * and fifos.
                         *
                         * Handle SysV FIFO node special cases.  All other
                         * devices require super user to access.
                         */
                        if (vap->va_type == VCHR && rdev == 0xffffffff)
                                vap->va_type = VFIFO;
                        if (vap->va_type != VFIFO &&
                            (error = suser_cred(cred, 0))) {
                                goto ereply;
                        }
                        vap->va_rdev = rdev;
                        error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
                        if (error) {
                                NDFREE(&nd, NDF_ONLY_PNBUF);
                                goto ereply;
                        }
                        /* Drop the new vnode; it is looked up again below. */
                        vput(nd.ni_vp);
                        nd.ni_vp = NULL;

                        /*
                         * release dvp prior to lookup
                         */
                        vput(nd.ni_dvp);
                        nd.ni_dvp = NULL;

                        /*
                         * Setup for lookup.
                         *
                         * Even though LOCKPARENT was cleared, ni_dvp may
                         * be garbage.
                         */
                        nd.ni_cnd.cn_nameiop = LOOKUP;
                        nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
                        nd.ni_cnd.cn_thread = td;
                        nd.ni_cnd.cn_cred = cred;

                        error = lookup(&nd);
                        nd.ni_dvp = NULL;
                        /* Undo the Giant acquisition lookup() reports. */
                        if (nd.ni_cnd.cn_flags & GIANTHELD) {
                                mtx_unlock(&Giant);
                                nd.ni_cnd.cn_flags &= ~GIANTHELD;
                        }
                        if (error)
                                goto ereply;

                        if (nd.ni_cnd.cn_flags & ISSYMLINK) {
                                error = EINVAL;
                                goto ereply;
                        }
                } else {
                        error = ENXIO;
                }
        } else {
                /*
                 * The name already exists.  If a size was supplied, check
                 * write access and apply only that size; every other
                 * attribute is reset by VATTR_NULL() first.
                 */
                if (vap->va_size != -1) {
                        error = nfsrv_access_withgiant(nd.ni_vp, VWRITE,
                            cred, (nd.ni_cnd.cn_flags & RDONLY), td, 0);
                        if (!error) {
                                tempsize = vap->va_size;
                                VATTR_NULL(vap);
                                vap->va_size = tempsize;
                                error = VOP_SETATTR(nd.ni_vp, vap, cred,
                                         td);
                        }
                }
        }

        /* Build the file handle and fetch attributes for the reply. */
        if (!error) {
                bzero((caddr_t)fhp, sizeof(nfh));
                fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
                error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
                if (!error)
                        error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
        }
        if (v3) {
                /*
                 * exclusive_flag is still set only when the file already
                 * existed: report EEXIST unless its atime holds the
                 * verifier sent with this request.
                 */
                if (exclusive_flag && !error &&
                        bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
                        error = EEXIST;
                /* Collect the directory's post-operation attributes. */
                if (dirp == nd.ni_dvp)
                        diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
                else {
                        /* Drop the other locks to avoid deadlock. */
                        if (nd.ni_dvp) {
                                if (nd.ni_dvp == nd.ni_vp)
                                        vrele(nd.ni_dvp);
                                else
                                        vput(nd.ni_dvp);
                        }
                        if (nd.ni_vp)
                                vput(nd.ni_vp);
                        nd.ni_dvp = NULL;
                        nd.ni_vp = NULL;

                        vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
                        diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
                        VOP_UNLOCK(dirp, 0, td);
                }
        }
        /* ereply: reached with Giant held and the NFSD lock dropped. */
ereply:
        NFSD_UNLOCK_ASSERT();
        mtx_unlock(&Giant);     /* VFS */
        NFSD_LOCK();
        /* ereply_locked: reached with the NFSD lock held, Giant not held. */
ereply_locked:
        NFSD_LOCK_ASSERT();
        nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
        if (v3) {
                if (!error) {
                        nfsm_srvpostop_fh(fhp);
                        nfsm_srvpostop_attr(0, vap);
                }
                nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
        } else if (!error) {
                /* v2 non-error case. */
                nfsm_srvfhtom(fhp, v3);
                fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
                nfsm_srvfillattr(vap, fp);
        }
        /* A reply has been built, so the function itself returns 0. */
        error = 0;

        /*
         * Common exit: release whatever vnodes and name buffer are still
         * held in nd, the dirp reference, and the write started at the top.
         */
nfsmout:
        NFSD_LOCK_ASSERT();
        NFSD_UNLOCK();
        mtx_lock(&Giant);       /* VFS */
        if (nd.ni_dvp) {
                /* Parent and child are the same vnode: drop one reference. */
                if (nd.ni_dvp == nd.ni_vp)
                        vrele(nd.ni_dvp);
                else
                        vput(nd.ni_dvp);
        }
        if (nd.ni_vp)
                vput(nd.ni_vp);
        if (nd.ni_startdir) {
                vrele(nd.ni_startdir);
                nd.ni_startdir = NULL;
        }
        if (dirp)
                vrele(dirp);
        NDFREE(&nd, NDF_ONLY_PNBUF);
        vn_finished_write(mp);
        mtx_unlock(&Giant);     /* VFS */
        NFSD_LOCK();
        return (error);
}
 2029 
 2030 /*
 2031  * nfs v3 mknod service
 2032  */
 2033 int
 2034 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2035     struct thread *td, struct mbuf **mrq)
 2036 {
 2037         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2038         struct sockaddr *nam = nfsd->nd_nam;
 2039         caddr_t dpos = nfsd->nd_dpos;
 2040         struct ucred *cred = nfsd->nd_cr;
 2041         struct vattr va, dirfor, diraft;
 2042         struct vattr *vap = &va;
 2043         u_int32_t *tl;
 2044         struct nameidata nd;
 2045         caddr_t bpos;
 2046         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2047         u_int32_t major, minor;
 2048         enum vtype vtyp;
 2049         struct mbuf *mb, *mreq;
 2050         struct vnode *vp, *dirp = NULL;
 2051         nfsfh_t nfh;
 2052         fhandle_t *fhp;
 2053         struct mount *mp = NULL;
 2054         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2055 
 2056         NFSD_LOCK_ASSERT();
 2057 
 2058         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2059         if (!v3)
 2060                 panic("nfsrv_mknod: v3 proc called on a v2 connection");
 2061         ndclear(&nd);
 2062 
 2063         fhp = &nfh.fh_generic;
 2064         nfsm_srvmtofh(fhp);
 2065         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2066                 error = ESTALE;
 2067                 goto ereply;
 2068         }
 2069         NFSD_UNLOCK();
 2070         mtx_lock(&Giant);       /* VFS */
 2071         (void) vn_start_write(NULL, &mp, V_WAIT);
 2072         vfs_rel(mp);            /* The write holds a ref. */
 2073         mtx_unlock(&Giant);     /* VFS */
 2074         NFSD_LOCK();
 2075         nfsm_srvnamesiz(len);
 2076 
 2077         nd.ni_cnd.cn_cred = cred;
 2078         nd.ni_cnd.cn_nameiop = CREATE;
 2079         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
 2080 
 2081         /*
 2082          * Handle nfs_namei() call.  If an error occurs, the nd structure
 2083          * is not valid.  However, nfsm_*() routines may still jump to
 2084          * nfsmout.
 2085          */
 2086 
 2087         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 2088                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 2089         if (error) {
 2090                 nfsm_reply(NFSX_WCCDATA(1));
 2091                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2092                 error = 0;
 2093                 goto nfsmout;
 2094         }
 2095         tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 2096         vtyp = nfsv3tov_type(*tl);
 2097         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
 2098                 NFSD_UNLOCK();
 2099                 mtx_lock(&Giant);       /* VFS */
 2100                 error = NFSERR_BADTYPE;
 2101                 goto out;
 2102         }
 2103         VATTR_NULL(vap);
 2104         nfsm_srvsattr(vap);
 2105         if (vtyp == VCHR || vtyp == VBLK) {
 2106                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
 2107                 major = fxdr_unsigned(u_int32_t, *tl++);
 2108                 minor = fxdr_unsigned(u_int32_t, *tl);
 2109                 vap->va_rdev = makedev(major, minor);
 2110         }
 2111 
 2112         /*
 2113          * Iff doesn't exist, create it.
 2114          */
 2115         if (nd.ni_vp) {
 2116                 NFSD_UNLOCK();
 2117                 mtx_lock(&Giant);       /* VFS */
 2118                 error = EEXIST;
 2119                 goto out;
 2120         }
 2121         vap->va_type = vtyp;
 2122         if (vap->va_mode == (mode_t)VNOVAL)
 2123                 vap->va_mode = 0;
 2124         NFSD_UNLOCK();
 2125         mtx_lock(&Giant);       /* VFS */
 2126         if (vtyp == VSOCK) {
 2127                 vrele(nd.ni_startdir);
 2128                 nd.ni_startdir = NULL;
 2129                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 2130                 if (error)
 2131                         NDFREE(&nd, NDF_ONLY_PNBUF);
 2132         } else {
 2133                 if (vtyp != VFIFO && (error = suser_cred(cred, 0)))
 2134                         goto out;
 2135                 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 2136                 if (error) {
 2137                         NDFREE(&nd, NDF_ONLY_PNBUF);
 2138                         goto out;
 2139                 }
 2140                 vput(nd.ni_vp);
 2141                 nd.ni_vp = NULL;
 2142 
 2143                 /*
 2144                  * Release dvp prior to lookup
 2145                  */
 2146                 vput(nd.ni_dvp);
 2147                 nd.ni_dvp = NULL;
 2148 
 2149                 nd.ni_cnd.cn_nameiop = LOOKUP;
 2150                 nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
 2151                 nd.ni_cnd.cn_thread = td;
 2152                 nd.ni_cnd.cn_cred = td->td_ucred;
 2153 
 2154                 error = lookup(&nd);
 2155                 nd.ni_dvp = NULL;
 2156                 if (nd.ni_cnd.cn_flags & GIANTHELD) {
 2157                         mtx_unlock(&Giant);
 2158                         nd.ni_cnd.cn_flags &= ~GIANTHELD;
 2159                 }
 2160 
 2161                 if (error)
 2162                         goto out;
 2163                 if (nd.ni_cnd.cn_flags & ISSYMLINK)
 2164                         error = EINVAL;
 2165         }
 2166 
 2167         /*
 2168          * send response, cleanup, return.
 2169          */
 2170 out:
 2171         NFSD_UNLOCK_ASSERT();
 2172         vp = nd.ni_vp;
 2173         if (!error) {
 2174                 bzero((caddr_t)fhp, sizeof(nfh));
 2175                 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 2176                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
 2177                 if (!error)
 2178                         error = VOP_GETATTR(vp, vap, cred, td);
 2179         }
 2180         if (nd.ni_dvp) {
 2181                 if (nd.ni_dvp == nd.ni_vp)
 2182                         vrele(nd.ni_dvp);
 2183                 else
 2184                         vput(nd.ni_dvp);
 2185                 nd.ni_dvp = NULL;
 2186         }
 2187         if (vp) {
 2188                 vput(vp);
 2189                 vp = NULL;
 2190                 nd.ni_vp = NULL;
 2191         }
 2192         if (nd.ni_startdir) {
 2193                 vrele(nd.ni_startdir);
 2194                 nd.ni_startdir = NULL;
 2195         }
 2196         NDFREE(&nd, NDF_ONLY_PNBUF);
 2197         if (dirp) {
 2198                 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2199                 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2200                 VOP_UNLOCK(dirp, 0, td);
 2201         }
 2202         mtx_unlock(&Giant);     /* VFS */
 2203         NFSD_LOCK();
 2204 ereply:
 2205         NFSD_LOCK_ASSERT();
 2206         nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
 2207         if (v3) {
 2208                 if (!error) {
 2209                         nfsm_srvpostop_fh(fhp);
 2210                         nfsm_srvpostop_attr(0, vap);
 2211                 }
 2212                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2213         }
 2214         NFSD_UNLOCK();
 2215         mtx_lock(&Giant);       /* VFS */
 2216         vn_finished_write(mp);
 2217         mtx_unlock(&Giant);     /* VFS */
 2218         NFSD_LOCK();
 2219         return (0);
 2220 nfsmout:
 2221         NFSD_LOCK_ASSERT();
 2222         NFSD_UNLOCK();
 2223         mtx_lock(&Giant);       /* VFS */
 2224         if (nd.ni_dvp) {
 2225                 if (nd.ni_dvp == nd.ni_vp)
 2226                         vrele(nd.ni_dvp);
 2227                 else
 2228                         vput(nd.ni_dvp);
 2229         }
 2230         if (nd.ni_vp)
 2231                 vput(nd.ni_vp);
 2232         if (dirp)
 2233                 vrele(dirp);
 2234         if (nd.ni_startdir)
 2235                 vrele(nd.ni_startdir);
 2236         NDFREE(&nd, NDF_ONLY_PNBUF);
 2237         vn_finished_write(mp);
 2238         mtx_unlock(&Giant);     /* VFS */
 2239         NFSD_LOCK();
 2240         return (error);
 2241 }
 2242 
 2243 /*
 2244  * nfs remove service
       *
       * Decodes a directory file handle and a name from the request, looks the
       * name up for DELETE with parent and leaf locked, and removes the entry
       * with VOP_REMOVE().  Directories are refused with EPERM and vnodes
       * flagged VV_ROOT with EBUSY.
       *
       * nfsd supplies the request mbufs, client address, credentials and the
       * NFSv3 flag; slp and td are handed on to the lookup and VOP calls.
       * mrq is not referenced directly in this body (presumably it is used by
       * the nfsm_reply() macro -- confirm against its definition).
       *
       * Locking: entered with the NFSD lock held and returns with it held.
       * The NFSD lock is exchanged for Giant around the VFS operations.
       *
       * Returns error; for NFSv3 error is cleared once the wcc data has been
       * added to the reply.  The nfsm_* macros are defined elsewhere and may
       * jump straight to nfsmout.
 2245  */
 2246 int
 2247 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2248     struct thread *td, struct mbuf **mrq)
 2249 {
 2250         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2251         struct sockaddr *nam = nfsd->nd_nam;
 2252         caddr_t dpos = nfsd->nd_dpos;
 2253         struct ucred *cred = nfsd->nd_cr;
 2254         struct nameidata nd;
 2255         caddr_t bpos;
 2256         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2257         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2258         struct mbuf *mb, *mreq;
 2259         struct vnode *dirp;
 2260         struct vattr dirfor, diraft;
 2261         nfsfh_t nfh;
 2262         fhandle_t *fhp;
 2263         struct mount *mp = NULL;
 2264
 2265         NFSD_LOCK_ASSERT();
 2266
 2267         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Reset nd before any macro can bail out to the nfsmout cleanup. */
 2268         ndclear(&nd);
 2269
 2270         fhp = &nfh.fh_generic;
 2271         nfsm_srvmtofh(fhp);
              /* No mount matches the fsid in the handle: the handle is stale. */
 2272         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2273                 error = ESTALE;
 2274                 goto ereply;
 2275         }
              /* Swap the NFSD lock for Giant while starting the write on mp. */
 2276         NFSD_UNLOCK();
 2277         mtx_lock(&Giant);       /* VFS */
 2278         (void) vn_start_write(NULL, &mp, V_WAIT);
 2279         vfs_rel(mp);            /* The write holds a ref. */
 2280         mtx_unlock(&Giant);     /* VFS */
 2281         NFSD_LOCK();
 2282         nfsm_srvnamesiz(len);
 2283
 2284         nd.ni_cnd.cn_cred = cred;
 2285         nd.ni_cnd.cn_nameiop = DELETE;
 2286         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
 2287         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 2288                 &dirp, v3,  &dirfor, &dirfor_ret, td, FALSE);
 2289         NFSD_UNLOCK();
 2290         mtx_lock(&Giant);       /* VFS */
              /* dirp is only used below for the v3 wcc data; drop it for v2. */
 2291         if (dirp && !v3) {
 2292                 vrele(dirp);
 2293                 dirp = NULL;
 2294         }
 2295         if (error == 0) {
 2296                 if (nd.ni_vp->v_type == VDIR) {
 2297                         error = EPERM;          /* POSIX */
 2298                         goto out;
 2299                 }
 2300                 /*
 2301                  * The root of a mounted filesystem cannot be deleted.
 2302                  */
 2303                 if (nd.ni_vp->v_vflag & VV_ROOT) {
 2304                         error = EBUSY;
 2305                         goto out;
 2306                 }
 2307 out:
 2308                 NFSD_UNLOCK_ASSERT();
                      /* Only remove when neither check above rejected the vnode. */
 2309                 if (!error) {
 2310                         error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 2311                         NDFREE(&nd, NDF_ONLY_PNBUF);
 2312                 }
 2313         }
              /* v3: fetch the directory's post-operation attributes for wcc data. */
 2314         if (dirp && v3) {
 2315                 if (dirp == nd.ni_dvp)
 2316                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2317                 else {
 2318                         /* Drop the other locks to avoid deadlock. */
 2319                         if (nd.ni_dvp) {
 2320                                 if (nd.ni_dvp == nd.ni_vp)
 2321                                         vrele(nd.ni_dvp);
 2322                                 else
 2323                                         vput(nd.ni_dvp);
 2324                         }
 2325                         if (nd.ni_vp)
 2326                                 vput(nd.ni_vp);
 2327                         nd.ni_dvp = NULL;
 2328                         nd.ni_vp = NULL;
 2329
 2330                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2331                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2332                         VOP_UNLOCK(dirp, 0, td);
 2333                 }
 2334                 vrele(dirp);
 2335                 dirp = NULL;
 2336         }
 2337         mtx_unlock(&Giant);     /* VFS */
 2338         NFSD_LOCK();
 2339 ereply:
 2340         NFSD_LOCK_ASSERT();
 2341         nfsm_reply(NFSX_WCCDATA(v3));
 2342         if (v3) {
 2343                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2344                 error = 0;
 2345         }
 2346 nfsmout:
              /* Common exit: release whatever lookup state is still held. */
 2347         NFSD_LOCK_ASSERT();
 2348         NFSD_UNLOCK();
 2349         mtx_lock(&Giant);       /* VFS */
 2350         NDFREE(&nd, NDF_ONLY_PNBUF);
 2351         if (nd.ni_dvp) {
                      /*
                       * When parent and leaf are the same vnode only the
                       * reference is dropped here; the vput() of ni_vp below
                       * does the unlock.
                       */
 2352                 if (nd.ni_dvp == nd.ni_vp)
 2353                         vrele(nd.ni_dvp);
 2354                 else
 2355                         vput(nd.ni_dvp);
 2356         }
 2357         if (nd.ni_vp)
 2358                 vput(nd.ni_vp);
 2359         vn_finished_write(mp);
 2360         mtx_unlock(&Giant);     /* VFS */
 2361         NFSD_LOCK();
 2362         return(error);
 2363 }
 2364 
 2365 /*
 2366  * nfs rename service
       *
       * Decodes the source (file handle + name) and then the destination
       * (file handle + name), looks both up and calls VOP_RENAME().  Before
       * the VOP a series of checks maps unsupported combinations (directory
       * vs. non-directory, mount points, different mounts, renaming onto
       * itself) to protocol errors, with different codes for v2 and v3.
       *
       * The write is started on the mount found from the source handle.
       *
       * Locking: entered with the NFSD lock held and returns with it held.
       * The NFSD lock is exchanged for Giant around the VFS operations.
       *
       * Returns error, which is cleared on every path that builds a reply
       * here; a non-zero value can only be left behind by an nfsm_* macro
       * that jumped to nfsmout (macros are defined elsewhere -- confirm).
 2367  */
 2368 int
 2369 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2370     struct thread *td, struct mbuf **mrq)
 2371 {
 2372         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2373         struct sockaddr *nam = nfsd->nd_nam;
 2374         caddr_t dpos = nfsd->nd_dpos;
 2375         struct ucred *cred = nfsd->nd_cr;
 2376         caddr_t bpos;
 2377         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
 2378         int tdirfor_ret = 1, tdiraft_ret = 1;
 2379         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2380         struct mbuf *mb, *mreq;
 2381         struct nameidata fromnd, tond;
 2382         struct vnode *fvp, *tvp, *tdvp, *fdirp = NULL;
 2383         struct vnode *tdirp = NULL;
 2384         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
 2385         nfsfh_t fnfh, tnfh;
 2386         fhandle_t *ffhp, *tfhp;
 2387         uid_t saved_uid;
 2388         struct mount *mp = NULL;
 2389
 2390         NFSD_LOCK_ASSERT();
 2391
 2392         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2393 #ifndef nolint
 2394         fvp = NULL;
 2395 #endif
 2396         ffhp = &fnfh.fh_generic;
 2397         tfhp = &tnfh.fh_generic;
 2398
 2399         /*
 2400          * Clear fields in case goto nfsmout occurs from macro.
 2401          */
 2402
 2403         ndclear(&fromnd);
 2404         ndclear(&tond);
 2405
 2406         nfsm_srvmtofh(ffhp);
              /* No mount matches the fsid in the source handle: stale handle. */
 2407         if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL) {
 2408                 error = ESTALE;
 2409                 goto out1;
 2410         }
 2411         NFSD_UNLOCK();
 2412         mtx_lock(&Giant);
 2413         (void) vn_start_write(NULL, &mp, V_WAIT);
 2414         vfs_rel(mp);            /* The write holds a ref. */
 2415         mtx_unlock(&Giant);
 2416         NFSD_LOCK();
 2417         nfsm_srvnamesiz(len);
 2418         /*
 2419          * Remember our original uid so that we can reset cr_uid before
 2420          * the second nfs_namei() call, in case it is remapped.
 2421          */
 2422         saved_uid = cred->cr_uid;
 2423         fromnd.ni_cnd.cn_cred = cred;
 2424         fromnd.ni_cnd.cn_nameiop = DELETE;
 2425         fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART;
 2426         error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md,
 2427                 &dpos, &fdirp, v3, &fdirfor, &fdirfor_ret, td, FALSE);
              /* fdirp is only used for the v3 wcc data; drop it for v2. */
 2428         if (fdirp && !v3) {
 2429                 NFSD_UNLOCK();
 2430                 mtx_lock(&Giant);       /* VFS */
 2431                 vrele(fdirp);
 2432                 mtx_unlock(&Giant);     /* VFS */
 2433                 NFSD_LOCK();
 2434                 fdirp = NULL;
 2435         }
              /*
               * Source lookup failed: reply right away.  The destination
               * arguments are never parsed on this path.
               */
 2436         if (error) {
 2437                 nfsm_reply(2 * NFSX_WCCDATA(v3));
 2438                 if (v3) {
 2439                         nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 2440                         nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 2441                 }
 2442                 error = 0;
 2443                 goto nfsmout;
 2444         }
 2445         fvp = fromnd.ni_vp;
 2446         nfsm_srvmtofh(tfhp);
 2447         nfsm_srvnamesiz(len2);
 2448         cred->cr_uid = saved_uid;
 2449         tond.ni_cnd.cn_cred = cred;
 2450         tond.ni_cnd.cn_nameiop = RENAME;
 2451         tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
 2452         error = nfs_namei(&tond, tfhp, len2, slp, nam, &md,
 2453                 &dpos, &tdirp, v3, &tdirfor, &tdirfor_ret, td, FALSE);
 2454         NFSD_UNLOCK();
 2455         mtx_lock(&Giant);       /* VFS */
 2456         if (tdirp && !v3) {
 2457                 vrele(tdirp);
 2458                 tdirp = NULL;
 2459         }
 2460         if (error) {
 2461                 mtx_unlock(&Giant);     /* VFS */
 2462                 NFSD_LOCK();
 2463                 goto out1;
 2464         }
 2465
 2466         tdvp = tond.ni_dvp;
 2467         tvp = tond.ni_vp;
              /*
               * If the target already exists it must be the same kind of object
               * as the source (directory vs. non-directory) and must not be a
               * directory with something mounted on it.
               */
 2468         if (tvp != NULL) {
 2469                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 2470                         if (v3)
 2471                                 error = EEXIST;
 2472                         else
 2473                                 error = EISDIR;
 2474                         goto out;
 2475                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 2476                         if (v3)
 2477                                 error = EEXIST;
 2478                         else
 2479                                 error = ENOTDIR;
 2480                         goto out;
 2481                 }
 2482                 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
 2483                         if (v3)
 2484                                 error = EXDEV;
 2485                         else
 2486                                 error = ENOTEMPTY;
 2487                         goto out;
 2488                 }
 2489         }
              /* A source directory that is a mount point cannot be renamed. */
 2490         if (fvp->v_type == VDIR && fvp->v_mountedhere) {
 2491                 if (v3)
 2492                         error = EXDEV;
 2493                 else
 2494                         error = ENOTEMPTY;
 2495                 goto out;
 2496         }
              /* Source and destination directory must be on the same mount. */
 2497         if (fvp->v_mount != tdvp->v_mount) {
 2498                 if (v3)
 2499                         error = EXDEV;
 2500                 else
 2501                         error = ENOTEMPTY;
 2502                 goto out;
 2503         }
              /*
               * Source is the destination directory itself.
               * NOTE(review): unlike the checks above there is no "goto out"
               * here, so control continues into the identical-name test below,
               * which could overwrite error with -1 -- confirm this is intended.
               */
 2504         if (fvp == tdvp) {
 2505                 if (v3)
 2506                         error = EINVAL;
 2507                 else
 2508                         error = ENOTEMPTY;
 2509         }
 2510         /*
 2511          * If source is the same as the destination (that is the
 2512          * same vnode with the same name in the same directory),
 2513          * then there is nothing to do.
 2514          */
 2515         if (fvp == tvp && fromnd.ni_dvp == tdvp &&
 2516             fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
 2517             !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
 2518               fromnd.ni_cnd.cn_namelen))
 2519                 error = -1;
 2520 out:
 2521         NFSD_UNLOCK_ASSERT();
 2522         if (!error) {
 2523                 /*
 2524                  * The VOP_RENAME function releases all vnode references &
 2525                  * locks prior to returning so we need to clear the pointers
 2526                  * to bypass cleanup code later on.
 2527                  */
 2528                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 2529                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 2530                 fromnd.ni_dvp = NULL;
 2531                 fromnd.ni_vp = NULL;
 2532                 tond.ni_dvp = NULL;
 2533                 tond.ni_vp = NULL;
 2534                 if (error) {
 2535                         NDFREE(&fromnd, NDF_ONLY_PNBUF);
 2536                         NDFREE(&tond, NDF_ONLY_PNBUF);
 2537                 }
 2538         } else {
                      /* -1 is the internal "nothing to do" marker, not a failure. */
 2539                 if (error == -1)
 2540                         error = 0;
 2541         }
 2542         /* fall through */
 2543
 2544         mtx_unlock(&Giant);     /* VFS */
 2545         NFSD_LOCK();
 2546 out1:
 2547         NFSD_LOCK_ASSERT();
 2548         nfsm_reply(2 * NFSX_WCCDATA(v3));
 2549         if (v3) {
 2550                 /* Release existing locks to prevent deadlock. */
 2551                 NFSD_UNLOCK();
 2552                 mtx_lock(&Giant);       /* VFS */
 2553                 if (tond.ni_dvp) {
 2554                         if (tond.ni_dvp == tond.ni_vp)
 2555                                 vrele(tond.ni_dvp);
 2556                         else
 2557                                 vput(tond.ni_dvp);
 2558                 }
 2559                 if (tond.ni_vp)
 2560                         vput(tond.ni_vp);
 2561                 tond.ni_dvp = NULL;
 2562                 tond.ni_vp = NULL;
 2563
                      /* Post-operation attributes of both directories for wcc data. */
 2564                 if (fdirp) {
 2565                         vn_lock(fdirp, LK_EXCLUSIVE | LK_RETRY, td);
 2566                         fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, td);
 2567                         VOP_UNLOCK(fdirp, 0, td);
 2568                 }
 2569                 if (tdirp) {
 2570                         vn_lock(tdirp, LK_EXCLUSIVE | LK_RETRY, td);
 2571                         tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, td);
 2572                         VOP_UNLOCK(tdirp, 0, td);
 2573                 }
 2574                 mtx_unlock(&Giant);     /* VFS */
 2575                 NFSD_LOCK();
 2576                 nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 2577                 nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 2578         }
 2579         error = 0;
 2580         /* fall through */
 2581
 2582 nfsmout:
 2583         /*
 2584          * Clear out tond related fields
 2585          */
 2586         NFSD_LOCK_ASSERT();
 2587         NFSD_UNLOCK();
 2588         mtx_lock(&Giant);       /* VFS */
 2589         if (tond.ni_dvp) {
 2590                 if (tond.ni_dvp == tond.ni_vp)
 2591                         vrele(tond.ni_dvp);
 2592                 else
 2593                         vput(tond.ni_dvp);
 2594         }
 2595         if (tond.ni_vp)
 2596                 vput(tond.ni_vp);
 2597         if (tdirp)
 2598                 vrele(tdirp);
 2599         if (tond.ni_startdir)
 2600                 vrele(tond.ni_startdir);
 2601         NDFREE(&tond, NDF_ONLY_PNBUF);
 2602         /*
 2603          * Clear out fromnd related fields
 2604          */
 2605         if (fdirp)
 2606                 vrele(fdirp);
 2607         if (fromnd.ni_startdir)
 2608                 vrele(fromnd.ni_startdir);
 2609         NDFREE(&fromnd, NDF_ONLY_PNBUF);
              /*
               * The source lookup requested no LOCKPARENT/LOCKLEAF, so these are
               * released with vrele() (presumably they are referenced but not
               * locked -- confirm against nfs_namei()).
               */
 2610         if (fromnd.ni_dvp)
 2611                 vrele(fromnd.ni_dvp);
 2612         if (fromnd.ni_vp)
 2613                 vrele(fromnd.ni_vp);
 2614
 2615         vn_finished_write(mp);
 2616         mtx_unlock(&Giant);     /* VFS */
 2617         NFSD_LOCK();
 2618         return (error);
 2619 }
 2620 
 2621 /*
 2622  * nfs link service
       *
       * Decodes the file handle of an existing file, then the file handle of
       * the destination directory and the new name, and creates a hard link
       * with VOP_LINK().  Directories are refused with EPERM, an already
       * existing name with EEXIST, and a destination directory on a different
       * mount with EXDEV.
       *
       * Locking: entered with the NFSD lock held and returns with it held.
       * The NFSD lock is exchanged for Giant around the VFS operations.
       *
       * Returns error; for NFSv3 error is cleared once the post-op attributes
       * and wcc data have been added to the reply.  The nfsm_* macros are
       * defined elsewhere and may jump straight to nfsmout.
 2623  */
 2624 int
 2625 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2626     struct thread *td, struct mbuf **mrq)
 2627 {
 2628         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2629         struct sockaddr *nam = nfsd->nd_nam;
 2630         caddr_t dpos = nfsd->nd_dpos;
 2631         struct ucred *cred = nfsd->nd_cr;
 2632         struct nameidata nd;
 2633         caddr_t bpos;
 2634         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
 2635         int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
 2636         struct mbuf *mb, *mreq;
 2637         struct vnode *vp = NULL, *xp, *dirp = NULL;
 2638         struct vattr dirfor, diraft, at;
 2639         nfsfh_t nfh, dnfh;
 2640         fhandle_t *fhp, *dfhp;
 2641         struct mount *mp = NULL;
 2642
 2643         NFSD_LOCK_ASSERT();
 2644
 2645         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Reset nd before any macro can bail out to the nfsmout cleanup. */
 2646         ndclear(&nd);
 2647
 2648         fhp = &nfh.fh_generic;
 2649         dfhp = &dnfh.fh_generic;
 2650         nfsm_srvmtofh(fhp);
              /* No mount matches the fsid in the handle: the handle is stale. */
 2651         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2652                 error = ESTALE;
 2653                 goto ereply;
 2654         }
 2655         NFSD_UNLOCK();
 2656         mtx_lock(&Giant);       /* VFS */
 2657         (void) vn_start_write(NULL, &mp, V_WAIT);
 2658         vfs_rel(mp);            /* The write holds a ref. */
 2659         mtx_unlock(&Giant);     /* VFS */
 2660         NFSD_LOCK();
 2661         nfsm_srvmtofh(dfhp);
 2662         nfsm_srvnamesiz(len);
 2663
 2664         error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, slp, nam, &rdonly, TRUE);
              /* Could not get the file's vnode: reply now, nothing to release. */
 2665         if (error) {
 2666                 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2667                 if (v3) {
 2668                         nfsm_srvpostop_attr(getret, &at);
 2669                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2670                 }
 2671                 vp = NULL;
 2672                 error = 0;
 2673                 goto nfsmout;
 2674         }
 2675         NFSD_UNLOCK();
 2676         mtx_lock(&Giant);       /* VFS */
 2677         if (v3)
 2678                 getret = VOP_GETATTR(vp, &at, cred, td);
 2679         if (vp->v_type == VDIR) {
 2680                 error = EPERM;          /* POSIX */
 2681                 goto out1;
 2682         }
              /* vp stays unlocked across the lookup; it is re-locked for VOP_LINK. */
 2683         VOP_UNLOCK(vp, 0, td);
 2684         nd.ni_cnd.cn_cred = cred;
 2685         nd.ni_cnd.cn_nameiop = CREATE;
 2686         nd.ni_cnd.cn_flags = LOCKPARENT;
 2687         mtx_unlock(&Giant);     /* VFS */
 2688         NFSD_LOCK();
 2689         error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos,
 2690                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 2691         NFSD_UNLOCK();
 2692         mtx_lock(&Giant);       /* VFS */
              /* dirp is only used for the v3 wcc data; drop it for v2. */
 2693         if (dirp && !v3) {
 2694                 vrele(dirp);
 2695                 dirp = NULL;
 2696         }
              /*
               * On each failure below vp is unlocked, so it is released with
               * vrele() and cleared; the vput(vp) at nfsmout is then skipped.
               */
 2697         if (error) {
 2698                 vrele(vp);
 2699                 vp = NULL;
 2700                 goto out2;
 2701         }
 2702         xp = nd.ni_vp;
              /* The new name already exists. */
 2703         if (xp != NULL) {
 2704                 error = EEXIST;
 2705                 vrele(vp);
 2706                 vp = NULL;
 2707                 goto out2;
 2708         }
 2709         xp = nd.ni_dvp;
              /* File and destination directory must be on the same mount. */
 2710         if (vp->v_mount != xp->v_mount) {
 2711                 error = EXDEV;
 2712                 vrele(vp);
 2713                 vp = NULL;
 2714                 goto out2;
 2715         }
 2716         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 2717         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 2718         NDFREE(&nd, NDF_ONLY_PNBUF);
 2719         /* fall through */
 2720
 2721 out1:
              /* Reached with vp still set and locked: refresh its attributes (v3). */
 2722         if (v3)
 2723                 getret = VOP_GETATTR(vp, &at, cred, td);
 2724 out2:
              /* Fetch the directory's post-operation attributes for the wcc data. */
 2725         if (dirp) {
 2726                 if (dirp == nd.ni_dvp)
 2727                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2728                 else {
 2729                         /* Release existing locks to prevent deadlock. */
 2730                         if (nd.ni_dvp) {
 2731                                 if (nd.ni_dvp == nd.ni_vp)
 2732                                         vrele(nd.ni_dvp);
 2733                                 else
 2734                                         vput(nd.ni_dvp);
 2735                         }
 2736                         if (nd.ni_vp)
 2737                                 vrele(nd.ni_vp);
 2738                         nd.ni_dvp = NULL;
 2739                         nd.ni_vp = NULL;
 2740
 2741                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2742                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2743                         VOP_UNLOCK(dirp, 0, td);
 2744                 }
 2745         }
 2746         mtx_unlock(&Giant);     /* VFS */
 2747         NFSD_LOCK();
 2748 ereply:
 2749         NFSD_LOCK_ASSERT();
 2750         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2751         if (v3) {
 2752                 nfsm_srvpostop_attr(getret, &at);
 2753                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2754                 error = 0;
 2755         }
 2756         /* fall through */
 2757
 2758 nfsmout:
              /* Common exit: release whatever is still held. */
 2759         NFSD_LOCK_ASSERT();
 2760         NFSD_UNLOCK();
 2761         mtx_lock(&Giant);       /* VFS */
 2762         NDFREE(&nd, NDF_ONLY_PNBUF);
 2763         if (vp)
 2764                 vput(vp);
 2765         if (nd.ni_dvp) {
 2766                 if (nd.ni_dvp == nd.ni_vp)
 2767                         vrele(nd.ni_dvp);
 2768                 else
 2769                         vput(nd.ni_dvp);
 2770         }
 2771         if (dirp)
 2772                 vrele(dirp);
 2773         if (nd.ni_vp)
 2774                 vrele(nd.ni_vp);
 2775         vn_finished_write(mp);
 2776         mtx_unlock(&Giant);     /* VFS */
 2777         NFSD_LOCK();
 2778         return(error);
 2779 }
 2780 
 2781 /*
 2782  * nfs symbolic link service
       *
       * Decodes a directory file handle, the new name, the attributes (v3
       * sattr, or the mode from a v2 sattr) and the link target path, then
       * creates the symbolic link with VOP_SYMLINK().  For NFSv3 the new link
       * is looked up again to obtain its file handle and attributes for the
       * reply.  An already existing name is refused with EEXIST.
       *
       * Locking: entered with the NFSD lock held and returns with it held.
       * The NFSD lock is exchanged for Giant around the VFS operations.  The
       * request-parsing macros (nfsm_*) can jump to nfsmout, which expects
       * the NFSD lock to be held and Giant not to be, so they must only be
       * used while the NFSD lock is held.
       *
       * Returns error, which is cleared once a reply has been built; a
       * non-zero value can only be left behind by an nfsm_* macro that jumped
       * to nfsmout.
 2783  */
 2784 int
 2785 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2786     struct thread *td, struct mbuf **mrq)
 2787 {
 2788         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2789         struct sockaddr *nam = nfsd->nd_nam;
 2790         caddr_t dpos = nfsd->nd_dpos;
 2791         struct ucred *cred = nfsd->nd_cr;
 2792         struct vattr va, dirfor, diraft;
 2793         struct nameidata nd;
 2794         struct vattr *vap = &va;
 2795         struct nfsv2_sattr *sp;
 2796         char *bpos, *pathcp = NULL;
 2797         struct uio io;
 2798         struct iovec iv;
 2799         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
 2800         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2801         struct mbuf *mb, *mreq;
 2802         struct vnode *dirp = NULL;
 2803         nfsfh_t nfh;
 2804         fhandle_t *fhp;
 2805         struct mount *mp = NULL;
 2806
 2807         NFSD_LOCK_ASSERT();
 2808
 2809         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Reset nd before any macro can bail out to the nfsmout cleanup. */
 2810         ndclear(&nd);
 2811
 2812         fhp = &nfh.fh_generic;
 2813         nfsm_srvmtofh(fhp);
              /*
               * No mount matches the fsid in the handle: the handle is stale.
               * The "out" label expects Giant held and the NFSD lock dropped.
               */
 2814         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2815                 NFSD_UNLOCK();
 2816                 mtx_lock(&Giant);       /* VFS */
 2817                 error = ESTALE;
 2818                 goto out;
 2819         }
 2820         NFSD_UNLOCK();
 2821         mtx_lock(&Giant);       /* VFS */
 2822         (void) vn_start_write(NULL, &mp, V_WAIT);
 2823         vfs_rel(mp);            /* The write holds a ref. */
 2824         mtx_unlock(&Giant);     /* VFS */
 2825         NFSD_LOCK();
 2826         nfsm_srvnamesiz(len);
 2827         nd.ni_cnd.cn_cred = cred;
 2828         nd.ni_cnd.cn_nameiop = CREATE;
 2829         nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART;
 2830         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 2831                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 2832         if (error == 0) {
 2833                 VATTR_NULL(vap);
 2834                 if (v3)
 2835                         nfsm_srvsattr(vap);
 2836                 nfsm_srvpathsiz(len2);
 2837         }
 2838         NFSD_UNLOCK();
 2839         mtx_lock(&Giant);       /* VFS */
              /* dirp is only used for the v3 wcc data; drop it for v2. */
 2840         if (dirp && !v3) {
 2841                 vrele(dirp);
 2842                 dirp = NULL;
 2843         }
 2844         if (error)
 2845                 goto out;
              /* Buffer for the link target: len2 bytes plus a terminating NUL. */
 2846         MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
 2847         iv.iov_base = pathcp;
 2848         iv.iov_len = len2;
 2849         io.uio_resid = len2;
 2850         io.uio_offset = 0;
 2851         io.uio_iov = &iv;
 2852         io.uio_iovcnt = 1;
 2853         io.uio_segflg = UIO_SYSSPACE;
 2854         io.uio_rw = UIO_READ;
 2855         io.uio_td = NULL;
              /*
               * nfsm_mtouio() and nfsm_dissect_nonblock() jump to nfsmout when
               * the request is short or malformed, and nfsmout asserts and then
               * drops the NFSD lock before taking Giant.  Swap Giant for the
               * NFSD lock around the parsing so that a bad request cannot reach
               * nfsmout with the wrong locks held.
               */
              mtx_unlock(&Giant);     /* VFS */
              NFSD_LOCK();
 2856         nfsm_mtouio(&io, len2);
 2857         if (!v3) {
 2858                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
 2859                 vap->va_mode = nfstov_mode(sp->sa_mode);
 2860         }
              NFSD_UNLOCK();
              mtx_lock(&Giant);       /* VFS */
 2861         *(pathcp + len2) = '\0';
 2862         if (nd.ni_vp) {
 2863                 error = EEXIST;
 2864                 goto out;
 2865         }
 2866
 2867         /*
 2868          * issue symlink op.  SAVESTART is set so the underlying path component
 2869          * is only freed by the VOP if an error occurs.
 2870          */
 2871         if (vap->va_mode == (mode_t)VNOVAL)
 2872                 vap->va_mode = 0;
 2873         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
 2874         if (error)
 2875                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2876         else
 2877                 vput(nd.ni_vp);
 2878         nd.ni_vp = NULL;
 2879         /*
 2880          * releases directory prior to potential lookup op.
 2881          */
 2882         vput(nd.ni_dvp);
 2883         nd.ni_dvp = NULL;
 2884
 2885         if (error == 0) {
 2886             if (v3) {
 2887                 /*
 2888                  * Issue lookup.  Leave SAVESTART set so we can easily free
 2889                  * the name buffer later on.
 2890                  *
 2891                  * since LOCKPARENT is not set, ni_dvp will be garbage on
 2892                  * return whether an error occurs or not.
 2893                  */
 2894                 nd.ni_cnd.cn_nameiop = LOOKUP;
 2895                 nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
 2896                 nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
 2897                 nd.ni_cnd.cn_thread = td;
 2898                 nd.ni_cnd.cn_cred = cred;
 2899
 2900                 error = lookup(&nd);
 2901                 nd.ni_dvp = NULL;
 2902                 if (nd.ni_cnd.cn_flags & GIANTHELD) {
 2903                         mtx_unlock(&Giant);
 2904                         nd.ni_cnd.cn_flags &= ~GIANTHELD;
 2905                 }
 2906
                      /* Build the new link's file handle and fetch its attributes. */
 2907                 if (error == 0) {
 2908                         bzero((caddr_t)fhp, sizeof(nfh));
 2909                         fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 2910                         error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 2911                         if (!error)
 2912                                 error = VOP_GETATTR(nd.ni_vp, vap, cred,
 2913                                         td);
 2914                         vput(nd.ni_vp);
 2915                         nd.ni_vp = NULL;
 2916                 }
 2917             }
 2918         }
 2919 out:
 2920         NFSD_UNLOCK_ASSERT();
 2921         /*
 2922          * These releases aren't strictly required, does even doing them
 2923          * make any sense? XXX can nfsm_reply() block?
 2924          */
 2925         if (pathcp) {
 2926                 FREE(pathcp, M_TEMP);
 2927                 pathcp = NULL;
 2928         }
              /*
               * NOTE(review): on the EEXIST path nd.ni_dvp is still locked here;
               * if dirp is the same vnode this vn_lock() relocks it, whereas the
               * sibling handlers release nd's vnodes first -- confirm.
               */
 2929         if (dirp) {
 2930                 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2931                 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2932                 VOP_UNLOCK(dirp, 0, td);
 2933         }
 2934         if (nd.ni_startdir) {
 2935                 vrele(nd.ni_startdir);
 2936                 nd.ni_startdir = NULL;
 2937         }
 2938         mtx_unlock(&Giant);     /* VFS */
 2939         NFSD_LOCK();
 2940         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2941         if (v3) {
 2942                 if (!error) {
 2943                         nfsm_srvpostop_fh(fhp);
 2944                         nfsm_srvpostop_attr(0, vap);
 2945                 }
 2946                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2947         }
 2948         error = 0;
 2949         /* fall through */
 2950
 2951 nfsmout:
              /* Common exit: release whatever is still held. */
 2952         NFSD_LOCK_ASSERT();
 2953         NFSD_UNLOCK();
 2954         mtx_lock(&Giant);       /* VFS */
 2955         NDFREE(&nd, NDF_ONLY_PNBUF);
 2956         if (nd.ni_dvp) {
 2957                 if (nd.ni_dvp == nd.ni_vp)
 2958                         vrele(nd.ni_dvp);
 2959                 else
 2960                         vput(nd.ni_dvp);
 2961         }
 2962         if (nd.ni_vp)
 2963                 vrele(nd.ni_vp);
 2964         if (nd.ni_startdir)
 2965                 vrele(nd.ni_startdir);
 2966         if (dirp)
 2967                 vrele(dirp);
 2968         if (pathcp)
 2969                 FREE(pathcp, M_TEMP);
 2970
 2971         vn_finished_write(mp);
 2972         mtx_unlock(&Giant);     /* VFS */
 2973         NFSD_LOCK();
 2974         return (error);
 2975 }
 2976 
 2977 /*
 2978  * nfs mkdir service
       *
       * Decodes a directory file handle and the new name, looks the name up
       * for CREATE with the parent locked, parses the attributes (a v3 sattr,
       * or a single mode word for v2) and creates the directory with
       * VOP_MKDIR().  On success the new directory's file handle and
       * attributes are put in the reply.  An already existing name is refused
       * with EEXIST.
       *
       * Locking: entered with the NFSD lock held and returns with it held.
       * The NFSD lock is exchanged for Giant around the VFS operations.
       *
       * Returns error, which is cleared once a reply has been built; a
       * non-zero value can only be left behind by an nfsm_* macro that jumped
       * to nfsmout (macros are defined elsewhere -- confirm).
 2979  */
 2980 int
 2981 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2982     struct thread *td, struct mbuf **mrq)
 2983 {
 2984         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2985         struct sockaddr *nam = nfsd->nd_nam;
 2986         caddr_t dpos = nfsd->nd_dpos;
 2987         struct ucred *cred = nfsd->nd_cr;
 2988         struct vattr va, dirfor, diraft;
 2989         struct vattr *vap = &va;
 2990         struct nfs_fattr *fp;
 2991         struct nameidata nd;
 2992         u_int32_t *tl;
 2993         caddr_t bpos;
 2994         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2995         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2996         struct mbuf *mb, *mreq;
 2997         struct vnode *dirp = NULL;
              /* Set once VOP_MKDIR has run; selects vput() over vrele() for ni_vp. */
 2998         int vpexcl = 0;
 2999         nfsfh_t nfh;
 3000         fhandle_t *fhp;
 3001         struct mount *mp = NULL;
 3002
 3003         NFSD_LOCK_ASSERT();
 3004
 3005         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Reset nd before any macro can bail out to the nfsmout cleanup. */
 3006         ndclear(&nd);
 3007
 3008         fhp = &nfh.fh_generic;
 3009         nfsm_srvmtofh(fhp);
              /*
               * No mount matches the fsid in the handle: the handle is stale.
               * The "out" label expects Giant held and the NFSD lock dropped.
               */
 3010         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 3011                 NFSD_UNLOCK();
 3012                 mtx_lock(&Giant);       /* VFS */
 3013                 error = ESTALE;
 3014                 goto out;
 3015         }
 3016         NFSD_UNLOCK();
 3017         mtx_lock(&Giant);       /* VFS */
 3018         (void) vn_start_write(NULL, &mp, V_WAIT);
 3019         vfs_rel(mp);            /* The write holds a ref. */
 3020         mtx_unlock(&Giant);     /* VFS */
 3021         NFSD_LOCK();
 3022         nfsm_srvnamesiz(len);
 3023         nd.ni_cnd.cn_cred = cred;
 3024         nd.ni_cnd.cn_nameiop = CREATE;
 3025         nd.ni_cnd.cn_flags = LOCKPARENT;
 3026
 3027         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 3028                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
              /* dirp is only used for the v3 wcc data; drop it for v2. */
 3029         if (dirp && !v3) {
 3030                 NFSD_UNLOCK();
 3031                 mtx_lock(&Giant);       /* VFS */
 3032                 vrele(dirp);
 3033                 mtx_unlock(&Giant);     /* VFS */
 3034                 NFSD_LOCK();
 3035                 dirp = NULL;
 3036         }
              /* Lookup failed: reply right away without parsing the attributes. */
 3037         if (error) {
 3038                 nfsm_reply(NFSX_WCCDATA(v3));
 3039                 if (v3)
 3040                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 3041                 error = 0;
 3042                 goto nfsmout;
 3043         }
 3044         VATTR_NULL(vap);
 3045         if (v3) {
 3046                 nfsm_srvsattr(vap);
 3047         } else {
                      /* v2: only the mode word is read from the request here. */
 3048                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 3049                 vap->va_mode = nfstov_mode(*tl++);
 3050         }
 3051
 3052         /*
 3053          * At this point nd.ni_dvp is referenced and exclusively locked and
 3054          * nd.ni_vp, if it exists, is referenced but not locked.
 3055          */
 3056
 3057         NFSD_UNLOCK();
 3058         mtx_lock(&Giant);       /* VFS */
 3059         vap->va_type = VDIR;
              /* The name already exists. */
 3060         if (nd.ni_vp != NULL) {
 3061                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3062                 error = EEXIST;
 3063                 goto out;
 3064         }
 3065
 3066         /*
 3067          * Issue mkdir op.  Since SAVESTART is not set, the pathname
 3068          * component is freed by the VOP call.  This will fill-in
 3069          * nd.ni_vp, reference, and exclusively lock it.
 3070          */
 3071         if (vap->va_mode == (mode_t)VNOVAL)
 3072                 vap->va_mode = 0;
 3073         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 3074         NDFREE(&nd, NDF_ONLY_PNBUF);
 3075         vpexcl = 1;
 3076
 3077         vput(nd.ni_dvp);
 3078         nd.ni_dvp = NULL;
 3079
              /* Build the new directory's file handle and fetch its attributes. */
 3080         if (!error) {
 3081                 bzero((caddr_t)fhp, sizeof(nfh));
 3082                 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 3083                 error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 3084                 if (!error)
 3085                         error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
 3086         }
 3087 out:
 3088         NFSD_UNLOCK_ASSERT();
              /* Fetch the parent's post-operation attributes for the wcc data. */
 3089         if (dirp) {
 3090                 if (dirp == nd.ni_dvp) {
 3091                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 3092                 } else {
 3093                         /* Release existing locks to prevent deadlock. */
 3094                         if (nd.ni_dvp) {
 3095                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3096                                 if (nd.ni_dvp == nd.ni_vp && vpexcl)
 3097                                         vrele(nd.ni_dvp);
 3098                                 else
 3099                                         vput(nd.ni_dvp);
 3100                         }
 3101                         if (nd.ni_vp) {
 3102                                 if (vpexcl)
 3103                                         vput(nd.ni_vp);
 3104                                 else
 3105                                         vrele(nd.ni_vp);
 3106                         }
 3107                         nd.ni_dvp = NULL;
 3108                         nd.ni_vp = NULL;
 3109                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 3110                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 3111                         VOP_UNLOCK(dirp, 0, td);
 3112                 }
 3113         }
 3114         mtx_unlock(&Giant);     /* VFS */
 3115         NFSD_LOCK();
 3116         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 3117         if (v3) {
 3118                 if (!error) {
 3119                         nfsm_srvpostop_fh(fhp);
 3120                         nfsm_srvpostop_attr(0, vap);
 3121                 }
 3122                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 3123         } else if (!error) {
 3124                 /* v2 non-error case. */
 3125                 nfsm_srvfhtom(fhp, v3);
 3126                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 3127                 nfsm_srvfillattr(vap, fp);
 3128         }
 3129         error = 0;
 3130         /* fall through */
 3131
 3132 nfsmout:
              /* Common exit: release whatever is still held. */
 3133         NFSD_LOCK_ASSERT();
 3134         NFSD_UNLOCK();
 3135         mtx_lock(&Giant);       /* VFS */
 3136         if (nd.ni_dvp) {
 3137                 NDFREE(&nd, NDF_ONLY_PNBUF);
 3138                 if (nd.ni_dvp == nd.ni_vp && vpexcl)
 3139                         vrele(nd.ni_dvp);
 3140                 else
 3141                         vput(nd.ni_dvp);
 3142         }
 3143         if (nd.ni_vp) {
 3144                 if (vpexcl)
 3145                         vput(nd.ni_vp);
 3146                 else
 3147                         vrele(nd.ni_vp);
 3148         }
 3149         if (dirp)
 3150                 vrele(dirp);
 3151         vn_finished_write(mp);
 3152         mtx_unlock(&Giant);     /* VFS */
 3153         NFSD_LOCK();
 3154         return (error);
 3155 }
 3156 
 3157 /*
 3158  * nfs rmdir service
 3159  */
 3160 int
 3161 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3162     struct thread *td, struct mbuf **mrq)
 3163 {
 3164         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3165         struct sockaddr *nam = nfsd->nd_nam;
 3166         caddr_t dpos = nfsd->nd_dpos;
 3167         struct ucred *cred = nfsd->nd_cr;
 3168         caddr_t bpos;
 3169         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 3170         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3171         struct mbuf *mb, *mreq;
 3172         struct vnode *vp, *dirp = NULL;
 3173         struct vattr dirfor, diraft;
 3174         nfsfh_t nfh;
 3175         fhandle_t *fhp;
 3176         struct nameidata nd;
 3177         struct mount *mp = NULL;
 3178 
 3179         NFSD_LOCK_ASSERT();
 3180 
 3181         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3182         ndclear(&nd);
 3183 
 3184         fhp = &nfh.fh_generic;
 3185         nfsm_srvmtofh(fhp);
 3186         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 3187                 error = ESTALE;
 3188                 goto out;
 3189         }
 3190         NFSD_UNLOCK();
 3191         mtx_lock(&Giant);       /* VFS */
 3192         (void) vn_start_write(NULL, &mp, V_WAIT);
 3193         vfs_rel(mp);            /* The write holds a ref. */
 3194         mtx_unlock(&Giant);     /* VFS */
 3195         NFSD_LOCK();
 3196         nfsm_srvnamesiz(len);
 3197         nd.ni_cnd.cn_cred = cred;
 3198         nd.ni_cnd.cn_nameiop = DELETE;
 3199         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
 3200         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 3201                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 3202         if (dirp && !v3) {
 3203                 NFSD_UNLOCK();
 3204                 mtx_lock(&Giant);       /* VFS */
 3205                 vrele(dirp);
 3206                 mtx_unlock(&Giant);     /* VFS */
 3207                 NFSD_LOCK();
 3208                 dirp = NULL;
 3209         }
 3210         if (error) {
 3211                 nfsm_reply(NFSX_WCCDATA(v3));
 3212                 if (v3)
 3213                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 3214                 error = 0;
 3215                 goto nfsmout;
 3216         }
 3217         vp = nd.ni_vp;
 3218         if (vp->v_type != VDIR) {
 3219                 error = ENOTDIR;
 3220                 goto out;
 3221         }
 3222         /*
 3223          * No rmdir "." please.
 3224          */
 3225         if (nd.ni_dvp == vp) {
 3226                 error = EINVAL;
 3227                 goto out;
 3228         }
 3229         /*
 3230          * The root of a mounted filesystem cannot be deleted.
 3231          */
 3232         if (vp->v_vflag & VV_ROOT)
 3233                 error = EBUSY;
 3234 out:
 3235         /*
 3236          * Issue or abort op.  Since SAVESTART is not set, path name
 3237          * component is freed by the VOP after either.
 3238          */
 3239         NFSD_LOCK_ASSERT();
 3240         NFSD_UNLOCK();
 3241         mtx_lock(&Giant);       /* VFS */
 3242         if (!error)
 3243                 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3244         NDFREE(&nd, NDF_ONLY_PNBUF);
 3245 
 3246         if (dirp) {
 3247                 if (dirp == nd.ni_dvp)
 3248                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 3249                 else {
 3250                         /* Release existing locks to prevent deadlock. */
 3251                         if (nd.ni_dvp) {
 3252                                 if (nd.ni_dvp == nd.ni_vp)
 3253                                         vrele(nd.ni_dvp);
 3254                                 else
 3255                                         vput(nd.ni_dvp);
 3256                         }
 3257                         if (nd.ni_vp)
 3258                                 vput(nd.ni_vp);
 3259                         nd.ni_dvp = NULL;
 3260                         nd.ni_vp = NULL;
 3261                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 3262                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 3263                         VOP_UNLOCK(dirp, 0, td);
 3264                 }
 3265         }
 3266         mtx_unlock(&Giant);     /* VFS */
 3267         NFSD_LOCK();
 3268         nfsm_reply(NFSX_WCCDATA(v3));
 3269         error = 0;
 3270         if (v3)
 3271                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 3272         /* fall through */
 3273 
 3274 nfsmout:
 3275         NFSD_LOCK_ASSERT();
 3276         NFSD_UNLOCK();
 3277         mtx_lock(&Giant);       /* VFS */
 3278         NDFREE(&nd, NDF_ONLY_PNBUF);
 3279         if (nd.ni_dvp) {
 3280                 if (nd.ni_dvp == nd.ni_vp)
 3281                         vrele(nd.ni_dvp);
 3282                 else
 3283                         vput(nd.ni_dvp);
 3284         }
 3285         if (nd.ni_vp)
 3286                 vput(nd.ni_vp);
 3287         if (dirp)
 3288                 vrele(dirp);
 3289 
 3290         vn_finished_write(mp);
 3291         mtx_unlock(&Giant);     /* VFS */
 3292         NFSD_LOCK();
 3293         return(error);
 3294 }
 3295 
 3296 /*
 3297  * nfs readdir service
 3298  * - mallocs what it thinks is enough to read
 3299  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
 3300  * - calls VOP_READDIR()
 3301  * - loops around building the reply
 3302  *      if the output generated exceeds count break out of loop
 3303  *      The nfsm_clget macro is used here so that the reply will be packed
 3304  *      tightly in mbuf clusters.
 3305  * - it only knows that it has encountered eof when the VOP_READDIR()
 3306  *      reads nothing
 3307  * - as such one readdir rpc will return eof false although you are there
 3308  *      and then the next will return eof
 3309  * - it trims out records with d_fileno == 0
 3310  *      this doesn't matter for Unix clients, but they might confuse clients
 3311  *      for other os'.
 3312  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
 3313  *      than requested, but this may not apply to all filesystems. For
 3314  *      example, client NFS does not { although it is never remote mounted
 3315  *      anyhow }
 3316  *     The alternate call nfsrv_readdirplus() does lookups as well.
 3317  * PS: The NFS protocol spec. does not clarify what the "count" byte
 3318  *      argument is a count of.. just name strings and file id's or the
 3319  *      entire reply rpc or ...
 3320  *      I tried just file name and id sizes and it confused the Sun client,
 3321  *      so I am using the full rpc size now. The "paranoia.." comment refers
 3322  *      to including the status longwords that are not a part of the dir.
 3323  *      "entry" structures, but are in the rpc.
 3324  */
 3325 struct flrep {
 3326         nfsuint64       fl_off;
 3327         u_int32_t       fl_postopok;
 3328         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
 3329         u_int32_t       fl_fhok;
 3330         u_int32_t       fl_fhsize;
 3331         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
 3332 };
 3333 
 3334 int
 3335 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3336     struct thread *td, struct mbuf **mrq)
 3337 {
 3338         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3339         struct sockaddr *nam = nfsd->nd_nam;
 3340         caddr_t dpos = nfsd->nd_dpos;
 3341         struct ucred *cred = nfsd->nd_cr;
 3342         char *bp, *be;
 3343         struct mbuf *mp;
 3344         struct dirent *dp;
 3345         caddr_t cp;
 3346         u_int32_t *tl;
 3347         caddr_t bpos;
 3348         struct mbuf *mb, *mreq;
 3349         char *cpos, *cend, *rbuf;
 3350         struct vnode *vp = NULL;
 3351         struct vattr at;
 3352         nfsfh_t nfh;
 3353         fhandle_t *fhp;
 3354         struct uio io;
 3355         struct iovec iv;
 3356         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
 3357         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
 3358         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3359         u_quad_t off, toff, verf;
 3360         u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
 3361 
 3362         NFSD_LOCK_ASSERT();
 3363 
 3364         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3365         fhp = &nfh.fh_generic;
 3366         nfsm_srvmtofh(fhp);
 3367         if (v3) {
 3368                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 3369                 toff = fxdr_hyper(tl);
 3370                 tl += 2;
 3371                 verf = fxdr_hyper(tl);
 3372                 tl += 2;
 3373         } else {
 3374                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
 3375                 toff = fxdr_unsigned(u_quad_t, *tl++);
 3376                 verf = 0;       /* shut up gcc */
 3377         }
 3378         off = toff;
 3379         cnt = fxdr_unsigned(int, *tl);
 3380         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 3381         xfer = NFS_SRVMAXDATA(nfsd);
 3382         if (cnt > xfer)
 3383                 cnt = xfer;
 3384         if (siz > xfer)
 3385                 siz = xfer;
 3386         fullsiz = siz;
 3387         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
 3388         if (!error && vp->v_type != VDIR) {
 3389                 error = ENOTDIR;
 3390                 NFSD_UNLOCK();
 3391                 mtx_lock(&Giant);       /* VFS */
 3392                 vput(vp);
 3393                 mtx_unlock(&Giant);     /* VFS */
 3394                 NFSD_LOCK();
 3395                 vp = NULL;
 3396         }
 3397         if (error) {
 3398                 nfsm_reply(NFSX_UNSIGNED);
 3399                 if (v3)
 3400                         nfsm_srvpostop_attr(getret, &at);
 3401                 error = 0;
 3402                 goto nfsmout;
 3403         }
 3404 
 3405         /*
 3406          * Obtain lock on vnode for this section of the code
 3407          */
 3408         NFSD_UNLOCK();
 3409         mtx_lock(&Giant);       /* VFS */
 3410         if (v3) {
 3411                 error = getret = VOP_GETATTR(vp, &at, cred, td);
 3412 #if 0
 3413                 /*
 3414                  * XXX This check may be too strict for Solaris 2.5 clients.
 3415                  */
 3416                 if (!error && toff && verf && verf != at.va_filerev)
 3417                         error = NFSERR_BAD_COOKIE;
 3418 #endif
 3419         }
 3420         if (!error)
 3421                 error = nfsrv_access_withgiant(vp, VEXEC, cred, rdonly, td, 0);
 3422         if (error) {
 3423                 vput(vp);
 3424                 mtx_unlock(&Giant);     /* VFS */
 3425                 NFSD_LOCK();
 3426                 vp = NULL;
 3427                 nfsm_reply(NFSX_POSTOPATTR(v3));
 3428                 if (v3)
 3429                         nfsm_srvpostop_attr(getret, &at);
 3430                 error = 0;
 3431                 goto nfsmout;
 3432         }
 3433         VOP_UNLOCK(vp, 0, td);
 3434 
 3435         /*
 3436          * end section.  Allocate rbuf and continue
 3437          */
 3438         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 3439 again:
 3440         NFSD_UNLOCK_ASSERT();
 3441         iv.iov_base = rbuf;
 3442         iv.iov_len = fullsiz;
 3443         io.uio_iov = &iv;
 3444         io.uio_iovcnt = 1;
 3445         io.uio_offset = (off_t)off;
 3446         io.uio_resid = fullsiz;
 3447         io.uio_segflg = UIO_SYSSPACE;
 3448         io.uio_rw = UIO_READ;
 3449         io.uio_td = NULL;
 3450         eofflag = 0;
 3451         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 3452         if (cookies) {
 3453                 free((caddr_t)cookies, M_TEMP);
 3454                 cookies = NULL;
 3455         }
 3456         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
 3457         off = (off_t)io.uio_offset;
 3458         if (!cookies && !error)
 3459                 error = NFSERR_PERM;
 3460         if (v3) {
 3461                 getret = VOP_GETATTR(vp, &at, cred, td);
 3462                 if (!error)
 3463                         error = getret;
 3464         }
 3465         VOP_UNLOCK(vp, 0, td);
 3466         if (error) {
 3467                 vrele(vp);
 3468                 mtx_unlock(&Giant);     /* VFS */
 3469                 vp = NULL;
 3470                 free((caddr_t)rbuf, M_TEMP);
 3471                 if (cookies)
 3472                         free((caddr_t)cookies, M_TEMP);
 3473                 NFSD_LOCK();
 3474                 nfsm_reply(NFSX_POSTOPATTR(v3));
 3475                 if (v3)
 3476                         nfsm_srvpostop_attr(getret, &at);
 3477                 error = 0;
 3478                 goto nfsmout;
 3479         }
 3480         if (io.uio_resid) {
 3481                 siz -= io.uio_resid;
 3482 
 3483                 /*
 3484                  * If nothing read, return eof
 3485                  * rpc reply
 3486                  */
 3487                 if (siz == 0) {
 3488                         vrele(vp);
 3489                         mtx_unlock(&Giant);     /* VFS */
 3490                         vp = NULL;
 3491                         NFSD_LOCK();
 3492                         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
 3493                                 2 * NFSX_UNSIGNED);
 3494                         if (v3) {
 3495                                 nfsm_srvpostop_attr(getret, &at);
 3496                                 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 3497                                 txdr_hyper(at.va_filerev, tl);
 3498                                 tl += 2;
 3499                         } else
 3500                                 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 3501                         *tl++ = nfsrv_nfs_false;
 3502                         *tl = nfsrv_nfs_true;
 3503                         FREE((caddr_t)rbuf, M_TEMP);
 3504                         FREE((caddr_t)cookies, M_TEMP);
 3505                         error = 0;
 3506                         goto nfsmout;
 3507                 }
 3508         }
 3509 
 3510         /*
 3511          * Check for degenerate cases of nothing useful read.
 3512          * If so go try again
 3513          */
 3514         cpos = rbuf;
 3515         cend = rbuf + siz;
 3516         dp = (struct dirent *)cpos;
 3517         cookiep = cookies;
 3518         /*
 3519          * For some reason FreeBSD's ufs_readdir() chooses to back the
 3520          * directory offset up to a block boundary, so it is necessary to
 3521          * skip over the records that precede the requested offset. This
 3522          * requires the assumption that file offset cookies monotonically
 3523          * increase.
 3524          */
 3525         while (cpos < cend && ncookies > 0 &&
 3526                 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 3527                  ((u_quad_t)(*cookiep)) <= toff)) {
 3528                 cpos += dp->d_reclen;
 3529                 dp = (struct dirent *)cpos;
 3530                 cookiep++;
 3531                 ncookies--;
 3532         }
 3533         if (cpos >= cend || ncookies == 0) {
 3534                 toff = off;
 3535                 siz = fullsiz;
 3536                 goto again;
 3537         }
 3538 
 3539         mtx_unlock(&Giant);     /* VFS */
 3540         NFSD_LOCK();
 3541         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
 3542         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
 3543         if (v3) {
 3544                 nfsm_srvpostop_attr(getret, &at);
 3545                 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 3546                 txdr_hyper(at.va_filerev, tl);
 3547         }
 3548         mp = mb;
 3549         bp = bpos;
 3550         be = bp + M_TRAILINGSPACE(mp);
 3551 
 3552         /* Loop through the records and build reply */
 3553         while (cpos < cend && ncookies > 0) {
 3554                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
 3555                         nlen = dp->d_namlen;
 3556                         rem = nfsm_rndup(nlen) - nlen;
 3557                         len += (4 * NFSX_UNSIGNED + nlen + rem);
 3558                         if (v3)
 3559                                 len += 2 * NFSX_UNSIGNED;
 3560                         if (len > cnt) {
 3561                                 eofflag = 0;
 3562                                 break;
 3563                         }
 3564                         /*
 3565                          * Build the directory record xdr from
 3566                          * the dirent entry.
 3567                          */
 3568                         nfsm_clget;
 3569                         *tl = nfsrv_nfs_true;
 3570                         bp += NFSX_UNSIGNED;
 3571                         if (v3) {
 3572                                 nfsm_clget;
 3573                                 *tl = 0;
 3574                                 bp += NFSX_UNSIGNED;
 3575                         }
 3576                         nfsm_clget;
 3577                         *tl = txdr_unsigned(dp->d_fileno);
 3578                         bp += NFSX_UNSIGNED;
 3579                         nfsm_clget;
 3580                         *tl = txdr_unsigned(nlen);
 3581                         bp += NFSX_UNSIGNED;
 3582 
 3583                         /* And loop around copying the name */
 3584                         xfer = nlen;
 3585                         cp = dp->d_name;
 3586                         while (xfer > 0) {
 3587                                 nfsm_clget;
 3588                                 if ((bp+xfer) > be)
 3589                                         tsiz = be-bp;
 3590                                 else
 3591                                         tsiz = xfer;
 3592                                 bcopy(cp, bp, tsiz);
 3593                                 bp += tsiz;
 3594                                 xfer -= tsiz;
 3595                                 if (xfer > 0)
 3596                                         cp += tsiz;
 3597                         }
 3598                         /* And null pad to an int32_t boundary. */
 3599                         for (i = 0; i < rem; i++)
 3600                                 *bp++ = '\0';
 3601                         nfsm_clget;
 3602 
 3603                         /* Finish off the record */
 3604                         if (v3) {
 3605                                 *tl = 0;
 3606                                 bp += NFSX_UNSIGNED;
 3607                                 nfsm_clget;
 3608                         }
 3609                         *tl = txdr_unsigned(*cookiep);
 3610                         bp += NFSX_UNSIGNED;
 3611                 }
 3612                 cpos += dp->d_reclen;
 3613                 dp = (struct dirent *)cpos;
 3614                 cookiep++;
 3615                 ncookies--;
 3616         }
 3617         NFSD_UNLOCK();
 3618         mtx_lock(&Giant);       /* VFS */
 3619         vrele(vp);
 3620         mtx_unlock(&Giant);     /* VFS */
 3621         NFSD_LOCK();
 3622         vp = NULL;
 3623         nfsm_clget;
 3624         *tl = nfsrv_nfs_false;
 3625         bp += NFSX_UNSIGNED;
 3626         nfsm_clget;
 3627         if (eofflag)
 3628                 *tl = nfsrv_nfs_true;
 3629         else
 3630                 *tl = nfsrv_nfs_false;
 3631         bp += NFSX_UNSIGNED;
 3632         if (mp != mb) {
 3633                 if (bp < be)
 3634                         mp->m_len = bp - mtod(mp, caddr_t);
 3635         } else
 3636                 mp->m_len += bp - bpos;
 3637         FREE((caddr_t)rbuf, M_TEMP);
 3638         FREE((caddr_t)cookies, M_TEMP);
 3639 
 3640 nfsmout:
 3641         NFSD_LOCK_ASSERT();
 3642         if (vp) {
 3643                 NFSD_UNLOCK();
 3644                 mtx_lock(&Giant);       /* VFS */
 3645                 vrele(vp);
 3646                 mtx_unlock(&Giant);     /* VFS */
 3647                 NFSD_LOCK();
 3648         }
 3649         return(error);
 3650 }
 3651 
 3652 int
 3653 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3654     struct thread *td, struct mbuf **mrq)
 3655 {
 3656         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3657         struct sockaddr *nam = nfsd->nd_nam;
 3658         caddr_t dpos = nfsd->nd_dpos;
 3659         struct ucred *cred = nfsd->nd_cr;
 3660         char *bp, *be;
 3661         struct mbuf *mp;
 3662         struct dirent *dp;
 3663         caddr_t cp;
 3664         u_int32_t *tl;
 3665         caddr_t bpos;
 3666         struct mbuf *mb, *mreq;
 3667         char *cpos, *cend, *rbuf;
 3668         struct vnode *vp = NULL, *nvp;
 3669         struct flrep fl;
 3670         nfsfh_t nfh;
 3671         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
 3672         struct uio io;
 3673         struct iovec iv;
 3674         struct vattr va, at, *vap = &va;
 3675         struct nfs_fattr *fp;
 3676         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
 3677         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
 3678         u_quad_t off, toff, verf;
 3679         u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
 3680         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3681 
 3682         NFSD_LOCK_ASSERT();
 3683 
 3684         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3685         if (!v3)
 3686                 panic("nfsrv_readdirplus: v3 proc called on a v2 connection");
 3687         fhp = &nfh.fh_generic;
 3688         nfsm_srvmtofh(fhp);
 3689         tl = nfsm_dissect_nonblock(u_int32_t *, 6 * NFSX_UNSIGNED);
 3690         toff = fxdr_hyper(tl);
 3691         tl += 2;
 3692         verf = fxdr_hyper(tl);
 3693         tl += 2;
 3694         siz = fxdr_unsigned(int, *tl++);
 3695         cnt = fxdr_unsigned(int, *tl);
 3696         off = toff;
 3697         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 3698         xfer = NFS_SRVMAXDATA(nfsd);
 3699         if (cnt > xfer)
 3700                 cnt = xfer;
 3701         if (siz > xfer)
 3702                 siz = xfer;
 3703         fullsiz = siz;
 3704         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
 3705         if (!error && vp->v_type != VDIR) {
 3706                 error = ENOTDIR;
 3707                 NFSD_UNLOCK();
 3708                 mtx_lock(&Giant);       /* VFS */
 3709                 vput(vp);
 3710                 mtx_unlock(&Giant);     /* VFS */
 3711                 vp = NULL;
 3712                 NFSD_LOCK();
 3713         }
 3714         if (error) {
 3715                 nfsm_reply(NFSX_UNSIGNED);
 3716                 nfsm_srvpostop_attr(getret, &at);
 3717                 error = 0;
 3718                 goto nfsmout;
 3719         }
 3720         NFSD_UNLOCK();
 3721         mtx_lock(&Giant);       /* VFS */
 3722         error = getret = VOP_GETATTR(vp, &at, cred, td);
 3723 #if 0
 3724         /*
 3725          * XXX This check may be too strict for Solaris 2.5 clients.
 3726          */
 3727         if (!error && toff && verf && verf != at.va_filerev)
 3728                 error = NFSERR_BAD_COOKIE;
 3729 #endif
 3730         if (!error)
 3731                 error = nfsrv_access_withgiant(vp, VEXEC, cred, rdonly, td, 0);
 3732         if (error) {
 3733                 vput(vp);
 3734                 mtx_unlock(&Giant);     /* VFS */
 3735                 NFSD_LOCK();
 3736                 vp = NULL;
 3737                 nfsm_reply(NFSX_V3POSTOPATTR);
 3738                 nfsm_srvpostop_attr(getret, &at);
 3739                 error = 0;
 3740                 goto nfsmout;
 3741         }
 3742         VOP_UNLOCK(vp, 0, td);
 3743         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 3744 again:
 3745         NFSD_UNLOCK_ASSERT();
 3746         iv.iov_base = rbuf;
 3747         iv.iov_len = fullsiz;
 3748         io.uio_iov = &iv;
 3749         io.uio_iovcnt = 1;
 3750         io.uio_offset = (off_t)off;
 3751         io.uio_resid = fullsiz;
 3752         io.uio_segflg = UIO_SYSSPACE;
 3753         io.uio_rw = UIO_READ;
 3754         io.uio_td = NULL;
 3755         eofflag = 0;
 3756         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 3757         if (cookies) {
 3758                 free((caddr_t)cookies, M_TEMP);
 3759                 cookies = NULL;
 3760         }
 3761         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
 3762         off = (u_quad_t)io.uio_offset;
 3763         getret = VOP_GETATTR(vp, &at, cred, td);
 3764         VOP_UNLOCK(vp, 0, td);
 3765         if (!cookies && !error)
 3766                 error = NFSERR_PERM;
 3767         if (!error)
 3768                 error = getret;
 3769         if (error) {
 3770                 vrele(vp);
 3771                 mtx_unlock(&Giant);     /* VFS */
 3772                 vp = NULL;
 3773                 if (cookies)
 3774                         free((caddr_t)cookies, M_TEMP);
 3775                 free((caddr_t)rbuf, M_TEMP);
 3776                 NFSD_LOCK();
 3777                 nfsm_reply(NFSX_V3POSTOPATTR);
 3778                 nfsm_srvpostop_attr(getret, &at);
 3779                 error = 0;
 3780                 goto nfsmout;
 3781         }
 3782         if (io.uio_resid) {
 3783                 siz -= io.uio_resid;
 3784 
 3785                 /*
 3786                  * If nothing read, return eof
 3787                  * rpc reply
 3788                  */
 3789                 if (siz == 0) {
 3790                         vrele(vp);
 3791                         mtx_unlock(&Giant);     /* VFS */
 3792                         NFSD_LOCK();
 3793                         vp = NULL;
 3794                         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
 3795                                 2 * NFSX_UNSIGNED);
 3796                         nfsm_srvpostop_attr(getret, &at);
 3797                         tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 3798                         txdr_hyper(at.va_filerev, tl);
 3799                         tl += 2;
 3800                         *tl++ = nfsrv_nfs_false;
 3801                         *tl = nfsrv_nfs_true;
 3802                         FREE((caddr_t)cookies, M_TEMP);
 3803                         FREE((caddr_t)rbuf, M_TEMP);
 3804                         error = 0;
 3805                         goto nfsmout;
 3806                 }
 3807         }
 3808 
 3809         /*
 3810          * Check for degenerate cases of nothing useful read.
 3811          * If so go try again
 3812          */
 3813         cpos = rbuf;
 3814         cend = rbuf + siz;
 3815         dp = (struct dirent *)cpos;
 3816         cookiep = cookies;
 3817         /*
 3818          * For some reason FreeBSD's ufs_readdir() chooses to back the
 3819          * directory offset up to a block boundary, so it is necessary to
 3820          * skip over the records that precede the requested offset. This
 3821          * requires the assumption that file offset cookies monotonically
 3822          * increase.
 3823          */
 3824         while (cpos < cend && ncookies > 0 &&
 3825                 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 3826                  ((u_quad_t)(*cookiep)) <= toff)) {
 3827                 cpos += dp->d_reclen;
 3828                 dp = (struct dirent *)cpos;
 3829                 cookiep++;
 3830                 ncookies--;
 3831         }
 3832         if (cpos >= cend || ncookies == 0) {
 3833                 toff = off;
 3834                 siz = fullsiz;
 3835                 goto again;
 3836         }
 3837 
 3838         /*
 3839          * Probe one of the directory entries to see if the filesystem
 3840          * supports VGET.
 3841          */
 3842         if (VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE, &nvp) ==
 3843             EOPNOTSUPP) {
 3844                 error = NFSERR_NOTSUPP;
 3845                 vrele(vp);
 3846                 mtx_unlock(&Giant);     /* VFS */
 3847                 vp = NULL;
 3848                 free((caddr_t)cookies, M_TEMP);
 3849                 free((caddr_t)rbuf, M_TEMP);
 3850                 NFSD_LOCK();
 3851                 nfsm_reply(NFSX_V3POSTOPATTR);
 3852                 nfsm_srvpostop_attr(getret, &at);
 3853                 error = 0;
 3854                 goto nfsmout;
 3855         }
 3856         vput(nvp);
 3857         mtx_unlock(&Giant);     /* VFS */
 3858         nvp = NULL;
 3859 
 3860         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
 3861             2 * NFSX_UNSIGNED;
 3862         NFSD_LOCK();
 3863         nfsm_reply(cnt);
 3864         nfsm_srvpostop_attr(getret, &at);
 3865         tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 3866         txdr_hyper(at.va_filerev, tl);
 3867         mp = mb;
 3868         bp = bpos;
 3869         be = bp + M_TRAILINGSPACE(mp);
 3870 
 3871         NFSD_UNLOCK();
 3872         mtx_lock(&Giant);       /* VFS */
 3873         /* Loop through the records and build reply */
 3874         while (cpos < cend && ncookies > 0) {
 3875                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
 3876                         nlen = dp->d_namlen;
 3877                         rem = nfsm_rndup(nlen)-nlen;
 3878 
 3879                         /*
 3880                          * For readdir_and_lookup get the vnode using
 3881                          * the file number.
 3882                          */
 3883                         if (VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE,
 3884                             &nvp))
 3885                                 goto invalid;
 3886                         bzero((caddr_t)nfhp, NFSX_V3FH);
 3887                         nfhp->fh_fsid =
 3888                                 nvp->v_mount->mnt_stat.f_fsid;
 3889                         if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
 3890                                 vput(nvp);
 3891                                 nvp = NULL;
 3892                                 goto invalid;
 3893                         }
 3894                         if (VOP_GETATTR(nvp, vap, cred, td)) {
 3895                                 vput(nvp);
 3896                                 nvp = NULL;
 3897                                 goto invalid;
 3898                         }
 3899                         vput(nvp);
 3900                         nvp = NULL;
 3901 
 3902                         /*
 3903                          * If either the dircount or maxcount will be
 3904                          * exceeded, get out now. Both of these lengths
 3905                          * are calculated conservatively, including all
 3906                          * XDR overheads.
 3907                          */
 3908                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
 3909                                 NFSX_V3POSTOPATTR);
 3910                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
 3911                         if (len > cnt || dirlen > fullsiz) {
 3912                                 eofflag = 0;
 3913                                 break;
 3914                         }
 3915 
 3916                         /*
 3917                          * Build the directory record xdr from
 3918                          * the dirent entry.
 3919                          */
 3920                         fp = (struct nfs_fattr *)&fl.fl_fattr;
 3921                         nfsm_srvfillattr(vap, fp);
 3922                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
 3923                         fl.fl_fhok = nfsrv_nfs_true;
 3924                         fl.fl_postopok = nfsrv_nfs_true;
 3925                         fl.fl_off.nfsuquad[0] = 0;
 3926                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
 3927 
 3928                         nfsm_clget_nolock;
 3929                         *tl = nfsrv_nfs_true;
 3930                         bp += NFSX_UNSIGNED;
 3931                         nfsm_clget_nolock;
 3932                         *tl = 0;
 3933                         bp += NFSX_UNSIGNED;
 3934                         nfsm_clget_nolock;
 3935                         *tl = txdr_unsigned(dp->d_fileno);
 3936                         bp += NFSX_UNSIGNED;
 3937                         nfsm_clget_nolock;
 3938                         *tl = txdr_unsigned(nlen);
 3939                         bp += NFSX_UNSIGNED;
 3940 
 3941                         /* And loop around copying the name */
 3942                         xfer = nlen;
 3943                         cp = dp->d_name;
 3944                         while (xfer > 0) {
 3945                                 nfsm_clget_nolock;
 3946                                 if ((bp + xfer) > be)
 3947                                         tsiz = be - bp;
 3948                                 else
 3949                                         tsiz = xfer;
 3950                                 bcopy(cp, bp, tsiz);
 3951                                 bp += tsiz;
 3952                                 xfer -= tsiz;
 3953                                 if (xfer > 0)
 3954                                         cp += tsiz;
 3955                         }
 3956                         /* And null pad to an int32_t boundary. */
 3957                         for (i = 0; i < rem; i++)
 3958                                 *bp++ = '\0';
 3959 
 3960                         /*
 3961                          * Now copy the flrep structure out.
 3962                          */
 3963                         xfer = sizeof (struct flrep);
 3964                         cp = (caddr_t)&fl;
 3965                         while (xfer > 0) {
 3966                                 nfsm_clget_nolock;
 3967                                 if ((bp + xfer) > be)
 3968                                         tsiz = be - bp;
 3969                                 else
 3970                                         tsiz = xfer;
 3971                                 bcopy(cp, bp, tsiz);
 3972                                 bp += tsiz;
 3973                                 xfer -= tsiz;
 3974                                 if (xfer > 0)
 3975                                         cp += tsiz;
 3976                         }
 3977                 }
 3978 invalid:
 3979                 NFSD_UNLOCK_ASSERT();
 3980                 cpos += dp->d_reclen;
 3981                 dp = (struct dirent *)cpos;
 3982                 cookiep++;
 3983                 ncookies--;
 3984         }
 3985         vrele(vp);
 3986         mtx_unlock(&Giant);     /* VFS */
 3987         vp = NULL;
 3988         nfsm_clget_nolock;
 3989         *tl = nfsrv_nfs_false;
 3990         bp += NFSX_UNSIGNED;
 3991         NFSD_LOCK();
 3992         nfsm_clget;
 3993         if (eofflag)
 3994                 *tl = nfsrv_nfs_true;
 3995         else
 3996                 *tl = nfsrv_nfs_false;
 3997         bp += NFSX_UNSIGNED;
 3998         if (mp != mb) {
 3999                 if (bp < be)
 4000                         mp->m_len = bp - mtod(mp, caddr_t);
 4001         } else
 4002                 mp->m_len += bp - bpos;
 4003         FREE((caddr_t)cookies, M_TEMP);
 4004         FREE((caddr_t)rbuf, M_TEMP);
 4005 nfsmout:
 4006         NFSD_LOCK_ASSERT();
 4007         if (vp) {
 4008                 NFSD_UNLOCK();
 4009                 mtx_lock(&Giant);       /* VFS */
 4010                 vrele(vp);
 4011                 mtx_unlock(&Giant);     /* VFS */
 4012                 NFSD_LOCK();
 4013         }
 4014         return(error);
 4015 }
 4016 
 4017 /*
 4018  * nfs commit service
       *
       * Implements the NFSv3 COMMIT procedure (panics on a v2 request).  The
       * file handle, a 64-bit offset and a 32-bit count are decoded from the
       * request, dirty data for the file is pushed out, and the reply is built
       * from the before/after attributes (wcc data) followed, on success, by
       * the write verifier.
       *
       * A count of zero means "from offset to the end of the file" (RFC 1813
       * section 3.3.21).  Because VOP_FSYNC() takes no range, that case and
       * any count above MAX_COMMIT_COUNT are served by syncing the whole file;
       * smaller counts walk the buffers covering the requested range.
       *
       * The NFS server lock is asserted on entry and at the exit labels; it is
       * dropped, and Giant taken, around the VFS calls.
 4019  */
 4020 int
 4021 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4022     struct thread *td, struct mbuf **mrq)
 4023 {
 4024         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 4025         struct sockaddr *nam = nfsd->nd_nam;
 4026         caddr_t dpos = nfsd->nd_dpos;
 4027         struct ucred *cred = nfsd->nd_cr;
 4028         struct vattr bfor, aft;
 4029         struct vnode *vp = NULL;
 4030         nfsfh_t nfh;
 4031         fhandle_t *fhp;
 4032         u_int32_t *tl;
 4033         caddr_t bpos;
 4034         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
 4035         struct mbuf *mb, *mreq;
 4036         u_quad_t off;
 4037         struct mount *mp = NULL;
 4038         int v3 = (nfsd->nd_flag & ND_NFSV3);
 4039 
 4040         NFSD_LOCK_ASSERT();
 4041 
 4042         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4043         if (!v3)
 4044                 panic("nfsrv_commit: v3 proc called on a v2 connection");
 4045         fhp = &nfh.fh_generic;
 4046         nfsm_srvmtofh(fhp);
 4047         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 4048                 error = ESTALE;
 4049                 goto ereply;
 4050         }
 4051         NFSD_UNLOCK();
 4052         mtx_lock(&Giant);       /* VFS */
 4053         (void) vn_start_write(NULL, &mp, V_WAIT);
 4054         vfs_rel(mp);            /* The write holds a ref. */
 4055         mtx_unlock(&Giant);     /* VFS */
 4056         NFSD_LOCK();
 4057         tl = nfsm_dissect_nonblock(u_int32_t *, 3 * NFSX_UNSIGNED);
 4058 
 4059         /*
 4060          * XXX At this time VOP_FSYNC() does not accept offset and byte
 4061          * count parameters, so these arguments are useless (someday maybe).
 4062          */
 4063         off = fxdr_hyper(tl);
 4064         tl += 2;
 4065         cnt = fxdr_unsigned(int, *tl);
 4066         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
 4067         if (error) {
 4068                 nfsm_reply(2 * NFSX_UNSIGNED);
 4069                 nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
 4070                 error = 0;
 4071                 goto nfsmout;
 4072         }
 4073         NFSD_UNLOCK();
 4074         mtx_lock(&Giant);       /* VFS */
 4075         for_ret = VOP_GETATTR(vp, &bfor, cred, td);
 4076 
              /*
               * RFC 1813 3.3.21: a count of 0 requests a flush from offset to
               * the end of the file.  VOP_FSYNC() cannot be given a range, so
               * sync the whole file for that case, exactly as for oversized
               * requests.  A wire count too large for an int shows up here as
               * a negative value and is treated as oversized as well; without
               * this check both cases would skip the buffer loop below and
               * write nothing.
               */
 4077         if (cnt <= 0 || cnt > MAX_COMMIT_COUNT) {
 4078                 /*
 4079                  * Give up and do the whole thing
 4080                  */
 4081                 if (vp->v_object &&
 4082                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 4083                         VM_OBJECT_LOCK(vp->v_object);
 4084                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 4085                         VM_OBJECT_UNLOCK(vp->v_object);
 4086                 }
 4087                 error = VOP_FSYNC(vp, MNT_WAIT, td);
 4088         } else {
 4089                 /*
 4090                  * Locate and synchronously write any buffers that fall
 4091                  * into the requested range.  Note:  we are assuming that
 4092                  * f_iosize is a power of 2.
 4093                  */
 4094                 int iosize = vp->v_mount->mnt_stat.f_iosize;
 4095                 int iomask = iosize - 1;
 4096                 int s;
 4097                 daddr_t lblkno;
 4098 
 4099                 /*
 4100                  * Align to iosize boundary, super-align to page boundary.
 4101                  */
 4102                 if (off & iomask) {
 4103                         cnt += off & iomask;
 4104                         off &= ~(u_quad_t)iomask;
 4105                 }
 4106                 if (off & PAGE_MASK) {
 4107                         cnt += off & PAGE_MASK;
 4108                         off &= ~(u_quad_t)PAGE_MASK;
 4109                 }
 4110                 lblkno = off / iosize;
 4111 
 4112                 if (vp->v_object &&
 4113                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 4114                         VM_OBJECT_LOCK(vp->v_object);
 4115                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
 4116                         VM_OBJECT_UNLOCK(vp->v_object);
 4117                 }
 4118 
 4119                 s = splbio();
 4120                 VI_LOCK(vp);
 4121                 while (cnt > 0) {
 4122                         struct buf *bp;
 4123 
 4124                         /*
 4125                          * If we have a buffer and it is marked B_DELWRI we
 4126                          * have to lock and write it.  Otherwise the prior
 4127                          * write is assumed to have already been committed.
 4128                          *
 4129                          * gbincore() can return invalid buffers now so we
 4130                          * have to check that bit as well (though B_DELWRI
 4131                          * should not be set if B_INVAL is set there could be
 4132                          * a race here since we haven't locked the buffer).
 4133                          */
 4134                         if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 4135                                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 4136                                     LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) {
 4137                                         VI_LOCK(vp);
 4138                                         continue; /* retry */
 4139                                 }
 4140                                 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
 4141                                     B_DELWRI) {
 4142                                         bremfree(bp);
 4143                                         bp->b_flags &= ~B_ASYNC;
 4144                                         bwrite(bp);
 4145                                         ++nfs_commit_miss;
 4146                                 } else
 4147                                         BUF_UNLOCK(bp);
 4148                                 VI_LOCK(vp);
 4149                         }
 4150                         ++nfs_commit_blks;
 4151                         if (cnt < iosize)
 4152                                 break;
 4153                         cnt -= iosize;
 4154                         ++lblkno;
 4155                 }
 4156                 VI_UNLOCK(vp);
 4157                 splx(s);
 4158         }
 4159 
 4160         aft_ret = VOP_GETATTR(vp, &aft, cred, td);
 4161         vput(vp);
 4162         mtx_unlock(&Giant);     /* VFS */
 4163         vp = NULL;
 4164         NFSD_LOCK();
 4165 ereply:
 4166         NFSD_LOCK_ASSERT();
 4167         nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
 4168         nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
 4169         if (!error) {
                      /* The write verifier is nfsver, seeded from boottime on first use. */
 4170                 tl = nfsm_build(u_int32_t *, NFSX_V3WRITEVERF);
 4171                 if (nfsver.tv_sec == 0)
 4172                         nfsver = boottime;
 4173                 *tl++ = txdr_unsigned(nfsver.tv_sec);
 4174                 *tl = txdr_unsigned(nfsver.tv_usec);
 4175         } else {
 4176                 error = 0;
 4177         }
 4178 nfsmout:
 4179         NFSD_LOCK_ASSERT();
 4180         NFSD_UNLOCK();
 4181         mtx_lock(&Giant);       /* VFS */
 4182         if (vp)
 4183                 vput(vp);
              /*
               * mp is still NULL here when vfs_getvfs() failed above.
               * NOTE(review): this relies on vn_finished_write() accepting a
               * NULL mount -- confirm.
               */
 4184         vn_finished_write(mp);
 4185         mtx_unlock(&Giant);     /* VFS */
 4186         NFSD_LOCK();
 4187         return(error);
 4188 }
 4189 
 4190 /*
 4191  * nfs statfs service
       *
       * Translates the file handle in the request into a vnode, queries the
       * filesystem with VFS_STATFS() and the vnode with VOP_GETATTR(), and
       * builds the reply: post-op attributes (v3 only) followed by an
       * nfs_statfs structure.  The v3 layout reports byte counts (blocks
       * multiplied by f_bsize) and file counts; the v2 layout reports a
       * transfer size, the block size and block counts.
       *
       * The NFS server lock is asserted on entry and at nfsmout; it is
       * dropped, and Giant taken, around the VFS calls.
 4192  */
 4193 int
 4194 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4195     struct thread *td, struct mbuf **mrq)
 4196 {
 4197         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 4198         struct sockaddr *nam = nfsd->nd_nam;
 4199         caddr_t dpos = nfsd->nd_dpos;
 4200         struct ucred *cred = nfsd->nd_cr;
 4201         struct statfs *sf;
 4202         struct nfs_statfs *sfp;
 4203         caddr_t bpos;
 4204         int error = 0, rdonly, getret = 1;
 4205         int v3 = (nfsd->nd_flag & ND_NFSV3);
 4206         struct mbuf *mb, *mreq;
 4207         struct vnode *vp = NULL;
 4208         struct vattr at;
 4209         nfsfh_t nfh;
 4210         fhandle_t *fhp;
 4211         struct statfs statfs;
 4212         u_quad_t tval;
 4213 
 4214         NFSD_LOCK_ASSERT();
 4215 
 4216         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4217         fhp = &nfh.fh_generic;
 4218         nfsm_srvmtofh(fhp);
 4219         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
 4220         if (error) {
                      /*
                       * No vnode: getret still holds its initial value of 1 and
                       * "at" has not been filled in at this point.
                       */
 4221                 nfsm_reply(NFSX_UNSIGNED);
 4222                 if (v3)
 4223                         nfsm_srvpostop_attr(getret, &at);
 4224                 error = 0;
 4225                 goto nfsmout;
 4226         }
 4227         sf = &statfs;
 4228         NFSD_UNLOCK();
 4229         mtx_lock(&Giant);       /* VFS */
 4230         error = VFS_STATFS(vp->v_mount, sf, td);
 4231         getret = VOP_GETATTR(vp, &at, cred, td);
 4232         vput(vp);
 4233         mtx_unlock(&Giant);     /* VFS */
              /* Cleared so the nfsmout path does not vput() the vnode again. */
 4234         vp = NULL;
 4235         NFSD_LOCK();
 4236         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
 4237         if (v3)
 4238                 nfsm_srvpostop_attr(getret, &at);
 4239         if (error) {
                      /* VFS_STATFS() failed: no statfs body is appended. */
 4240                 error = 0;
 4241                 goto nfsmout;
 4242         }
 4243         sfp = nfsm_build(struct nfs_statfs *, NFSX_STATFS(v3));
 4244         if (v3) {
 4245                 tval = (u_quad_t)sf->f_blocks;
 4246                 tval *= (u_quad_t)sf->f_bsize;
 4247                 txdr_hyper(tval, &sfp->sf_tbytes);
 4248                 tval = (u_quad_t)sf->f_bfree;
 4249                 tval *= (u_quad_t)sf->f_bsize;
 4250                 txdr_hyper(tval, &sfp->sf_fbytes);
 4251                 /*
 4252                  * Don't send negative values for available space,
 4253                  * since this field is unsigned in the NFS protocol.
 4254                  * Otherwise, the client would see absurdly high
 4255                  * numbers for free space.
 4256                  */
 4257                 if (sf->f_bavail < 0)
 4258                         tval = 0;
 4259                 else
 4260                         tval = (u_quad_t)sf->f_bavail;
 4261                 tval *= (u_quad_t)sf->f_bsize;
 4262                 txdr_hyper(tval, &sfp->sf_abytes);
                      /*
                       * File counts: only the second word of each quad carries
                       * the value, the first word is always zero.  Both the
                       * free and the available file counts are taken from
                       * f_ffree.
                       */
 4263                 sfp->sf_tfiles.nfsuquad[0] = 0;
 4264                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
 4265                 sfp->sf_ffiles.nfsuquad[0] = 0;
 4266                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
 4267                 sfp->sf_afiles.nfsuquad[0] = 0;
 4268                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
 4269                 sfp->sf_invarsec = 0;
 4270         } else {
                      /* v2 reply; negative f_bavail is clamped to zero here too. */
 4271                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
 4272                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
 4273                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
 4274                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
 4275                 if (sf->f_bavail < 0)
 4276                         sfp->sf_bavail = 0;
 4277                 else
 4278                         sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
 4279         }
 4280 nfsmout:
 4281         NFSD_LOCK_ASSERT();
 4282         if (vp) {
 4283                 NFSD_UNLOCK();
 4284                 mtx_lock(&Giant);       /* VFS */
 4285                 vput(vp);
 4286                 mtx_unlock(&Giant);     /* VFS */
 4287                 NFSD_LOCK();
 4288         }
 4289         return(error);
 4290 }
 4291 
 4292 /*
 4293  * nfs fsinfo service
       *
       * NFSv3-only procedure (panics on a v2 request).  Translates the file
       * handle into a vnode, estimates the maximum file size from the
       * filesystem block size, and replies with post-op attributes followed by
       * an nfsv3_fsinfo structure.  Apart from the maximum file size, the
       * values reported depend only on the socket type and on compile-time
       * constants.
       *
       * The NFS server lock is asserted on entry and at nfsmout; it is
       * dropped, and Giant taken, around the VFS calls.
 4294  */
 4295 int
 4296 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4297     struct thread *td, struct mbuf **mrq)
 4298 {
 4299         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 4300         struct sockaddr *nam = nfsd->nd_nam;
 4301         caddr_t dpos = nfsd->nd_dpos;
 4302         struct ucred *cred = nfsd->nd_cr;
 4303         struct nfsv3_fsinfo *sip;
 4304         caddr_t bpos;
 4305         int error = 0, rdonly, getret = 1, pref;
 4306         struct mbuf *mb, *mreq;
 4307         struct vnode *vp = NULL;
 4308         struct vattr at;
 4309         nfsfh_t nfh;
 4310         fhandle_t *fhp;
 4311         u_quad_t maxfsize;
 4312         struct statfs sb;
 4313         int v3 = (nfsd->nd_flag & ND_NFSV3);
 4314 
 4315         NFSD_LOCK_ASSERT();
 4316 
 4317         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4318         if (!v3)
 4319                 panic("nfsrv_fsinfo: v3 proc called on a v2 connection");
 4320         fhp = &nfh.fh_generic;
 4321         nfsm_srvmtofh(fhp);
 4322         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
 4323         if (error) {
 4324                 nfsm_reply(NFSX_UNSIGNED);
 4325                 nfsm_srvpostop_attr(getret, &at);
 4326                 error = 0;
 4327                 goto nfsmout;
 4328         }
 4329 
 4330         NFSD_UNLOCK();
 4331         mtx_lock(&Giant);       /* VFS */
 4332         /* XXX Try to make a guess on the max file size. */
              /*
               * NOTE(review): the return value of VFS_STATFS() is ignored, so
               * sb.f_bsize is used below even if the call failed -- confirm
               * that this is acceptable.
               */
 4333         VFS_STATFS(vp->v_mount, &sb, td);
 4334         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
 4335 
 4336         getret = VOP_GETATTR(vp, &at, cred, td);
 4337         vput(vp);
 4338         mtx_unlock(&Giant);     /* VFS */
              /* Cleared so the nfsmout path does not vput() the vnode again. */
 4339         vp = NULL;
 4340         NFSD_LOCK();
 4341         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
 4342         nfsm_srvpostop_attr(getret, &at);
 4343         sip = nfsm_build(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
 4344 
 4345         /*
 4346          * XXX
 4347          * There should be filesystem VFS OP(s) to get this information.
 4348          * For now, assume ufs.
 4349          */
              /*
               * The same size is advertised as maximum and preferred for reads
               * and writes and as the preferred readdir size; datagram sockets
               * get NFS_MAXDGRAMDATA, everything else NFS_MAXDATA.
               */
 4350         if (slp->ns_so->so_type == SOCK_DGRAM)
 4351                 pref = NFS_MAXDGRAMDATA;
 4352         else
 4353                 pref = NFS_MAXDATA;
 4354         sip->fs_rtmax = txdr_unsigned(pref);
 4355         sip->fs_rtpref = txdr_unsigned(pref);
 4356         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
 4357         sip->fs_wtmax = txdr_unsigned(pref);
 4358         sip->fs_wtpref = txdr_unsigned(pref);
 4359         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
 4360         sip->fs_dtpref = txdr_unsigned(pref);
 4361         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
              /* Time granularity is reported as 0 seconds, 1 nanosecond. */
 4362         sip->fs_timedelta.nfsv3_sec = 0;
 4363         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
 4364         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
 4365                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
 4366                 NFSV3FSINFO_CANSETTIME);
 4367 nfsmout:
 4368         NFSD_LOCK_ASSERT();
 4369         if (vp) {
 4370                 NFSD_UNLOCK();
 4371                 mtx_lock(&Giant);       /* VFS */
 4372                 vput(vp);
 4373                 mtx_unlock(&Giant);     /* VFS */
 4374                 NFSD_LOCK();
 4375         }
 4376         return(error);
 4377 }
 4378 
 4379 /*
 4380  * nfs pathconf service
       *
       * NFSv3-only procedure (panics on a v2 request).  Translates the file
       * handle into a vnode, asks the filesystem for the link-max, name-max,
       * chown-restricted and no-trunc limits via VOP_PATHCONF(), and replies
       * with post-op attributes followed by an nfsv3_pathconf structure.  If
       * any VOP_PATHCONF() call fails, only the post-op attributes are sent.
       *
       * The NFS server lock is asserted on entry and at nfsmout; it is
       * dropped, and Giant taken, around the VFS calls.
 4381  */
 4382 int
 4383 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4384     struct thread *td, struct mbuf **mrq)
 4385 {
 4386         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 4387         struct sockaddr *nam = nfsd->nd_nam;
 4388         caddr_t dpos = nfsd->nd_dpos;
 4389         struct ucred *cred = nfsd->nd_cr;
 4390         struct nfsv3_pathconf *pc;
 4391         caddr_t bpos;
 4392         int error = 0, rdonly, getret = 1;
 4393         register_t linkmax, namemax, chownres, notrunc;
 4394         struct mbuf *mb, *mreq;
 4395         struct vnode *vp = NULL;
 4396         struct vattr at;
 4397         nfsfh_t nfh;
 4398         fhandle_t *fhp;
 4399         int v3 = (nfsd->nd_flag & ND_NFSV3);
 4400 
 4401         NFSD_LOCK_ASSERT();
 4402 
 4403         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4404         if (!v3)
 4405                 panic("nfsrv_pathconf: v3 proc called on a v2 connection");
 4406         fhp = &nfh.fh_generic;
 4407         nfsm_srvmtofh(fhp);
 4408         error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, TRUE);
 4409         if (error) {
 4410                 nfsm_reply(NFSX_UNSIGNED);
 4411                 nfsm_srvpostop_attr(getret, &at);
 4412                 error = 0;
 4413                 goto nfsmout;
 4414         }
 4415         NFSD_UNLOCK();
 4416         mtx_lock(&Giant);       /* VFS */
              /*
               * Query the four limits in turn, stopping at the first failure;
               * the attributes are fetched regardless of the outcome.
               */
 4417         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
 4418         if (!error)
 4419                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
 4420         if (!error)
 4421                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
 4422         if (!error)
 4423                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
 4424         getret = VOP_GETATTR(vp, &at, cred, td);
 4425         vput(vp);
 4426         mtx_unlock(&Giant);     /* VFS */
              /* Cleared so the nfsmout path does not vput() the vnode again. */
 4427         vp = NULL;
 4428         NFSD_LOCK();
 4429         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
 4430         nfsm_srvpostop_attr(getret, &at);
 4431         if (error) {
 4432                 error = 0;
 4433                 goto nfsmout;
 4434         }
 4435         pc = nfsm_build(struct nfsv3_pathconf *, NFSX_V3PATHCONF);
 4436 
 4437         pc->pc_linkmax = txdr_unsigned(linkmax);
 4438         pc->pc_namemax = txdr_unsigned(namemax);
 4439         pc->pc_notrunc = txdr_unsigned(notrunc);
 4440         pc->pc_chownrestricted = txdr_unsigned(chownres);
 4441 
 4442         /*
 4443          * These should probably be supported by VOP_PATHCONF(), but
 4444          * until msdosfs is exportable (why would you want to?), the
 4445          * Unix defaults should be ok.
 4446          */
 4447         pc->pc_caseinsensitive = nfsrv_nfs_false;
 4448         pc->pc_casepreserving = nfsrv_nfs_true;
 4449 nfsmout:
 4450         NFSD_LOCK_ASSERT();
 4451         if (vp) {
 4452                 NFSD_UNLOCK();
 4453                 mtx_lock(&Giant);       /* VFS */
 4454                 vput(vp);
 4455                 mtx_unlock(&Giant);     /* VFS */
 4456                 NFSD_LOCK();
 4457         }
 4458         return(error);
 4459 }
 4460 
 4461 /*
 4462  * Null operation, used by clients to ping server
       *
       * Builds a zero-length reply with nfsm_reply() and returns
       * NFSERR_RETVOID.  The NFS server lock is asserted on entry and on exit
       * and is never dropped.
       *
       * NOTE(review): the locals that look unused (mrep, bpos, mb, mreq) are
       * presumably referenced by name inside the nfsm_reply() macro -- confirm
       * before removing any of them.
 4463  */
 4464 /* ARGSUSED */
 4465 int
 4466 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4467     struct thread *td, struct mbuf **mrq)
 4468 {
 4469         struct mbuf *mrep = nfsd->nd_mrep;
 4470         caddr_t bpos;
 4471         int error = NFSERR_RETVOID;
 4472         struct mbuf *mb, *mreq;
 4473 
 4474         NFSD_LOCK_ASSERT();
 4475 
 4476         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4477         nfsm_reply(0);
 4478 nfsmout:
 4479         NFSD_LOCK_ASSERT();
 4480         return (error);
 4481 }
 4482 
 4483 /*
 4484  * No operation, used for obsolete procedures
       *
       * Replies with the status already recorded in nfsd->nd_repstat, or with
       * EPROCUNAVAIL when none is recorded, and no further data.  The NFS
       * server lock is asserted on entry and on exit and is never dropped.
       *
       * NOTE(review): the locals that look unused (mrep, bpos, mb, mreq) are
       * presumably referenced by name inside the nfsm_reply() macro -- confirm
       * before removing any of them.
 4485  */
 4486 /* ARGSUSED */
 4487 int
 4488 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4489     struct thread *td, struct mbuf **mrq)
 4490 {
 4491         struct mbuf *mrep = nfsd->nd_mrep;
 4492         caddr_t bpos;
 4493         int error;
 4494         struct mbuf *mb, *mreq;
 4495 
 4496         NFSD_LOCK_ASSERT();
 4497 
 4498         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4499         if (nfsd->nd_repstat)
 4500                 error = nfsd->nd_repstat;
 4501         else
 4502                 error = EPROCUNAVAIL;
              /*
               * Presumably nfsm_reply() picks up the status from "error";
               * once the reply exists the status is cleared so that falling
               * through to nfsmout returns 0.
               */
 4503         nfsm_reply(0);
 4504         error = 0;
 4505 nfsmout:
 4506         NFSD_LOCK_ASSERT();
 4507         return (error);
 4508 }
 4509 
 4510 /*
 4511  * Perform access checking for vnodes obtained from file handles that would
 4512  * refer to files already opened by a Unix client. You cannot just use
 4513  * vn_writechk() and VOP_ACCESS() for two reasons.
 4514  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
 4515  *     case.
 4516  * 2 - The owner is to be given access irrespective of mode bits for some
 4517  *     operations, so that processes that chmod after opening a file don't
 4518  *     break. I don't like this because it opens a security hole, but since
 4519  *     the nfs server opens a security hole the size of a barn door anyhow,
 4520  *     what the heck.
 4521  *
 4522  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
 4523  * will return EPERM instead of EACCESS. EPERM is always an error.
 4524  *
 4525  * There are two versions: one to be called while holding Giant (which is
 4526  * needed due to use of VFS), and the other called with the NFS server lock
 4527  * (which will be dropped and reacquired).  This is necessary because
 4528  * nfsrv_access checks are required from both classes of contexts.
 4529  */
 4530 static int
 4531 nfsrv_access_withgiant(struct vnode *vp, int flags, struct ucred *cred,
 4532     int rdonly, struct thread *td, int override)
 4533 {
 4534         struct vattr vattr;
 4535         int error;
 4536 
 4537         GIANT_REQUIRED;
 4538 
 4539         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4540 
 4541         if (flags & VWRITE) {
 4542                 /* Just vn_writechk() changed to check rdonly */
 4543                 /*
 4544                  * Disallow write attempts on read-only filesystems;
 4545                  * unless the file is a socket or a block or character
 4546                  * device resident on the filesystem.
 4547                  */
 4548                 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 4549                         switch (vp->v_type) {
 4550                         case VREG:
 4551                         case VDIR:
 4552                         case VLNK:
 4553                                 return (EROFS);
 4554                         default:
 4555                                 break;
 4556                         }
 4557                 }
 4558                 /*
 4559                  * If there's shared text associated with
 4560                  * the inode, we can't allow writing.
 4561                  */
 4562                 if (vp->v_vflag & VV_TEXT)
 4563                         return (ETXTBSY);
 4564         }
 4565 
 4566         error = VOP_GETATTR(vp, &vattr, cred, td);
 4567         if (error)
 4568                 return (error);
 4569         error = VOP_ACCESS(vp, flags, cred, td);
 4570         /*
 4571          * Allow certain operations for the owner (reads and writes
 4572          * on files that are already open).
 4573          */
 4574         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
 4575                 error = 0;
 4576         return (error);
 4577 }
 4578 
 4579 static int
 4580 nfsrv_access(struct vnode *vp, int flags, struct ucred *cred, int rdonly,
 4581     struct thread *td, int override)
 4582 {
 4583         int error;
 4584 
 4585         NFSD_LOCK_ASSERT();
 4586 
 4587         NFSD_UNLOCK();
 4588         mtx_lock(&Giant);       /* VFS */
 4589         error = nfsrv_access_withgiant(vp, flags, cred, rdonly, td, override);
 4590         mtx_unlock(&Giant);     /* VFS */
 4591         NFSD_LOCK();
 4592         return (error);
 4593 }

Cache object: 11e53c83e8399c93b09bc598fc374747


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.