The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/nfsserver/nfs_serv.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD$");
   37 
   38 /*
   39  * nfs version 2 and 3 server calls to vnode ops
   40  * - these routines generally have 3 phases
   41  *   1 - break down and validate rpc request in mbuf list
   42  *   2 - do the vnode ops for the request
   43  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
   44  *   3 - build the rpc reply in an mbuf list
   45  *   nb:
   46  *      - do not mix the phases, since the nfsm_?? macros can return failures
   47  *        on a bad rpc or similar and do not do any vrele() or vput()'s
   48  *
   49  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
   50  *      error number iff error != 0 whereas
   51  *      returning an error from the server function implies a fatal error
   52  *      such as a badly constructed rpc request that should be dropped without
   53  *      a reply.
   54  *      For nfsm_reply(), the case where error == EBADRPC is treated
   55  *      specially; after constructing a reply, it does an immediate
   56  *      `goto nfsmout' to avoid getting any V3 post-op status appended.
   57  *
   58  * Other notes:
   59  *      Warning: always pay careful attention to resource cleanup on return
   60  *      and note that nfsm_*() macros can terminate a procedure on certain
   61  *      errors.
   62  *
   63  *      lookup() and namei()
   64  *      may return garbage in various structural fields/return elements
   65  *      if an error is returned, and may garbage up nd.ni_dvp even if no
   66  *      error is returned and you did not request LOCKPARENT or WANTPARENT.
   67  *
   68  *      We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name
   69  *      buffer has been freed or not.
   70  */
   71 
   72 #include <sys/param.h>
   73 #include <sys/systm.h>
   74 #include <sys/proc.h>
   75 #include <sys/namei.h>
   76 #include <sys/unistd.h>
   77 #include <sys/vnode.h>
   78 #include <sys/mount.h>
   79 #include <sys/socket.h>
   80 #include <sys/socketvar.h>
   81 #include <sys/malloc.h>
   82 #include <sys/mbuf.h>
   83 #include <sys/priv.h>
   84 #include <sys/dirent.h>
   85 #include <sys/stat.h>
   86 #include <sys/kernel.h>
   87 #include <sys/sysctl.h>
   88 #include <sys/bio.h>
   89 #include <sys/buf.h>
   90 
   91 #include <vm/vm.h>
   92 #include <vm/vm_extern.h>
   93 #include <vm/vm_object.h>
   94 
   95 #include <nfs/nfsproto.h>
   96 #include <nfs/rpcv2.h>
   97 #include <nfsserver/nfs.h>
   98 #include <nfs/xdr_subs.h>
   99 #include <nfsserver/nfsm_subs.h>
  100 
  101 #ifdef NFSRV_DEBUG
      /*
       * Debug tracing.  Call as nfsdbprintf((fmt, args...)) -- note the double
       * parentheses: the whole argument list is pasted after "printf".
       */
  102 #define nfsdbprintf(info)       printf info
  103 #else
      /* Without NFSRV_DEBUG every nfsdbprintf() call expands to nothing. */
  104 #define nfsdbprintf(info)
  105 #endif
  106 
      /*
       * NOTE(review): not referenced in the part of the file visible here;
       * presumably an upper bound on a commit request size -- confirm at
       * the point of use.
       */
  107 #define MAX_COMMIT_COUNT        (1024 * 1024)
  108 
      /* Number of slots in the nfsheur[] table. */
  109 #define NUM_HEURISTIC           1017
      /*
       * NOTE(review): the NHUSE_* values look like the initial value, the
       * increment and the ceiling for nfsheur.nh_use; they are not used in
       * the part of the file visible here -- confirm at the point of use.
       */
  110 #define NHUSE_INIT              64
  111 #define NHUSE_INC               16
  112 #define NHUSE_MAX               2048
  113 
  114 static struct nfsheur {
              /*
               * One slot of the file-local heuristic table; the table holds
               * NUM_HEURISTIC such slots.  nh_vp is only compared against,
               * no reference is held on it (see the field comment).
               */
  115         struct vnode *nh_vp;    /* vp to match (unreferenced pointer) */
  116         off_t nh_nextr;         /* next offset for sequential detection */
  117         int nh_use;             /* use count for selection */
  118         int nh_seqcount;        /* heuristic */
  119 } nfsheur[NUM_HEURISTIC];
  120 
  121 /* Global vars */
  122 
      /*
       * NOTE(review): presumably write-gathering delays (NFS_GATHERDELAY
       * scaled by 1000 for v2, disabled for v3); they are not used in the
       * part of the file visible here -- confirm units at the point of use.
       */
  123 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
  124 int nfsrvw_procrastinate_v3 = 0;
  125 
  126 static struct timeval   nfsver = { 0 };
  127 
      /* Parent sysctl node: everything below is registered under vfs.nfsrv. */
  128 SYSCTL_NODE(_vfs, OID_AUTO, nfsrv, CTLFLAG_RW, 0, "NFS server");
  129 
      /*
       * Integer knobs/counters exported read-write as vfs.nfsrv.async,
       * vfs.nfsrv.commit_blks and vfs.nfsrv.commit_miss (no description
       * strings are supplied).
       */
  130 static int nfs_async;
  131 static int nfs_commit_blks;
  132 static int nfs_commit_miss;
  133 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
  134 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
  135 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
  136 
      /* Server statistics structure, exported as a whole through sysctl. */
  137 struct nfsrvstats nfsrvstats;
  138 SYSCTL_STRUCT(_vfs_nfsrv, NFS_NFSRVSTATS, nfsrvstats, CTLFLAG_RW,
  139         &nfsrvstats, nfsrvstats, "S,nfsrvstats");
  140 
      /* Forward declarations of file-local helpers. */
  141 static int      nfsrv_access(struct vnode *, int, struct ucred *, int,
  142                     struct thread *, int);
  143 static void     nfsrvw_coalesce(struct nfsrv_descript *,
  144                     struct nfsrv_descript *);
  145 
  146 /*
  147  * Clear nameidata fields that are tested in nfsmout cleanup code prior
  148  * to using first nfsm macro (that might jump to the cleanup code).
       *
       * Zeroes the componentname flags and sets ni_vp, ni_dvp and
       * ni_startdir to NULL so that cleanup code which tests these fields
       * sees "nothing to release".
  149  */
  150 
  151 static __inline void
  152 ndclear(struct nameidata *nd)
  153 {
  154 
  155         nd->ni_cnd.cn_flags = 0;
  156         nd->ni_vp = NULL;
  157         nd->ni_dvp = NULL;
  158         nd->ni_startdir = NULL;
  159 }
  160 
  161 /*
  162  * Takes two vfslocked integers and returns with at most one
  163  * reference to giant.  The return value indicates whether giant
  164  * is held by either lock.  This simplifies nfsrv ops by allowing
  165  * them to track only one vfslocked var.
       *
       * When both arguments are non-zero the second one is released with
       * VFS_UNLOCK_GIANT(); the result is the bitwise OR of the two inputs.
  166  */
  167 static __inline int
  168 nfsrv_lockedpair(int vfs1, int vfs2)
  169 {
  170 
              /* Both set: drop the reference tracked by vfs2, keep vfs1's. */
  171         if (vfs1 && vfs2)
  172                 VFS_UNLOCK_GIANT(vfs2);
  173 
  174         return (vfs1 | vfs2);
  175 }
  176 
  177 static __inline int
  178 nfsrv_lockedpair_nd(int vfs1, struct nameidata *nd)
  179 {
              /*
               * Variant of nfsrv_lockedpair() whose second "vfslocked" value
               * is taken from the nameidata via NDHASGIANT(nd).  Returns the
               * merged value from nfsrv_lockedpair().
               */
  180         int vfs2;
  181 
  182         vfs2 = NDHASGIANT(nd);
  183 
  184         return nfsrv_lockedpair(vfs1, vfs2);
  185 }
  186 
  187 /*
  188  * nfs v3 access service
       *
       * Decodes a file handle and a requested access mask from the request,
       * clears every requested bit for which nfsrv_access() returns non-zero,
       * and replies with the post-op attributes followed by the resulting
       * mask.  Panics if invoked for a request that is not flagged ND_NFSV3.
       *
       * Returns error; it is reset to 0 once a failure reply has been built
       * for a file handle that nfsrv_fhtovp() could not translate.
  189  */
  190 int
  191 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  192     struct thread *td, struct mbuf **mrq)
  193 {
  194         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  195         struct sockaddr *nam = nfsd->nd_nam;
  196         caddr_t dpos = nfsd->nd_dpos;
  197         struct ucred *cred = nfsd->nd_cr;
  198         struct vnode *vp = NULL;
  199         nfsfh_t nfh;
  200         fhandle_t *fhp;
  201         u_int32_t *tl;
  202         caddr_t bpos;
  203         int error = 0, rdonly, getret;
  204         struct mbuf *mb, *mreq;
  205         struct vattr vattr, *vap = &vattr;
  206         u_long testmode, nfsmode;
  207         int v3 = (nfsd->nd_flag & ND_NFSV3);
  208         int vfslocked;
  209 
  210         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  211         if (!v3)
  212                 panic("nfsrv3_access: v3 proc called on a v2 connection");
              /*
               * The nfsm macros below may jump to nfsmout, which hands
               * vfslocked to VFS_UNLOCK_GIANT(); give it a defined value
               * before the first of them, as the other services do.
               */
              vfslocked = 0;
  213         fhp = &nfh.fh_generic;
  214         nfsm_srvmtofh(fhp);
  215         tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  216         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
  217             nam, &rdonly, TRUE);
  218         if (error) {
  219                 nfsm_reply(NFSX_UNSIGNED);
  220                 nfsm_srvpostop_attr(1, NULL);
  221                 error = 0;
  222                 goto nfsmout;
  223         }
  224         nfsmode = fxdr_unsigned(u_int32_t, *tl);
              /* Read access: drop the bit if the VREAD check fails. */
  225         if ((nfsmode & NFSV3ACCESS_READ) &&
  226                 nfsrv_access(vp, VREAD, cred, rdonly, td, 0))
  227                 nfsmode &= ~NFSV3ACCESS_READ;
              /*
               * Write-class bits are all decided by a single VWRITE check;
               * DELETE is only part of that group for directories.
               */
  228         if (vp->v_type == VDIR)
  229                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
  230                         NFSV3ACCESS_DELETE);
  231         else
  232                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
  233         if ((nfsmode & testmode) &&
  234                 nfsrv_access(vp, VWRITE, cred, rdonly, td, 0))
  235                 nfsmode &= ~testmode;
              /* VEXEC decides LOOKUP on directories, EXECUTE on anything else. */
  236         if (vp->v_type == VDIR)
  237                 testmode = NFSV3ACCESS_LOOKUP;
  238         else
  239                 testmode = NFSV3ACCESS_EXECUTE;
  240         if ((nfsmode & testmode) &&
  241                 nfsrv_access(vp, VEXEC, cred, rdonly, td, 0))
  242                 nfsmode &= ~testmode;
  243         getret = VOP_GETATTR(vp, vap, cred, td);
              /* Done with the vnode; NULL it so nfsmout does not vput() it again. */
  244         vput(vp);
  245         vp = NULL;
  246         nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
  247         nfsm_srvpostop_attr(getret, vap);
  248         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  249         *tl = txdr_unsigned(nfsmode);
  250 nfsmout:
  251         if (vp)
  252                 vput(vp);
  253         VFS_UNLOCK_GIANT(vfslocked);
  254         return(error);
  255 }
  256 
  257 /*
  258  * nfs getattr service
       *
       * Decodes the file handle from the request, translates it to a vnode
       * with nfsrv_fhtovp(), fetches the attributes with VOP_GETATTR() and,
       * on success, appends them to the reply as a struct nfs_fattr sized
       * for the protocol version of the request.
       *
       * Returns error; it is reset to 0 on the paths where a failure has
       * already been reported through nfsm_reply().
  259  */
  260 int
  261 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  262     struct thread *td, struct mbuf **mrq)
  263 {
  264         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  265         struct sockaddr *nam = nfsd->nd_nam;
  266         caddr_t dpos = nfsd->nd_dpos;
  267         struct ucred *cred = nfsd->nd_cr;
  268         struct nfs_fattr *fp;
  269         struct vattr va;
  270         struct vattr *vap = &va;
  271         struct vnode *vp = NULL;
  272         nfsfh_t nfh;
  273         fhandle_t *fhp;
  274         caddr_t bpos;
  275         int error = 0, rdonly;
  276         struct mbuf *mb, *mreq;
  277         int vfslocked;
  278 
  279         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Must be defined before any nfsm macro can jump to nfsmout. */
  280         vfslocked = 0;
  281         fhp = &nfh.fh_generic;
  282         nfsm_srvmtofh(fhp);
  283         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp, nam,
  284             &rdonly, TRUE);
  285         if (error) {
                      /* Bad handle: reply carries only the status. */
  286                 nfsm_reply(0);
  287                 error = 0;
  288                 goto nfsmout;
  289         }
  290         error = VOP_GETATTR(vp, vap, cred, td);
              /* Release the vnode before building the reply. */
  291         vput(vp);
  292         vp = NULL;
  293         nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
  294         if (error) {
  295                 error = 0;
  296                 goto nfsmout;
  297         }
  298         fp = nfsm_build(struct nfs_fattr *,
  299             NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
  300         nfsm_srvfillattr(vap, fp);
  301         /* fall through */
  302 
  303 nfsmout:
  304         if (vp)
  305                 vput(vp);
  306         VFS_UNLOCK_GIANT(vfslocked);
  307         return(error);
  308 }
  309 
  310 /*
  311  * nfs setattr service
       *
       * Decodes the file handle and the new attributes (v3 sattr plus an
       * optional ctime guard, or a v2 sattr), translates the handle to a
       * vnode, performs the permission / read-only checks and applies the
       * attributes with VOP_SETATTR().  The reply carries wcc data for v3,
       * or the resulting attributes for a successful v2 request.
       *
       * Two exit labels are used: "out" builds the reply and then falls
       * into "nfsmout", which only releases resources (vnode, the write
       * started with vn_start_write(), Giant).
       *
       * Returns error; it is reset to 0 on the paths where a reply has
       * been built.
  312  */
  313 int
  314 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  315     struct thread *td, struct mbuf **mrq)
  316 {
  317         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  318         struct sockaddr *nam = nfsd->nd_nam;
  319         caddr_t dpos = nfsd->nd_dpos;
  320         struct ucred *cred = nfsd->nd_cr;
  321         struct vattr va, preat;
  322         struct vattr *vap = &va;
  323         struct nfsv2_sattr *sp;
  324         struct nfs_fattr *fp;
  325         struct vnode *vp = NULL;
  326         nfsfh_t nfh;
  327         fhandle_t *fhp;
  328         u_int32_t *tl;
  329         caddr_t bpos;
  330         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
  331         int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
  332         struct mbuf *mb, *mreq;
  333         struct timespec guard = { 0, 0 };
  334         struct mount *mp = NULL;
  335         int tvfslocked;
  336         int vfslocked;
  337 
  338         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  339         vfslocked = 0;
  340         fhp = &nfh.fh_generic;
  341         nfsm_srvmtofh(fhp);
              /* No mount for the handle's fsid: report ESTALE via the reply. */
  342         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
  343                 error = ESTALE;
  344                 goto out;
  345         }
  346         vfslocked = VFS_LOCK_GIANT(mp);
  347         (void) vn_start_write(NULL, &mp, V_WAIT);
  348         vfs_rel(mp);            /* The write holds a ref. */
  349         VATTR_NULL(vap);
  350         if (v3) {
  351                 nfsm_srvsattr(vap);
  352                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
                      /* Non-zero gcheck means a guard timestamp follows. */
  353                 gcheck = fxdr_unsigned(int, *tl);
  354                 if (gcheck) {
  355                         tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
  356                         fxdr_nfsv3time(tl, &guard);
  357                 }
  358         } else {
  359                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
  360                 /*
  361                  * Nah nah nah nah na nah
  362                  * There is a bug in the Sun client that puts 0xffff in the mode
  363                  * field of sattr when it should put in 0xffffffff. The u_short
  364                  * doesn't sign extend.
  365                  * --> check the low order 2 bytes for 0xffff
  366                  */
  367                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
  368                         vap->va_mode = nfstov_mode(sp->sa_mode);
                      /*
                       * For the remaining v2 fields a value equal to
                       * nfsrv_nfs_xdrneg1 leaves the attribute untouched.
                       */
  369                 if (sp->sa_uid != nfsrv_nfs_xdrneg1)
  370                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
  371                 if (sp->sa_gid != nfsrv_nfs_xdrneg1)
  372                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
  373                 if (sp->sa_size != nfsrv_nfs_xdrneg1)
  374                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
  375                 if (sp->sa_atime.nfsv2_sec != nfsrv_nfs_xdrneg1) {
  376 #ifdef notyet
  377                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
  378 #else
  379                         vap->va_atime.tv_sec =
  380                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
  381                         vap->va_atime.tv_nsec = 0;
  382 #endif
  383                 }
  384                 if (sp->sa_mtime.nfsv2_sec != nfsrv_nfs_xdrneg1)
  385                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
  386 
  387         }
  388 
  389         /*
  390          * Now that we have all the fields, lets do it.
  391          */
  392         error = nfsrv_fhtovp(fhp, 1, &vp, &tvfslocked, cred, slp,
  393             nam, &rdonly, TRUE);
              /* Fold the two Giant acquisitions into the single vfslocked. */
  394         vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
  395         if (error) {
  396                 nfsm_reply(2 * NFSX_UNSIGNED);
  397                 if (v3)
  398                         nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  399                 error = 0;
  400                 goto nfsmout;
  401         }
  402 
  403         /*
  404          * vp now an active resource, pay careful attention to cleanup
  405          */
  406         if (v3) {
  407                 error = preat_ret = VOP_GETATTR(vp, &preat, cred, td);
                      /*
                       * Guarded setattr: the file's current ctime must equal
                       * the guard supplied by the client.
                       */
  408                 if (!error && gcheck &&
  409                         (preat.va_ctime.tv_sec != guard.tv_sec ||
  410                          preat.va_ctime.tv_nsec != guard.tv_nsec))
  411                         error = NFSERR_NOT_SYNC;
  412                 if (error) {
  413                         vput(vp);
  414                         vp = NULL;
  415                         nfsm_reply(NFSX_WCCDATA(v3));
  416                         if (v3)
  417                                 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  418                         error = 0;
  419                         goto nfsmout;
  420                 }
  421         }
  422 
  423         /*
  424          * If the size is being changed write access is required, otherwise
  425          * just check for a read only filesystem.
  426          */
  427         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
  428                 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
  429                         error = EROFS;
  430                         goto out;
  431                 }
  432         } else {
  433                 if (vp->v_type == VDIR) {
  434                         error = EISDIR;
  435                         goto out;
  436                 } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
  437                         td, 0)) != 0)
  438                         goto out;
  439         }
  440         error = VOP_SETATTR(vp, vap, cred, td);
              /* Re-read the attributes; a setattr error takes precedence. */
  441         postat_ret = VOP_GETATTR(vp, vap, cred, td);
  442         if (!error)
  443                 error = postat_ret;
  444 out:
  445         if (vp != NULL)
  446                 vput(vp);
  447 
  448         vp = NULL;
  449         nfsm_reply(NFSX_WCCORFATTR(v3));
  450         if (v3) {
  451                 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  452         } else if (!error) {
  453                 /* v2 non-error case. */
  454                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
  455                 nfsm_srvfillattr(vap, fp);
  456         }
  457         error = 0;
  458         /* fall through */
  459 
  460 nfsmout:
  461         if (vp)
  462                 vput(vp);
  463         vn_finished_write(mp);
  464         VFS_UNLOCK_GIANT(vfslocked);
  465         return(error);
  466 }
  467 
  468 /*
  469  * nfs lookup rpc
       *
       * Decodes a directory file handle and a name, resolves the name with
       * nfs_namei() and replies with the file handle and attributes of the
       * result (plus the directory's post-op attributes for v3).
       *
       * When the public file handle is used and the result is a directory,
       * an additional lookup() of nfs_pub.np_index is attempted; if it
       * succeeds, the index file replaces the directory as the result and
       * ndp is redirected from nd to ind.  A public-handle result that lies
       * on a mount other than nfs_pub.np_mount is refused with EPERM.
       *
       * Returns error; it is reset to 0 on the paths where a reply has
       * been built.
  470  */
  471 int
  472 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  473     struct thread *td, struct mbuf **mrq)
  474 {
  475         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  476         struct sockaddr *nam = nfsd->nd_nam;
  477         caddr_t dpos = nfsd->nd_dpos;
  478         struct ucred *cred = nfsd->nd_cr;
  479         struct nfs_fattr *fp;
  480         struct nameidata nd, ind, *ndp = &nd;
  481         struct vnode *vp, *dirp = NULL;
  482         nfsfh_t nfh;
  483         fhandle_t *fhp;
  484         caddr_t bpos;
  485         int error = 0, len, dirattr_ret = 1;
  486         int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
  487         struct mbuf *mb, *mreq;
  488         struct vattr va, dirattr, *vap = &va;
  489         int tvfslocked;
  490         int vfslocked;
  491 
  492         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* nfsmout tests nd fields; clear them before any nfsm macro. */
  493         ndclear(&nd);
  494         vfslocked = 0;
  495 
  496         fhp = &nfh.fh_generic;
  497         nfsm_srvmtofh(fhp);
  498         nfsm_srvnamesiz(len);
  499 
  500         pubflag = nfs_ispublicfh(fhp);
  501 
  502         nd.ni_cnd.cn_cred = cred;
  503         nd.ni_cnd.cn_nameiop = LOOKUP;
  504         nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART | MPSAFE;
  505         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
  506                 &dirp, v3, &dirattr, &dirattr_ret, td, pubflag);
  507         vfslocked = NDHASGIANT(&nd);
  508 
  509         /*
  510          * namei failure, only dirp to cleanup.  Clear out garbage from
  511          * structure in case macros jump to nfsmout.
  512          */
  513 
  514         if (error) {
  515                 if (dirp) {
  516                         vrele(dirp);
  517                         dirp = NULL;
  518                 }
  519                 nfsm_reply(NFSX_POSTOPATTR(v3));
  520                 if (v3)
  521                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  522                 error = 0;
  523                 goto nfsmout;
  524         }
  525 
  526         /*
  527          * Locate index file for public filehandle
  528          *
  529          * error is 0 on entry and 0 on exit from this block.
  530          */
  531 
  532         if (pubflag) {
  533                 if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) {
  534                         /*
  535                          * Setup call to lookup() to see if we can find
  536                          * the index file. Arguably, this doesn't belong
  537                          * in a kernel.. Ugh.  If an error occurs, do not
  538                          * try to install an index file and then clear the
  539                          * error.
  540                          *
  541                          * When we replace nd with ind and redirect ndp,
  542                          * maintenance of ni_startdir and ni_vp shift to
  543                          * ind and we have to clean them up in the old nd.
  544                          * However, the cnd resource continues to be maintained
  545                          * via the original nd.  Confused?  You aren't alone!
  546                          */
  547                         ind = nd;
  548                         VOP_UNLOCK(nd.ni_vp, 0, td);
  549                         ind.ni_pathlen = strlen(nfs_pub.np_index);
  550                         ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf =
  551                             nfs_pub.np_index;
  552                         ind.ni_startdir = nd.ni_vp;
  553                         VREF(ind.ni_startdir);
  554                         ind.ni_cnd.cn_flags &= ~GIANTHELD;
  555                         tvfslocked = VFS_LOCK_GIANT(ind.ni_startdir->v_mount);
                              /*
                               * NOTE(review): GIANTHELD is set on nd here
                               * although lookup() is handed ind -- confirm
                               * that this is intended.
                               */
  556                         if (tvfslocked)
  557                                 nd.ni_cnd.cn_flags |= GIANTHELD;
  558                         error = lookup(&ind);
  559                         ind.ni_dvp = NULL;
  560                         vfslocked = nfsrv_lockedpair_nd(vfslocked, &ind);
  561                         ind.ni_cnd.cn_flags &= ~GIANTHELD;
  562 
  563                         if (error == 0) {
  564                                 /*
  565                                  * Found an index file. Get rid of
  566                                  * the old references.  transfer nd.ni_vp'
  567                                  */
  568                                 if (dirp)
  569                                         vrele(dirp);
  570                                 dirp = nd.ni_vp;
  571                                 nd.ni_vp = NULL;
  572                                 vrele(nd.ni_startdir);
  573                                 nd.ni_startdir = NULL;
  574                                 ndp = &ind;
  575                         }
  576                         error = 0;
  577                 }
  578                 /*
  579                  * If the public filehandle was used, check that this lookup
  580                  * didn't result in a filehandle outside the publicly exported
  581                  * filesystem.  We clear the poor vp here to avoid lockups due
  582                  * to NFS I/O.
  583                  */
  584 
  585                 if (ndp->ni_vp->v_mount != nfs_pub.np_mount) {
                              /*
                               * Release through ndp, not nd: once an index
                               * file has been installed ndp points at ind
                               * and nd.ni_vp is already NULL, so the vnode
                               * that was just tested is ndp->ni_vp.  Clearing
                               * it keeps nfsmout from releasing it again.
                               */
  586                         vput(ndp->ni_vp);
  587                         ndp->ni_vp = NULL;
  588                         error = EPERM;
  589                 }
  590         }
  591 
  592         /*
  593          * Resources at this point:
  594          *      ndp->ni_vp      may not be NULL
  595          */
  596 
  597         if (error) {
  598                 nfsm_reply(NFSX_POSTOPATTR(v3));
  599                 if (v3)
  600                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  601                 error = 0;
  602                 goto nfsmout;
  603         }
  604 
  605         /*
  606          * Get underlying attribute, then release remaining resources ( for
  607          * the same potential blocking reason ) and reply.
  608          */
  609         vp = ndp->ni_vp;
  610         bzero((caddr_t)fhp, sizeof(nfh));
  611         fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
  612         error = VOP_VPTOFH(vp, &fhp->fh_fid);
  613         if (!error)
  614                 error = VOP_GETATTR(vp, vap, cred, td);
  615 
              /* Drop everything and NULL the pointers so nfsmout is a no-op. */
  616         vput(vp);
  617         vrele(ndp->ni_startdir);
  618         vrele(dirp);
  619         ndp->ni_vp = NULL;
  620         ndp->ni_startdir = NULL;
  621         dirp = NULL;
  622         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
  623         if (error) {
  624                 if (v3)
  625                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  626                 error = 0;
  627                 goto nfsmout;
  628         }
  629         nfsm_srvfhtom(fhp, v3);
  630         if (v3) {
  631                 nfsm_srvpostop_attr(0, vap);
  632                 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  633         } else {
  634                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
  635                 nfsm_srvfillattr(vap, fp);
  636         }
  637 
  638 nfsmout:
              /* Release whatever is still held through ndp and dirp. */
  639         if (ndp->ni_vp || dirp || ndp->ni_startdir) {
  640                 if (ndp->ni_vp)
  641                         vput(ndp->ni_vp);
  642                 if (dirp)
  643                         vrele(dirp);
  644                 if (ndp->ni_startdir)
  645                         vrele(ndp->ni_startdir);
  646         }
              /* The name buffer is always owned by the original nd. */
  647         NDFREE(&nd, NDF_ONLY_PNBUF);
  648         VFS_UNLOCK_GIANT(vfslocked);
  649         return (error);
  650 }
  651 
  652 /*
  653  * nfs readlink service
       *
       * Decodes the file handle, pre-builds an mbuf chain with room for
       * NFS_MAXPATHLEN bytes (one iovec per mbuf), and has VOP_READLINK()
       * read the link target into it.  A vnode that is not a VLNK yields
       * EINVAL for v3 and ENXIO for v2.  On success the chain is adjusted
       * to the number of bytes read and linked onto the reply after the
       * length word.
       *
       * Returns error; it is reset to 0 on the paths where a failure has
       * already been reported through nfsm_reply().
  654  */
  655 int
  656 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  657     struct thread *td, struct mbuf **mrq)
  658 {
  659         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  660         struct sockaddr *nam = nfsd->nd_nam;
  661         caddr_t dpos = nfsd->nd_dpos;
  662         struct ucred *cred = nfsd->nd_cr;
  663         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
  664         struct iovec *ivp = iv;
  665         struct mbuf *mp;
  666         u_int32_t *tl;
  667         caddr_t bpos;
  668         int error = 0, rdonly, i, tlen, len, getret;
  669         int v3 = (nfsd->nd_flag & ND_NFSV3);
  670         struct mbuf *mb, *mp3, *nmp, *mreq;
  671         struct vnode *vp = NULL;
  672         struct vattr attr;
  673         nfsfh_t nfh;
  674         fhandle_t *fhp;
  675         struct uio io, *uiop = &io;
  676         int vfslocked;
  677 
  678         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  679         vfslocked = 0;
  680 #ifndef nolint
  681         mp = NULL;
  682 #endif
              /* mp3 is the head of the data chain; nfsmout frees it if set. */
  683         mp3 = NULL;
  684         fhp = &nfh.fh_generic;
  685         nfsm_srvmtofh(fhp);
  686         len = 0;
  687         i = 0;
              /*
               * Allocate mbufs until the chain holds NFS_MAXPATHLEN bytes;
               * mp tracks the tail and each mbuf gets a matching iovec.
               */
  688         while (len < NFS_MAXPATHLEN) {
  689                 MGET(nmp, M_TRYWAIT, MT_DATA);
  690                 MCLGET(nmp, M_TRYWAIT);
  691                 nmp->m_len = NFSMSIZ(nmp);
  692                 if (len == 0)
  693                         mp3 = mp = nmp;
  694                 else {
  695                         mp->m_next = nmp;
  696                         mp = nmp;
  697                 }
                      /* Clip the last mbuf so the total is exactly NFS_MAXPATHLEN. */
  698                 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
  699                         mp->m_len = NFS_MAXPATHLEN - len;
  700                         len = NFS_MAXPATHLEN;
  701                 } else
  702                         len += mp->m_len;
  703                 ivp->iov_base = mtod(mp, caddr_t);
  704                 ivp->iov_len = mp->m_len;
  705                 i++;
  706                 ivp++;
  707         }
              /* Describe the chain as a kernel-space read of len bytes. */
  708         uiop->uio_iov = iv;
  709         uiop->uio_iovcnt = i;
  710         uiop->uio_offset = 0;
  711         uiop->uio_resid = len;
  712         uiop->uio_rw = UIO_READ;
  713         uiop->uio_segflg = UIO_SYSSPACE;
  714         uiop->uio_td = NULL;
  715         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
  716             nam, &rdonly, TRUE);
  717         if (error) {
  718                 nfsm_reply(2 * NFSX_UNSIGNED);
  719                 if (v3)
  720                         nfsm_srvpostop_attr(1, NULL);
  721                 error = 0;
  722                 goto nfsmout;
  723         }
  724         if (vp->v_type != VLNK) {
  725                 if (v3)
  726                         error = EINVAL;
  727                 else
  728                         error = ENXIO;
  729         } else 
  730                 error = VOP_READLINK(vp, uiop, cred);
  731         getret = VOP_GETATTR(vp, &attr, cred, td);
              /* Done with the vnode; NULL it so nfsmout does not vput() it again. */
  732         vput(vp);
  733         vp = NULL;
  734         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
  735         if (v3)
  736                 nfsm_srvpostop_attr(getret, &attr);
  737         if (error) {
  738                 error = 0;
  739                 goto nfsmout;
  740         }
              /*
               * Fewer bytes than reserved were read: len becomes the actual
               * length and tlen its nfsm_rndup() value; nfsm_adj() is handed
               * the excess and the padding amount.
               */
  741         if (uiop->uio_resid > 0) {
  742                 len -= uiop->uio_resid;
  743                 tlen = nfsm_rndup(len);
  744                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
  745         }
  746         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  747         *tl = txdr_unsigned(len);
              /* Hand the chain to the reply; clear mp3 so it is not freed below. */
  748         mb->m_next = mp3;
  749         mp3 = NULL;
  750 nfsmout:
  751         if (mp3)
  752                 m_freem(mp3);
  753         if (vp)
  754                 vput(vp);
  755         VFS_UNLOCK_GIANT(vfslocked);
  756         return(error);
  757 }
  758 
  759 /*
  760  * nfs read service
  761  */
  762 int
  763 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  764     struct thread *td, struct mbuf **mrq)
  765 {
  766         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  767         struct sockaddr *nam = nfsd->nd_nam;
  768         caddr_t dpos = nfsd->nd_dpos;
  769         struct ucred *cred = nfsd->nd_cr;
  770         struct iovec *iv;
  771         struct iovec *iv2;
  772         struct mbuf *m;
  773         struct nfs_fattr *fp;
  774         u_int32_t *tl;
  775         int i;
  776         caddr_t bpos;
  777         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
  778         int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
  779         struct mbuf *mb, *mreq;
  780         struct mbuf *m2;
  781         struct vnode *vp = NULL;
  782         nfsfh_t nfh;
  783         fhandle_t *fhp;
  784         struct uio io, *uiop = &io;
  785         struct vattr va, *vap = &va;
  786         struct nfsheur *nh;
  787         off_t off;
  788         int ioflag = 0;
  789         int vfslocked;
  790 
  791 
  792         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  793         vfslocked = 0;
  794         fhp = &nfh.fh_generic;
  795         nfsm_srvmtofh(fhp);
  796         if (v3) {
  797                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
  798                 off = fxdr_hyper(tl);
  799         } else {
  800                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  801                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
  802         }
  803         nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
  804 
  805         /*
  806          * Reference vp.  If an error occurs, vp will be invalid, but we
  807          * have to NULL it just in case.  The macros might goto nfsmout
  808          * as well.
  809          */
  810 
  811         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
  812             nam, &rdonly, TRUE);
  813         if (error) {
  814                 vp = NULL;
  815                 nfsm_reply(2 * NFSX_UNSIGNED);
  816                 if (v3)
  817                         nfsm_srvpostop_attr(1, NULL);
  818                 error = 0;
  819                 goto nfsmout;
  820         }
  821 
  822         if (vp->v_type != VREG) {
  823                 if (v3)
  824                         error = EINVAL;
  825                 else
  826                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
  827         }
  828         if (!error) {
  829                 if ((error = nfsrv_access(vp, VREAD, cred, rdonly,
  830                     td, 1)) != 0)
  831                         error = nfsrv_access(vp, VEXEC, cred,
  832                             rdonly, td, 1);
  833         }
  834         getret = VOP_GETATTR(vp, vap, cred, td);
  835         if (!error)
  836                 error = getret;
  837         if (error) {
  838                 vput(vp);
  839                 vp = NULL;
  840                 nfsm_reply(NFSX_POSTOPATTR(v3));
  841                 if (v3)
  842                         nfsm_srvpostop_attr(getret, vap);
  843                 error = 0;
  844                 goto nfsmout;
  845         }
  846 
  847         /*
  848          * Calculate byte count to read
  849          */
  850 
  851         if (off >= vap->va_size)
  852                 cnt = 0;
  853         else if ((off + reqlen) > vap->va_size)
  854                 cnt = vap->va_size - off;
  855         else
  856                 cnt = reqlen;
  857 
  858         /*
  859          * Calculate seqcount for heuristic
  860          */
  861 
  862         {
  863                 int hi;
  864                 int try = 32;
  865 
  866                 /*
  867                  * Locate best candidate
  868                  */
  869 
  870                 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
  871                 nh = &nfsheur[hi];
  872 
  873                 while (try--) {
  874                         if (nfsheur[hi].nh_vp == vp) {
  875                                 nh = &nfsheur[hi];
  876                                 break;
  877                         }
  878                         if (nfsheur[hi].nh_use > 0)
  879                                 --nfsheur[hi].nh_use;
  880                         hi = (hi + 1) % NUM_HEURISTIC;
  881                         if (nfsheur[hi].nh_use < nh->nh_use)
  882                                 nh = &nfsheur[hi];
  883                 }
  884 
  885                 if (nh->nh_vp != vp) {
  886                         nh->nh_vp = vp;
  887                         nh->nh_nextr = off;
  888                         nh->nh_use = NHUSE_INIT;
  889                         if (off == 0)
  890                                 nh->nh_seqcount = 4;
  891                         else
  892                                 nh->nh_seqcount = 1;
  893                 }
  894 
  895                 /*
  896                  * Calculate heuristic
  897                  */
  898 
  899                 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
  900                         if (++nh->nh_seqcount > IO_SEQMAX)
  901                                 nh->nh_seqcount = IO_SEQMAX;
  902                 } else if (nh->nh_seqcount > 1) {
  903                         nh->nh_seqcount = 1;
  904                 } else {
  905                         nh->nh_seqcount = 0;
  906                 }
  907                 nh->nh_use += NHUSE_INC;
  908                 if (nh->nh_use > NHUSE_MAX)
  909                         nh->nh_use = NHUSE_MAX;
  910                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
  911         }
  912 
  913         nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
  914         if (v3) {
  915                 tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
  916                 *tl++ = nfsrv_nfs_true;
  917                 fp = (struct nfs_fattr *)tl;
  918                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
  919         } else {
  920                 tl = nfsm_build(u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
  921                 fp = (struct nfs_fattr *)tl;
  922                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
  923         }
  924         len = left = nfsm_rndup(cnt);
  925         if (cnt > 0) {
  926                 /*
  927                  * Generate the mbuf list with the uio_iov ref. to it.
  928                  */
  929                 i = 0;
  930                 m = m2 = mb;
  931                 while (left > 0) {
  932                         siz = min(M_TRAILINGSPACE(m), left);
  933                         if (siz > 0) {
  934                                 left -= siz;
  935                                 i++;
  936                         }
  937                         if (left > 0) {
  938                                 MGET(m, M_TRYWAIT, MT_DATA);
  939                                 MCLGET(m, M_TRYWAIT);
  940                                 m->m_len = 0;
  941                                 m2->m_next = m;
  942                                 m2 = m;
  943                         }
  944                 }
  945                 MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
  946                        M_TEMP, M_WAITOK);
  947                 uiop->uio_iov = iv2 = iv;
  948                 m = mb;
  949                 left = len;
  950                 i = 0;
  951                 while (left > 0) {
  952                         if (m == NULL)
  953                                 panic("nfsrv_read iov");
  954                         siz = min(M_TRAILINGSPACE(m), left);
  955                         if (siz > 0) {
  956                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
  957                                 iv->iov_len = siz;
  958                                 m->m_len += siz;
  959                                 left -= siz;
  960                                 iv++;
  961                                 i++;
  962                         }
  963                         m = m->m_next;
  964                 }
  965                 uiop->uio_iovcnt = i;
  966                 uiop->uio_offset = off;
  967                 uiop->uio_resid = len;
  968                 uiop->uio_rw = UIO_READ;
  969                 uiop->uio_segflg = UIO_SYSSPACE;
  970                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
  971                 off = uiop->uio_offset;
  972                 nh->nh_nextr = off;
  973                 FREE((caddr_t)iv2, M_TEMP);
  974                 if (error || (getret = VOP_GETATTR(vp, vap, cred, td))) {
  975                         if (!error)
  976                                 error = getret;
  977                         m_freem(mreq);
  978                         vput(vp);
  979                         vp = NULL;
  980                         nfsm_reply(NFSX_POSTOPATTR(v3));
  981                         if (v3)
  982                                 nfsm_srvpostop_attr(getret, vap);
  983                         error = 0;
  984                         goto nfsmout;
  985                 }
  986         } else
  987                 uiop->uio_resid = 0;
  988         vput(vp);
  989         vp = NULL;
  990         nfsm_srvfillattr(vap, fp);
  991         tlen = len - uiop->uio_resid;
  992         cnt = cnt < tlen ? cnt : tlen;
  993         tlen = nfsm_rndup(cnt);
  994         if (len != tlen || tlen != cnt)
  995                 nfsm_adj(mb, len - tlen, tlen - cnt);
  996         if (v3) {
  997                 *tl++ = txdr_unsigned(cnt);
  998                 if (cnt < reqlen)
  999                         *tl++ = nfsrv_nfs_true;
 1000                 else
 1001                         *tl++ = nfsrv_nfs_false;
 1002         }
 1003         *tl = txdr_unsigned(cnt);
 1004 nfsmout:
 1005         if (vp)
 1006                 vput(vp);
 1007         VFS_UNLOCK_GIANT(vfslocked);
 1008         return(error);
 1009 }
 1010 
 1011 /*
 1012  * nfs write service
 1013  */
 1014 int
 1015 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1016     struct thread *td, struct mbuf **mrq)
 1017 {
 1018         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1019         struct sockaddr *nam = nfsd->nd_nam;
 1020         caddr_t dpos = nfsd->nd_dpos;
 1021         struct ucred *cred = nfsd->nd_cr;
 1022         struct iovec *ivp;
 1023         int i, cnt;
 1024         struct mbuf *mp;
 1025         struct nfs_fattr *fp;
 1026         struct iovec *iv;
 1027         struct vattr va, forat;
 1028         struct vattr *vap = &va;
 1029         u_int32_t *tl;
 1030         caddr_t bpos;
 1031         int error = 0, rdonly, len, forat_ret = 1;
 1032         int ioflags, aftat_ret = 1, retlen = 0, zeroing, adjust;
 1033         int stable = NFSV3WRITE_FILESYNC;
 1034         int v3 = (nfsd->nd_flag & ND_NFSV3);
 1035         struct mbuf *mb, *mreq;
 1036         struct vnode *vp = NULL;
 1037         nfsfh_t nfh;
 1038         fhandle_t *fhp;
 1039         struct uio io, *uiop = &io;
 1040         off_t off;
 1041         struct mount *mntp = NULL;
 1042         int tvfslocked;
 1043         int vfslocked;
 1044 
 1045         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1046         vfslocked = 0;
 1047         if (mrep == NULL) {
 1048                 *mrq = NULL;
 1049                 error = 0;
 1050                 goto nfsmout;
 1051         }
 1052         fhp = &nfh.fh_generic;
 1053         nfsm_srvmtofh(fhp);
 1054         if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 1055                 error = ESTALE;
 1056                 goto ereply;
 1057         }
 1058         vfslocked = VFS_LOCK_GIANT(mntp);
 1059         (void) vn_start_write(NULL, &mntp, V_WAIT);
 1060         vfs_rel(mntp);          /* The write holds a ref. */
 1061         if (v3) {
 1062                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 1063                 off = fxdr_hyper(tl);
 1064                 tl += 3;
 1065                 stable = fxdr_unsigned(int, *tl++);
 1066         } else {
 1067                 tl = nfsm_dissect_nonblock(u_int32_t *, 4 * NFSX_UNSIGNED);
 1068                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
 1069                 tl += 2;
 1070                 if (nfs_async)
 1071                         stable = NFSV3WRITE_UNSTABLE;
 1072         }
 1073         retlen = len = fxdr_unsigned(int32_t, *tl);
 1074         cnt = i = 0;
 1075 
 1076         /*
 1077          * For NFS Version 2, it is not obvious what a write of zero length
 1078          * should do, but I might as well be consistent with Version 3,
 1079          * which is to return ok so long as there are no permission problems.
 1080          */
 1081         if (len > 0) {
 1082             zeroing = 1;
 1083             mp = mrep;
 1084             while (mp) {
 1085                 if (mp == md) {
 1086                         zeroing = 0;
 1087                         adjust = dpos - mtod(mp, caddr_t);
 1088                         mp->m_len -= adjust;
 1089                         if (mp->m_len > 0 && adjust > 0)
 1090                                 mp->m_data += adjust;
 1091                 }
 1092                 if (zeroing)
 1093                         mp->m_len = 0;
 1094                 else if (mp->m_len > 0) {
 1095                         i += mp->m_len;
 1096                         if (i > len) {
 1097                                 mp->m_len -= (i - len);
 1098                                 zeroing = 1;
 1099                         }
 1100                         if (mp->m_len > 0)
 1101                                 cnt++;
 1102                 }
 1103                 mp = mp->m_next;
 1104             }
 1105         }
 1106         if (len > NFS_MAXDATA || len < 0 || i < len) {
 1107                 error = EIO;
 1108                 nfsm_reply(2 * NFSX_UNSIGNED);
 1109                 if (v3)
 1110                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1111                 error = 0;
 1112                 goto nfsmout;
 1113         }
 1114         error = nfsrv_fhtovp(fhp, 1, &vp, &tvfslocked, cred, slp,
 1115             nam, &rdonly, TRUE);
 1116         vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
 1117         if (error) {
 1118                 vp = NULL;
 1119                 nfsm_reply(2 * NFSX_UNSIGNED);
 1120                 if (v3)
 1121                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1122                 error = 0;
 1123                 goto nfsmout;
 1124         }
 1125         if (v3)
 1126                 forat_ret = VOP_GETATTR(vp, &forat, cred, td);
 1127         if (vp->v_type != VREG) {
 1128                 if (v3)
 1129                         error = EINVAL;
 1130                 else
 1131                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
 1132         }
 1133         if (!error)
 1134                 error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
 1135         if (error) {
 1136                 vput(vp);
 1137                 vp = NULL;
 1138                 nfsm_reply(NFSX_WCCDATA(v3));
 1139                 if (v3)
 1140                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1141                 error = 0;
 1142                 goto nfsmout;
 1143         }
 1144 
 1145         if (len > 0) {
 1146             MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
 1147                 M_WAITOK);
 1148             uiop->uio_iov = iv = ivp;
 1149             uiop->uio_iovcnt = cnt;
 1150             mp = mrep;
 1151             while (mp) {
 1152                 if (mp->m_len > 0) {
 1153                         ivp->iov_base = mtod(mp, caddr_t);
 1154                         ivp->iov_len = mp->m_len;
 1155                         ivp++;
 1156                 }
 1157                 mp = mp->m_next;
 1158             }
 1159 
 1160             /*
 1161              * XXX
 1162              * The IO_METASYNC flag indicates that all metadata (and not just
 1163              * enough to ensure data integrity) mus be written to stable storage
 1164              * synchronously.
 1165              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
 1166              */
 1167             if (stable == NFSV3WRITE_UNSTABLE)
 1168                 ioflags = IO_NODELOCKED;
 1169             else if (stable == NFSV3WRITE_DATASYNC)
 1170                 ioflags = (IO_SYNC | IO_NODELOCKED);
 1171             else
 1172                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
 1173             uiop->uio_resid = len;
 1174             uiop->uio_rw = UIO_WRITE;
 1175             uiop->uio_segflg = UIO_SYSSPACE;
 1176             uiop->uio_td = NULL;
 1177             uiop->uio_offset = off;
 1178             error = VOP_WRITE(vp, uiop, ioflags, cred);
 1179             /* XXXRW: unlocked write. */
 1180             nfsrvstats.srvvop_writes++;
 1181             FREE((caddr_t)iv, M_TEMP);
 1182         }
 1183         aftat_ret = VOP_GETATTR(vp, vap, cred, td);
 1184         vput(vp);
 1185         vp = NULL;
 1186         if (!error)
 1187                 error = aftat_ret;
 1188 ereply:
 1189         nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
 1190                 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
 1191         if (v3) {
 1192                 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1193                 if (error) {
 1194                         error = 0;
 1195                         goto nfsmout;
 1196                 }
 1197                 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 1198                 *tl++ = txdr_unsigned(retlen);
 1199                 /*
 1200                  * If nfs_async is set, then pretend the write was FILESYNC.
 1201                  */
 1202                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
 1203                         *tl++ = txdr_unsigned(stable);
 1204                 else
 1205                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
 1206                 /*
 1207                  * Actually, there is no need to txdr these fields,
 1208                  * but it may make the values more human readable,
 1209                  * for debugging purposes.
 1210                  */
 1211                 if (nfsver.tv_sec == 0)
 1212                         nfsver = boottime;
 1213                 *tl++ = txdr_unsigned(nfsver.tv_sec);
 1214                 *tl = txdr_unsigned(nfsver.tv_usec);
 1215         } else if (!error) {
 1216                 /* v2 non-error case. */
 1217                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 1218                 nfsm_srvfillattr(vap, fp);
 1219         }
 1220         error = 0;
 1221 nfsmout:
 1222         if (vp)
 1223                 vput(vp);
 1224         vn_finished_write(mntp);
 1225         VFS_UNLOCK_GIANT(vfslocked);
 1226         return(error);
 1227 }
 1228 
 1229 /*
 1230  * For the purposes of write gathering, we must decide if the credential
 1231  * associated with two pending requests have equivilent privileges.  Since
 1232  * NFS only uses a subset of the BSD ucred -- the effective uid and group
 1233  * IDs -- we have a compare routine that checks only the relevant fields.
 1234  */
 1235 static int
 1236 nfsrv_samecred(struct ucred *cr1, struct ucred *cr2)
 1237 {
 1238         int i;
 1239 
 1240         if (cr1->cr_uid != cr2->cr_uid)
 1241                 return (0);
 1242         if (cr1->cr_ngroups != cr2->cr_ngroups)
 1243                 return (0);
 1244         for (i = 0; i < cr1->cr_ngroups; i++) {
 1245                 if (cr1->cr_groups[i] != cr2->cr_groups[i])
 1246                         return (0);
 1247         }
 1248         return (1);
 1249 }
 1250 
 1251 /*
 1252  * NFS write service with write gathering support. Called when
 1253  * nfsrvw_procrastinate > 0.
 1254  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
 1255  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
 1256  * Jan. 1994.
 1257  */
 1258 int
 1259 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
 1260     struct thread *td, struct mbuf **mrq)
 1261 {
 1262         struct iovec *ivp;
 1263         struct mbuf *mp;
 1264         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
 1265         struct nfs_fattr *fp;
 1266         int i;
 1267         struct iovec *iov;
 1268         struct nfsrvw_delayhash *wpp;
 1269         struct ucred *cred;
 1270         struct vattr va, forat;
 1271         u_int32_t *tl;
 1272         caddr_t bpos, dpos;
 1273         int error = 0, rdonly, len, forat_ret = 1;
 1274         int ioflags, aftat_ret = 1, s, adjust, v3, zeroing;
 1275         struct mbuf *mb, *mreq, *mrep, *md;
 1276         struct vnode *vp = NULL;
 1277         struct uio io, *uiop = &io;
 1278         u_quad_t cur_usec;
 1279         struct mount *mntp = NULL;
 1280         int mvfslocked;
 1281         int vfslocked;
 1282 
 1283         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1284 #ifndef nolint
 1285         i = 0;
 1286         len = 0;
 1287 #endif
 1288         *mrq = NULL;
 1289         if (*ndp) {
 1290             nfsd = *ndp;
 1291             *ndp = NULL;
 1292             mrep = nfsd->nd_mrep;
 1293             md = nfsd->nd_md;
 1294             dpos = nfsd->nd_dpos;
 1295             cred = nfsd->nd_cr;
 1296             v3 = (nfsd->nd_flag & ND_NFSV3);
 1297             LIST_INIT(&nfsd->nd_coalesce);
 1298             nfsd->nd_mreq = NULL;
 1299             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
 1300             cur_usec = nfs_curusec();
 1301             nfsd->nd_time = cur_usec +
 1302                 (v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
 1303 
 1304             /*
 1305              * Now, get the write header..
 1306              */
 1307             nfsm_srvmtofh(&nfsd->nd_fh);
 1308             if (v3) {
 1309                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 1310                 nfsd->nd_off = fxdr_hyper(tl);
 1311                 tl += 3;
 1312                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
 1313             } else {
 1314                 tl = nfsm_dissect_nonblock(u_int32_t *, 4 * NFSX_UNSIGNED);
 1315                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
 1316                 tl += 2;
 1317                 if (nfs_async)
 1318                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
 1319             }
 1320             len = fxdr_unsigned(int32_t, *tl);
 1321             nfsd->nd_len = len;
 1322             nfsd->nd_eoff = nfsd->nd_off + len;
 1323 
 1324             /*
 1325              * Trim the header out of the mbuf list and trim off any trailing
 1326              * junk so that the mbuf list has only the write data.
 1327              */
 1328             zeroing = 1;
 1329             i = 0;
 1330             mp = mrep;
 1331             while (mp) {
 1332                 if (mp == md) {
 1333                     zeroing = 0;
 1334                     adjust = dpos - mtod(mp, caddr_t);
 1335                     mp->m_len -= adjust;
 1336                     if (mp->m_len > 0 && adjust > 0)
 1337                         mp->m_data += adjust;
 1338                 }
 1339                 if (zeroing)
 1340                     mp->m_len = 0;
 1341                 else {
 1342                     i += mp->m_len;
 1343                     if (i > len) {
 1344                         mp->m_len -= (i - len);
 1345                         zeroing = 1;
 1346                     }
 1347                 }
 1348                 mp = mp->m_next;
 1349             }
 1350             if (len > NFS_MAXDATA || len < 0  || i < len) {
 1351 nfsmout:
 1352                 m_freem(mrep);
 1353                 error = EIO;
 1354                 nfsm_writereply(2 * NFSX_UNSIGNED);
 1355                 if (v3)
 1356                     nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
 1357                 nfsd->nd_mreq = mreq;
 1358                 nfsd->nd_mrep = NULL;
 1359                 nfsd->nd_time = 0;
 1360             }
 1361 
 1362             /*
 1363              * Add this entry to the hash and time queues.
 1364              */
 1365             s = splsoftclock();
 1366             owp = NULL;
 1367             wp = LIST_FIRST(&slp->ns_tq);
 1368             while (wp && wp->nd_time < nfsd->nd_time) {
 1369                 owp = wp;
 1370                 wp = LIST_NEXT(wp, nd_tq);
 1371             }
 1372             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
 1373             if (owp) {
 1374                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
 1375             } else {
 1376                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
 1377             }
 1378             if (nfsd->nd_mrep) {
 1379                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
 1380                 owp = NULL;
 1381                 wp = LIST_FIRST(wpp);
 1382                 while (wp &&
 1383                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh, NFSX_V3FH)){
 1384                     owp = wp;
 1385                     wp = LIST_NEXT(wp, nd_hash);
 1386                 }
 1387                 while (wp && wp->nd_off < nfsd->nd_off &&
 1388                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh, NFSX_V3FH)) {
 1389                     owp = wp;
 1390                     wp = LIST_NEXT(wp, nd_hash);
 1391                 }
 1392                 if (owp) {
 1393                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
 1394 
 1395                     /*
 1396                      * Search the hash list for overlapping entries and
 1397                      * coalesce.
 1398                      */
 1399                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
 1400                         wp = LIST_NEXT(nfsd, nd_hash);
 1401                         if (nfsrv_samecred(owp->nd_cr, nfsd->nd_cr))
 1402                             nfsrvw_coalesce(owp, nfsd);
 1403                     }
 1404                 } else {
 1405                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
 1406                 }
 1407             }
 1408             splx(s);
 1409         }
 1410 
 1411         /*
 1412          * Now, do VOP_WRITE()s for any one(s) that need to be done now
 1413          * and generate the associated reply mbuf list(s).
 1414          */
 1415 loop1:
 1416         cur_usec = nfs_curusec();
 1417         s = splsoftclock();
 1418         for (nfsd = LIST_FIRST(&slp->ns_tq); nfsd; nfsd = owp) {
 1419                 owp = LIST_NEXT(nfsd, nd_tq);
 1420                 if (nfsd->nd_time > cur_usec)
 1421                     break;
 1422                 if (nfsd->nd_mreq)
 1423                     continue;
 1424                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
 1425                 LIST_REMOVE(nfsd, nd_tq);
 1426                 LIST_REMOVE(nfsd, nd_hash);
 1427                 splx(s);
 1428                 mrep = nfsd->nd_mrep;
 1429                 nfsd->nd_mrep = NULL;
 1430                 cred = nfsd->nd_cr;
 1431                 v3 = (nfsd->nd_flag & ND_NFSV3);
 1432                 forat_ret = aftat_ret = 1;
 1433                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, &vfslocked, cred,
 1434                     slp, nfsd->nd_nam, &rdonly, TRUE);
 1435                 if (!error) {
 1436                     if (v3)
 1437                         forat_ret = VOP_GETATTR(vp, &forat, cred, td);
 1438                     if (vp->v_type != VREG) {
 1439                         if (v3)
 1440                             error = EINVAL;
 1441                         else
 1442                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
 1443                     }
 1444                 } else {
 1445                     vp = NULL;
 1446                 }
 1447                 if (!error)
 1448                     error = nfsrv_access(vp, VWRITE, cred, rdonly,
 1449                         td, 1);
 1450                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
 1451                     ioflags = IO_NODELOCKED;
 1452                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
 1453                     ioflags = (IO_SYNC | IO_NODELOCKED);
 1454                 else
 1455                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
 1456                 uiop->uio_rw = UIO_WRITE;
 1457                 uiop->uio_segflg = UIO_SYSSPACE;
 1458                 uiop->uio_td = NULL;
 1459                 uiop->uio_offset = nfsd->nd_off;
 1460                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
 1461                 if (uiop->uio_resid > 0) {
 1462                     mp = mrep;
 1463                     i = 0;
 1464                     while (mp) {
 1465                         if (mp->m_len > 0)
 1466                             i++;
 1467                         mp = mp->m_next;
 1468                     }
 1469                     uiop->uio_iovcnt = i;
 1470                     MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
 1471                         M_TEMP, M_WAITOK);
 1472                     uiop->uio_iov = ivp = iov;
 1473                     mp = mrep;
 1474                     while (mp) {
 1475                         if (mp->m_len > 0) {
 1476                             ivp->iov_base = mtod(mp, caddr_t);
 1477                             ivp->iov_len = mp->m_len;
 1478                             ivp++;
 1479                         }
 1480                         mp = mp->m_next;
 1481                     }
 1482                     mvfslocked = 0;
 1483                     if (!error) {
 1484                         if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
 1485                             VOP_UNLOCK(vp, 0, td);
 1486                             error = vn_start_write(NULL, &mntp, V_WAIT);
 1487                             vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1488                         }
 1489                         mvfslocked = VFS_LOCK_GIANT(mntp);
 1490                     }
 1491                     if (!error) {
 1492                         error = VOP_WRITE(vp, uiop, ioflags, cred);
 1493                         /* XXXRW: unlocked write. */
 1494                         nfsrvstats.srvvop_writes++;
 1495                         vn_finished_write(mntp);
 1496                     }
 1497                     VFS_UNLOCK_GIANT(mvfslocked);
 1498                     FREE((caddr_t)iov, M_TEMP);
 1499                 }
 1500                 m_freem(mrep);
 1501                 if (vp) {
 1502                     aftat_ret = VOP_GETATTR(vp, &va, cred, td);
 1503                     vput(vp);
 1504                     vp = NULL;
 1505                 }
 1506                 VFS_UNLOCK_GIANT(vfslocked);
 1507                 /*
 1508                  * Loop around generating replies for all write rpcs that have
 1509                  * now been completed.
 1510                  */
 1511                 swp = nfsd;
 1512                 do {
 1513                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
 1514                     if (error) {
 1515                         nfsm_writereply(NFSX_WCCDATA(v3));
 1516                         if (v3) {
 1517                             nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
 1518                         }
 1519                     } else {
 1520                         nfsm_writereply(NFSX_PREOPATTR(v3) +
 1521                             NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
 1522                             NFSX_WRITEVERF(v3));
 1523                         if (v3) {
 1524                             nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
 1525                             tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 1526                             *tl++ = txdr_unsigned(nfsd->nd_len);
 1527                             *tl++ = txdr_unsigned(swp->nd_stable);
 1528                             /*
 1529                              * Actually, there is no need to txdr these fields,
 1530                              * but it may make the values more human readable,
 1531                              * for debugging purposes.
 1532                              */
 1533                             if (nfsver.tv_sec == 0)
 1534                                     nfsver = boottime;
 1535                             *tl++ = txdr_unsigned(nfsver.tv_sec);
 1536                             *tl = txdr_unsigned(nfsver.tv_usec);
 1537                         } else {
 1538                             fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 1539                             nfsm_srvfillattr(&va, fp);
 1540                         }
 1541                     }
 1542                     nfsd->nd_mreq = mreq;
 1543                     if (nfsd->nd_mrep)
 1544                         panic("nfsrv_write: nd_mrep not free");
 1545 
 1546                     /*
 1547                      * Done. Put it at the head of the timer queue so that
 1548                      * the final phase can return the reply.
 1549                      */
 1550                     s = splsoftclock();
 1551                     if (nfsd != swp) {
 1552                         nfsd->nd_time = 0;
 1553                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
 1554                     }
 1555                     nfsd = LIST_FIRST(&swp->nd_coalesce);
 1556                     if (nfsd) {
 1557                         LIST_REMOVE(nfsd, nd_tq);
 1558                     }
 1559                     splx(s);
 1560                 } while (nfsd);
 1561                 s = splsoftclock();
 1562                 swp->nd_time = 0;
 1563                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
 1564                 splx(s);
 1565                 goto loop1;
 1566         }
 1567         splx(s);
 1568 
 1569         /*
 1570          * Search for a reply to return.
 1571          */
 1572         s = splsoftclock();
 1573         LIST_FOREACH(nfsd, &slp->ns_tq, nd_tq)
 1574                 if (nfsd->nd_mreq) {
 1575                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
 1576                     LIST_REMOVE(nfsd, nd_tq);
 1577                     *mrq = nfsd->nd_mreq;
 1578                     *ndp = nfsd;
 1579                     break;
 1580                 }
 1581         splx(s);
 1582         return (0);
 1583 }
 1584 
 1585 /*
 1586  * Fold the write request nfsd into the gathering request owp:
 1587  * - take nfsd off its nd_hash and nd_tq lists
 1588  * - append whatever part of nfsd->nd_mrep lies beyond owp onto owp->nd_mrep
 1589  * - raise owp's nd_eoff and nd_stable so that they also cover nfsd
 1590  * - hang nfsd, and anything already coalesced into it, on owp->nd_coalesce
 1591  * NB: Must be called at splsoftclock().
 1592  */
 1593 static void
 1594 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
 1595 {
 1596         struct nfsrv_descript *kid;
 1597         struct mbuf *tail;
 1598         int shared;
 1599 
 1600         NFS_DPF(WG, ("C%03x-%03x",
 1601                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
 1602         LIST_REMOVE(nfsd, nd_hash);
 1603         LIST_REMOVE(nfsd, nd_tq);
 1604         if (owp->nd_eoff >= nfsd->nd_eoff) {
 1605             /* nfsd ends no later than owp does: its data is not kept. */
 1606             m_freem(nfsd->nd_mrep);
 1607         } else {
 1608             /* Trim the bytes owp already holds, then chain on the rest. */
 1609             shared = owp->nd_eoff - nfsd->nd_off;
 1610             if (shared < 0)
 1611                 panic("nfsrv_coalesce: bad off");
 1612             if (shared > 0)
 1613                 m_adj(nfsd->nd_mrep, shared);
 1614             for (tail = owp->nd_mrep; tail->m_next != NULL; )
 1615                 tail = tail->m_next;
 1616             tail->m_next = nfsd->nd_mrep;
 1617             owp->nd_eoff = nfsd->nd_eoff;
 1618         }
 1619         nfsd->nd_mrep = NULL;
 1620 
 1621         /* nd_stable only moves up: FILESYNC wins, DATASYNC beats UNSTABLE. */
 1622         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
 1623             owp->nd_stable = NFSV3WRITE_FILESYNC;
 1624         else if (owp->nd_stable == NFSV3WRITE_UNSTABLE &&
 1625             nfsd->nd_stable == NFSV3WRITE_DATASYNC)
 1626             owp->nd_stable = NFSV3WRITE_DATASYNC;
 1627 
 1628         /* Queue nfsd, then adopt its followers so their replies get sent. */
 1629         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
 1630         while ((kid = LIST_FIRST(&nfsd->nd_coalesce)) != NULL) {
 1631             LIST_REMOVE(kid, nd_tq);
 1632             LIST_INSERT_HEAD(&owp->nd_coalesce, kid, nd_tq);
 1633         }
 1634 }
 1635 
 1636 /*
 1637  * nfs create service
 1638  *
       * If the name does not exist yet, a regular file or socket is made with
       * VOP_CREATE().  For NFSv2 only, VCHR/VBLK/VFIFO requests are made with
       * VOP_MKNOD() and the new name is then looked up again to get its vnode.
       *
       * If the name already exists nothing is created; when the request
       * carries a size, the existing file is set to that size with
       * VOP_SETATTR() after a write-access check.
       *
       * An NFSv3 EXCLUSIVE create stores the client's verifier in va_atime of
       * the new file; when the file already existed the stored value is
       * compared with the verifier and a mismatch is reported as EEXIST.
       *
       * nfsd supplies the request mbufs, client address, credential and the
       * NFSv3 flag; slp is passed through to nfs_namei(); td is the thread
       * handed to the VOP calls; mrq is not referenced by name here
       * (presumably nfsm_reply() stores the reply through it -- confirm).
       *
       * Returns error, which is cleared before both explicit exits; a
       * non-zero value can only come from a path that reaches nfsmout with
       * error still set (presumably a jump out of an nfsm_* macro -- confirm).
 1639  */
 1640 int
 1641 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1642     struct thread *td, struct mbuf **mrq)
 1643 {
 1644         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1645         struct sockaddr *nam = nfsd->nd_nam;
 1646         caddr_t dpos = nfsd->nd_dpos;
 1647         struct ucred *cred = nfsd->nd_cr;
 1648         struct nfs_fattr *fp;
 1649         struct vattr va, dirfor, diraft;
 1650         struct vattr *vap = &va;
 1651         struct nfsv2_sattr *sp;
 1652         u_int32_t *tl;
 1653         struct nameidata nd;
 1654         caddr_t bpos;
 1655         int error = 0, rdev, len, tsize, dirfor_ret = 1, diraft_ret = 1;
 1656         int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
 1657         caddr_t cp;
 1658         struct mbuf *mb, *mreq;
 1659         struct vnode *dirp = NULL;
 1660         nfsfh_t nfh;
 1661         fhandle_t *fhp;
 1662         u_quad_t tempsize;
 1663         u_char cverf[NFSX_V3CREATEVERF];
 1664         struct mount *mp = NULL;
 1665         int tvfslocked;
 1666         int vfslocked;
 1667 
 1668         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1669         vfslocked = 0;
 1670 #ifndef nolint
 1671         rdev = 0;
 1672 #endif
 1673         ndclear(&nd);
 1674 
 1675         fhp = &nfh.fh_generic;
 1676         nfsm_srvmtofh(fhp);
              /* No mounted filesystem matches the handle's fsid: reply ESTALE. */
 1677         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 1678                 error = ESTALE;
 1679                 goto ereply;
 1680         }
 1681         vfslocked = VFS_LOCK_GIANT(mp);
 1682         (void) vn_start_write(NULL, &mp, V_WAIT);
 1683         vfs_rel(mp);            /* The write holds a ref. */
 1684         nfsm_srvnamesiz(len);
 1685 
 1686         nd.ni_cnd.cn_cred = cred;
 1687         nd.ni_cnd.cn_nameiop = CREATE;
 1688         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE;
 1689 
 1690         /*
 1691          * Call namei and do initial cleanup to get a few things
 1692          * out of the way.  If we get an initial error we clean up
 1693          * and return here to avoid special-casing the invalid nd
 1694          * structure through the rest of the case.  dirp may be
 1695          * set even if an error occurs, but the nd structure will not
 1696          * be valid at all if an error occurs so we have to invalidate it
 1697          * prior to calling nfsm_reply ( which might goto nfsmout ).
 1698          */
 1699         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 1700                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 1701         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* dirp is only needed for the NFSv3 wcc data; drop it for NFSv2. */
 1702         if (dirp && !v3) {
 1703                 vrele(dirp);
 1704                 dirp = NULL;
 1705         }
 1706         if (error) {
 1707                 nfsm_reply(NFSX_WCCDATA(v3));
 1708                 if (v3)
 1709                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1710                 error = 0;
 1711                 goto nfsmout;
 1712         }
 1713 
 1714         /*
 1715          * No error.  Continue.  State:
 1716          *
 1717          *      startdir        is valid ( we release this immediately )
 1718          *      dirp            may be valid
 1719          *      nd.ni_vp        may be valid
 1720          *      nd.ni_dvp       is valid
 1721          *
 1722          * The error state is set through the code and we may also do some
 1723          * opportunistic releasing of vnodes to avoid holding locks through
 1724          * NFS I/O.  The cleanup at the end is a catch-all
 1725          */
 1726 
 1727         VATTR_NULL(vap);
 1728         if (v3) {
 1729                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 1730                 how = fxdr_unsigned(int, *tl);
                      /*
                       * There is no default case: an unrecognized mode leaves
                       * vap as VATTR_NULL() set it, apart from va_type below.
                       */
 1731                 switch (how) {
 1732                 case NFSV3CREATE_GUARDED:
 1733                         if (nd.ni_vp) {
 1734                                 error = EEXIST;
 1735                                 break;
 1736                         }
 1737                         /* fall through */
 1738                 case NFSV3CREATE_UNCHECKED:
 1739                         nfsm_srvsattr(vap);
 1740                         break;
 1741                 case NFSV3CREATE_EXCLUSIVE:
 1742                         cp = nfsm_dissect_nonblock(caddr_t, NFSX_V3CREATEVERF);
 1743                         bcopy(cp, cverf, NFSX_V3CREATEVERF);
 1744                         exclusive_flag = 1;
 1745                         break;
 1746                 };
 1747                 vap->va_type = VREG;
 1748         } else {
                      /*
                       * NFSv2: type, mode and size all come from the sattr;
                       * for VCHR/VBLK/VFIFO the sa_size field is read as the
                       * device number instead of a size.
                       */
 1749                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
 1750                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
 1751                 if (vap->va_type == VNON)
 1752                         vap->va_type = VREG;
 1753                 vap->va_mode = nfstov_mode(sp->sa_mode);
 1754                 switch (vap->va_type) {
 1755                 case VREG:
 1756                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
 1757                         if (tsize != -1)
 1758                                 vap->va_size = (u_quad_t)tsize;
 1759                         break;
 1760                 case VCHR:
 1761                 case VBLK:
 1762                 case VFIFO:
 1763                         rdev = fxdr_unsigned(long, sp->sa_size);
 1764                         break;
 1765                 default:
 1766                         break;
 1767                 };
 1768         }
 1769 
 1770         /*
 1771          * Iff doesn't exist, create it
 1772          * otherwise just truncate to 0 length
 1773          *   should I set the mode too ?
 1774          *
 1775          * The only possible error we can have at this point is EEXIST.
 1776          * nd.ni_vp will also be non-NULL in that case.
 1777          */
 1778         if (nd.ni_vp == NULL) {
                      /* No mode was supplied: create with mode 0. */
 1779                 if (vap->va_mode == (mode_t)VNOVAL)
 1780                         vap->va_mode = 0;
 1781                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
 1782                         error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 1783                         if (error)
 1784                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1785                         else {
                                      /*
                                       * Fresh exclusive create: record the
                                       * verifier in va_atime and clear the
                                       * flag so the comparison further down
                                       * is skipped for this file.
                                       */
 1786                                 if (exclusive_flag) {
 1787                                         exclusive_flag = 0;
 1788                                         VATTR_NULL(vap);
 1789                                         bcopy(cverf, (caddr_t)&vap->va_atime,
 1790                                                 NFSX_V3CREATEVERF);
 1791                                         error = VOP_SETATTR(nd.ni_vp, vap, cred,
 1792                                                 td);
 1793                                 }
 1794                         }
 1795                 } else if (vap->va_type == VCHR || vap->va_type == VBLK ||
 1796                     vap->va_type == VFIFO) {
 1797                         /*
 1798                          * NFSv2-specific code for creating device nodes
 1799                          * and fifos.
 1800                          *
 1801                          * Handle SysV FIFO node special cases.  All other
 1802                          * devices require super user to access.
 1803                          */
 1804                         if (vap->va_type == VCHR && rdev == 0xffffffff)
 1805                                 vap->va_type = VFIFO;
 1806                         if (vap->va_type != VFIFO &&
 1807                             (error = suser_cred(cred, 0))) {
 1808                                 goto ereply;
 1809                         }
 1810                         vap->va_rdev = rdev;
 1811                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 1812                         if (error) {
 1813                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1814                                 goto ereply;
 1815                         }
 1816                         vput(nd.ni_vp);
 1817                         nd.ni_vp = NULL;
 1818 
 1819                         /*
 1820                          * release dvp prior to lookup
 1821                          */
 1822                         vput(nd.ni_dvp);
 1823                         nd.ni_dvp = NULL;
 1824                         /*
 1825                          * Setup for lookup.
 1826                          *
 1827                          * Even though LOCKPARENT was cleared, ni_dvp may
 1828                          * be garbage.
 1829                          */
 1830                         nd.ni_cnd.cn_nameiop = LOOKUP;
 1831                         nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
 1832                         nd.ni_cnd.cn_thread = td;
 1833                         nd.ni_cnd.cn_cred = cred;
 1834                         tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
 1835                         if (tvfslocked)
 1836                                 nd.ni_cnd.cn_flags |= GIANTHELD;
 1837                         error = lookup(&nd);
 1838                         nd.ni_dvp = NULL;
 1839                         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
 1840                         nd.ni_cnd.cn_flags &= ~GIANTHELD;
 1841                         if (error)
 1842                                 goto ereply;
 1843 
 1844                         if (nd.ni_cnd.cn_flags & ISSYMLINK) {
 1845                                 error = EINVAL;
 1846                                 goto ereply;
 1847                         }
 1848                 } else {
 1849                         error = ENXIO;
 1850                 }
 1851         } else {
                      /*
                       * The name already exists: only act when a size was
                       * supplied (otherwise va_size still holds whatever
                       * VATTR_NULL() stored -- presumably VNOVAL; confirm).
                       */
 1852                 if (vap->va_size != -1) {
 1853                         error = nfsrv_access(nd.ni_vp, VWRITE,
 1854                             cred, (nd.ni_cnd.cn_flags & RDONLY), td, 0);
 1855                         if (!error) {
 1856                                 tempsize = vap->va_size;
 1857                                 VATTR_NULL(vap);
 1858                                 vap->va_size = tempsize;
 1859                                 error = VOP_SETATTR(nd.ni_vp, vap, cred,
 1860                                          td);
 1861                         }
 1862                 }
 1863         }
 1864 
              /* Build the file handle and fetch the attributes for the reply. */
 1865         if (!error) {
 1866                 bzero((caddr_t)fhp, sizeof(nfh));
 1867                 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 1868                 error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 1869                 if (!error)
 1870                         error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
 1871         }
 1872         if (v3) {
                      /*
                       * exclusive_flag is still set only when the file was
                       * not created above, i.e. it already existed.
                       */
 1873                 if (exclusive_flag && !error &&
 1874                         bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
 1875                         error = EEXIST;
 1876                 if (dirp == nd.ni_dvp)
 1877                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 1878                 else {
 1879                         /* Drop the other locks to avoid deadlock. */
 1880                         if (nd.ni_dvp) {
 1881                                 if (nd.ni_dvp == nd.ni_vp)
 1882                                         vrele(nd.ni_dvp);
 1883                                 else
 1884                                         vput(nd.ni_dvp);
 1885                         }
 1886                         if (nd.ni_vp)
 1887                                 vput(nd.ni_vp);
 1888                         nd.ni_dvp = NULL;
 1889                         nd.ni_vp = NULL;
 1890 
 1891                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 1892                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 1893                         VOP_UNLOCK(dirp, 0, td);
 1894                 }
 1895         }
      /*
       * The goto ereply jumps above (ESTALE and the NFSv2 device-node
       * failures) bypass the directory attribute fetch, so diraft_ret keeps
       * its initial value of 1 on those paths.
       */
 1896 ereply:
 1897         nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
 1898         if (v3) {
 1899                 if (!error) {
 1900                         nfsm_srvpostop_fh(fhp);
 1901                         nfsm_srvpostop_attr(0, vap);
 1902                 }
 1903                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1904         } else if (!error) {
 1905                 /* v2 non-error case. */
 1906                 nfsm_srvfhtom(fhp, v3);
 1907                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 1908                 nfsm_srvfillattr(vap, fp);
 1909         }
 1910         error = 0;
 1911 
      /*
       * Common cleanup.  Each vnode pointer is released only if it is still
       * non-NULL, so paths that already dropped one must have cleared it.
       */
 1912 nfsmout:
 1913         if (nd.ni_dvp) {
 1914                 if (nd.ni_dvp == nd.ni_vp)
 1915                         vrele(nd.ni_dvp);
 1916                 else
 1917                         vput(nd.ni_dvp);
 1918         }
 1919         if (nd.ni_vp)
 1920                 vput(nd.ni_vp);
 1921         if (nd.ni_startdir) {
 1922                 vrele(nd.ni_startdir);
 1923                 nd.ni_startdir = NULL;
 1924         }
 1925         if (dirp)
 1926                 vrele(dirp);
 1927         NDFREE(&nd, NDF_ONLY_PNBUF);
 1928         vn_finished_write(mp);
 1929         VFS_UNLOCK_GIANT(vfslocked);
 1930         return (error);
 1931 }
 1932 
 1933 /*
 1934  * nfs v3 mknod service
       *
       * Creates the character device, block device, socket or fifo named in
       * the request; any other type is answered with NFSERR_BADTYPE and an
       * existing name with EEXIST.  Sockets are made with VOP_CREATE().  The
       * other types are made with VOP_MKNOD(), which requires suser_cred()
       * to succeed unless the type is VFIFO, and the new name is then looked
       * up again to obtain its vnode for the reply.
       *
       * nfsd supplies the request mbufs, client address, credential and the
       * NFSv3 flag; slp is passed through to nfs_namei(); td is the thread
       * handed to the VOP calls; mrq is not referenced by name here
       * (presumably nfsm_reply() stores the reply through it -- confirm).
       *
       * Panics when called for a request that is not NFSv3.  Returns 0 once
       * the reply at ereply has been built; the nfsmout exit returns error.
 1935  */
 1936 int
 1937 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1938     struct thread *td, struct mbuf **mrq)
 1939 {
 1940         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1941         struct sockaddr *nam = nfsd->nd_nam;
 1942         caddr_t dpos = nfsd->nd_dpos;
 1943         struct ucred *cred = nfsd->nd_cr;
 1944         struct vattr va, dirfor, diraft;
 1945         struct vattr *vap = &va;
 1946         u_int32_t *tl;
 1947         struct nameidata nd;
 1948         caddr_t bpos;
 1949         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 1950         u_int32_t major, minor;
 1951         enum vtype vtyp;
 1952         struct mbuf *mb, *mreq;
 1953         struct vnode *vp, *dirp = NULL;
 1954         nfsfh_t nfh;
 1955         fhandle_t *fhp;
 1956         struct mount *mp = NULL;
 1957         int v3 = (nfsd->nd_flag & ND_NFSV3);
 1958         int tvfslocked;
 1959         int vfslocked;
 1960 
 1961         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1962         vfslocked = 0;
 1963         if (!v3)
 1964                 panic("nfsrv_mknod: v3 proc called on a v2 connection");
 1965         ndclear(&nd);
 1966 
 1967         fhp = &nfh.fh_generic;
 1968         nfsm_srvmtofh(fhp);
              /* No mounted filesystem matches the handle's fsid: reply ESTALE. */
 1969         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 1970                 error = ESTALE;
 1971                 goto ereply;
 1972         }
 1973         vfslocked = VFS_LOCK_GIANT(mp);
 1974         (void) vn_start_write(NULL, &mp, V_WAIT);
 1975         vfs_rel(mp);            /* The write holds a ref. */
 1976         nfsm_srvnamesiz(len);
 1977 
 1978         nd.ni_cnd.cn_cred = cred;
 1979         nd.ni_cnd.cn_nameiop = CREATE;
 1980         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE;
 1981 
 1982         /*
 1983          * Handle nfs_namei() call.  If an error occurs, the nd structure
 1984          * is not valid.  However, nfsm_*() routines may still jump to
 1985          * nfsmout.
 1986          */
 1987 
 1988         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 1989                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 1990         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
 1991         if (error) {
 1992                 nfsm_reply(NFSX_WCCDATA(1));
 1993                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1994                 error = 0;
 1995                 goto nfsmout;
 1996         }
 1997         tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 1998         vtyp = nfsv3tov_type(*tl);
 1999         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
 2000                 error = NFSERR_BADTYPE;
 2001                 goto out;
 2002         }
 2003         VATTR_NULL(vap);
 2004         nfsm_srvsattr(vap);
              /* Only device nodes carry a major/minor pair in the request. */
 2005         if (vtyp == VCHR || vtyp == VBLK) {
 2006                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
 2007                 major = fxdr_unsigned(u_int32_t, *tl++);
 2008                 minor = fxdr_unsigned(u_int32_t, *tl);
 2009                 vap->va_rdev = makedev(major, minor);
 2010         }
 2011 
 2012         /*
 2013          * Iff doesn't exist, create it.
 2014          */
 2015         if (nd.ni_vp) {
 2016                 error = EEXIST;
 2017                 goto out;
 2018         }
 2019         vap->va_type = vtyp;
              /* No mode was supplied: create with mode 0. */
 2020         if (vap->va_mode == (mode_t)VNOVAL)
 2021                 vap->va_mode = 0;
 2022         if (vtyp == VSOCK) {
                      /* No second lookup is done for sockets; drop startdir now. */
 2023                 vrele(nd.ni_startdir);
 2024                 nd.ni_startdir = NULL;
 2025                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 2026                 if (error)
 2027                         NDFREE(&nd, NDF_ONLY_PNBUF);
 2028         } else {
 2029                 if (vtyp != VFIFO && (error = suser_cred(cred, 0)))
 2030                         goto out;
 2031                 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 2032                 if (error) {
 2033                         NDFREE(&nd, NDF_ONLY_PNBUF);
 2034                         goto out;
 2035                 }
 2036                 vput(nd.ni_vp);
 2037                 nd.ni_vp = NULL;
 2038 
 2039                 /*
 2040                  * Release dvp prior to lookup
 2041                  */
 2042                 vput(nd.ni_dvp);
 2043                 nd.ni_dvp = NULL;
 2044 
                      /*
                       * Look the new name up again to get its vnode.
                       * NOTE(review): this lookup runs with td->td_ucred
                       * rather than the request credential -- confirm that
                       * this is intended.
                       */
 2045                 nd.ni_cnd.cn_nameiop = LOOKUP;
 2046                 nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
 2047                 nd.ni_cnd.cn_thread = td;
 2048                 nd.ni_cnd.cn_cred = td->td_ucred;
 2049                 tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
 2050                 if (tvfslocked)
 2051                         nd.ni_cnd.cn_flags |= GIANTHELD;
 2052                 error = lookup(&nd);
 2053                 nd.ni_dvp = NULL;
 2054                 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
 2055                 nd.ni_cnd.cn_flags &= ~GIANTHELD;
 2056 
 2057                 if (error)
 2058                         goto out;
 2059                 if (nd.ni_cnd.cn_flags & ISSYMLINK)
 2060                         error = EINVAL;
 2061         }
 2062 
 2063         /*
 2064          * send response, cleanup, return.
 2065          */
 2066 out:
 2067         vp = nd.ni_vp;
 2068         if (!error) {
 2069                 bzero((caddr_t)fhp, sizeof(nfh));
 2070                 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 2071                 error = VOP_VPTOFH(vp, &fhp->fh_fid);
 2072                 if (!error)
 2073                         error = VOP_GETATTR(vp, vap, cred, td);
 2074         }
 2075         if (nd.ni_dvp) {
 2076                 if (nd.ni_dvp == nd.ni_vp)
 2077                         vrele(nd.ni_dvp);
 2078                 else
 2079                         vput(nd.ni_dvp);
 2080                 nd.ni_dvp = NULL;
 2081         }
 2082         if (vp) {
 2083                 vput(vp);
 2084                 vp = NULL;
 2085                 nd.ni_vp = NULL;
 2086         }
 2087         if (nd.ni_startdir) {
 2088                 vrele(nd.ni_startdir);
 2089                 nd.ni_startdir = NULL;
 2090         }
 2091         NDFREE(&nd, NDF_ONLY_PNBUF);
 2092         if (dirp) {
 2093                 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2094                 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2095                 VOP_UNLOCK(dirp, 0, td);
                      /*
                       * This exit returns without going through nfsmout, so
                       * the reference on dirp has to be dropped here.  Clear
                       * the pointer as well, so that a jump to nfsmout from
                       * the reply macros below cannot release it twice.
                       */
                      vrele(dirp);
                      dirp = NULL;
 2096         }
 2097 ereply:
 2098         nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
 2099         if (v3) {
 2100                 if (!error) {
 2101                         nfsm_srvpostop_fh(fhp);
 2102                         nfsm_srvpostop_attr(0, vap);
 2103                 }
 2104                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2105         }
 2106         vn_finished_write(mp);
 2107         VFS_UNLOCK_GIANT(vfslocked);
 2108         return (0);
      /*
       * Exit used after an nfs_namei() failure and by jumps out of the
       * nfsm_*() macros: release whatever is still held.
       */
 2109 nfsmout:
 2110         if (nd.ni_dvp) {
 2111                 if (nd.ni_dvp == nd.ni_vp)
 2112                         vrele(nd.ni_dvp);
 2113                 else
 2114                         vput(nd.ni_dvp);
 2115         }
 2116         if (nd.ni_vp)
 2117                 vput(nd.ni_vp);
 2118         if (dirp)
 2119                 vrele(dirp);
 2120         if (nd.ni_startdir)
 2121                 vrele(nd.ni_startdir);
 2122         NDFREE(&nd, NDF_ONLY_PNBUF);
 2123         vn_finished_write(mp);
 2124         VFS_UNLOCK_GIANT(vfslocked);
 2125         return (error);
 2126 }
 2127 
 2128 /*
 2129  * nfs remove service
       *
       * Looks the name up with nfs_namei() (DELETE, with LOCKPARENT and
       * LOCKLEAF) and removes it with VOP_REMOVE().  Directories are refused
       * with EPERM and the root of a mounted filesystem with EBUSY.  For
       * NFSv3 the directory attributes are fetched again afterwards for the
       * wcc data in the reply.
       *
       * nfsd supplies the request mbufs, client address, credential and the
       * NFSv3 flag; slp is passed through to nfs_namei(); td is the thread
       * handed to the VOP calls; mrq is not referenced by name here
       * (presumably nfsm_reply() stores the reply through it -- confirm).
       *
       * Returns error; for NFSv3 it is cleared once the reply has been built.
 2130  */
 2131 int
 2132 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2133     struct thread *td, struct mbuf **mrq)
 2134 {
 2135         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2136         struct sockaddr *nam = nfsd->nd_nam;
 2137         caddr_t dpos = nfsd->nd_dpos;
 2138         struct ucred *cred = nfsd->nd_cr;
 2139         struct nameidata nd;
 2140         caddr_t bpos;
 2141         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2142         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2143         struct mbuf *mb, *mreq;
 2144         struct vnode *dirp = NULL;
 2145         struct vattr dirfor, diraft;
 2146         nfsfh_t nfh;
 2147         fhandle_t *fhp;
 2148         struct mount *mp = NULL;
 2149         int vfslocked;
 2150 
 2151         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2152         ndclear(&nd);
 2153         vfslocked = 0;
 2154 
 2155         fhp = &nfh.fh_generic;
 2156         nfsm_srvmtofh(fhp);
              /* No mounted filesystem matches the handle's fsid: reply ESTALE. */
 2157         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2158                 error = ESTALE;
 2159                 goto ereply;
 2160         }
 2161         vfslocked = VFS_LOCK_GIANT(mp);
 2162         (void) vn_start_write(NULL, &mp, V_WAIT);
 2163         vfs_rel(mp);            /* The write holds a ref. */
 2164         nfsm_srvnamesiz(len);
 2165 
 2166         nd.ni_cnd.cn_cred = cred;
 2167         nd.ni_cnd.cn_nameiop = DELETE;
 2168         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | MPSAFE;
 2169         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 2170                 &dirp, v3,  &dirfor, &dirfor_ret, td, FALSE);
              /*
               * Fold the Giant state reported by the lookup into the state
               * from VFS_LOCK_GIANT(mp) above rather than overwriting it, so
               * that the single VFS_UNLOCK_GIANT() at the end balances both
               * acquisitions.
               */
 2171         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* dirp is only needed for the NFSv3 wcc data; drop it for NFSv2. */
 2172         if (dirp && !v3) {
 2173                 vrele(dirp);
 2174                 dirp = NULL;
 2175         }
 2176         if (error == 0) {
 2177                 if (nd.ni_vp->v_type == VDIR) {
 2178                         error = EPERM;          /* POSIX */
 2179                         goto out;
 2180                 }
 2181                 /*
 2182                  * The root of a mounted filesystem cannot be deleted.
 2183                  */
 2184                 if (nd.ni_vp->v_vflag & VV_ROOT) {
 2185                         error = EBUSY;
 2186                         goto out;
 2187                 }
 2188 out:
 2189                 if (!error) {
 2190                         error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 2191                         NDFREE(&nd, NDF_ONLY_PNBUF);
 2192                 }
 2193         }
              /* NFSv3: refresh the directory attributes, then drop dirp. */
 2194         if (dirp && v3) {
 2195                 if (dirp == nd.ni_dvp)
 2196                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2197                 else {
 2198                         /* Drop the other locks to avoid deadlock. */
 2199                         if (nd.ni_dvp) {
 2200                                 if (nd.ni_dvp == nd.ni_vp)
 2201                                         vrele(nd.ni_dvp);
 2202                                 else
 2203                                         vput(nd.ni_dvp);
 2204                         }
 2205                         if (nd.ni_vp)
 2206                                 vput(nd.ni_vp);
 2207                         nd.ni_dvp = NULL;
 2208                         nd.ni_vp = NULL;
 2209 
 2210                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2211                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2212                         VOP_UNLOCK(dirp, 0, td);
 2213                 }
 2214                 vrele(dirp);
 2215                 dirp = NULL;
 2216         }
 2217 ereply:
 2218         nfsm_reply(NFSX_WCCDATA(v3));
 2219         if (v3) {
 2220                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2221                 error = 0;
 2222         }
      /*
       * Common cleanup.  Each vnode pointer is released only if it is still
       * non-NULL, so paths that already dropped one must have cleared it.
       */
 2223 nfsmout:
 2224         NDFREE(&nd, NDF_ONLY_PNBUF);
 2225         if (nd.ni_dvp) {
 2226                 if (nd.ni_dvp == nd.ni_vp)
 2227                         vrele(nd.ni_dvp);
 2228                 else
 2229                         vput(nd.ni_dvp);
 2230         }
 2231         if (nd.ni_vp)
 2232                 vput(nd.ni_vp);
 2233         vn_finished_write(mp);
 2234         VFS_UNLOCK_GIANT(vfslocked);
 2235         return(error);
 2236 }
 2237 
 2238 /*
 2239  * nfs rename service
 2240  */
 2241 int
 2242 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2243     struct thread *td, struct mbuf **mrq)
 2244 {
              /*
               * Summary of the code below:
               *  1. Decode the source file handle and name and look the source
               *     up with nfs_namei() (DELETE, WANTPARENT | SAVESTART).
               *  2. Decode the target file handle and name and look the target
               *     up with nfs_namei() (RENAME, LOCKPARENT | LOCKLEAF).
               *  3. Reject type mismatches, vnodes with v_mountedhere set,
               *     cross-mount renames and a source that is the target
               *     directory itself.
               *  4. Call VOP_RENAME() and build the reply; for v3 the reply
               *     carries wcc data for both the source and target directory.
               * Every explicit path below resets error to 0 once the reply has
               * been built; what the nfsm_* macros leave in error when they
               * jump to nfsmout is not visible in this function.
               */
 2245         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2246         struct sockaddr *nam = nfsd->nd_nam;
 2247         caddr_t dpos = nfsd->nd_dpos;
 2248         struct ucred *cred = nfsd->nd_cr;
 2249         caddr_t bpos;
 2250         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
 2251         int tdirfor_ret = 1, tdiraft_ret = 1;
 2252         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2253         struct mbuf *mb, *mreq;
 2254         struct nameidata fromnd, tond;
 2255         struct vnode *fvp, *tvp, *tdvp, *fdirp = NULL;
 2256         struct vnode *tdirp = NULL;
 2257         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
 2258         nfsfh_t fnfh, tnfh;
 2259         fhandle_t *ffhp, *tfhp;
 2260         uid_t saved_uid;
 2261         struct mount *mp = NULL;
 2262         int vfslocked;
 2263 
 2264         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2265         vfslocked = 0;
 2266 #ifndef nolint
 2267         fvp = NULL;
 2268 #endif
 2269         ffhp = &fnfh.fh_generic;
 2270         tfhp = &tnfh.fh_generic;
 2271 
 2272         /*
 2273          * Clear fields in case a goto nfsmout occurs from a macro.
 2274          */
 2275 
 2276         ndclear(&fromnd);
 2277         ndclear(&tond);
 2278 
 2279         nfsm_srvmtofh(ffhp);
              /* No mount found for the handle's fsid: report a stale handle. */
 2280         if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL) {
 2281                 error = ESTALE;
 2282                 goto out1;
 2283         }
 2284         vfslocked = VFS_LOCK_GIANT(mp);
 2285         (void) vn_start_write(NULL, &mp, V_WAIT);
 2286         vfs_rel(mp);            /* The write holds a ref. */
 2287         nfsm_srvnamesiz(len);
 2288         /*
 2289          * Remember our original uid so that we can reset cr_uid before
 2290          * the second nfs_namei() call, in case it is remapped.
 2291          */
 2292         saved_uid = cred->cr_uid;
 2293         fromnd.ni_cnd.cn_cred = cred;
 2294         fromnd.ni_cnd.cn_nameiop = DELETE;
 2295         fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART | MPSAFE;
 2296         error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md,
 2297                 &dpos, &fdirp, v3, &fdirfor, &fdirfor_ret, td, FALSE);
 2298         vfslocked = nfsrv_lockedpair_nd(vfslocked, &fromnd);
              /* The directory vnode is only kept for v3 (used for wcc data). */
 2299         if (fdirp && !v3) {
 2300                 vrele(fdirp);
 2301                 fdirp = NULL;
 2302         }
              /* Source lookup failed: send the reply now and go clean up. */
 2303         if (error) {
 2304                 nfsm_reply(2 * NFSX_WCCDATA(v3));
 2305                 if (v3) {
 2306                         nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 2307                         nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 2308                 }
 2309                 error = 0;
 2310                 goto nfsmout;
 2311         }
 2312         fvp = fromnd.ni_vp;
 2313         nfsm_srvmtofh(tfhp);
 2314         nfsm_srvnamesiz(len2);
 2315         cred->cr_uid = saved_uid;
 2316         tond.ni_cnd.cn_cred = cred;
 2317         tond.ni_cnd.cn_nameiop = RENAME;
 2318         tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | MPSAFE;
 2319         error = nfs_namei(&tond, tfhp, len2, slp, nam, &md,
 2320                 &dpos, &tdirp, v3, &tdirfor, &tdirfor_ret, td, FALSE);
 2321         vfslocked = nfsrv_lockedpair_nd(vfslocked, &tond);
 2322         if (tdirp && !v3) {
 2323                 vrele(tdirp);
 2324                 tdirp = NULL;
 2325         }
 2326         if (error)
 2327                 goto out1;
 2328 
 2329         tdvp = tond.ni_dvp;
 2330         tvp = tond.ni_vp;
              /*
               * An existing target must be of the same kind as the source
               * (directory vs. non-directory) and, if it is a directory, must
               * not have v_mountedhere set.  v3 and v2 report different errors.
               */
 2331         if (tvp != NULL) {
 2332                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 2333                         if (v3)
 2334                                 error = EEXIST;
 2335                         else
 2336                                 error = EISDIR;
 2337                         goto out;
 2338                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 2339                         if (v3)
 2340                                 error = EEXIST;
 2341                         else
 2342                                 error = ENOTDIR;
 2343                         goto out;
 2344                 }
 2345                 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
 2346                         if (v3)
 2347                                 error = EXDEV;
 2348                         else
 2349                                 error = ENOTEMPTY;
 2350                         goto out;
 2351                 }
 2352         }
              /* Same v_mountedhere restriction for a source directory. */
 2353         if (fvp->v_type == VDIR && fvp->v_mountedhere) {
 2354                 if (v3)
 2355                         error = EXDEV;
 2356                 else
 2357                         error = ENOTEMPTY;
 2358                 goto out;
 2359         }
              /* Source and target directory must belong to the same mount. */
 2360         if (fvp->v_mount != tdvp->v_mount) {
 2361                 if (v3)
 2362                         error = EXDEV;
 2363                 else
 2364                         error = ENOTEMPTY;
 2365                 goto out;
 2366         }
              /*
               * The source vnode is the target directory itself.
               * NOTE(review): unlike the checks above there is no "goto out"
               * here, so the identical-name test below still runs and can
               * replace this error with -1 -- confirm that this is intended.
               */
 2367         if (fvp == tdvp) {
 2368                 if (v3)
 2369                         error = EINVAL;
 2370                 else
 2371                         error = ENOTEMPTY;
 2372         }
 2373         /*
 2374          * If source is the same as the destination (that is the
 2375          * same vnode with the same name in the same directory),
 2376          * then there is nothing to do.
 2377          */
 2378         if (fvp == tvp && fromnd.ni_dvp == tdvp &&
 2379             fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
 2380             !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
 2381               fromnd.ni_cnd.cn_namelen))
 2382                 error = -1;
 2383 out:
 2384         if (!error) {
 2385                 /*
 2386                  * The VOP_RENAME function releases all vnode references &
 2387                  * locks prior to returning so we need to clear the pointers
 2388                  * to bypass cleanup code later on.
 2389                  */
 2390                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 2391                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 2392                 fromnd.ni_dvp = NULL;
 2393                 fromnd.ni_vp = NULL;
 2394                 tond.ni_dvp = NULL;
 2395                 tond.ni_vp = NULL;
 2396                 if (error) {
 2397                         NDFREE(&fromnd, NDF_ONLY_PNBUF);
 2398                         NDFREE(&tond, NDF_ONLY_PNBUF);
 2399                 }
 2400         } else {
                      /* -1 marks the "nothing to do" case set above: success. */
 2401                 if (error == -1)
 2402                         error = 0;
 2403         }
 2404         /* fall through */
 2405 out1:
              /* Build the reply; v3 additionally reports wcc data for both dirs. */
 2406         nfsm_reply(2 * NFSX_WCCDATA(v3));
 2407         if (v3) {
 2408                 /* Release existing locks to prevent deadlock. */
 2409                 if (tond.ni_dvp) {
 2410                         if (tond.ni_dvp == tond.ni_vp)
 2411                                 vrele(tond.ni_dvp);
 2412                         else
 2413                                 vput(tond.ni_dvp);
 2414                 }
 2415                 if (tond.ni_vp)
 2416                         vput(tond.ni_vp);
 2417                 tond.ni_dvp = NULL;
 2418                 tond.ni_vp = NULL;
 2419 
                      /* Fetch the post-operation attributes of each directory. */
 2420                 if (fdirp) {
 2421                         vn_lock(fdirp, LK_EXCLUSIVE | LK_RETRY, td);
 2422                         fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, td);
 2423                         VOP_UNLOCK(fdirp, 0, td);
 2424                 }
 2425                 if (tdirp) {
 2426                         vn_lock(tdirp, LK_EXCLUSIVE | LK_RETRY, td);
 2427                         tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, td);
 2428                         VOP_UNLOCK(tdirp, 0, td);
 2429                 }
 2430                 nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 2431                 nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 2432         }
 2433         error = 0;
 2434         /* fall through */
 2435 
 2436 nfsmout:
 2437         /*
 2438          * Clear out tond related fields
 2439          */
 2440         if (tond.ni_dvp) {
 2441                 if (tond.ni_dvp == tond.ni_vp)
 2442                         vrele(tond.ni_dvp);
 2443                 else
 2444                         vput(tond.ni_dvp);
 2445         }
 2446         if (tond.ni_vp)
 2447                 vput(tond.ni_vp);
 2448         if (tdirp)
 2449                 vrele(tdirp);
 2450         if (tond.ni_startdir)
 2451                 vrele(tond.ni_startdir);
 2452         NDFREE(&tond, NDF_ONLY_PNBUF);
 2453         /*
 2454          * Clear out fromnd related fields
 2455          */
 2456         if (fdirp)
 2457                 vrele(fdirp);
 2458         if (fromnd.ni_startdir)
 2459                 vrele(fromnd.ni_startdir);
 2460         NDFREE(&fromnd, NDF_ONLY_PNBUF);
 2461         if (fromnd.ni_dvp)
 2462                 vrele(fromnd.ni_dvp);
 2463         if (fromnd.ni_vp)
 2464                 vrele(fromnd.ni_vp);
 2465 
              /* mp is still NULL here if vfs_getvfs() failed or was never reached. */
 2466         vn_finished_write(mp);
 2467         VFS_UNLOCK_GIANT(vfslocked);
 2468         return (error);
 2469 }
 2470 
 2471 /*
 2472  * nfs link service
       *
       * Decodes the file handle of the existing file (fhp), then the handle
       * of the target directory (dfhp) and the new name, and creates the
       * link with VOP_LINK().  Failures detected in this function: ESTALE
       * (no mount for the handle's fsid), EPERM (the file is a directory),
       * EEXIST (the new name already exists) and EXDEV (file and directory
       * are on different mounts).  For v3 the reply carries the file's
       * post-op attributes and wcc data for the directory.
 2473  */
 2474 int
 2475 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2476     struct thread *td, struct mbuf **mrq)
 2477 {
 2478         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2479         struct sockaddr *nam = nfsd->nd_nam;
 2480         caddr_t dpos = nfsd->nd_dpos;
 2481         struct ucred *cred = nfsd->nd_cr;
 2482         struct nameidata nd;
 2483         caddr_t bpos;
 2484         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
 2485         int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
 2486         struct mbuf *mb, *mreq;
 2487         struct vnode *vp = NULL, *xp, *dirp = NULL;
 2488         struct vattr dirfor, diraft, at;
 2489         nfsfh_t nfh, dnfh;
 2490         fhandle_t *fhp, *dfhp;
 2491         struct mount *mp = NULL;
 2492         int tvfslocked;
 2493         int vfslocked;
 2494 
 2495         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2496         ndclear(&nd);
 2497         vfslocked = 0;
 2498 
 2499         fhp = &nfh.fh_generic;
 2500         dfhp = &dnfh.fh_generic;
 2501         nfsm_srvmtofh(fhp);
              /* No mount found for the handle's fsid: report a stale handle. */
 2502         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2503                 error = ESTALE;
 2504                 goto ereply;
 2505         }
 2506         vfslocked = VFS_LOCK_GIANT(mp);
 2507         (void) vn_start_write(NULL, &mp, V_WAIT);
 2508         vfs_rel(mp);            /* The write holds a ref. */
 2509         nfsm_srvmtofh(dfhp);
 2510         nfsm_srvnamesiz(len);
 2511 
 2512         error = nfsrv_fhtovp(fhp, TRUE, &vp, &tvfslocked, cred, slp,
 2513             nam, &rdonly, TRUE);
 2514         vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
              /* The handle could not be mapped to a vnode: reply and clean up. */
 2515         if (error) {
 2516                 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2517                 if (v3) {
 2518                         nfsm_srvpostop_attr(getret, &at);
 2519                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2520                 }
 2521                 vp = NULL;
 2522                 error = 0;
 2523                 goto nfsmout;
 2524         }
 2525         if (v3)
 2526                 getret = VOP_GETATTR(vp, &at, cred, td);
 2527         if (vp->v_type == VDIR) {
 2528                 error = EPERM;          /* POSIX */
 2529                 goto out1;
 2530         }
              /* Unlock vp while the new name is looked up; it is re-locked below. */
 2531         VOP_UNLOCK(vp, 0, td);
 2532         nd.ni_cnd.cn_cred = cred;
 2533         nd.ni_cnd.cn_nameiop = CREATE;
              /* NOTE(review): MPSAFE is OR'ed in twice below; the repeat is redundant. */
 2534         nd.ni_cnd.cn_flags = LOCKPARENT | MPSAFE | MPSAFE;
 2535         error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos,
 2536                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 2537         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* The directory vnode is only kept for v3 (used for wcc data). */
 2538         if (dirp && !v3) {
 2539                 vrele(dirp);
 2540                 dirp = NULL;
 2541         }
 2542         if (error) {
 2543                 vrele(vp);
 2544                 vp = NULL;
 2545                 goto out2;
 2546         }
              /* The new name must not exist yet. */
 2547         xp = nd.ni_vp;
 2548         if (xp != NULL) {
 2549                 error = EEXIST;
 2550                 vrele(vp);
 2551                 vp = NULL;
 2552                 goto out2;
 2553         }
              /* xp is reused for the target directory: no cross-mount links. */
 2554         xp = nd.ni_dvp;
 2555         if (vp->v_mount != xp->v_mount) {
 2556                 error = EXDEV;
 2557                 vrele(vp);
 2558                 vp = NULL;
 2559                 goto out2;
 2560         }
 2561         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 2562         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 2563         NDFREE(&nd, NDF_ONLY_PNBUF);
 2564         /* fall through */
 2565 
 2566 out1:
 2567         if (v3)
 2568                 getret = VOP_GETATTR(vp, &at, cred, td);
 2569 out2:
              /*
               * The "goto out2" paths above have already released vp and set
               * it to NULL.  dirp is only non-NULL for v3.
               */
 2570         if (dirp) {
 2571                 if (dirp == nd.ni_dvp)
 2572                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2573                 else {
 2574                         /* Release existing locks to prevent deadlock. */
 2575                         if (nd.ni_dvp) {
 2576                                 if (nd.ni_dvp == nd.ni_vp)
 2577                                         vrele(nd.ni_dvp);
 2578                                 else
 2579                                         vput(nd.ni_dvp);
 2580                         }
 2581                         if (nd.ni_vp)
 2582                                 vrele(nd.ni_vp);
 2583                         nd.ni_dvp = NULL;
 2584                         nd.ni_vp = NULL;
 2585 
 2586                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2587                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2588                         VOP_UNLOCK(dirp, 0, td);
 2589                 }
 2590         }
 2591 ereply:
              /*
               * NOTE(review): error is reset to 0 only inside the v3 branch
               * here, whereas the nfsrv_fhtovp() failure path above resets it
               * unconditionally.  For a v2 request a non-zero error is thus
               * returned to the caller after the reply has been built --
               * confirm that this is intended.
               */
 2592         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2593         if (v3) {
 2594                 nfsm_srvpostop_attr(getret, &at);
 2595                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2596                 error = 0;
 2597         }
 2598         /* fall through */
 2599 
 2600 nfsmout:
 2601         NDFREE(&nd, NDF_ONLY_PNBUF);
 2602         if (vp)
 2603                 vput(vp);
 2604         if (nd.ni_dvp) {
 2605                 if (nd.ni_dvp == nd.ni_vp)
 2606                         vrele(nd.ni_dvp);
 2607                 else
 2608                         vput(nd.ni_dvp);
 2609         }
 2610         if (dirp)
 2611                 vrele(dirp);
 2612         if (nd.ni_vp)
 2613                 vrele(nd.ni_vp);
              /* mp is still NULL here if vfs_getvfs() failed or was never reached. */
 2614         vn_finished_write(mp);
 2615         VFS_UNLOCK_GIANT(vfslocked);
 2616         return(error);
 2617 }
 2618 
 2619 /*
 2620  * nfs symbolic link service
       *
       * Looks up the new name in the directory named by the file handle
       * (CREATE, LOCKPARENT | SAVESTART), reads the link target from the
       * request into a temporary NUL-terminated buffer via nfsm_mtouio()
       * and calls VOP_SYMLINK().  For v3 the new link is then looked up
       * again so that its file handle and attributes can be returned along
       * with wcc data for the directory.  EEXIST is reported if the name
       * already exists, ESTALE if no mount matches the handle's fsid.
 2621  */
 2622 int
 2623 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2624     struct thread *td, struct mbuf **mrq)
 2625 {
 2626         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2627         struct sockaddr *nam = nfsd->nd_nam;
 2628         caddr_t dpos = nfsd->nd_dpos;
 2629         struct ucred *cred = nfsd->nd_cr;
 2630         struct vattr va, dirfor, diraft;
 2631         struct nameidata nd;
 2632         struct vattr *vap = &va;
 2633         struct nfsv2_sattr *sp;
 2634         char *bpos, *pathcp = NULL;
 2635         struct uio io;
 2636         struct iovec iv;
 2637         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
 2638         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2639         struct mbuf *mb, *mreq;
 2640         struct vnode *dirp = NULL;
 2641         nfsfh_t nfh;
 2642         fhandle_t *fhp;
 2643         struct mount *mp = NULL;
 2644         int tvfslocked;
 2645         int vfslocked;
 2646 
 2647         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2648         ndclear(&nd);
 2649         vfslocked = 0;
 2650 
 2651         fhp = &nfh.fh_generic;
 2652         nfsm_srvmtofh(fhp);
              /* No mount found for the handle's fsid: report a stale handle. */
 2653         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2654                 error = ESTALE;
 2655                 goto out;
 2656         }
 2657         vfslocked = VFS_LOCK_GIANT(mp);
 2658         (void) vn_start_write(NULL, &mp, V_WAIT);
 2659         vfs_rel(mp);            /* The write holds a ref. */
 2660         nfsm_srvnamesiz(len);
 2661         nd.ni_cnd.cn_cred = cred;
 2662         nd.ni_cnd.cn_nameiop = CREATE;
 2663         nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART | MPSAFE;
 2664         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 2665                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 2666         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* The rest of the request is only decoded after a successful lookup. */
 2667         if (error == 0) {
 2668                 VATTR_NULL(vap);
 2669                 if (v3)
 2670                         nfsm_srvsattr(vap);
 2671                 nfsm_srvpathsiz(len2);
 2672         }
              /* The directory vnode is only kept for v3 (used for wcc data). */
 2673         if (dirp && !v3) {
 2674                 vrele(dirp);
 2675                 dirp = NULL;
 2676         }
 2677         if (error)
 2678                 goto out;
              /*
               * Temporary buffer for the link target; the extra byte holds the
               * terminating NUL stored below.  The uio describes it as one
               * kernel-space segment of len2 bytes for nfsm_mtouio().
               */
 2679         MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
 2680         iv.iov_base = pathcp;
 2681         iv.iov_len = len2;
 2682         io.uio_resid = len2;
 2683         io.uio_offset = 0;
 2684         io.uio_iov = &iv;
 2685         io.uio_iovcnt = 1;
 2686         io.uio_segflg = UIO_SYSSPACE;
 2687         io.uio_rw = UIO_READ;
 2688         io.uio_td = NULL;
 2689         nfsm_mtouio(&io, len2);
              /* For v2 the attributes follow the path; only the mode is used. */
 2690         if (!v3) {
 2691                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
 2692                 vap->va_mode = nfstov_mode(sp->sa_mode);
 2693         }
 2694         *(pathcp + len2) = '\0';
 2695         if (nd.ni_vp) {
 2696                 error = EEXIST;
 2697                 goto out;
 2698         }
 2699 
 2700         /*
 2701          * issue symlink op.  SAVESTART is set so the underlying path component
 2702          * is only freed by the VOP if an error occurs.
 2703          */
              /* A mode that is still VNOVAL is replaced by 0 before the VOP. */
 2704         if (vap->va_mode == (mode_t)VNOVAL)
 2705                 vap->va_mode = 0;
 2706         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
 2707         if (error)
 2708                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2709         else
 2710                 vput(nd.ni_vp);
 2711         nd.ni_vp = NULL;
 2712         /*
 2713          * releases directory prior to potential lookup op.
 2714          */
 2715         vput(nd.ni_dvp);
 2716         nd.ni_dvp = NULL;
 2717 
 2718         if (error == 0) {
 2719             if (v3) {
 2720                 /*
 2721                  * Issue lookup.  Leave SAVESTART set so we can easily free
 2722                  * the name buffer later on.
 2723                  *
 2724                  * since LOCKPARENT is not set, ni_dvp will be garbage on
 2725                  * return whether an error occurs or not.
 2726                  */
 2727                 nd.ni_cnd.cn_nameiop = LOOKUP;
 2728                 nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
 2729                 nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
 2730                 nd.ni_cnd.cn_thread = td;
 2731                 nd.ni_cnd.cn_cred = cred;
 2732                 tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
 2733                 if (tvfslocked)
 2734                         nd.ni_cnd.cn_flags |= GIANTHELD;
 2735                 error = lookup(&nd);
 2736                 nd.ni_dvp = NULL;
 2737                 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
 2738                 nd.ni_cnd.cn_flags &= ~GIANTHELD;
 2739 
                      /* Build the new link's file handle and fetch its attributes. */
 2740                 if (error == 0) {
 2741                         bzero((caddr_t)fhp, sizeof(nfh));
 2742                         fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 2743                         error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 2744                         if (!error)
 2745                                 error = VOP_GETATTR(nd.ni_vp, vap, cred,
 2746                                         td);
 2747                         vput(nd.ni_vp);
 2748                         nd.ni_vp = NULL;
 2749                 }
 2750             }
 2751         }
 2752 out:
 2753         /*
 2754          * These releases aren't strictly required, does even doing them
 2755          * make any sense? XXX can nfsm_reply() block?
 2756          */
 2757         if (pathcp) {
 2758                 FREE(pathcp, M_TEMP);
 2759                 pathcp = NULL;
 2760         }
              /* dirp is only non-NULL for v3: fetch the directory's after-attributes. */
 2761         if (dirp) {
 2762                 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2763                 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2764                 VOP_UNLOCK(dirp, 0, td);
 2765         }
 2766         if (nd.ni_startdir) {
 2767                 vrele(nd.ni_startdir);
 2768                 nd.ni_startdir = NULL;
 2769         }
 2770         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2771         if (v3) {
                      /* The file handle and attributes are only sent on success. */
 2772                 if (!error) {
 2773                         nfsm_srvpostop_fh(fhp);
 2774                         nfsm_srvpostop_attr(0, vap);
 2775                 }
 2776                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2777         }
 2778         error = 0;
 2779         /* fall through */
 2780 
 2781 nfsmout:
 2782         NDFREE(&nd, NDF_ONLY_PNBUF);
 2783         if (nd.ni_dvp) {
 2784                 if (nd.ni_dvp == nd.ni_vp)
 2785                         vrele(nd.ni_dvp);
 2786                 else
 2787                         vput(nd.ni_dvp);
 2788         }
 2789         if (nd.ni_vp)
 2790                 vrele(nd.ni_vp);
 2791         if (nd.ni_startdir)
 2792                 vrele(nd.ni_startdir);
 2793         if (dirp)
 2794                 vrele(dirp);
              /*
               * pathcp was already freed and cleared at out:, so it can only
               * still be set here when nfsmout was reached by a direct jump.
               */
 2795         if (pathcp)
 2796                 FREE(pathcp, M_TEMP);
 2797 
              /* mp is still NULL here if vfs_getvfs() failed or was never reached. */
 2798         vn_finished_write(mp);
 2799         VFS_UNLOCK_GIANT(vfslocked);
 2800         return (error);
 2801 }
 2802 
 2803 /*
 2804  * nfs mkdir service
       *
       * Looks up the new name in the directory named by the file handle
       * (CREATE, LOCKPARENT), decodes the requested attributes (a full
       * sattr for v3, only the mode for v2) and calls VOP_MKDIR().  On
       * success the new directory's file handle and attributes are
       * returned; v3 replies also carry wcc data for the parent.  EEXIST
       * is reported if the name already exists, ESTALE if no mount matches
       * the handle's fsid.
 2805  */
 2806 int
 2807 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2808     struct thread *td, struct mbuf **mrq)
 2809 {
 2810         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2811         struct sockaddr *nam = nfsd->nd_nam;
 2812         caddr_t dpos = nfsd->nd_dpos;
 2813         struct ucred *cred = nfsd->nd_cr;
 2814         struct vattr va, dirfor, diraft;
 2815         struct vattr *vap = &va;
 2816         struct nfs_fattr *fp;
 2817         struct nameidata nd;
 2818         u_int32_t *tl;
 2819         caddr_t bpos;
 2820         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2821         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2822         struct mbuf *mb, *mreq;
 2823         struct vnode *dirp = NULL;
              /*
               * Set to 1 once VOP_MKDIR() has been called; during cleanup it
               * selects vput() instead of vrele() for nd.ni_vp.
               */
 2824         int vpexcl = 0;
 2825         nfsfh_t nfh;
 2826         fhandle_t *fhp;
 2827         struct mount *mp = NULL;
 2828         int vfslocked;
 2829 
 2830         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2831         ndclear(&nd);
 2832         vfslocked = 0;
 2833 
 2834         fhp = &nfh.fh_generic;
 2835         nfsm_srvmtofh(fhp);
              /* No mount found for the handle's fsid: report a stale handle. */
 2836         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2837                 error = ESTALE;
 2838                 goto out;
 2839         }
 2840         vfslocked = VFS_LOCK_GIANT(mp);
 2841         (void) vn_start_write(NULL, &mp, V_WAIT);
 2842         vfs_rel(mp);            /* The write holds a ref. */
 2843         nfsm_srvnamesiz(len);
 2844         nd.ni_cnd.cn_cred = cred;
 2845         nd.ni_cnd.cn_nameiop = CREATE;
 2846         nd.ni_cnd.cn_flags = LOCKPARENT | MPSAFE;
 2847 
 2848         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 2849                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 2850         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* The directory vnode is only kept for v3 (used for wcc data). */
 2851         if (dirp && !v3) {
 2852                 vrele(dirp);
 2853                 dirp = NULL;
 2854         }
              /* Lookup failed: send the reply now and go clean up. */
 2855         if (error) {
 2856                 nfsm_reply(NFSX_WCCDATA(v3));
 2857                 if (v3)
 2858                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2859                 error = 0;
 2860                 goto nfsmout;
 2861         }
 2862         VATTR_NULL(vap);
 2863         if (v3) {
 2864                 nfsm_srvsattr(vap);
 2865         } else {
 2866                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 2867                 vap->va_mode = nfstov_mode(*tl++);
 2868         }
 2869 
 2870         /*
 2871          * At this point nd.ni_dvp is referenced and exclusively locked and
 2872          * nd.ni_vp, if it exists, is referenced but not locked.
 2873          */
 2874 
 2875         vap->va_type = VDIR;
 2876         if (nd.ni_vp != NULL) {
 2877                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2878                 error = EEXIST;
 2879                 goto out;
 2880         }
 2881 
 2882         /*
 2883          * Issue mkdir op.  Since SAVESTART is not set, the pathname
 2884          * component is freed by the VOP call.  This will fill-in
 2885          * nd.ni_vp, reference, and exclusively lock it.
 2886          */
              /* A mode that is still VNOVAL is replaced by 0 before the VOP. */
 2887         if (vap->va_mode == (mode_t)VNOVAL)
 2888                 vap->va_mode = 0;
 2889         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 2890         NDFREE(&nd, NDF_ONLY_PNBUF);
 2891         vpexcl = 1;
 2892 
              /* The parent is released right away; only nd.ni_vp is used below. */
 2893         vput(nd.ni_dvp);
 2894         nd.ni_dvp = NULL;
 2895 
              /* Build the new directory's file handle and fetch its attributes. */
 2896         if (!error) {
 2897                 bzero((caddr_t)fhp, sizeof(nfh));
 2898                 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 2899                 error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 2900                 if (!error)
 2901                         error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
 2902         }
 2903 out:
              /* dirp is only non-NULL for v3: fetch the parent's after-attributes. */
 2904         if (dirp) {
 2905                 if (dirp == nd.ni_dvp) {
 2906                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2907                 } else {
 2908                         /* Release existing locks to prevent deadlock. */
 2909                         if (nd.ni_dvp) {
 2910                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2911                                 if (nd.ni_dvp == nd.ni_vp && vpexcl)
 2912                                         vrele(nd.ni_dvp);
 2913                                 else
 2914                                         vput(nd.ni_dvp);
 2915                         }
 2916                         if (nd.ni_vp) {
 2917                                 if (vpexcl)
 2918                                         vput(nd.ni_vp);
 2919                                 else
 2920                                         vrele(nd.ni_vp);
 2921                         }
 2922                         nd.ni_dvp = NULL;
 2923                         nd.ni_vp = NULL;
 2924                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2925                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2926                         VOP_UNLOCK(dirp, 0, td);
 2927                 }
 2928         }
 2929         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2930         if (v3) {
                      /* The file handle and attributes are only sent on success. */
 2931                 if (!error) {
 2932                         nfsm_srvpostop_fh(fhp);
 2933                         nfsm_srvpostop_attr(0, vap);
 2934                 }
 2935                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2936         } else if (!error) {
 2937                 /* v2 non-error case. */
 2938                 nfsm_srvfhtom(fhp, v3);
 2939                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 2940                 nfsm_srvfillattr(vap, fp);
 2941         }
 2942         error = 0;
 2943         /* fall through */
 2944 
 2945 nfsmout:
              /* Same release logic as in the v3 branch at out:, keyed on vpexcl. */
 2946         if (nd.ni_dvp) {
 2947                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2948                 if (nd.ni_dvp == nd.ni_vp && vpexcl)
 2949                         vrele(nd.ni_dvp);
 2950                 else
 2951                         vput(nd.ni_dvp);
 2952         }
 2953         if (nd.ni_vp) {
 2954                 if (vpexcl)
 2955                         vput(nd.ni_vp);
 2956                 else
 2957                         vrele(nd.ni_vp);
 2958         }
 2959         if (dirp)
 2960                 vrele(dirp);
              /* mp is still NULL here if vfs_getvfs() failed or was never reached. */
 2961         vn_finished_write(mp);
 2962         VFS_UNLOCK_GIANT(vfslocked);
 2963         return (error);
 2964 }
 2965 
 2966 /*
 2967  * nfs rmdir service
       *
       * Looks up the name in the directory named by the file handle
       * (DELETE, LOCKPARENT | LOCKLEAF) and removes it with VOP_RMDIR().
       * Rejected in this function: a vnode that is not a directory
       * (ENOTDIR), the parent directory itself, i.e. "." (EINVAL), and a
       * vnode flagged VV_ROOT (EBUSY).  ESTALE is reported if no mount
       * matches the handle's fsid.  v3 replies carry wcc data for the
       * parent directory.
 2968  */
 2969 int
 2970 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2971     struct thread *td, struct mbuf **mrq)
 2972 {
 2973         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2974         struct sockaddr *nam = nfsd->nd_nam;
 2975         caddr_t dpos = nfsd->nd_dpos;
 2976         struct ucred *cred = nfsd->nd_cr;
 2977         caddr_t bpos;
 2978         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2979         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2980         struct mbuf *mb, *mreq;
 2981         struct vnode *vp, *dirp = NULL;
 2982         struct vattr dirfor, diraft;
 2983         nfsfh_t nfh;
 2984         fhandle_t *fhp;
 2985         struct nameidata nd;
 2986         struct mount *mp = NULL;
 2987         int vfslocked;
 2988 
 2989         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2990         ndclear(&nd);
 2991         vfslocked = 0;
 2992 
 2993         fhp = &nfh.fh_generic;
 2994         nfsm_srvmtofh(fhp);
              /* No mount found for the handle's fsid: report a stale handle. */
 2995         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2996                 error = ESTALE;
 2997                 goto out;
 2998         }
 2999         vfslocked = VFS_LOCK_GIANT(mp);
 3000         (void) vn_start_write(NULL, &mp, V_WAIT);
 3001         vfs_rel(mp);            /* The write holds a ref. */
 3002         nfsm_srvnamesiz(len);
 3003         nd.ni_cnd.cn_cred = cred;
 3004         nd.ni_cnd.cn_nameiop = DELETE;
 3005         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | MPSAFE;
 3006         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 3007                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 3008         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* The directory vnode is only kept for v3 (used for wcc data). */
 3009         if (dirp && !v3) {
 3010                 vrele(dirp);
 3011                 dirp = NULL;
 3012         }
              /* Lookup failed: send the reply now and go clean up. */
 3013         if (error) {
 3014                 nfsm_reply(NFSX_WCCDATA(v3));
 3015                 if (v3)
 3016                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 3017                 error = 0;
 3018                 goto nfsmout;
 3019         }
 3020         vp = nd.ni_vp;
 3021         if (vp->v_type != VDIR) {
 3022                 error = ENOTDIR;
 3023                 goto out;
 3024         }
 3025         /*
 3026          * No rmdir "." please.
 3027          */
 3028         if (nd.ni_dvp == vp) {
 3029                 error = EINVAL;
 3030                 goto out;
 3031         }
 3032         /*
 3033          * The root of a mounted filesystem cannot be deleted.
 3034          */
              /* No goto is needed here: the out: label follows immediately. */
 3035         if (vp->v_vflag & VV_ROOT)
 3036                 error = EBUSY;
 3037 out:
 3038         /*
 3039          * Issue or abort op.  Since SAVESTART is not set, path name
 3040          * component is freed by the VOP after either.
 3041          */
 3042         if (!error)
 3043                 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3044         NDFREE(&nd, NDF_ONLY_PNBUF);
 3045 
              /* dirp is only non-NULL for v3: fetch the parent's after-attributes. */
 3046         if (dirp) {
 3047                 if (dirp == nd.ni_dvp)
 3048                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 3049                 else {
 3050                         /* Release existing locks to prevent deadlock. */
 3051                         if (nd.ni_dvp) {
 3052                                 if (nd.ni_dvp == nd.ni_vp)
 3053                                         vrele(nd.ni_dvp);
 3054                                 else
 3055                                         vput(nd.ni_dvp);
 3056                         }
 3057                         if (nd.ni_vp)
 3058                                 vput(nd.ni_vp);
 3059                         nd.ni_dvp = NULL;
 3060                         nd.ni_vp = NULL;
 3061                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 3062                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 3063                         VOP_UNLOCK(dirp, 0, td);
 3064                 }
 3065         }
              /*
               * error is cleared right after the reply is started, so this
               * path returns 0 (presumably nfsm_reply() has already recorded
               * the status in the reply -- the macro is not visible here).
               */
 3066         nfsm_reply(NFSX_WCCDATA(v3));
 3067         error = 0;
 3068         if (v3)
 3069                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 3070         /* fall through */
 3071 
 3072 nfsmout:
 3073         NDFREE(&nd, NDF_ONLY_PNBUF);
 3074         if (nd.ni_dvp) {
 3075                 if (nd.ni_dvp == nd.ni_vp)
 3076                         vrele(nd.ni_dvp);
 3077                 else
 3078                         vput(nd.ni_dvp);
 3079         }
 3080         if (nd.ni_vp)
 3081                 vput(nd.ni_vp);
 3082         if (dirp)
 3083                 vrele(dirp);
 3084 
              /* mp is still NULL here if vfs_getvfs() failed or was never reached. */
 3085         vn_finished_write(mp);
 3086         VFS_UNLOCK_GIANT(vfslocked);
 3087         return(error);
 3088 }
 3089 
 3090 /*
 3091  * nfs readdir service
 3092  * - mallocs what it thinks is enough to read
 3093  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
 3094  * - calls VOP_READDIR()
 3095  * - loops around building the reply
 3096  *      if the output generated exceeds count break out of loop
 3097  *      The nfsm_clget macro is used here so that the reply will be packed
 3098  *      tightly in mbuf clusters.
 3099  * - it only knows that it has encountered eof when the VOP_READDIR()
 3100  *      reads nothing
 3101  * - as such one readdir rpc will return eof false although you are there
 3102  *      and then the next will return eof
 3103  * - it trims out records with d_fileno == 0
 3104  *      this doesn't matter for Unix clients, but they might confuse clients
 3105  *      running other operating systems.
 3106  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
 3107  *      than requested, but this may not apply to all filesystems. For
 3108  *      example, client NFS does not { although it is never remote mounted
 3109  *      anyhow }
 3110  *     The alternate call nfsrv_readdirplus() does lookups as well.
 3111  * PS: The NFS protocol spec. does not clarify what the "count" byte
 3112  *      argument is a count of.. just name strings and file id's or the
 3113  *      entire reply rpc or ...
 3114  *      I tried just file name and id sizes and it confused the Sun client,
 3115  *      so I am using the full rpc size now. The "paranoia.." comment refers
 3116  *      to including the status longwords that are not a part of the dir.
 3117  *      "entry" structures, but are in the rpc.
 3118  */
 3119 struct flrep {
 3120         nfsuint64       fl_off;
 3121         u_int32_t       fl_postopok;
 3122         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
 3123         u_int32_t       fl_fhok;
 3124         u_int32_t       fl_fhsize;
 3125         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
 3126 };
 3127 
 3128 int
 3129 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3130     struct thread *td, struct mbuf **mrq)
 3131 {
 3132         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3133         struct sockaddr *nam = nfsd->nd_nam;
 3134         caddr_t dpos = nfsd->nd_dpos;
 3135         struct ucred *cred = nfsd->nd_cr;
 3136         char *bp, *be;
 3137         struct mbuf *mp;
 3138         struct dirent *dp;
 3139         caddr_t cp;
 3140         u_int32_t *tl;
 3141         caddr_t bpos;
 3142         struct mbuf *mb, *mreq;
 3143         char *cpos, *cend, *rbuf;
 3144         struct vnode *vp = NULL;
 3145         struct vattr at;
 3146         nfsfh_t nfh;
 3147         fhandle_t *fhp;
 3148         struct uio io;
 3149         struct iovec iv;
 3150         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
 3151         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
 3152         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3153         u_quad_t off, toff, verf;
 3154         u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
 3155         int vfslocked;
 3156 
 3157         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3158         vfslocked = 0;
 3159         fhp = &nfh.fh_generic;
 3160         nfsm_srvmtofh(fhp);
 3161         if (v3) {
 3162                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 3163                 toff = fxdr_hyper(tl);
 3164                 tl += 2;
 3165                 verf = fxdr_hyper(tl);
 3166                 tl += 2;
 3167         } else {
 3168                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
 3169                 toff = fxdr_unsigned(u_quad_t, *tl++);
 3170                 verf = 0;       /* shut up gcc */
 3171         }
 3172         off = toff;
 3173         cnt = fxdr_unsigned(int, *tl);
 3174         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 3175         xfer = NFS_SRVMAXDATA(nfsd);
 3176         if (cnt > xfer)
 3177                 cnt = xfer;
 3178         if (siz > xfer)
 3179                 siz = xfer;
 3180         fullsiz = siz;
 3181         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
 3182             nam, &rdonly, TRUE);
 3183         if (!error && vp->v_type != VDIR) {
 3184                 error = ENOTDIR;
 3185                 vput(vp);
 3186                 vp = NULL;
 3187         }
 3188         if (error) {
 3189                 nfsm_reply(NFSX_UNSIGNED);
 3190                 if (v3)
 3191                         nfsm_srvpostop_attr(getret, &at);
 3192                 error = 0;
 3193                 goto nfsmout;
 3194         }
 3195 
 3196         /*
 3197          * Obtain lock on vnode for this section of the code
 3198          */
 3199         if (v3) {
 3200                 error = getret = VOP_GETATTR(vp, &at, cred, td);
 3201 #if 0
 3202                 /*
 3203                  * XXX This check may be too strict for Solaris 2.5 clients.
 3204                  */
 3205                 if (!error && toff && verf && verf != at.va_filerev)
 3206                         error = NFSERR_BAD_COOKIE;
 3207 #endif
 3208         }
 3209         if (!error)
 3210                 error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
 3211         if (error) {
 3212                 vput(vp);
 3213                 vp = NULL;
 3214                 nfsm_reply(NFSX_POSTOPATTR(v3));
 3215                 if (v3)
 3216                         nfsm_srvpostop_attr(getret, &at);
 3217                 error = 0;
 3218                 goto nfsmout;
 3219         }
 3220         VOP_UNLOCK(vp, 0, td);
 3221 
 3222         /*
 3223          * end section.  Allocate rbuf and continue
 3224          */
 3225         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 3226 again:
 3227         iv.iov_base = rbuf;
 3228         iv.iov_len = fullsiz;
 3229         io.uio_iov = &iv;
 3230         io.uio_iovcnt = 1;
 3231         io.uio_offset = (off_t)off;
 3232         io.uio_resid = fullsiz;
 3233         io.uio_segflg = UIO_SYSSPACE;
 3234         io.uio_rw = UIO_READ;
 3235         io.uio_td = NULL;
 3236         eofflag = 0;
 3237         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 3238         if (cookies) {
 3239                 free((caddr_t)cookies, M_TEMP);
 3240                 cookies = NULL;
 3241         }
 3242         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
 3243         off = (off_t)io.uio_offset;
 3244         if (!cookies && !error)
 3245                 error = NFSERR_PERM;
 3246         if (v3) {
 3247                 getret = VOP_GETATTR(vp, &at, cred, td);
 3248                 if (!error)
 3249                         error = getret;
 3250         }
 3251         VOP_UNLOCK(vp, 0, td);
 3252         if (error) {
 3253                 vrele(vp);
 3254                 vp = NULL;
 3255                 free((caddr_t)rbuf, M_TEMP);
 3256                 if (cookies)
 3257                         free((caddr_t)cookies, M_TEMP);
 3258                 nfsm_reply(NFSX_POSTOPATTR(v3));
 3259                 if (v3)
 3260                         nfsm_srvpostop_attr(getret, &at);
 3261                 error = 0;
 3262                 goto nfsmout;
 3263         }
 3264         if (io.uio_resid) {
 3265                 siz -= io.uio_resid;
 3266 
 3267                 /*
 3268                  * If nothing read, return eof
 3269                  * rpc reply
 3270                  */
 3271                 if (siz == 0) {
 3272                         vrele(vp);
 3273                         vp = NULL;
 3274                         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
 3275                                 2 * NFSX_UNSIGNED);
 3276                         if (v3) {
 3277                                 nfsm_srvpostop_attr(getret, &at);
 3278                                 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 3279                                 txdr_hyper(at.va_filerev, tl);
 3280                                 tl += 2;
 3281                         } else
 3282                                 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 3283                         *tl++ = nfsrv_nfs_false;
 3284                         *tl = nfsrv_nfs_true;
 3285                         FREE((caddr_t)rbuf, M_TEMP);
 3286                         FREE((caddr_t)cookies, M_TEMP);
 3287                         error = 0;
 3288                         goto nfsmout;
 3289                 }
 3290         }
 3291 
 3292         /*
 3293          * Check for degenerate cases of nothing useful read.
 3294          * If so go try again
 3295          */
 3296         cpos = rbuf;
 3297         cend = rbuf + siz;
 3298         dp = (struct dirent *)cpos;
 3299         cookiep = cookies;
 3300         /*
 3301          * For some reason FreeBSD's ufs_readdir() chooses to back the
 3302          * directory offset up to a block boundary, so it is necessary to
 3303          * skip over the records that precede the requested offset. This
 3304          * requires the assumption that file offset cookies monotonically
 3305          * increase.
 3306          */
 3307         while (cpos < cend && ncookies > 0 &&
 3308                 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 3309                  ((u_quad_t)(*cookiep)) <= toff)) {
 3310                 cpos += dp->d_reclen;
 3311                 dp = (struct dirent *)cpos;
 3312                 cookiep++;
 3313                 ncookies--;
 3314         }
 3315         if (cpos >= cend || ncookies == 0) {
 3316                 toff = off;
 3317                 siz = fullsiz;
 3318                 goto again;
 3319         }
 3320 
 3321         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
 3322         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
 3323         if (v3) {
 3324                 nfsm_srvpostop_attr(getret, &at);
 3325                 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 3326                 txdr_hyper(at.va_filerev, tl);
 3327         }
 3328         mp = mb;
 3329         bp = bpos;
 3330         be = bp + M_TRAILINGSPACE(mp);
 3331 
 3332         /* Loop through the records and build reply */
 3333         while (cpos < cend && ncookies > 0) {
 3334                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
 3335                         nlen = dp->d_namlen;
 3336                         rem = nfsm_rndup(nlen) - nlen;
 3337                         len += (4 * NFSX_UNSIGNED + nlen + rem);
 3338                         if (v3)
 3339                                 len += 2 * NFSX_UNSIGNED;
 3340                         if (len > cnt) {
 3341                                 eofflag = 0;
 3342                                 break;
 3343                         }
 3344                         /*
 3345                          * Build the directory record xdr from
 3346                          * the dirent entry.
 3347                          */
 3348                         nfsm_clget;
 3349                         *tl = nfsrv_nfs_true;
 3350                         bp += NFSX_UNSIGNED;
 3351                         if (v3) {
 3352                                 nfsm_clget;
 3353                                 *tl = 0;
 3354                                 bp += NFSX_UNSIGNED;
 3355                         }
 3356                         nfsm_clget;
 3357                         *tl = txdr_unsigned(dp->d_fileno);
 3358                         bp += NFSX_UNSIGNED;
 3359                         nfsm_clget;
 3360                         *tl = txdr_unsigned(nlen);
 3361                         bp += NFSX_UNSIGNED;
 3362 
 3363                         /* And loop around copying the name */
 3364                         xfer = nlen;
 3365                         cp = dp->d_name;
 3366                         while (xfer > 0) {
 3367                                 nfsm_clget;
 3368                                 if ((bp+xfer) > be)
 3369                                         tsiz = be-bp;
 3370                                 else
 3371                                         tsiz = xfer;
 3372                                 bcopy(cp, bp, tsiz);
 3373                                 bp += tsiz;
 3374                                 xfer -= tsiz;
 3375                                 if (xfer > 0)
 3376                                         cp += tsiz;
 3377                         }
 3378                         /* And null pad to an int32_t boundary. */
 3379                         for (i = 0; i < rem; i++)
 3380                                 *bp++ = '\0';
 3381                         nfsm_clget;
 3382 
 3383                         /* Finish off the record */
 3384                         if (v3) {
 3385                                 *tl = 0;
 3386                                 bp += NFSX_UNSIGNED;
 3387                                 nfsm_clget;
 3388                         }
 3389                         *tl = txdr_unsigned(*cookiep);
 3390                         bp += NFSX_UNSIGNED;
 3391                 }
 3392                 cpos += dp->d_reclen;
 3393                 dp = (struct dirent *)cpos;
 3394                 cookiep++;
 3395                 ncookies--;
 3396         }
 3397         vrele(vp);
 3398         vp = NULL;
 3399         nfsm_clget;
 3400         *tl = nfsrv_nfs_false;
 3401         bp += NFSX_UNSIGNED;
 3402         nfsm_clget;
 3403         if (eofflag)
 3404                 *tl = nfsrv_nfs_true;
 3405         else
 3406                 *tl = nfsrv_nfs_false;
 3407         bp += NFSX_UNSIGNED;
 3408         if (mp != mb) {
 3409                 if (bp < be)
 3410                         mp->m_len = bp - mtod(mp, caddr_t);
 3411         } else
 3412                 mp->m_len += bp - bpos;
 3413         FREE((caddr_t)rbuf, M_TEMP);
 3414         FREE((caddr_t)cookies, M_TEMP);
 3415 
 3416 nfsmout:
 3417         if (vp)
 3418                 vrele(vp);
 3419         VFS_UNLOCK_GIANT(vfslocked);
 3420         return(error);
 3421 }
 3422 
 3423 int
 3424 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3425     struct thread *td, struct mbuf **mrq)
 3426 {
 3427         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3428         struct sockaddr *nam = nfsd->nd_nam;
 3429         caddr_t dpos = nfsd->nd_dpos;
 3430         struct ucred *cred = nfsd->nd_cr;
 3431         char *bp, *be;
 3432         struct mbuf *mp;
 3433         struct dirent *dp;
 3434         caddr_t cp;
 3435         u_int32_t *tl;
 3436         caddr_t bpos;
 3437         struct mbuf *mb, *mreq;
 3438         char *cpos, *cend, *rbuf;
 3439         struct vnode *vp = NULL, *nvp;
 3440         struct flrep fl;
 3441         nfsfh_t nfh;
 3442         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
 3443         struct uio io;
 3444         struct iovec iv;
 3445         struct vattr va, at, *vap = &va;
 3446         struct nfs_fattr *fp;
 3447         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
 3448         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
 3449         u_quad_t off, toff, verf;
 3450         u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
 3451         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3452         int vfslocked;
 3453 
 3454         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3455         vfslocked = 0;
 3456         if (!v3)
 3457                 panic("nfsrv_readdirplus: v3 proc called on a v2 connection");
 3458         fhp = &nfh.fh_generic;
 3459         nfsm_srvmtofh(fhp);
 3460         tl = nfsm_dissect_nonblock(u_int32_t *, 6 * NFSX_UNSIGNED);
 3461         toff = fxdr_hyper(tl);
 3462         tl += 2;
 3463         verf = fxdr_hyper(tl);
 3464         tl += 2;
 3465         siz = fxdr_unsigned(int, *tl++);
 3466         cnt = fxdr_unsigned(int, *tl);
 3467         off = toff;
 3468         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 3469         xfer = NFS_SRVMAXDATA(nfsd);
 3470         if (cnt > xfer)
 3471                 cnt = xfer;
 3472         if (siz > xfer)
 3473                 siz = xfer;
 3474         fullsiz = siz;
 3475         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
 3476             nam, &rdonly, TRUE);
 3477         if (!error && vp->v_type != VDIR) {
 3478                 error = ENOTDIR;
 3479                 vput(vp);
 3480                 vp = NULL;
 3481         }
 3482         if (error) {
 3483                 nfsm_reply(NFSX_UNSIGNED);
 3484                 nfsm_srvpostop_attr(getret, &at);
 3485                 error = 0;
 3486                 goto nfsmout;
 3487         }
 3488         error = getret = VOP_GETATTR(vp, &at, cred, td);
 3489 #if 0
 3490         /*
 3491          * XXX This check may be too strict for Solaris 2.5 clients.
 3492          */
 3493         if (!error && toff && verf && verf != at.va_filerev)
 3494                 error = NFSERR_BAD_COOKIE;
 3495 #endif
 3496         if (!error)
 3497                 error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
 3498         if (error) {
 3499                 vput(vp);
 3500                 vp = NULL;
 3501                 nfsm_reply(NFSX_V3POSTOPATTR);
 3502                 nfsm_srvpostop_attr(getret, &at);
 3503                 error = 0;
 3504                 goto nfsmout;
 3505         }
 3506         VOP_UNLOCK(vp, 0, td);
 3507         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 3508 again:
 3509         iv.iov_base = rbuf;
 3510         iv.iov_len = fullsiz;
 3511         io.uio_iov = &iv;
 3512         io.uio_iovcnt = 1;
 3513         io.uio_offset = (off_t)off;
 3514         io.uio_resid = fullsiz;
 3515         io.uio_segflg = UIO_SYSSPACE;
 3516         io.uio_rw = UIO_READ;
 3517         io.uio_td = NULL;
 3518         eofflag = 0;
 3519         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 3520         if (cookies) {
 3521                 free((caddr_t)cookies, M_TEMP);
 3522                 cookies = NULL;
 3523         }
 3524         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
 3525         off = (u_quad_t)io.uio_offset;
 3526         getret = VOP_GETATTR(vp, &at, cred, td);
 3527         VOP_UNLOCK(vp, 0, td);
 3528         if (!cookies && !error)
 3529                 error = NFSERR_PERM;
 3530         if (!error)
 3531                 error = getret;
 3532         if (error) {
 3533                 vrele(vp);
 3534                 vp = NULL;
 3535                 if (cookies)
 3536                         free((caddr_t)cookies, M_TEMP);
 3537                 free((caddr_t)rbuf, M_TEMP);
 3538                 nfsm_reply(NFSX_V3POSTOPATTR);
 3539                 nfsm_srvpostop_attr(getret, &at);
 3540                 error = 0;
 3541                 goto nfsmout;
 3542         }
 3543         if (io.uio_resid) {
 3544                 siz -= io.uio_resid;
 3545 
 3546                 /*
 3547                  * If nothing read, return eof
 3548                  * rpc reply
 3549                  */
 3550                 if (siz == 0) {
 3551                         vrele(vp);
 3552                         vp = NULL;
 3553                         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
 3554                                 2 * NFSX_UNSIGNED);
 3555                         nfsm_srvpostop_attr(getret, &at);
 3556                         tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 3557                         txdr_hyper(at.va_filerev, tl);
 3558                         tl += 2;
 3559                         *tl++ = nfsrv_nfs_false;
 3560                         *tl = nfsrv_nfs_true;
 3561                         FREE((caddr_t)cookies, M_TEMP);
 3562                         FREE((caddr_t)rbuf, M_TEMP);
 3563                         error = 0;
 3564                         goto nfsmout;
 3565                 }
 3566         }
 3567 
 3568         /*
 3569          * Check for degenerate cases of nothing useful read.
 3570          * If so go try again
 3571          */
 3572         cpos = rbuf;
 3573         cend = rbuf + siz;
 3574         dp = (struct dirent *)cpos;
 3575         cookiep = cookies;
 3576         /*
 3577          * For some reason FreeBSD's ufs_readdir() chooses to back the
 3578          * directory offset up to a block boundary, so it is necessary to
 3579          * skip over the records that precede the requested offset. This
 3580          * requires the assumption that file offset cookies monotonically
 3581          * increase.
 3582          */
 3583         while (cpos < cend && ncookies > 0 &&
 3584                 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 3585                  ((u_quad_t)(*cookiep)) <= toff)) {
 3586                 cpos += dp->d_reclen;
 3587                 dp = (struct dirent *)cpos;
 3588                 cookiep++;
 3589                 ncookies--;
 3590         }
 3591         if (cpos >= cend || ncookies == 0) {
 3592                 toff = off;
 3593                 siz = fullsiz;
 3594                 goto again;
 3595         }
 3596 
 3597         /*
 3598          * Probe one of the directory entries to see if the filesystem
 3599          * supports VGET.
 3600          */
 3601         if (VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE, &nvp) ==
 3602             EOPNOTSUPP) {
 3603                 error = NFSERR_NOTSUPP;
 3604                 vrele(vp);
 3605                 vp = NULL;
 3606                 free((caddr_t)cookies, M_TEMP);
 3607                 free((caddr_t)rbuf, M_TEMP);
 3608                 nfsm_reply(NFSX_V3POSTOPATTR);
 3609                 nfsm_srvpostop_attr(getret, &at);
 3610                 error = 0;
 3611                 goto nfsmout;
 3612         }
 3613         vput(nvp);
 3614         nvp = NULL;
 3615 
 3616         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
 3617             2 * NFSX_UNSIGNED;
 3618         nfsm_reply(cnt);
 3619         nfsm_srvpostop_attr(getret, &at);
 3620         tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 3621         txdr_hyper(at.va_filerev, tl);
 3622         mp = mb;
 3623         bp = bpos;
 3624         be = bp + M_TRAILINGSPACE(mp);
 3625 
 3626         /* Loop through the records and build reply */
 3627         while (cpos < cend && ncookies > 0) {
 3628                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
 3629                         nlen = dp->d_namlen;
 3630                         rem = nfsm_rndup(nlen)-nlen;
 3631 
 3632                         /*
 3633                          * For readdir_and_lookup get the vnode using
 3634                          * the file number.
 3635                          */
 3636                         if (VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE,
 3637                             &nvp))
 3638                                 goto invalid;
 3639                         bzero((caddr_t)nfhp, NFSX_V3FH);
 3640                         nfhp->fh_fsid =
 3641                                 nvp->v_mount->mnt_stat.f_fsid;
 3642                         /*
 3643                          * XXXRW: Assert the mountpoints are the same so that
 3644                          * we know that acquiring Giant based on the
 3645                          * directory is the right thing for the child.
 3646                          */
 3647                         KASSERT(nvp->v_mount == vp->v_mount,
 3648                             ("nfsrv_readdirplus: nvp mount != vp mount"));
 3649                         if (VOP_VPTOFH(nvp, &nfhp->fh_fid)) {
 3650                                 vput(nvp);
 3651                                 nvp = NULL;
 3652                                 goto invalid;
 3653                         }
 3654                         if (VOP_GETATTR(nvp, vap, cred, td)) {
 3655                                 vput(nvp);
 3656                                 nvp = NULL;
 3657                                 goto invalid;
 3658                         }
 3659                         vput(nvp);
 3660                         nvp = NULL;
 3661 
 3662                         /*
 3663                          * If either the dircount or maxcount will be
 3664                          * exceeded, get out now. Both of these lengths
 3665                          * are calculated conservatively, including all
 3666                          * XDR overheads.
 3667                          */
 3668                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
 3669                                 NFSX_V3POSTOPATTR);
 3670                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
 3671                         if (len > cnt || dirlen > fullsiz) {
 3672                                 eofflag = 0;
 3673                                 break;
 3674                         }
 3675 
 3676                         /*
 3677                          * Build the directory record xdr from
 3678                          * the dirent entry.
 3679                          */
 3680                         fp = (struct nfs_fattr *)&fl.fl_fattr;
 3681                         nfsm_srvfillattr(vap, fp);
 3682                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
 3683                         fl.fl_fhok = nfsrv_nfs_true;
 3684                         fl.fl_postopok = nfsrv_nfs_true;
 3685                         fl.fl_off.nfsuquad[0] = 0;
 3686                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
 3687 
 3688                         nfsm_clget;
 3689                         *tl = nfsrv_nfs_true;
 3690                         bp += NFSX_UNSIGNED;
 3691                         nfsm_clget;
 3692                         *tl = 0;
 3693                         bp += NFSX_UNSIGNED;
 3694                         nfsm_clget;
 3695                         *tl = txdr_unsigned(dp->d_fileno);
 3696                         bp += NFSX_UNSIGNED;
 3697                         nfsm_clget;
 3698                         *tl = txdr_unsigned(nlen);
 3699                         bp += NFSX_UNSIGNED;
 3700 
 3701                         /* And loop around copying the name */
 3702                         xfer = nlen;
 3703                         cp = dp->d_name;
 3704                         while (xfer > 0) {
 3705                                 nfsm_clget;
 3706                                 if ((bp + xfer) > be)
 3707                                         tsiz = be - bp;
 3708                                 else
 3709                                         tsiz = xfer;
 3710                                 bcopy(cp, bp, tsiz);
 3711                                 bp += tsiz;
 3712                                 xfer -= tsiz;
 3713                                 if (xfer > 0)
 3714                                         cp += tsiz;
 3715                         }
 3716                         /* And null pad to an int32_t boundary. */
 3717                         for (i = 0; i < rem; i++)
 3718                                 *bp++ = '\0';
 3719 
 3720                         /*
 3721                          * Now copy the flrep structure out.
 3722                          */
 3723                         xfer = sizeof (struct flrep);
 3724                         cp = (caddr_t)&fl;
 3725                         while (xfer > 0) {
 3726                                 nfsm_clget;
 3727                                 if ((bp + xfer) > be)
 3728                                         tsiz = be - bp;
 3729                                 else
 3730                                         tsiz = xfer;
 3731                                 bcopy(cp, bp, tsiz);
 3732                                 bp += tsiz;
 3733                                 xfer -= tsiz;
 3734                                 if (xfer > 0)
 3735                                         cp += tsiz;
 3736                         }
 3737                 }
 3738 invalid:
 3739                 cpos += dp->d_reclen;
 3740                 dp = (struct dirent *)cpos;
 3741                 cookiep++;
 3742                 ncookies--;
 3743         }
 3744         vrele(vp);
 3745         vp = NULL;
 3746         nfsm_clget;
 3747         *tl = nfsrv_nfs_false;
 3748         bp += NFSX_UNSIGNED;
 3749         nfsm_clget;
 3750         if (eofflag)
 3751                 *tl = nfsrv_nfs_true;
 3752         else
 3753                 *tl = nfsrv_nfs_false;
 3754         bp += NFSX_UNSIGNED;
 3755         if (mp != mb) {
 3756                 if (bp < be)
 3757                         mp->m_len = bp - mtod(mp, caddr_t);
 3758         } else
 3759                 mp->m_len += bp - bpos;
 3760         FREE((caddr_t)cookies, M_TEMP);
 3761         FREE((caddr_t)rbuf, M_TEMP);
 3762 nfsmout:
 3763         if (vp)
 3764                 vrele(vp);
 3765         VFS_UNLOCK_GIANT(vfslocked);
 3766         return(error);
 3767 }
 3768 
 3769 /*
 3770  * nfs commit service
 3771  */
 3772 int
 3773 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3774     struct thread *td, struct mbuf **mrq)
 3775 {
 3776         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3777         struct sockaddr *nam = nfsd->nd_nam;
 3778         caddr_t dpos = nfsd->nd_dpos;
 3779         struct ucred *cred = nfsd->nd_cr;
 3780         struct vattr bfor, aft;
 3781         struct vnode *vp = NULL;
 3782         nfsfh_t nfh;
 3783         fhandle_t *fhp;
 3784         u_int32_t *tl;
 3785         caddr_t bpos;
 3786         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
 3787         struct mbuf *mb, *mreq;
 3788         u_quad_t off;
 3789         struct mount *mp = NULL;
 3790         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3791         int tvfslocked;
 3792         int vfslocked;
 3793 
 3794         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3795         vfslocked = 0;
 3796         if (!v3)
 3797                 panic("nfsrv_commit: v3 proc called on a v2 connection");
 3798         fhp = &nfh.fh_generic;
 3799         nfsm_srvmtofh(fhp);
 3800         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 3801                 error = ESTALE;
 3802                 goto ereply;
 3803         }
 3804         vfslocked = VFS_LOCK_GIANT(mp);
 3805         (void) vn_start_write(NULL, &mp, V_WAIT);
 3806         vfs_rel(mp);            /* The write holds a ref. */
 3807         tl = nfsm_dissect_nonblock(u_int32_t *, 3 * NFSX_UNSIGNED);
 3808 
 3809         /*
 3810          * XXX At this time VOP_FSYNC() does not accept offset and byte
 3811          * count parameters, so these arguments are useless (someday maybe).
 3812          */
 3813         off = fxdr_hyper(tl);
 3814         tl += 2;
 3815         cnt = fxdr_unsigned(int, *tl);
 3816         error = nfsrv_fhtovp(fhp, 1, &vp, &tvfslocked, cred, slp,
 3817             nam, &rdonly, TRUE);
 3818         vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
 3819         if (error) {
 3820                 nfsm_reply(2 * NFSX_UNSIGNED);
 3821                 nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
 3822                 error = 0;
 3823                 goto nfsmout;
 3824         }
 3825         for_ret = VOP_GETATTR(vp, &bfor, cred, td);
 3826 
 3827         if (cnt > MAX_COMMIT_COUNT) {
 3828                 /*
 3829                  * Give up and do the whole thing
 3830                  */
 3831                 if (vp->v_object &&
 3832                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 3833                         VM_OBJECT_LOCK(vp->v_object);
 3834                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 3835                         VM_OBJECT_UNLOCK(vp->v_object);
 3836                 }
 3837                 error = VOP_FSYNC(vp, MNT_WAIT, td);
 3838         } else {
 3839                 /*
 3840                  * Locate and synchronously write any buffers that fall
 3841                  * into the requested range.  Note:  we are assuming that
 3842                  * f_iosize is a power of 2.
 3843                  */
 3844                 int iosize = vp->v_mount->mnt_stat.f_iosize;
 3845                 int iomask = iosize - 1;
 3846                 int s;
 3847                 daddr_t lblkno;
 3848 
 3849                 /*
 3850                  * Align to iosize boundary, super-align to page boundary.
 3851                  */
 3852                 if (off & iomask) {
 3853                         cnt += off & iomask;
 3854                         off &= ~(u_quad_t)iomask;
 3855                 }
 3856                 if (off & PAGE_MASK) {
 3857                         cnt += off & PAGE_MASK;
 3858                         off &= ~(u_quad_t)PAGE_MASK;
 3859                 }
 3860                 lblkno = off / iosize;
 3861 
 3862                 if (vp->v_object &&
 3863                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 3864                         VM_OBJECT_LOCK(vp->v_object);
 3865                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
 3866                         VM_OBJECT_UNLOCK(vp->v_object);
 3867                 }
 3868 
 3869                 s = splbio();
 3870                 VI_LOCK(vp);
 3871                 while (cnt > 0) {
 3872                         struct buf *bp;
 3873 
 3874                         /*
 3875                          * If we have a buffer and it is marked B_DELWRI we
 3876                          * have to lock and write it.  Otherwise the prior
 3877                          * write is assumed to have already been committed.
 3878                          *
 3879                          * gbincore() can return invalid buffers now so we
 3880                          * have to check that bit as well (though B_DELWRI
 3881                          * should not be set if B_INVAL is set there could be
 3882                          * a race here since we haven't locked the buffer).
 3883                          */
 3884                         if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 3885                                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 3886                                     LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) {
 3887                                         VI_LOCK(vp);
 3888                                         continue; /* retry */
 3889                                 }
 3890                                 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
 3891                                     B_DELWRI) {
 3892                                         bremfree(bp);
 3893                                         bp->b_flags &= ~B_ASYNC;
 3894                                         bwrite(bp);
 3895                                         ++nfs_commit_miss;
 3896                                 } else
 3897                                         BUF_UNLOCK(bp);
 3898                                 VI_LOCK(vp);
 3899                         }
 3900                         ++nfs_commit_blks;
 3901                         if (cnt < iosize)
 3902                                 break;
 3903                         cnt -= iosize;
 3904                         ++lblkno;
 3905                 }
 3906                 VI_UNLOCK(vp);
 3907                 splx(s);
 3908         }
 3909 
 3910         aft_ret = VOP_GETATTR(vp, &aft, cred, td);
 3911         vput(vp);
 3912         vp = NULL;
 3913 ereply:
 3914         nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
 3915         nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
 3916         if (!error) {
 3917                 tl = nfsm_build(u_int32_t *, NFSX_V3WRITEVERF);
 3918                 if (nfsver.tv_sec == 0)
 3919                         nfsver = boottime;
 3920                 *tl++ = txdr_unsigned(nfsver.tv_sec);
 3921                 *tl = txdr_unsigned(nfsver.tv_usec);
 3922         } else {
 3923                 error = 0;
 3924         }
 3925 nfsmout:
 3926         if (vp)
 3927                 vput(vp);
 3928         vn_finished_write(mp);
 3929         VFS_UNLOCK_GIANT(vfslocked);
 3930         return(error);
 3931 }
 3932 
 3933 /*
 3934  * nfs statfs service
       *
       * Resolves the file handle of the request to a vnode with
       * nfsrv_fhtovp(), asks the vnode's mount for its statistics with
       * VFS_STATFS(), and builds a reply in one of two layouts selected by
       * the ND_NFSV3 flag of the request: byte totals encoded with
       * txdr_hyper() for v3, or block counts encoded with txdr_unsigned()
       * otherwise.
       *
       * A failure of nfsrv_fhtovp() or VFS_STATFS() is not handed back to the
       * caller: a reply is still built and error is reset to 0 before the
       * function returns.
       *
       * NOTE(review): the nfsm_*() helpers are macros defined outside this
       * chunk.  They appear to use the locals mrep, md, dpos, mb, mreq, bpos
       * and error implicitly and to jump to nfsmout on failure -- confirm
       * against their definitions before renaming or removing any of them.
 3935  */
 3936 int
 3937 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3938     struct thread *td, struct mbuf **mrq)
 3939 {
 3940         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3941         struct sockaddr *nam = nfsd->nd_nam;
 3942         caddr_t dpos = nfsd->nd_dpos;
 3943         struct ucred *cred = nfsd->nd_cr;
 3944         struct statfs *sf;
 3945         struct nfs_statfs *sfp;
 3946         caddr_t bpos;
 3947         int error = 0, rdonly, getret = 1;
 3948         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3949         struct mbuf *mb, *mreq;
 3950         struct vnode *vp = NULL;
 3951         struct vattr at;
 3952         nfsfh_t nfh;
 3953         fhandle_t *fhp;
 3954         struct statfs statfs;
 3955         u_quad_t tval;
 3956         int vfslocked;
 3957 
 3958         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /*
               * nfsmout hands vfslocked to VFS_UNLOCK_GIANT(), so it is given
               * a value before the first step that could end up there.
               */
 3959         vfslocked = 0;
 3960         fhp = &nfh.fh_generic;
              /*
               * NOTE(review): presumably decodes the file handle from the
               * request into *fhp and may jump to nfsmout -- confirm.
               */
 3961         nfsm_srvmtofh(fhp);
 3962         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
 3963             nam, &rdonly, TRUE);
 3964         if (error) {
                      /*
                       * No vnode was obtained.  For v3 the reply still gets a
                       * post-op attribute section; getret keeps its initial
                       * value of 1 and 'at' has not been filled in.
                       */
 3965                 nfsm_reply(NFSX_UNSIGNED);
 3966                 if (v3)
 3967                         nfsm_srvpostop_attr(getret, &at);
 3968                 error = 0;
 3969                 goto nfsmout;
 3970         }
 3971         sf = &statfs;
              /*
               * error keeps the VFS_STATFS() result.  The attributes are
               * fetched regardless of it, and the vnode is released before
               * the reply is built.
               */
 3972         error = VFS_STATFS(vp->v_mount, sf, td);
 3973         getret = VOP_GETATTR(vp, &at, cred, td);
 3974         vput(vp);
              /* Cleared so that the vput() under nfsmout is skipped. */
 3975         vp = NULL;
 3976         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
 3977         if (v3)
 3978                 nfsm_srvpostop_attr(getret, &at);
 3979         if (error) {
                      /*
                       * VFS_STATFS() failed: no statistics body is appended.
                       * NOTE(review): presumably nfsm_reply() has already
                       * recorded error in the reply -- confirm.
                       */
 3980                 error = 0;
 3981                 goto nfsmout;
 3982         }
 3983         sfp = nfsm_build(struct nfs_statfs *, NFSX_STATFS(v3));
 3984         if (v3) {
                      /*
                       * Byte totals are block counts multiplied by f_bsize,
                       * computed in u_quad_t.
                       */
 3985                 tval = (u_quad_t)sf->f_blocks;
 3986                 tval *= (u_quad_t)sf->f_bsize;
 3987                 txdr_hyper(tval, &sfp->sf_tbytes);
 3988                 tval = (u_quad_t)sf->f_bfree;
 3989                 tval *= (u_quad_t)sf->f_bsize;
 3990                 txdr_hyper(tval, &sfp->sf_fbytes);
 3991                 /*
 3992                  * Don't send negative values for available space,
 3993                  * since this field is unsigned in the NFS protocol.
 3994                  * Otherwise, the client would see absurdly high
 3995                  * numbers for free space.
 3996                  */
 3997                 if (sf->f_bavail < 0)
 3998                         tval = 0;
 3999                 else
 4000                         tval = (u_quad_t)sf->f_bavail;
 4001                 tval *= (u_quad_t)sf->f_bsize;
 4002                 txdr_hyper(tval, &sfp->sf_abytes);
                      /*
                       * File counts are stored in nfsuquad[1] with
                       * nfsuquad[0] zeroed.  sf_afiles repeats f_ffree, the
                       * same value that is reported in sf_ffiles.
                       */
 4003                 sfp->sf_tfiles.nfsuquad[0] = 0;
 4004                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
 4005                 sfp->sf_ffiles.nfsuquad[0] = 0;
 4006                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
 4007                 sfp->sf_afiles.nfsuquad[0] = 0;
 4008                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
 4009                 sfp->sf_invarsec = 0;
 4010         } else {
                      /*
                       * Non-v3 layout: sf_tsize is the constant
                       * NFS_MAXDGRAMDATA, the other fields are copied from
                       * the statfs result, and a negative f_bavail is
                       * reported as 0.
                       */
 4011                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
 4012                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
 4013                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
 4014                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
 4015                 if (sf->f_bavail < 0)
 4016                         sfp->sf_bavail = 0;
 4017                 else
 4018                         sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
 4019         }
 4020 nfsmout:
              /* Common exit: drop the vnode if still held, then Giant. */
 4021         if (vp)
 4022                 vput(vp);
 4023         VFS_UNLOCK_GIANT(vfslocked);
 4024         return(error);
 4025 }
 4026 
 4027 /*
 4028  * nfs fsinfo service
       *
       * Only valid for requests flagged ND_NFSV3; any other request panics.
       * Resolves the file handle to a vnode with nfsrv_fhtovp() and builds a
       * struct nfsv3_fsinfo reply holding: transfer-size values chosen by the
       * type of the socket the request arrived on, a guessed maximum file
       * size derived from the filesystem block size, a fixed time delta and a
       * fixed set of property flags.
       *
       * A failure of nfsrv_fhtovp() is not handed back to the caller: a reply
       * is still built and error is reset to 0.
       *
       * NOTE(review): the nfsm_*() helpers are macros defined outside this
       * chunk.  They appear to use the locals mrep, md, dpos, mb, mreq, bpos
       * and error implicitly and to jump to nfsmout on failure -- confirm
       * against their definitions before renaming or removing any of them.
 4029  */
 4030 int
 4031 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4032     struct thread *td, struct mbuf **mrq)
 4033 {
 4034         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 4035         struct sockaddr *nam = nfsd->nd_nam;
 4036         caddr_t dpos = nfsd->nd_dpos;
 4037         struct ucred *cred = nfsd->nd_cr;
 4038         struct nfsv3_fsinfo *sip;
 4039         caddr_t bpos;
 4040         int error = 0, rdonly, getret = 1, pref;
 4041         struct mbuf *mb, *mreq;
 4042         struct vnode *vp = NULL;
 4043         struct vattr at;
 4044         nfsfh_t nfh;
 4045         fhandle_t *fhp;
 4046         u_quad_t maxfsize;
 4047         struct statfs sb;
 4048         int v3 = (nfsd->nd_flag & ND_NFSV3);
 4049         int vfslocked;
 4050 
 4051         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4052         if (!v3)
 4053                 panic("nfsrv_fsinfo: v3 proc called on a v2 connection");
 4054         fhp = &nfh.fh_generic;
              /*
               * nfsmout hands vfslocked to VFS_UNLOCK_GIANT(), so it is given
               * a value before the first step that could end up there.
               */
 4055         vfslocked = 0;
 4056         nfsm_srvmtofh(fhp);
 4057         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
 4058             nam, &rdonly, TRUE);
 4059         if (error) {
                      /*
                       * No vnode was obtained.  The reply still gets a post-op
                       * attribute section; getret keeps its initial value of
                       * 1 and 'at' has not been filled in.
                       */
 4060                 nfsm_reply(NFSX_UNSIGNED);
 4061                 nfsm_srvpostop_attr(getret, &at);
 4062                 error = 0;
 4063                 goto nfsmout;
 4064         }
 4065 
 4066         /* XXX Try to make a guess on the max file size. */
              /*
               * NOTE(review): the VFS_STATFS() return value is discarded.  If
               * the call can fail without filling in sb, f_bsize below is
               * read uninitialized -- confirm.
               */
 4067         VFS_STATFS(vp->v_mount, &sb, td);
 4068         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
 4069 
 4070         getret = VOP_GETATTR(vp, &at, cred, td);
 4071         vput(vp);
              /* Cleared so that the vput() under nfsmout is skipped. */
 4072         vp = NULL;
 4073         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
 4074         nfsm_srvpostop_attr(getret, &at);
 4075         sip = nfsm_build(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
 4076 
 4077         /*
 4078          * XXX
 4079          * There should be filesystem VFS OP(s) to get this information.
 4080          * For now, assume ufs.
 4081          */
              /*
               * A single size, chosen by socket type, fills fs_rtmax,
               * fs_rtpref, fs_wtmax, fs_wtpref and fs_dtpref.
               */
 4082         if (slp->ns_so->so_type == SOCK_DGRAM)
 4083                 pref = NFS_MAXDGRAMDATA;
 4084         else
 4085                 pref = NFS_MAXDATA;
 4086         sip->fs_rtmax = txdr_unsigned(pref);
 4087         sip->fs_rtpref = txdr_unsigned(pref);
 4088         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
 4089         sip->fs_wtmax = txdr_unsigned(pref);
 4090         sip->fs_wtpref = txdr_unsigned(pref);
 4091         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
 4092         sip->fs_dtpref = txdr_unsigned(pref);
 4093         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
              /* Time delta is fixed: nfsv3_sec 0, nfsv3_nsec 1. */
 4094         sip->fs_timedelta.nfsv3_sec = 0;
 4095         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
 4096         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
 4097                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
 4098                 NFSV3FSINFO_CANSETTIME);
 4099 nfsmout:
              /* Common exit: drop the vnode if still held, then Giant. */
 4100         if (vp)
 4101                 vput(vp);
 4102         VFS_UNLOCK_GIANT(vfslocked);
 4103         return(error);
 4104 }
 4105 
 4106 /*
 4107  * nfs pathconf service
       *
       * Only valid for requests flagged ND_NFSV3; any other request panics.
       * Resolves the file handle to a vnode with nfsrv_fhtovp(), queries
       * _PC_LINK_MAX, _PC_NAME_MAX, _PC_CHOWN_RESTRICTED and _PC_NO_TRUNC
       * through VOP_PATHCONF() (stopping at the first failure), and builds a
       * struct nfsv3_pathconf reply from the results.  The case-insensitive
       * and case-preserving fields are fixed constants.
       *
       * A failure of nfsrv_fhtovp() or VOP_PATHCONF() is not handed back to
       * the caller: a reply is still built and error is reset to 0.
       *
       * NOTE(review): the nfsm_*() helpers are macros defined outside this
       * chunk.  They appear to use the locals mrep, md, dpos, mb, mreq, bpos
       * and error implicitly and to jump to nfsmout on failure -- confirm
       * against their definitions before renaming or removing any of them.
 4108  */
 4109 int
 4110 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4111     struct thread *td, struct mbuf **mrq)
 4112 {
 4113         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 4114         struct sockaddr *nam = nfsd->nd_nam;
 4115         caddr_t dpos = nfsd->nd_dpos;
 4116         struct ucred *cred = nfsd->nd_cr;
 4117         struct nfsv3_pathconf *pc;
 4118         caddr_t bpos;
 4119         int error = 0, rdonly, getret = 1;
 4120         register_t linkmax, namemax, chownres, notrunc;
 4121         struct mbuf *mb, *mreq;
 4122         struct vnode *vp = NULL;
 4123         struct vattr at;
 4124         nfsfh_t nfh;
 4125         fhandle_t *fhp;
 4126         int v3 = (nfsd->nd_flag & ND_NFSV3);
              /*
               * Must hold a defined value before anything can reach nfsmout,
               * which passes it to VFS_UNLOCK_GIANT(); nfsrv_statfs() and
               * nfsrv_fsinfo() clear it before nfsm_srvmtofh() for the same
               * reason.
               */
 4127         int vfslocked = 0;
 4128 
 4129         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4130         if (!v3)
 4131                 panic("nfsrv_pathconf: v3 proc called on a v2 connection");
 4132         fhp = &nfh.fh_generic;
 4133         nfsm_srvmtofh(fhp);
 4134         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
 4135             nam, &rdonly, TRUE);
 4136         if (error) {
                      /*
                       * No vnode was obtained.  The reply still gets a post-op
                       * attribute section; getret keeps its initial value of
                       * 1 and 'at' has not been filled in.
                       */
 4137                 nfsm_reply(NFSX_UNSIGNED);
 4138                 nfsm_srvpostop_attr(getret, &at);
 4139                 error = 0;
 4140                 goto nfsmout;
 4141         }
              /* Each query runs only if every earlier one succeeded. */
 4142         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
 4143         if (!error)
 4144                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
 4145         if (!error)
 4146                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
 4147         if (!error)
 4148                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
 4149         getret = VOP_GETATTR(vp, &at, cred, td);
 4150         vput(vp);
              /* Cleared so that the vput() under nfsmout is skipped. */
 4151         vp = NULL;
 4152         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
 4153         nfsm_srvpostop_attr(getret, &at);
 4154         if (error) {
                      /*
                       * A VOP_PATHCONF() call failed: no pathconf body is
                       * appended, so the unset results are never read.
                       */
 4155                 error = 0;
 4156                 goto nfsmout;
 4157         }
 4158         pc = nfsm_build(struct nfsv3_pathconf *, NFSX_V3PATHCONF);
 4159 
 4160         pc->pc_linkmax = txdr_unsigned(linkmax);
 4161         pc->pc_namemax = txdr_unsigned(namemax);
 4162         pc->pc_notrunc = txdr_unsigned(notrunc);
 4163         pc->pc_chownrestricted = txdr_unsigned(chownres);
 4164 
 4165         /*
 4166          * These should probably be supported by VOP_PATHCONF(), but
 4167          * until msdosfs is exportable (why would you want to?), the
 4168          * Unix defaults should be ok.
 4169          */
 4170         pc->pc_caseinsensitive = nfsrv_nfs_false;
 4171         pc->pc_casepreserving = nfsrv_nfs_true;
 4172 nfsmout:
              /* Common exit: drop the vnode if still held, then Giant. */
 4173         if (vp)
 4174                 vput(vp);
 4175         VFS_UNLOCK_GIANT(vfslocked);
 4176         return(error);
 4177 }
 4178 
 4179 /*
 4180  * Null operation, used by clients to ping server
       *
       * Builds a reply through nfsm_reply(0) and returns error, which starts
       * as NFSERR_RETVOID and is not assigned by any statement visible in
       * this function.
       *
       * NOTE(review): nfsm_reply() is a macro defined outside this chunk.
       * The nfsmout label has no explicit goto here, and mrep, bpos, mb and
       * mreq are never referenced directly, so presumably the macro uses
       * them (and may change error) -- confirm before removing any of them.
 4181  */
 4182 /* ARGSUSED */
 4183 int
 4184 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4185     struct thread *td, struct mbuf **mrq)
 4186 {
 4187         struct mbuf *mrep = nfsd->nd_mrep;
 4188         caddr_t bpos;
 4189         int error = NFSERR_RETVOID;
 4190         struct mbuf *mb, *mreq;
 4191 
 4192         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4193         nfsm_reply(0);
 4194 nfsmout:
 4195         return (error);
 4196 }
 4197 
 4198 /*
 4199  * No operation, used for obsolete procedures
       *
       * Selects the status for the reply -- nfsd->nd_repstat when that is
       * nonzero, EPROCUNAVAIL otherwise -- builds a reply through
       * nfsm_reply(0), and then resets error to 0 so that 0 is returned on
       * the straight-line path.
       *
       * NOTE(review): nfsm_reply() is a macro defined outside this chunk.
       * The nfsmout label has no explicit goto here, and mrep, bpos, mb and
       * mreq are never referenced directly, so presumably the macro uses
       * them, reads error as the reply status, and may jump to nfsmout with
       * error still set -- confirm before removing any of them.
 4200  */
 4201 /* ARGSUSED */
 4202 int
 4203 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4204     struct thread *td, struct mbuf **mrq)
 4205 {
 4206         struct mbuf *mrep = nfsd->nd_mrep;
 4207         caddr_t bpos;
 4208         int error;
 4209         struct mbuf *mb, *mreq;
 4210 
 4211         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* A status already stored in the request takes precedence. */
 4212         if (nfsd->nd_repstat)
 4213                 error = nfsd->nd_repstat;
 4214         else
 4215                 error = EPROCUNAVAIL;
 4216         nfsm_reply(0);
              /* The status was only needed for the reply. */
 4217         error = 0;
 4218 nfsmout:
 4219         return (error);
 4220 }
 4221 
 4222 /*
 4223  * Perform access checking for vnodes obtained from file handles that would
 4224  * refer to files already opened by a Unix client. You cannot just use
 4225  * vn_writechk() and VOP_ACCESS() for two reasons.
 4226  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
 4227  *     case.
 4228  * 2 - The owner is to be given access irrespective of mode bits for some
 4229  *     operations, so that processes that chmod after opening a file don't
 4230  *     break. I don't like this because it opens a security hole, but since
 4231  *     the nfs server opens a security hole the size of a barn door anyhow,
 4232  *     what the heck.
 4233  *
 4234  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
 4235  * will return EPERM instead of EACCES. EPERM is always an error.
 4236  */
 4237 static int
 4238 nfsrv_access(struct vnode *vp, int flags, struct ucred *cred,
 4239     int rdonly, struct thread *td, int override)
 4240 {
 4241         struct vattr va;
 4242         int rc, ro_fs, owner_ok;
 4243 
 4244         VFS_ASSERT_GIANT(vp->v_mount);
 4245 
 4246         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4247 
 4248         if ((flags & VWRITE) != 0) {
 4249                 /*
 4250                  * Write-side checks in the manner of vn_writechk(), but
 4251                  * also honoring the caller-supplied rdonly flag.
 4252                  * Regular files, directories and symbolic links cannot be
 4253                  * written when either the caller's rdonly flag or the
 4254                  * mount's MNT_RDONLY flag is set; other vnode types are
 4255                  * let through to the checks below.
 4256                  */
 4257                 ro_fs = rdonly ||
 4258                     (vp->v_mount->mnt_flag & MNT_RDONLY) != 0;
 4259                 if (ro_fs && (vp->v_type == VREG ||
 4260                     vp->v_type == VDIR || vp->v_type == VLNK))
 4261                         return (EROFS);
 4262 
 4263                 /* A vnode flagged VV_TEXT is refused for writing. */
 4264                 if ((vp->v_vflag & VV_TEXT) != 0)
 4265                         return (ETXTBSY);
 4266         }
 4267 
 4268         /* The owner comparison below needs the file's attributes. */
 4269         rc = VOP_GETATTR(vp, &va, cred, td);
 4270         if (rc != 0)
 4271                 return (rc);
 4272 
 4273         rc = VOP_ACCESS(vp, flags, cred, td);
 4274         if (rc != EACCES || !override)
 4275                 return (rc);
 4276 
 4277         /*
 4278          * Only an EACCES result is forgiven, and only for the owner of
 4279          * the file when the caller asked for the override; every other
 4280          * VOP_ACCESS() result was already returned unchanged above.
 4281          */
 4282         owner_ok = (cred->cr_uid == va.va_uid);
 4283         return (owner_ok ? 0 : rc);
 4284 }

Cache object: 264e588153aab8fcae914b45a87b4a27


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.