The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/nfsserver/nfs_serv.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD$");
   37 
   38 /*
   39  * nfs version 2 and 3 server calls to vnode ops
   40  * - these routines generally have 3 phases
   41  *   1 - break down and validate rpc request in mbuf list
   42  *   2 - do the vnode ops for the request
   43  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
   44  *   3 - build the rpc reply in an mbuf list
   45  *   nb:
   46  *      - do not mix the phases, since the nfsm_?? macros can return failures
   47  *        on a bad rpc or similar and do not do any vrele() or vput()'s
   48  *
   49  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
   50  *      error number iff error != 0 whereas
   51  *      returning an error from the server function implies a fatal error
   52  *      such as a badly constructed rpc request that should be dropped without
   53  *      a reply.
   54  *      For nfsm_reply(), the case where error == EBADRPC is treated
   55  *      specially; after constructing a reply, it does an immediate
   56  *      `goto nfsmout' to avoid getting any V3 post-op status appended.
   57  *
   58  * Other notes:
   59  *      Warning: always pay careful attention to resource cleanup on return
   60  *      and note that nfsm_*() macros can terminate a procedure on certain
   61  *      errors.
   62  *
   63  *      lookup() and namei()
   64  *      may return garbage in various structural fields/return elements
   65  *      if an error is returned, and may garbage up nd.ni_dvp even if no
   66  *      error is returned and you did not request LOCKPARENT or WANTPARENT.
   67  *
   68  *      We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name
   69  *      buffer has been freed or not.
   70  */
   71 
   72 #include <sys/param.h>
   73 #include <sys/systm.h>
   74 #include <sys/proc.h>
   75 #include <sys/namei.h>
   76 #include <sys/unistd.h>
   77 #include <sys/vnode.h>
   78 #include <sys/mount.h>
   79 #include <sys/socket.h>
   80 #include <sys/socketvar.h>
   81 #include <sys/malloc.h>
   82 #include <sys/mbuf.h>
   83 #include <sys/priv.h>
   84 #include <sys/dirent.h>
   85 #include <sys/stat.h>
   86 #include <sys/kernel.h>
   87 #include <sys/sysctl.h>
   88 #include <sys/bio.h>
   89 #include <sys/buf.h>
   90 
   91 #include <vm/vm.h>
   92 #include <vm/vm_extern.h>
   93 #include <vm/vm_object.h>
   94 
   95 #include <nfs/nfsproto.h>
   96 #include <nfs/rpcv2.h>
   97 #include <nfsserver/nfs.h>
   98 #include <nfs/xdr_subs.h>
   99 #include <nfsserver/nfsm_subs.h>
  100 
  101 #ifdef NFSRV_DEBUG
      /*
       * Debug trace hook.  The argument is a complete, parenthesised printf
       * argument list, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
       * the macro expands to nothing unless NFSRV_DEBUG is defined.
       */
  102 #define nfsdbprintf(info)       printf info
  103 #else
  104 #define nfsdbprintf(info)
  105 #endif
  106 
      /*
       * NOTE(review): not referenced in this part of the file; presumably a
       * cap applied by the commit service -- confirm.
       */
  107 #define MAX_COMMIT_COUNT        (1024 * 1024)
  108 
      /*
       * NUM_HEURISTIC is the number of slots in the nfsheur[] table below.
       * NOTE(review): the NHUSE_* values look like the initial value,
       * increment and cap for nh_use -- confirm against the code that
       * updates the table.
       */
  109 #define NUM_HEURISTIC           1017
  110 #define NHUSE_INIT              64
  111 #define NHUSE_INC               16
  112 #define NHUSE_MAX               2048
  113 
      /*
       * Heuristic table.  Each entry remembers a vnode pointer (not a
       * reference) together with the state used for sequential-access
       * detection; see the per-field comments.
       */
  114 static struct nfsheur {
  115         struct vnode *nh_vp;    /* vp to match (unreferenced pointer) */
  116         off_t nh_nextr;         /* next offset for sequential detection */
  117         int nh_use;             /* use count for selection */
  118         int nh_seqcount;        /* heuristic */
  119 } nfsheur[NUM_HEURISTIC];
  120 
  121 /* Global vars */
  122 
  123 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
  124 int nfsrvw_procrastinate_v3 = 0;
  125 
  126 static struct timeval   nfsver = { 0 };
  127 
      /*
       * Root of the vfs.nfsrv sysctl tree; the SYSCTL_INT and SYSCTL_STRUCT
       * entries below are attached to it and export the variables they name.
       */
  128 SYSCTL_NODE(_vfs, OID_AUTO, nfsrv, CTLFLAG_RW, 0, "NFS server");
  129 
  130 static int nfs_async;
  131 static int nfs_commit_blks;
  132 static int nfs_commit_miss;
  133 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
  134 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
  135 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
  136 
  137 struct nfsrvstats nfsrvstats;
  138 SYSCTL_STRUCT(_vfs_nfsrv, NFS_NFSRVSTATS, nfsrvstats, CTLFLAG_RW,
  139         &nfsrvstats, nfsrvstats, "S,nfsrvstats");
  140 
      /* Prototypes for file-local helpers. */
  141 static int      nfsrv_access(struct vnode *, int, struct ucred *, int,
  142                     struct thread *, int);
  143 static void     nfsrvw_coalesce(struct nfsrv_descript *,
  144                     struct nfsrv_descript *);
  145 
  146 /*
  147  * Reset the nameidata members that the nfsmout cleanup code inspects.
  148  * Call this before the first nfsm macro, since those macros may jump
  149  * straight to the cleanup code.
  150  */
  151 static __inline void
  152 ndclear(struct nameidata *nd)
  153 {
  154 
  155         /* Vnode pointers the cleanup code would otherwise release. */
  156         nd->ni_vp = nd->ni_dvp = nd->ni_startdir = NULL;
  157         /* No flags set, so in particular HASBUF is clear. */
  158         nd->ni_cnd.cn_flags = 0;
  159 }
  160 
  161 /*
  162  * Merge two vfslocked indicators into one.  When both are set, Giant
  163  * is dropped once on behalf of vfs2 so that a single reference remains.
  164  * The result is non-zero when either input was set, which lets an nfsrv
  165  * op keep track of just one vfslocked variable.
  166  */
  167 static __inline int
  168 nfsrv_lockedpair(int vfs1, int vfs2)
  169 {
  170 
  171         if (vfs1 == 0 || vfs2 == 0)
  172                 return (vfs1 | vfs2);
  173         VFS_UNLOCK_GIANT(vfs2);
  174         return (vfs1 | vfs2);
  175 }
  176 
  177 static __inline int
  178 nfsrv_lockedpair_nd(int vfs1, struct nameidata *nd)
  179 {
  180         const int nd_giant = NDHASGIANT(nd);
  181 
  182         /* Same merge as nfsrv_lockedpair(), with the second indicator
  183          * taken from the nameidata via NDHASGIANT(). */
  184         return (nfsrv_lockedpair(vfs1, nd_giant));
  185 }
  186 
  187 /*
  188  * nfs v3 access service
       *
       * Decodes a file handle and a requested access bit mask from the
       * request, maps the handle to a vnode with nfsrv_fhtovp(), and clears
       * from the mask every class of bits for which nfsrv_access() reports
       * an error:
       *   - NFSV3ACCESS_READ is checked with VREAD;
       *   - MODIFY | EXTEND (plus DELETE when the vnode is a directory) are
       *     checked together with VWRITE;
       *   - LOOKUP (directories) or EXECUTE (everything else) is checked
       *     with VEXEC.
       * The reply carries post-op attributes followed by the surviving mask.
       *
       * Panics when invoked for a request that is not flagged ND_NFSV3.
       *
       * On the paths visible here the function returns 0 once a reply has
       * been built, including the case where nfsrv_fhtovp() failed and the
       * failure was reported to the client through nfsm_reply().  The nfsm
       * macros may jump straight to nfsmout; the cleanup there releases vp
       * if it is still set and drops Giant according to vfslocked.
  189  */
  190 int
  191 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  192     struct thread *td, struct mbuf **mrq)
  193 {
  194         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  195         struct sockaddr *nam = nfsd->nd_nam;
  196         caddr_t dpos = nfsd->nd_dpos;
  197         struct ucred *cred = nfsd->nd_cr;
  198         struct vnode *vp = NULL;
  199         nfsfh_t nfh;
  200         fhandle_t *fhp;
  201         u_int32_t *tl;
  202         caddr_t bpos;
  203         int error = 0, rdonly, getret;
  204         struct mbuf *mb, *mreq;
  205         struct vattr vattr, *vap = &vattr;
  206         u_long testmode, nfsmode;
  207         int v3 = (nfsd->nd_flag & ND_NFSV3);
  208         int vfslocked;
  209 
  210         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  211         if (!v3)
  212                 panic("nfsrv3_access: v3 proc called on a v2 connection");
  213         vfslocked = 0;
  214         fhp = &nfh.fh_generic;
  215         nfsm_srvmtofh(fhp);
              /* tl points at the requested access mask; it is decoded below. */
  216         tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  217         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
  218             nam, &rdonly, TRUE);
  219         if (error) {
                      /*
                       * The handle could not be resolved: the status goes to
                       * the client in the reply, so the function itself
                       * reports success.
                       */
  220                 nfsm_reply(NFSX_UNSIGNED);
  221                 nfsm_srvpostop_attr(1, NULL);
  222                 error = 0;
  223                 goto nfsmout;
  224         }
  225         nfsmode = fxdr_unsigned(u_int32_t, *tl);
              /* Read permission. */
  226         if ((nfsmode & NFSV3ACCESS_READ) &&
  227                 nfsrv_access(vp, VREAD, cred, rdonly, td, 0))
  228                 nfsmode &= ~NFSV3ACCESS_READ;
              /* Write-class bits; DELETE is only considered for directories. */
  229         if (vp->v_type == VDIR)
  230                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
  231                         NFSV3ACCESS_DELETE);
  232         else
  233                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
  234         if ((nfsmode & testmode) &&
  235                 nfsrv_access(vp, VWRITE, cred, rdonly, td, 0))
  236                 nfsmode &= ~testmode;
              /* VEXEC backs LOOKUP on directories and EXECUTE otherwise. */
  237         if (vp->v_type == VDIR)
  238                 testmode = NFSV3ACCESS_LOOKUP;
  239         else
  240                 testmode = NFSV3ACCESS_EXECUTE;
  241         if ((nfsmode & testmode) &&
  242                 nfsrv_access(vp, VEXEC, cred, rdonly, td, 0))
  243                 nfsmode &= ~testmode;
  244         getret = VOP_GETATTR(vp, vap, cred, td);
              /*
               * Done with the vnode; clear vp so the nfsmout cleanup does not
               * release it a second time.
               */
  245         vput(vp);
  246         vp = NULL;
  247         nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
  248         nfsm_srvpostop_attr(getret, vap);
  249         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  250         *tl = txdr_unsigned(nfsmode);
  251 nfsmout:
  252         if (vp)
  253                 vput(vp);
  254         VFS_UNLOCK_GIANT(vfslocked);
  255         return(error);
  256 }
  257 
  258 /*
  259  * nfs getattr service
       *
       * Resolves the request's file handle to a vnode with nfsrv_fhtovp(),
       * fetches its attributes with VOP_GETATTR() and returns them in the
       * reply as an nfs_fattr sized by NFSX_FATTR() for the protocol version
       * of the request.
       *
       * A failure from nfsrv_fhtovp() or VOP_GETATTR() is handed to
       * nfsm_reply() and the function then returns 0.  The nfsm macros may
       * jump straight to nfsmout, where vp is released if still set and
       * Giant is dropped according to vfslocked.
  260  */
  261 int
  262 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  263     struct thread *td, struct mbuf **mrq)
  264 {
  265         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  266         struct sockaddr *nam = nfsd->nd_nam;
  267         caddr_t dpos = nfsd->nd_dpos;
  268         struct ucred *cred = nfsd->nd_cr;
  269         struct nfs_fattr *fp;
  270         struct vattr va;
  271         struct vattr *vap = &va;
  272         struct vnode *vp = NULL;
  273         nfsfh_t nfh;
  274         fhandle_t *fhp;
  275         caddr_t bpos;
  276         int error = 0, rdonly;
  277         struct mbuf *mb, *mreq;
  278         int vfslocked;
  279 
  280         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  281         vfslocked = 0;
  282         fhp = &nfh.fh_generic;
  283         nfsm_srvmtofh(fhp);
  284         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp, nam,
  285             &rdonly, TRUE);
  286         if (error) {
                      /* Status-only reply; nothing else to send. */
  287                 nfsm_reply(0);
  288                 error = 0;
  289                 goto nfsmout;
  290         }
  291         error = VOP_GETATTR(vp, vap, cred, td);
              /* Release the vnode now; vp is cleared for the nfsmout cleanup. */
  292         vput(vp);
  293         vp = NULL;
  294         nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
  295         if (error) {
                      /* The getattr failure was already put in the reply. */
  296                 error = 0;
  297                 goto nfsmout;
  298         }
  299         fp = nfsm_build(struct nfs_fattr *,
  300             NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
  301         nfsm_srvfillattr(vap, fp);
  302         /* fall through */
  303 
  304 nfsmout:
  305         if (vp)
  306                 vput(vp);
  307         VFS_UNLOCK_GIANT(vfslocked);
  308         return(error);
  309 }
  310 
  311 /*
  312  * nfs setattr service
       *
       * Phases, in order:
       *   1. Decode the file handle, find its mount with vfs_getvfs() (ESTALE
       *      if there is none), take Giant if VFS_LOCK_GIANT() says so and
       *      start a write on the mount with vn_start_write().
       *   2. Decode the new attributes into *vap.  v3 requests use
       *      nfsm_srvsattr() and may carry a guard timestamp; v2 requests use
       *      a struct nfsv2_sattr in which a field equal to nfsrv_nfs_xdrneg1
       *      is left unset.
       *   3. Resolve the handle to a vnode.  For v3, fetch the pre-op
       *      attributes and, when a guard was supplied, fail with
       *      NFSERR_NOT_SYNC unless it equals the current ctime.
       *   4. Check permission: when va_size is still the all-ones value only
       *      a read-only export/mount is rejected (EROFS); otherwise a
       *      directory is rejected (EISDIR) and VWRITE access is required.
       *   5. VOP_SETATTR(), then VOP_GETATTR() for the post-op attributes.
       *   6. Reply: wcc data for v3, or an nfs_fattr for a successful v2 call.
       *
       * Errors from these steps are passed to nfsm_reply() and the function
       * returns 0.  The nfsm macros may jump straight to nfsmout, which
       * releases vp if still set, ends the write with vn_finished_write()
       * and drops Giant according to vfslocked.
  313  */
  314 int
  315 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  316     struct thread *td, struct mbuf **mrq)
  317 {
  318         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  319         struct sockaddr *nam = nfsd->nd_nam;
  320         caddr_t dpos = nfsd->nd_dpos;
  321         struct ucred *cred = nfsd->nd_cr;
  322         struct vattr va, preat;
  323         struct vattr *vap = &va;
  324         struct nfsv2_sattr *sp;
  325         struct nfs_fattr *fp;
  326         struct vnode *vp = NULL;
  327         nfsfh_t nfh;
  328         fhandle_t *fhp;
  329         u_int32_t *tl;
  330         caddr_t bpos;
  331         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
  332         int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
  333         struct mbuf *mb, *mreq;
  334         struct timespec guard = { 0, 0 };
  335         struct mount *mp = NULL;
  336         int tvfslocked;
  337         int vfslocked;
  338 
  339         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  340         vfslocked = 0;
  341         fhp = &nfh.fh_generic;
  342         nfsm_srvmtofh(fhp);
  343         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
                      /* No such filesystem: reply through the common exit. */
  344                 error = ESTALE;
  345                 goto out;
  346         }
  347         vfslocked = VFS_LOCK_GIANT(mp);
  348         (void) vn_start_write(NULL, &mp, V_WAIT);
  349         vfs_rel(mp);            /* The write holds a ref. */
  350         VATTR_NULL(vap);
  351         if (v3) {
  352                 nfsm_srvsattr(vap);
                      /* Optional guard: a flag word, then a v3 timestamp. */
  353                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  354                 gcheck = fxdr_unsigned(int, *tl);
  355                 if (gcheck) {
  356                         tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
  357                         fxdr_nfsv3time(tl, &guard);
  358                 }
  359         } else {
  360                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
  361                 /*
  362                  * Nah nah nah nah na nah
  363                  * There is a bug in the Sun client that puts 0xffff in the mode
  364                  * field of sattr when it should put in 0xffffffff. The u_short
  365                  * doesn't sign extend.
  366                  * --> check the low order 2 bytes for 0xffff
  367                  */
  368                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
  369                         vap->va_mode = nfstov_mode(sp->sa_mode);
  370                 if (sp->sa_uid != nfsrv_nfs_xdrneg1)
  371                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
  372                 if (sp->sa_gid != nfsrv_nfs_xdrneg1)
  373                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
  374                 if (sp->sa_size != nfsrv_nfs_xdrneg1)
  375                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
  376                 if (sp->sa_atime.nfsv2_sec != nfsrv_nfs_xdrneg1) {
  377 #ifdef notyet
  378                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
  379 #else
                              /* Only the seconds of atime are taken here. */
  380                         vap->va_atime.tv_sec =
  381                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
  382                         vap->va_atime.tv_nsec = 0;
  383 #endif
  384                 }
  385                 if (sp->sa_mtime.nfsv2_sec != nfsrv_nfs_xdrneg1)
  386                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
  387 
  388         }
  389 
  390         /*
  391          * Now that we have all the fields, let's do it.
  392          */
  393         error = nfsrv_fhtovp(fhp, 1, &vp, &tvfslocked, cred, slp,
  394             nam, &rdonly, TRUE);
              /* Fold the Giant state reported by nfsrv_fhtovp() into vfslocked. */
  395         vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
  396         if (error) {
  397                 nfsm_reply(2 * NFSX_UNSIGNED);
  398                 if (v3)
  399                         nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  400                 error = 0;
  401                 goto nfsmout;
  402         }
  403 
  404         /*
  405          * vp now an active resource, pay careful attention to cleanup
  406          */
  407         if (v3) {
  408                 error = preat_ret = VOP_GETATTR(vp, &preat, cred, td);
                      /*
                       * Guarded setattr: refuse unless the object's ctime
                       * still equals the guard supplied by the client.
                       */
  409                 if (!error && gcheck &&
  410                         (preat.va_ctime.tv_sec != guard.tv_sec ||
  411                          preat.va_ctime.tv_nsec != guard.tv_nsec))
  412                         error = NFSERR_NOT_SYNC;
  413                 if (error) {
  414                         vput(vp);
  415                         vp = NULL;
  416                         nfsm_reply(NFSX_WCCDATA(v3));
  417                         if (v3)
  418                                 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  419                         error = 0;
  420                         goto nfsmout;
  421                 }
  422         }
  423 
  424         /*
  425          * If the size is being changed write access is required, otherwise
  426          * just check for a read only filesystem.
               *
               * NOTE(review): the test below relies on VATTR_NULL() having left
               * va_size at the all-ones value when no size was supplied --
               * confirm.
  427          */
  428         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
  429                 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
  430                         error = EROFS;
  431                         goto out;
  432                 }
  433         } else {
  434                 if (vp->v_type == VDIR) {
  435                         error = EISDIR;
  436                         goto out;
  437                 } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
  438                         td, 0)) != 0)
  439                         goto out;
  440         }
  441         error = VOP_SETATTR(vp, vap, cred, td);
              /* Post-op attributes are fetched even if the setattr failed. */
  442         postat_ret = VOP_GETATTR(vp, vap, cred, td);
  443         if (!error)
  444                 error = postat_ret;
  445 out:
  446         if (vp != NULL)
  447                 vput(vp);
  448 
  449         vp = NULL;
  450         nfsm_reply(NFSX_WCCORFATTR(v3));
  451         if (v3) {
  452                 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  453         } else if (!error) {
  454                 /* v2 non-error case. */
  455                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
  456                 nfsm_srvfillattr(vap, fp);
  457         }
  458         error = 0;
  459         /* fall through */
  460 
  461 nfsmout:
  462         if (vp)
  463                 vput(vp);
  464         vn_finished_write(mp);
  465         VFS_UNLOCK_GIANT(vfslocked);
  466         return(error);
  467 }
  468 
  469 /*
  470  * nfs lookup rpc
       *
       * Looks up one name relative to the directory file handle in the
       * request (via nfs_namei()) and replies with the file handle and
       * attributes of the result; v3 replies also carry the directory's
       * post-op attributes.
       *
       * When the request used the public file handle and the result is a
       * directory, nfs_pub.np_index (if set) is looked up inside it and, if
       * found, replaces the result.  A result that lies outside
       * nfs_pub.np_mount is refused with EPERM.
       *
       * Errors from the lookup itself are passed to nfsm_reply() and 0 is
       * returned.  The nfsm macros may instead jump to nfsmout, where any
       * vnodes still recorded in ndp and dirp are released, the name buffer
       * is freed with NDFREE() and Giant is dropped according to vfslocked.
  471  */
  472 int
  473 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  474     struct thread *td, struct mbuf **mrq)
  475 {
  476         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  477         struct sockaddr *nam = nfsd->nd_nam;
  478         caddr_t dpos = nfsd->nd_dpos;
  479         struct ucred *cred = nfsd->nd_cr;
  480         struct nfs_fattr *fp;
  481         struct nameidata nd, ind, *ndp = &nd;
  482         struct vnode *vp, *dirp = NULL;
  483         nfsfh_t nfh;
  484         fhandle_t *fhp;
  485         caddr_t bpos;
  486         int error = 0, len, dirattr_ret = 1;
  487         int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
  488         struct mbuf *mb, *mreq;
  489         struct vattr va, dirattr, *vap = &va;
  490         int tvfslocked;
  491         int vfslocked;
  492 
  493         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  494         ndclear(&nd);
  495         vfslocked = 0;
  496 
  497         fhp = &nfh.fh_generic;
  498         nfsm_srvmtofh(fhp);
  499         nfsm_srvnamesiz(len);
  500 
  501         pubflag = nfs_ispublicfh(fhp);
  502 
  503         nd.ni_cnd.cn_cred = cred;
  504         nd.ni_cnd.cn_nameiop = LOOKUP;
  505         nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART | MPSAFE;
  506         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
  507                 &dirp, v3, &dirattr, &dirattr_ret, td, pubflag);
  508         vfslocked = NDHASGIANT(&nd);
  509 
  510         /*
  511          * namei failure, only dirp to cleanup.  Clear out garbage from
  512          * structure in case macros jump to nfsmout.
  513          */
  514 
  515         if (error) {
  516                 if (dirp) {
  517                         vrele(dirp);
  518                         dirp = NULL;
  519                 }
  520                 nfsm_reply(NFSX_POSTOPATTR(v3));
  521                 if (v3)
  522                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  523                 error = 0;
  524                 goto nfsmout;
  525         }
  526 
  527         /*
  528          * Locate index file for public filehandle
  529          *
  530          * error is 0 on entry and 0 on exit from this block.
  531          */
  532 
  533         if (pubflag) {
  534                 if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) {
  535                         /*
  536                          * Setup call to lookup() to see if we can find
  537                          * the index file. Arguably, this doesn't belong
  538                          * in a kernel.. Ugh.  If an error occurs, do not
  539                          * try to install an index file and then clear the
  540                          * error.
  541                          *
  542                          * When we replace nd with ind and redirect ndp,
  543                          * maintenance of ni_startdir and ni_vp shift to
  544                          * ind and we have to clean them up in the old nd.
  545                          * However, the cnd resource continues to be maintained
  546                          * via the original nd.  Confused?  You aren't alone!
  547                          */
  548                         ind = nd;
  549                         VOP_UNLOCK(nd.ni_vp, 0, td);
  550                         ind.ni_pathlen = strlen(nfs_pub.np_index);
  551                         ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf =
  552                             nfs_pub.np_index;
  553                         ind.ni_startdir = nd.ni_vp;
  554                         VREF(ind.ni_startdir);
  555                         ind.ni_cnd.cn_flags &= ~GIANTHELD;
  556                         tvfslocked = VFS_LOCK_GIANT(ind.ni_startdir->v_mount);
                              /*
                               * NOTE(review): GIANTHELD is recorded in nd here
                               * although lookup() is handed ind, and it is
                               * ind that nfsrv_lockedpair_nd() inspects
                               * afterwards.  This may be a typo for ind;
                               * left unchanged pending confirmation against
                               * lookup()'s Giant handling.
                               */
  557                         if (tvfslocked)
  558                                 nd.ni_cnd.cn_flags |= GIANTHELD;
  559                         error = lookup(&ind);
  560                         ind.ni_dvp = NULL;
  561                         vfslocked = nfsrv_lockedpair_nd(vfslocked, &ind);
  562                         ind.ni_cnd.cn_flags &= ~GIANTHELD;
  563 
  564                         if (error == 0) {
  565                                 /*
  566                                  * Found an index file. Get rid of
  567                                  * the old references.  Transfer nd.ni_vp
                                       * to dirp and continue with ind.
  568                                  */
  569                                 if (dirp)
  570                                         vrele(dirp);
  571                                 dirp = nd.ni_vp;
  572                                 nd.ni_vp = NULL;
  573                                 vrele(nd.ni_startdir);
  574                                 nd.ni_startdir = NULL;
  575                                 ndp = &ind;
  576                         }
  577                         error = 0;
  578                 }
  579                 /*
  580                  * If the public filehandle was used, check that this lookup
  581                  * didn't result in a filehandle outside the publicly exported
  582                  * filesystem.  We clear the poor vp here to avoid lockups due
  583                  * to NFS I/O.
                       *
                       * The vnode is released through ndp, not nd: when an index
                       * file was substituted above, nd.ni_vp has already been
                       * handed to dirp and set to NULL, and the vnode being
                       * tested is ind.ni_vp.
  584                  */
  585 
  586                 if (ndp->ni_vp->v_mount != nfs_pub.np_mount) {
  587                         vput(ndp->ni_vp);
  588                         ndp->ni_vp = NULL;
  589                         error = EPERM;
  590                 }
  591         }
  592 
  593         /*
  594          * Resources at this point:
  595          *      ndp->ni_vp      may not be NULL
  596          */
  597 
  598         if (error) {
  599                 nfsm_reply(NFSX_POSTOPATTR(v3));
  600                 if (v3)
  601                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  602                 error = 0;
  603                 goto nfsmout;
  604         }
  605 
  606         /*
  607          * Get underlying attribute, then release remaining resources ( for
  608          * the same potential blocking reason ) and reply.
  609          */
  610         vp = ndp->ni_vp;
  611         bzero((caddr_t)fhp, sizeof(nfh));
  612         fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
  613         error = VOP_VPTOFH(vp, &fhp->fh_fid);
  614         if (!error)
  615                 error = VOP_GETATTR(vp, vap, cred, td);
  616 
              /*
               * Everything is released before the reply is built, and the
               * pointers are cleared so the nfsmout cleanup skips them.
               */
  617         vput(vp);
  618         vrele(ndp->ni_startdir);
  619         vrele(dirp);
  620         ndp->ni_vp = NULL;
  621         ndp->ni_startdir = NULL;
  622         dirp = NULL;
  623         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
  624         if (error) {
  625                 if (v3)
  626                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  627                 error = 0;
  628                 goto nfsmout;
  629         }
  630         nfsm_srvfhtom(fhp, v3);
  631         if (v3) {
  632                 nfsm_srvpostop_attr(0, vap);
  633                 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  634         } else {
  635                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
  636                 nfsm_srvfillattr(vap, fp);
  637         }
  638 
  639 nfsmout:
  640         if (ndp->ni_vp || dirp || ndp->ni_startdir) {
  641                 if (ndp->ni_vp)
  642                         vput(ndp->ni_vp);
  643                 if (dirp)
  644                         vrele(dirp);
  645                 if (ndp->ni_startdir)
  646                         vrele(ndp->ni_startdir);
  647         }
  648         NDFREE(&nd, NDF_ONLY_PNBUF);
  649         VFS_UNLOCK_GIANT(vfslocked);
  650         return (error);
  651 }
  652 
  653 /*
  654  * nfs readlink service
       *
       * Pre-allocates a chain of cluster mbufs covering NFS_MAXPATHLEN bytes,
       * describes it with one iovec per mbuf, resolves the file handle to a
       * vnode and reads the link text into the chain with VOP_READLINK().
       * A vnode that is not a symbolic link yields EINVAL for v3 and ENXIO
       * for v2.  The reply holds the post-op attributes (v3 only), the
       * length of the link text and then the data chain itself.
       *
       * Errors are passed to nfsm_reply() and the function returns 0.  The
       * nfsm macros may jump straight to nfsmout, which frees the data
       * chain if it has not been attached to the reply, releases vp if
       * still set and drops Giant according to vfslocked.
  655  */
  656 int
  657 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  658     struct thread *td, struct mbuf **mrq)
  659 {
  660         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  661         struct sockaddr *nam = nfsd->nd_nam;
  662         caddr_t dpos = nfsd->nd_dpos;
  663         struct ucred *cred = nfsd->nd_cr;
  664         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
  665         struct iovec *ivp = iv;
  666         struct mbuf *mp;
  667         u_int32_t *tl;
  668         caddr_t bpos;
  669         int error = 0, rdonly, i, tlen, len, getret;
  670         int v3 = (nfsd->nd_flag & ND_NFSV3);
  671         struct mbuf *mb, *mp3, *nmp, *mreq;
  672         struct vnode *vp = NULL;
  673         struct vattr attr;
  674         nfsfh_t nfh;
  675         fhandle_t *fhp;
  676         struct uio io, *uiop = &io;
  677         int vfslocked;
  678 
  679         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  680         vfslocked = 0;
  681 #ifndef nolint
  682         mp = NULL;
  683 #endif
              /* mp3 is the head of the data chain; NULL means nothing to free. */
  684         mp3 = NULL;
  685         fhp = &nfh.fh_generic;
  686         nfsm_srvmtofh(fhp);
  687         len = 0;
  688         i = 0;
              /*
               * Build the data chain: mp tracks the tail, each new mbuf gets
               * one iovec, and the last mbuf is shortened so the total is
               * exactly NFS_MAXPATHLEN bytes.
               */
  689         while (len < NFS_MAXPATHLEN) {
  690                 MGET(nmp, M_TRYWAIT, MT_DATA);
  691                 MCLGET(nmp, M_TRYWAIT);
  692                 nmp->m_len = NFSMSIZ(nmp);
  693                 if (len == 0)
  694                         mp3 = mp = nmp;
  695                 else {
  696                         mp->m_next = nmp;
  697                         mp = nmp;
  698                 }
  699                 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
  700                         mp->m_len = NFS_MAXPATHLEN - len;
  701                         len = NFS_MAXPATHLEN;
  702                 } else
  703                         len += mp->m_len;
  704                 ivp->iov_base = mtod(mp, caddr_t);
  705                 ivp->iov_len = mp->m_len;
  706                 i++;
  707                 ivp++;
  708         }
              /* Describe the whole chain as a kernel-space read of len bytes. */
  709         uiop->uio_iov = iv;
  710         uiop->uio_iovcnt = i;
  711         uiop->uio_offset = 0;
  712         uiop->uio_resid = len;
  713         uiop->uio_rw = UIO_READ;
  714         uiop->uio_segflg = UIO_SYSSPACE;
  715         uiop->uio_td = NULL;
  716         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
  717             nam, &rdonly, TRUE);
  718         if (error) {
  719                 nfsm_reply(2 * NFSX_UNSIGNED);
  720                 if (v3)
  721                         nfsm_srvpostop_attr(1, NULL);
  722                 error = 0;
  723                 goto nfsmout;
  724         }
  725         if (vp->v_type != VLNK) {
  726                 if (v3)
  727                         error = EINVAL;
  728                 else
  729                         error = ENXIO;
  730         } else 
  731                 error = VOP_READLINK(vp, uiop, cred);
              /* Attributes are fetched whether or not the read succeeded. */
  732         getret = VOP_GETATTR(vp, &attr, cred, td);
  733         vput(vp);
  734         vp = NULL;
  735         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
  736         if (v3)
  737                 nfsm_srvpostop_attr(getret, &attr);
  738         if (error) {
  739                 error = 0;
  740                 goto nfsmout;
  741         }
              /*
               * The link was shorter than the buffer: reduce len to the bytes
               * actually read.  NOTE(review): nfsm_adj() presumably trims the
               * chain to the rounded-up length and pads the tail -- confirm.
               */
  742         if (uiop->uio_resid > 0) {
  743                 len -= uiop->uio_resid;
  744                 tlen = nfsm_rndup(len);
  745                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
  746         }
  747         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  748         *tl = txdr_unsigned(len);
              /*
               * Hand the data chain to the reply and forget it locally so
               * that nfsmout does not free it.
               */
  749         mb->m_next = mp3;
  750         mp3 = NULL;
  751 nfsmout:
  752         if (mp3)
  753                 m_freem(mp3);
  754         if (vp)
  755                 vput(vp);
  756         VFS_UNLOCK_GIANT(vfslocked);
  757         return(error);
  758 }
  759 
  760 /*
  761  * nfs read service
       *
       * Serves a read request for either protocol version; v3 is selected by
       * ND_NFSV3 in nfsd->nd_flag.  The request supplies a file handle, a
       * starting offset (two XDR words for v3, one otherwise) and a requested
       * length.
       *
       * The vnode is looked up with nfsrv_fhtovp().  Anything other than a
       * regular file is refused, and access is granted when either the VREAD
       * or the VEXEC check passes.  The byte count is clipped to the current
       * file size, the nfsheur[] entry for the vnode is updated to derive a
       * sequential-access hint that is handed to VOP_READ() through ioflag,
       * and the data is read straight into mbufs chained onto the reply.
       *
       * Every error path visible here builds a reply and then clears error
       * before jumping to nfsmout.
       *
       * NOTE(review): the nfsm_* macros are defined elsewhere.  They appear to
       * use the locals mb, bpos, mreq, mrep, md, dpos and mrq implicitly and
       * may jump to nfsmout on their own -- confirm against their definitions
       * before touching those variables or the label.
  762  */
  763 int
  764 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  765     struct thread *td, struct mbuf **mrq)
  766 {
  767         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  768         struct sockaddr *nam = nfsd->nd_nam;
  769         caddr_t dpos = nfsd->nd_dpos;
  770         struct ucred *cred = nfsd->nd_cr;
  771         struct iovec *iv;
  772         struct iovec *iv2;
  773         struct mbuf *m;
  774         struct nfs_fattr *fp;
  775         u_int32_t *tl;
  776         int i;
  777         caddr_t bpos;
  778         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
  779         int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
  780         struct mbuf *mb, *mreq;
  781         struct mbuf *m2;
  782         struct vnode *vp = NULL;
  783         nfsfh_t nfh;
  784         fhandle_t *fhp;
  785         struct uio io, *uiop = &io;
  786         struct vattr va, *vap = &va;
  787         struct nfsheur *nh;
  788         off_t off;
  789         int ioflag = 0;
  790         int vfslocked;
  791 
  792 
  793         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  794         vfslocked = 0;
  795         fhp = &nfh.fh_generic;
  796         nfsm_srvmtofh(fhp);
              /*
               * v3: the offset is decoded with fxdr_hyper() from two words.
               * Otherwise it is a single unsigned 32-bit word widened to off_t.
               */
  797         if (v3) {
  798                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
  799                 off = fxdr_hyper(tl);
  800         } else {
  801                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  802                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
  803         }
              /* Requested length; presumably bounded by NFS_SRVMAXDATA(nfsd). */
  804         nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
  805 
  806         /*
  807          * Reference vp.  If an error occurs, vp will be invalid, but we
  808          * have to NULL it just in case.  The macros might goto nfsmout
  809          * as well.
  810          */
  811 
  812         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
  813             nam, &rdonly, TRUE);
  814         if (error) {
  815                 vp = NULL;
  816                 nfsm_reply(2 * NFSX_UNSIGNED);
  817                 if (v3)
  818                         nfsm_srvpostop_attr(1, NULL);
  819                 error = 0;
  820                 goto nfsmout;
  821         }
  822 
              /*
               * Only VREG may be read.  v3 reports EINVAL; v2 reports EISDIR
               * for a directory and EACCES for any other type.
               */
  823         if (vp->v_type != VREG) {
  824                 if (v3)
  825                         error = EINVAL;
  826                 else
  827                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
  828         }
              /*
               * A failed VREAD check is retried as VEXEC and the second result
               * is the one kept (presumably so that execute-only files can
               * still be read by the client -- confirm).
               */
  829         if (!error) {
  830                 if ((error = nfsrv_access(vp, VREAD, cred, rdonly,
  831                     td, 1)) != 0)
  832                         error = nfsrv_access(vp, VEXEC, cred,
  833                             rdonly, td, 1);
  834         }
              /*
               * Attributes are fetched even when an error is already pending,
               * because the error reply below passes getret/vap to
               * nfsm_srvpostop_attr() for v3.
               */
  835         getret = VOP_GETATTR(vp, vap, cred, td);
  836         if (!error)
  837                 error = getret;
  838         if (error) {
  839                 vput(vp);
  840                 vp = NULL;
  841                 nfsm_reply(NFSX_POSTOPATTR(v3));
  842                 if (v3)
  843                         nfsm_srvpostop_attr(getret, vap);
  844                 error = 0;
  845                 goto nfsmout;
  846         }
  847 
  848         /*
  849          * Calculate byte count to read
               *
               * Nothing is read at or beyond the current file size, and a
               * request that straddles the end is cut back to the bytes that
               * remain.
  850          */
  851 
  852         if (off >= vap->va_size)
  853                 cnt = 0;
  854         else if ((off + reqlen) > vap->va_size)
  855                 cnt = vap->va_size - off;
  856         else
  857                 cnt = reqlen;
  858 
  859         /*
  860          * Calculate seqcount for heuristic
  861          */
  862 
  863         {
  864                 int hi;
  865                 int try = 32;
  866 
  867                 /*
  868                  * Locate best candidate
                       *
                       * Probing starts at a slot derived from the vnode address
                       * and visits up to 32 consecutive slots, wrapping at
                       * NUM_HEURISTIC.  A slot that already belongs to vp wins
                       * outright.  Otherwise each slot passed over has a
                       * positive nh_use decremented, and nh is moved to any
                       * later slot whose nh_use is lower than the current
                       * candidate's.
  869                  */
  870 
  871                 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
  872                 nh = &nfsheur[hi];
  873 
  874                 while (try--) {
  875                         if (nfsheur[hi].nh_vp == vp) {
  876                                 nh = &nfsheur[hi];
  877                                 break;
  878                         }
  879                         if (nfsheur[hi].nh_use > 0)
  880                                 --nfsheur[hi].nh_use;
  881                         hi = (hi + 1) % NUM_HEURISTIC;
  882                         if (nfsheur[hi].nh_use < nh->nh_use)
  883                                 nh = &nfsheur[hi];
  884                 }
  885 
                      /*
                       * No entry for vp was found: take over the chosen slot.
                       * A read that begins at offset 0 starts with a larger
                       * sequential count (4) than one that begins elsewhere (1).
                       */
  886                 if (nh->nh_vp != vp) {
  887                         nh->nh_vp = vp;
  888                         nh->nh_nextr = off;
  889                         nh->nh_use = NHUSE_INIT;
  890                         if (off == 0)
  891                                 nh->nh_seqcount = 4;
  892                         else
  893                                 nh->nh_seqcount = 1;
  894                 }
  895 
  896                 /*
  897                  * Calculate heuristic
                       *
                       * A read at offset 0 (while the count is non-zero) or at
                       * the offset where the previous read stopped is treated
                       * as sequential and raises nh_seqcount, capped at
                       * IO_SEQMAX.  Any other read drops the count to 1, or to
                       * 0 if it was already 1 or less.  The slot's nh_use is
                       * bumped by NHUSE_INC up to NHUSE_MAX, and the count is
                       * shifted by IO_SEQSHIFT into ioflag for VOP_READ().
  898                  */
  899 
  900                 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
  901                         if (++nh->nh_seqcount > IO_SEQMAX)
  902                                 nh->nh_seqcount = IO_SEQMAX;
  903                 } else if (nh->nh_seqcount > 1) {
  904                         nh->nh_seqcount = 1;
  905                 } else {
  906                         nh->nh_seqcount = 0;
  907                 }
  908                 nh->nh_use += NHUSE_INC;
  909                 if (nh->nh_use > NHUSE_MAX)
  910                         nh->nh_use = NHUSE_MAX;
  911                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
  912         }
  913 
  914         nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
              /*
               * Reserve room for the attributes and the trailing count words.
               * fp marks where the attributes will be written and tl is left
               * just past them.  v3 additionally starts with a "true" word in
               * front of the attributes.
               */
  915         if (v3) {
  916                 tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
  917                 *tl++ = nfsrv_nfs_true;
  918                 fp = (struct nfs_fattr *)tl;
  919                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
  920         } else {
  921                 tl = nfsm_build(u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
  922                 fp = (struct nfs_fattr *)tl;
  923                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
  924         }
              /* len is cnt after nfsm_rndup(); left counts down the space still needed. */
  925         len = left = nfsm_rndup(cnt);
  926         if (cnt > 0) {
  927                 /*
  928                  * Generate the mbuf list with the uio_iov ref. to it.
                       *
                       * Pass 1: walk forward from mb, counting in i how many
                       * iovecs will be needed and appending a new cluster mbuf
                       * whenever the trailing space found so far is not enough.
                       *
                       * NOTE(review): the results of MGET()/MCLGET() are used
                       * without being checked; confirm that M_TRYWAIT cannot
                       * fail in this context.
  929                  */
  930                 i = 0;
  931                 m = m2 = mb;
  932                 while (left > 0) {
  933                         siz = min(M_TRAILINGSPACE(m), left);
  934                         if (siz > 0) {
  935                                 left -= siz;
  936                                 i++;
  937                         }
  938                         if (left > 0) {
  939                                 MGET(m, M_TRYWAIT, MT_DATA);
  940                                 MCLGET(m, M_TRYWAIT);
  941                                 m->m_len = 0;
  942                                 m2->m_next = m;
  943                                 m2 = m;
  944                         }
  945                 }
                      /*
                       * Pass 2: allocate the iovec array and point each entry at
                       * the trailing space of the matching mbuf, growing m_len
                       * by the amount claimed.  iv2 keeps the start of the array
                       * for the FREE() below because iv is advanced.
                       */
  946                 MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
  947                        M_TEMP, M_WAITOK);
  948                 uiop->uio_iov = iv2 = iv;
  949                 m = mb;
  950                 left = len;
  951                 i = 0;
  952                 while (left > 0) {
  953                         if (m == NULL)
  954                                 panic("nfsrv_read iov");
  955                         siz = min(M_TRAILINGSPACE(m), left);
  956                         if (siz > 0) {
  957                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
  958                                 iv->iov_len = siz;
  959                                 m->m_len += siz;
  960                                 left -= siz;
  961                                 iv++;
  962                                 i++;
  963                         }
  964                         m = m->m_next;
  965                 }
  966                 uiop->uio_iovcnt = i;
  967                 uiop->uio_offset = off;
  968                 uiop->uio_resid = len;
  969                 uiop->uio_rw = UIO_READ;
  970                 uiop->uio_segflg = UIO_SYSSPACE;
  971                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
                      /*
                       * Remember where this read stopped so that the next
                       * request can be recognised as sequential.
                       */
  972                 off = uiop->uio_offset;
  973                 nh->nh_nextr = off;
  974                 FREE((caddr_t)iv2, M_TEMP);
                      /*
                       * On a read or getattr failure, free mreq (presumably the
                       * reply chain built by the earlier nfsm_reply()) and send
                       * a short error reply instead.
                       */
  975                 if (error || (getret = VOP_GETATTR(vp, vap, cred, td))) {
  976                         if (!error)
  977                                 error = getret;
  978                         m_freem(mreq);
  979                         vput(vp);
  980                         vp = NULL;
  981                         nfsm_reply(NFSX_POSTOPATTR(v3));
  982                         if (v3)
  983                                 nfsm_srvpostop_attr(getret, vap);
  984                         error = 0;
  985                         goto nfsmout;
  986                 }
  987         } else
  988                 uiop->uio_resid = 0;
  989         vput(vp);
  990         vp = NULL;
  991         nfsm_srvfillattr(vap, fp);
              /*
               * Shrink the reply to what was actually read: cnt becomes
               * min(cnt, len - uio_resid), and nfsm_adj() is called with
               * len - tlen and tlen - cnt when the sizes do not already agree.
               */
  992         tlen = len - uiop->uio_resid;
  993         cnt = cnt < tlen ? cnt : tlen;
  994         tlen = nfsm_rndup(cnt);
  995         if (len != tlen || tlen != cnt)
  996                 nfsm_adj(mb, len - tlen, tlen - cnt);
              /*
               * v3 emits the byte count followed by a flag that is true when
               * fewer bytes than requested are being returned.
               */
  997         if (v3) {
  998                 *tl++ = txdr_unsigned(cnt);
  999                 if (cnt < reqlen)
 1000                         *tl++ = nfsrv_nfs_true;
 1001                 else
 1002                         *tl++ = nfsrv_nfs_false;
 1003         }
              /* Both versions end with a count word ahead of the data. */
 1004         *tl = txdr_unsigned(cnt);
 1005 nfsmout:
              /* Common exit: drop the vnode if one is still held. */
 1006         if (vp)
 1007                 vput(vp);
 1008         VFS_UNLOCK_GIANT(vfslocked);
 1009         return(error);
 1010 }
 1011 
 1012 /*
 1013  * nfs write service
       *
       * Serves a single write request for either protocol version; v3 is
       * selected by ND_NFSV3 in nfsd->nd_flag.  A NULL nd_mrep yields a NULL
       * *mrq and an immediate return of 0.
       *
       * Otherwise the file handle, offset, stability level and data length
       * are decoded, and the request mbuf chain is trimmed in place so that
       * only the write data remains.  That data is handed to VOP_WRITE()
       * through an iovec array that points directly into the mbufs.
       *
       * Every error path visible here builds a reply and clears error before
       * leaving; the function's own final assignment also sets error to 0.
       *
       * NOTE(review): the nfsm_* macros are defined elsewhere.  They appear to
       * use the locals mb, bpos, mreq, mrep, md, dpos and mrq implicitly and
       * may jump to nfsmout on their own -- confirm against their definitions
       * before touching those variables or the label.
 1014  */
 1015 int
 1016 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1017     struct thread *td, struct mbuf **mrq)
 1018 {
 1019         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1020         struct sockaddr *nam = nfsd->nd_nam;
 1021         caddr_t dpos = nfsd->nd_dpos;
 1022         struct ucred *cred = nfsd->nd_cr;
 1023         struct iovec *ivp;
 1024         int i, cnt;
 1025         struct mbuf *mp;
 1026         struct nfs_fattr *fp;
 1027         struct iovec *iv;
 1028         struct vattr va, forat;
 1029         struct vattr *vap = &va;
 1030         u_int32_t *tl;
 1031         caddr_t bpos;
 1032         int error = 0, rdonly, len, forat_ret = 1;
 1033         int ioflags, aftat_ret = 1, retlen = 0, zeroing, adjust;
 1034         int stable = NFSV3WRITE_FILESYNC;
 1035         int v3 = (nfsd->nd_flag & ND_NFSV3);
 1036         struct mbuf *mb, *mreq;
 1037         struct vnode *vp = NULL;
 1038         nfsfh_t nfh;
 1039         fhandle_t *fhp;
 1040         struct uio io, *uiop = &io;
 1041         off_t off;
 1042         struct mount *mntp = NULL;
 1043         int tvfslocked;
 1044         int vfslocked;
 1045 
 1046         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1047         vfslocked = 0;
 1048         if (mrep == NULL) {
 1049                 *mrq = NULL;
 1050                 error = 0;
 1051                 goto nfsmout;
 1052         }
 1053         fhp = &nfh.fh_generic;
 1054         nfsm_srvmtofh(fhp);
              /*
               * Find the mount from the handle's fsid before the vnode is
               * looked up.  An unknown fsid is answered with ESTALE through
               * the normal reply code at ereply.
               */
 1055         if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 1056                 error = ESTALE;
 1057                 goto ereply;
 1058         }
 1059         vfslocked = VFS_LOCK_GIANT(mntp);
 1060         (void) vn_start_write(NULL, &mntp, V_WAIT);
 1061         vfs_rel(mntp);          /* The write holds a ref. */
              /*
               * v3 header, five words: words 0-1 are the offset, word 2 is
               * skipped, word 3 is the stability level and word 4 the length.
               * v2 header, four words: word 1 is the offset, word 3 the length,
               * and words 0 and 2 are ignored.  v2 has no stability field, so
               * it stays FILESYNC unless nfs_async selects UNSTABLE.
               */
 1062         if (v3) {
 1063                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 1064                 off = fxdr_hyper(tl);
 1065                 tl += 3;
 1066                 stable = fxdr_unsigned(int, *tl++);
 1067         } else {
 1068                 tl = nfsm_dissect_nonblock(u_int32_t *, 4 * NFSX_UNSIGNED);
 1069                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
 1070                 tl += 2;
 1071                 if (nfs_async)
 1072                         stable = NFSV3WRITE_UNSTABLE;
 1073         }
 1074         retlen = len = fxdr_unsigned(int32_t, *tl);
 1075         cnt = i = 0;
 1076 
 1077         /*
 1078          * For NFS Version 2, it is not obvious what a write of zero length
 1079          * should do, but I might as well be consistent with Version 3,
 1080          * which is to return ok so long as there are no permission problems.
               *
               * The loop below trims the request chain down to the write data.
               * Mbufs ahead of md are emptied, md itself is advanced to dpos,
               * and once len bytes have been accounted for the remainder is
               * emptied too.  i totals the data bytes found and cnt counts the
               * non-empty mbufs, which becomes the iovec count further down.
 1081          */
 1082         if (len > 0) {
 1083             zeroing = 1;
 1084             mp = mrep;
 1085             while (mp) {
 1086                 if (mp == md) {
 1087                         zeroing = 0;
 1088                         adjust = dpos - mtod(mp, caddr_t);
 1089                         mp->m_len -= adjust;
 1090                         if (mp->m_len > 0 && adjust > 0)
 1091                                 mp->m_data += adjust;
 1092                 }
 1093                 if (zeroing)
 1094                         mp->m_len = 0;
 1095                 else if (mp->m_len > 0) {
 1096                         i += mp->m_len;
 1097                         if (i > len) {
 1098                                 mp->m_len -= (i - len);
 1099                                 zeroing = 1;
 1100                         }
 1101                         if (mp->m_len > 0)
 1102                                 cnt++;
 1103                 }
 1104                 mp = mp->m_next;
 1105             }
 1106         }
              /*
               * Refuse a length that is negative, larger than NFS_MAXDATA, or
               * larger than the data actually present in the request.
               */
 1107         if (len > NFS_MAXDATA || len < 0 || i < len) {
 1108                 error = EIO;
 1109                 nfsm_reply(2 * NFSX_UNSIGNED);
 1110                 if (v3)
 1111                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1112                 error = 0;
 1113                 goto nfsmout;
 1114         }
 1115         error = nfsrv_fhtovp(fhp, 1, &vp, &tvfslocked, cred, slp,
 1116             nam, &rdonly, TRUE);
              /* Fold the lookup's lock state into the one taken on the mount above. */
 1117         vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
 1118         if (error) {
 1119                 vp = NULL;
 1120                 nfsm_reply(2 * NFSX_UNSIGNED);
 1121                 if (v3)
 1122                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1123                 error = 0;
 1124                 goto nfsmout;
 1125         }
              /* v3 replies carry attributes sampled before the write as well. */
 1126         if (v3)
 1127                 forat_ret = VOP_GETATTR(vp, &forat, cred, td);
              /*
               * Only VREG may be written.  v3 reports EINVAL; v2 reports EISDIR
               * for a directory and EACCES for any other type.
               */
 1128         if (vp->v_type != VREG) {
 1129                 if (v3)
 1130                         error = EINVAL;
 1131                 else
 1132                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
 1133         }
 1134         if (!error)
 1135                 error = nfsrv_access(vp, VWRITE, cred, rdonly, td, 1);
 1136         if (error) {
 1137                 vput(vp);
 1138                 vp = NULL;
 1139                 nfsm_reply(NFSX_WCCDATA(v3));
 1140                 if (v3)
 1141                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1142                 error = 0;
 1143                 goto nfsmout;
 1144         }
 1145 
              /*
               * Build one iovec per non-empty mbuf (cnt of them, as counted
               * above).  iv keeps the start of the array for the FREE() below
               * because ivp is advanced while filling it in.
               */
 1146         if (len > 0) {
 1147             MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
 1148                 M_WAITOK);
 1149             uiop->uio_iov = iv = ivp;
 1150             uiop->uio_iovcnt = cnt;
 1151             mp = mrep;
 1152             while (mp) {
 1153                 if (mp->m_len > 0) {
 1154                         ivp->iov_base = mtod(mp, caddr_t);
 1155                         ivp->iov_len = mp->m_len;
 1156                         ivp++;
 1157                 }
 1158                 mp = mp->m_next;
 1159             }
 1160 
 1161             /*
 1162              * XXX
 1163              * The IO_METASYNC flag indicates that all metadata (and not just
 1164              * enough to ensure data integrity) must be written to stable storage
 1165              * synchronously.
 1166              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
                  *
                  * UNSTABLE adds no sync flag, DATASYNC adds IO_SYNC, and any
                  * other value is treated as FILESYNC and adds both IO_SYNC
                  * and IO_METASYNC.
 1167              */
 1168             if (stable == NFSV3WRITE_UNSTABLE)
 1169                 ioflags = IO_NODELOCKED;
 1170             else if (stable == NFSV3WRITE_DATASYNC)
 1171                 ioflags = (IO_SYNC | IO_NODELOCKED);
 1172             else
 1173                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
 1174             uiop->uio_resid = len;
 1175             uiop->uio_rw = UIO_WRITE;
 1176             uiop->uio_segflg = UIO_SYSSPACE;
 1177             uiop->uio_td = NULL;
 1178             uiop->uio_offset = off;
 1179             error = VOP_WRITE(vp, uiop, ioflags, cred);
 1180             /* Unlocked write. */
 1181             nfsrvstats.srvvop_writes++;
 1182             FREE((caddr_t)iv, M_TEMP);
 1183         }
              /*
               * Post-write attributes are sampled whether or not the write
               * succeeded.  A getattr failure only becomes the reported error
               * when the write itself was fine.
               */
 1184         aftat_ret = VOP_GETATTR(vp, vap, cred, td);
 1185         vput(vp);
 1186         vp = NULL;
 1187         if (!error)
 1188                 error = aftat_ret;
              /*
               * ereply is reached by falling through after the write, and
               * directly from the vfs_getvfs() failure above with ESTALE.
               */
 1189 ereply:
 1190         nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
 1191                 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
 1192         if (v3) {
 1193                 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1194                 if (error) {
 1195                         error = 0;
 1196                         goto nfsmout;
 1197                 }
                      /*
                       * Four result words: the length from the request
                       * (retlen), the stability level being reported, and the
                       * two halves of nfsver.
                       */
 1198                 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 1199                 *tl++ = txdr_unsigned(retlen);
 1200                 /*
 1201                  * If nfs_async is set, then pretend the write was FILESYNC.
 1202                  */
 1203                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
 1204                         *tl++ = txdr_unsigned(stable);
 1205                 else
 1206                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
 1207                 /*
 1208                  * Actually, there is no need to txdr these fields,
 1209                  * but it may make the values more human readable,
 1210                  * for debugging purposes.
                       *
                       * nfsver is seeded from boottime the first time it is
                       * needed, i.e. while its tv_sec is still 0.
 1211                  */
 1212                 if (nfsver.tv_sec == 0)
 1213                         nfsver = boottime;
 1214                 *tl++ = txdr_unsigned(nfsver.tv_sec);
 1215                 *tl = txdr_unsigned(nfsver.tv_usec);
 1216         } else if (!error) {
 1217                 /* v2 non-error case. */
 1218                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 1219                 nfsm_srvfillattr(vap, fp);
 1220         }
 1221         error = 0;
 1222 nfsmout:
              /*
               * Common exit.
               *
               * NOTE(review): mntp is still NULL here on the mrep == NULL and
               * ESTALE paths; confirm that vn_finished_write() accepts NULL.
               */
 1223         if (vp)
 1224                 vput(vp);
 1225         vn_finished_write(mntp);
 1226         VFS_UNLOCK_GIANT(vfslocked);
 1227         return(error);
 1228 }
 1229 
 1230 /*
 1231  * Decide whether two requests' credentials grant the same privileges, as
 1232  * needed when gathering writes.  Only the fields NFS relies on are compared:
 1233  * the effective uid, the group count and each entry of the group list.
 1234  * Returns 1 when all of them match and 0 otherwise.
 1235  */
 1236 static int
 1237 nfsrv_samecred(struct ucred *cr1, struct ucred *cr2)
 1238 {
 1239         int ngrp, idx;
 1240 
 1241         if (cr1->cr_uid != cr2->cr_uid ||
 1242             cr1->cr_ngroups != cr2->cr_ngroups)
 1243                 return (0);
 1244         ngrp = cr1->cr_ngroups;
 1245         idx = 0;
 1246         while (idx < ngrp && cr1->cr_groups[idx] == cr2->cr_groups[idx])
 1247                 idx++;
 1248         /* Equivalent only if the scan ran off the end without a mismatch. */
 1249         return (idx >= ngrp);
 1250 }
 1251 
 1252 /*
 1253  * NFS write service with write gathering support. Called when
 1254  * nfsrvw_procrastinate > 0.
 1255  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
 1256  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
 1257  * Jan. 1994.
 1258  */
 1259 int
 1260 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
 1261     struct thread *td, struct mbuf **mrq)
 1262 {
 1263         struct iovec *ivp;
 1264         struct mbuf *mp;
 1265         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
 1266         struct nfs_fattr *fp;
 1267         int i;
 1268         struct iovec *iov;
 1269         struct nfsrvw_delayhash *wpp;
 1270         struct ucred *cred;
 1271         struct vattr va, forat;
 1272         u_int32_t *tl;
 1273         caddr_t bpos, dpos;
 1274         int error = 0, rdonly, len, forat_ret = 1;
 1275         int ioflags, aftat_ret = 1, s, adjust, v3, zeroing;
 1276         struct mbuf *mb, *mreq, *mrep, *md;
 1277         struct vnode *vp = NULL;
 1278         struct uio io, *uiop = &io;
 1279         u_quad_t cur_usec;
 1280         struct mount *mntp = NULL;
 1281         int mvfslocked;
 1282         int vfslocked;
 1283 
 1284         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1285 #ifndef nolint
 1286         i = 0;
 1287         len = 0;
 1288 #endif
 1289         vfslocked = 0;
 1290         *mrq = NULL;
 1291         if (*ndp) {
 1292             nfsd = *ndp;
 1293             *ndp = NULL;
 1294             mrep = nfsd->nd_mrep;
 1295             md = nfsd->nd_md;
 1296             dpos = nfsd->nd_dpos;
 1297             cred = nfsd->nd_cr;
 1298             v3 = (nfsd->nd_flag & ND_NFSV3);
 1299             LIST_INIT(&nfsd->nd_coalesce);
 1300             nfsd->nd_mreq = NULL;
 1301             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
 1302             cur_usec = nfs_curusec();
 1303             nfsd->nd_time = cur_usec +
 1304                 (v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
 1305 
 1306             /*
 1307              * Now, get the write header..
 1308              */
 1309             nfsm_srvmtofh(&nfsd->nd_fh);
 1310             if (v3) {
 1311                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 1312                 nfsd->nd_off = fxdr_hyper(tl);
 1313                 tl += 3;
 1314                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
 1315             } else {
 1316                 tl = nfsm_dissect_nonblock(u_int32_t *, 4 * NFSX_UNSIGNED);
 1317                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
 1318                 tl += 2;
 1319                 if (nfs_async)
 1320                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
 1321             }
 1322             len = fxdr_unsigned(int32_t, *tl);
 1323             nfsd->nd_len = len;
 1324             nfsd->nd_eoff = nfsd->nd_off + len;
 1325 
 1326             /*
 1327              * Trim the header out of the mbuf list and trim off any trailing
 1328              * junk so that the mbuf list has only the write data.
 1329              */
 1330             zeroing = 1;
 1331             i = 0;
 1332             mp = mrep;
 1333             while (mp) {
 1334                 if (mp == md) {
 1335                     zeroing = 0;
 1336                     adjust = dpos - mtod(mp, caddr_t);
 1337                     mp->m_len -= adjust;
 1338                     if (mp->m_len > 0 && adjust > 0)
 1339                         mp->m_data += adjust;
 1340                 }
 1341                 if (zeroing)
 1342                     mp->m_len = 0;
 1343                 else {
 1344                     i += mp->m_len;
 1345                     if (i > len) {
 1346                         mp->m_len -= (i - len);
 1347                         zeroing = 1;
 1348                     }
 1349                 }
 1350                 mp = mp->m_next;
 1351             }
 1352             if (len > NFS_MAXDATA || len < 0  || i < len) {
 1353 nfsmout:
 1354                 m_freem(mrep);
 1355                 error = EIO;
 1356                 nfsm_writereply(2 * NFSX_UNSIGNED);
 1357                 if (v3)
 1358                     nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
 1359                 nfsd->nd_mreq = mreq;
 1360                 nfsd->nd_mrep = NULL;
 1361                 nfsd->nd_time = 0;
 1362             }
 1363 
 1364             /*
 1365              * Add this entry to the hash and time queues.
 1366              */
 1367             s = splsoftclock();
 1368             owp = NULL;
 1369             wp = LIST_FIRST(&slp->ns_tq);
 1370             while (wp && wp->nd_time < nfsd->nd_time) {
 1371                 owp = wp;
 1372                 wp = LIST_NEXT(wp, nd_tq);
 1373             }
 1374             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
 1375             if (owp) {
 1376                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
 1377             } else {
 1378                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
 1379             }
 1380             if (nfsd->nd_mrep) {
 1381                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
 1382                 owp = NULL;
 1383                 wp = LIST_FIRST(wpp);
 1384                 while (wp &&
 1385                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh, NFSX_V3FH)){
 1386                     owp = wp;
 1387                     wp = LIST_NEXT(wp, nd_hash);
 1388                 }
 1389                 while (wp && wp->nd_off < nfsd->nd_off &&
 1390                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh, NFSX_V3FH)) {
 1391                     owp = wp;
 1392                     wp = LIST_NEXT(wp, nd_hash);
 1393                 }
 1394                 if (owp) {
 1395                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
 1396 
 1397                     /*
 1398                      * Search the hash list for overlapping entries and
 1399                      * coalesce.
 1400                      */
 1401                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
 1402                         wp = LIST_NEXT(nfsd, nd_hash);
 1403                         if (nfsrv_samecred(owp->nd_cr, nfsd->nd_cr))
 1404                             nfsrvw_coalesce(owp, nfsd);
 1405                     }
 1406                 } else {
 1407                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
 1408                 }
 1409             }
 1410             splx(s);
 1411         }
 1412 
 1413         /*
 1414          * Now, do VOP_WRITE()s for any one(s) that need to be done now
 1415          * and generate the associated reply mbuf list(s).
 1416          */
 1417 loop1:
 1418         cur_usec = nfs_curusec();
 1419         s = splsoftclock();
 1420         for (nfsd = LIST_FIRST(&slp->ns_tq); nfsd; nfsd = owp) {
 1421                 owp = LIST_NEXT(nfsd, nd_tq);
 1422                 if (nfsd->nd_time > cur_usec)
 1423                     break;
 1424                 if (nfsd->nd_mreq)
 1425                     continue;
 1426                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
 1427                 LIST_REMOVE(nfsd, nd_tq);
 1428                 LIST_REMOVE(nfsd, nd_hash);
 1429                 splx(s);
 1430                 mrep = nfsd->nd_mrep;
 1431                 nfsd->nd_mrep = NULL;
 1432                 cred = nfsd->nd_cr;
 1433                 v3 = (nfsd->nd_flag & ND_NFSV3);
 1434                 forat_ret = aftat_ret = 1;
 1435                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, &vfslocked, cred,
 1436                     slp, nfsd->nd_nam, &rdonly, TRUE);
 1437                 if (!error) {
 1438                     if (v3)
 1439                         forat_ret = VOP_GETATTR(vp, &forat, cred, td);
 1440                     if (vp->v_type != VREG) {
 1441                         if (v3)
 1442                             error = EINVAL;
 1443                         else
 1444                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
 1445                     }
 1446                 } else {
 1447                     vp = NULL;
 1448                 }
 1449                 if (!error)
 1450                     error = nfsrv_access(vp, VWRITE, cred, rdonly,
 1451                         td, 1);
 1452                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
 1453                     ioflags = IO_NODELOCKED;
 1454                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
 1455                     ioflags = (IO_SYNC | IO_NODELOCKED);
 1456                 else
 1457                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
 1458                 uiop->uio_rw = UIO_WRITE;
 1459                 uiop->uio_segflg = UIO_SYSSPACE;
 1460                 uiop->uio_td = NULL;
 1461                 uiop->uio_offset = nfsd->nd_off;
 1462                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
 1463                 if (uiop->uio_resid > 0) {
 1464                     mp = mrep;
 1465                     i = 0;
 1466                     while (mp) {
 1467                         if (mp->m_len > 0)
 1468                             i++;
 1469                         mp = mp->m_next;
 1470                     }
 1471                     uiop->uio_iovcnt = i;
 1472                     MALLOC(iov, struct iovec *, i * sizeof (struct iovec),
 1473                         M_TEMP, M_WAITOK);
 1474                     uiop->uio_iov = ivp = iov;
 1475                     mp = mrep;
 1476                     while (mp) {
 1477                         if (mp->m_len > 0) {
 1478                             ivp->iov_base = mtod(mp, caddr_t);
 1479                             ivp->iov_len = mp->m_len;
 1480                             ivp++;
 1481                         }
 1482                         mp = mp->m_next;
 1483                     }
 1484                     mvfslocked = 0;
 1485                     if (!error) {
 1486                         if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
 1487                             VOP_UNLOCK(vp, 0, td);
 1488                             error = vn_start_write(NULL, &mntp, V_WAIT);
 1489                             vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 1490                         }
 1491                         mvfslocked = VFS_LOCK_GIANT(mntp);
 1492                     }
 1493                     if (!error) {
 1494                         error = VOP_WRITE(vp, uiop, ioflags, cred);
 1495                         /* Unlocked write. */
 1496                         nfsrvstats.srvvop_writes++;
 1497                         vn_finished_write(mntp);
 1498                     }
 1499                     VFS_UNLOCK_GIANT(mvfslocked);
 1500                     FREE((caddr_t)iov, M_TEMP);
 1501                 }
 1502                 m_freem(mrep);
 1503                 if (vp) {
 1504                     aftat_ret = VOP_GETATTR(vp, &va, cred, td);
 1505                     vput(vp);
 1506                     vp = NULL;
 1507                 }
 1508                 VFS_UNLOCK_GIANT(vfslocked);
 1509                 /*
 1510                  * Loop around generating replies for all write rpcs that have
 1511                  * now been completed.
 1512                  */
 1513                 swp = nfsd;
 1514                 do {
 1515                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
 1516                     if (error) {
 1517                         nfsm_writereply(NFSX_WCCDATA(v3));
 1518                         if (v3) {
 1519                             nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
 1520                         }
 1521                     } else {
 1522                         nfsm_writereply(NFSX_PREOPATTR(v3) +
 1523                             NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED +
 1524                             NFSX_WRITEVERF(v3));
 1525                         if (v3) {
 1526                             nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va);
 1527                             tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 1528                             *tl++ = txdr_unsigned(nfsd->nd_len);
 1529                             *tl++ = txdr_unsigned(swp->nd_stable);
 1530                             /*
 1531                              * Actually, there is no need to txdr these fields,
 1532                              * but it may make the values more human readable,
 1533                              * for debugging purposes.
 1534                              */
 1535                             if (nfsver.tv_sec == 0)
 1536                                     nfsver = boottime;
 1537                             *tl++ = txdr_unsigned(nfsver.tv_sec);
 1538                             *tl = txdr_unsigned(nfsver.tv_usec);
 1539                         } else {
 1540                             fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 1541                             nfsm_srvfillattr(&va, fp);
 1542                         }
 1543                     }
 1544                     nfsd->nd_mreq = mreq;
 1545                     if (nfsd->nd_mrep)
 1546                         panic("nfsrv_write: nd_mrep not free");
 1547 
 1548                     /*
 1549                      * Done. Put it at the head of the timer queue so that
 1550                      * the final phase can return the reply.
 1551                      */
 1552                     s = splsoftclock();
 1553                     if (nfsd != swp) {
 1554                         nfsd->nd_time = 0;
 1555                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
 1556                     }
 1557                     nfsd = LIST_FIRST(&swp->nd_coalesce);
 1558                     if (nfsd) {
 1559                         LIST_REMOVE(nfsd, nd_tq);
 1560                     }
 1561                     splx(s);
 1562                 } while (nfsd);
 1563                 s = splsoftclock();
 1564                 swp->nd_time = 0;
 1565                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
 1566                 splx(s);
 1567                 goto loop1;
 1568         }
 1569         splx(s);
 1570 
 1571         /*
 1572          * Search for a reply to return.
 1573          */
 1574         s = splsoftclock();
 1575         LIST_FOREACH(nfsd, &slp->ns_tq, nd_tq)
 1576                 if (nfsd->nd_mreq) {
 1577                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
 1578                     LIST_REMOVE(nfsd, nd_tq);
 1579                     *mrq = nfsd->nd_mreq;
 1580                     *ndp = nfsd;
 1581                     break;
 1582                 }
 1583         splx(s);
 1584         return (0);
 1585 }
 1586 
 1587 /*
 1588  * Coalesce the write request nfsd into owp. To do this we must:
 1589  * - remove nfsd from the queues
 1590  * - merge nfsd->nd_mrep into owp->nd_mrep
 1591  * - update the nd_eoff and nd_stable for owp
 1592  * - put nfsd on owp's nd_coalesce list
 1593  * NB: Must be called at splsoftclock().
 1594  */
 1595 static void
 1596 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
 1597 {
 1598         int overlap;
 1599         struct mbuf *mp;
 1600         struct nfsrv_descript *p;
 1601 
 1602         NFS_DPF(WG, ("C%03x-%03x",
 1603                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
 1604         LIST_REMOVE(nfsd, nd_hash);
 1605         LIST_REMOVE(nfsd, nd_tq);
 1606         if (owp->nd_eoff < nfsd->nd_eoff) {
 1607             overlap = owp->nd_eoff - nfsd->nd_off;
 1608             if (overlap < 0)
 1609                 panic("nfsrv_coalesce: bad off");
 1610             if (overlap > 0)
 1611                 m_adj(nfsd->nd_mrep, overlap);
 1612             mp = owp->nd_mrep;
 1613             while (mp->m_next)
 1614                 mp = mp->m_next;
 1615             mp->m_next = nfsd->nd_mrep;
 1616             owp->nd_eoff = nfsd->nd_eoff;
 1617         } else
 1618             m_freem(nfsd->nd_mrep);
 1619         nfsd->nd_mrep = NULL;
 1620         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
 1621             owp->nd_stable = NFSV3WRITE_FILESYNC;
 1622         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
 1623             owp->nd_stable == NFSV3WRITE_UNSTABLE)
 1624             owp->nd_stable = NFSV3WRITE_DATASYNC;
 1625         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
 1626 
 1627         /*
 1628          * If nfsd had anything else coalesced into it, transfer them
 1629          * to owp, otherwise their replies will never get sent.
 1630          */
 1631         for (p = LIST_FIRST(&nfsd->nd_coalesce); p;
 1632              p = LIST_FIRST(&nfsd->nd_coalesce)) {
 1633             LIST_REMOVE(p, nd_tq);
 1634             LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
 1635         }
 1636 }
 1637 
 1638 /*
 1639  * nfs create service
 1640  * now does a truncate to 0 length via setattr if it already exists
       *
       * Serves both protocol versions; v3 is selected by ND_NFSV3 in
       * nfsd->nd_flag.
       *  - v3: the request carries a create mode (UNCHECKED, GUARDED or
       *    EXCLUSIVE) and the object type is always VREG.
       *  - v2: the object type is derived from the mode in the sattr, so
       *    device nodes and fifos are created here as well (VOP_MKNOD()).
       *
       * The outcome is written into the reply and error is reset to 0 before
       * the normal exit, so a non-zero return value can only come from one of
       * the nfsm_*() macros jumping to nfsmout with error still set.
 1641  */
 1642 int
 1643 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1644     struct thread *td, struct mbuf **mrq)
 1645 {
 1646         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1647         struct sockaddr *nam = nfsd->nd_nam;
 1648         caddr_t dpos = nfsd->nd_dpos;
 1649         struct ucred *cred = nfsd->nd_cr;
 1650         struct nfs_fattr *fp;
 1651         struct vattr va, dirfor, diraft;
 1652         struct vattr *vap = &va;
 1653         struct nfsv2_sattr *sp;
 1654         u_int32_t *tl;
 1655         struct nameidata nd;
 1656         caddr_t bpos;
 1657         int error = 0, rdev, len, tsize, dirfor_ret = 1, diraft_ret = 1;
 1658         int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
 1659         struct mbuf *mb, *mreq;
 1660         struct vnode *dirp = NULL;
 1661         nfsfh_t nfh;
 1662         fhandle_t *fhp;
 1663         u_quad_t tempsize;
 1664         struct timespec cverf;
 1665         struct mount *mp = NULL;
 1666         int tvfslocked;
 1667         int vfslocked;
 1668 
 1669         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1670         vfslocked = 0;
 1671 #ifndef nolint
 1672         rdev = 0;
 1673 #endif
              /* Clear nd first: the nfsm_*() macros below may jump to nfsmout. */
 1674         ndclear(&nd);
 1675 
 1676         fhp = &nfh.fh_generic;
 1677         nfsm_srvmtofh(fhp);
 1678         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 1679                 error = ESTALE;
 1680                 goto ereply;
 1681         }
 1682         vfslocked = VFS_LOCK_GIANT(mp);
 1683         (void) vn_start_write(NULL, &mp, V_WAIT);
 1684         vfs_rel(mp);            /* The write holds a ref. */
 1685         nfsm_srvnamesiz(len);
 1686 
 1687         nd.ni_cnd.cn_cred = cred;
 1688         nd.ni_cnd.cn_nameiop = CREATE;
 1689         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE;
 1690 
 1691         /*
 1692          * Call namei and do initial cleanup to get a few things
 1693          * out of the way.  If we get an initial error we cleanup
 1694          * and return here to avoid special-casing the invalid nd
 1695          * structure through the rest of the case.  dirp may be
 1696          * set even if an error occurs, but the nd structure will not
 1697          * be valid at all if an error occurs so we have to invalidate it
 1698          * prior to calling nfsm_reply ( which might goto nfsmout ).
 1699          */
 1700         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 1701                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 1702         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* dirp is only consulted for the v3 wcc data; v2 drops it now. */
 1703         if (dirp && !v3) {
 1704                 vrele(dirp);
 1705                 dirp = NULL;
 1706         }
 1707         if (error) {
 1708                 nfsm_reply(NFSX_WCCDATA(v3));
 1709                 if (v3)
 1710                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1711                 error = 0;
 1712                 goto nfsmout;
 1713         }
 1714 
 1715         /*
 1716          * No error.  Continue.  State:
 1717          *
 1718          *      startdir        is valid ( we release this immediately )
 1719          *      dirp            may be valid
 1720          *      nd.ni_vp        may be valid
 1721          *      nd.ni_dvp       is valid
 1722          *
 1723          * The error state is set through the code and we may also do some
 1724          * opportunistic releasing of vnodes to avoid holding locks through
 1725          * NFS I/O.  The cleanup at the end is a catch-all
 1726          */
 1727 
              /* Decode the requested attributes (and, for v3, the create mode). */
 1728         VATTR_NULL(vap);
 1729         if (v3) {
 1730                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 1731                 how = fxdr_unsigned(int, *tl);
 1732                 switch (how) {
 1733                 case NFSV3CREATE_GUARDED:
 1734                         if (nd.ni_vp) {
 1735                                 error = EEXIST;
 1736                                 break;
 1737                         }
 1738                         /* fall through */
 1739                 case NFSV3CREATE_UNCHECKED:
 1740                         nfsm_srvsattr(vap);
 1741                         break;
 1742                 case NFSV3CREATE_EXCLUSIVE:
 1743                         tl = nfsm_dissect_nonblock(u_int32_t *,
 1744                             NFSX_V3CREATEVERF);
 1745                         /* Unique bytes, endianness is not important. */
 1746                         cverf.tv_sec  = (int32_t)tl[0];
 1747                         cverf.tv_nsec = tl[1];
 1748                         exclusive_flag = 1;
 1749                         break;
 1750                 };
 1751                 vap->va_type = VREG;
 1752         } else {
 1753                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
 1754                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
 1755                 if (vap->va_type == VNON)
 1756                         vap->va_type = VREG;
 1757                 vap->va_mode = nfstov_mode(sp->sa_mode);
                      /*
                       * The v2 sa_size field does double duty: it is the file
                       * size for regular files (-1 leaves va_size unset) and
                       * the device number for device nodes and fifos.
                       */
 1758                 switch (vap->va_type) {
 1759                 case VREG:
 1760                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
 1761                         if (tsize != -1)
 1762                                 vap->va_size = (u_quad_t)tsize;
 1763                         break;
 1764                 case VCHR:
 1765                 case VBLK:
 1766                 case VFIFO:
 1767                         rdev = fxdr_unsigned(long, sp->sa_size);
 1768                         break;
 1769                 default:
 1770                         break;
 1771                 };
 1772         }
 1773 
 1774         /*
 1775          * Iff doesn't exist, create it
 1776          * otherwise just truncate to 0 length
 1777          *   should I set the mode too ?
 1778          *
 1779          * The only possible error we can have at this point is EEXIST.
 1780          * nd.ni_vp will also be non-NULL in that case.
 1781          */
 1782         if (nd.ni_vp == NULL) {
 1783                 if (vap->va_mode == (mode_t)VNOVAL)
 1784                         vap->va_mode = 0;
 1785                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
 1786                         error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 1787                         if (error)
 1788                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1789                         else {
                                      /*
                                       * Exclusive create of a new file: store the
                                       * client's verifier in the access time and
                                       * clear the flag, so the verifier comparison
                                       * further down is skipped for this request.
                                       */
 1790                                 if (exclusive_flag) {
 1791                                         exclusive_flag = 0;
 1792                                         VATTR_NULL(vap);
 1793                                         vap->va_atime = cverf;
 1794                                         error = VOP_SETATTR(nd.ni_vp, vap, cred,
 1795                                                 td);
 1796                                 }
 1797                         }
 1798                 } else if (vap->va_type == VCHR || vap->va_type == VBLK ||
 1799                     vap->va_type == VFIFO) {
 1800                         /*
 1801                          * NFSv2-specific code for creating device nodes
 1802                          * and fifos.
 1803                          *
 1804                          * Handle SysV FIFO node special cases.  All other
 1805                          * devices require super user to access.
 1806                          */
 1807                         if (vap->va_type == VCHR && rdev == 0xffffffff)
 1808                                 vap->va_type = VFIFO;
 1809                         if (vap->va_type != VFIFO &&
 1810                             (error = suser_cred(cred, 0))) {
 1811                                 goto ereply;
 1812                         }
 1813                         vap->va_rdev = rdev;
 1814                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 1815                         if (error) {
 1816                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1817                                 goto ereply;
 1818                         }
                              /*
                               * The vnode handed back by VOP_MKNOD() is released
                               * and the name is looked up again below to obtain
                               * the vnode used for the reply.
                               */
 1819                         vput(nd.ni_vp);
 1820                         nd.ni_vp = NULL;
 1821 
 1822                         /*
 1823                          * release dvp prior to lookup
 1824                          */
 1825                         vput(nd.ni_dvp);
 1826                         nd.ni_dvp = NULL;
 1827                         /*
 1828                          * Setup for lookup.
 1829                          *
 1830                          * Even though LOCKPARENT was cleared, ni_dvp may
 1831                          * be garbage.
 1832                          */
 1833                         nd.ni_cnd.cn_nameiop = LOOKUP;
 1834                         nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
 1835                         nd.ni_cnd.cn_thread = td;
 1836                         nd.ni_cnd.cn_cred = cred;
 1837                         tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
 1838                         if (tvfslocked)
 1839                                 nd.ni_cnd.cn_flags |= GIANTHELD;
 1840                         error = lookup(&nd);
 1841                         nd.ni_dvp = NULL;
 1842                         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
 1843                         nd.ni_cnd.cn_flags &= ~GIANTHELD;
 1844                         if (error)
 1845                                 goto ereply;
 1846 
 1847                         if (nd.ni_cnd.cn_flags & ISSYMLINK) {
 1848                                 error = EINVAL;
 1849                                 goto ereply;
 1850                         }
 1851                 } else {
 1852                         error = ENXIO;
 1853                 }
 1854         } else {
                      /*
                       * The object already exists.  Only act when the request
                       * supplied a size: check write access, then set just
                       * that size (all other attributes are reset first).
                       */
 1855                 if (vap->va_size != -1) {
 1856                         error = nfsrv_access(nd.ni_vp, VWRITE,
 1857                             cred, (nd.ni_cnd.cn_flags & RDONLY), td, 0);
 1858                         if (!error) {
 1859                                 tempsize = vap->va_size;
 1860                                 VATTR_NULL(vap);
 1861                                 vap->va_size = tempsize;
 1862                                 error = VOP_SETATTR(nd.ni_vp, vap, cred,
 1863                                          td);
 1864                         }
 1865                 }
 1866         }
 1867 
              /* Success so far: build the file handle and fetch the attributes. */
 1868         if (!error) {
 1869                 bzero((caddr_t)fhp, sizeof(nfh));
 1870                 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 1871                 error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 1872                 if (!error)
 1873                         error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
 1874         }
 1875         if (v3) {
                      /*
                       * exclusive_flag is still set only when the file already
                       * existed (it is cleared above after a fresh create).  In
                       * that case the request succeeds only if the file's access
                       * time equals the verifier; otherwise report EEXIST.
                       */
 1876                 if (exclusive_flag && !error &&
 1877                     bcmp(&cverf, &vap->va_atime, sizeof (cverf)))
 1878                         error = EEXIST;
                      /* Collect the directory's post-operation attributes. */
 1879                 if (dirp == nd.ni_dvp)
 1880                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 1881                 else {
 1882                         /* Drop the other locks to avoid deadlock. */
 1883                         if (nd.ni_dvp) {
 1884                                 if (nd.ni_dvp == nd.ni_vp)
 1885                                         vrele(nd.ni_dvp);
 1886                                 else
 1887                                         vput(nd.ni_dvp);
 1888                         }
 1889                         if (nd.ni_vp)
 1890                                 vput(nd.ni_vp);
 1891                         nd.ni_dvp = NULL;
 1892                         nd.ni_vp = NULL;
 1893 
 1894                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 1895                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 1896                         VOP_UNLOCK(dirp, 0, td);
 1897                 }
 1898         }
      /* Build the reply; error is cleared once the reply has been assembled. */
 1899 ereply:
 1900         nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
 1901         if (v3) {
 1902                 if (!error) {
 1903                         nfsm_srvpostop_fh(fhp);
 1904                         nfsm_srvpostop_attr(0, vap);
 1905                 }
 1906                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1907         } else if (!error) {
 1908                 /* v2 non-error case. */
 1909                 nfsm_srvfhtom(fhp, v3);
 1910                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 1911                 nfsm_srvfillattr(vap, fp);
 1912         }
 1913         error = 0;
 1914 
      /*
       * Catch-all cleanup: release whatever nd and dirp still hold, end the
       * write started with vn_start_write() and drop Giant if it was taken.
       */
 1915 nfsmout:
 1916         if (nd.ni_dvp) {
 1917                 if (nd.ni_dvp == nd.ni_vp)
 1918                         vrele(nd.ni_dvp);
 1919                 else
 1920                         vput(nd.ni_dvp);
 1921         }
 1922         if (nd.ni_vp)
 1923                 vput(nd.ni_vp);
 1924         if (nd.ni_startdir) {
 1925                 vrele(nd.ni_startdir);
 1926                 nd.ni_startdir = NULL;
 1927         }
 1928         if (dirp)
 1929                 vrele(dirp);
 1930         NDFREE(&nd, NDF_ONLY_PNBUF);
 1931         vn_finished_write(mp);
 1932         VFS_UNLOCK_GIANT(vfslocked);
 1933         return (error);
 1934 }
 1935 
 1936 /*
 1937  * nfs v3 mknod service
       *
       * Creates a character device, block device, socket or fifo named by the
       * request inside the directory identified by the file handle.
       *  - Any other type is refused with NFSERR_BADTYPE.
       *  - An already existing name is refused with EEXIST.
       *  - Sockets are created with VOP_CREATE(); the other types go through
       *    VOP_MKNOD() followed by a fresh lookup() of the new name.
       *  - Everything except a fifo on the VOP_MKNOD() path requires
       *    suser_cred() to succeed.
       *
       * The normal exit always returns 0 because the status travels in the
       * reply; the nfsmout exit (reached from the nfsm_*() macros and from the
       * nfs_namei() failure path) returns error.
       *
       * The directory vnode returned through dirp by nfs_namei() is released
       * on every exit path.
 1938  */
 1939 int
 1940 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1941     struct thread *td, struct mbuf **mrq)
 1942 {
 1943         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1944         struct sockaddr *nam = nfsd->nd_nam;
 1945         caddr_t dpos = nfsd->nd_dpos;
 1946         struct ucred *cred = nfsd->nd_cr;
 1947         struct vattr va, dirfor, diraft;
 1948         struct vattr *vap = &va;
 1949         u_int32_t *tl;
 1950         struct nameidata nd;
 1951         caddr_t bpos;
 1952         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 1953         u_int32_t major, minor;
 1954         enum vtype vtyp;
 1955         struct mbuf *mb, *mreq;
 1956         struct vnode *vp, *dirp = NULL;
 1957         nfsfh_t nfh;
 1958         fhandle_t *fhp;
 1959         struct mount *mp = NULL;
 1960         int v3 = (nfsd->nd_flag & ND_NFSV3);
 1961         int tvfslocked;
 1962         int vfslocked;
 1963 
 1964         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1965         vfslocked = 0;
 1966         if (!v3)
 1967                 panic("nfsrv_mknod: v3 proc called on a v2 connection");
              /* Clear nd first: the nfsm_*() macros below may jump to nfsmout. */
 1968         ndclear(&nd);
 1969 
 1970         fhp = &nfh.fh_generic;
 1971         nfsm_srvmtofh(fhp);
 1972         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 1973                 error = ESTALE;
 1974                 goto ereply;
 1975         }
 1976         vfslocked = VFS_LOCK_GIANT(mp);
 1977         (void) vn_start_write(NULL, &mp, V_WAIT);
 1978         vfs_rel(mp);            /* The write holds a ref. */
 1979         nfsm_srvnamesiz(len);
 1980 
 1981         nd.ni_cnd.cn_cred = cred;
 1982         nd.ni_cnd.cn_nameiop = CREATE;
 1983         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE;
 1984 
 1985         /*
 1986          * Handle nfs_namei() call.  If an error occurs, the nd structure
 1987          * is not valid.  However, nfsm_*() routines may still jump to
 1988          * nfsmout.
 1989          */
 1990 
 1991         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 1992                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 1993         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
 1994         if (error) {
 1995                 nfsm_reply(NFSX_WCCDATA(1));
 1996                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1997                 error = 0;
 1998                 goto nfsmout;
 1999         }
              /* Decode the requested node type and reject unsupported ones. */
 2000         tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 2001         vtyp = nfsv3tov_type(*tl);
 2002         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
 2003                 error = NFSERR_BADTYPE;
 2004                 goto out;
 2005         }
 2006         VATTR_NULL(vap);
 2007         nfsm_srvsattr(vap);
              /* Only device nodes carry a major/minor pair in the request. */
 2008         if (vtyp == VCHR || vtyp == VBLK) {
 2009                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
 2010                 major = fxdr_unsigned(u_int32_t, *tl++);
 2011                 minor = fxdr_unsigned(u_int32_t, *tl);
 2012                 vap->va_rdev = makedev(major, minor);
 2013         }
 2014 
 2015         /*
 2016          * Iff doesn't exist, create it.
 2017          */
 2018         if (nd.ni_vp) {
 2019                 error = EEXIST;
 2020                 goto out;
 2021         }
 2022         vap->va_type = vtyp;
 2023         if (vap->va_mode == (mode_t)VNOVAL)
 2024                 vap->va_mode = 0;
 2025         if (vtyp == VSOCK) {
                      /* No second lookup on this path, so startdir can go now. */
 2026                 vrele(nd.ni_startdir);
 2027                 nd.ni_startdir = NULL;
 2028                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 2029                 if (error)
 2030                         NDFREE(&nd, NDF_ONLY_PNBUF);
 2031         } else {
 2032                 if (vtyp != VFIFO && (error = suser_cred(cred, 0)))
 2033                         goto out;
 2034                 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 2035                 if (error) {
 2036                         NDFREE(&nd, NDF_ONLY_PNBUF);
 2037                         goto out;
 2038                 }
                      /*
                       * The vnode handed back by VOP_MKNOD() is released and the
                       * name is looked up again below to obtain the vnode used
                       * for the reply.
                       */
 2039                 vput(nd.ni_vp);
 2040                 nd.ni_vp = NULL;
 2041 
 2042                 /*
 2043                  * Release dvp prior to lookup
 2044                  */
 2045                 vput(nd.ni_dvp);
 2046                 nd.ni_dvp = NULL;
 2047 
 2048                 nd.ni_cnd.cn_nameiop = LOOKUP;
 2049                 nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
 2050                 nd.ni_cnd.cn_thread = td;
 2051                 nd.ni_cnd.cn_cred = td->td_ucred;
 2052                 tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
 2053                 if (tvfslocked)
 2054                         nd.ni_cnd.cn_flags |= GIANTHELD;
 2055                 error = lookup(&nd);
 2056                 nd.ni_dvp = NULL;
 2057                 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
 2058                 nd.ni_cnd.cn_flags &= ~GIANTHELD;
 2059 
 2060                 if (error)
 2061                         goto out;
 2062                 if (nd.ni_cnd.cn_flags & ISSYMLINK)
 2063                         error = EINVAL;
 2064         }
 2065 
 2066         /*
 2067          * send response, cleanup, return.
 2068          */
 2069 out:
 2070         vp = nd.ni_vp;
 2071         if (!error) {
 2072                 bzero((caddr_t)fhp, sizeof(nfh));
 2073                 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 2074                 error = VOP_VPTOFH(vp, &fhp->fh_fid);
 2075                 if (!error)
 2076                         error = VOP_GETATTR(vp, vap, cred, td);
 2077         }
              /*
               * Release everything held through nd before dirp is locked, and
               * clear the pointers so that a later jump to nfsmout finds
               * nothing left to release.
               */
 2078         if (nd.ni_dvp) {
 2079                 if (nd.ni_dvp == nd.ni_vp)
 2080                         vrele(nd.ni_dvp);
 2081                 else
 2082                         vput(nd.ni_dvp);
 2083                 nd.ni_dvp = NULL;
 2084         }
 2085         if (vp) {
 2086                 vput(vp);
 2087                 vp = NULL;
 2088                 nd.ni_vp = NULL;
 2089         }
 2090         if (nd.ni_startdir) {
 2091                 vrele(nd.ni_startdir);
 2092                 nd.ni_startdir = NULL;
 2093         }
 2094         NDFREE(&nd, NDF_ONLY_PNBUF);
 2095         if (dirp) {
 2096                 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2097                 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2098                 VOP_UNLOCK(dirp, 0, td);
                      /*
                       * This exit returns below without passing through
                       * nfsmout, so the directory reference has to be dropped
                       * here or it is leaked on every completed request.
                       * Clearing dirp keeps nfsmout from releasing it a second
                       * time should nfsm_reply() jump there.
                       */
                      vrele(dirp);
                      dirp = NULL;
 2099         }
 2100 ereply:
 2101         nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
 2102         if (v3) {
 2103                 if (!error) {
 2104                         nfsm_srvpostop_fh(fhp);
 2105                         nfsm_srvpostop_attr(0, vap);
 2106                 }
 2107                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2108         }
 2109         vn_finished_write(mp);
 2110         VFS_UNLOCK_GIANT(vfslocked);
 2111         return (0);
      /*
       * Exit used by the nfsm_*() macros and by the nfs_namei() failure path:
       * release whatever nd and dirp still hold.
       */
 2112 nfsmout:
 2113         if (nd.ni_dvp) {
 2114                 if (nd.ni_dvp == nd.ni_vp)
 2115                         vrele(nd.ni_dvp);
 2116                 else
 2117                         vput(nd.ni_dvp);
 2118         }
 2119         if (nd.ni_vp)
 2120                 vput(nd.ni_vp);
 2121         if (dirp)
 2122                 vrele(dirp);
 2123         if (nd.ni_startdir)
 2124                 vrele(nd.ni_startdir);
 2125         NDFREE(&nd, NDF_ONLY_PNBUF);
 2126         vn_finished_write(mp);
 2127         VFS_UNLOCK_GIANT(vfslocked);
 2128         return (error);
 2129 }
 2130 
 2131 /*
 2132  * nfs remove service
       *
       * Looks up the name from the request in the directory identified by the
       * file handle and removes it with VOP_REMOVE().  A directory is refused
       * with EPERM and a vnode flagged VV_ROOT with EBUSY.  For v3 the reply
       * carries the directory's before/after attributes (wcc data).
       *
       * The status is written into the reply and error is reset to 0 before
       * the normal exit, so a non-zero return value can only come from one of
       * the nfsm_*() macros jumping to nfsmout with error still set.
 2133  */
 2134 int
 2135 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2136     struct thread *td, struct mbuf **mrq)
 2137 {
 2138         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2139         struct sockaddr *nam = nfsd->nd_nam;
 2140         caddr_t dpos = nfsd->nd_dpos;
 2141         struct ucred *cred = nfsd->nd_cr;
 2142         struct nameidata nd;
 2143         caddr_t bpos;
 2144         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2145         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2146         struct mbuf *mb, *mreq;
 2147         struct vnode *dirp;
 2148         struct vattr dirfor, diraft;
 2149         nfsfh_t nfh;
 2150         fhandle_t *fhp;
 2151         struct mount *mp = NULL;
 2152         int vfslocked;
 2153 
 2154         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Clear nd first: the nfsm_*() macros below may jump to nfsmout. */
 2155         ndclear(&nd);
 2156         vfslocked = 0;
 2157 
 2158         fhp = &nfh.fh_generic;
 2159         nfsm_srvmtofh(fhp);
 2160         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2161                 error = ESTALE;
 2162                 goto ereply;
 2163         }
 2164         vfslocked = VFS_LOCK_GIANT(mp);
 2165         (void) vn_start_write(NULL, &mp, V_WAIT);
 2166         vfs_rel(mp);            /* The write holds a ref. */
 2167         nfsm_srvnamesiz(len);
 2168 
 2169         nd.ni_cnd.cn_cred = cred;
 2170         nd.ni_cnd.cn_nameiop = DELETE;
 2171         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | MPSAFE;
 2172         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 2173                 &dirp, v3,  &dirfor, &dirfor_ret, td, FALSE);
 2174         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* dirp is only consulted for the v3 wcc data; v2 drops it now. */
 2175         if (dirp && !v3) {
 2176                 vrele(dirp);
 2177                 dirp = NULL;
 2178         }
 2179         if (error == 0) {
 2180                 if (nd.ni_vp->v_type == VDIR) {
 2181                         error = EPERM;          /* POSIX */
 2182                         goto out;
 2183                 }
 2184                 /*
 2185                  * The root of a mounted filesystem cannot be deleted.
 2186                  */
 2187                 if (nd.ni_vp->v_vflag & VV_ROOT) {
 2188                         error = EBUSY;
 2189                         goto out;
 2190                 }
      /* Both refusals above arrive here with error set, which skips the removal. */
 2191 out:
 2192                 if (!error) {
 2193                         error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 2194                         NDFREE(&nd, NDF_ONLY_PNBUF);
 2195                 }
 2196         }
              /*
               * v3 only: collect the directory's post-operation attributes and
               * then drop the dirp reference here, because the nfsmout cleanup
               * below does not touch dirp.
               */
 2197         if (dirp && v3) {
 2198                 if (dirp == nd.ni_dvp)
 2199                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2200                 else {
 2201                         /* Drop the other locks to avoid deadlock. */
 2202                         if (nd.ni_dvp) {
 2203                                 if (nd.ni_dvp == nd.ni_vp)
 2204                                         vrele(nd.ni_dvp);
 2205                                 else
 2206                                         vput(nd.ni_dvp);
 2207                         }
 2208                         if (nd.ni_vp)
 2209                                 vput(nd.ni_vp);
 2210                         nd.ni_dvp = NULL;
 2211                         nd.ni_vp = NULL;
 2212 
 2213                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2214                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2215                         VOP_UNLOCK(dirp, 0, td);
 2216                 }
 2217                 vrele(dirp);
 2218                 dirp = NULL;
 2219         }
      /* Build the reply; error is cleared once the reply has been assembled. */
 2220 ereply:
 2221         nfsm_reply(NFSX_WCCDATA(v3));
 2222         if (v3)
 2223                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2224         error = 0;
      /*
       * Catch-all cleanup: release what nd still holds, end the write started
       * with vn_start_write() and drop Giant if it was taken.
       */
 2225 nfsmout:
 2226         NDFREE(&nd, NDF_ONLY_PNBUF);
 2227         if (nd.ni_dvp) {
 2228                 if (nd.ni_dvp == nd.ni_vp)
 2229                         vrele(nd.ni_dvp);
 2230                 else
 2231                         vput(nd.ni_dvp);
 2232         }
 2233         if (nd.ni_vp)
 2234                 vput(nd.ni_vp);
 2235         vn_finished_write(mp);
 2236         VFS_UNLOCK_GIANT(vfslocked);
 2237         return(error);
 2238 }
 2239 
 2240 /*
 2241  * nfs rename service
 2242  */
 2243 int
 2244 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2245     struct thread *td, struct mbuf **mrq)
 2246 {
 2247         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2248         struct sockaddr *nam = nfsd->nd_nam;
 2249         caddr_t dpos = nfsd->nd_dpos;
 2250         struct ucred *cred = nfsd->nd_cr;
 2251         caddr_t bpos;
 2252         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
 2253         int tdirfor_ret = 1, tdiraft_ret = 1;
 2254         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2255         struct mbuf *mb, *mreq;
 2256         struct nameidata fromnd, tond;
 2257         struct vnode *fvp, *tvp, *tdvp, *fdirp = NULL;
 2258         struct vnode *tdirp = NULL;
 2259         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
 2260         nfsfh_t fnfh, tnfh;
 2261         fhandle_t *ffhp, *tfhp;
 2262         uid_t saved_uid;
 2263         struct mount *mp = NULL;
 2264         int vfslocked;
 2265 
 2266         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2267         vfslocked = 0;
 2268 #ifndef nolint
 2269         fvp = NULL;
 2270 #endif
 2271         ffhp = &fnfh.fh_generic;
 2272         tfhp = &tnfh.fh_generic;
 2273 
 2274         /*
 2275          * Clear fields incase goto nfsmout occurs from macro.
 2276          */
 2277 
 2278         ndclear(&fromnd);
 2279         ndclear(&tond);
 2280 
 2281         nfsm_srvmtofh(ffhp);
 2282         if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL) {
 2283                 error = ESTALE;
 2284                 goto out1;
 2285         }
 2286         vfslocked = VFS_LOCK_GIANT(mp);
 2287         (void) vn_start_write(NULL, &mp, V_WAIT);
 2288         vfs_rel(mp);            /* The write holds a ref. */
 2289         nfsm_srvnamesiz(len);
 2290         /*
 2291          * Remember our original uid so that we can reset cr_uid before
 2292          * the second nfs_namei() call, in case it is remapped.
 2293          */
 2294         saved_uid = cred->cr_uid;
 2295         fromnd.ni_cnd.cn_cred = cred;
 2296         fromnd.ni_cnd.cn_nameiop = DELETE;
 2297         fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART | MPSAFE;
 2298         error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md,
 2299                 &dpos, &fdirp, v3, &fdirfor, &fdirfor_ret, td, FALSE);
 2300         vfslocked = nfsrv_lockedpair_nd(vfslocked, &fromnd);
 2301         if (fdirp && !v3) {
 2302                 vrele(fdirp);
 2303                 fdirp = NULL;
 2304         }
 2305         if (error) {
 2306                 nfsm_reply(2 * NFSX_WCCDATA(v3));
 2307                 if (v3) {
 2308                         nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 2309                         nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 2310                 }
 2311                 error = 0;
 2312                 goto nfsmout;
 2313         }
 2314         fvp = fromnd.ni_vp;
 2315         nfsm_srvmtofh(tfhp);
 2316         nfsm_srvnamesiz(len2);
 2317         cred->cr_uid = saved_uid;
 2318         tond.ni_cnd.cn_cred = cred;
 2319         tond.ni_cnd.cn_nameiop = RENAME;
 2320         tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | MPSAFE;
 2321         error = nfs_namei(&tond, tfhp, len2, slp, nam, &md,
 2322                 &dpos, &tdirp, v3, &tdirfor, &tdirfor_ret, td, FALSE);
 2323         vfslocked = nfsrv_lockedpair_nd(vfslocked, &tond);
 2324         if (tdirp && !v3) {
 2325                 vrele(tdirp);
 2326                 tdirp = NULL;
 2327         }
 2328         if (error)
 2329                 goto out1;
 2330 
 2331         tdvp = tond.ni_dvp;
 2332         tvp = tond.ni_vp;
 2333         if (tvp != NULL) {
 2334                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 2335                         if (v3)
 2336                                 error = EEXIST;
 2337                         else
 2338                                 error = EISDIR;
 2339                         goto out;
 2340                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 2341                         if (v3)
 2342                                 error = EEXIST;
 2343                         else
 2344                                 error = ENOTDIR;
 2345                         goto out;
 2346                 }
 2347                 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
 2348                         if (v3)
 2349                                 error = EXDEV;
 2350                         else
 2351                                 error = ENOTEMPTY;
 2352                         goto out;
 2353                 }
 2354         }
 2355         if (fvp->v_type == VDIR && fvp->v_mountedhere) {
 2356                 if (v3)
 2357                         error = EXDEV;
 2358                 else
 2359                         error = ENOTEMPTY;
 2360                 goto out;
 2361         }
 2362         if (fvp->v_mount != tdvp->v_mount) {
 2363                 if (v3)
 2364                         error = EXDEV;
 2365                 else
 2366                         error = ENOTEMPTY;
 2367                 goto out;
 2368         }
 2369         if (fvp == tdvp) {
 2370                 if (v3)
 2371                         error = EINVAL;
 2372                 else
 2373                         error = ENOTEMPTY;
 2374         }
 2375         /*
 2376          * If source is the same as the destination (that is the
 2377          * same vnode with the same name in the same directory),
 2378          * then there is nothing to do.
 2379          */
 2380         if (fvp == tvp && fromnd.ni_dvp == tdvp &&
 2381             fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
 2382             !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
 2383               fromnd.ni_cnd.cn_namelen))
 2384                 error = -1;
 2385 out:
 2386         if (!error) {
 2387                 /*
 2388                  * The VOP_RENAME function releases all vnode references &
 2389                  * locks prior to returning so we need to clear the pointers
 2390                  * to bypass cleanup code later on.
 2391                  */
 2392                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 2393                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 2394                 fromnd.ni_dvp = NULL;
 2395                 fromnd.ni_vp = NULL;
 2396                 tond.ni_dvp = NULL;
 2397                 tond.ni_vp = NULL;
 2398                 if (error) {
 2399                         NDFREE(&fromnd, NDF_ONLY_PNBUF);
 2400                         NDFREE(&tond, NDF_ONLY_PNBUF);
 2401                 }
 2402         } else {
 2403                 if (error == -1)
 2404                         error = 0;
 2405         }
 2406         /* fall through */
 2407 out1:
 2408         nfsm_reply(2 * NFSX_WCCDATA(v3));
 2409         if (v3) {
 2410                 /* Release existing locks to prevent deadlock. */
 2411                 if (tond.ni_dvp) {
 2412                         if (tond.ni_dvp == tond.ni_vp)
 2413                                 vrele(tond.ni_dvp);
 2414                         else
 2415                                 vput(tond.ni_dvp);
 2416                 }
 2417                 if (tond.ni_vp)
 2418                         vput(tond.ni_vp);
 2419                 tond.ni_dvp = NULL;
 2420                 tond.ni_vp = NULL;
 2421 
 2422                 if (fdirp) {
 2423                         vn_lock(fdirp, LK_EXCLUSIVE | LK_RETRY, td);
 2424                         fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, td);
 2425                         VOP_UNLOCK(fdirp, 0, td);
 2426                 }
 2427                 if (tdirp) {
 2428                         vn_lock(tdirp, LK_EXCLUSIVE | LK_RETRY, td);
 2429                         tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, td);
 2430                         VOP_UNLOCK(tdirp, 0, td);
 2431                 }
 2432                 nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 2433                 nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 2434         }
 2435         error = 0;
 2436         /* fall through */
 2437 
 2438 nfsmout:
 2439         /*
 2440          * Clear out tond related fields
 2441          */
 2442         if (tond.ni_dvp) {
 2443                 if (tond.ni_dvp == tond.ni_vp)
 2444                         vrele(tond.ni_dvp);
 2445                 else
 2446                         vput(tond.ni_dvp);
 2447         }
 2448         if (tond.ni_vp)
 2449                 vput(tond.ni_vp);
 2450         if (tdirp)
 2451                 vrele(tdirp);
 2452         if (tond.ni_startdir)
 2453                 vrele(tond.ni_startdir);
 2454         NDFREE(&tond, NDF_ONLY_PNBUF);
 2455         /*
 2456          * Clear out fromnd related fields
 2457          */
 2458         if (fdirp)
 2459                 vrele(fdirp);
 2460         if (fromnd.ni_startdir)
 2461                 vrele(fromnd.ni_startdir);
 2462         NDFREE(&fromnd, NDF_ONLY_PNBUF);
 2463         if (fromnd.ni_dvp)
 2464                 vrele(fromnd.ni_dvp);
 2465         if (fromnd.ni_vp)
 2466                 vrele(fromnd.ni_vp);
 2467 
 2468         vn_finished_write(mp);
 2469         VFS_UNLOCK_GIANT(vfslocked);
 2470         return (error);
 2471 }
 2472 
 2473 /*
 2474  * nfs link service
       *
       * Creates a hard link to an existing file.  Two file handles are
       * dissected from the request: fhp identifies the existing file (it is
       * turned into vp by nfsrv_fhtovp()) and dfhp identifies the directory
       * in which the new name is looked up with nameiop CREATE.
       *
       * Parameters:
       *   nfsd - request descriptor; supplies the request mbufs and parse
       *          position, the client address, the credentials and the
       *          ND_NFSV3 flag.
       *   slp  - server socket, handed through to nfsrv_fhtovp()/nfs_namei().
       *   td   - calling thread, used for vnode locking and VOP calls.
       *   mrq  - not referenced directly here; presumably consumed by the
       *          nfsm_reply() macro to hand back the reply -- TODO confirm.
       *
       * Returns "error".  Every explicit path sets it to 0 before reaching
       * nfsmout (failures are reported to the client through nfsm_reply()),
       * so a non-zero return can only come from an nfsm_* macro that bails
       * out to nfsmout on its own (NOTE(review): presumed from the comment
       * about macros in nfsrv_rename -- confirm against the macro
       * definitions).
 2475  */
 2476 int
 2477 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2478     struct thread *td, struct mbuf **mrq)
 2479 {
 2480         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2481         struct sockaddr *nam = nfsd->nd_nam;
 2482         caddr_t dpos = nfsd->nd_dpos;
 2483         struct ucred *cred = nfsd->nd_cr;
 2484         struct nameidata nd;
 2485         caddr_t bpos;
 2486         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
 2487         int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
 2488         struct mbuf *mb, *mreq;
 2489         struct vnode *vp = NULL, *xp, *dirp = NULL;
 2490         struct vattr dirfor, diraft, at;
 2491         nfsfh_t nfh, dnfh;
 2492         fhandle_t *fhp, *dfhp;
 2493         struct mount *mp = NULL;
 2494         int tvfslocked;
 2495         int vfslocked;
 2496 
 2497         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Clear nd up front so the nfsmout cleanup can test its fields. */
 2498         ndclear(&nd);
 2499         vfslocked = 0;
 2500 
 2501         fhp = &nfh.fh_generic;
 2502         dfhp = &dnfh.fh_generic;
 2503         nfsm_srvmtofh(fhp);
              /* No mount matches the handle's fsid: reply with ESTALE. */
 2504         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2505                 error = ESTALE;
 2506                 goto ereply;
 2507         }
 2508         vfslocked = VFS_LOCK_GIANT(mp);
 2509         (void) vn_start_write(NULL, &mp, V_WAIT);
 2510         vfs_rel(mp);            /* The write holds a ref. */
 2511         nfsm_srvmtofh(dfhp);
 2512         nfsm_srvnamesiz(len);
 2513 
              /*
               * Turn the source handle into vp.  The code below treats vp as
               * locked on success (it is VOP_UNLOCK'ed before the name lookup).
               */
 2514         error = nfsrv_fhtovp(fhp, TRUE, &vp, &tvfslocked, cred, slp,
 2515             nam, &rdonly, TRUE);
 2516         vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
 2517         if (error) {
 2518                 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2519                 if (v3) {
 2520                         nfsm_srvpostop_attr(getret, &at);
 2521                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2522                 }
                      /* Make sure the cleanup code does not vput() vp. */
 2523                 vp = NULL;
 2524                 error = 0;
 2525                 goto nfsmout;
 2526         }
 2527         if (v3)
 2528                 getret = VOP_GETATTR(vp, &at, cred, td);
              /* Directories cannot be hard linked; vp is still locked here. */
 2529         if (vp->v_type == VDIR) {
 2530                 error = EPERM;          /* POSIX */
 2531                 goto out1;
 2532         }
              /* Drop the lock on vp across the lookup of the new name. */
 2533         VOP_UNLOCK(vp, 0, td);
 2534         nd.ni_cnd.cn_cred = cred;
 2535         nd.ni_cnd.cn_nameiop = CREATE;
 2536         nd.ni_cnd.cn_flags = LOCKPARENT | MPSAFE | MPSAFE;
 2537         error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos,
 2538                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 2539         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* dirp is only kept for NFSv3, where it feeds the wcc data. */
 2540         if (dirp && !v3) {
 2541                 vrele(dirp);
 2542                 dirp = NULL;
 2543         }
              /*
               * On each failure below vp is unlocked, so it is dropped with
               * vrele() and cleared to keep nfsmout from calling vput() on it.
               */
 2544         if (error) {
 2545                 vrele(vp);
 2546                 vp = NULL;
 2547                 goto out2;
 2548         }
              /* The new name must not already exist. */
 2549         xp = nd.ni_vp;
 2550         if (xp != NULL) {
 2551                 error = EEXIST;
 2552                 vrele(vp);
 2553                 vp = NULL;
 2554                 goto out2;
 2555         }
              /* The file and the target directory must be on the same mount. */
 2556         xp = nd.ni_dvp;
 2557         if (vp->v_mount != xp->v_mount) {
 2558                 error = EXDEV;
 2559                 vrele(vp);
 2560                 vp = NULL;
 2561                 goto out2;
 2562         }
              /* Re-lock vp for the link operation; nfsmout vput()s it. */
 2563         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
 2564         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 2565         NDFREE(&nd, NDF_ONLY_PNBUF);
 2566         /* fall through */
 2567 
              /* out1: vp is locked; refresh its attributes for the v3 reply. */
 2568 out1:
 2569         if (v3)
 2570                 getret = VOP_GETATTR(vp, &at, cred, td);
              /* out2: collect the directory's post-operation attributes. */
 2571 out2:
 2572         if (dirp) {
 2573                 if (dirp == nd.ni_dvp)
 2574                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2575                 else {
 2576                         /* Release existing locks to prevent deadlock. */
 2577                         if (nd.ni_dvp) {
 2578                                 if (nd.ni_dvp == nd.ni_vp)
 2579                                         vrele(nd.ni_dvp);
 2580                                 else
 2581                                         vput(nd.ni_dvp);
 2582                         }
 2583                         if (nd.ni_vp)
 2584                                 vrele(nd.ni_vp);
 2585                         nd.ni_dvp = NULL;
 2586                         nd.ni_vp = NULL;
 2587 
 2588                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2589                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2590                         VOP_UNLOCK(dirp, 0, td);
 2591                 }
 2592         }
              /* ereply: build the reply, then clear error for the return. */
 2593 ereply:
 2594         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2595         if (v3) {
 2596                 nfsm_srvpostop_attr(getret, &at);
 2597                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2598         }
 2599         error = 0;
 2600         /* fall through */
 2601 
              /* nfsmout: common cleanup; only fields still non-NULL are released. */
 2602 nfsmout:
 2603         NDFREE(&nd, NDF_ONLY_PNBUF);
 2604         if (vp)
 2605                 vput(vp);
 2606         if (nd.ni_dvp) {
 2607                 if (nd.ni_dvp == nd.ni_vp)
 2608                         vrele(nd.ni_dvp);
 2609                 else
 2610                         vput(nd.ni_dvp);
 2611         }
 2612         if (dirp)
 2613                 vrele(dirp);
 2614         if (nd.ni_vp)
 2615                 vrele(nd.ni_vp);
 2616         vn_finished_write(mp);
 2617         VFS_UNLOCK_GIANT(vfslocked);
 2618         return(error);
 2619 }
 2620 
 2621 /*
 2622  * nfs symbolic link service
 2623  */
 2624 int
 2625 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2626     struct thread *td, struct mbuf **mrq)
 2627 {
 2628         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2629         struct sockaddr *nam = nfsd->nd_nam;
 2630         caddr_t dpos = nfsd->nd_dpos;
 2631         struct ucred *cred = nfsd->nd_cr;
 2632         struct vattr va, dirfor, diraft;
 2633         struct nameidata nd;
 2634         struct vattr *vap = &va;
 2635         struct nfsv2_sattr *sp;
 2636         char *bpos, *pathcp = NULL;
 2637         struct uio io;
 2638         struct iovec iv;
 2639         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
 2640         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2641         struct mbuf *mb, *mreq;
 2642         struct vnode *dirp = NULL;
 2643         nfsfh_t nfh;
 2644         fhandle_t *fhp;
 2645         struct mount *mp = NULL;
 2646         int tvfslocked;
 2647         int vfslocked;
 2648 
 2649         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2650         ndclear(&nd);
 2651         vfslocked = 0;
 2652 
 2653         fhp = &nfh.fh_generic;
 2654         nfsm_srvmtofh(fhp);
 2655         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2656                 error = ESTALE;
 2657                 goto out;
 2658         }
 2659         vfslocked = VFS_LOCK_GIANT(mp);
 2660         (void) vn_start_write(NULL, &mp, V_WAIT);
 2661         vfs_rel(mp);            /* The write holds a ref. */
 2662         nfsm_srvnamesiz(len);
 2663         nd.ni_cnd.cn_cred = cred;
 2664         nd.ni_cnd.cn_nameiop = CREATE;
 2665         nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART | MPSAFE;
 2666         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 2667                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 2668         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
 2669         if (error == 0) {
 2670                 VATTR_NULL(vap);
 2671                 if (v3)
 2672                         nfsm_srvsattr(vap);
 2673                 nfsm_srvpathsiz(len2);
 2674         }
 2675         if (dirp && !v3) {
 2676                 vrele(dirp);
 2677                 dirp = NULL;
 2678         }
 2679         if (error)
 2680                 goto out;
 2681         MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
 2682         iv.iov_base = pathcp;
 2683         iv.iov_len = len2;
 2684         io.uio_resid = len2;
 2685         io.uio_offset = 0;
 2686         io.uio_iov = &iv;
 2687         io.uio_iovcnt = 1;
 2688         io.uio_segflg = UIO_SYSSPACE;
 2689         io.uio_rw = UIO_READ;
 2690         io.uio_td = NULL;
 2691         nfsm_mtouio(&io, len2);
 2692         if (!v3) {
 2693                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
 2694                 vap->va_mode = nfstov_mode(sp->sa_mode);
 2695         }
 2696         *(pathcp + len2) = '\0';
 2697         if (nd.ni_vp) {
 2698                 error = EEXIST;
 2699                 goto out;
 2700         }
 2701 
 2702         /*
 2703          * issue symlink op.  SAVESTART is set so the underlying path component
 2704          * is only freed by the VOP if an error occurs.
 2705          */
 2706         if (vap->va_mode == (mode_t)VNOVAL)
 2707                 vap->va_mode = 0;
 2708         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
 2709         if (error)
 2710                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2711         else
 2712                 vput(nd.ni_vp);
 2713         nd.ni_vp = NULL;
 2714         /*
 2715          * releases directory prior to potential lookup op.
 2716          */
 2717         vput(nd.ni_dvp);
 2718         nd.ni_dvp = NULL;
 2719 
 2720         if (error == 0) {
 2721             if (v3) {
 2722                 /*
 2723                  * Issue lookup.  Leave SAVESTART set so we can easily free
 2724                  * the name buffer later on.
 2725                  *
 2726                  * since LOCKPARENT is not set, ni_dvp will be garbage on
 2727                  * return whether an error occurs or not.
 2728                  */
 2729                 nd.ni_cnd.cn_nameiop = LOOKUP;
 2730                 nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
 2731                 nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
 2732                 nd.ni_cnd.cn_thread = td;
 2733                 nd.ni_cnd.cn_cred = cred;
 2734                 tvfslocked = VFS_LOCK_GIANT(nd.ni_startdir->v_mount);
 2735                 if (tvfslocked)
 2736                         nd.ni_cnd.cn_flags |= GIANTHELD;
 2737                 error = lookup(&nd);
 2738                 nd.ni_dvp = NULL;
 2739                 vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
 2740                 nd.ni_cnd.cn_flags &= ~GIANTHELD;
 2741 
 2742                 if (error == 0) {
 2743                         bzero((caddr_t)fhp, sizeof(nfh));
 2744                         fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 2745                         error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 2746                         if (!error)
 2747                                 error = VOP_GETATTR(nd.ni_vp, vap, cred,
 2748                                         td);
 2749                         vput(nd.ni_vp);
 2750                         nd.ni_vp = NULL;
 2751                 }
 2752             }
 2753         }
 2754 out:
 2755         /*
 2756          * These releases aren't strictly required, does even doing them
 2757          * make any sense? XXX can nfsm_reply() block?
 2758          */
 2759         if (pathcp) {
 2760                 FREE(pathcp, M_TEMP);
 2761                 pathcp = NULL;
 2762         }
 2763         if (dirp) {
 2764                 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2765                 diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2766                 VOP_UNLOCK(dirp, 0, td);
 2767         }
 2768         if (nd.ni_startdir) {
 2769                 vrele(nd.ni_startdir);
 2770                 nd.ni_startdir = NULL;
 2771         }
 2772         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2773         if (v3) {
 2774                 if (!error) {
 2775                         nfsm_srvpostop_fh(fhp);
 2776                         nfsm_srvpostop_attr(0, vap);
 2777                 }
 2778                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2779         }
 2780         error = 0;
 2781         /* fall through */
 2782 
 2783 nfsmout:
 2784         NDFREE(&nd, NDF_ONLY_PNBUF);
 2785         if (nd.ni_dvp) {
 2786                 if (nd.ni_dvp == nd.ni_vp)
 2787                         vrele(nd.ni_dvp);
 2788                 else
 2789                         vput(nd.ni_dvp);
 2790         }
 2791         if (nd.ni_vp)
 2792                 vrele(nd.ni_vp);
 2793         if (nd.ni_startdir)
 2794                 vrele(nd.ni_startdir);
 2795         if (dirp)
 2796                 vrele(dirp);
 2797         if (pathcp)
 2798                 FREE(pathcp, M_TEMP);
 2799 
 2800         vn_finished_write(mp);
 2801         VFS_UNLOCK_GIANT(vfslocked);
 2802         return (error);
 2803 }
 2804 
 2805 /*
 2806  * nfs mkdir service
       *
       * Creates a directory.  The request supplies the parent directory file
       * handle and the new name (looked up with nameiop CREATE) followed by
       * the attributes: a full sattr for NFSv3, while for NFSv2 only one
       * word holding the mode is dissected here.
       *
       * On success the new directory's file handle and attributes are
       * returned; NFSv3 replies also carry wcc data for the parent.
       *
       * Parameters:
       *   nfsd - request descriptor; supplies the request mbufs and parse
       *          position, the client address, the credentials and the
       *          ND_NFSV3 flag.
       *   slp  - server socket, handed through to nfs_namei().
       *   td   - calling thread, used for vnode locking and VOP calls.
       *   mrq  - not referenced directly here; presumably consumed by the
       *          nfsm_reply() macro to hand back the reply -- TODO confirm.
       *
       * Returns "error", which the explicit paths reset to 0 after the reply
       * has been built; a non-zero value can only come from an nfsm_* macro
       * that jumps to nfsmout itself (NOTE(review): presumed -- confirm
       * against the macro definitions).
 2807  */
 2808 int
 2809 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2810     struct thread *td, struct mbuf **mrq)
 2811 {
 2812         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2813         struct sockaddr *nam = nfsd->nd_nam;
 2814         caddr_t dpos = nfsd->nd_dpos;
 2815         struct ucred *cred = nfsd->nd_cr;
 2816         struct vattr va, dirfor, diraft;
 2817         struct vattr *vap = &va;
 2818         struct nfs_fattr *fp;
 2819         struct nameidata nd;
 2820         u_int32_t *tl;
 2821         caddr_t bpos;
 2822         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2823         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2824         struct mbuf *mb, *mreq;
 2825         struct vnode *dirp = NULL;
              /*
               * Set once VOP_MKDIR() has been called; the cleanup code then
               * releases nd.ni_vp with vput() instead of vrele().
               */
 2826         int vpexcl = 0;
 2827         nfsfh_t nfh;
 2828         fhandle_t *fhp;
 2829         struct mount *mp = NULL;
 2830         int vfslocked;
 2831 
 2832         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Clear nd up front so the nfsmout cleanup can test its fields. */
 2833         ndclear(&nd);
 2834         vfslocked = 0;
 2835 
 2836         fhp = &nfh.fh_generic;
 2837         nfsm_srvmtofh(fhp);
              /* No mount matches the handle's fsid: reply with ESTALE. */
 2838         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2839                 error = ESTALE;
 2840                 goto out;
 2841         }
 2842         vfslocked = VFS_LOCK_GIANT(mp);
 2843         (void) vn_start_write(NULL, &mp, V_WAIT);
 2844         vfs_rel(mp);            /* The write holds a ref. */
 2845         nfsm_srvnamesiz(len);
 2846         nd.ni_cnd.cn_cred = cred;
 2847         nd.ni_cnd.cn_nameiop = CREATE;
 2848         nd.ni_cnd.cn_flags = LOCKPARENT | MPSAFE;
 2849 
 2850         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 2851                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 2852         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* dirp is only kept for NFSv3, where it feeds the wcc data. */
 2853         if (dirp && !v3) {
 2854                 vrele(dirp);
 2855                 dirp = NULL;
 2856         }
              /* Lookup failed: send the short error reply and clean up. */
 2857         if (error) {
 2858                 nfsm_reply(NFSX_WCCDATA(v3));
 2859                 if (v3)
 2860                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2861                 error = 0;
 2862                 goto nfsmout;
 2863         }
 2864         VATTR_NULL(vap);
 2865         if (v3) {
 2866                 nfsm_srvsattr(vap);
 2867         } else {
                      /* NFSv2: dissect one word and use it as the mode. */
 2868                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 2869                 vap->va_mode = nfstov_mode(*tl++);
 2870         }
 2871 
 2872         /*
 2873          * At this point nd.ni_dvp is referenced and exclusively locked and
 2874          * nd.ni_vp, if it exists, is referenced but not locked.
 2875          */
 2876 
 2877         vap->va_type = VDIR;
              /* The new name must not already exist. */
 2878         if (nd.ni_vp != NULL) {
 2879                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2880                 error = EEXIST;
 2881                 goto out;
 2882         }
 2883 
 2884         /*
 2885          * Issue mkdir op.  Since SAVESTART is not set, the pathname
 2886          * component is freed by the VOP call.  This will fill-in
 2887          * nd.ni_vp, reference, and exclusively lock it.
 2888          */
              /* A mode that is still VNOVAL (none supplied) becomes 0. */
 2889         if (vap->va_mode == (mode_t)VNOVAL)
 2890                 vap->va_mode = 0;
 2891         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 2892         NDFREE(&nd, NDF_ONLY_PNBUF);
 2893         vpexcl = 1;
 2894 
              /* The parent is released now and cleared so cleanup skips it. */
 2895         vput(nd.ni_dvp);
 2896         nd.ni_dvp = NULL;
 2897 
              /* Build the new directory's file handle and fetch its attributes. */
 2898         if (!error) {
 2899                 bzero((caddr_t)fhp, sizeof(nfh));
 2900                 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 2901                 error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 2902                 if (!error)
 2903                         error = VOP_GETATTR(nd.ni_vp, vap, cred, td);
 2904         }
              /* out: collect the parent's post-operation attributes and reply. */
 2905 out:
 2906         if (dirp) {
 2907                 if (dirp == nd.ni_dvp) {
 2908                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2909                 } else {
 2910                         /* Release existing locks to prevent deadlock. */
 2911                         if (nd.ni_dvp) {
 2912                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2913                                 if (nd.ni_dvp == nd.ni_vp && vpexcl)
 2914                                         vrele(nd.ni_dvp);
 2915                                 else
 2916                                         vput(nd.ni_dvp);
 2917                         }
 2918                         if (nd.ni_vp) {
 2919                                 if (vpexcl)
 2920                                         vput(nd.ni_vp);
 2921                                 else
 2922                                         vrele(nd.ni_vp);
 2923                         }
 2924                         nd.ni_dvp = NULL;
 2925                         nd.ni_vp = NULL;
 2926                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 2927                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 2928                         VOP_UNLOCK(dirp, 0, td);
 2929                 }
 2930         }
 2931         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2932         if (v3) {
                      /* Handle and attributes are only sent on success. */
 2933                 if (!error) {
 2934                         nfsm_srvpostop_fh(fhp);
 2935                         nfsm_srvpostop_attr(0, vap);
 2936                 }
 2937                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2938         } else if (!error) {
 2939                 /* v2 non-error case. */
 2940                 nfsm_srvfhtom(fhp, v3);
 2941                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 2942                 nfsm_srvfillattr(vap, fp);
 2943         }
 2944         error = 0;
 2945         /* fall through */
 2946 
              /* nfsmout: common cleanup; only fields still non-NULL are released. */
 2947 nfsmout:
 2948         if (nd.ni_dvp) {
 2949                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2950                 if (nd.ni_dvp == nd.ni_vp && vpexcl)
 2951                         vrele(nd.ni_dvp);
 2952                 else
 2953                         vput(nd.ni_dvp);
 2954         }
 2955         if (nd.ni_vp) {
 2956                 if (vpexcl)
 2957                         vput(nd.ni_vp);
 2958                 else
 2959                         vrele(nd.ni_vp);
 2960         }
 2961         if (dirp)
 2962                 vrele(dirp);
 2963         vn_finished_write(mp);
 2964         VFS_UNLOCK_GIANT(vfslocked);
 2965         return (error);
 2966 }
 2967 
 2968 /*
 2969  * nfs rmdir service
       *
       * Removes a directory.  The request supplies the parent directory file
       * handle and the name, which is looked up with nameiop DELETE and both
       * parent and leaf locked.  The target must be a directory, must not be
       * the parent itself ("."), and must not be the root of a mounted
       * filesystem (VV_ROOT).
       *
       * Parameters:
       *   nfsd - request descriptor; supplies the request mbufs and parse
       *          position, the client address, the credentials and the
       *          ND_NFSV3 flag.
       *   slp  - server socket, handed through to nfs_namei().
       *   td   - calling thread, used for vnode locking and VOP calls.
       *   mrq  - not referenced directly here; presumably consumed by the
       *          nfsm_reply() macro to hand back the reply -- TODO confirm.
       *
       * Returns "error", which the explicit paths reset to 0 once the reply
       * has been started; a non-zero value can only come from an nfsm_*
       * macro that jumps to nfsmout itself (NOTE(review): presumed --
       * confirm against the macro definitions).
 2970  */
 2971 int
 2972 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2973     struct thread *td, struct mbuf **mrq)
 2974 {
 2975         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2976         struct sockaddr *nam = nfsd->nd_nam;
 2977         caddr_t dpos = nfsd->nd_dpos;
 2978         struct ucred *cred = nfsd->nd_cr;
 2979         caddr_t bpos;
 2980         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2981         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2982         struct mbuf *mb, *mreq;
 2983         struct vnode *vp, *dirp = NULL;
 2984         struct vattr dirfor, diraft;
 2985         nfsfh_t nfh;
 2986         fhandle_t *fhp;
 2987         struct nameidata nd;
 2988         struct mount *mp = NULL;
 2989         int vfslocked;
 2990 
 2991         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Clear nd up front so the cleanup code can test its fields. */
 2992         ndclear(&nd);
 2993         vfslocked = 0;
 2994 
 2995         fhp = &nfh.fh_generic;
 2996         nfsm_srvmtofh(fhp);
              /* No mount matches the handle's fsid: reply with ESTALE. */
 2997         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2998                 error = ESTALE;
 2999                 goto out;
 3000         }
 3001         vfslocked = VFS_LOCK_GIANT(mp);
 3002         (void) vn_start_write(NULL, &mp, V_WAIT);
 3003         vfs_rel(mp);            /* The write holds a ref. */
 3004         nfsm_srvnamesiz(len);
 3005         nd.ni_cnd.cn_cred = cred;
 3006         nd.ni_cnd.cn_nameiop = DELETE;
 3007         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | MPSAFE;
 3008         error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos,
 3009                 &dirp, v3, &dirfor, &dirfor_ret, td, FALSE);
 3010         vfslocked = nfsrv_lockedpair_nd(vfslocked, &nd);
              /* dirp is only kept for NFSv3, where it feeds the wcc data. */
 3011         if (dirp && !v3) {
 3012                 vrele(dirp);
 3013                 dirp = NULL;
 3014         }
              /* Lookup failed: send the short error reply and clean up. */
 3015         if (error) {
 3016                 nfsm_reply(NFSX_WCCDATA(v3));
 3017                 if (v3)
 3018                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 3019                 error = 0;
 3020                 goto nfsmout;
 3021         }
 3022         vp = nd.ni_vp;
              /* Only directories may be removed by this call. */
 3023         if (vp->v_type != VDIR) {
 3024                 error = ENOTDIR;
 3025                 goto out;
 3026         }
 3027         /*
 3028          * No rmdir "." please.
 3029          */
 3030         if (nd.ni_dvp == vp) {
 3031                 error = EINVAL;
 3032                 goto out;
 3033         }
 3034         /*
 3035          * The root of a mounted filesystem cannot be deleted.
 3036          */
 3037         if (vp->v_vflag & VV_ROOT)
 3038                 error = EBUSY;
 3039 out:
 3040         /*
 3041          * Issue or abort op.  Since SAVESTART is not set, path name
 3042          * component is freed by the VOP after either.
 3043          */
              /* An error set by the checks above skips the VOP_RMDIR() call. */
 3044         if (!error)
 3045                 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 3046         NDFREE(&nd, NDF_ONLY_PNBUF);
 3047 
              /* Collect the parent's post-operation attributes for the wcc data. */
 3048         if (dirp) {
 3049                 if (dirp == nd.ni_dvp)
 3050                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 3051                 else {
 3052                         /* Release existing locks to prevent deadlock. */
 3053                         if (nd.ni_dvp) {
 3054                                 if (nd.ni_dvp == nd.ni_vp)
 3055                                         vrele(nd.ni_dvp);
 3056                                 else
 3057                                         vput(nd.ni_dvp);
 3058                         }
 3059                         if (nd.ni_vp)
 3060                                 vput(nd.ni_vp);
 3061                         nd.ni_dvp = NULL;
 3062                         nd.ni_vp = NULL;
 3063                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY, td);
 3064                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred, td);
 3065                         VOP_UNLOCK(dirp, 0, td);
 3066                 }
 3067         }
 3068         nfsm_reply(NFSX_WCCDATA(v3));
              /* The status has gone into the reply; the function returns 0. */
 3069         error = 0;
 3070         if (v3)
 3071                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 3072         /* fall through */
 3073 
              /* nfsmout: common cleanup; only fields still non-NULL are released. */
 3074 nfsmout:
 3075         NDFREE(&nd, NDF_ONLY_PNBUF);
 3076         if (nd.ni_dvp) {
 3077                 if (nd.ni_dvp == nd.ni_vp)
 3078                         vrele(nd.ni_dvp);
 3079                 else
 3080                         vput(nd.ni_dvp);
 3081         }
 3082         if (nd.ni_vp)
 3083                 vput(nd.ni_vp);
 3084         if (dirp)
 3085                 vrele(dirp);
 3086 
 3087         vn_finished_write(mp);
 3088         VFS_UNLOCK_GIANT(vfslocked);
 3089         return(error);
 3090 }
 3091 
 3092 /*
 3093  * nfs readdir service
 3094  * - mallocs what it thinks is enough to read
 3095  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
 3096  * - calls VOP_READDIR()
 3097  * - loops around building the reply
 3098  *      if the output generated exceeds count break out of loop
 3099  *      The nfsm_clget macro is used here so that the reply will be packed
 3100  *      tightly in mbuf clusters.
 3101  * - it only knows that it has encountered eof when the VOP_READDIR()
 3102  *      reads nothing
 3103  * - as such one readdir rpc will return eof false although you are there
 3104  *      and then the next will return eof
 3105  * - it trims out records with d_fileno == 0
 3106  *      this doesn't matter for Unix clients, but such records might
 3107  *      confuse clients for other operating systems.
 3108  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
 3109  *      than requested, but this may not apply to all filesystems. For
 3110  *      example, client NFS does not { although it is never remote mounted
 3111  *      anyhow }
 3112  *     The alternate call nfsrv_readdirplus() does lookups as well.
 3113  * PS: The NFS protocol spec. does not clarify what the "count" byte
 3114  *      argument is a count of.. just name strings and file id's or the
 3115  *      entire reply rpc or ...
 3116  *      I tried just file name and id sizes and it confused the Sun client,
 3117  *      so I am using the full rpc size now. The "paranoia.." comment refers
 3118  *      to including the status longwords that are not a part of the dir.
 3119  *      "entry" structures, but are in the rpc.
 3120  */
 3121 struct flrep {
              /*
               * Fixed-size trailer of one READDIRPLUS entry.  nfsrv_readdirplus()
               * fills one of these per entry and copies it into the reply with
               * bcopy() of sizeof(struct flrep) bytes, so the field order and
               * sizes are part of what goes out in the reply.
               */
 3122         nfsuint64       fl_off;         /* entry cookie: high word 0, low word the cookie */
 3123         u_int32_t       fl_postopok;    /* set to nfsrv_nfs_true: attributes follow */
 3124         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];    /* filled by nfsm_srvfillattr() */
 3125         u_int32_t       fl_fhok;        /* set to nfsrv_nfs_true: file handle follows */
 3126         u_int32_t       fl_fhsize;      /* txdr_unsigned(NFSX_V3FH) */
 3127         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];         /* file handle (fsid + fid) */
 3128 };
 3129 
 3130 int
 3131 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3132     struct thread *td, struct mbuf **mrq)
 3133 {
              /*
               * Service a READDIR request for NFSv2 or NFSv3.
               *
               * nfsd supplies the request mbufs, the parse position, the
               * client address, the credentials and the protocol flags.
               * slp and the client address are handed to nfsrv_fhtovp();
               * td is passed to the vnode operations.  mrq is not referenced
               * directly in this function; it is presumably filled in by the
               * nfsm_reply macro (defined elsewhere) -- confirm there.
               *
               * Every path below that builds a reply clears "error" before
               * jumping to nfsmout, so the NFS status travels in the reply.
               * The value returned after a failure inside one of the nfsm_*
               * decode macros depends on those macros.
               */
 3134         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3135         struct sockaddr *nam = nfsd->nd_nam;
 3136         caddr_t dpos = nfsd->nd_dpos;
 3137         struct ucred *cred = nfsd->nd_cr;
 3138         char *bp, *be;
 3139         struct mbuf *mp;
 3140         struct dirent *dp;
 3141         caddr_t cp;
 3142         u_int32_t *tl;
 3143         caddr_t bpos;
 3144         struct mbuf *mb, *mreq;
 3145         char *cpos, *cend, *rbuf;
 3146         struct vnode *vp = NULL;
 3147         struct vattr at;
 3148         nfsfh_t nfh;
 3149         fhandle_t *fhp;
 3150         struct uio io;
 3151         struct iovec iv;
 3152         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
 3153         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
 3154         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3155         u_quad_t off, toff, verf;
 3156         u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
 3157         int vfslocked;
 3158 
 3159         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3160         vfslocked = 0;
 3161         fhp = &nfh.fh_generic;
 3162         nfsm_srvmtofh(fhp);
              /*
               * v3 arguments: 64-bit cookie, 64-bit cookie verifier, count.
               * v2 arguments: 32-bit cookie, count.
               * In both cases tl is left pointing at the count word.
               */
 3163         if (v3) {
 3164                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 3165                 toff = fxdr_hyper(tl);
 3166                 tl += 2;
 3167                 verf = fxdr_hyper(tl);
 3168                 tl += 2;
 3169         } else {
 3170                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
 3171                 toff = fxdr_unsigned(u_quad_t, *tl++);
 3172                 verf = 0;       /* shut up gcc */
 3173         }
 3174         off = toff;
 3175         cnt = fxdr_unsigned(int, *tl);
              /*
               * The count comes straight off the wire.  Clamp it to the
               * transfer limit before rounding up to a DIRBLKSIZ multiple,
               * treating a value that decodes as negative as a request for
               * the maximum.  This keeps the round-up from overflowing and
               * guarantees that siz/fullsiz are never negative when they are
               * later used as the MALLOC size and the uio length.
               */
 3176         xfer = NFS_SRVMAXDATA(nfsd);
 3177         if (cnt < 0 || cnt > xfer)
 3178                 cnt = xfer;
 3179         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 3180         if (siz > xfer)
 3181                 siz = xfer;
 3182         fullsiz = siz;
 3183         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
 3184             nam, &rdonly, TRUE);
              /* READDIR is only meaningful on a directory. */
 3185         if (!error && vp->v_type != VDIR) {
 3186                 error = ENOTDIR;
 3187                 vput(vp);
 3188                 vp = NULL;
 3189         }
 3190         if (error) {
 3191                 nfsm_reply(NFSX_UNSIGNED);
 3192                 if (v3)
 3193                         nfsm_srvpostop_attr(getret, &at);
 3194                 error = 0;
 3195                 goto nfsmout;
 3196         }
 3197 
 3198         /*
 3199          * Obtain lock on vnode for this section of the code
 3200          */
 3201         if (v3) {
 3202                 error = getret = VOP_GETATTR(vp, &at, cred, td);
 3203 #if 0
 3204                 /*
 3205                  * XXX This check may be too strict for Solaris 2.5 clients.
 3206                  */
 3207                 if (!error && toff && verf && verf != at.va_filerev)
 3208                         error = NFSERR_BAD_COOKIE;
 3209 #endif
 3210         }
              /* The caller needs VEXEC access on the directory. */
 3211         if (!error)
 3212                 error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
 3213         if (error) {
 3214                 vput(vp);
 3215                 vp = NULL;
 3216                 nfsm_reply(NFSX_POSTOPATTR(v3));
 3217                 if (v3)
 3218                         nfsm_srvpostop_attr(getret, &at);
 3219                 error = 0;
 3220                 goto nfsmout;
 3221         }
 3222         VOP_UNLOCK(vp, 0, td);
 3223 
 3224         /*
 3225          * end section.  Allocate rbuf and continue
 3226          */
 3227         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 3228 again:
              /*
               * (Re)build the uio so that VOP_READDIR() reads up to fullsiz
               * bytes of directory data, starting at offset "off", into rbuf.
               */
 3229         iv.iov_base = rbuf;
 3230         iv.iov_len = fullsiz;
 3231         io.uio_iov = &iv;
 3232         io.uio_iovcnt = 1;
 3233         io.uio_offset = (off_t)off;
 3234         io.uio_resid = fullsiz;
 3235         io.uio_segflg = UIO_SYSSPACE;
 3236         io.uio_rw = UIO_READ;
 3237         io.uio_td = NULL;
 3238         eofflag = 0;
 3239         vn_lock(vp, LK_SHARED | LK_RETRY, td);
              /* Drop the cookie array left over from a previous pass. */
 3240         if (cookies) {
 3241                 free((caddr_t)cookies, M_TEMP);
 3242                 cookies = NULL;
 3243         }
 3244         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
 3245         off = (off_t)io.uio_offset;
              /*
               * Each reply entry carries a cookie, so a successful read that
               * produced no cookie array cannot be served.
               */
 3246         if (!cookies && !error)
 3247                 error = NFSERR_PERM;
 3248         if (v3) {
 3249                 getret = VOP_GETATTR(vp, &at, cred, td);
 3250                 if (!error)
 3251                         error = getret;
 3252         }
 3253         VOP_UNLOCK(vp, 0, td);
 3254         if (error) {
 3255                 vrele(vp);
 3256                 vp = NULL;
 3257                 free((caddr_t)rbuf, M_TEMP);
 3258                 if (cookies)
 3259                         free((caddr_t)cookies, M_TEMP);
 3260                 nfsm_reply(NFSX_POSTOPATTR(v3));
 3261                 if (v3)
 3262                         nfsm_srvpostop_attr(getret, &at);
 3263                 error = 0;
 3264                 goto nfsmout;
 3265         }
              /* Shrink siz to the number of bytes actually read. */
 3266         if (io.uio_resid) {
 3267                 siz -= io.uio_resid;
 3268 
 3269                 /*
 3270                  * If nothing read, return eof
 3271                  * rpc reply
 3272                  */
 3273                 if (siz == 0) {
 3274                         vrele(vp);
 3275                         vp = NULL;
 3276                         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
 3277                                 2 * NFSX_UNSIGNED);
 3278                         if (v3) {
 3279                                 nfsm_srvpostop_attr(getret, &at);
 3280                                 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 3281                                 txdr_hyper(at.va_filerev, tl);
 3282                                 tl += 2;
 3283                         } else
 3284                                 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
                              /* No entry follows, and this is the end of the directory. */
 3285                         *tl++ = nfsrv_nfs_false;
 3286                         *tl = nfsrv_nfs_true;
 3287                         FREE((caddr_t)rbuf, M_TEMP);
 3288                         FREE((caddr_t)cookies, M_TEMP);
 3289                         error = 0;
 3290                         goto nfsmout;
 3291                 }
 3292         }
 3293 
 3294         /*
 3295          * Check for degenerate cases of nothing useful read.
 3296          * If so go try again
 3297          */
 3298         cpos = rbuf;
 3299         cend = rbuf + siz;
 3300         dp = (struct dirent *)cpos;
 3301         cookiep = cookies;
 3302         /*
 3303          * For some reason FreeBSD's ufs_readdir() chooses to back the
 3304          * directory offset up to a block boundary, so it is necessary to
 3305          * skip over the records that precede the requested offset. This
 3306          * requires the assumption that file offset cookies monotonically
 3307          * increase.
 3308          */
 3309         while (cpos < cend && ncookies > 0 &&
 3310                 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 3311                  ((u_quad_t)(*cookiep)) <= toff)) {
 3312                 cpos += dp->d_reclen;
 3313                 dp = (struct dirent *)cpos;
 3314                 cookiep++;
 3315                 ncookies--;
 3316         }
              /* Everything read was skipped: read the next chunk from "off". */
 3317         if (cpos >= cend || ncookies == 0) {
 3318                 toff = off;
 3319                 siz = fullsiz;
 3320                 goto again;
 3321         }
 3322 
 3323         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
 3324         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
              /* v3 replies start with the directory attributes and the cookie verifier. */
 3325         if (v3) {
 3326                 nfsm_srvpostop_attr(getret, &at);
 3327                 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 3328                 txdr_hyper(at.va_filerev, tl);
 3329         }
              /*
               * bp/be track the write position and the end of the current
               * reply mbuf.  nfsm_clget is a macro defined elsewhere; it
               * presumably points tl at bp and moves on to a fresh mbuf when
               * the current one is full -- confirm against its definition.
               */
 3330         mp = mb;
 3331         bp = bpos;
 3332         be = bp + M_TRAILINGSPACE(mp);
 3333 
 3334         /* Loop through the records and build reply */
 3335         while (cpos < cend && ncookies > 0) {
 3336                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
 3337                         nlen = dp->d_namlen;
 3338                         rem = nfsm_rndup(nlen) - nlen;
                              /*
                               * Four words per entry (entry-follows flag,
                               * file id, name length, cookie) plus the padded
                               * name; v3 adds two words for the upper halves
                               * of the 64-bit file id and cookie.
                               */
 3339                         len += (4 * NFSX_UNSIGNED + nlen + rem);
 3340                         if (v3)
 3341                                 len += 2 * NFSX_UNSIGNED;
                              /* Entry would exceed the requested count: stop, not at eof. */
 3342                         if (len > cnt) {
 3343                                 eofflag = 0;
 3344                                 break;
 3345                         }
 3346                         /*
 3347                          * Build the directory record xdr from
 3348                          * the dirent entry.
 3349                          */
 3350                         nfsm_clget;
 3351                         *tl = nfsrv_nfs_true;
 3352                         bp += NFSX_UNSIGNED;
 3353                         if (v3) {
 3354                                 nfsm_clget;
 3355                                 *tl = 0;
 3356                                 bp += NFSX_UNSIGNED;
 3357                         }
 3358                         nfsm_clget;
 3359                         *tl = txdr_unsigned(dp->d_fileno);
 3360                         bp += NFSX_UNSIGNED;
 3361                         nfsm_clget;
 3362                         *tl = txdr_unsigned(nlen);
 3363                         bp += NFSX_UNSIGNED;
 3364 
 3365                         /* And loop around copying the name */
 3366                         xfer = nlen;
 3367                         cp = dp->d_name;
 3368                         while (xfer > 0) {
 3369                                 nfsm_clget;
 3370                                 if ((bp+xfer) > be)
 3371                                         tsiz = be-bp;
 3372                                 else
 3373                                         tsiz = xfer;
 3374                                 bcopy(cp, bp, tsiz);
 3375                                 bp += tsiz;
 3376                                 xfer -= tsiz;
 3377                                 if (xfer > 0)
 3378                                         cp += tsiz;
 3379                         }
 3380                         /* And null pad to an int32_t boundary. */
 3381                         for (i = 0; i < rem; i++)
 3382                                 *bp++ = '\0';
 3383                         nfsm_clget;
 3384 
 3385                         /* Finish off the record */
 3386                         if (v3) {
 3387                                 *tl = 0;
 3388                                 bp += NFSX_UNSIGNED;
 3389                                 nfsm_clget;
 3390                         }
 3391                         *tl = txdr_unsigned(*cookiep);
 3392                         bp += NFSX_UNSIGNED;
 3393                 }
 3394                 cpos += dp->d_reclen;
 3395                 dp = (struct dirent *)cpos;
 3396                 cookiep++;
 3397                 ncookies--;
 3398         }
 3399         vrele(vp);
 3400         vp = NULL;
              /* Terminate the list: "no entry follows", then the eof flag. */
 3401         nfsm_clget;
 3402         *tl = nfsrv_nfs_false;
 3403         bp += NFSX_UNSIGNED;
 3404         nfsm_clget;
 3405         if (eofflag)
 3406                 *tl = nfsrv_nfs_true;
 3407         else
 3408                 *tl = nfsrv_nfs_false;
 3409         bp += NFSX_UNSIGNED;
              /* Fix up the length of the mbuf that was being filled last. */
 3410         if (mp != mb) {
 3411                 if (bp < be)
 3412                         mp->m_len = bp - mtod(mp, caddr_t);
 3413         } else
 3414                 mp->m_len += bp - bpos;
 3415         FREE((caddr_t)rbuf, M_TEMP);
 3416         FREE((caddr_t)cookies, M_TEMP);
 3417 
 3418 nfsmout:
              /* Common exit: drop any vnode reference still held. */
 3419         if (vp)
 3420                 vrele(vp);
 3421         VFS_UNLOCK_GIANT(vfslocked);
 3422         return(error);
 3423 }
 3424 
 3425 int
 3426 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3427     struct thread *td, struct mbuf **mrq)
 3428 {
              /*
               * Service an NFSv3 READDIRPLUS request: like READDIR, but each
               * entry also carries the attributes and file handle of the
               * object it names.  Panics if invoked for a non-v3 request.
               *
               * nfsd supplies the request mbufs, the parse position, the
               * client address, the credentials and the protocol flags.
               * slp and the client address are handed to nfsrv_fhtovp();
               * td is passed to the vnode operations.  mrq is not referenced
               * directly in this function; it is presumably filled in by the
               * nfsm_reply macro (defined elsewhere) -- confirm there.
               *
               * Every path below that builds a reply clears "error" before
               * jumping to nfsmout, so the NFS status travels in the reply.
               */
 3429         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3430         struct sockaddr *nam = nfsd->nd_nam;
 3431         caddr_t dpos = nfsd->nd_dpos;
 3432         struct ucred *cred = nfsd->nd_cr;
 3433         char *bp, *be;
 3434         struct mbuf *mp;
 3435         struct dirent *dp;
 3436         caddr_t cp;
 3437         u_int32_t *tl;
 3438         caddr_t bpos;
 3439         struct mbuf *mb, *mreq;
 3440         char *cpos, *cend, *rbuf;
 3441         struct vnode *vp = NULL, *nvp;
 3442         struct flrep fl;
 3443         nfsfh_t nfh;
 3444         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
 3445         struct uio io;
 3446         struct iovec iv;
 3447         struct vattr va, at, *vap = &va;
 3448         struct nfs_fattr *fp;
 3449         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
 3450         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
 3451         u_quad_t off, toff, verf;
 3452         u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
 3453         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3454         int vfslocked;
 3455 
 3456         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3457         vfslocked = 0;
 3458         if (!v3)
 3459                 panic("nfsrv_readdirplus: v3 proc called on a v2 connection");
 3460         fhp = &nfh.fh_generic;
 3461         nfsm_srvmtofh(fhp);
              /*
               * Arguments: 64-bit cookie, 64-bit cookie verifier, then two
               * 32-bit limits.  The first limit (siz) bounds the directory
               * information, the second (cnt) bounds the whole reply.
               */
 3462         tl = nfsm_dissect_nonblock(u_int32_t *, 6 * NFSX_UNSIGNED);
 3463         toff = fxdr_hyper(tl);
 3464         tl += 2;
 3465         verf = fxdr_hyper(tl);
 3466         tl += 2;
 3467         siz = fxdr_unsigned(int, *tl++);
 3468         cnt = fxdr_unsigned(int, *tl);
 3469         off = toff;
              /*
               * Both limits come straight off the wire.  Clamp them to the
               * transfer limit before rounding siz up to a DIRBLKSIZ
               * multiple, treating values that decode as negative as a
               * request for the maximum.  This keeps the round-up from
               * overflowing and guarantees that siz/fullsiz are never
               * negative when they are later used as the MALLOC size and the
               * uio length.
               */
 3470         xfer = NFS_SRVMAXDATA(nfsd);
 3471         if (cnt < 0 || cnt > xfer)
 3472                 cnt = xfer;
 3473         if (siz < 0 || siz > xfer)
 3474                 siz = xfer;
 3475         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
              if (siz > xfer)
                      siz = xfer;
 3476         fullsiz = siz;
 3477         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
 3478             nam, &rdonly, TRUE);
              /* READDIRPLUS is only meaningful on a directory. */
 3479         if (!error && vp->v_type != VDIR) {
 3480                 error = ENOTDIR;
 3481                 vput(vp);
 3482                 vp = NULL;
 3483         }
 3484         if (error) {
 3485                 nfsm_reply(NFSX_UNSIGNED);
 3486                 nfsm_srvpostop_attr(getret, &at);
 3487                 error = 0;
 3488                 goto nfsmout;
 3489         }
 3490         error = getret = VOP_GETATTR(vp, &at, cred, td);
 3491 #if 0
 3492         /*
 3493          * XXX This check may be too strict for Solaris 2.5 clients.
 3494          */
 3495         if (!error && toff && verf && verf != at.va_filerev)
 3496                 error = NFSERR_BAD_COOKIE;
 3497 #endif
              /* The caller needs VEXEC access on the directory. */
 3498         if (!error)
 3499                 error = nfsrv_access(vp, VEXEC, cred, rdonly, td, 0);
 3500         if (error) {
 3501                 vput(vp);
 3502                 vp = NULL;
 3503                 nfsm_reply(NFSX_V3POSTOPATTR);
 3504                 nfsm_srvpostop_attr(getret, &at);
 3505                 error = 0;
 3506                 goto nfsmout;
 3507         }
 3508         VOP_UNLOCK(vp, 0, td);
 3509         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
 3510 again:
              /*
               * (Re)build the uio so that VOP_READDIR() reads up to fullsiz
               * bytes of directory data, starting at offset "off", into rbuf.
               */
 3511         iv.iov_base = rbuf;
 3512         iv.iov_len = fullsiz;
 3513         io.uio_iov = &iv;
 3514         io.uio_iovcnt = 1;
 3515         io.uio_offset = (off_t)off;
 3516         io.uio_resid = fullsiz;
 3517         io.uio_segflg = UIO_SYSSPACE;
 3518         io.uio_rw = UIO_READ;
 3519         io.uio_td = NULL;
 3520         eofflag = 0;
 3521         vn_lock(vp, LK_SHARED | LK_RETRY, td);
              /* Drop the cookie array left over from a previous pass. */
 3522         if (cookies) {
 3523                 free((caddr_t)cookies, M_TEMP);
 3524                 cookies = NULL;
 3525         }
 3526         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
 3527         off = (u_quad_t)io.uio_offset;
 3528         getret = VOP_GETATTR(vp, &at, cred, td);
 3529         VOP_UNLOCK(vp, 0, td);
              /*
               * Each reply entry carries a cookie, so a successful read that
               * produced no cookie array cannot be served.
               */
 3530         if (!cookies && !error)
 3531                 error = NFSERR_PERM;
 3532         if (!error)
 3533                 error = getret;
 3534         if (error) {
 3535                 vrele(vp);
 3536                 vp = NULL;
 3537                 if (cookies)
 3538                         free((caddr_t)cookies, M_TEMP);
 3539                 free((caddr_t)rbuf, M_TEMP);
 3540                 nfsm_reply(NFSX_V3POSTOPATTR);
 3541                 nfsm_srvpostop_attr(getret, &at);
 3542                 error = 0;
 3543                 goto nfsmout;
 3544         }
              /* Shrink siz to the number of bytes actually read. */
 3545         if (io.uio_resid) {
 3546                 siz -= io.uio_resid;
 3547 
 3548                 /*
 3549                  * If nothing read, return eof
 3550                  * rpc reply
 3551                  */
 3552                 if (siz == 0) {
 3553                         vrele(vp);
 3554                         vp = NULL;
 3555                         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
 3556                                 2 * NFSX_UNSIGNED);
 3557                         nfsm_srvpostop_attr(getret, &at);
 3558                         tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 3559                         txdr_hyper(at.va_filerev, tl);
 3560                         tl += 2;
                              /* No entry follows, and this is the end of the directory. */
 3561                         *tl++ = nfsrv_nfs_false;
 3562                         *tl = nfsrv_nfs_true;
 3563                         FREE((caddr_t)cookies, M_TEMP);
 3564                         FREE((caddr_t)rbuf, M_TEMP);
 3565                         error = 0;
 3566                         goto nfsmout;
 3567                 }
 3568         }
 3569 
 3570         /*
 3571          * Check for degenerate cases of nothing useful read.
 3572          * If so go try again
 3573          */
 3574         cpos = rbuf;
 3575         cend = rbuf + siz;
 3576         dp = (struct dirent *)cpos;
 3577         cookiep = cookies;
 3578         /*
 3579          * For some reason FreeBSD's ufs_readdir() chooses to back the
 3580          * directory offset up to a block boundary, so it is necessary to
 3581          * skip over the records that precede the requested offset. This
 3582          * requires the assumption that file offset cookies monotonically
 3583          * increase.
 3584          */
 3585         while (cpos < cend && ncookies > 0 &&
 3586                 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 3587                  ((u_quad_t)(*cookiep)) <= toff)) {
 3588                 cpos += dp->d_reclen;
 3589                 dp = (struct dirent *)cpos;
 3590                 cookiep++;
 3591                 ncookies--;
 3592         }
              /* Everything read was skipped: read the next chunk from "off". */
 3593         if (cpos >= cend || ncookies == 0) {
 3594                 toff = off;
 3595                 siz = fullsiz;
 3596                 goto again;
 3597         }
 3598 
 3599         /*
 3600          * Probe one of the directory entries to see if the filesystem
 3601          * supports VGET.
 3602          */
 3603         error = VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE, &nvp);
 3604         if (error) {
 3605                 if (error == EOPNOTSUPP)
 3606                         error = NFSERR_NOTSUPP;
 3607                 else
 3608                         error = NFSERR_SERVERFAULT;
 3609                 vrele(vp);
 3610                 vp = NULL;
 3611                 free((caddr_t)cookies, M_TEMP);
 3612                 free((caddr_t)rbuf, M_TEMP);
 3613                 nfsm_reply(NFSX_V3POSTOPATTR);
 3614                 nfsm_srvpostop_attr(getret, &at);
 3615                 error = 0;
 3616                 goto nfsmout;
 3617         }
 3618         vput(nvp);
 3619         nvp = NULL;
 3620 
              /*
               * len accumulates the size of the whole reply and is checked
               * against cnt; dirlen accumulates the directory information
               * only and is checked against fullsiz.
               */
 3621         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
 3622             2 * NFSX_UNSIGNED;
 3623         nfsm_reply(cnt);
 3624         nfsm_srvpostop_attr(getret, &at);
 3625         tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 3626         txdr_hyper(at.va_filerev, tl);
              /*
               * bp/be track the write position and the end of the current
               * reply mbuf.  nfsm_clget is a macro defined elsewhere; it
               * presumably points tl at bp and moves on to a fresh mbuf when
               * the current one is full -- confirm against its definition.
               */
 3627         mp = mb;
 3628         bp = bpos;
 3629         be = bp + M_TRAILINGSPACE(mp);
 3630 
 3631         /* Loop through the records and build reply */
 3632         while (cpos < cend && ncookies > 0) {
 3633                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
 3634                         nlen = dp->d_namlen;
 3635                         rem = nfsm_rndup(nlen)-nlen;
 3636 
 3637                         /*
 3638                          * For readdir_and_lookup get the vnode using
 3639                          * the file number.
 3640                          */
 3641                         if (VFS_VGET(vp->v_mount, dp->d_fileno, LK_EXCLUSIVE,
 3642                             &nvp))
 3643                                 goto invalid;
                              /* Build the entry's file handle in fl.fl_nfh (via nfhp). */
 3644                         bzero((caddr_t)nfhp, NFSX_V3FH);
 3645                         nfhp->fh_fsid =
 3646                                 nvp->v_mount->mnt_stat.f_fsid;
 3647                         /*
 3648                          * XXXRW: Assert the mountpoints are the same so that
 3649                          * we know that acquiring Giant based on the
 3650                          * directory is the right thing for the child.
 3651                          */
 3652                         KASSERT(nvp->v_mount == vp->v_mount,
 3653                             ("nfsrv_readdirplus: nvp mount != vp mount"));
                              /*
                               * An entry whose handle or attributes cannot be
                               * obtained is silently left out of the reply.
                               */
 3654                         if (VOP_VPTOFH(nvp, &nfhp->fh_fid)) {
 3655                                 vput(nvp);
 3656                                 nvp = NULL;
 3657                                 goto invalid;
 3658                         }
 3659                         if (VOP_GETATTR(nvp, vap, cred, td)) {
 3660                                 vput(nvp);
 3661                                 nvp = NULL;
 3662                                 goto invalid;
 3663                         }
 3664                         vput(nvp);
 3665                         nvp = NULL;
 3666 
 3667                         /*
 3668                          * If either the dircount or maxcount will be
 3669                          * exceeded, get out now. Both of these lengths
 3670                          * are calculated conservatively, including all
 3671                          * XDR overheads.
 3672                          */
 3673                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
 3674                                 NFSX_V3POSTOPATTR);
 3675                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
 3676                         if (len > cnt || dirlen > fullsiz) {
 3677                                 eofflag = 0;
 3678                                 break;
 3679                         }
 3680 
 3681                         /*
 3682                          * Build the directory record xdr from
 3683                          * the dirent entry.
 3684                          */
 3685                         fp = (struct nfs_fattr *)&fl.fl_fattr;
 3686                         nfsm_srvfillattr(vap, fp);
 3687                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
 3688                         fl.fl_fhok = nfsrv_nfs_true;
 3689                         fl.fl_postopok = nfsrv_nfs_true;
 3690                         fl.fl_off.nfsuquad[0] = 0;
 3691                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
 3692 
                              /*
                               * Entry-follows flag, 64-bit file id (upper
                               * word zero), then the name length.
                               */
 3693                         nfsm_clget;
 3694                         *tl = nfsrv_nfs_true;
 3695                         bp += NFSX_UNSIGNED;
 3696                         nfsm_clget;
 3697                         *tl = 0;
 3698                         bp += NFSX_UNSIGNED;
 3699                         nfsm_clget;
 3700                         *tl = txdr_unsigned(dp->d_fileno);
 3701                         bp += NFSX_UNSIGNED;
 3702                         nfsm_clget;
 3703                         *tl = txdr_unsigned(nlen);
 3704                         bp += NFSX_UNSIGNED;
 3705 
 3706                         /* And loop around copying the name */
 3707                         xfer = nlen;
 3708                         cp = dp->d_name;
 3709                         while (xfer > 0) {
 3710                                 nfsm_clget;
 3711                                 if ((bp + xfer) > be)
 3712                                         tsiz = be - bp;
 3713                                 else
 3714                                         tsiz = xfer;
 3715                                 bcopy(cp, bp, tsiz);
 3716                                 bp += tsiz;
 3717                                 xfer -= tsiz;
 3718                                 if (xfer > 0)
 3719                                         cp += tsiz;
 3720                         }
 3721                         /* And null pad to an int32_t boundary. */
 3722                         for (i = 0; i < rem; i++)
 3723                                 *bp++ = '\0';
 3724 
 3725                         /*
 3726                          * Now copy the flrep structure out.
 3727                          */
 3728                         xfer = sizeof (struct flrep);
 3729                         cp = (caddr_t)&fl;
 3730                         while (xfer > 0) {
 3731                                 nfsm_clget;
 3732                                 if ((bp + xfer) > be)
 3733                                         tsiz = be - bp;
 3734                                 else
 3735                                         tsiz = xfer;
 3736                                 bcopy(cp, bp, tsiz);
 3737                                 bp += tsiz;
 3738                                 xfer -= tsiz;
 3739                                 if (xfer > 0)
 3740                                         cp += tsiz;
 3741                         }
 3742                 }
 3743 invalid:
                      /* Advance to the next dirent and its cookie. */
 3744                 cpos += dp->d_reclen;
 3745                 dp = (struct dirent *)cpos;
 3746                 cookiep++;
 3747                 ncookies--;
 3748         }
 3749         vrele(vp);
 3750         vp = NULL;
              /* Terminate the list: "no entry follows", then the eof flag. */
 3751         nfsm_clget;
 3752         *tl = nfsrv_nfs_false;
 3753         bp += NFSX_UNSIGNED;
 3754         nfsm_clget;
 3755         if (eofflag)
 3756                 *tl = nfsrv_nfs_true;
 3757         else
 3758                 *tl = nfsrv_nfs_false;
 3759         bp += NFSX_UNSIGNED;
              /* Fix up the length of the mbuf that was being filled last. */
 3760         if (mp != mb) {
 3761                 if (bp < be)
 3762                         mp->m_len = bp - mtod(mp, caddr_t);
 3763         } else
 3764                 mp->m_len += bp - bpos;
 3765         FREE((caddr_t)cookies, M_TEMP);
 3766         FREE((caddr_t)rbuf, M_TEMP);
 3767 nfsmout:
              /* Common exit: drop any vnode reference still held. */
 3768         if (vp)
 3769                 vrele(vp);
 3770         VFS_UNLOCK_GIANT(vfslocked);
 3771         return(error);
 3772 }
 3773 
 3774 /*
 3775  * nfs commit service
 3776  */
 3777 int
 3778 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3779     struct thread *td, struct mbuf **mrq)
 3780 {
 3781         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3782         struct sockaddr *nam = nfsd->nd_nam;
 3783         caddr_t dpos = nfsd->nd_dpos;
 3784         struct ucred *cred = nfsd->nd_cr;
 3785         struct vattr bfor, aft;
 3786         struct vnode *vp = NULL;
 3787         nfsfh_t nfh;
 3788         fhandle_t *fhp;
 3789         u_int32_t *tl;
 3790         caddr_t bpos;
 3791         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
 3792         struct mbuf *mb, *mreq;
 3793         u_quad_t off;
 3794         struct mount *mp = NULL;
 3795         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3796         int tvfslocked;
 3797         int vfslocked;
 3798 
 3799         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3800         vfslocked = 0;
 3801         if (!v3)
 3802                 panic("nfsrv_commit: v3 proc called on a v2 connection");
 3803         fhp = &nfh.fh_generic;
 3804         nfsm_srvmtofh(fhp);
 3805         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 3806                 error = ESTALE;
 3807                 goto ereply;
 3808         }
 3809         vfslocked = VFS_LOCK_GIANT(mp);
 3810         (void) vn_start_write(NULL, &mp, V_WAIT);
 3811         vfs_rel(mp);            /* The write holds a ref. */
 3812         tl = nfsm_dissect_nonblock(u_int32_t *, 3 * NFSX_UNSIGNED);
 3813 
 3814         /*
 3815          * XXX At this time VOP_FSYNC() does not accept offset and byte
 3816          * count parameters, so these arguments are useless (someday maybe).
 3817          */
 3818         off = fxdr_hyper(tl);
 3819         tl += 2;
 3820         cnt = fxdr_unsigned(int, *tl);
 3821         error = nfsrv_fhtovp(fhp, 1, &vp, &tvfslocked, cred, slp,
 3822             nam, &rdonly, TRUE);
 3823         vfslocked = nfsrv_lockedpair(vfslocked, tvfslocked);
 3824         if (error) {
 3825                 nfsm_reply(2 * NFSX_UNSIGNED);
 3826                 nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
 3827                 error = 0;
 3828                 goto nfsmout;
 3829         }
 3830         for_ret = VOP_GETATTR(vp, &bfor, cred, td);
 3831 
 3832         if (cnt > MAX_COMMIT_COUNT) {
 3833                 /*
 3834                  * Give up and do the whole thing
 3835                  */
 3836                 if (vp->v_object &&
 3837                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 3838                         VM_OBJECT_LOCK(vp->v_object);
 3839                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 3840                         VM_OBJECT_UNLOCK(vp->v_object);
 3841                 }
 3842                 error = VOP_FSYNC(vp, MNT_WAIT, td);
 3843         } else {
 3844                 /*
 3845                  * Locate and synchronously write any buffers that fall
 3846                  * into the requested range.  Note:  we are assuming that
 3847                  * f_iosize is a power of 2.
 3848                  */
 3849                 int iosize = vp->v_mount->mnt_stat.f_iosize;
 3850                 int iomask = iosize - 1;
 3851                 int s;
 3852                 daddr_t lblkno;
 3853 
 3854                 /*
 3855                  * Align to iosize boundary, super-align to page boundary.
 3856                  */
 3857                 if (off & iomask) {
 3858                         cnt += off & iomask;
 3859                         off &= ~(u_quad_t)iomask;
 3860                 }
 3861                 if (off & PAGE_MASK) {
 3862                         cnt += off & PAGE_MASK;
 3863                         off &= ~(u_quad_t)PAGE_MASK;
 3864                 }
 3865                 lblkno = off / iosize;
 3866 
 3867                 if (vp->v_object &&
 3868                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 3869                         VM_OBJECT_LOCK(vp->v_object);
 3870                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
 3871                         VM_OBJECT_UNLOCK(vp->v_object);
 3872                 }
 3873 
 3874                 s = splbio();
 3875                 VI_LOCK(vp);
 3876                 while (cnt > 0) {
 3877                         struct buf *bp;
 3878 
 3879                         /*
 3880                          * If we have a buffer and it is marked B_DELWRI we
 3881                          * have to lock and write it.  Otherwise the prior
 3882                          * write is assumed to have already been committed.
 3883                          *
 3884                          * gbincore() can return invalid buffers now so we
 3885                          * have to check that bit as well (though B_DELWRI
 3886                          * should not be set if B_INVAL is set there could be
 3887                          * a race here since we haven't locked the buffer).
 3888                          */
 3889                         if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 3890                                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 3891                                     LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) {
 3892                                         VI_LOCK(vp);
 3893                                         continue; /* retry */
 3894                                 }
 3895                                 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
 3896                                     B_DELWRI) {
 3897                                         bremfree(bp);
 3898                                         bp->b_flags &= ~B_ASYNC;
 3899                                         bwrite(bp);
 3900                                         ++nfs_commit_miss;
 3901                                 } else
 3902                                         BUF_UNLOCK(bp);
 3903                                 VI_LOCK(vp);
 3904                         }
 3905                         ++nfs_commit_blks;
 3906                         if (cnt < iosize)
 3907                                 break;
 3908                         cnt -= iosize;
 3909                         ++lblkno;
 3910                 }
 3911                 VI_UNLOCK(vp);
 3912                 splx(s);
 3913         }
 3914 
 3915         aft_ret = VOP_GETATTR(vp, &aft, cred, td);
 3916         vput(vp);
 3917         vp = NULL;
 3918 ereply:
 3919         nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
 3920         nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
 3921         if (!error) {
 3922                 tl = nfsm_build(u_int32_t *, NFSX_V3WRITEVERF);
 3923                 if (nfsver.tv_sec == 0)
 3924                         nfsver = boottime;
 3925                 *tl++ = txdr_unsigned(nfsver.tv_sec);
 3926                 *tl = txdr_unsigned(nfsver.tv_usec);
 3927         } else {
 3928                 error = 0;
 3929         }
 3930 nfsmout:
 3931         if (vp)
 3932                 vput(vp);
 3933         vn_finished_write(mp);
 3934         VFS_UNLOCK_GIANT(vfslocked);
 3935         return(error);
 3936 }
 3937 
 3938 /*
 3939  * nfs statfs service
 3940  */
 3941 int
 3942 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3943     struct thread *td, struct mbuf **mrq)
 3944 {
 3945         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3946         struct sockaddr *nam = nfsd->nd_nam;
 3947         caddr_t dpos = nfsd->nd_dpos;
 3948         struct ucred *cred = nfsd->nd_cr;
 3949         struct statfs *sf;
 3950         struct nfs_statfs *sfp;
 3951         caddr_t bpos;
 3952         int error = 0, rdonly, getret = 1;
 3953         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3954         struct mbuf *mb, *mreq;
 3955         struct vnode *vp = NULL;
 3956         struct vattr at;
 3957         nfsfh_t nfh;
 3958         fhandle_t *fhp;
 3959         struct statfs statfs;
 3960         u_quad_t tval;
 3961         int vfslocked;
 3962 
 3963         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3964         vfslocked = 0;
 3965         fhp = &nfh.fh_generic;
 3966         nfsm_srvmtofh(fhp);
 3967         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
 3968             nam, &rdonly, TRUE);
 3969         if (error) {
 3970                 nfsm_reply(NFSX_UNSIGNED);
 3971                 if (v3)
 3972                         nfsm_srvpostop_attr(getret, &at);
 3973                 error = 0;
 3974                 goto nfsmout;
 3975         }
 3976         sf = &statfs;
 3977         error = VFS_STATFS(vp->v_mount, sf, td);
 3978         getret = VOP_GETATTR(vp, &at, cred, td);
 3979         vput(vp);
 3980         vp = NULL;
 3981         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
 3982         if (v3)
 3983                 nfsm_srvpostop_attr(getret, &at);
 3984         if (error) {
 3985                 error = 0;
 3986                 goto nfsmout;
 3987         }
 3988         sfp = nfsm_build(struct nfs_statfs *, NFSX_STATFS(v3));
 3989         if (v3) {
 3990                 tval = (u_quad_t)sf->f_blocks;
 3991                 tval *= (u_quad_t)sf->f_bsize;
 3992                 txdr_hyper(tval, &sfp->sf_tbytes);
 3993                 tval = (u_quad_t)sf->f_bfree;
 3994                 tval *= (u_quad_t)sf->f_bsize;
 3995                 txdr_hyper(tval, &sfp->sf_fbytes);
 3996                 /*
 3997                  * Don't send negative values for available space,
 3998                  * since this field is unsigned in the NFS protocol.
 3999                  * Otherwise, the client would see absurdly high
 4000                  * numbers for free space.
 4001                  */
 4002                 if (sf->f_bavail < 0)
 4003                         tval = 0;
 4004                 else
 4005                         tval = (u_quad_t)sf->f_bavail;
 4006                 tval *= (u_quad_t)sf->f_bsize;
 4007                 txdr_hyper(tval, &sfp->sf_abytes);
 4008                 sfp->sf_tfiles.nfsuquad[0] = 0;
 4009                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
 4010                 sfp->sf_ffiles.nfsuquad[0] = 0;
 4011                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
 4012                 sfp->sf_afiles.nfsuquad[0] = 0;
 4013                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
 4014                 sfp->sf_invarsec = 0;
 4015         } else {
 4016                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
 4017                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
 4018                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
 4019                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
 4020                 if (sf->f_bavail < 0)
 4021                         sfp->sf_bavail = 0;
 4022                 else
 4023                         sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
 4024         }
 4025 nfsmout:
 4026         if (vp)
 4027                 vput(vp);
 4028         VFS_UNLOCK_GIANT(vfslocked);
 4029         return(error);
 4030 }
 4031 
 4032 /*
 4033  * nfs fsinfo service
 4034  */
 4035 int
 4036 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4037     struct thread *td, struct mbuf **mrq)
 4038 {
 4039         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 4040         struct sockaddr *nam = nfsd->nd_nam;
 4041         caddr_t dpos = nfsd->nd_dpos;
 4042         struct ucred *cred = nfsd->nd_cr;
 4043         struct nfsv3_fsinfo *sip;
 4044         caddr_t bpos;
 4045         int error = 0, rdonly, getret = 1, pref;
 4046         struct mbuf *mb, *mreq;
 4047         struct vnode *vp = NULL;
 4048         struct vattr at;
 4049         nfsfh_t nfh;
 4050         fhandle_t *fhp;
 4051         u_quad_t maxfsize;
 4052         struct statfs sb;
 4053         int v3 = (nfsd->nd_flag & ND_NFSV3);
 4054         int vfslocked;
 4055 
 4056         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4057         if (!v3)
 4058                 panic("nfsrv_fsinfo: v3 proc called on a v2 connection");
 4059         fhp = &nfh.fh_generic;
 4060         vfslocked = 0;
 4061         nfsm_srvmtofh(fhp);
 4062         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
 4063             nam, &rdonly, TRUE);
 4064         if (error) {
 4065                 nfsm_reply(NFSX_UNSIGNED);
 4066                 nfsm_srvpostop_attr(getret, &at);
 4067                 error = 0;
 4068                 goto nfsmout;
 4069         }
 4070 
 4071         /* XXX Try to make a guess on the max file size. */
 4072         VFS_STATFS(vp->v_mount, &sb, td);
 4073         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
 4074 
 4075         getret = VOP_GETATTR(vp, &at, cred, td);
 4076         vput(vp);
 4077         vp = NULL;
 4078         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
 4079         nfsm_srvpostop_attr(getret, &at);
 4080         sip = nfsm_build(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
 4081 
 4082         /*
 4083          * XXX
 4084          * There should be filesystem VFS OP(s) to get this information.
 4085          * For now, assume ufs.
 4086          */
 4087         if (slp->ns_so->so_type == SOCK_DGRAM)
 4088                 pref = NFS_MAXDGRAMDATA;
 4089         else
 4090                 pref = NFS_MAXDATA;
 4091         sip->fs_rtmax = txdr_unsigned(pref);
 4092         sip->fs_rtpref = txdr_unsigned(pref);
 4093         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
 4094         sip->fs_wtmax = txdr_unsigned(pref);
 4095         sip->fs_wtpref = txdr_unsigned(pref);
 4096         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
 4097         sip->fs_dtpref = txdr_unsigned(pref);
 4098         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
 4099         sip->fs_timedelta.nfsv3_sec = 0;
 4100         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
 4101         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
 4102                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
 4103                 NFSV3FSINFO_CANSETTIME);
 4104 nfsmout:
 4105         if (vp)
 4106                 vput(vp);
 4107         VFS_UNLOCK_GIANT(vfslocked);
 4108         return(error);
 4109 }
 4110 
 4111 /*
 4112  * nfs pathconf service
 4113  */
 4114 int
 4115 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4116     struct thread *td, struct mbuf **mrq)
 4117 {
 4118         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 4119         struct sockaddr *nam = nfsd->nd_nam;
 4120         caddr_t dpos = nfsd->nd_dpos;
 4121         struct ucred *cred = nfsd->nd_cr;
 4122         struct nfsv3_pathconf *pc;
 4123         caddr_t bpos;
 4124         int error = 0, rdonly, getret = 1;
 4125         register_t linkmax, namemax, chownres, notrunc;
 4126         struct mbuf *mb, *mreq;
 4127         struct vnode *vp = NULL;
 4128         struct vattr at;
 4129         nfsfh_t nfh;
 4130         fhandle_t *fhp;
 4131         int v3 = (nfsd->nd_flag & ND_NFSV3);
 4132         int vfslocked;
 4133 
 4134         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4135         if (!v3)
 4136                 panic("nfsrv_pathconf: v3 proc called on a v2 connection");
 4137         vfslocked = 0;
 4138         fhp = &nfh.fh_generic;
 4139         nfsm_srvmtofh(fhp);
 4140         error = nfsrv_fhtovp(fhp, 1, &vp, &vfslocked, cred, slp,
 4141             nam, &rdonly, TRUE);
 4142         if (error) {
 4143                 nfsm_reply(NFSX_UNSIGNED);
 4144                 nfsm_srvpostop_attr(getret, &at);
 4145                 error = 0;
 4146                 goto nfsmout;
 4147         }
 4148         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
 4149         if (!error)
 4150                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
 4151         if (!error)
 4152                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
 4153         if (!error)
 4154                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
 4155         getret = VOP_GETATTR(vp, &at, cred, td);
 4156         vput(vp);
 4157         vp = NULL;
 4158         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
 4159         nfsm_srvpostop_attr(getret, &at);
 4160         if (error) {
 4161                 error = 0;
 4162                 goto nfsmout;
 4163         }
 4164         pc = nfsm_build(struct nfsv3_pathconf *, NFSX_V3PATHCONF);
 4165 
 4166         pc->pc_linkmax = txdr_unsigned(linkmax);
 4167         pc->pc_namemax = txdr_unsigned(namemax);
 4168         pc->pc_notrunc = txdr_unsigned(notrunc);
 4169         pc->pc_chownrestricted = txdr_unsigned(chownres);
 4170 
 4171         /*
 4172          * These should probably be supported by VOP_PATHCONF(), but
 4173          * until msdosfs is exportable (why would you want to?), the
 4174          * Unix defaults should be ok.
 4175          */
 4176         pc->pc_caseinsensitive = nfsrv_nfs_false;
 4177         pc->pc_casepreserving = nfsrv_nfs_true;
 4178 nfsmout:
 4179         if (vp)
 4180                 vput(vp);
 4181         VFS_UNLOCK_GIANT(vfslocked);
 4182         return(error);
 4183 }
 4184 
 4185 /*
 4186  * Null operation, used by clients to ping server
 4187  */
 4188 /* ARGSUSED */
 4189 int
 4190 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4191     struct thread *td, struct mbuf **mrq)
 4192 {
 4193         struct mbuf *mrep = nfsd->nd_mrep;
 4194         caddr_t bpos;
 4195         int error = NFSERR_RETVOID;
 4196         struct mbuf *mb, *mreq;
 4197 
 4198         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4199         nfsm_reply(0);
 4200 nfsmout:
 4201         return (error);
 4202 }
 4203 
 4204 /*
 4205  * No operation, used for obsolete procedures
 4206  */
 4207 /* ARGSUSED */
 4208 int
 4209 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 4210     struct thread *td, struct mbuf **mrq)
 4211 {
 4212         struct mbuf *mrep = nfsd->nd_mrep;
 4213         caddr_t bpos;
 4214         int error;
 4215         struct mbuf *mb, *mreq;
 4216 
 4217         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4218         if (nfsd->nd_repstat)
 4219                 error = nfsd->nd_repstat;
 4220         else
 4221                 error = EPROCUNAVAIL;
 4222         nfsm_reply(0);
 4223         error = 0;
 4224 nfsmout:
 4225         return (error);
 4226 }
 4227 
 4228 /*
 4229  * Perform access checking for vnodes obtained from file handles that would
 4230  * refer to files already opened by a Unix client. You cannot just use
 4231  * vn_writechk() and VOP_ACCESS() for two reasons.
 4232  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
 4233  *     case.
 4234  * 2 - The owner is to be given access irrespective of mode bits for some
 4235  *     operations, so that processes that chmod after opening a file don't
 4236  *     break. I don't like this because it opens a security hole, but since
 4237  *     the nfs server opens a security hole the size of a barn door anyhow,
 4238  *     what the heck.
 4239  *
 4240  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
 4241  * will return EPERM instead of EACCESS. EPERM is always an error.
 4242  */
 4243 static int
 4244 nfsrv_access(struct vnode *vp, int flags, struct ucred *cred,
 4245     int rdonly, struct thread *td, int override)
 4246 {
 4247         struct vattr vattr;
 4248         int error;
 4249 
 4250         VFS_ASSERT_GIANT(vp->v_mount);
 4251 
 4252         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 4253 
 4254         if (flags & VWRITE) {
 4255                 /* Just vn_writechk() changed to check rdonly */
 4256                 /*
 4257                  * Disallow write attempts on read-only filesystems;
 4258                  * unless the file is a socket or a block or character
 4259                  * device resident on the filesystem.
 4260                  */
 4261                 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 4262                         switch (vp->v_type) {
 4263                         case VREG:
 4264                         case VDIR:
 4265                         case VLNK:
 4266                                 return (EROFS);
 4267                         default:
 4268                                 break;
 4269                         }
 4270                 }
 4271                 /*
 4272                  * If there's shared text associated with
 4273                  * the inode, we can't allow writing.
 4274                  */
 4275                 if (vp->v_vflag & VV_TEXT)
 4276                         return (ETXTBSY);
 4277         }
 4278 
 4279         error = VOP_GETATTR(vp, &vattr, cred, td);
 4280         if (error)
 4281                 return (error);
 4282         error = VOP_ACCESS(vp, flags, cred, td);
 4283         /*
 4284          * Allow certain operations for the owner (reads and writes
 4285          * on files that are already open).
 4286          */
 4287         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
 4288                 error = 0;
 4289         return (error);
 4290 }

Cache object: ad134796c3410adfccab4958776e3bfd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.