The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/nfsserver/nfs_serv.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/10.0/sys/nfsserver/nfs_serv.c 251171 2013-05-31 00:43:41Z jeff $");
   37 
   38 /*
   39  * nfs version 2 and 3 server calls to vnode ops
   40  * - these routines generally have 3 phases
   41  *   1 - break down and validate rpc request in mbuf list
   42  *   2 - do the vnode ops for the request
   43  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
   44  *   3 - build the rpc reply in an mbuf list
   45  *   nb:
   46  *      - do not mix the phases, since the nfsm_?? macros can return failures
   47  *        on a bad rpc or similar and do not do any vrele() or vput()'s
   48  *
   49  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
   50  *      error number iff error != 0 whereas
   51  *      returning an error from the server function implies a fatal error
   52  *      such as a badly constructed rpc request that should be dropped without
   53  *      a reply.
   54  *      For nfsm_reply(), the case where error == EBADRPC is treated
   55  *      specially; after constructing a reply, it does an immediate
   56  *      `goto nfsmout' to avoid getting any V3 post-op status appended.
   57  *
   58  * Other notes:
   59  *      Warning: always pay careful attention to resource cleanup on return
   60  *      and note that nfsm_*() macros can terminate a procedure on certain
   61  *      errors.
   62  *
   63  *      lookup() and namei()
   64  *      may return garbage in various structural fields/return elements
   65  *      if an error is returned, and may garbage up nd.ni_dvp even if no
   66  *      error is returned and you did not request LOCKPARENT or WANTPARENT.
   67  *
   68  *      We use the ni_cnd.cn_flags 'HASBUF' flag to track whether the name
   69  *      buffer has been freed or not.
   70  */
   71 
   72 #include <sys/param.h>
   73 #include <sys/systm.h>
   74 #include <sys/proc.h>
   75 #include <sys/namei.h>
   76 #include <sys/unistd.h>
   77 #include <sys/vnode.h>
   78 #include <sys/mount.h>
   79 #include <sys/socket.h>
   80 #include <sys/socketvar.h>
   81 #include <sys/malloc.h>
   82 #include <sys/mbuf.h>
   83 #include <sys/priv.h>
   84 #include <sys/dirent.h>
   85 #include <sys/stat.h>
   86 #include <sys/kernel.h>
   87 #include <sys/sysctl.h>
   88 #include <sys/bio.h>
   89 #include <sys/buf.h>
   90 #include <sys/rwlock.h>
   91 
   92 #include <vm/vm.h>
   93 #include <vm/vm_extern.h>
   94 #include <vm/vm_object.h>
   95 
   96 #include <nfs/nfsproto.h>
   97 #include <nfsserver/nfs.h>
   98 #include <nfs/xdr_subs.h>
   99 #include <nfsserver/nfsm_subs.h>
  100 
  101 FEATURE(nfsserver, "NFS server");
  102 
  103 #ifdef NFSRV_DEBUG
      /*
       * Debug trace hook.  Callers pass one fully parenthesized printf
       * argument list, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
       * when NFSRV_DEBUG is not defined the call expands to nothing.
       */
  104 #define nfsdbprintf(info)       printf info
  105 #else
  106 #define nfsdbprintf(info)
  107 #endif
  108 
      /* NOTE(review): not referenced in this part of the file; users are elsewhere. */
  109 #define MAX_COMMIT_COUNT        (1024 * 1024)
  110 
      /*
       * Tuning constants for nfsrv_sequential_heuristic():
       *   MAX_REORDERED_RPC  multiplier for the offset window inside which an
       *                      out-of-sequence request is treated as a reordered
       *                      RPC and the sequential score is left alone
       *   NUM_HEURISTIC      number of slots in the nfsheur[] table
       *   NHUSE_INIT         nh_use value given to a freshly claimed slot
       *   NHUSE_INC          amount added to nh_use each time a slot is used
       *   NHUSE_MAX          ceiling applied to nh_use
       */
  111 #define MAX_REORDERED_RPC       16
  112 #define NUM_HEURISTIC           1031
  113 #define NHUSE_INIT              64
  114 #define NHUSE_INC               16
  115 #define NHUSE_MAX               2048
  116 
  117 static struct nfsheur {
              /*
               * One slot of the per-file sequential-access hint table.
               * Slots are looked up, claimed and aged by
               * nfsrv_sequential_heuristic(); the table is file-scope and
               * zero-initialised like any other static object.
               */
  118         struct vnode *nh_vp;    /* vp to match (unreferenced pointer) */
  119         off_t nh_nextoff;       /* next offset for sequential detection */
  120         int nh_use;             /* use count for selection */
  121         int nh_seqcount;        /* heuristic */
  122 } nfsheur[NUM_HEURISTIC];
  123 
  124 /* Global vars */
  125 
      /*
       * NOTE(review): presumably the write-gathering delays for v2 and v3
       * (the v2 default is NFS_GATHERDELAY scaled by 1000, the v3 default is
       * 0); units and consumers are outside this excerpt -- confirm.
       */
  126 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
  127 int nfsrvw_procrastinate_v3 = 0;
  128 
      /* Zero-initialised; not referenced in this part of the file. */
  129 static struct timeval   nfsver = { 0 };
  130 
      /* Root of the vfs.nfsrv sysctl subtree; the entries below hang off it. */
  131 SYSCTL_NODE(_vfs, OID_AUTO, nfsrv, CTLFLAG_RW, 0, "NFS server");
  132 
      /* Backing variables for the read/write integer sysctls declared below. */
  133 static int nfs_async;
  134 static int nfs_commit_blks;
  135 static int nfs_commit_miss;
  136 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
  137     "Tell client that writes were synced even though they were not");
  138 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
  139     "Number of completed commits");
      /* NOTE(review): commit_miss is exported with an empty description. */
  140 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
  141 
      /* Server statistics structure, exported read/write as an opaque struct. */
  142 struct nfsrvstats nfsrvstats;
  143 SYSCTL_STRUCT(_vfs_nfsrv, NFS_NFSRVSTATS, nfsrvstats, CTLFLAG_RW,
  144         &nfsrvstats, nfsrvstats, "S,nfsrvstats");
  145 
      /*
       * Forward declaration; called by the access and setattr services
       * below.  The definition is not part of this excerpt.
       */
  146 static int      nfsrv_access(struct vnode *, accmode_t, struct ucred *,
  147                     int, int);
  148 
  149 /*
  150  * Reset the nameidata members that a handler's nfsmout cleanup code
  151  * inspects.  Call this before the first nfsm macro is used, because any
  152  * of those macros may jump straight to that cleanup code.
  153  */
  154 static __inline void
  155 ndclear(struct nameidata *nd)
  156 {
  157 
  158         nd->ni_startdir = NULL;
  159         nd->ni_dvp = NULL;
  160         nd->ni_vp = NULL;
  161         nd->ni_strictrelative = 0;
  162         nd->ni_cnd.cn_flags = 0;
  163 }
  164 
  165 /*
  166  * Heuristic to detect sequential operation.
       *
       * Finds the nfsheur[] slot that tracks vp, or claims one for it, and
       * updates that slot's sequential-access score (nh_seqcount) from the
       * offset and residual count found in uio.
       *
       * uio: describes the pending transfer; only uio_offset and uio_resid
       *      are read.
       * vp:  vnode being accessed; its address is used as the hash key and
       *      its mount's f_iosize is read.  The pointer stored in the slot
       *      is not referenced (see the nh_vp field comment).
       *
       * Returns the slot used for vp (never NULL).  nh_nextoff is written
       * only when a slot is (re)claimed here; advancing it after the I/O is
       * left to the caller (caller not shown -- confirm).
  167  */
  168 static struct nfsheur *
  169 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
  170 {
  171         struct nfsheur *nh;
  172         int hi, try;
  173 
  174         /* Locate best candidate. */
              /*
               * Start at a slot derived from the vnode address and probe up
               * to 32 consecutive slots, wrapping at the end of the table.
               * An exact nh_vp match ends the search.  Otherwise every
               * probed slot with a positive use count is aged by one, and
               * nh tracks the slot with the smallest nh_use seen so far
               * (the slot after the last probed one is considered as a
               * victim too, although it is never checked for a match).
               */
  175         try = 32;
  176         hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
  177         nh = &nfsheur[hi];
  178         while (try--) {
  179                 if (nfsheur[hi].nh_vp == vp) {
  180                         nh = &nfsheur[hi];
  181                         break;
  182                 }
  183                 if (nfsheur[hi].nh_use > 0)
  184                         --nfsheur[hi].nh_use;
  185                 hi = (hi + 1) % NUM_HEURISTIC;
  186                 if (nfsheur[hi].nh_use < nh->nh_use)
  187                         nh = &nfsheur[hi];
  188         }
  189 
  190         /* Initialize hint if this is a new file. */
              /*
               * No match was found, so the least-used slot is taken over.
               * A transfer starting at offset 0 gets a higher initial score
               * (4) than one starting elsewhere (1).
               */
  191         if (nh->nh_vp != vp) {
  192                 nh->nh_vp = vp;
  193                 nh->nh_nextoff = uio->uio_offset;
  194                 nh->nh_use = NHUSE_INIT;
  195                 if (uio->uio_offset == 0)
  196                         nh->nh_seqcount = 4;
  197                 else
  198                         nh->nh_seqcount = 1;
  199         }
  200 
  201         /* Calculate heuristic. */
              /*
               * Four cases, tested in order:
               *  - offset 0 with a positive score, or exactly the expected
               *    next offset: raise the score by the transfer size in 16K
               *    units (rounded up), capped at IO_SEQMAX;
               *  - within MAX_REORDERED_RPC times the larger of the mount's
               *    f_iosize and the transfer size of the expected offset:
               *    leave the score unchanged;
               *  - otherwise halve a score above 1,
               *  - or drop it to 0.
               */
  202         if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
  203             uio->uio_offset == nh->nh_nextoff) {
  204                 /* See comments in vfs_vnops.c:sequential_heuristic(). */
  205                 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
  206                 if (nh->nh_seqcount > IO_SEQMAX)
  207                         nh->nh_seqcount = IO_SEQMAX;
  208         } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
  209             imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
  210                 /* Probably a reordered RPC, leave seqcount alone. */
  211         } else if (nh->nh_seqcount > 1) {
  212                 nh->nh_seqcount /= 2;
  213         } else {
  214                 nh->nh_seqcount = 0;
  215         }
              /* Credit the slot for this use so it resists eviction, up to a cap. */
  216         nh->nh_use += NHUSE_INC;
  217         if (nh->nh_use > NHUSE_MAX)
  218                 nh->nh_use = NHUSE_MAX;
  219         return (nh);
  220 }
  221 
  222 /*
  223  * nfs v3 access service
       *
       * Decodes a file handle and a requested access bitmask from the
       * request, clears every requested bit that nfsrv_access() refuses for
       * the request's credentials, and replies with the post-op attributes
       * followed by the bits that survived.  Panics if it is invoked for a
       * request that is not flagged ND_NFSV3.
       *
       * Returns the value of `error' at nfsmout.  On the paths written out
       * below it is 0 by then; per the notes at the top of this file a
       * non-zero return means the request is dropped without a reply, which
       * can happen when an nfsm macro bails out.  mrq, mrep, md, dpos, mb,
       * mreq and bpos are not used directly in this body; they are
       * presumably consumed by the nfsm macros.
  224  */
  225 int
  226 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  227     struct mbuf **mrq)
  228 {
  229         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  230         struct sockaddr *nam = nfsd->nd_nam;
  231         caddr_t dpos = nfsd->nd_dpos;
  232         struct ucred *cred = nfsd->nd_cr;
  233         struct vnode *vp = NULL;
  234         nfsfh_t nfh;
  235         fhandle_t *fhp;
  236         u_int32_t *tl;
  237         caddr_t bpos;
  238         int error = 0, rdonly, getret;
  239         struct mbuf *mb, *mreq;
  240         struct vattr vattr, *vap = &vattr;
  241         u_long testmode, nfsmode;
  242         int v3 = (nfsd->nd_flag & ND_NFSV3);
  243 
  244         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  245         if (!v3)
  246                 panic("nfsrv3_access: v3 proc called on a v2 connection");
              /* Phase 1: pull the file handle and the access word out of the request. */
  247         fhp = &nfh.fh_generic;
  248         nfsm_srvmtofh(fhp);
  249         tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  250         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
  251         if (error) {
                      /*
                       * Handle could not be turned into a vnode: send the
                       * error reply with "no attributes", then clear error
                       * so the reply is not dropped.
                       */
  252                 nfsm_reply(NFSX_UNSIGNED);
  253                 nfsm_srvpostop_attr(1, NULL);
  254                 error = 0;
  255                 goto nfsmout;
  256         }
              /*
               * Phase 2: test each group of requested bits and strip the
               * ones the caller is not granted.  A failed check is not an
               * RPC error; it only narrows the returned mask.
               */
  257         nfsmode = fxdr_unsigned(u_int32_t, *tl);
  258         if ((nfsmode & NFSV3ACCESS_READ) &&
  259                 nfsrv_access(vp, VREAD, cred, rdonly, 0))
  260                 nfsmode &= ~NFSV3ACCESS_READ;
              /* DELETE is checked as a write permission only for directories. */
  261         if (vp->v_type == VDIR)
  262                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
  263                         NFSV3ACCESS_DELETE);
  264         else
  265                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
  266         if ((nfsmode & testmode) &&
  267                 nfsrv_access(vp, VWRITE, cred, rdonly, 0))
  268                 nfsmode &= ~testmode;
              /* VEXEC stands for LOOKUP on a directory and EXECUTE otherwise. */
  269         if (vp->v_type == VDIR)
  270                 testmode = NFSV3ACCESS_LOOKUP;
  271         else
  272                 testmode = NFSV3ACCESS_EXECUTE;
  273         if ((nfsmode & testmode) &&
  274                 nfsrv_access(vp, VEXEC, cred, rdonly, 0))
  275                 nfsmode &= ~testmode;
  276         getret = VOP_GETATTR(vp, vap, cred);
              /*
               * Release the vnode and NULL vp before building the reply so
               * the nfsmout cleanup cannot vput it again if a macro bails.
               */
  277         vput(vp);
  278         vp = NULL;
              /* Phase 3: reply = post-op attributes + the surviving access bits. */
  279         nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED);
  280         nfsm_srvpostop_attr(getret, vap);
  281         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  282         *tl = txdr_unsigned(nfsmode);
  283 nfsmout:
  284         if (vp)
  285                 vput(vp);
  286         return(error);
  287 }
  288 
  289 /*
  290  * nfs getattr service
       *
       * Decodes a file handle, fetches the vnode's attributes with
       * VOP_GETATTR() and, when that succeeds, appends them to the reply as
       * an nfs_fattr sized for the protocol version of the request.
       *
       * Returns the value of `error' at nfsmout.  The paths written out
       * below clear it once a reply has been generated; per the notes at the
       * top of this file a non-zero return means the request is dropped
       * without a reply.  slp and nam are only passed through to
       * nfsrv_fhtovp(); mrq and the mbuf cursors are presumably consumed by
       * the nfsm macros.
  291  */
  292 int
  293 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  294     struct mbuf **mrq)
  295 {
  296         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  297         struct sockaddr *nam = nfsd->nd_nam;
  298         caddr_t dpos = nfsd->nd_dpos;
  299         struct ucred *cred = nfsd->nd_cr;
  300         struct nfs_fattr *fp;
  301         struct vattr va;
  302         struct vattr *vap = &va;
  303         struct vnode *vp = NULL;
  304         nfsfh_t nfh;
  305         fhandle_t *fhp;
  306         caddr_t bpos;
  307         int error = 0, rdonly;
  308         struct mbuf *mb, *mreq;
  309 
  310         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  311         fhp = &nfh.fh_generic;
  312         nfsm_srvmtofh(fhp);
  313         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
  314         if (error) {
                      /* Bad handle: reply carries only the error status. */
  315                 nfsm_reply(0);
  316                 error = 0;
  317                 goto nfsmout;
  318         }
  319         error = VOP_GETATTR(vp, vap, cred);
              /*
               * Release the vnode and NULL vp before any reply macro runs,
               * so the nfsmout cleanup cannot vput it a second time.
               */
  320         vput(vp);
  321         vp = NULL;
  322         nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
  323         if (error) {
                      /* Getattr failed: the error reply was built above, no body follows. */
  324                 error = 0;
  325                 goto nfsmout;
  326         }
  327         fp = nfsm_build(struct nfs_fattr *,
  328             NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
  329         nfsm_srvfillattr(vap, fp);
  330         /* fall through */
  331 
  332 nfsmout:
  333         if (vp)
  334                 vput(vp);
  335         return(error);
  336 }
  337 
  338 /*
  339  * nfs setattr service
       *
       * Decodes a file handle and the new attributes (v3 sattr plus an
       * optional ctime guard, or a v2 sattr), applies them with
       * VOP_SETATTR() and replies with wcc data (v3) or the resulting
       * attributes (v2).
       *
       * Write accounting is started on the handle's mount before the
       * attributes are decoded and is finished at nfsmout on every path;
       * mp is still NULL there when the handle could not be decoded or its
       * file system was not found.
       *
       * Returns the value of `error' at nfsmout.  The paths written out
       * below clear it once a reply has been generated; per the notes at the
       * top of this file a non-zero return means the request is dropped
       * without a reply.
  340  */
  341 int
  342 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  343     struct mbuf **mrq)
  344 {
  345         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  346         struct sockaddr *nam = nfsd->nd_nam;
  347         caddr_t dpos = nfsd->nd_dpos;
  348         struct ucred *cred = nfsd->nd_cr;
  349         struct vattr va, preat;
  350         struct vattr *vap = &va;
  351         struct nfsv2_sattr *sp;
  352         struct nfs_fattr *fp;
  353         struct vnode *vp = NULL;
  354         nfsfh_t nfh;
  355         fhandle_t *fhp;
  356         u_int32_t *tl;
  357         caddr_t bpos;
  358         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
  359         int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0;
  360         struct mbuf *mb, *mreq;
  361         struct timespec guard = { 0, 0 };
  362         struct mount *mp = NULL;
  363 
  364         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  365         fhp = &nfh.fh_generic;
  366         nfsm_srvmtofh(fhp);
              /* Unknown file system id: answer ESTALE through the common reply at out. */
  367         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
  368                 error = ESTALE;
  369                 goto out;
  370         }
              /* The result of vn_start_write() is deliberately ignored here. */
  371         (void) vn_start_write(NULL, &mp, V_WAIT);
  372         vfs_rel(mp);            /* The write holds a ref. */
              /* Start with every attribute marked "not set". */
  373         VATTR_NULL(vap);
  374         if (v3) {
                      /* v3: sattr3, then a guard flag and, if set, the guard ctime. */
  375                 nfsm_srvsattr(vap);
  376                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  377                 gcheck = fxdr_unsigned(int, *tl);
  378                 if (gcheck) {
  379                         tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
  380                         fxdr_nfsv3time(tl, &guard);
  381                 }
  382         } else {
                      /*
                       * v2: a field equal to nfsrv_nfs_xdrneg1 is skipped and
                       * so keeps its VATTR_NULL value.
                       */
  383                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
  384                 /*
  385                  * Nah nah nah nah na nah
  386                  * There is a bug in the Sun client that puts 0xffff in the mode
  387                  * field of sattr when it should put in 0xffffffff. The u_short
  388                  * doesn't sign extend.
  389                  * --> check the low order 2 bytes for 0xffff
  390                  */
  391                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
  392                         vap->va_mode = nfstov_mode(sp->sa_mode);
  393                 if (sp->sa_uid != nfsrv_nfs_xdrneg1)
  394                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
  395                 if (sp->sa_gid != nfsrv_nfs_xdrneg1)
  396                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
  397                 if (sp->sa_size != nfsrv_nfs_xdrneg1)
  398                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
  399                 if (sp->sa_atime.nfsv2_sec != nfsrv_nfs_xdrneg1) {
  400 #ifdef notyet
  401                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
  402 #else
                              /* Only the seconds of the v2 atime are used. */
  403                         vap->va_atime.tv_sec =
  404                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
  405                         vap->va_atime.tv_nsec = 0;
  406 #endif
  407                 }
  408                 if (sp->sa_mtime.nfsv2_sec != nfsrv_nfs_xdrneg1)
  409                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
  410 
  411         }
  412 
  413         /*
  414          * Now that we have all the fields, lets do it.
  415          */
  416         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
  417         if (error) {
                      /*
                       * preat_ret and postat_ret are still 1 here, so the
                       * wcc data is emitted with their initial "failed"
                       * status values.
                       */
  418                 nfsm_reply(2 * NFSX_UNSIGNED);
  419                 if (v3)
  420                         nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  421                 error = 0;
  422                 goto nfsmout;
  423         }
  424 
  425         /*
  426          * vp now an active resource, pay careful attention to cleanup
  427          */
  428         if (v3) {
                      /*
                       * Fetch the pre-op attributes.  With a guard supplied,
                       * the current ctime must match it exactly or the
                       * request is refused with NFSERR_NOT_SYNC.
                       */
  429                 error = preat_ret = VOP_GETATTR(vp, &preat, cred);
  430                 if (!error && gcheck &&
  431                         (preat.va_ctime.tv_sec != guard.tv_sec ||
  432                          preat.va_ctime.tv_nsec != guard.tv_nsec))
  433                         error = NFSERR_NOT_SYNC;
  434                 if (error) {
  435                         vput(vp);
  436                         vp = NULL;
  437                         nfsm_reply(NFSX_WCCDATA(v3));
                              /* v3 is always true inside this block. */
  438                         if (v3)
  439                                 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  440                         error = 0;
  441                         goto nfsmout;
  442                 }
  443         }
  444 
  445         /*
  446          * If the size is being changed write access is required, otherwise
  447          * just check for a read only filesystem.
  448          */
  449         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
  450                 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
  451                         error = EROFS;
  452                         goto out;
  453                 }
  454         } else {
  455                 if (vp->v_type == VDIR) {
  456                         error = EISDIR;
  457                         goto out;
  458                 } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly,
  459                     0)) != 0)
  460                         goto out;
  461         }
  462         error = VOP_SETATTR(vp, vap, cred);
              /*
               * Re-read the attributes into vap for the reply.  A setattr
               * error takes precedence over a failure of this getattr.
               */
  463         postat_ret = VOP_GETATTR(vp, vap, cred);
  464         if (!error)
  465                 error = postat_ret;
  466 out:
              /*
               * Common reply path.  The vnode is released and vp cleared
               * before any reply macro runs, so the nfsmout cleanup cannot
               * vput it a second time.
               */
  467         if (vp != NULL)
  468                 vput(vp);
  469 
  470         vp = NULL;
  471         nfsm_reply(NFSX_WCCORFATTR(v3));
  472         if (v3) {
  473                 nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap);
  474         } else if (!error) {
  475                 /* v2 non-error case. */
  476                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
  477                 nfsm_srvfillattr(vap, fp);
  478         }
  479         error = 0;
  480         /* fall through */
  481 
  482 nfsmout:
  483         if (vp)
  484                 vput(vp);
              /* mp may still be NULL here (see the header comment). */
  485         vn_finished_write(mp);
  486         return(error);
  487 }
  488 
  489 /*
  490  * nfs lookup rpc
       *
       * Decodes a directory file handle and a name, resolves the name with
       * nfs_namei() and replies with the file handle and attributes of the
       * result (plus the directory's post-op attributes for v3).  When the
       * public file handle was used and the result is a directory, the
       * configured index file (nfs_pub.np_index) is looked up inside it and
       * substituted if found; a result outside the publicly exported mount
       * is refused with EPERM.
       *
       * Resource tracking: ndp points at whichever nameidata (nd or ind)
       * currently owns ni_vp and ni_startdir, while the name buffer always
       * stays with nd.  The nfsmout cleanup releases whatever is still
       * non-NULL, so every release below is followed by NULLing the field.
       *
       * Returns the value of `error' at nfsmout.  The paths written out
       * below clear it once a reply has been generated; per the notes at the
       * top of this file a non-zero return means the request is dropped
       * without a reply.
  491  */
  492 int
  493 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  494     struct mbuf **mrq)
  495 {
  496         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  497         struct sockaddr *nam = nfsd->nd_nam;
  498         caddr_t dpos = nfsd->nd_dpos;
  499         struct ucred *cred = nfsd->nd_cr;
  500         struct nfs_fattr *fp;
  501         struct nameidata nd, ind, *ndp = &nd;
  502         struct vnode *vp, *dirp = NULL;
  503         nfsfh_t nfh;
  504         fhandle_t *fhp;
  505         caddr_t bpos;
  506         int error = 0, len, dirattr_ret = 1;
  507         int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag;
  508         struct mbuf *mb, *mreq;
  509         struct vattr va, dirattr, *vap = &va;
  510 
  511         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Must precede the first nfsm macro: nfsmout tests these fields. */
  512         ndclear(&nd);
  513 
  514         fhp = &nfh.fh_generic;
  515         nfsm_srvmtofh(fhp);
  516         nfsm_srvnamesiz(len);
  517 
  518         pubflag = nfs_ispublicfh(fhp);
  519 
  520         nd.ni_cnd.cn_cred = cred;
  521         nd.ni_cnd.cn_nameiop = LOOKUP;
  522         nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART;
  523         error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
  524                 &dirp, v3, &dirattr, &dirattr_ret, pubflag);
  525 
  526         /*
  527          * namei failure, only dirp to cleanup.  Clear out garbage from
  528          * structure in case macros jump to nfsmout.
  529          */
  530 
  531         if (error) {
  532                 if (dirp) {
  533                         vrele(dirp);
  534                         dirp = NULL;
  535                 }
  536                 nfsm_reply(NFSX_POSTOPATTR(v3));
  537                 if (v3)
  538                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  539                 error = 0;
  540                 goto nfsmout;
  541         }
  542 
  543         /*
  544          * Locate index file for public filehandle
  545          *
  546          * error is 0 on entry and 0 on exit from this block.
  547          */
  548 
  549         if (pubflag) {
  550                 if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) {
  551                         /*
  552                          * Setup call to lookup() to see if we can find
  553                          * the index file. Arguably, this doesn't belong
  554                          * in a kernel.. Ugh.  If an error occurs, do not
  555                          * try to install an index file and then clear the
  556                          * error.
  557                          *
  558                          * When we replace nd with ind and redirect ndp,
  559                          * maintenance of ni_startdir and ni_vp shift to
  560                          * ind and we have to clean them up in the old nd.
  561                          * However, the cnd resource continues to be maintained
  562                          * via the original nd.  Confused?  You aren't alone!
  563                          */
  564                         ind = nd;
  565                         VOP_UNLOCK(nd.ni_vp, 0);
  566                         ind.ni_pathlen = strlen(nfs_pub.np_index);
  567                         ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf =
  568                             nfs_pub.np_index;
  569                         ind.ni_startdir = nd.ni_vp;
  570                         VREF(ind.ni_startdir);
  571                         error = lookup(&ind);
  572                         ind.ni_dvp = NULL;
  573 
  574                         if (error == 0) {
  575                                 /*
  576                                  * Found an index file. Get rid of
  577                                  * the old references.  transfer nd.ni_vp'
  578                                  */
  579                                 if (dirp)
  580                                         vrele(dirp);
  581                                 dirp = nd.ni_vp;
  582                                 nd.ni_vp = NULL;
  583                                 vrele(nd.ni_startdir);
  584                                 nd.ni_startdir = NULL;
  585                                 ndp = &ind;
  586                         }
  587                         error = 0;
  588                 }
  589                 /*
  590                  * If the public filehandle was used, check that this lookup
  591                  * didn't result in a filehandle outside the publicly exported
  592                  * filesystem.  We clear the poor vp here to avoid lockups due
  593                  * to NFS I/O.
  594                  */
  595 
  596                 if (ndp->ni_vp->v_mount != nfs_pub.np_mount) {
                              /*
                               * Release the vnode that was just tested.
                               * After an index-file substitution ndp points
                               * at ind and nd.ni_vp is already NULL, so the
                               * release has to go through ndp: using nd here
                               * would vput a NULL pointer and leave the
                               * index vnode locked and referenced.
                               */
  597                         vput(ndp->ni_vp);
  598                         ndp->ni_vp = NULL;
  599                         error = EPERM;
  600                 }
  601         }
  602 
  603         /*
  604          * Resources at this point:
  605          *      ndp->ni_vp      may not be NULL
  606          */
  607 
  608         if (error) {
  609                 nfsm_reply(NFSX_POSTOPATTR(v3));
  610                 if (v3)
  611                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  612                 error = 0;
  613                 goto nfsmout;
  614         }
  615 
  616         /*
  617          * Get underlying attribute, then release remaining resources ( for
  618          * the same potential blocking reason ) and reply.
  619          */
  620         vp = ndp->ni_vp;
              /* Reuse the request's handle buffer to build the handle of the result. */
  621         bzero((caddr_t)fhp, sizeof(nfh));
  622         fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
  623         error = VOP_VPTOFH(vp, &fhp->fh_fid);
  624         if (!error)
  625                 error = VOP_GETATTR(vp, vap, cred);
  626 
              /*
               * Drop every vnode reference before the reply macros run and
               * NULL the fields, so the nfsmout cleanup finds nothing left
               * to release.
               */
  627         vput(vp);
  628         vrele(ndp->ni_startdir);
  629         vrele(dirp);
  630         ndp->ni_vp = NULL;
  631         ndp->ni_startdir = NULL;
  632         dirp = NULL;
  633         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3));
  634         if (error) {
  635                 if (v3)
  636                         nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  637                 error = 0;
  638                 goto nfsmout;
  639         }
  640         nfsm_srvfhtom(fhp, v3);
  641         if (v3) {
                      /* v3: object attributes first, then the directory's. */
  642                 nfsm_srvpostop_attr(0, vap);
  643                 nfsm_srvpostop_attr(dirattr_ret, &dirattr);
  644         } else {
  645                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
  646                 nfsm_srvfillattr(vap, fp);
  647         }
  648 
  649 nfsmout:
  650         if (ndp->ni_vp || dirp || ndp->ni_startdir) {
  651                 if (ndp->ni_vp)
  652                         vput(ndp->ni_vp);
  653                 if (dirp)
  654                         vrele(dirp);
  655                 if (ndp->ni_startdir)
  656                         vrele(ndp->ni_startdir);
  657         }
              /* The path name buffer is always owned by nd, even when ndp == &ind. */
  658         NDFREE(&nd, NDF_ONLY_PNBUF);
  659         return (error);
  660 }
  661 
  662 /*
  663  * nfs readlink service
       *
       * Decodes a file handle, reads the target of the symbolic link it names
       * into a pre-allocated mbuf chain with VOP_READLINK() and replies with
       * the link length followed by that chain (preceded by post-op
       * attributes for v3).  A vnode that is not a symbolic link yields
       * EINVAL for v3 and ENXIO for v2.
       *
       * mp3 is the head of the data chain.  It is freed at nfsmout unless
       * it has been linked into the reply, in which case mp3 is NULLed.
       *
       * Returns the value of `error' at nfsmout.  The paths written out
       * below clear it once a reply has been generated; per the notes at the
       * top of this file a non-zero return means the request is dropped
       * without a reply.
  664  */
  665 int
  666 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  667     struct mbuf **mrq)
  668 {
  669         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  670         struct sockaddr *nam = nfsd->nd_nam;
  671         caddr_t dpos = nfsd->nd_dpos;
  672         struct ucred *cred = nfsd->nd_cr;
  673         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
  674         struct iovec *ivp = iv;
  675         struct mbuf *mp;
  676         u_int32_t *tl;
  677         caddr_t bpos;
  678         int error = 0, rdonly, i, tlen, len, getret;
  679         int v3 = (nfsd->nd_flag & ND_NFSV3);
  680         struct mbuf *mb, *mp3, *nmp, *mreq;
  681         struct vnode *vp = NULL;
  682         struct vattr attr;
  683         nfsfh_t nfh;
  684         fhandle_t *fhp;
  685         struct uio io, *uiop = &io;
  686 
  687         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /*
               * mp is always assigned on the first loop pass (len == 0)
               * before it is read; this store is skipped when nolint is
               * defined.
               */
  688 #ifndef nolint
  689         mp = NULL;
  690 #endif
              /* Set before the first nfsm macro: nfsmout tests mp3. */
  691         mp3 = NULL;
  692         fhp = &nfh.fh_generic;
  693         nfsm_srvmtofh(fhp);
              /*
               * Build a chain of cluster mbufs covering exactly
               * NFS_MAXPATHLEN bytes, with one iovec per mbuf.  The last
               * mbuf's length is trimmed so the total does not overshoot.
               */
  694         len = 0;
  695         i = 0;
  696         while (len < NFS_MAXPATHLEN) {
  697                 MGET(nmp, M_WAITOK, MT_DATA);
  698                 MCLGET(nmp, M_WAITOK);
  699                 nmp->m_len = NFSMSIZ(nmp);
  700                 if (len == 0)
  701                         mp3 = mp = nmp;
  702                 else {
  703                         mp->m_next = nmp;
  704                         mp = nmp;
  705                 }
  706                 if ((len + mp->m_len) > NFS_MAXPATHLEN) {
  707                         mp->m_len = NFS_MAXPATHLEN - len;
  708                         len = NFS_MAXPATHLEN;
  709                 } else
  710                         len += mp->m_len;
  711                 ivp->iov_base = mtod(mp, caddr_t);
  712                 ivp->iov_len = mp->m_len;
  713                 i++;
  714                 ivp++;
  715         }
              /* Describe the chain as a kernel-space read of len bytes from offset 0. */
  716         uiop->uio_iov = iv;
  717         uiop->uio_iovcnt = i;
  718         uiop->uio_offset = 0;
  719         uiop->uio_resid = len;
  720         uiop->uio_rw = UIO_READ;
  721         uiop->uio_segflg = UIO_SYSSPACE;
  722         uiop->uio_td = NULL;
  723         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
  724         if (error) {
                      /* Bad handle: error reply; the unused chain is freed at nfsmout. */
  725                 nfsm_reply(2 * NFSX_UNSIGNED);
  726                 if (v3)
  727                         nfsm_srvpostop_attr(1, NULL);
  728                 error = 0;
  729                 goto nfsmout;
  730         }
  731         if (vp->v_type != VLNK) {
  732                 if (v3)
  733                         error = EINVAL;
  734                 else
  735                         error = ENXIO;
  736         } else 
  737                 error = VOP_READLINK(vp, uiop, cred);
              /* Attributes are fetched whether or not the read succeeded. */
  738         getret = VOP_GETATTR(vp, &attr, cred);
              /*
               * Release the vnode and NULL vp before the reply macros run,
               * so the nfsmout cleanup cannot vput it a second time.
               */
  739         vput(vp);
  740         vp = NULL;
  741         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED);
  742         if (v3)
  743                 nfsm_srvpostop_attr(getret, &attr);
  744         if (error) {
  745                 error = 0;
  746                 goto nfsmout;
  747         }
              /*
               * Short read: len becomes the number of bytes actually read
               * and tlen its nfsm_rndup() value.  NOTE(review): nfsm_adj()
               * presumably trims the chain from NFS_MAXPATHLEN down to tlen
               * and handles the tlen - len pad bytes -- confirm against the
               * macro.
               */
  748         if (uiop->uio_resid > 0) {
  749                 len -= uiop->uio_resid;
  750                 tlen = nfsm_rndup(len);
  751                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
  752         }
  753         tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  754         *tl = txdr_unsigned(len);
              /*
               * Link the data chain after the reply mbuf and give up local
               * ownership so the cleanup below does not free it.
               */
  755         mb->m_next = mp3;
  756         mp3 = NULL;
  757 nfsmout:
  758         if (mp3)
  759                 m_freem(mp3);
  760         if (vp)
  761                 vput(vp);
  762         return(error);
  763 }
  764 
  765 /*
  766  * nfs read service
       *
       * Decodes a file handle, a starting offset and a requested length from
       * the request, reads up to that many bytes from a regular file straight
       * into the reply mbuf chain, and finishes the reply with the file
       * attributes and the byte count (plus an EOF flag for v3).
       *
       * nfsd supplies the request mbufs, parse position, credentials, peer
       * address and protocol version flag; slp is passed through to
       * nfsrv_fhtovp(); mrq is not referenced by name in this function.
       * NOTE(review): mrq, mrep, md, dpos, mb, mreq and bpos are presumably
       * consumed by the nfsm_* macros - confirm against their definitions.
       *
       * Every exit path written out below clears error before jumping to
       * nfsmout, so failures are reported inside the reply.  The nfsm_*
       * macros may jump to nfsmout on their own (see the comment ahead of
       * the nfsrv_fhtovp() call), in which case error is returned as set.
  767  */
  768 int
  769 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  770     struct mbuf **mrq)
  771 {
  772         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  773         struct sockaddr *nam = nfsd->nd_nam;
  774         caddr_t dpos = nfsd->nd_dpos;
  775         struct ucred *cred = nfsd->nd_cr;
  776         struct iovec *iv;
  777         struct iovec *iv2;
  778         struct mbuf *m;
  779         struct nfs_fattr *fp;
  780         u_int32_t *tl;
  781         int i;
  782         caddr_t bpos;
  783         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
  784         int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen;
  785         struct mbuf *mb, *mreq;
  786         struct mbuf *m2;
  787         struct vnode *vp = NULL;
  788         nfsfh_t nfh;
  789         fhandle_t *fhp;
  790         struct uio io, *uiop = &io;
  791         struct vattr va, *vap = &va;
  792         struct nfsheur *nh;
  793         off_t off;
  794         int ioflag = 0;
  795 
  796         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  797         fhp = &nfh.fh_generic;
  798         nfsm_srvmtofh(fhp);
              /* The offset is two XDR words (64-bit) in v3, one word in v2. */
  799         if (v3) {
  800                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
  801                 off = fxdr_hyper(tl);
  802         } else {
  803                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
  804                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
  805         }
              /*
               * NOTE(review): presumably loads the requested byte count into
               * reqlen, bounded by NFS_SRVMAXDATA(nfsd) - confirm in the macro.
               */
  806         nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd));
  807 
  808         /*
  809          * Reference vp.  If an error occurs, vp will be invalid, but we
  810          * have to NULL it just in case.  The macros might goto nfsmout
  811          * as well.
  812          */
  813 
  814         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
  815         if (error) {
  816                 vp = NULL;
  817                 nfsm_reply(2 * NFSX_UNSIGNED);
  818                 if (v3)
  819                         nfsm_srvpostop_attr(1, NULL);
  820                 error = 0;
  821                 goto nfsmout;
  822         }
  823 
              /* Only regular files can be read; the error code differs by version. */
  824         if (vp->v_type != VREG) {
  825                 if (v3)
  826                         error = EINVAL;
  827                 else
  828                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
  829         }
              /* Read permission is tried first; execute permission is accepted too. */
  830         if (!error) {
  831                 if ((error = nfsrv_access(vp, VREAD, cred, rdonly, 1)) != 0)
  832                         error = nfsrv_access(vp, VEXEC, cred, rdonly, 1);
  833         }
              /*
               * The attributes are fetched even when a check above failed:
               * getret and vap are handed to the v3 error reply below.
               */
  834         getret = VOP_GETATTR(vp, vap, cred);
  835         if (!error)
  836                 error = getret;
  837         if (error) {
  838                 vput(vp);
  839                 vp = NULL;
  840                 nfsm_reply(NFSX_POSTOPATTR(v3));
  841                 if (v3)
  842                         nfsm_srvpostop_attr(getret, vap);
  843                 error = 0;
  844                 goto nfsmout;
  845         }
  846 
  847         /*
  848          * Calculate byte count to read
               * (nothing at or past va_size, otherwise at most the bytes that
               * remain between off and va_size).
  849          */
  850         if (off >= vap->va_size)
  851                 cnt = 0;
  852         else if ((off + reqlen) > vap->va_size)
  853                 cnt = vap->va_size - off;
  854         else
  855                 cnt = reqlen;
  856 
  857         nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt));
              /*
               * Reserve the attribute area and the trailing count words now.
               * fp remembers where the attributes go; tl is left pointing at
               * the words that are filled in at the bottom of the function,
               * once the real byte count is known.
               */
  858         if (v3) {
  859                 tl = nfsm_build(u_int32_t *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
  860                 *tl++ = nfsrv_nfs_true;
  861                 fp = (struct nfs_fattr *)tl;
  862                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
  863         } else {
  864                 tl = nfsm_build(u_int32_t *, NFSX_V2FATTR + NFSX_UNSIGNED);
  865                 fp = (struct nfs_fattr *)tl;
  866                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
  867         }
  868         len = left = nfsm_rndup(cnt);
  869         if (cnt > 0) {
  870                 /*
  871                  * Generate the mbuf list with the uio_iov ref. to it.
                       *
                       * First pass: count how many mbufs will hold data (one
                       * iovec each) and append cluster mbufs to the reply until
                       * the whole rounded-up length fits.
  872                  */
  873                 i = 0;
  874                 m = m2 = mb;
  875                 while (left > 0) {
  876                         siz = min(M_TRAILINGSPACE(m), left);
  877                         if (siz > 0) {
  878                                 left -= siz;
  879                                 i++;
  880                         }
  881                         if (left > 0) {
  882                                 MGET(m, M_WAITOK, MT_DATA);
  883                                 MCLGET(m, M_WAITOK);
  884                                 m->m_len = 0;
  885                                 m2->m_next = m;
  886                                 m2 = m;
  887                         }
  888                 }
  889                 iv = malloc(i * sizeof (struct iovec),
  890                        M_TEMP, M_WAITOK);
                      /* iv is advanced below; iv2 keeps the base for free(). */
  891                 uiop->uio_iov = iv2 = iv;
  892                 m = mb;
  893                 left = len;
  894                 i = 0;
                      /*
                       * Second pass: point each iovec at the free tail of its
                       * mbuf and claim that space by growing m_len.
                       */
  895                 while (left > 0) {
  896                         if (m == NULL)
  897                                 panic("nfsrv_read iov");
  898                         siz = min(M_TRAILINGSPACE(m), left);
  899                         if (siz > 0) {
  900                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
  901                                 iv->iov_len = siz;
  902                                 m->m_len += siz;
  903                                 left -= siz;
  904                                 iv++;
  905                                 i++;
  906                         }
  907                         m = m->m_next;
  908                 }
  909                 uiop->uio_iovcnt = i;
  910                 uiop->uio_offset = off;
  911                 uiop->uio_resid = len;
  912                 uiop->uio_rw = UIO_READ;
  913                 uiop->uio_segflg = UIO_SYSSPACE;
                      /*
                       * io is an automatic variable, so a field that is not
                       * assigned holds stack garbage.  Give the VOP a defined
                       * thread pointer, as the write path in this file does
                       * for its own uio.
                       */
                      uiop->uio_td = NULL;
                      /* Fold the sequential-access estimate into the I/O flags. */
  914                 nh = nfsrv_sequential_heuristic(uiop, vp);
  915                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
  916                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
  917                 if (error == 0)
  918                         nh->nh_nextoff = uiop->uio_offset;
  919                 free((caddr_t)iv2, M_TEMP);
                      /*
                       * On a read or getattr failure the reply built so far is
                       * discarded and a short reply is generated in its place.
                       */
  920                 if (error || (getret = VOP_GETATTR(vp, vap, cred))) {
  921                         if (!error)
  922                                 error = getret;
  923                         m_freem(mreq);
  924                         vput(vp);
  925                         vp = NULL;
  926                         nfsm_reply(NFSX_POSTOPATTR(v3));
  927                         if (v3)
  928                                 nfsm_srvpostop_attr(getret, vap);
  929                         error = 0;
  930                         goto nfsmout;
  931                 }
  932         } else
                      /* Nothing was read; keep the arithmetic below well defined. */
  933                 uiop->uio_resid = 0;
  934         vput(vp);
  935         vp = NULL;
  936         nfsm_srvfillattr(vap, fp);
              /*
               * len is the padded length that was reserved; subtracting the
               * residual gives what the VOP actually delivered.  cnt becomes
               * the smaller of that and the planned count, and any surplus
               * reserved space is trimmed from the reply.
               */
  937         tlen = len - uiop->uio_resid;
  938         cnt = cnt < tlen ? cnt : tlen;
  939         tlen = nfsm_rndup(cnt);
  940         if (len != tlen || tlen != cnt)
  941                 nfsm_adj(mb, len - tlen, tlen - cnt);
              /* v3: byte count, then a flag set when fewer bytes than asked were read. */
  942         if (v3) {
  943                 *tl++ = txdr_unsigned(cnt);
  944                 if (cnt < reqlen)
  945                         *tl++ = nfsrv_nfs_true;
  946                 else
  947                         *tl++ = nfsrv_nfs_false;
  948         }
              /* Both versions end with the length word of the data that follows. */
  949         *tl = txdr_unsigned(cnt);
  950 nfsmout:
              /* vp is still set only if a macro jumped here before it was released. */
  951         if (vp)
  952                 vput(vp);
  953         return(error);
  954 }
  955 
  956 /*
  957  * nfs write service
       *
       * Parses the file handle, offset, stability level (v3 only) and data
       * length from the request, trims the request mbuf chain so that only
       * the payload bytes keep a non-zero m_len, and hands those mbufs to
       * VOP_WRITE() through an iovec array.  The reply carries wcc data,
       * the byte count, the commit level and a verifier for v3, or the file
       * attributes for v2.
       *
       * A NULL nd_mrep produces no reply: *mrq is set to NULL and 0 is
       * returned.  Every exit path written out below clears error before
       * returning, so failures are reported inside the reply.
       * NOTE(review): the nfsm_* macros may jump to nfsmout themselves with
       * error still set - confirm against their definitions.
  958  */
  959 int
  960 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
  961     struct mbuf **mrq)
  962 {
  963         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
  964         struct sockaddr *nam = nfsd->nd_nam;
  965         caddr_t dpos = nfsd->nd_dpos;
  966         struct ucred *cred = nfsd->nd_cr;
  967         struct iovec *ivp;
  968         int i, cnt;
  969         struct mbuf *mp;
  970         struct nfs_fattr *fp;
  971         struct iovec *iv;
  972         struct vattr va, forat;
  973         struct vattr *vap = &va;
  974         u_int32_t *tl;
  975         caddr_t bpos;
  976         int error = 0, rdonly, len, forat_ret = 1;
  977         int ioflags, aftat_ret = 1, retlen = 0, zeroing, adjust;
  978         int stable = NFSV3WRITE_FILESYNC;
  979         int v3 = (nfsd->nd_flag & ND_NFSV3);
  980         struct mbuf *mb, *mreq;
  981         struct vnode *vp = NULL;
  982         struct nfsheur *nh;
  983         nfsfh_t nfh;
  984         fhandle_t *fhp;
  985         struct uio io, *uiop = &io;
  986         off_t off;
  987         struct mount *mntp = NULL;
  988 
  989         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
  990         if (mrep == NULL) {
  991                 *mrq = NULL;
  992                 error = 0;
  993                 goto nfsmout;
  994         }
  995         fhp = &nfh.fh_generic;
  996         nfsm_srvmtofh(fhp);
              /* The handle's fsid must name a mounted file system, else ESTALE. */
  997         if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
  998                 error = ESTALE;
  999                 goto ereply;
 1000         }
 1001         (void) vn_start_write(NULL, &mntp, V_WAIT);
 1002         vfs_rel(mntp);          /* The write holds a ref. */
              /*
               * v3 header: words 0-1 are the 64-bit offset, word 2 is skipped,
               * word 3 is the stability level and word 4 the data length.
               * v2 header: word 1 is the offset and word 3 the data length;
               * words 0 and 2 are not used.
               */
 1003         if (v3) {
 1004                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 1005                 off = fxdr_hyper(tl);
 1006                 tl += 3;
 1007                 stable = fxdr_unsigned(int, *tl++);
 1008         } else {
 1009                 tl = nfsm_dissect_nonblock(u_int32_t *, 4 * NFSX_UNSIGNED);
 1010                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
 1011                 tl += 2;
                      /* v2 has no stability field; nfs_async selects unstable writes. */
 1012                 if (nfs_async)
 1013                         stable = NFSV3WRITE_UNSTABLE;
 1014         }
 1015         retlen = len = fxdr_unsigned(int32_t, *tl);
 1016         cnt = i = 0;
 1017 
 1018         /*
 1019          * For NFS Version 2, it is not obvious what a write of zero length
 1020          * should do, but I might as well be consistent with Version 3,
 1021          * which is to return ok so long as there are no permission problems.
 1022          */
 1023         if (len > 0) {
                  /*
                   * Walk the request chain and leave a non-zero m_len only on
                   * the payload: mbufs ahead of the parse position (md/dpos)
                   * are zeroed, md itself is advanced past the bytes already
                   * parsed, and everything beyond len bytes is cut off.
                   * i accumulates the payload bytes seen and cnt the number of
                   * mbufs that still carry data (the iovec count used below).
                   */
 1024             zeroing = 1;
 1025             mp = mrep;
 1026             while (mp) {
 1027                 if (mp == md) {
 1028                         zeroing = 0;
 1029                         adjust = dpos - mtod(mp, caddr_t);
 1030                         mp->m_len -= adjust;
 1031                         if (mp->m_len > 0 && adjust > 0)
 1032                                 mp->m_data += adjust;
 1033                 }
 1034                 if (zeroing)
 1035                         mp->m_len = 0;
 1036                 else if (mp->m_len > 0) {
 1037                         i += mp->m_len;
 1038                         if (i > len) {
 1039                                 mp->m_len -= (i - len);
 1040                                 zeroing = 1;
 1041                         }
 1042                         if (mp->m_len > 0)
 1043                                 cnt++;
 1044                 }
 1045                 mp = mp->m_next;
 1046             }
 1047         }
              /* Reject an out-of-range length or a request that carries too few bytes. */
 1048         if (len > NFS_MAXDATA || len < 0 || i < len) {
 1049                 error = EIO;
 1050                 nfsm_reply(2 * NFSX_UNSIGNED);
 1051                 if (v3)
 1052                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1053                 error = 0;
 1054                 goto nfsmout;
 1055         }
 1056         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
 1057         if (error) {
 1058                 vp = NULL;
 1059                 nfsm_reply(2 * NFSX_UNSIGNED);
 1060                 if (v3)
 1061                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1062                 error = 0;
 1063                 goto nfsmout;
 1064         }
              /* Pre-operation attributes are only needed for the v3 wcc data. */
 1065         if (v3)
 1066                 forat_ret = VOP_GETATTR(vp, &forat, cred);
              /* Only regular files can be written; the error code differs by version. */
 1067         if (vp->v_type != VREG) {
 1068                 if (v3)
 1069                         error = EINVAL;
 1070                 else
 1071                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
 1072         }
 1073         if (!error)
 1074                 error = nfsrv_access(vp, VWRITE, cred, rdonly, 1);
 1075         if (error) {
 1076                 vput(vp);
 1077                 vp = NULL;
 1078                 nfsm_reply(NFSX_WCCDATA(v3));
 1079                 if (v3)
 1080                         nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1081                 error = 0;
 1082                 goto nfsmout;
 1083         }
 1084 
 1085         if (len > 0) {
                  /* One iovec per request mbuf that was left with data above. */
 1086             ivp = malloc(cnt * sizeof (struct iovec), M_TEMP,
 1087                 M_WAITOK);
                  /* ivp is advanced in the loop; iv keeps the base for free(). */
 1088             uiop->uio_iov = iv = ivp;
 1089             uiop->uio_iovcnt = cnt;
 1090             mp = mrep;
 1091             while (mp) {
 1092                 if (mp->m_len > 0) {
 1093                         ivp->iov_base = mtod(mp, caddr_t);
 1094                         ivp->iov_len = mp->m_len;
 1095                         ivp++;
 1096                 }
 1097                 mp = mp->m_next;
 1098             }
 1099 
 1100             /*
 1101              * XXX
 1102              * The IO_METASYNC flag indicates that all metadata (and not just
 1103              * enough to ensure data integrity) must be written to stable storage
 1104              * synchronously.
 1105              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
 1106              */
 1107             if (stable == NFSV3WRITE_UNSTABLE)
 1108                 ioflags = IO_NODELOCKED;
 1109             else if (stable == NFSV3WRITE_DATASYNC)
 1110                 ioflags = (IO_SYNC | IO_NODELOCKED);
 1111             else
 1112                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
 1113             uiop->uio_resid = len;
 1114             uiop->uio_rw = UIO_WRITE;
 1115             uiop->uio_segflg = UIO_SYSSPACE;
 1116             uiop->uio_td = NULL;
 1117             uiop->uio_offset = off;
                  /* Fold the sequential-access estimate into the I/O flags. */
 1118             nh = nfsrv_sequential_heuristic(uiop, vp);
 1119             ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
 1120             error = VOP_WRITE(vp, uiop, ioflags, cred);
 1121             if (error == 0)
 1122                     nh->nh_nextoff = uiop->uio_offset;
 1123             /* Unlocked write. */
 1124             nfsrvstats.srvvop_writes++;
 1125             free((caddr_t)iv, M_TEMP);
 1126         }
              /*
               * Post-operation attributes are fetched whether or not the write
               * succeeded; a getattr failure only becomes the reply status when
               * the write itself did not fail.
               */
 1127         aftat_ret = VOP_GETATTR(vp, vap, cred);
 1128         vput(vp);
 1129         vp = NULL;
 1130         if (!error)
 1131                 error = aftat_ret;
 1132 ereply:
 1133         nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) +
 1134                 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3));
 1135         if (v3) {
 1136                 nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap);
 1137                 if (error) {
 1138                         error = 0;
 1139                         goto nfsmout;
 1140                 }
                      /* Four words: byte count, commit level, two verifier words. */
 1141                 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 1142                 *tl++ = txdr_unsigned(retlen);
 1143                 /*
 1144                  * If nfs_async is set, then pretend the write was FILESYNC.
 1145                  */
 1146                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
 1147                         *tl++ = txdr_unsigned(stable);
 1148                 else
 1149                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
 1150                 /*
 1151                  * Actually, there is no need to txdr these fields,
 1152                  * but it may make the values more human readable,
 1153                  * for debugging purposes.
                       *
                       * nfsver is filled in from boottime the first time it
                       * is found to be zero.
 1154                  */
 1155                 if (nfsver.tv_sec == 0)
 1156                         nfsver = boottime;
 1157                 *tl++ = txdr_unsigned(nfsver.tv_sec);
 1158                 *tl = txdr_unsigned(nfsver.tv_usec);
 1159         } else if (!error) {
 1160                 /* v2 non-error case. */
 1161                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 1162                 nfsm_srvfillattr(vap, fp);
 1163         }
 1164         error = 0;
 1165 nfsmout:
              /* vp is still set only if a macro jumped here before it was released. */
 1166         if (vp)
 1167                 vput(vp);
              /* mntp is NULL here on the early exits taken before vn_start_write(). */
 1168         vn_finished_write(mntp);
 1169         return(error);
 1170 }
 1171 
 1172 /*
 1173  * nfs create service
 1174  * now does a truncate to 0 length via. setattr if it already exists
       *
       * Looks up the name in the directory given by the file handle and
       * creates it when it does not exist.  v3 requests carry a create mode
       * (UNCHECKED, GUARDED or EXCLUSIVE); v2 requests carry an nfsv2_sattr
       * whose mode bits may also ask for a character, block or FIFO node.
       * When the name already exists and a size was supplied, the file is
       * resized with VOP_SETATTR() after a write-access check.
       *
       * On success the reply carries the new file handle and attributes
       * (plus directory wcc data for v3).  Every exit path written out below
       * clears error before returning, so failures are reported inside the
       * reply.
       * NOTE(review): the nfsm_* macros may jump to nfsmout themselves with
       * error still set - confirm against their definitions.
 1175  */
 1176 int
 1177 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1178     struct mbuf **mrq)
 1179 {
 1180         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1181         struct sockaddr *nam = nfsd->nd_nam;
 1182         caddr_t dpos = nfsd->nd_dpos;
 1183         struct ucred *cred = nfsd->nd_cr;
 1184         struct nfs_fattr *fp;
 1185         struct vattr va, dirfor, diraft;
 1186         struct vattr *vap = &va;
 1187         struct nfsv2_sattr *sp;
 1188         u_int32_t *tl;
 1189         struct nameidata nd;
 1190         caddr_t bpos;
 1191         int error = 0, rdev, len, tsize, dirfor_ret = 1, diraft_ret = 1;
 1192         int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0;
 1193         struct mbuf *mb, *mreq;
 1194         struct vnode *dirp = NULL;
 1195         nfsfh_t nfh;
 1196         fhandle_t *fhp;
 1197         u_quad_t tempsize;
 1198         struct timespec cverf;
 1199         struct mount *mp = NULL;
 1200 
 1201         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1202 #ifndef nolint
 1203         rdev = 0;
 1204 #endif
              /* Clear nd up front: the cleanup at nfsmout inspects its vnode fields. */
 1205         ndclear(&nd);
 1206 
 1207         fhp = &nfh.fh_generic;
 1208         nfsm_srvmtofh(fhp);
              /* The handle's fsid must name a mounted file system, else ESTALE. */
 1209         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 1210                 error = ESTALE;
 1211                 goto ereply;
 1212         }
 1213         (void) vn_start_write(NULL, &mp, V_WAIT);
 1214         vfs_rel(mp);            /* The write holds a ref. */
 1215         nfsm_srvnamesiz(len);
 1216 
 1217         nd.ni_cnd.cn_cred = cred;
 1218         nd.ni_cnd.cn_nameiop = CREATE;
 1219         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
 1220 
 1221         /*
 1222          * Call namei and do initial cleanup to get a few things
 1223          * out of the way.  If we get an initial error we cleanup
 1224          * and return here to avoid special-casing the invalid nd
 1225          * structure through the rest of the case.  dirp may be
 1226          * set even if an error occurs, but the nd structure will not
 1227          * be valid at all if an error occurs so we have to invalidate it
 1228          * prior to calling nfsm_reply ( which might goto nfsmout ).
 1229          */
 1230         error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
 1231                 &dirp, v3, &dirfor, &dirfor_ret, FALSE);
              /* dirp is only used for the v3 directory attributes further down. */
 1232         if (dirp && !v3) {
 1233                 vrele(dirp);
 1234                 dirp = NULL;
 1235         }
 1236         if (error) {
 1237                 nfsm_reply(NFSX_WCCDATA(v3));
 1238                 if (v3)
 1239                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1240                 error = 0;
 1241                 goto nfsmout;
 1242         }
 1243 
 1244         /*
 1245          * No error.  Continue.  State:
 1246          *
 1247          *      startdir        is valid ( we release this immediately )
 1248          *      dirp            may be valid
 1249          *      nd.ni_vp        may be valid
 1250          *      nd.ni_dvp       is valid
 1251          *
 1252          * The error state is set through the code and we may also do some
 1253          * opportunistic releasing of vnodes to avoid holding locks through
 1254          * NFS I/O.  The cleanup at the end is a catch-all
 1255          */
 1256 
 1257         VATTR_NULL(vap);
 1258         if (v3) {
 1259                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 1260                 how = fxdr_unsigned(int, *tl);
 1261                 switch (how) {
 1262                 case NFSV3CREATE_GUARDED:
                              /* GUARDED fails if the name exists, else acts like UNCHECKED. */
 1263                         if (nd.ni_vp) {
 1264                                 error = EEXIST;
 1265                                 break;
 1266                         }
 1267                         /* fall through */
 1268                 case NFSV3CREATE_UNCHECKED:
 1269                         nfsm_srvsattr(vap);
 1270                         break;
 1271                 case NFSV3CREATE_EXCLUSIVE:
                              /*
                               * No attributes are sent in this mode, only a
                               * verifier; it is kept in cverf and later stored in,
                               * and compared against, the file's access time.
                               */
 1272                         tl = nfsm_dissect_nonblock(u_int32_t *,
 1273                             NFSX_V3CREATEVERF);
 1274                         /* Unique bytes, endianness is not important. */
 1275                         cverf.tv_sec  = (int32_t)tl[0];
 1276                         cverf.tv_nsec = tl[1];
 1277                         exclusive_flag = 1;
 1278                         break;
 1279                 };
 1280                 vap->va_type = VREG;
 1281         } else {
                      /* v2: the node type is taken from the mode; none means a regular file. */
 1282                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
 1283                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
 1284                 if (vap->va_type == VNON)
 1285                         vap->va_type = VREG;
 1286                 vap->va_mode = nfstov_mode(sp->sa_mode);
                      /* sa_size holds a size for regular files, a device number otherwise. */
 1287                 switch (vap->va_type) {
 1288                 case VREG:
 1289                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
 1290                         if (tsize != -1)
 1291                                 vap->va_size = (u_quad_t)tsize;
 1292                         break;
 1293                 case VCHR:
 1294                 case VBLK:
 1295                 case VFIFO:
 1296                         rdev = fxdr_unsigned(long, sp->sa_size);
 1297                         break;
 1298                 default:
 1299                         break;
 1300                 };
 1301         }
 1302 
 1303         /*
 1304          * Iff doesn't exist, create it
 1305          * otherwise just truncate to 0 length
 1306          *   should I set the mode too ?
 1307          *
 1308          * The only possible error we can have at this point is EEXIST.
 1309          * nd.ni_vp will also be non-NULL in that case.
 1310          */
 1311         if (nd.ni_vp == NULL) {
 1312                 if (vap->va_mode == (mode_t)VNOVAL)
 1313                         vap->va_mode = 0;
 1314                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
 1315                         error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 1316                         if (error)
 1317                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1318                         else {
                                      /*
                                       * Exclusive create: record the verifier in the
                                       * new file's access time.  The flag is cleared
                                       * so the verifier comparison further down only
                                       * runs when this call did not create the file.
                                       */
 1319                                 if (exclusive_flag) {
 1320                                         exclusive_flag = 0;
 1321                                         VATTR_NULL(vap);
 1322                                         vap->va_atime = cverf;
 1323                                         error = VOP_SETATTR(nd.ni_vp, vap,
 1324                                             cred);
 1325                                 }
 1326                         }
 1327                 } else if (vap->va_type == VCHR || vap->va_type == VBLK ||
 1328                     vap->va_type == VFIFO) {
 1329                         /*
 1330                          * NFSv2-specific code for creating device nodes
 1331                          * and fifos.
 1332                          *
 1333                          * Handle SysV FIFO node special cases.  All other
 1334                          * devices require super user to access.
 1335                          */
 1336                         if (vap->va_type == VCHR && rdev == 0xffffffff)
 1337                                 vap->va_type = VFIFO;
 1338                         if (vap->va_type != VFIFO &&
 1339                             (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV,
 1340                             0))) {
 1341                                 goto ereply;
 1342                         }
 1343                         vap->va_rdev = rdev;
 1344                         error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 1345                         if (error) {
 1346                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 1347                                 goto ereply;
 1348                         }
                              /* The vnode from VOP_MKNOD() is dropped and looked up again below. */
 1349                         vput(nd.ni_vp);
 1350                         nd.ni_vp = NULL;
 1351 
 1352                         /*
 1353                          * release dvp prior to lookup
 1354                          */
 1355                         vput(nd.ni_dvp);
 1356                         nd.ni_dvp = NULL;
 1357                         /*
 1358                          * Setup for lookup.
 1359                          *
 1360                          * Even though LOCKPARENT was cleared, ni_dvp may
 1361                          * be garbage.
 1362                          */
 1363                         nd.ni_cnd.cn_nameiop = LOOKUP;
 1364                         nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
 1365                         nd.ni_cnd.cn_thread = curthread;
 1366                         nd.ni_cnd.cn_cred = cred;
 1367                         error = lookup(&nd);
 1368                         nd.ni_dvp = NULL;
 1369                         if (error)
 1370                                 goto ereply;
 1371 
 1372                         if (nd.ni_cnd.cn_flags & ISSYMLINK) {
 1373                                 error = EINVAL;
 1374                                 goto ereply;
 1375                         }
 1376                 } else {
 1377                         error = ENXIO;
 1378                 }
 1379         } else {
                      /*
                       * The name already exists.  If the request supplied a size,
                       * apply only that size (all other attributes are reset)
                       * after checking for write access.
                       */
 1380                 if (vap->va_size != -1) {
 1381                         error = nfsrv_access(nd.ni_vp, VWRITE,
 1382                             cred, (nd.ni_cnd.cn_flags & RDONLY), 0);
 1383                         if (!error) {
 1384                                 tempsize = vap->va_size;
 1385                                 VATTR_NULL(vap);
 1386                                 vap->va_size = tempsize;
 1387                                 error = VOP_SETATTR(nd.ni_vp, vap, cred);
 1388                         }
 1389                 }
 1390         }
 1391 
              /* Build the file handle and fetch the attributes of the resulting vnode. */
 1392         if (!error) {
 1393                 bzero((caddr_t)fhp, sizeof(nfh));
 1394                 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 1395                 error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 1396                 if (!error)
 1397                         error = VOP_GETATTR(nd.ni_vp, vap, cred);
 1398         }
 1399         if (v3) {
                      /*
                       * exclusive_flag is still set here only when this call did
                       * not create the file; a stored access time that differs
                       * from the verifier is then reported as EEXIST.
                       */
 1400                 if (exclusive_flag && !error &&
 1401                     bcmp(&cverf, &vap->va_atime, sizeof (cverf)))
 1402                         error = EEXIST;
                      /* Collect the post-operation directory attributes for the wcc data. */
 1403                 if (dirp == nd.ni_dvp)
 1404                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 1405                 else {
 1406                         /* Drop the other locks to avoid deadlock. */
 1407                         if (nd.ni_dvp) {
 1408                                 if (nd.ni_dvp == nd.ni_vp)
 1409                                         vrele(nd.ni_dvp);
 1410                                 else
 1411                                         vput(nd.ni_dvp);
 1412                         }
 1413                         if (nd.ni_vp)
 1414                                 vput(nd.ni_vp);
 1415                         nd.ni_dvp = NULL;
 1416                         nd.ni_vp = NULL;
 1417 
 1418                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
 1419                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 1420                         VOP_UNLOCK(dirp, 0);
 1421                 }
 1422         }
 1423 ereply:
 1424         nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3));
 1425         if (v3) {
                      /* The handle and attributes go out only on success; wcc data always. */
 1426                 if (!error) {
 1427                         nfsm_srvpostop_fh(fhp);
 1428                         nfsm_srvpostop_attr(0, vap);
 1429                 }
 1430                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1431         } else if (!error) {
 1432                 /* v2 non-error case. */
 1433                 nfsm_srvfhtom(fhp, v3);
 1434                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 1435                 nfsm_srvfillattr(vap, fp);
 1436         }
 1437         error = 0;
 1438 
 1439 nfsmout:
              /*
               * Catch-all cleanup: release whatever is still held.  When the
               * directory and the target are the same vnode, the directory
               * reference is dropped with vrele() and the vput() of ni_vp below
               * does the unlock.
               */
 1440         if (nd.ni_dvp) {
 1441                 if (nd.ni_dvp == nd.ni_vp)
 1442                         vrele(nd.ni_dvp);
 1443                 else
 1444                         vput(nd.ni_dvp);
 1445         }
 1446         if (nd.ni_vp)
 1447                 vput(nd.ni_vp);
 1448         if (nd.ni_startdir) {
 1449                 vrele(nd.ni_startdir);
 1450                 nd.ni_startdir = NULL;
 1451         }
 1452         if (dirp)
 1453                 vrele(dirp);
 1454         NDFREE(&nd, NDF_ONLY_PNBUF);
              /* mp is NULL here on the early exit taken before vn_start_write(). */
 1455         vn_finished_write(mp);
 1456         return (error);
 1457 }
 1458 
 1459 /*
 1460  * nfs v3 mknod service
 1461  */
 1462 int
 1463 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1464     struct mbuf **mrq)
 1465 {
 1466         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1467         struct sockaddr *nam = nfsd->nd_nam;
 1468         caddr_t dpos = nfsd->nd_dpos;
 1469         struct ucred *cred = nfsd->nd_cr;
 1470         struct vattr va, dirfor, diraft;
 1471         struct vattr *vap = &va;
 1472         struct thread *td = curthread;
 1473         u_int32_t *tl;
 1474         struct nameidata nd;
 1475         caddr_t bpos;
 1476         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 1477         u_int32_t major, minor;
 1478         enum vtype vtyp;
 1479         struct mbuf *mb, *mreq;
 1480         struct vnode *vp, *dirp = NULL;
 1481         nfsfh_t nfh;
 1482         fhandle_t *fhp;
 1483         struct mount *mp = NULL;
 1484         int v3 = (nfsd->nd_flag & ND_NFSV3);
 1485 
 1486         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1487         if (!v3)
 1488                 panic("nfsrv_mknod: v3 proc called on a v2 connection");
 1489         ndclear(&nd);
 1490 
 1491         fhp = &nfh.fh_generic;
 1492         nfsm_srvmtofh(fhp);
 1493         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 1494                 error = ESTALE;
 1495                 goto ereply;
 1496         }
 1497         (void) vn_start_write(NULL, &mp, V_WAIT);
 1498         vfs_rel(mp);            /* The write holds a ref. */
 1499         nfsm_srvnamesiz(len);
 1500 
 1501         nd.ni_cnd.cn_cred = cred;
 1502         nd.ni_cnd.cn_nameiop = CREATE;
 1503         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
 1504 
 1505         /*
 1506          * Handle nfs_namei() call.  If an error occurs, the nd structure
 1507          * is not valid.  However, nfsm_*() routines may still jump to
 1508          * nfsmout.
 1509          */
 1510 
 1511         error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
 1512                 &dirp, v3, &dirfor, &dirfor_ret, FALSE);
 1513         if (error) {
 1514                 nfsm_reply(NFSX_WCCDATA(1));
 1515                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1516                 error = 0;
 1517                 goto nfsmout;
 1518         }
 1519         tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 1520         vtyp = nfsv3tov_type(*tl);
 1521         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
 1522                 error = NFSERR_BADTYPE;
 1523                 goto out;
 1524         }
 1525         VATTR_NULL(vap);
 1526         nfsm_srvsattr(vap);
 1527         if (vtyp == VCHR || vtyp == VBLK) {
 1528                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
 1529                 major = fxdr_unsigned(u_int32_t, *tl++);
 1530                 minor = fxdr_unsigned(u_int32_t, *tl);
 1531                 vap->va_rdev = makedev(major, minor);
 1532         }
 1533 
 1534         /*
 1535          * Iff doesn't exist, create it.
 1536          */
 1537         if (nd.ni_vp) {
 1538                 error = EEXIST;
 1539                 goto out;
 1540         }
 1541         vap->va_type = vtyp;
 1542         if (vap->va_mode == (mode_t)VNOVAL)
 1543                 vap->va_mode = 0;
 1544         if (vtyp == VSOCK) {
 1545                 vrele(nd.ni_startdir);
 1546                 nd.ni_startdir = NULL;
 1547                 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 1548                 if (error)
 1549                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1550         } else {
 1551                 if (vtyp != VFIFO && (error = priv_check_cred(cred,
 1552                     PRIV_VFS_MKNOD_DEV, 0)))
 1553                         goto out;
 1554                 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 1555                 if (error) {
 1556                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1557                         goto out;
 1558                 }
 1559                 vput(nd.ni_vp);
 1560                 nd.ni_vp = NULL;
 1561 
 1562                 /*
 1563                  * Release dvp prior to lookup
 1564                  */
 1565                 vput(nd.ni_dvp);
 1566                 nd.ni_dvp = NULL;
 1567 
 1568                 nd.ni_cnd.cn_nameiop = LOOKUP;
 1569                 nd.ni_cnd.cn_flags &= ~(LOCKPARENT);
 1570                 nd.ni_cnd.cn_thread = td;
 1571                 nd.ni_cnd.cn_cred = td->td_ucred;
 1572                 error = lookup(&nd);
 1573                 nd.ni_dvp = NULL;
 1574 
 1575                 if (error)
 1576                         goto out;
 1577                 if (nd.ni_cnd.cn_flags & ISSYMLINK)
 1578                         error = EINVAL;
 1579         }
 1580 
 1581         /*
 1582          * send response, cleanup, return.
 1583          */
 1584 out:
 1585         vp = nd.ni_vp;
 1586         if (!error) {
 1587                 bzero((caddr_t)fhp, sizeof(nfh));
 1588                 fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 1589                 error = VOP_VPTOFH(vp, &fhp->fh_fid);
 1590                 if (!error)
 1591                         error = VOP_GETATTR(vp, vap, cred);
 1592         }
 1593         if (nd.ni_dvp) {
 1594                 if (nd.ni_dvp == nd.ni_vp)
 1595                         vrele(nd.ni_dvp);
 1596                 else
 1597                         vput(nd.ni_dvp);
 1598                 nd.ni_dvp = NULL;
 1599         }
 1600         if (vp) {
 1601                 vput(vp);
 1602                 vp = NULL;
 1603                 nd.ni_vp = NULL;
 1604         }
 1605         if (nd.ni_startdir) {
 1606                 vrele(nd.ni_startdir);
 1607                 nd.ni_startdir = NULL;
 1608         }
 1609         NDFREE(&nd, NDF_ONLY_PNBUF);
 1610         if (dirp) {
 1611                 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
 1612                 diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 1613                 vput(dirp);
 1614         }
 1615 ereply:
 1616         nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1));
 1617         if (v3) {
 1618                 if (!error) {
 1619                         nfsm_srvpostop_fh(fhp);
 1620                         nfsm_srvpostop_attr(0, vap);
 1621                 }
 1622                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1623         }
 1624         vn_finished_write(mp);
 1625         return (0);
 1626 nfsmout:
 1627         if (nd.ni_dvp) {
 1628                 if (nd.ni_dvp == nd.ni_vp)
 1629                         vrele(nd.ni_dvp);
 1630                 else
 1631                         vput(nd.ni_dvp);
 1632         }
 1633         if (nd.ni_vp)
 1634                 vput(nd.ni_vp);
 1635         if (dirp)
 1636                 vrele(dirp);
 1637         if (nd.ni_startdir)
 1638                 vrele(nd.ni_startdir);
 1639         NDFREE(&nd, NDF_ONLY_PNBUF);
 1640         vn_finished_write(mp);
 1641         return (error);
 1642 }
 1643 
 1644 /*
 1645  * nfs remove service
       *
       * Resolves the name in the request against the directory file handle
       * (nfs_namei() with DELETE and LOCKPARENT | LOCKLEAF) and removes the
       * entry with VOP_REMOVE().  Directories are refused with EPERM and the
       * root of a mounted filesystem with EBUSY.  For NFSv3 the reply also
       * carries the directory's wcc data (dirfor / diraft).
       *
       * The path that builds the reply resets error to 0 before returning.
       * NOTE(review): nfsmout is otherwise only reachable through jumps hidden
       * in the nfsm_*() macros, which are defined elsewhere; a nonzero return
       * presumably comes from those -- confirm against the macro definitions.
 1646  */
 1647 int
 1648 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1649     struct mbuf **mrq)
 1650 {
              /*
               * NOTE(review): mrep, mb, mreq, bpos and mrq are not referenced by
               * name below; presumably they are consumed by the nfsm_*() macros.
               */
 1651         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1652         struct sockaddr *nam = nfsd->nd_nam;
 1653         caddr_t dpos = nfsd->nd_dpos;
 1654         struct ucred *cred = nfsd->nd_cr;
 1655         struct nameidata nd;
 1656         caddr_t bpos;
              /* diraft_ret keeps its initial 1 unless VOP_GETATTR(dirp) is reached. */
 1657         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 1658         int v3 = (nfsd->nd_flag & ND_NFSV3);
 1659         struct mbuf *mb, *mreq;
              /* No initializer: dirp is not read until after nfs_namei(&dirp). */
 1660         struct vnode *dirp;
 1661         struct vattr dirfor, diraft;
 1662         nfsfh_t nfh;
 1663         fhandle_t *fhp;
 1664         struct mount *mp = NULL;
 1665 
 1666         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1667         ndclear(&nd);
 1668 
              /*
               * Fill in the file handle and map its fsid to a mount point.  An
               * unknown fsid is answered with ESTALE; on that path mp is still
               * NULL when vn_finished_write(mp) is called at the end.
               */
 1669         fhp = &nfh.fh_generic;
 1670         nfsm_srvmtofh(fhp);
 1671         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 1672                 error = ESTALE;
 1673                 goto ereply;
 1674         }
              /* Paired with the vn_finished_write(mp) at the end of the function. */
 1675         (void) vn_start_write(NULL, &mp, V_WAIT);
 1676         vfs_rel(mp);            /* The write holds a ref. */
 1677         nfsm_srvnamesiz(len);
 1678 
              /* Look the name up for deletion, asking for parent and leaf locked. */
 1679         nd.ni_cnd.cn_cred = cred;
 1680         nd.ni_cnd.cn_nameiop = DELETE;
 1681         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
 1682         error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
 1683                 &dirp, v3,  &dirfor, &dirfor_ret, FALSE);
              /* dirp is only used for the v3 wcc data below; drop it now for v2. */
 1684         if (dirp && !v3) {
 1685                 vrele(dirp);
 1686                 dirp = NULL;
 1687         }
 1688         if (error == 0) {
 1689                 if (nd.ni_vp->v_type == VDIR) {
 1690                         error = EPERM;          /* POSIX */
 1691                         goto out;
 1692                 }
 1693                 /*
 1694                  * The root of a mounted filesystem cannot be deleted.
 1695                  */
 1696                 if (nd.ni_vp->v_vflag & VV_ROOT) {
 1697                         error = EBUSY;
 1698                         goto out;
 1699                 }
                      /*
                       * The label lives inside the if body: both gotos above arrive
                       * with error set, so VOP_REMOVE() is skipped for them.
                       */
 1700 out:
 1701                 if (!error) {
 1702                         error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 1703                         NDFREE(&nd, NDF_ONLY_PNBUF);
 1704                 }
 1705         }
              /*
               * v3 only: fetch the directory attributes after the operation.
               * When dirp is the very vnode the lookup returned as parent it is
               * queried as is; otherwise the lookup's vnodes are released first
               * and dirp is locked just for the VOP_GETATTR() call.
               */
 1706         if (dirp && v3) {
 1707                 if (dirp == nd.ni_dvp)
 1708                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 1709                 else {
 1710                         /* Drop the other locks to avoid deadlock. */
 1711                         if (nd.ni_dvp) {
 1712                                 if (nd.ni_dvp == nd.ni_vp)
 1713                                         vrele(nd.ni_dvp);
 1714                                 else
 1715                                         vput(nd.ni_dvp);
 1716                         }
 1717                         if (nd.ni_vp)
 1718                                 vput(nd.ni_vp);
                              /* Cleared so the cleanup at nfsmout skips them. */
 1719                         nd.ni_dvp = NULL;
 1720                         nd.ni_vp = NULL;
 1721 
 1722                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
 1723                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 1724                         VOP_UNLOCK(dirp, 0);
 1725                 }
 1726                 vrele(dirp);
 1727                 dirp = NULL;
 1728         }
              /*
               * Build the reply while error still holds the operation's status;
               * error is cleared only afterwards.
               * NOTE(review): presumably nfsm_reply() encodes error into the
               * reply -- confirm against the macro definition.
               */
 1729 ereply:
 1730         nfsm_reply(NFSX_WCCDATA(v3));
 1731         if (v3)
 1732                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 1733         error = 0;
              /*
               * Common cleanup: release whatever part of nd is still held.  When
               * parent and leaf are the same vnode the parent is dropped with
               * vrele() and the single vput() happens through ni_vp.
               */
 1734 nfsmout:
 1735         NDFREE(&nd, NDF_ONLY_PNBUF);
 1736         if (nd.ni_dvp) {
 1737                 if (nd.ni_dvp == nd.ni_vp)
 1738                         vrele(nd.ni_dvp);
 1739                 else
 1740                         vput(nd.ni_dvp);
 1741         }
 1742         if (nd.ni_vp)
 1743                 vput(nd.ni_vp);
 1744         vn_finished_write(mp);
 1745         return(error);
 1746 }
 1747 
 1748 /*
 1749  * nfs rename service
       *
       * The request carries a source (file handle ffhp plus a name of length
       * len) and a destination (file handle tfhp plus a name of length len2).
       * Both are resolved with nfs_namei(): the source with DELETE, the
       * destination with RENAME.  After a series of sanity checks the work is
       * done by VOP_RENAME().  For NFSv3 the reply carries wcc data for both
       * directories.
       *
       * The value -1 in error is a local marker for "source and destination
       * are the same entry": VOP_RENAME() is skipped and error becomes 0.
 1750  */
 1751 int
 1752 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1753     struct mbuf **mrq)
 1754 {
 1755         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1756         struct sockaddr *nam = nfsd->nd_nam;
 1757         caddr_t dpos = nfsd->nd_dpos;
 1758         struct ucred *cred = nfsd->nd_cr;
 1759         caddr_t bpos;
              /* The four *_ret values stay 1 until the matching attributes are fetched. */
 1760         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
 1761         int tdirfor_ret = 1, tdiraft_ret = 1;
 1762         int v3 = (nfsd->nd_flag & ND_NFSV3);
 1763         struct mbuf *mb, *mreq;
 1764         struct nameidata fromnd, tond;
 1765         struct vnode *fvp, *tvp, *tdvp, *fdirp = NULL;
 1766         struct vnode *tdirp = NULL;
 1767         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
 1768         nfsfh_t fnfh, tnfh;
 1769         fhandle_t *ffhp, *tfhp;
 1770         uid_t saved_uid;
 1771         struct mount *mp = NULL;
 1772 
 1773         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1774 #ifndef nolint
 1775         fvp = NULL;
 1776 #endif
 1777         ffhp = &fnfh.fh_generic;
 1778         tfhp = &tnfh.fh_generic;
 1779 
 1780         /*
 1781          * Clear fields in case goto nfsmout occurs from macro.
 1782          */
 1783 
 1784         ndclear(&fromnd);
 1785         ndclear(&tond);
 1786 
              /*
               * The mount point is taken from the source file handle.  An
               * unknown fsid is answered with ESTALE through the normal reply
               * path at out1.
               */
 1787         nfsm_srvmtofh(ffhp);
 1788         if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL) {
 1789                 error = ESTALE;
 1790                 goto out1;
 1791         }
              /* Paired with the vn_finished_write(mp) at the end of the function. */
 1792         (void) vn_start_write(NULL, &mp, V_WAIT);
 1793         vfs_rel(mp);            /* The write holds a ref. */
 1794         nfsm_srvnamesiz(len);
 1795         /*
 1796          * Remember our original uid so that we can reset cr_uid before
 1797          * the second nfs_namei() call, in case it is remapped.
 1798          */
 1799         saved_uid = cred->cr_uid;
              /*
               * Source lookup: no LOCK* flag is requested, and the cleanup at
               * nfsmout accordingly releases fromnd's vnodes with vrele().
               */
 1800         fromnd.ni_cnd.cn_cred = cred;
 1801         fromnd.ni_cnd.cn_nameiop = DELETE;
 1802         fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART;
 1803         error = nfs_namei(&fromnd, nfsd, ffhp, len, slp, nam, &md,
 1804                 &dpos, &fdirp, v3, &fdirfor, &fdirfor_ret, FALSE);
              /* fdirp is only used for the v3 wcc data; drop it now for v2. */
 1805         if (fdirp && !v3) {
 1806                 vrele(fdirp);
 1807                 fdirp = NULL;
 1808         }
              /*
               * Source lookup failed: reply immediately.  The destination has
               * not been looked at yet, so tdirfor_ret and tdiraft_ret still
               * hold their initial 1, as does fdiraft_ret.
               */
 1809         if (error) {
 1810                 nfsm_reply(2 * NFSX_WCCDATA(v3));
 1811                 if (v3) {
 1812                         nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 1813                         nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 1814                 }
 1815                 error = 0;
 1816                 goto nfsmout;
 1817         }
 1818         fvp = fromnd.ni_vp;
              /* Destination lookup, with cr_uid restored to the saved value first. */
 1819         nfsm_srvmtofh(tfhp);
 1820         nfsm_srvnamesiz(len2);
 1821         cred->cr_uid = saved_uid;
 1822         tond.ni_cnd.cn_cred = cred;
 1823         tond.ni_cnd.cn_nameiop = RENAME;
 1824         tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
 1825         error = nfs_namei(&tond, nfsd, tfhp, len2, slp, nam, &md,
 1826                 &dpos, &tdirp, v3, &tdirfor, &tdirfor_ret, FALSE);
              /* tdirp is only used for the v3 wcc data; drop it now for v2. */
 1827         if (tdirp && !v3) {
 1828                 vrele(tdirp);
 1829                 tdirp = NULL;
 1830         }
 1831         if (error)
 1832                 goto out1;
 1833 
 1834         tdvp = tond.ni_dvp;
 1835         tvp = tond.ni_vp;
              /*
               * The destination already exists: source and destination must both
               * be directories or both be non-directories, and a destination
               * directory must not have v_mountedhere set.  v2 and v3 report
               * different error codes for the same condition.
               */
 1836         if (tvp != NULL) {
 1837                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 1838                         if (v3)
 1839                                 error = EEXIST;
 1840                         else
 1841                                 error = EISDIR;
 1842                         goto out;
 1843                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 1844                         if (v3)
 1845                                 error = EEXIST;
 1846                         else
 1847                                 error = ENOTDIR;
 1848                         goto out;
 1849                 }
 1850                 if (tvp->v_type == VDIR && tvp->v_mountedhere) {
 1851                         if (v3)
 1852                                 error = EXDEV;
 1853                         else
 1854                                 error = ENOTEMPTY;
 1855                         goto out;
 1856                 }
 1857         }
              /* Same v_mountedhere check for a source directory. */
 1858         if (fvp->v_type == VDIR && fvp->v_mountedhere) {
 1859                 if (v3)
 1860                         error = EXDEV;
 1861                 else
 1862                         error = ENOTEMPTY;
 1863                 goto out;
 1864         }
              /* Source and destination directory must be on the same mount. */
 1865         if (fvp->v_mount != tdvp->v_mount) {
 1866                 if (v3)
 1867                         error = EXDEV;
 1868                 else
 1869                         error = ENOTEMPTY;
 1870                 goto out;
 1871         }
              /*
               * The source is the destination directory itself.
               * NOTE(review): unlike the checks above there is no goto out here,
               * so control continues into the identity check below, which can
               * still replace error with -1 -- confirm this is intended.
               */
 1872         if (fvp == tdvp) {
 1873                 if (v3)
 1874                         error = EINVAL;
 1875                 else
 1876                         error = ENOTEMPTY;
 1877         }
 1878         /*
 1879          * If source is the same as the destination (that is the
 1880          * same vnode with the same name in the same directory),
 1881          * then there is nothing to do.
 1882          */
 1883         if (fvp == tvp && fromnd.ni_dvp == tdvp &&
 1884             fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
 1885             !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
 1886               fromnd.ni_cnd.cn_namelen))
 1887                 error = -1;
 1888 out:
 1889         if (!error) {
 1890                 /*
 1891                  * The VOP_RENAME function releases all vnode references &
 1892                  * locks prior to returning so we need to clear the pointers
 1893                  * to bypass cleanup code later on.
 1894                  */
 1895                 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 1896                                    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 1897                 fromnd.ni_dvp = NULL;
 1898                 fromnd.ni_vp = NULL;
 1899                 tond.ni_dvp = NULL;
 1900                 tond.ni_vp = NULL;
 1901                 if (error) {
 1902                         NDFREE(&fromnd, NDF_ONLY_PNBUF);
 1903                         NDFREE(&tond, NDF_ONLY_PNBUF);
 1904                 }
 1905         } else {
                      /* -1 is the "nothing to do" marker set above: report success. */
 1906                 if (error == -1)
 1907                         error = 0;
 1908         }
 1909         /* fall through */
              /*
               * Build the reply while error still holds the operation's status;
               * error is cleared only after the reply has been assembled.
               */
 1910 out1:
 1911         nfsm_reply(2 * NFSX_WCCDATA(v3));
 1912         if (v3) {
 1913                 /* Release existing locks to prevent deadlock. */
 1914                 if (tond.ni_dvp) {
 1915                         if (tond.ni_dvp == tond.ni_vp)
 1916                                 vrele(tond.ni_dvp);
 1917                         else
 1918                                 vput(tond.ni_dvp);
 1919                 }
 1920                 if (tond.ni_vp)
 1921                         vput(tond.ni_vp);
                      /* Cleared so the cleanup at nfsmout skips them. */
 1922                 tond.ni_dvp = NULL;
 1923                 tond.ni_vp = NULL;
 1924 
                      /* Each directory is locked only for its own VOP_GETATTR() call. */
 1925                 if (fdirp) {
 1926                         vn_lock(fdirp, LK_EXCLUSIVE | LK_RETRY);
 1927                         fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred);
 1928                         VOP_UNLOCK(fdirp, 0);
 1929                 }
 1930                 if (tdirp) {
 1931                         vn_lock(tdirp, LK_EXCLUSIVE | LK_RETRY);
 1932                         tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred);
 1933                         VOP_UNLOCK(tdirp, 0);
 1934                 }
 1935                 nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 1936                 nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 1937         }
 1938         error = 0;
 1939         /* fall through */
 1940 
 1941 nfsmout:
 1942         /*
 1943          * Clear out tond related fields
 1944          */
 1945         if (tond.ni_dvp) {
 1946                 if (tond.ni_dvp == tond.ni_vp)
 1947                         vrele(tond.ni_dvp);
 1948                 else
 1949                         vput(tond.ni_dvp);
 1950         }
 1951         if (tond.ni_vp)
 1952                 vput(tond.ni_vp);
 1953         if (tdirp)
 1954                 vrele(tdirp);
 1955         if (tond.ni_startdir)
 1956                 vrele(tond.ni_startdir);
 1957         NDFREE(&tond, NDF_ONLY_PNBUF);
 1958         /*
 1959          * Clear out fromnd related fields
              *
              * fromnd's vnodes are dropped with vrele() only; the source lookup
              * did not request any LOCK* flag.
 1960          */
 1961         if (fdirp)
 1962                 vrele(fdirp);
 1963         if (fromnd.ni_startdir)
 1964                 vrele(fromnd.ni_startdir);
 1965         NDFREE(&fromnd, NDF_ONLY_PNBUF);
 1966         if (fromnd.ni_dvp)
 1967                 vrele(fromnd.ni_dvp);
 1968         if (fromnd.ni_vp)
 1969                 vrele(fromnd.ni_vp);
 1970 
 1971         vn_finished_write(mp);
 1972         return (error);
 1973 }
 1974 
 1975 /*
 1976  * nfs link service
       *
       * fhp identifies the existing file (turned into vp by nfsrv_fhtovp());
       * dfhp plus the name identify the new directory entry (resolved by
       * nfs_namei() with CREATE and LOCKPARENT).  The link itself is made by
       * VOP_LINK().  For NFSv3 the reply carries the file's post-op attributes
       * (at / getret) and the directory's wcc data (dirfor / diraft).
       *
       * Directories are refused with EPERM, an existing target name with
       * EEXIST, and a file and directory on different mounts with EXDEV.
 1977  */
 1978 int
 1979 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 1980     struct mbuf **mrq)
 1981 {
 1982         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 1983         struct sockaddr *nam = nfsd->nd_nam;
 1984         caddr_t dpos = nfsd->nd_dpos;
 1985         struct ucred *cred = nfsd->nd_cr;
 1986         struct nameidata nd;
 1987         caddr_t bpos;
 1988         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
              /* getret stays 1 until VOP_GETATTR(vp) has been called. */
 1989         int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3);
 1990         struct mbuf *mb, *mreq;
              /* xp is a scratch pointer: first nd.ni_vp, then nd.ni_dvp. */
 1991         struct vnode *vp = NULL, *xp, *dirp = NULL;
 1992         struct vattr dirfor, diraft, at;
 1993         nfsfh_t nfh, dnfh;
 1994         fhandle_t *fhp, *dfhp;
 1995         struct mount *mp = NULL;
 1996 
 1997         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 1998         ndclear(&nd);
 1999 
 2000         fhp = &nfh.fh_generic;
 2001         dfhp = &dnfh.fh_generic;
              /*
               * The mount point is taken from the file's handle.  An unknown fsid
               * is answered with ESTALE; vp is still NULL on that path, so the
               * vput() at nfsmout is skipped.
               */
 2002         nfsm_srvmtofh(fhp);
 2003         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2004                 error = ESTALE;
 2005                 goto ereply;
 2006         }
              /* Paired with the vn_finished_write(mp) at the end of the function. */
 2007         (void) vn_start_write(NULL, &mp, V_WAIT);
 2008         vfs_rel(mp);            /* The write holds a ref. */
 2009         nfsm_srvmtofh(dfhp);
 2010         nfsm_srvnamesiz(len);
 2011 
              /*
               * Convert the file handle to a vnode.  On failure the reply is
               * built right here and vp is forced to NULL before the cleanup.
               */
 2012         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
 2013         if (error) {
 2014                 nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2015                 if (v3) {
 2016                         nfsm_srvpostop_attr(getret, &at);
 2017                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2018                 }
 2019                 vp = NULL;
 2020                 error = 0;
 2021                 goto nfsmout;
 2022         }
 2023         if (v3)
 2024                 getret = VOP_GETATTR(vp, &at, cred);
              /*
               * Leaves with vp untouched; out1 refreshes the attributes and
               * nfsmout releases vp with vput().
               */
 2025         if (vp->v_type == VDIR) {
 2026                 error = EPERM;          /* POSIX */
 2027                 goto out1;
 2028         }
              /*
               * vp is unlocked across the directory lookup.  The error exits
               * below therefore drop it with vrele() and clear vp, which keeps
               * the vput(vp) at nfsmout from running.
               */
 2029         VOP_UNLOCK(vp, 0);
 2030         nd.ni_cnd.cn_cred = cred;
 2031         nd.ni_cnd.cn_nameiop = CREATE;
 2032         nd.ni_cnd.cn_flags = LOCKPARENT;
 2033         error = nfs_namei(&nd, nfsd, dfhp, len, slp, nam, &md, &dpos,
 2034                 &dirp, v3, &dirfor, &dirfor_ret, FALSE);
              /* dirp is only used for the v3 wcc data; drop it now for v2. */
 2035         if (dirp && !v3) {
 2036                 vrele(dirp);
 2037                 dirp = NULL;
 2038         }
 2039         if (error) {
 2040                 vrele(vp);
 2041                 vp = NULL;
 2042                 goto out2;
 2043         }
              /* The new name must not exist yet. */
 2044         xp = nd.ni_vp;
 2045         if (xp != NULL) {
 2046                 error = EEXIST;
 2047                 vrele(vp);
 2048                 vp = NULL;
 2049                 goto out2;
 2050         }
              /* The file and the target directory must be on the same mount. */
 2051         xp = nd.ni_dvp;
 2052         if (vp->v_mount != xp->v_mount) {
 2053                 error = EXDEV;
 2054                 vrele(vp);
 2055                 vp = NULL;
 2056                 goto out2;
 2057         }
              /* Lock vp again for VOP_LINK(); it is released by vput() at nfsmout. */
 2058         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 2059         error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 2060         NDFREE(&nd, NDF_ONLY_PNBUF);
 2061         /* fall through */
 2062 
              /* out1 is only reached with vp set: refresh the file's attributes. */
 2063 out1:
 2064         if (v3)
 2065                 getret = VOP_GETATTR(vp, &at, cred);
              /*
               * Fetch the directory attributes after the operation.  dirp is
               * non-NULL here only for v3.  When it is the vnode the lookup
               * returned as parent it is queried as is; otherwise the lookup's
               * vnodes are released first and dirp is locked for the call.
               */
 2066 out2:
 2067         if (dirp) {
 2068                 if (dirp == nd.ni_dvp)
 2069                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 2070                 else {
 2071                         /* Release existing locks to prevent deadlock. */
 2072                         if (nd.ni_dvp) {
 2073                                 if (nd.ni_dvp == nd.ni_vp)
 2074                                         vrele(nd.ni_dvp);
 2075                                 else
 2076                                         vput(nd.ni_dvp);
 2077                         }
                              /* The lookup did not ask for LOCKLEAF, hence vrele(). */
 2078                         if (nd.ni_vp)
 2079                                 vrele(nd.ni_vp);
                              /* Cleared so the cleanup at nfsmout skips them. */
 2080                         nd.ni_dvp = NULL;
 2081                         nd.ni_vp = NULL;
 2082 
 2083                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
 2084                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 2085                         VOP_UNLOCK(dirp, 0);
 2086                 }
 2087         }
              /*
               * Build the reply while error still holds the operation's status;
               * error is cleared only afterwards.
               */
 2088 ereply:
 2089         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2090         if (v3) {
 2091                 nfsm_srvpostop_attr(getret, &at);
 2092                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2093         }
 2094         error = 0;
 2095         /* fall through */
 2096 
              /* Common cleanup: release everything that is still held. */
 2097 nfsmout:
 2098         NDFREE(&nd, NDF_ONLY_PNBUF);
 2099         if (vp)
 2100                 vput(vp);
 2101         if (nd.ni_dvp) {
 2102                 if (nd.ni_dvp == nd.ni_vp)
 2103                         vrele(nd.ni_dvp);
 2104                 else
 2105                         vput(nd.ni_dvp);
 2106         }
 2107         if (dirp)
 2108                 vrele(dirp);
 2109         if (nd.ni_vp)
 2110                 vrele(nd.ni_vp);
 2111         vn_finished_write(mp);
 2112         return(error);
 2113 }
 2114 
 2115 /*
 2116  * nfs symbolic link service
       *
       * Resolves the new name with nfs_namei() (CREATE, LOCKPARENT | SAVESTART),
       * copies the link target out of the request into a malloc()ed buffer that
       * is NUL-terminated here, and creates the link with VOP_SYMLINK().  For
       * NFSv3 the new link is then looked up again to obtain the file handle
       * and attributes placed in the reply.
       *
       * The reply is built at out, after which error is reset to 0.  The
       * cleanup at nfsmout repeats the releases done at out so that it is also
       * correct when reached directly.
 2117  */
 2118 int
 2119 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2120     struct mbuf **mrq)
 2121 {
 2122         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2123         struct sockaddr *nam = nfsd->nd_nam;
 2124         caddr_t dpos = nfsd->nd_dpos;
 2125         struct ucred *cred = nfsd->nd_cr;
 2126         struct vattr va, dirfor, diraft;
 2127         struct nameidata nd;
 2128         struct vattr *vap = &va;
 2129         struct nfsv2_sattr *sp;
              /* pathcp holds the link target; NULL whenever no buffer is owned. */
 2130         char *bpos, *pathcp = NULL;
 2131         struct uio io;
 2132         struct iovec iv;
              /* len is the length of the new name, len2 the length of the target. */
 2133         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
 2134         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2135         struct mbuf *mb, *mreq;
 2136         struct vnode *dirp = NULL;
 2137         nfsfh_t nfh;
 2138         fhandle_t *fhp;
 2139         struct mount *mp = NULL;
 2140 
 2141         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2142         ndclear(&nd);
 2143 
              /*
               * Map the directory handle's fsid to a mount point; an unknown fsid
               * is answered with ESTALE through the normal reply path at out.
               */
 2144         fhp = &nfh.fh_generic;
 2145         nfsm_srvmtofh(fhp);
 2146         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2147                 error = ESTALE;
 2148                 goto out;
 2149         }
              /* Paired with the vn_finished_write(mp) at the end of the function. */
 2150         (void) vn_start_write(NULL, &mp, V_WAIT);
 2151         vfs_rel(mp);            /* The write holds a ref. */
 2152         nfsm_srvnamesiz(len);
 2153         nd.ni_cnd.cn_cred = cred;
 2154         nd.ni_cnd.cn_nameiop = CREATE;
 2155         nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART;
 2156         error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
 2157                 &dirp, v3, &dirfor, &dirfor_ret, FALSE);
              /*
               * Continue parsing the arguments only when the lookup succeeded.
               * For v3 the attributes are read before the target path; for v2
               * the attributes are read after the path has been copied (below).
               */
 2158         if (error == 0) {
 2159                 VATTR_NULL(vap);
 2160                 if (v3)
 2161                         nfsm_srvsattr(vap);
 2162                 nfsm_srvpathsiz(len2);
 2163         }
              /* dirp is only used for the v3 wcc data; drop it now for v2. */
 2164         if (dirp && !v3) {
 2165                 vrele(dirp);
 2166                 dirp = NULL;
 2167         }
 2168         if (error)
 2169                 goto out;
              /*
               * One extra byte for the terminating NUL stored further down.  The
               * uio describes that buffer as a single kernel-space segment of
               * len2 bytes for nfsm_mtouio() to fill.
               */
 2170         pathcp = malloc(len2 + 1, M_TEMP, M_WAITOK);
 2171         iv.iov_base = pathcp;
 2172         iv.iov_len = len2;
 2173         io.uio_resid = len2;
 2174         io.uio_offset = 0;
 2175         io.uio_iov = &iv;
 2176         io.uio_iovcnt = 1;
 2177         io.uio_segflg = UIO_SYSSPACE;
 2178         io.uio_rw = UIO_READ;
 2179         io.uio_td = NULL;
 2180         nfsm_mtouio(&io, len2);
              /* v2: only the mode is taken from the attributes in the request. */
 2181         if (!v3) {
 2182                 sp = nfsm_dissect_nonblock(struct nfsv2_sattr *, NFSX_V2SATTR);
 2183                 vap->va_mode = nfstov_mode(sp->sa_mode);
 2184         }
 2185         *(pathcp + len2) = '\0';
              /*
               * The new name must not exist yet.  nd.ni_dvp and nd.ni_vp are left
               * in place on this path and are released at nfsmout.
               */
 2186         if (nd.ni_vp) {
 2187                 error = EEXIST;
 2188                 goto out;
 2189         }
 2190 
 2191         /*
 2192          * issue symlink op.  SAVESTART is set so the underlying path component
 2193          * is only freed by the VOP if an error occurs.
 2194          */
 2195         if (vap->va_mode == (mode_t)VNOVAL)
 2196                 vap->va_mode = 0;
 2197         error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
              /*
               * On success the vnode handed back by VOP_SYMLINK() is released
               * right away; v3 obtains it again through the lookup below.
               */
 2198         if (error)
 2199                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2200         else
 2201                 vput(nd.ni_vp);
 2202         nd.ni_vp = NULL;
 2203         /*
 2204          * releases directory prior to potential lookup op.
 2205          */
 2206         vput(nd.ni_dvp);
 2207         nd.ni_dvp = NULL;
 2208 
 2209         if (error == 0) {
 2210             if (v3) {
 2211                 /*
 2212                  * Issue lookup.  Leave SAVESTART set so we can easily free
 2213                  * the name buffer later on.
 2214                  *
 2215                  * since LOCKPARENT is not set, ni_dvp will be garbage on
 2216                  * return whether an error occurs or not.
 2217                  */
 2218                 nd.ni_cnd.cn_nameiop = LOOKUP;
 2219                 nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW);
 2220                 nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF);
 2221                 nd.ni_cnd.cn_thread = curthread;
 2222                 nd.ni_cnd.cn_cred = cred;
 2223                 error = lookup(&nd);
 2224                 nd.ni_dvp = NULL;
 2225 
                      /*
                       * Build the new link's file handle in fhp (reusing the
                       * buffer that held the directory handle) and read its
                       * attributes into vap for the reply.
                       */
 2226                 if (error == 0) {
 2227                         bzero((caddr_t)fhp, sizeof(nfh));
 2228                         fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 2229                         error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 2230                         if (!error)
 2231                                 error = VOP_GETATTR(nd.ni_vp, vap, cred);
 2232                         vput(nd.ni_vp);
 2233                         nd.ni_vp = NULL;
 2234                 }
 2235             }
 2236         }
 2237 out:
 2238         /*
 2239          * These releases aren't strictly required, does even doing them
 2240          * make any sense? XXX can nfsm_reply() block?
              *
              * Both pathcp and nd.ni_startdir are set to NULL after release, so
              * the matching checks at nfsmout do nothing on this path.
              *
              * NOTE(review): on the EEXIST path nd.ni_dvp has not been released
              * when dirp is locked below -- confirm this cannot self-deadlock
              * when dirp and nd.ni_dvp are the same vnode.
 2241          */
 2242         if (pathcp) {
 2243                 free(pathcp, M_TEMP);
 2244                 pathcp = NULL;
 2245         }
 2246         if (dirp) {
 2247                 vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
 2248                 diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 2249                 VOP_UNLOCK(dirp, 0);
 2250         }
 2251         if (nd.ni_startdir) {
 2252                 vrele(nd.ni_startdir);
 2253                 nd.ni_startdir = NULL;
 2254         }
              /*
               * Build the reply while error still holds the operation's status.
               * For v3 the file handle and attributes are added only on success;
               * the directory wcc data is added in either case.
               */
 2255         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2256         if (v3) {
 2257                 if (!error) {
 2258                         nfsm_srvpostop_fh(fhp);
 2259                         nfsm_srvpostop_attr(0, vap);
 2260                 }
 2261                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2262         }
 2263         error = 0;
 2264         /* fall through */
 2265 
              /* Common cleanup: release everything that is still held. */
 2266 nfsmout:
 2267         NDFREE(&nd, NDF_ONLY_PNBUF);
 2268         if (nd.ni_dvp) {
 2269                 if (nd.ni_dvp == nd.ni_vp)
 2270                         vrele(nd.ni_dvp);
 2271                 else
 2272                         vput(nd.ni_dvp);
 2273         }
 2274         if (nd.ni_vp)
 2275                 vrele(nd.ni_vp);
 2276         if (nd.ni_startdir)
 2277                 vrele(nd.ni_startdir);
 2278         if (dirp)
 2279                 vrele(dirp);
 2280         if (pathcp)
 2281                 free(pathcp, M_TEMP);
 2282 
 2283         vn_finished_write(mp);
 2284         return (error);
 2285 }
 2286 
 2287 /*
 2288  * nfs mkdir service
 2289  */
 2290 int
 2291 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2292     struct mbuf **mrq)
 2293 {
 2294         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2295         struct sockaddr *nam = nfsd->nd_nam;
 2296         caddr_t dpos = nfsd->nd_dpos;
 2297         struct ucred *cred = nfsd->nd_cr;
 2298         struct vattr va, dirfor, diraft;
 2299         struct vattr *vap = &va;
 2300         struct nfs_fattr *fp;
 2301         struct nameidata nd;
 2302         u_int32_t *tl;
 2303         caddr_t bpos;
 2304         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2305         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2306         struct mbuf *mb, *mreq;
 2307         struct vnode *dirp = NULL;
 2308         int vpexcl = 0;
 2309         nfsfh_t nfh;
 2310         fhandle_t *fhp;
 2311         struct mount *mp = NULL;
 2312 
 2313         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2314         ndclear(&nd);
 2315 
 2316         fhp = &nfh.fh_generic;
 2317         nfsm_srvmtofh(fhp);
 2318         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2319                 error = ESTALE;
 2320                 goto out;
 2321         }
 2322         (void) vn_start_write(NULL, &mp, V_WAIT);
 2323         vfs_rel(mp);            /* The write holds a ref. */
 2324         nfsm_srvnamesiz(len);
 2325         nd.ni_cnd.cn_cred = cred;
 2326         nd.ni_cnd.cn_nameiop = CREATE;
 2327         nd.ni_cnd.cn_flags = LOCKPARENT;
 2328 
 2329         error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
 2330                 &dirp, v3, &dirfor, &dirfor_ret, FALSE);
 2331         if (dirp && !v3) {
 2332                 vrele(dirp);
 2333                 dirp = NULL;
 2334         }
 2335         if (error) {
 2336                 nfsm_reply(NFSX_WCCDATA(v3));
 2337                 if (v3)
 2338                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2339                 error = 0;
 2340                 goto nfsmout;
 2341         }
 2342         VATTR_NULL(vap);
 2343         if (v3) {
 2344                 nfsm_srvsattr(vap);
 2345         } else {
 2346                 tl = nfsm_dissect_nonblock(u_int32_t *, NFSX_UNSIGNED);
 2347                 vap->va_mode = nfstov_mode(*tl++);
 2348         }
 2349 
 2350         /*
 2351          * At this point nd.ni_dvp is referenced and exclusively locked and
 2352          * nd.ni_vp, if it exists, is referenced but not locked.
 2353          */
 2354 
 2355         vap->va_type = VDIR;
 2356         if (nd.ni_vp != NULL) {
 2357                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2358                 error = EEXIST;
 2359                 goto out;
 2360         }
 2361 
 2362         /*
 2363          * Issue mkdir op.  Since SAVESTART is not set, the pathname
 2364          * component is freed by the VOP call.  This will fill-in
 2365          * nd.ni_vp, reference, and exclusively lock it.
 2366          */
 2367         if (vap->va_mode == (mode_t)VNOVAL)
 2368                 vap->va_mode = 0;
 2369         error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap);
 2370         NDFREE(&nd, NDF_ONLY_PNBUF);
 2371         vpexcl = 1;
 2372 
 2373         vput(nd.ni_dvp);
 2374         nd.ni_dvp = NULL;
 2375 
 2376         if (!error) {
 2377                 bzero((caddr_t)fhp, sizeof(nfh));
 2378                 fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid;
 2379                 error = VOP_VPTOFH(nd.ni_vp, &fhp->fh_fid);
 2380                 if (!error)
 2381                         error = VOP_GETATTR(nd.ni_vp, vap, cred);
 2382         }
 2383 out:
 2384         if (dirp) {
 2385                 if (dirp == nd.ni_dvp) {
 2386                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 2387                 } else {
 2388                         /* Release existing locks to prevent deadlock. */
 2389                         if (nd.ni_dvp) {
 2390                                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2391                                 if (nd.ni_dvp == nd.ni_vp && vpexcl)
 2392                                         vrele(nd.ni_dvp);
 2393                                 else
 2394                                         vput(nd.ni_dvp);
 2395                         }
 2396                         if (nd.ni_vp) {
 2397                                 if (vpexcl)
 2398                                         vput(nd.ni_vp);
 2399                                 else
 2400                                         vrele(nd.ni_vp);
 2401                         }
 2402                         nd.ni_dvp = NULL;
 2403                         nd.ni_vp = NULL;
 2404                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
 2405                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 2406                         VOP_UNLOCK(dirp, 0);
 2407                 }
 2408         }
 2409         nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3));
 2410         if (v3) {
 2411                 if (!error) {
 2412                         nfsm_srvpostop_fh(fhp);
 2413                         nfsm_srvpostop_attr(0, vap);
 2414                 }
 2415                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2416         } else if (!error) {
 2417                 /* v2 non-error case. */
 2418                 nfsm_srvfhtom(fhp, v3);
 2419                 fp = nfsm_build(struct nfs_fattr *, NFSX_V2FATTR);
 2420                 nfsm_srvfillattr(vap, fp);
 2421         }
 2422         error = 0;
 2423         /* fall through */
 2424 
 2425 nfsmout:
 2426         if (nd.ni_dvp) {
 2427                 NDFREE(&nd, NDF_ONLY_PNBUF);
 2428                 if (nd.ni_dvp == nd.ni_vp && vpexcl)
 2429                         vrele(nd.ni_dvp);
 2430                 else
 2431                         vput(nd.ni_dvp);
 2432         }
 2433         if (nd.ni_vp) {
 2434                 if (vpexcl)
 2435                         vput(nd.ni_vp);
 2436                 else
 2437                         vrele(nd.ni_vp);
 2438         }
 2439         if (dirp)
 2440                 vrele(dirp);
 2441         vn_finished_write(mp);
 2442         return (error);
 2443 }
 2444 
 2445 /*
 2446  * nfs rmdir service
       *
       * Parses a file handle and a name from the request, looks the name up
       * with nfs_namei() (DELETE, LOCKPARENT | LOCKLEAF) and, unless one of
       * the checks below fails, removes the directory with VOP_RMDIR().
       *
       * Checks made before the VOP; each sets error so the VOP is skipped:
       *   - fsid in the handle not resolved by vfs_getvfs()  -> ESTALE
       *   - target vnode is not of type VDIR                 -> ENOTDIR
       *   - target is the directory it was looked up in
       *     (nd.ni_dvp == vp, i.e. ".")                      -> EINVAL
       *   - target has VV_ROOT set                           -> EBUSY
       *
       * For NFSv3 the reply carries wcc data built from dirfor/diraft.  dirp
       * is only consulted to obtain diraft, so it is released right after the
       * lookup when the request is not v3.
       *
       * Every explicit exit path in this function clears error after the
       * reply has been started, so those paths return 0.
       *
       * NOTE(review): the nfsm_* macros are defined elsewhere.  They
       * presumably use mrq, mrep, mb, mreq and bpos, and may jump to nfsmout
       * on their own -- confirm against their definitions.
 2447  */
 2448 int
 2449 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2450     struct mbuf **mrq)
 2451 {
 2452         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2453         struct sockaddr *nam = nfsd->nd_nam;
 2454         caddr_t dpos = nfsd->nd_dpos;
 2455         struct ucred *cred = nfsd->nd_cr;
 2456         caddr_t bpos;
 2457         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
 2458         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2459         struct mbuf *mb, *mreq;
 2460         struct vnode *vp, *dirp = NULL;
 2461         struct vattr dirfor, diraft;
 2462         nfsfh_t nfh;
 2463         fhandle_t *fhp;
 2464         struct nameidata nd;
 2465         struct mount *mp = NULL;
 2466 
 2467         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2468         ndclear(&nd);
 2469 
 2470         fhp = &nfh.fh_generic;
 2471         nfsm_srvmtofh(fhp);
              /*
               * Find the mount from the fsid in the handle; if there is none
               * the handle is reported as stale.
               */
 2472         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 2473                 error = ESTALE;
 2474                 goto out;
 2475         }
 2476         (void) vn_start_write(NULL, &mp, V_WAIT);
 2477         vfs_rel(mp);            /* The write holds a ref. */
 2478         nfsm_srvnamesiz(len);
 2479         nd.ni_cnd.cn_cred = cred;
 2480         nd.ni_cnd.cn_nameiop = DELETE;
 2481         nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
 2482         error = nfs_namei(&nd, nfsd, fhp, len, slp, nam, &md, &dpos,
 2483                 &dirp, v3, &dirfor, &dirfor_ret, FALSE);
              /* dirp is only needed for the v3 wcc data; drop it otherwise. */
 2484         if (dirp && !v3) {
 2485                 vrele(dirp);
 2486                 dirp = NULL;
 2487         }
              /*
               * Lookup failed: start the reply, add wcc data for v3, and go
               * straight to the common cleanup.
               */
 2488         if (error) {
 2489                 nfsm_reply(NFSX_WCCDATA(v3));
 2490                 if (v3)
 2491                         nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2492                 error = 0;
 2493                 goto nfsmout;
 2494         }
 2495         vp = nd.ni_vp;
 2496         if (vp->v_type != VDIR) {
 2497                 error = ENOTDIR;
 2498                 goto out;
 2499         }
 2500         /*
 2501          * No rmdir "." please.
 2502          */
 2503         if (nd.ni_dvp == vp) {
 2504                 error = EINVAL;
 2505                 goto out;
 2506         }
 2507         /*
 2508          * The root of a mounted filesystem cannot be deleted.
 2509          */
 2510         if (vp->v_vflag & VV_ROOT)
 2511                 error = EBUSY;
 2512 out:
 2513         /*
 2514          * Issue or abort op.  Since SAVESTART is not set, path name
 2515          * component is freed by the VOP after either.
 2516          */
              /*
               * This label is reached with error == 0 when all checks passed,
               * or with ESTALE, ENOTDIR, EINVAL or EBUSY already set; the VOP
               * is issued only in the first case.
               * NOTE(review): on the ESTALE path nd has only been through
               * ndclear(), so ni_dvp and ni_vp are presumably NULL here --
               * confirm against ndclear().
               */
 2517         if (!error)
 2518                 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 2519         NDFREE(&nd, NDF_ONLY_PNBUF);
 2520 
              /*
               * Fetch the directory attributes after the operation (diraft)
               * for the wcc data.  When dirp is the vnode held in nd.ni_dvp
               * it is queried as is; otherwise the vnodes held in nd are
               * released first and dirp is locked around the query.
               */
 2521         if (dirp) {
 2522                 if (dirp == nd.ni_dvp)
 2523                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 2524                 else {
 2525                         /* Release existing locks to prevent deadlock. */
 2526                         if (nd.ni_dvp) {
 2527                                 if (nd.ni_dvp == nd.ni_vp)
 2528                                         vrele(nd.ni_dvp);
 2529                                 else
 2530                                         vput(nd.ni_dvp);
 2531                         }
 2532                         if (nd.ni_vp)
 2533                                 vput(nd.ni_vp);
                              /* Cleared so the cleanup at nfsmout skips them. */
 2534                         nd.ni_dvp = NULL;
 2535                         nd.ni_vp = NULL;
 2536                         vn_lock(dirp, LK_EXCLUSIVE | LK_RETRY);
 2537                         diraft_ret = VOP_GETATTR(dirp, &diraft, cred);
 2538                         VOP_UNLOCK(dirp, 0);
 2539                 }
 2540         }
              /*
               * NOTE(review): error is cleared right after nfsm_reply();
               * presumably the macro has already recorded the status in the
               * reply -- confirm against the macro definition.
               */
 2541         nfsm_reply(NFSX_WCCDATA(v3));
 2542         error = 0;
 2543         if (v3)
 2544                 nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
 2545         /* fall through */
 2546 
 2547 nfsmout:
              /*
               * Common cleanup: release whichever of nd.ni_dvp, nd.ni_vp and
               * dirp are still non-NULL.  When ni_dvp and ni_vp are the same
               * vnode, vrele() is used for the former and vput() for the
               * latter.  Finally end the write begun with vn_start_write().
               */
 2548         NDFREE(&nd, NDF_ONLY_PNBUF);
 2549         if (nd.ni_dvp) {
 2550                 if (nd.ni_dvp == nd.ni_vp)
 2551                         vrele(nd.ni_dvp);
 2552                 else
 2553                         vput(nd.ni_dvp);
 2554         }
 2555         if (nd.ni_vp)
 2556                 vput(nd.ni_vp);
 2557         if (dirp)
 2558                 vrele(dirp);
 2559 
 2560         vn_finished_write(mp);
 2561         return(error);
 2562 }
 2563 
 2564 /*
 2565  * nfs readdir service
 2566  * - mallocs what it thinks is enough to read
 2567  *      count rounded up to a multiple of DIRBLKSIZ <= NFS_SRVMAXDATA(nfsd)
 2568  * - calls VOP_READDIR()
 2569  * - loops around building the reply
 2570  *      if the output generated exceeds count break out of loop
 2571  *      The nfsm_clget macro is used here so that the reply will be packed
 2572  *      tightly in mbuf clusters.
 2573  * - it only knows that it has encountered eof when the VOP_READDIR()
 2574  *      reads nothing
 2575  * - as such one readdir rpc will return eof false even though the end of
 2576  *      the directory has been reached, and only the next will return eof
 2577  * - it trims out records with d_fileno == 0
 2578  *      this doesn't matter for Unix clients, but such records might
 2579  *      confuse clients for other operating systems.
 2580  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
 2581  *      than requested, but this may not apply to all filesystems. For
 2582  *      example, client NFS does not { although it is never remote mounted
 2583  *      anyhow }
 2584  *     The alternate call nfsrv_readdirplus() does lookups as well.
 2585  * PS: The NFS protocol spec. does not clarify what the "count" byte
 2586  *      argument is a count of.. just name strings and file id's or the
 2587  *      entire reply rpc or ...
 2588  *      I tried just file name and id sizes and it confused the Sun client,
 2589  *      so I am using the full rpc size now. The "paranoia.." comment refers
 2590  *      to including the status longwords that are not a part of the dir.
 2591  *      "entry" structures, but are in the rpc.
 2592  */
 /*
  * Fixed-layout record declared alongside the readdir services.
  * nfsrv_readdirplus() declares one instance (fl) and uses fl_nfh, cast to
  * fhandle_t *, as storage for a file handle.
  *
  * NOTE(review): the per-field notes below are inferred from the field
  * names and array sizes only -- confirm against the code that fills in
  * the structure.
  */
 2593 struct flrep {
 2594         nfsuint64       fl_off;         /* presumably the entry's offset cookie */
 2595         u_int32_t       fl_postopok;    /* presumably "attributes follow" flag */
              /* NFSX_V3FATTR bytes, held as 32-bit words. */
 2596         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
 2597         u_int32_t       fl_fhok;        /* presumably "file handle follows" flag */
 2598         u_int32_t       fl_fhsize;      /* presumably the size of the handle */
              /* NFSX_V3FH bytes, held as 32-bit words; viewed as a fhandle_t. */
 2599         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
 2600 };
 2601 
 /*
  * nfsrv_readdir: service routine for the readdir request.
  *
  * Request decoding: file handle, starting offset (toff), for v3 a cookie
  * verifier (verf, only consulted inside an "#if 0" block), and a byte
  * count (cnt).  siz is cnt rounded up to a multiple of DIRBLKSIZ; both cnt
  * and siz are capped at NFS_SRVMAXDATA(nfsd).
  *
  * Processing:
  *   1. nfsrv_fhtovp() turns the handle into vp; vp must be a VDIR and
  *      pass nfsrv_access(..., VEXEC, ...).
  *   2. VOP_READDIR() fills rbuf (malloc'd, siz bytes) starting at off.
  *      The filesystem must return cookies, otherwise NFSERR_PERM.
  *   3. Leading records are skipped while d_fileno == 0, d_type == DT_WHT
  *      or (not on ZFS) the record's cookie is <= toff.  If nothing is
  *      left, the read is repeated from the offset VOP_READDIR() reached.
  *   4. Remaining records are encoded until the running length would
  *      exceed cnt, in which case eofflag is cleared.
  *
  * Note that mp in this function is a struct mbuf *, not a mount point.
  *
  * Every explicit exit path sets error to 0 before nfsmout, and the normal
  * path only gets there with error == 0.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they presumably
  * use mrq, mrep, md, dpos, mb, mreq, mp, bp, be and tl and may jump to
  * nfsmout on their own -- confirm against their definitions.
  */
 2602 int
 2603 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2604     struct mbuf **mrq)
 2605 {
 2606         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2607         struct sockaddr *nam = nfsd->nd_nam;
 2608         caddr_t dpos = nfsd->nd_dpos;
 2609         struct ucred *cred = nfsd->nd_cr;
 2610         char *bp, *be;
 2611         struct mbuf *mp;
 2612         struct dirent *dp;
 2613         caddr_t cp;
 2614         u_int32_t *tl;
 2615         caddr_t bpos;
 2616         struct mbuf *mb, *mreq;
 2617         char *cpos, *cend, *rbuf;
 2618         struct vnode *vp = NULL;
 2619         struct vattr at;
 2620         nfsfh_t nfh;
 2621         fhandle_t *fhp;
 2622         struct uio io;
 2623         struct iovec iv;
 2624         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
 2625         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
 2626         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2627         u_quad_t off, toff, verf;
 2628         u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
 2629         int not_zfs;
 2630 
 2631         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2632         fhp = &nfh.fh_generic;
 2633         nfsm_srvmtofh(fhp);
              /*
               * v3 sends a 64-bit offset and a 64-bit verifier before the
               * count; otherwise the offset is a single 32-bit word.
               */
 2634         if (v3) {
 2635                 tl = nfsm_dissect_nonblock(u_int32_t *, 5 * NFSX_UNSIGNED);
 2636                 toff = fxdr_hyper(tl);
 2637                 tl += 2;
 2638                 verf = fxdr_hyper(tl);
 2639                 tl += 2;
 2640         } else {
 2641                 tl = nfsm_dissect_nonblock(u_int32_t *, 2 * NFSX_UNSIGNED);
 2642                 toff = fxdr_unsigned(u_quad_t, *tl++);
 2643                 verf = 0;       /* shut up gcc */
 2644         }
 2645         off = toff;
              /*
               * Round the requested count up to whole DIRBLKSIZ blocks, then
               * cap both the reply budget (cnt) and the read size (siz).
               * NOTE(review): cnt is a signed int taken from the request and
               * only an upper bound is applied; a negative value would reach
               * the malloc() below via siz -- confirm whether that is
               * rejected elsewhere.
               */
 2646         cnt = fxdr_unsigned(int, *tl);
 2647         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 2648         xfer = NFS_SRVMAXDATA(nfsd);
 2649         if (cnt > xfer)
 2650                 cnt = xfer;
 2651         if (siz > xfer)
 2652                 siz = xfer;
 2653         fullsiz = siz;
              /* Map the handle to a vnode and insist on a directory. */
 2654         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
 2655         if (!error && vp->v_type != VDIR) {
 2656                 error = ENOTDIR;
 2657                 vput(vp);
 2658                 vp = NULL;
 2659         }
 2660         if (error) {
 2661                 nfsm_reply(NFSX_UNSIGNED);
 2662                 if (v3)
 2663                         nfsm_srvpostop_attr(getret, &at);
 2664                 error = 0;
 2665                 goto nfsmout;
 2666         }
 2667 
 2668         /*
 2669          * Obtain lock on vnode for this section of the code
 2670          */
 2671         if (v3) {
 2672                 error = getret = VOP_GETATTR(vp, &at, cred);
 2673 #if 0
 2674                 /*
 2675                  * XXX This check may be too strict for Solaris 2.5 clients.
 2676                  */
 2677                 if (!error && toff && verf && verf != at.va_filerev)
 2678                         error = NFSERR_BAD_COOKIE;
 2679 #endif
 2680         }
 2681         if (!error)
 2682                 error = nfsrv_access(vp, VEXEC, cred, rdonly, 0);
 2683         if (error) {
 2684                 vput(vp);
 2685                 vp = NULL;
 2686                 nfsm_reply(NFSX_POSTOPATTR(v3));
 2687                 if (v3)
 2688                         nfsm_srvpostop_attr(getret, &at);
 2689                 error = 0;
 2690                 goto nfsmout;
 2691         }
              /*
               * Remember whether the filesystem type name is something other
               * than "zfs"; this decides whether records are skipped by
               * cookie value further down.  vp is unlocked afterwards, and
               * the later exit paths use vrele() where the earlier ones used
               * vput().
               */
 2692         not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") != 0;
 2693         VOP_UNLOCK(vp, 0);
 2694 
 2695         /*
 2696          * end section.  Allocate rbuf and continue
 2697          */
 2698         rbuf = malloc(siz, M_TEMP, M_WAITOK);
 2699 again:
              /* (Re)build the uio: read fullsiz bytes at off into rbuf. */
 2700         iv.iov_base = rbuf;
 2701         iv.iov_len = fullsiz;
 2702         io.uio_iov = &iv;
 2703         io.uio_iovcnt = 1;
 2704         io.uio_offset = (off_t)off;
 2705         io.uio_resid = fullsiz;
 2706         io.uio_segflg = UIO_SYSSPACE;
 2707         io.uio_rw = UIO_READ;
 2708         io.uio_td = NULL;
 2709         eofflag = 0;
              /* Drop the cookie array left over from a previous pass. */
 2710         if (cookies) {
 2711                 free((caddr_t)cookies, M_TEMP);
 2712                 cookies = NULL;
 2713         }
 2714         vn_lock(vp, LK_SHARED | LK_RETRY);
 2715         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
 2716         off = (off_t)io.uio_offset;
              /* A successful read that returned no cookie array is refused. */
 2717         if (!cookies && !error)
 2718                 error = NFSERR_PERM;
 2719         if (v3) {
 2720                 getret = VOP_GETATTR(vp, &at, cred);
 2721                 if (!error)
 2722                         error = getret;
 2723         }
 2724         VOP_UNLOCK(vp, 0);
 2725         if (error) {
 2726                 vrele(vp);
 2727                 vp = NULL;
 2728                 free((caddr_t)rbuf, M_TEMP);
 2729                 if (cookies)
 2730                         free((caddr_t)cookies, M_TEMP);
 2731                 nfsm_reply(NFSX_POSTOPATTR(v3));
 2732                 if (v3)
 2733                         nfsm_srvpostop_attr(getret, &at);
 2734                 error = 0;
 2735                 goto nfsmout;
 2736         }
              /*
               * Short read: shrink siz by the residual, so it becomes the
               * number of bytes placed in rbuf.
               */
 2737         if (io.uio_resid) {
 2738                 siz -= io.uio_resid;
 2739 
 2740                 /*
 2741                  * If nothing read, return eof
 2742                  * rpc reply
 2743                  */
 2744                 if (siz == 0) {
 2745                         vrele(vp);
 2746                         vp = NULL;
 2747                         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) +
 2748                                 2 * NFSX_UNSIGNED);
 2749                         if (v3) {
 2750                                 nfsm_srvpostop_attr(getret, &at);
 2751                                 tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 2752                                 txdr_hyper(at.va_filerev, tl);
 2753                                 tl += 2;
 2754                         } else
 2755                                 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
                              /* No entry follows (false), end of directory (true). */
 2756                         *tl++ = nfsrv_nfs_false;
 2757                         *tl = nfsrv_nfs_true;
 2758                         free((caddr_t)rbuf, M_TEMP);
 2759                         free((caddr_t)cookies, M_TEMP);
 2760                         error = 0;
 2761                         goto nfsmout;
 2762                 }
 2763         }
 2764 
 2765         /*
 2766          * Check for degenerate cases of nothing useful read.
 2767          * If so go try again
 2768          */
 2769         cpos = rbuf;
 2770         cend = rbuf + siz;
 2771         dp = (struct dirent *)cpos;
 2772         cookiep = cookies;
 2773         /*
 2774          * For some reason FreeBSD's ufs_readdir() chooses to back the
 2775          * directory offset up to a block boundary, so it is necessary to
 2776          * skip over the records that precede the requested offset. This
 2777          * requires the assumption that file offset cookies monotonically
 2778          * increase.
 2779          * Since the offset cookies don't monotonically increase for ZFS,
 2780          * this is not done when ZFS is the file system.
 2781          */
 2782         while (cpos < cend && ncookies > 0 &&
 2783                 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 2784                  (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
 2785                 cpos += dp->d_reclen;
 2786                 dp = (struct dirent *)cpos;
 2787                 cookiep++;
 2788                 ncookies--;
 2789         }
              /*
               * Every record was skipped: continue from the offset that
               * VOP_READDIR() reached, with the full buffer size again.
               */
 2790         if (cpos >= cend || ncookies == 0) {
 2791                 toff = off;
 2792                 siz = fullsiz;
 2793                 goto again;
 2794         }
 2795 
 2796         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
 2797         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz);
 2798         if (v3) {
 2799                 nfsm_srvpostop_attr(getret, &at);
 2800                 tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 2801                 txdr_hyper(at.va_filerev, tl);
 2802         }
              /*
               * mp/bp/be track the mbuf being filled, the write position in
               * it and the end of its free space.
               * NOTE(review): nfsm_clget presumably points tl at bp and moves
               * on to a new buffer once bp reaches be -- confirm against the
               * macro definition.
               */
 2803         mp = mb;
 2804         bp = bpos;
 2805         be = bp + M_TRAILINGSPACE(mp);
 2806 
 2807         /* Loop through the records and build reply */
 2808         while (cpos < cend && ncookies > 0) {
 2809                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
 2810                         nlen = dp->d_namlen;
 2811                         rem = nfsm_rndup(nlen) - nlen;
                              /*
                               * Account for this entry: four words plus the
                               * padded name, and two more words for v3.  Stop
                               * (and report "not eof") once cnt is exceeded.
                               */
 2812                         len += (4 * NFSX_UNSIGNED + nlen + rem);
 2813                         if (v3)
 2814                                 len += 2 * NFSX_UNSIGNED;
 2815                         if (len > cnt) {
 2816                                 eofflag = 0;
 2817                                 break;
 2818                         }
 2819                         /*
 2820                          * Build the directory record xdr from
 2821                          * the dirent entry.
 2822                          */
                              /* "Entry follows" marker. */
 2823                         nfsm_clget;
 2824                         *tl = nfsrv_nfs_true;
 2825                         bp += NFSX_UNSIGNED;
                              /*
                               * v3 puts a zero word in front of d_fileno.
                               * NOTE(review): d_fileno goes through
                               * txdr_unsigned(), so presumably only 32 bits
                               * of it are sent -- confirm.
                               */
 2826                         if (v3) {
 2827                                 nfsm_clget;
 2828                                 *tl = 0;
 2829                                 bp += NFSX_UNSIGNED;
 2830                         }
 2831                         nfsm_clget;
 2832                         *tl = txdr_unsigned(dp->d_fileno);
 2833                         bp += NFSX_UNSIGNED;
 2834                         nfsm_clget;
 2835                         *tl = txdr_unsigned(nlen);
 2836                         bp += NFSX_UNSIGNED;
 2837 
 2838                         /* And loop around copying the name */
                              /*
                               * Each pass copies at most the space left
                               * before be, so a name may be split over
                               * several buffers.
                               */
 2839                         xfer = nlen;
 2840                         cp = dp->d_name;
 2841                         while (xfer > 0) {
 2842                                 nfsm_clget;
 2843                                 if ((bp+xfer) > be)
 2844                                         tsiz = be-bp;
 2845                                 else
 2846                                         tsiz = xfer;
 2847                                 bcopy(cp, bp, tsiz);
 2848                                 bp += tsiz;
 2849                                 xfer -= tsiz;
 2850                                 if (xfer > 0)
 2851                                         cp += tsiz;
 2852                         }
 2853                         /* And null pad to an int32_t boundary. */
 2854                         for (i = 0; i < rem; i++)
 2855                                 *bp++ = '\0';
 2856                         nfsm_clget;
 2857 
 2858                         /* Finish off the record */
                              /*
                               * The record ends with this entry's cookie,
                               * preceded by a zero word for v3.
                               */
 2859                         if (v3) {
 2860                                 *tl = 0;
 2861                                 bp += NFSX_UNSIGNED;
 2862                                 nfsm_clget;
 2863                         }
 2864                         *tl = txdr_unsigned(*cookiep);
 2865                         bp += NFSX_UNSIGNED;
 2866                 }
                      /* Advance to the next dirent and its cookie. */
 2867                 cpos += dp->d_reclen;
 2868                 dp = (struct dirent *)cpos;
 2869                 cookiep++;
 2870                 ncookies--;
 2871         }
 2872         vrele(vp);
 2873         vp = NULL;
              /* Terminate the list (false), then append the eof flag. */
 2874         nfsm_clget;
 2875         *tl = nfsrv_nfs_false;
 2876         bp += NFSX_UNSIGNED;
 2877         nfsm_clget;
 2878         if (eofflag)
 2879                 *tl = nfsrv_nfs_true;
 2880         else
 2881                 *tl = nfsrv_nfs_false;
 2882         bp += NFSX_UNSIGNED;
              /*
               * Fix up the length of the last mbuf written: set it from the
               * write position if a later mbuf than mb is in use, otherwise
               * grow mb by what was appended after bpos.
               */
 2883         if (mp != mb) {
 2884                 if (bp < be)
 2885                         mp->m_len = bp - mtod(mp, caddr_t);
 2886         } else
 2887                 mp->m_len += bp - bpos;
 2888         free((caddr_t)rbuf, M_TEMP);
 2889         free((caddr_t)cookies, M_TEMP);
 2890 
 2891 nfsmout:
              /* vp is still set only on paths that did not release it above. */
 2892         if (vp)
 2893                 vrele(vp);
 2894         return(error);
 2895 }
 2896 
 2897 int
 2898 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 2899     struct mbuf **mrq)
 2900 {
 2901         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 2902         struct sockaddr *nam = nfsd->nd_nam;
 2903         caddr_t dpos = nfsd->nd_dpos;
 2904         struct ucred *cred = nfsd->nd_cr;
 2905         char *bp, *be;
 2906         struct mbuf *mp;
 2907         struct dirent *dp;
 2908         caddr_t cp;
 2909         u_int32_t *tl;
 2910         caddr_t bpos;
 2911         struct mbuf *mb, *mreq;
 2912         char *cpos, *cend, *rbuf;
 2913         struct vnode *vp = NULL, *nvp;
 2914         struct flrep fl;
 2915         nfsfh_t nfh;
 2916         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
 2917         struct uio io;
 2918         struct iovec iv;
 2919         struct vattr va, at, *vap = &va;
 2920         struct nfs_fattr *fp;
 2921         int len, nlen, rem, xfer, tsiz, i, error = 0, error1, getret = 1;
 2922         int vp_locked;
 2923         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
 2924         u_quad_t off, toff, verf;
 2925         u_long *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
 2926         int v3 = (nfsd->nd_flag & ND_NFSV3);
 2927         int usevget = 1;
 2928         struct componentname cn;
 2929         struct mount *mntp = NULL;
 2930         int not_zfs;
 2931 
 2932         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 2933         vp_locked = 0;
 2934         if (!v3)
 2935                 panic("nfsrv_readdirplus: v3 proc called on a v2 connection");
 2936         fhp = &nfh.fh_generic;
 2937         nfsm_srvmtofh(fhp);
 2938         tl = nfsm_dissect_nonblock(u_int32_t *, 6 * NFSX_UNSIGNED);
 2939         toff = fxdr_hyper(tl);
 2940         tl += 2;
 2941         verf = fxdr_hyper(tl);
 2942         tl += 2;
 2943         siz = fxdr_unsigned(int, *tl++);
 2944         cnt = fxdr_unsigned(int, *tl);
 2945         off = toff;
 2946         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 2947         xfer = NFS_SRVMAXDATA(nfsd);
 2948         if (cnt > xfer)
 2949                 cnt = xfer;
 2950         if (siz > xfer)
 2951                 siz = xfer;
 2952         fullsiz = siz;
 2953         error = nfsrv_fhtovp(fhp, NFSRV_FLAG_BUSY, &vp, nfsd, slp,
 2954             nam, &rdonly);
 2955         if (!error) {
 2956                 vp_locked = 1;
 2957                 mntp = vp->v_mount;
 2958                 if (vp->v_type != VDIR) {
 2959                         error = ENOTDIR;
 2960                         vput(vp);
 2961                         vp = NULL;
 2962                         vp_locked = 0;
 2963                 }
 2964         }
 2965         if (error) {
 2966                 nfsm_reply(NFSX_UNSIGNED);
 2967                 nfsm_srvpostop_attr(getret, &at);
 2968                 error = 0;
 2969                 goto nfsmout;
 2970         }
 2971         error = getret = VOP_GETATTR(vp, &at, cred);
 2972 #if 0
 2973         /*
 2974          * XXX This check may be too strict for Solaris 2.5 clients.
 2975          */
 2976         if (!error && toff && verf && verf != at.va_filerev)
 2977                 error = NFSERR_BAD_COOKIE;
 2978 #endif
 2979         if (!error)
 2980                 error = nfsrv_access(vp, VEXEC, cred, rdonly, 0);
 2981         if (error) {
 2982                 vput(vp);
 2983                 vp_locked = 0;
 2984                 vp = NULL;
 2985                 nfsm_reply(NFSX_V3POSTOPATTR);
 2986                 nfsm_srvpostop_attr(getret, &at);
 2987                 error = 0;
 2988                 goto nfsmout;
 2989         }
 2990         not_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") != 0;
 2991         VOP_UNLOCK(vp, 0);
 2992         vp_locked = 0;
 2993         rbuf = malloc(siz, M_TEMP, M_WAITOK);
 2994 again:
 2995         iv.iov_base = rbuf;
 2996         iv.iov_len = fullsiz;
 2997         io.uio_iov = &iv;
 2998         io.uio_iovcnt = 1;
 2999         io.uio_offset = (off_t)off;
 3000         io.uio_resid = fullsiz;
 3001         io.uio_segflg = UIO_SYSSPACE;
 3002         io.uio_rw = UIO_READ;
 3003         io.uio_td = NULL;
 3004         eofflag = 0;
 3005         vp_locked = 1;
 3006         if (cookies) {
 3007                 free((caddr_t)cookies, M_TEMP);
 3008                 cookies = NULL;
 3009         }
 3010         vn_lock(vp, LK_SHARED | LK_RETRY);
 3011         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
 3012         off = (u_quad_t)io.uio_offset;
 3013         getret = VOP_GETATTR(vp, &at, cred);
 3014         VOP_UNLOCK(vp, 0);
 3015         vp_locked = 0;
 3016         if (!cookies && !error)
 3017                 error = NFSERR_PERM;
 3018         if (!error)
 3019                 error = getret;
 3020         if (error) {
 3021                 vrele(vp);
 3022                 vp = NULL;
 3023                 if (cookies)
 3024                         free((caddr_t)cookies, M_TEMP);
 3025                 free((caddr_t)rbuf, M_TEMP);
 3026                 nfsm_reply(NFSX_V3POSTOPATTR);
 3027                 nfsm_srvpostop_attr(getret, &at);
 3028                 error = 0;
 3029                 goto nfsmout;
 3030         }
 3031         if (io.uio_resid) {
 3032                 siz -= io.uio_resid;
 3033 
 3034                 /*
 3035                  * If nothing read, return eof
 3036                  * rpc reply
 3037                  */
 3038                 if (siz == 0) {
 3039                         vrele(vp);
 3040                         vp = NULL;
 3041                         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
 3042                                 2 * NFSX_UNSIGNED);
 3043                         nfsm_srvpostop_attr(getret, &at);
 3044                         tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED);
 3045                         txdr_hyper(at.va_filerev, tl);
 3046                         tl += 2;
 3047                         *tl++ = nfsrv_nfs_false;
 3048                         *tl = nfsrv_nfs_true;
 3049                         free((caddr_t)cookies, M_TEMP);
 3050                         free((caddr_t)rbuf, M_TEMP);
 3051                         error = 0;
 3052                         goto nfsmout;
 3053                 }
 3054         }
 3055 
 3056         /*
 3057          * Check for degenerate cases of nothing useful read.
 3058          * If so go try again
 3059          */
 3060         cpos = rbuf;
 3061         cend = rbuf + siz;
 3062         dp = (struct dirent *)cpos;
 3063         cookiep = cookies;
 3064         /*
 3065          * For some reason FreeBSD's ufs_readdir() chooses to back the
 3066          * directory offset up to a block boundary, so it is necessary to
 3067          * skip over the records that precede the requested offset. This
 3068          * requires the assumption that file offset cookies monotonically
 3069          * increase.
 3070          * Since the offset cookies don't monotonically increase for ZFS,
 3071          * this is not done when ZFS is the file system.
 3072          */
 3073         while (cpos < cend && ncookies > 0 &&
 3074                 (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 3075                  (not_zfs != 0 && ((u_quad_t)(*cookiep)) <= toff))) {
 3076                 cpos += dp->d_reclen;
 3077                 dp = (struct dirent *)cpos;
 3078                 cookiep++;
 3079                 ncookies--;
 3080         }
 3081         if (cpos >= cend || ncookies == 0) {
 3082                 toff = off;
 3083                 siz = fullsiz;
 3084                 goto again;
 3085         }
 3086 
 3087         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
 3088             2 * NFSX_UNSIGNED;
 3089         nfsm_reply(cnt);
 3090         nfsm_srvpostop_attr(getret, &at);
 3091         tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
 3092         txdr_hyper(at.va_filerev, tl);
 3093         mp = mb;
 3094         bp = bpos;
 3095         be = bp + M_TRAILINGSPACE(mp);
 3096 
 3097         /* Loop through the records and build reply */
 3098         while (cpos < cend && ncookies > 0) {
 3099                 if (dp->d_fileno != 0 && dp->d_type != DT_WHT) {
 3100                         nlen = dp->d_namlen;
 3101                         rem = nfsm_rndup(nlen)-nlen;
 3102 
 3103                         if (usevget) {
 3104                                 /*
 3105                                  * For readdir_and_lookup get the vnode using
 3106                                  * the file number.
 3107                                  */
 3108                                 error = VFS_VGET(mntp, dp->d_fileno, LK_SHARED,
 3109                                     &nvp);
 3110                                 if (error != 0 && error != EOPNOTSUPP) {
 3111                                         error = 0;
 3112                                         goto invalid;
 3113                                 } else if (error == EOPNOTSUPP) {
 3114                                         /*
 3115                                          * VFS_VGET() not supported?
 3116                                          * Let's switch to VOP_LOOKUP().
 3117                                          */
 3118                                         error = 0;
 3119                                         usevget = 0;
 3120                                         cn.cn_nameiop = LOOKUP;
 3121                                         cn.cn_flags = ISLASTCN | NOFOLLOW | \
 3122                                             LOCKSHARED | LOCKLEAF;
 3123                                         cn.cn_lkflags = LK_SHARED | LK_RETRY;
 3124                                         cn.cn_cred = cred;
 3125                                         cn.cn_thread = curthread;
 3126                                 }
 3127                         }
 3128                         if (!usevget) {
 3129                                 cn.cn_nameptr = dp->d_name;
 3130                                 cn.cn_namelen = dp->d_namlen;
 3131                                 if (dp->d_namlen == 2 &&
 3132                                     dp->d_name[0] == '.' &&
 3133                                     dp->d_name[1] == '.') {
 3134                                         cn.cn_flags |= ISDOTDOT;
 3135                                 } else {
 3136                                         cn.cn_flags &= ~ISDOTDOT;
 3137                                 }
 3138                                 if (!vp_locked) {
 3139                                         vn_lock(vp, LK_SHARED | LK_RETRY);
 3140                                         vp_locked = 1;
 3141                                 }
 3142                                 if ((vp->v_vflag & VV_ROOT) != 0 &&
 3143                                     (cn.cn_flags & ISDOTDOT) != 0) {
 3144                                         vref(vp);
 3145                                         nvp = vp;
 3146                                 } else if (VOP_LOOKUP(vp, &nvp, &cn) != 0)
 3147                                         goto invalid;
 3148                         }
 3149 
 3150                         bzero((caddr_t)nfhp, NFSX_V3FH);
 3151                         nfhp->fh_fsid = nvp->v_mount->mnt_stat.f_fsid;
 3152                         if ((error1 = VOP_VPTOFH(nvp, &nfhp->fh_fid)) == 0)
 3153                                 error1 = VOP_GETATTR(nvp, vap, cred);
 3154                         if (!usevget && vp == nvp)
 3155                                 vunref(nvp);
 3156                         else
 3157                                 vput(nvp);
 3158                         nvp = NULL;
 3159                         if (error1 != 0)
 3160                                 goto invalid;
 3161 
 3162                         /*
 3163                          * If either the dircount or maxcount will be
 3164                          * exceeded, get out now. Both of these lengths
 3165                          * are calculated conservatively, including all
 3166                          * XDR overheads.
 3167                          */
 3168                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
 3169                                 NFSX_V3POSTOPATTR);
 3170                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
 3171                         if (len > cnt || dirlen > fullsiz) {
 3172                                 eofflag = 0;
 3173                                 break;
 3174                         }
 3175 
 3176                         /*
 3177                          * Build the directory record xdr from
 3178                          * the dirent entry.
 3179                          */
 3180                         fp = (struct nfs_fattr *)&fl.fl_fattr;
 3181                         nfsm_srvfillattr(vap, fp);
 3182                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
 3183                         fl.fl_fhok = nfsrv_nfs_true;
 3184                         fl.fl_postopok = nfsrv_nfs_true;
 3185                         fl.fl_off.nfsuquad[0] = 0;
 3186                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
 3187 
 3188                         nfsm_clget;
 3189                         *tl = nfsrv_nfs_true;
 3190                         bp += NFSX_UNSIGNED;
 3191                         nfsm_clget;
 3192                         *tl = 0;
 3193                         bp += NFSX_UNSIGNED;
 3194                         nfsm_clget;
 3195                         *tl = txdr_unsigned(dp->d_fileno);
 3196                         bp += NFSX_UNSIGNED;
 3197                         nfsm_clget;
 3198                         *tl = txdr_unsigned(nlen);
 3199                         bp += NFSX_UNSIGNED;
 3200 
 3201                         /* And loop around copying the name */
 3202                         xfer = nlen;
 3203                         cp = dp->d_name;
 3204                         while (xfer > 0) {
 3205                                 nfsm_clget;
 3206                                 if ((bp + xfer) > be)
 3207                                         tsiz = be - bp;
 3208                                 else
 3209                                         tsiz = xfer;
 3210                                 bcopy(cp, bp, tsiz);
 3211                                 bp += tsiz;
 3212                                 xfer -= tsiz;
 3213                                 if (xfer > 0)
 3214                                         cp += tsiz;
 3215                         }
 3216                         /* And null pad to an int32_t boundary. */
 3217                         for (i = 0; i < rem; i++)
 3218                                 *bp++ = '\0';
 3219 
 3220                         /*
 3221                          * Now copy the flrep structure out.
 3222                          */
 3223                         xfer = sizeof (struct flrep);
 3224                         cp = (caddr_t)&fl;
 3225                         while (xfer > 0) {
 3226                                 nfsm_clget;
 3227                                 if ((bp + xfer) > be)
 3228                                         tsiz = be - bp;
 3229                                 else
 3230                                         tsiz = xfer;
 3231                                 bcopy(cp, bp, tsiz);
 3232                                 bp += tsiz;
 3233                                 xfer -= tsiz;
 3234                                 if (xfer > 0)
 3235                                         cp += tsiz;
 3236                         }
 3237                 }
 3238 invalid:
 3239                 cpos += dp->d_reclen;
 3240                 dp = (struct dirent *)cpos;
 3241                 cookiep++;
 3242                 ncookies--;
 3243         }
 3244         if (!usevget && vp_locked)
 3245                 vput(vp);
 3246         else
 3247                 vrele(vp);
 3248         vp = NULL;
 3249         nfsm_clget;
 3250         *tl = nfsrv_nfs_false;
 3251         bp += NFSX_UNSIGNED;
 3252         nfsm_clget;
 3253         if (eofflag)
 3254                 *tl = nfsrv_nfs_true;
 3255         else
 3256                 *tl = nfsrv_nfs_false;
 3257         bp += NFSX_UNSIGNED;
 3258         if (mp != mb) {
 3259                 if (bp < be)
 3260                         mp->m_len = bp - mtod(mp, caddr_t);
 3261         } else
 3262                 mp->m_len += bp - bpos;
 3263         free((caddr_t)cookies, M_TEMP);
 3264         free((caddr_t)rbuf, M_TEMP);
 3265 nfsmout:
 3266         if (vp)
 3267                 vrele(vp);
 3268         if (mntp)
 3269                 vfs_unbusy(mntp);
 3270         return(error);
 3271 }
 3272 
 3273 /*
 3274  * nfs commit service
       *
       * Handles the NFSv3 COMMIT procedure; panics if reached on a non-v3
       * request.  The file handle, a 64-bit offset and a 32-bit count are
       * decoded from the request.  A count of zero, a count above
       * MAX_COMMIT_COUNT, or a count that decodes as negative causes the
       * whole file to be flushed with VOP_FSYNC(); otherwise only the dirty
       * pages and delayed-write buffers covering the aligned range are
       * written synchronously.
       *
       * The reply carries wcc data built from the attributes sampled before
       * and after the flush and, when no error occurred, a write verifier
       * taken from nfsver (seeded from boottime on first use).
       *
       * Returns the value left in "error"; the visible paths reset it to 0
       * once the reply has been built.  NOTE(review): the nfsm_* macros are
       * defined elsewhere and presumably jump to nfsmout on failure --
       * confirm against their definitions.
 3275  */
 3276 int
 3277 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3278     struct mbuf **mrq)
 3279 {
 3280         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3281         struct sockaddr *nam = nfsd->nd_nam;
 3282         caddr_t dpos = nfsd->nd_dpos;
 3283         struct ucred *cred = nfsd->nd_cr;
 3284         struct vattr bfor, aft;
 3285         struct vnode *vp = NULL;
 3286         nfsfh_t nfh;
 3287         fhandle_t *fhp;
 3288         u_int32_t *tl;
 3289         caddr_t bpos;
 3290         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
 3291         struct mbuf *mb, *mreq;
 3292         u_quad_t off;
 3293         struct mount *mp = NULL;
 3294         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3295 
 3296         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3297         if (!v3)
 3298                 panic("nfsrv_commit: v3 proc called on a v2 connection");
 3299         fhp = &nfh.fh_generic;
 3300         nfsm_srvmtofh(fhp);
              /*
               * Find the mount named by the handle's fsid; a missing mount
               * is reported to the client as ESTALE via the ereply path.
               */
 3301         if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL) {
 3302                 error = ESTALE;
 3303                 goto ereply;
 3304         }
              /* The return value of vn_start_write() is deliberately ignored. */
 3305         (void) vn_start_write(NULL, &mp, V_WAIT);
 3306         vfs_rel(mp);            /* The write holds a ref. */
 3307         tl = nfsm_dissect_nonblock(u_int32_t *, 3 * NFSX_UNSIGNED);
 3308 
 3309         /*
 3310          * XXX At this time VOP_FSYNC() does not accept offset and byte
 3311          * count parameters, so these arguments are useless (someday maybe).
 3312          */
 3313         off = fxdr_hyper(tl);
 3314         tl += 2;
 3315         cnt = fxdr_unsigned(int, *tl);
 3316         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
 3317         if (error) {
                      /*
                       * Handle translation failed: send the short reply with
                       * wcc data only.  for_ret and aft_ret still hold their
                       * initial value of 1 here.
                       */
 3318                 nfsm_reply(2 * NFSX_UNSIGNED);
 3319                 nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
 3320                 error = 0;
 3321                 goto nfsmout;
 3322         }
 3323         for_ret = VOP_GETATTR(vp, &bfor, cred);
 3324 
 3325         /*
 3326          * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of file
 3327          * is done.  At this time VOP_FSYNC does not accept offset and byte count
 3328          * parameters so call VOP_FSYNC the whole file for now.
               *
               * The count is decoded into a signed int, so a wire value of
               * 2^31 or more shows up here as a negative number.  Treat it
               * like any other oversized count and sync the whole file:
               * taking the ranged path with a negative length would skip the
               * buffer walk below (its loop requires cnt > 0) while still
               * reporting success to the client.
 3329          */
 3330         if (cnt <= 0 || cnt > MAX_COMMIT_COUNT) {
 3331                 /*
 3332                  * Give up and do the whole thing
 3333                  */
 3334                 if (vp->v_object &&
 3335                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 3336                         VM_OBJECT_WLOCK(vp->v_object);
 3337                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 3338                         VM_OBJECT_WUNLOCK(vp->v_object);
 3339                 }
 3340                 error = VOP_FSYNC(vp, MNT_WAIT, curthread);
 3341         } else {
 3342                 /*
 3343                  * Locate and synchronously write any buffers that fall
 3344                  * into the requested range.  Note:  we are assuming that
 3345                  * f_iosize is a power of 2.
 3346                  */
 3347                 int iosize = vp->v_mount->mnt_stat.f_iosize;
 3348                 int iomask = iosize - 1;
 3349                 struct bufobj *bo;
 3350                 daddr_t lblkno;
 3351 
 3352                 /*
 3353                  * Align to iosize boundary, super-align to page boundary.
                       * The bytes skipped by rounding "off" down are added to
                       * "cnt" so the end of the range does not move.
 3354                  */
 3355                 if (off & iomask) {
 3356                         cnt += off & iomask;
 3357                         off &= ~(u_quad_t)iomask;
 3358                 }
 3359                 if (off & PAGE_MASK) {
 3360                         cnt += off & PAGE_MASK;
 3361                         off &= ~(u_quad_t)PAGE_MASK;
 3362                 }
 3363                 lblkno = off / iosize;
 3364 
 3365                 if (vp->v_object &&
 3366                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 3367                         VM_OBJECT_WLOCK(vp->v_object);
 3368                         vm_object_page_clean(vp->v_object, off, off + cnt,
 3369                             OBJPC_SYNC);
 3370                         VM_OBJECT_WUNLOCK(vp->v_object);
 3371                 }
 3372 
                      /*
                       * Walk the range one iosize block at a time.  The bufobj
                       * lock is held at the top of every iteration; BUF_LOCK()
                       * is handed that lock as its interlock, so it has to be
                       * re-taken before continuing.
                       */
 3373                 bo = &vp->v_bufobj;
 3374                 BO_LOCK(bo);
 3375                 while (cnt > 0) {
 3376                         struct buf *bp;
 3377 
 3378                         /*
 3379                          * If we have a buffer and it is marked B_DELWRI we
 3380                          * have to lock and write it.  Otherwise the prior
 3381                          * write is assumed to have already been committed.
 3382                          *
 3383                          * gbincore() can return invalid buffers now so we
 3384                          * have to check that bit as well (though B_DELWRI
 3385                          * should not be set if B_INVAL is set there could be
 3386                          * a race here since we haven't locked the buffer).
 3387                          */
 3388                         if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 3389                                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 3390                                     LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
 3391                                         BO_LOCK(bo);
 3392                                         continue; /* retry */
 3393                                 }
 3394                                 if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
 3395                                     B_DELWRI) {
                                              /* Clear B_ASYNC before the write. */
 3396                                         bremfree(bp);
 3397                                         bp->b_flags &= ~B_ASYNC;
 3398                                         bwrite(bp);
 3399                                         ++nfs_commit_miss;
 3400                                 } else
 3401                                         BUF_UNLOCK(bp);
 3402                                 BO_LOCK(bo);
 3403                         }
 3404                         ++nfs_commit_blks;
                              /* Less than one block left: this was the last one. */
 3405                         if (cnt < iosize)
 3406                                 break;
 3407                         cnt -= iosize;
 3408                         ++lblkno;
 3409                 }
 3410                 BO_UNLOCK(bo);
 3411         }
 3412 
 3413         aft_ret = VOP_GETATTR(vp, &aft, cred);
 3414         vput(vp);
 3415         vp = NULL;
 3416 ereply:
 3417         nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF);
 3418         nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft);
 3419         if (!error) {
                      /*
                       * Success: append the write verifier.  nfsver is seeded
                       * from boottime the first time it is found unset.
                       */
 3420                 tl = nfsm_build(u_int32_t *, NFSX_V3WRITEVERF);
 3421                 if (nfsver.tv_sec == 0)
 3422                         nfsver = boottime;
 3423                 *tl++ = txdr_unsigned(nfsver.tv_sec);
 3424                 *tl = txdr_unsigned(nfsver.tv_usec);
 3425         } else {
 3426                 error = 0;
 3427         }
 3428 nfsmout:
 3429         if (vp)
 3430                 vput(vp);
              /*
               * mp is still NULL here when vfs_getvfs() failed above.
               * NOTE(review): this relies on vn_finished_write() accepting a
               * NULL mount -- confirm.
               */
 3431         vn_finished_write(mp);
 3432         return(error);
 3433 }
 3434 
 3435 /*
 3436  * nfs statfs service
       *
       * Translates the request's file handle to a vnode, queries the owning
       * mount with VFS_STATFS() and encodes the result in either the v3 or
       * the v2 reply layout, selected by the ND_NFSV3 flag on the request.
       * For v3 the post-op attributes of the vnode are sent as well.
       *
       * Every visible path resets "error" to 0 once a reply has been
       * started, so failures are not returned to the caller from here.
       * NOTE(review): the nfsm_* macros are defined elsewhere and
       * presumably record the status in the reply and jump to nfsmout on
       * failure -- confirm against their definitions.
 3437  */
 3438 int
 3439 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3440     struct mbuf **mrq)
 3441 {
 3442         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3443         struct sockaddr *nam = nfsd->nd_nam;
 3444         caddr_t dpos = nfsd->nd_dpos;
 3445         struct ucred *cred = nfsd->nd_cr;
 3446         struct statfs *sf;
 3447         struct nfs_statfs *sfp;
 3448         caddr_t bpos;
 3449         int error = 0, rdonly, getret = 1;
 3450         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3451         struct mbuf *mb, *mreq;
 3452         struct vnode *vp = NULL;
 3453         struct vattr at;
 3454         nfsfh_t nfh;
 3455         fhandle_t *fhp;
 3456         struct statfs statfs;
 3457         u_quad_t tval;
 3458 
 3459         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3460         fhp = &nfh.fh_generic;
 3461         nfsm_srvmtofh(fhp);
 3462         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
 3463         if (error) {
                      /* No vnode: short reply; getret is still 1 here. */
 3464                 nfsm_reply(NFSX_UNSIGNED);
 3465                 if (v3)
 3466                         nfsm_srvpostop_attr(getret, &at);
 3467                 error = 0;
 3468                 goto nfsmout;
 3469         }
              /*
               * Collect the statistics and the attributes, then drop the
               * vnode before building the reply.
               */
 3470         sf = &statfs;
 3471         error = VFS_STATFS(vp->v_mount, sf);
 3472         getret = VOP_GETATTR(vp, &at, cred);
 3473         vput(vp);
 3474         vp = NULL;
 3475         nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3));
 3476         if (v3)
 3477                 nfsm_srvpostop_attr(getret, &at);
 3478         if (error) {
 3479                 error = 0;
 3480                 goto nfsmout;
 3481         }
 3482         sfp = nfsm_build(struct nfs_statfs *, NFSX_STATFS(v3));
 3483         if (v3) {
                      /* v3 reports byte counts: block count times f_bsize. */
 3484                 tval = (u_quad_t)sf->f_blocks;
 3485                 tval *= (u_quad_t)sf->f_bsize;
 3486                 txdr_hyper(tval, &sfp->sf_tbytes);
 3487                 tval = (u_quad_t)sf->f_bfree;
 3488                 tval *= (u_quad_t)sf->f_bsize;
 3489                 txdr_hyper(tval, &sfp->sf_fbytes);
 3490                 /*
 3491                  * Don't send negative values for available space,
 3492                  * since this field is unsigned in the NFS protocol.
 3493                  * Otherwise, the client would see absurdly high
 3494                  * numbers for free space.
 3495                  */
 3496                 if (sf->f_bavail < 0)
 3497                         tval = 0;
 3498                 else
 3499                         tval = (u_quad_t)sf->f_bavail;
 3500                 tval *= (u_quad_t)sf->f_bsize;
 3501                 txdr_hyper(tval, &sfp->sf_abytes);
                      /*
                       * File counts: the first word of each 64-bit field is
                       * zeroed and only the second is filled in.  Both the
                       * free and the available count come from f_ffree.
                       * NOTE(review): presumably values that do not fit in
                       * 32 bits are truncated by txdr_unsigned() -- confirm.
                       */
 3502                 sfp->sf_tfiles.nfsuquad[0] = 0;
 3503                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
 3504                 sfp->sf_ffiles.nfsuquad[0] = 0;
 3505                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
 3506                 sfp->sf_afiles.nfsuquad[0] = 0;
 3507                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
 3508                 sfp->sf_invarsec = 0;
 3509         } else {
                      /*
                       * v2 reports block counts together with the block size;
                       * a negative f_bavail is sent as 0 here as well.
                       */
 3510                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
 3511                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
 3512                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
 3513                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
 3514                 if (sf->f_bavail < 0)
 3515                         sfp->sf_bavail = 0;
 3516                 else
 3517                         sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
 3518         }
 3519 nfsmout:
 3520         if (vp)
 3521                 vput(vp);
 3522         return(error);
 3523 }
 3524 
 3525 /*
 3526  * nfs fsinfo service
       *
       * NFSv3-only handler; panics if reached on a non-v3 request.
       * Translates the file handle to a vnode and replies with the post-op
       * attributes followed by a struct nfsv3_fsinfo.  All read, write and
       * readdir transfer sizes are set from NFS_SRVMAXDATA(nfsd), the
       * read/write multiples from NFS_FABLKSIZE, and the maximum file size
       * is estimated from the filesystem block size.
       *
       * On the visible paths the function returns 0.  NOTE(review): the
       * nfsm_* macros are defined elsewhere and presumably jump to nfsmout
       * on failure -- confirm against their definitions.
 3527  */
 3528 int
 3529 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3530     struct mbuf **mrq)
 3531 {
 3532         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3533         struct sockaddr *nam = nfsd->nd_nam;
 3534         caddr_t dpos = nfsd->nd_dpos;
 3535         struct ucred *cred = nfsd->nd_cr;
 3536         struct nfsv3_fsinfo *sip;
 3537         caddr_t bpos;
 3538         int error = 0, rdonly, getret = 1, pref;
 3539         struct mbuf *mb, *mreq;
 3540         struct vnode *vp = NULL;
 3541         struct vattr at;
 3542         nfsfh_t nfh;
 3543         fhandle_t *fhp;
 3544         u_quad_t maxfsize;
 3545         struct statfs sb;
 3546         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3547 
 3548         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3549         if (!v3)
 3550                 panic("nfsrv_fsinfo: v3 proc called on a v2 connection");
 3551         fhp = &nfh.fh_generic;
 3552         nfsm_srvmtofh(fhp);
 3553         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
 3554         if (error) {
                      /* No vnode: short reply; getret is still 1 here. */
 3555                 nfsm_reply(NFSX_UNSIGNED);
 3556                 nfsm_srvpostop_attr(getret, &at);
 3557                 error = 0;
 3558                 goto nfsmout;
 3559         }
 3560 
 3561         /* XXX Try to make a guess on the max file size. */
              /*
               * The guess is 0x80000000 blocks of f_bsize bytes, minus one.
               * NOTE(review): the return value of VFS_STATFS() is not
               * checked, so sb.f_bsize is used even if the call failed.
               */
 3562         VFS_STATFS(vp->v_mount, &sb);
 3563         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
 3564 
 3565         getret = VOP_GETATTR(vp, &at, cred);
 3566         vput(vp);
 3567         vp = NULL;
 3568         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO);
 3569         nfsm_srvpostop_attr(getret, &at);
 3570         sip = nfsm_build(struct nfsv3_fsinfo *, NFSX_V3FSINFO);
 3571 
 3572         /*
 3573          * XXX
 3574          * There should be filesystem VFS OP(s) to get this information.
 3575          * For now, assume ufs.
 3576          */
 3577         pref = NFS_SRVMAXDATA(nfsd);
 3578         sip->fs_rtmax = txdr_unsigned(pref);
 3579         sip->fs_rtpref = txdr_unsigned(pref);
 3580         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
 3581         sip->fs_wtmax = txdr_unsigned(pref);
 3582         sip->fs_wtpref = txdr_unsigned(pref);
 3583         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
 3584         sip->fs_dtpref = txdr_unsigned(pref);
 3585         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
              /* Time delta is sent as 0 seconds, 1 nanosecond. */
 3586         sip->fs_timedelta.nfsv3_sec = 0;
 3587         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
 3588         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
 3589                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
 3590                 NFSV3FSINFO_CANSETTIME);
 3591 nfsmout:
 3592         if (vp)
 3593                 vput(vp);
 3594         return(error);
 3595 }
 3596 
 3597 /*
 3598  * nfs pathconf service
       *
       * NFSv3-only handler; panics if reached on a non-v3 request.
       * Translates the file handle to a vnode, asks the filesystem for
       * _PC_LINK_MAX, _PC_NAME_MAX, _PC_CHOWN_RESTRICTED and _PC_NO_TRUNC
       * through VOP_PATHCONF() and returns them in a struct nfsv3_pathconf
       * after the post-op attributes.  The case-insensitive and
       * case-preserving fields are fixed values, not queried.
       *
       * On the visible paths the function returns 0.  NOTE(review): the
       * nfsm_* macros are defined elsewhere and presumably jump to nfsmout
       * on failure -- confirm against their definitions.
 3599  */
 3600 int
 3601 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3602     struct mbuf **mrq)
 3603 {
 3604         struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md;
 3605         struct sockaddr *nam = nfsd->nd_nam;
 3606         caddr_t dpos = nfsd->nd_dpos;
 3607         struct ucred *cred = nfsd->nd_cr;
 3608         struct nfsv3_pathconf *pc;
 3609         caddr_t bpos;
 3610         int error = 0, rdonly, getret = 1;
 3611         register_t linkmax, namemax, chownres, notrunc;
 3612         struct mbuf *mb, *mreq;
 3613         struct vnode *vp = NULL;
 3614         struct vattr at;
 3615         nfsfh_t nfh;
 3616         fhandle_t *fhp;
 3617         int v3 = (nfsd->nd_flag & ND_NFSV3);
 3618 
 3619         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3620         if (!v3)
 3621                 panic("nfsrv_pathconf: v3 proc called on a v2 connection");
 3622         fhp = &nfh.fh_generic;
 3623         nfsm_srvmtofh(fhp);
 3624         error = nfsrv_fhtovp(fhp, 0, &vp, nfsd, slp, nam, &rdonly);
 3625         if (error) {
                      /* No vnode: short reply; getret is still 1 here. */
 3626                 nfsm_reply(NFSX_UNSIGNED);
 3627                 nfsm_srvpostop_attr(getret, &at);
 3628                 error = 0;
 3629                 goto nfsmout;
 3630         }
              /*
               * Query the four limits in turn; the first failure stops the
               * chain and its error is kept.  The attributes are fetched
               * regardless of that outcome.
               */
 3631         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
 3632         if (!error)
 3633                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
 3634         if (!error)
 3635                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
 3636         if (!error)
 3637                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
 3638         getret = VOP_GETATTR(vp, &at, cred);
 3639         vput(vp);
 3640         vp = NULL;
 3641         nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF);
 3642         nfsm_srvpostop_attr(getret, &at);
 3643         if (error) {
                      /* A query failed: stop after the post-op attributes. */
 3644                 error = 0;
 3645                 goto nfsmout;
 3646         }
 3647         pc = nfsm_build(struct nfsv3_pathconf *, NFSX_V3PATHCONF);
 3648 
 3649         pc->pc_linkmax = txdr_unsigned(linkmax);
 3650         pc->pc_namemax = txdr_unsigned(namemax);
 3651         pc->pc_notrunc = txdr_unsigned(notrunc);
 3652         pc->pc_chownrestricted = txdr_unsigned(chownres);
 3653 
 3654         /*
 3655          * These should probably be supported by VOP_PATHCONF(), but
 3656          * until msdosfs is exportable (why would you want to?), the
 3657          * Unix defaults should be ok.
 3658          */
 3659         pc->pc_caseinsensitive = nfsrv_nfs_false;
 3660         pc->pc_casepreserving = nfsrv_nfs_true;
 3661 nfsmout:
 3662         if (vp)
 3663                 vput(vp);
 3664         return(error);
 3665 }
 3666 
 3667 /*
 3668  * Null operation, used by clients to ping server
       *
       * Builds a reply with no payload (nfsm_reply(0)) and returns "error",
       * which is initialised to NFSERR_RETVOID.  The slp argument is not
       * referenced directly in the body.  NOTE(review): nfsm_reply() is a
       * macro defined elsewhere; it presumably uses nfsd, mrq and the
       * locals by name and may jump to nfsmout -- confirm.
 3669  */
 3670 /* ARGSUSED */
 3671 int
 3672 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3673     struct mbuf **mrq)
 3674 {
 3675         struct mbuf *mrep = nfsd->nd_mrep;
 3676         caddr_t bpos;
 3677         int error = NFSERR_RETVOID;
 3678         struct mbuf *mb, *mreq;
 3679 
 3680         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3681         nfsm_reply(0);
 3682 nfsmout:
 3683         return (error);
 3684 }
 3685 
 3686 /*
 3687  * No operation, used for obsolete procedures
       *
       * Sets "error" to nfsd->nd_repstat when that is non-zero and to
       * EPROCUNAVAIL otherwise, builds a reply with no payload, then resets
       * "error" to 0 before returning it.  NOTE(review): nfsm_reply() is a
       * macro defined elsewhere; it presumably encodes the status held in
       * "error" into the reply and may jump to nfsmout -- confirm.
 3688  */
 3689 /* ARGSUSED */
 3690 int
 3691 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 3692     struct mbuf **mrq)
 3693 {
 3694         struct mbuf *mrep = nfsd->nd_mrep;
 3695         caddr_t bpos;
 3696         int error;
 3697         struct mbuf *mb, *mreq;
 3698 
 3699         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
              /* Prefer a status already recorded on the request. */
 3700         if (nfsd->nd_repstat)
 3701                 error = nfsd->nd_repstat;
 3702         else
 3703                 error = EPROCUNAVAIL;
 3704         nfsm_reply(0);
 3705         error = 0;
 3706 nfsmout:
 3707         return (error);
 3708 }
 3709 
 3710 /*
 3711  * Perform access checking for vnodes obtained from file handles that would
 3712  * refer to files already opened by a Unix client. You cannot just use
 3713  * vn_writechk() and VOP_ACCESS() for two reasons.
 3714  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
 3715  *     case.
 3716  * 2 - The owner is to be given access irrespective of mode bits for some
 3717  *     operations, so that processes that chmod after opening a file don't
 3718  *     break. I don't like this because it opens a security hole, but since
 3719  *     the nfs server opens a security hole the size of a barn door anyhow,
 3720  *     what the heck.
 3721  *
 3722  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
 3723  * will return EPERM instead of EACCES. EPERM is always an error.
       *
       * Arguments:
       *   vp       - vnode being checked.
       *   accmode  - requested access bits; the read-only and text-busy
       *              checks only run when VWRITE is set.
       *   cred     - credentials passed to VOP_GETATTR() and VOP_ACCESS().
       *   rdonly   - non-zero is treated the same as MNT_RDONLY on the mount.
       *   override - non-zero lets the owner of the file through when
       *              VOP_ACCESS() fails with EACCES.
       *
       * Returns 0 when access is allowed, EROFS or ETXTBSY from the write
       * checks, or the error from VOP_GETATTR() or VOP_ACCESS().  The
       * checks run in that order, so the first failing one decides the
       * result.
 3724  */
 3725 static int
 3726 nfsrv_access(struct vnode *vp, accmode_t accmode, struct ucred *cred,
 3727     int rdonly, int override)
 3728 {
 3729         struct vattr vattr;
 3730         int error;
 3731 
 3732         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 3733 
 3734         if (accmode & VWRITE) {
 3735                 /* Just vn_writechk() changed to check rdonly */
 3736                 /*
 3737                  * Disallow write attempts on read-only filesystems;
 3738                  * unless the file is a socket or a block or character
 3739                  * device resident on the filesystem.
 3740                  */
 3741                 if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 3742                         switch (vp->v_type) {
 3743                         case VREG:
 3744                         case VDIR:
 3745                         case VLNK:
 3746                                 return (EROFS);
 3747                         default:
 3748                                 break;
 3749                         }
 3750                 }
 3751                 /*
 3752                  * If there's shared text associated with
 3753                  * the inode, we can't allow writing.
 3754                  */
 3755                 if (VOP_IS_TEXT(vp))
 3756                         return (ETXTBSY);
 3757         }
 3758 
              /*
               * The attributes are fetched up front because va_uid is needed
               * for the owner override below; a failure here is returned
               * before VOP_ACCESS() is tried.
               */
 3759         error = VOP_GETATTR(vp, &vattr, cred);
 3760         if (error)
 3761                 return (error);
 3762         error = VOP_ACCESS(vp, accmode, cred, curthread);
 3763         /*
 3764          * Allow certain operations for the owner (reads and writes
 3765          * on files that are already open).
 3766          */
 3767         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
 3768                 error = 0;
 3769         return (error);
 3770 }

Cache object: ad6c8451a0b6cf2c1eb5881454b6aba9


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.