nfs_nfsdstate.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2009 Rick Macklem, University of Guelph
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  *
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include "opt_inet.h"
   34 #include "opt_inet6.h"
   35 #include <sys/extattr.h>
   36 #include <fs/nfs/nfsport.h>
   37 
   38 struct nfsrv_stablefirst nfsrv_stablefirst;
      /*
       * nfsrv_issuedelegs: when this is 0, nfsrv_setclient() clears lc_program
       * in a new client structure, i.e. callbacks are not done for it.
       */
   39 int nfsrv_issuedelegs = 0;
   40 int nfsrv_dolocallocks = 0;
      /*
       * nfsv4rootfs_lock is the lock that nfsrv_setclient() acquires
       * exclusively in order to lock out the other nfsd threads.
       */
   41 struct nfsv4lock nfsv4rootfs_lock;
   42 time_t nfsdev_time = 0;
   43 int nfsrv_layouthashsize;
   44 volatile int nfsrv_layoutcnt = 0;
   45 
      /* The symbols below are only declared here; they are defined elsewhere. */
   46 extern int newnfs_numnfsd;
   47 extern struct nfsstatsv1 nfsstatsv1;
   48 extern int nfsrv_lease;
   49 extern struct timeval nfsboottime;
   50 extern u_int32_t newnfs_true, newnfs_false;
   51 extern struct mtx nfsrv_dslock_mtx;
   52 extern struct mtx nfsrv_recalllock_mtx;
   53 extern struct mtx nfsrv_dontlistlock_mtx;
   54 extern int nfsd_debuglevel;
   55 extern u_int nfsrv_dsdirsize;
   56 extern struct nfsdevicehead nfsrv_devidhead;
   57 extern int nfsrv_doflexfile;
   58 extern int nfsrv_maxpnfsmirror;
      /*
       * NOTE(review): these two macros presumably expand to declarations of
       * the mutexes used by NFSLOCKV4ROOTMUTEX() and NFSLOCKSTATE() - confirm
       * against their definitions in the NFS port header.
       */
   59 NFSV4ROOTLOCKMUTEX;
   60 NFSSTATESPINLOCK;
   61 extern struct nfsdontlisthead nfsrv_dontlisthead;
   62 extern volatile int nfsrv_devidcnt;
   63 extern struct nfslayouthead nfsrv_recalllisthead;
   64 extern char *nfsrv_zeropnfsdat;
   65 
   66 SYSCTL_DECL(_vfs_nfsd);
      /*
       * Knobs exported under the _vfs_nfsd sysctl node.  The hash table sizes
       * and the layout high water mark are declared CTLFLAG_RDTUN and, as their
       * descriptions say, are set via loader.conf.  The remaining knobs are
       * declared CTLFLAG_RWTUN or CTLFLAG_RW.
       */
   67 int     nfsrv_statehashsize = NFSSTATEHASHSIZE;
   68 SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
   69     &nfsrv_statehashsize, 0,
   70     "Size of state hash table set via loader.conf");
   71 
   72 int     nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
   73 SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
   74     &nfsrv_clienthashsize, 0,
   75     "Size of client hash table set via loader.conf");
   76 
      /*
       * Note that the sysctl is named "fhhashsize" although the variable it
       * exports is nfsrv_lockhashsize.
       */
   77 int     nfsrv_lockhashsize = NFSLOCKHASHSIZE;
   78 SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
   79     &nfsrv_lockhashsize, 0,
   80     "Size of file handle hash table set via loader.conf");
   81 
   82 int     nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
   83 SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
   84     &nfsrv_sessionhashsize, 0,
   85     "Size of session hash table set via loader.conf");
   86 
   87 int     nfsrv_layouthighwater = NFSLAYOUTHIGHWATER;
   88 SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN,
   89     &nfsrv_layouthighwater, 0,
   90     "High water mark for number of layouts set via loader.conf");
   91 
      /*
       * nfsrv_setclient() fails with NFSERR_RESOURCE when nfsrv_openpluslock
       * is greater than this limit.
       */
   92 static int      nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
   93 SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
   94     &nfsrv_v4statelimit, 0,
   95     "High water limit for NFSv4 opens+locks+delegations");
   96 
   97 static int      nfsrv_writedelegifpos = 0;
   98 SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
   99     &nfsrv_writedelegifpos, 0,
  100     "Issue a write delegation for read opens if possible");
  101 
  102 static int      nfsrv_allowreadforwriteopen = 1;
  103 SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
  104     &nfsrv_allowreadforwriteopen, 0,
  105     "Allow Reads to be done with Write Access StateIDs");
  106 
      /*
       * Note that the sysctl is named "pnfsstrictatime" although the variable
       * it exports is nfsrv_pnfsatime.
       */
  107 int     nfsrv_pnfsatime = 0;
  108 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW,
  109     &nfsrv_pnfsatime, 0,
  110     "For pNFS service, do Getattr ops to keep atime up-to-date");
  111 
  112 int     nfsrv_flexlinuxhack = 0;
  113 SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
  114     &nfsrv_flexlinuxhack, 0,
  115     "For Linux clients, hack around Flex File Layout bug");
  116 
  117 /*
  118  * Hash lists for nfs V4.
       * Only the pointers are defined here.  nfsrv_setclient() walks
       * nfsclienthash[0] .. nfsclienthash[nfsrv_clienthashsize - 1] and
       * inserts new clients on the chain selected by NFSCLIENTHASH(clientid).
       * NOTE(review): the tables are presumably allocated elsewhere - confirm.
  119  */
  120 struct nfsclienthashhead        *nfsclienthash;
  121 struct nfslockhashhead          *nfslockhash;
  122 struct nfssessionhash           *nfssessionhash;
  123 struct nfslayouthash            *nfslayouthash;
  124 volatile int nfsrv_dontlistlen = 0;
  125 
      /*
       * File-local counters and settings.
       * nfsrv_openpluslock is compared against nfsrv_v4statelimit and, like
       * nfsrv_clients, is incremented each time nfsrv_setclient() links a
       * client structure into the hash.
       * nfsrvboottime supplies lval[0] of every clientid issued by
       * nfsrv_setclient() and is compared with lval[0] of incoming clientids
       * by nfsrv_getclient() to detect stale ones.
       */
  126 static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
  127 static time_t nfsrvboottime;
  128 static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
  129 static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
  130 static int nfsrv_nogsscallback = 0;
  131 static volatile int nfsrv_writedelegcnt = 0;
  132 static int nfsrv_faildscnt;
  133 
  134 /* local functions */
      /*
       * Every helper declared below is static, i.e. private to this file.
       * The prototypes are collected here so that the definitions may appear
       * in any order further down.
       */
  135 static void nfsrv_dumpaclient(struct nfsclient *clp,
  136     struct nfsd_dumpclients *dumpp);
  137 static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
  138     NFSPROC_T *p);
  139 static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
  140     NFSPROC_T *p);
  141 static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
  142     NFSPROC_T *p);
  143 static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
  144     int cansleep, NFSPROC_T *p);
  145 static void nfsrv_freenfslock(struct nfslock *lop);
  146 static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
  147 static void nfsrv_freedeleg(struct nfsstate *);
  148 static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, 
  149     u_int32_t flags, struct nfsstate **stpp);
  150 static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
  151     struct nfsstate **stpp);
  152 static int nfsrv_getlockfh(vnode_t vp, u_short flags,
  153     struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
  154 static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
  155     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
  156 static void nfsrv_insertlock(struct nfslock *new_lop,
  157     struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
  158 static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
  159     struct nfslock **other_lopp, struct nfslockfile *lfp);
  160 static int nfsrv_getipnumber(u_char *cp);
  161 static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
  162     nfsv4stateid_t *stateidp, int specialid);
  163 static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
  164     u_int32_t flags);
  165 static int nfsrv_docallback(struct nfsclient *clp, int procnum,
  166     nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
  167     struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p);
  168 static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
  169     uint32_t callback, int op, const char *optag, struct nfsdsession **sepp);
  170 static u_int32_t nfsrv_nextclientindex(void);
  171 static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
  172 static void nfsrv_markstable(struct nfsclient *clp);
  173 static void nfsrv_markreclaim(struct nfsclient *clp);
  174 static int nfsrv_checkstable(struct nfsclient *clp);
  175 static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct 
  176     vnode *vp, NFSPROC_T *p);
  177 static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
  178     NFSPROC_T *p, vnode_t vp);
  179 static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
  180     struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
  181 static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
  182     struct nfsclient *clp);
  183 static time_t nfsrv_leaseexpiry(void);
  184 static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
  185 static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
  186     struct nfsstate *stp, struct nfsrvcache *op);
  187 static int nfsrv_nootherstate(struct nfsstate *stp);
  188 static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
  189     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
  190 static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
  191     uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
  192 static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
  193     int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
  194     NFSPROC_T *p);
  195 static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
  196     NFSPROC_T *p);
  197 static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
  198     uint64_t first, uint64_t end);
  199 static void nfsrv_locklf(struct nfslockfile *lfp);
  200 static void nfsrv_unlocklf(struct nfslockfile *lfp);
  201 static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
  202 static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
  203 static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
  204     int dont_replycache, struct nfsdsession **sepp);
  205 static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
  206 static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
  207     nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p);
  208 static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp);
  209 static void nfsrv_freelayoutlist(nfsquad_t clientid);
  210 static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype,
  211     int iomode);
  212 static void nfsrv_freealllayouts(void);
  213 static void nfsrv_freedevid(struct nfsdevice *ds);
  214 static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
  215     struct nfsdevice **dsp);
  216 static void nfsrv_deleteds(struct nfsdevice *fndds);
  217 static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost);
  218 static void nfsrv_freealldevids(void);
  219 static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp,
  220     int maxcnt, NFSPROC_T *p);
  221 static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp,
  222     fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype,
  223     NFSPROC_T *p);
  224 static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
  225     NFSPROC_T *, struct nfslayout **lypp);
  226 static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt);
  227 static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode,
  228     fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
  229 static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode,
  230     int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
  231 static int nfsrv_dontlayout(fhandle_t *fhp);
  232 static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
  233     vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
  234     vnode_t *tvpp);
  235 static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
  236 
  237 /*
  238  * Scan the client list for a match and either return the current one,
  239  * create a new entry or return an error.
  240  * If returning a non-error, the clp structure must either be linked into
  241  * the client list or free'd.
       *
       * nd        - request descriptor; the ND_GSS and ND_NFSV41 bits of nd_flag
       *             are tested.
       * new_clpp  - in/out: the candidate client structure.  *new_clpp is set
       *             to NULL whenever that structure has been inserted in the
       *             client hash; in every other case it is left unchanged.
       * clientidp - out: the clientid assigned to the new entry or, for
       *             NFSv4.1, copied from the matching confirmed entry.
       * confirmp  - out: the confirm value.  For NFSv4.1 a non-zero lval[1] on
       *             entry means the client is updating a confirmed clientid,
       *             and lval[1] is set to 1 on return when a confirmed clientid
       *             with the same verifier was found.
       * p         - handed on to nfsrv_cleanclient() and nfsrv_zapclient().
       *
       * Returns 0, or one of NFSERR_RESOURCE, NFSERR_NOENT, NFSERR_CLIDINUSE.
  242  */
  243 int
  244 nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
  245     nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
  246 {
  247         struct nfsclient *clp = NULL, *new_clp = *new_clpp;
  248         int i, error = 0, ret;
  249         struct nfsstate *stp, *tstp;
  250 #ifdef INET
  251         struct sockaddr_in *sin, *rin;
  252 #endif
  253 #ifdef INET6
  254         struct sockaddr_in6 *sin6, *rin6;
  255 #endif
  256         struct nfsdsession *sep, *nsep;
  257         int zapit = 0, gotit, hasstate = 0, igotlock;
              /*
               * Source of confirm values.  It is only incremented below while
               * the exclusive nfsv4rootfs_lock is held.
               */
  258         static u_int64_t confirm_index = 0;
  259 
  260         /*
  261          * Check for state resource limit exceeded.
  262          */
  263         if (nfsrv_openpluslock > nfsrv_v4statelimit) {
  264                 error = NFSERR_RESOURCE;
  265                 goto out;
  266         }
  267 
  268         if (nfsrv_issuedelegs == 0 ||
  269             ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
  270                 /*
  271                  * Don't do callbacks when delegations are disabled or
  272                  * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
  273                  * If establishing a callback connection is attempted
  274                  * when a firewall is blocking the callback path, the
  275                  * server may wait too long for the connect attempt to
  276                  * succeed during the Open. Some clients, such as Linux,
  277                  * may timeout and give up on the Open before the server
  278                  * replies. Also, since AUTH_GSS callbacks are not
  279                  * yet interoperability tested, they might cause the
  280                  * server to crap out, if they get past the Init call to
  281                  * the client.
  282                  */
  283                 new_clp->lc_program = 0;
  284 
  285         /* Lock out other nfsd threads */
              /*
               * NOTE(review): nfsv4_relref() presumably drops the reference
               * this thread already holds on nfsv4rootfs_lock so that the
               * exclusive lock can be obtained in the loop below - confirm.
               */
  286         NFSLOCKV4ROOTMUTEX();
  287         nfsv4_relref(&nfsv4rootfs_lock);
  288         do {
  289                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  290                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  291         } while (!igotlock);
  292         NFSUNLOCKV4ROOTMUTEX();
  293 
  294         /*
  295          * Search for a match in the client list.
               * Every hash chain is walked, since the comparison is done on
               * the lc_id string whereas the chain a client lives on is
               * selected by its clientid (see NFSCLIENTHASH() below).
               * When the loop ends, i == nfsrv_clienthashsize exactly when no
               * match was found.
  296          */
  297         gotit = i = 0;
  298         while (i < nfsrv_clienthashsize && !gotit) {
  299             LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
  300                 if (new_clp->lc_idlen == clp->lc_idlen &&
  301                     !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
  302                         gotit = 1;
  303                         break;
  304                 }
  305             }
  306             if (gotit == 0)
  307                 i++;
  308         }
              /*
               * Case 1: there is no matching entry, or the matching entry is
               * still unconfirmed or has been admin revoked.  The new structure
               * is linked in (replacing the old one, if any).  clp is only
               * dereferenced when gotit is set.
               */
  309         if (!gotit ||
  310             (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
  311                 if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
  312                         /*
  313                          * For NFSv4.1, if confirmp->lval[1] is non-zero, the
  314                          * client is trying to update a confirmed clientid.
  315                          */
  316                         NFSLOCKV4ROOTMUTEX();
  317                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  318                         NFSUNLOCKV4ROOTMUTEX();
  319                         confirmp->lval[1] = 0;
  320                         error = NFSERR_NOENT;
  321                         goto out;
  322                 }
  323                 /*
  324                  * Get rid of the old one.
                       * (i != nfsrv_clienthashsize means the scan above found a
                       * matching entry.)  The old structure is unlinked and
                       * cleaned here, but only zapped after the lock is dropped.
  325                  */
  326                 if (i != nfsrv_clienthashsize) {
  327                         LIST_REMOVE(clp, lc_hash);
  328                         nfsrv_cleanclient(clp, p);
  329                         nfsrv_freedeleglist(&clp->lc_deleg);
  330                         nfsrv_freedeleglist(&clp->lc_olddeleg);
  331                         zapit = 1;
  332                 }
  333                 /*
  334                  * Add it after assigning a client id to it.
                       * The clientid is (nfsrvboottime, next client index).
                       * For NFSv4.1 only lval[0] of the confirm value is set;
                       * otherwise the whole 64 bit qval is set.
  335                  */
  336                 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
  337                 if ((nd->nd_flag & ND_NFSV41) != 0)
  338                         new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
  339                             ++confirm_index;
  340                 else
  341                         confirmp->qval = new_clp->lc_confirm.qval =
  342                             ++confirm_index;
  343                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  344                     (u_int32_t)nfsrvboottime;
  345                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  346                     nfsrv_nextclientindex();
  347                 new_clp->lc_stateindex = 0;
  348                 new_clp->lc_statemaxindex = 0;
  349                 new_clp->lc_cbref = 0;
  350                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  351                 LIST_INIT(&new_clp->lc_open);
  352                 LIST_INIT(&new_clp->lc_deleg);
  353                 LIST_INIT(&new_clp->lc_olddeleg);
  354                 LIST_INIT(&new_clp->lc_session);
  355                 for (i = 0; i < nfsrv_statehashsize; i++)
  356                         LIST_INIT(&new_clp->lc_stateid[i]);
  357                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  358                     lc_hash);
  359                 nfsstatsv1.srvclients++;
  360                 nfsrv_openpluslock++;
  361                 nfsrv_clients++;
  362                 NFSLOCKV4ROOTMUTEX();
  363                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  364                 NFSUNLOCKV4ROOTMUTEX();
  365                 if (zapit)
  366                         nfsrv_zapclient(clp, p);
                      /* new_clp is now owned by the client hash. */
  367                 *new_clpp = NULL;
  368                 goto out;
  369         }
  370 
  371         /*
  372          * Now, handle the cases where the id is already issued.
  373          */
  374         if (nfsrv_notsamecredname(nd, clp)) {
  375             /*
  376              * Check to see if there is expired state that should go away.
  377              */
  378             if (clp->lc_expiry < NFSD_MONOSEC &&
  379                 (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
  380                 nfsrv_cleanclient(clp, p);
  381                 nfsrv_freedeleglist(&clp->lc_deleg);
  382             }
  383 
  384             /*
  385              * If there is outstanding state, then reply NFSERR_CLIDINUSE per
  386              * RFC3530 Sec. 8.1.2 last para.
                   * Any delegation counts as state; an entry on lc_open only
                   * counts when its ls_open list is not empty.
  387              */
  388             if (!LIST_EMPTY(&clp->lc_deleg)) {
  389                 hasstate = 1;
  390             } else if (LIST_EMPTY(&clp->lc_open)) {
  391                 hasstate = 0;
  392             } else {
  393                 hasstate = 0;
  394                 /* Look for an Open on the OpenOwner */
  395                 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
  396                     if (!LIST_EMPTY(&stp->ls_open)) {
  397                         hasstate = 1;
  398                         break;
  399                     }
  400                 }
  401             }
  402             if (hasstate) {
  403                 /*
  404                  * If the uid doesn't match, return NFSERR_CLIDINUSE after
  405                  * filling out the correct ipaddr and portnum.
                       * The address and port of the existing client are copied
                       * into new_clp's lc_req.nr_nam; address families other
                       * than those compiled in are left untouched.
  406                  */
  407                 switch (clp->lc_req.nr_nam->sa_family) {
  408 #ifdef INET
  409                 case AF_INET:
  410                         sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam;
  411                         rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
  412                         sin->sin_addr.s_addr = rin->sin_addr.s_addr;
  413                         sin->sin_port = rin->sin_port;
  414                         break;
  415 #endif
  416 #ifdef INET6
  417                 case AF_INET6:
  418                         sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam;
  419                         rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
  420                         sin6->sin6_addr = rin6->sin6_addr;
  421                         sin6->sin6_port = rin6->sin6_port;
  422                         break;
  423 #endif
  424                 }
  425                 NFSLOCKV4ROOTMUTEX();
  426                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  427                 NFSUNLOCKV4ROOTMUTEX();
  428                 error = NFSERR_CLIDINUSE;
  429                 goto out;
  430             }
  431         }
  432 
  433         if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
  434                 /*
  435                  * If the verifier has changed, the client has rebooted
  436                  * and a new client id is issued. The old state info
  437                  * can be thrown away once the SETCLIENTID_CONFIRM occurs.
  438                  */
  439                 LIST_REMOVE(clp, lc_hash);
  440 
  441                 /* Get rid of all sessions on this clientid. */
  442                 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) {
  443                         ret = nfsrv_freesession(sep, NULL);
  444                         if (ret != 0)
  445                                 printf("nfsrv_setclient: verifier changed free"
  446                                     " session failed=%d\n", ret);
  447                 }
  448 
  449                 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
  450                 if ((nd->nd_flag & ND_NFSV41) != 0)
  451                         new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
  452                             ++confirm_index;
  453                 else
  454                         confirmp->qval = new_clp->lc_confirm.qval =
  455                             ++confirm_index;
  456                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  457                     nfsrvboottime;
  458                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  459                     nfsrv_nextclientindex();
  460                 new_clp->lc_stateindex = 0;
  461                 new_clp->lc_statemaxindex = 0;
  462                 new_clp->lc_cbref = 0;
  463                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  464 
  465                 /*
  466                  * Save the state until confirmed.
                       * NOTE(review): LIST_NEWHEAD() is defined elsewhere; it
                       * presumably hands the whole list over to the new head.
                       * Each state structure is then re-pointed at new_clp.
  467                  */
  468                 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
  469                 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
  470                         tstp->ls_clp = new_clp;
  471                 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
  472                 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
  473                         tstp->ls_clp = new_clp;
  474                 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
  475                     ls_list);
  476                 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
  477                         tstp->ls_clp = new_clp;
  478                 for (i = 0; i < nfsrv_statehashsize; i++) {
  479                         LIST_NEWHEAD(&new_clp->lc_stateid[i],
  480                             &clp->lc_stateid[i], ls_hash);
  481                         LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
  482                                 tstp->ls_clp = new_clp;
  483                 }
  484                 LIST_INIT(&new_clp->lc_session);
  485                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  486                     lc_hash);
  487                 nfsstatsv1.srvclients++;
  488                 nfsrv_openpluslock++;
  489                 nfsrv_clients++;
  490                 NFSLOCKV4ROOTMUTEX();
  491                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  492                 NFSUNLOCKV4ROOTMUTEX();
  493 
  494                 /*
  495                  * Must wait until any outstanding callback on the old clp
  496                  * completes.
                       * Each sleep is bounded by a timeout of 10 * hz, after
                       * which lc_cbref is checked again.
  497                  */
  498                 NFSLOCKSTATE();
  499                 while (clp->lc_cbref) {
  500                         clp->lc_flags |= LCL_WAKEUPWANTED;
  501                         (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
  502                             "nfsd clp", 10 * hz);
  503                 }
  504                 NFSUNLOCKSTATE();
  505                 nfsrv_zapclient(clp, p);
  506                 *new_clpp = NULL;
  507                 goto out;
  508         }
  509 
  510         /* For NFSv4.1, mark that we found a confirmed clientid. */
              /*
               * In the NFSv4.1 case the existing clp stays in the hash, new_clp
               * is not linked in and *new_clpp is left set, so the caller still
               * owns the new structure.
               */
  511         if ((nd->nd_flag & ND_NFSV41) != 0) {
  512                 clientidp->lval[0] = clp->lc_clientid.lval[0];
  513                 clientidp->lval[1] = clp->lc_clientid.lval[1];
  514                 confirmp->lval[0] = 0;  /* Ignored by client */
  515                 confirmp->lval[1] = 1;
  516         } else {
  517                 /*
  518                  * id and verifier match, so update the net address info
  519                  * and get rid of any existing callback authentication
  520                  * handle, so a new one will be acquired.
                       * The clientid, delegation time and state indices of the
                       * old structure are carried over to the new one, which is
                       * marked LCL_NEEDSCONFIRM | LCL_DONTCLEAN.
  521                  */
  522                 LIST_REMOVE(clp, lc_hash);
  523                 new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
  524                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  525                 confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
  526                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  527                     clp->lc_clientid.lval[0];
  528                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  529                     clp->lc_clientid.lval[1];
  530                 new_clp->lc_delegtime = clp->lc_delegtime;
  531                 new_clp->lc_stateindex = clp->lc_stateindex;
  532                 new_clp->lc_statemaxindex = clp->lc_statemaxindex;
  533                 new_clp->lc_cbref = 0;
  534                 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
  535                 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
  536                         tstp->ls_clp = new_clp;
  537                 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
  538                 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
  539                         tstp->ls_clp = new_clp;
  540                 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
  541                 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
  542                         tstp->ls_clp = new_clp;
  543                 for (i = 0; i < nfsrv_statehashsize; i++) {
  544                         LIST_NEWHEAD(&new_clp->lc_stateid[i],
  545                             &clp->lc_stateid[i], ls_hash);
  546                         LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
  547                                 tstp->ls_clp = new_clp;
  548                 }
  549                 LIST_INIT(&new_clp->lc_session);
  550                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  551                     lc_hash);
  552                 nfsstatsv1.srvclients++;
  553                 nfsrv_openpluslock++;
  554                 nfsrv_clients++;
  555         }
  556         NFSLOCKV4ROOTMUTEX();
  557         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  558         NFSUNLOCKV4ROOTMUTEX();
  559 
              /*
               * Only the non-NFSv4.1 branch above replaced clp, so only then is
               * the old structure zapped and *new_clpp cleared.
               */
  560         if ((nd->nd_flag & ND_NFSV41) == 0) {
  561                 /*
  562                  * Must wait until any outstanding callback on the old clp
  563                  * completes.
  564                  */
  565                 NFSLOCKSTATE();
  566                 while (clp->lc_cbref) {
  567                         clp->lc_flags |= LCL_WAKEUPWANTED;
  568                         (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
  569                             "nfsdclp", 10 * hz);
  570                 }
  571                 NFSUNLOCKSTATE();
  572                 nfsrv_zapclient(clp, p);
  573                 *new_clpp = NULL;
  574         }
  575 
  576 out:
  577         NFSEXITCODE2(error, nd);
  578         return (error);
  579 }
  580 
  581 /*
  582  * Check to see if the client id exists and optionally confirm it.
  583  */
  584 int
  585 nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
  586     struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
  587     struct nfsrv_descript *nd, NFSPROC_T *p)
  588 {
  589         struct nfsclient *clp;
  590         struct nfsstate *stp;
  591         int i;
  592         struct nfsclienthashhead *hp;
  593         int error = 0, igotlock, doneok;
  594         struct nfssessionhash *shp;
  595         struct nfsdsession *sep;
  596         uint64_t sessid[2];
  597         static uint64_t next_sess = 0;
  598 
  599         if (clpp)
  600                 *clpp = NULL;
  601         if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
  602             opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
  603                 error = NFSERR_STALECLIENTID;
  604                 goto out;
  605         }
  606 
  607         /*
  608          * If called with opflags == CLOPS_RENEW, the State Lock is
  609          * already held. Otherwise, we need to get either that or,
  610          * for the case of Confirm, lock out the nfsd threads.
  611          */
  612         if (opflags & CLOPS_CONFIRM) {
  613                 NFSLOCKV4ROOTMUTEX();
  614                 nfsv4_relref(&nfsv4rootfs_lock);
  615                 do {
  616                         igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  617                             NFSV4ROOTLOCKMUTEXPTR, NULL);
  618                 } while (!igotlock);
  619                 /*
  620                  * Create a new sessionid here, since we need to do it where
  621                  * there is a mutex held to serialize update of next_sess.
  622                  */
  623                 if ((nd->nd_flag & ND_NFSV41) != 0) {
  624                         sessid[0] = ++next_sess;
  625                         sessid[1] = clientid.qval;
  626                 }
  627                 NFSUNLOCKV4ROOTMUTEX();
  628         } else if (opflags != CLOPS_RENEW) {
  629                 NFSLOCKSTATE();
  630         }
  631 
  632         /* For NFSv4.1, the clp is acquired from the associated session. */
  633         if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
  634             opflags == CLOPS_RENEW) {
  635                 clp = NULL;
  636                 if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
  637                         shp = NFSSESSIONHASH(nd->nd_sessionid);
  638                         NFSLOCKSESSION(shp);
  639                         sep = nfsrv_findsession(nd->nd_sessionid);
  640                         if (sep != NULL)
  641                                 clp = sep->sess_clp;
  642                         NFSUNLOCKSESSION(shp);
  643                 }
  644         } else {
  645                 hp = NFSCLIENTHASH(clientid);
  646                 LIST_FOREACH(clp, hp, lc_hash) {
  647                         if (clp->lc_clientid.lval[1] == clientid.lval[1])
  648                                 break;
  649                 }
  650         }
  651         if (clp == NULL) {
  652                 if (opflags & CLOPS_CONFIRM)
  653                         error = NFSERR_STALECLIENTID;
  654                 else
  655                         error = NFSERR_EXPIRED;
  656         } else if (clp->lc_flags & LCL_ADMINREVOKED) {
  657                 /*
  658                  * If marked admin revoked, just return the error.
  659                  */
  660                 error = NFSERR_ADMINREVOKED;
  661         }
  662         if (error) {
  663                 if (opflags & CLOPS_CONFIRM) {
  664                         NFSLOCKV4ROOTMUTEX();
  665                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  666                         NFSUNLOCKV4ROOTMUTEX();
  667                 } else if (opflags != CLOPS_RENEW) {
  668                         NFSUNLOCKSTATE();
  669                 }
  670                 goto out;
  671         }
  672 
  673         /*
  674          * Perform any operations specified by the opflags.
  675          */
  676         if (opflags & CLOPS_CONFIRM) {
  677                 if (((nd->nd_flag & ND_NFSV41) != 0 &&
  678                      clp->lc_confirm.lval[0] != confirm.lval[0]) ||
  679                     ((nd->nd_flag & ND_NFSV41) == 0 &&
  680                      clp->lc_confirm.qval != confirm.qval))
  681                         error = NFSERR_STALECLIENTID;
  682                 else if (nfsrv_notsamecredname(nd, clp))
  683                         error = NFSERR_CLIDINUSE;
  684 
  685                 if (!error) {
  686                     if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
  687                         LCL_NEEDSCONFIRM) {
  688                         /*
  689                          * Hang onto the delegations (as old delegations)
  690                          * for an Open with CLAIM_DELEGATE_PREV unless in
  691                          * grace, but get rid of the rest of the state.
  692                          */
  693                         nfsrv_cleanclient(clp, p);
  694                         nfsrv_freedeleglist(&clp->lc_olddeleg);
  695                         if (nfsrv_checkgrace(nd, clp, 0)) {
  696                             /* In grace, so just delete delegations */
  697                             nfsrv_freedeleglist(&clp->lc_deleg);
  698                         } else {
  699                             LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
  700                                 stp->ls_flags |= NFSLCK_OLDDELEG;
  701                             clp->lc_delegtime = NFSD_MONOSEC +
  702                                 nfsrv_lease + NFSRV_LEASEDELTA;
  703                             LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
  704                                 ls_list);
  705                         }
  706                         if ((nd->nd_flag & ND_NFSV41) != 0)
  707                             clp->lc_program = cbprogram;
  708                     }
  709                     clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
  710                     if (clp->lc_program)
  711                         clp->lc_flags |= LCL_NEEDSCBNULL;
  712                     /* For NFSv4.1, link the session onto the client. */
  713                     if (nsep != NULL) {
  714                         /* Hold a reference on the xprt for a backchannel. */
  715                         if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
  716                             != 0) {
  717                             if (clp->lc_req.nr_client == NULL)
  718                                 clp->lc_req.nr_client = (struct __rpc_client *)
  719                                     clnt_bck_create(nd->nd_xprt->xp_socket,
  720                                     cbprogram, NFSV4_CBVERS);
  721                             if (clp->lc_req.nr_client != NULL) {
  722                                 SVC_ACQUIRE(nd->nd_xprt);
  723                                 nd->nd_xprt->xp_p2 =
  724                                     clp->lc_req.nr_client->cl_private;
  725                                 /* Disable idle timeout. */
  726                                 nd->nd_xprt->xp_idletimeout = 0;
  727                                 nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
  728                             } else
  729                                 nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
  730                         }
  731                         NFSBCOPY(sessid, nsep->sess_sessionid,
  732                             NFSX_V4SESSIONID);
  733                         NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
  734                             NFSX_V4SESSIONID);
  735                         shp = NFSSESSIONHASH(nsep->sess_sessionid);
  736                         NFSLOCKSTATE();
  737                         NFSLOCKSESSION(shp);
  738                         LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
  739                         LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
  740                         nsep->sess_clp = clp;
  741                         NFSUNLOCKSESSION(shp);
  742                         NFSUNLOCKSTATE();
  743                     }
  744                 }
  745         } else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
  746                 error = NFSERR_EXPIRED;
  747         }
  748 
  749         /*
  750          * If called by the Renew Op, we must check the principal.
  751          */
  752         if (!error && (opflags & CLOPS_RENEWOP)) {
  753             if (nfsrv_notsamecredname(nd, clp)) {
  754                 doneok = 0;
  755                 for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
  756                     LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
  757                         if ((stp->ls_flags & NFSLCK_OPEN) &&
  758                             stp->ls_uid == nd->nd_cred->cr_uid) {
  759                                 doneok = 1;
  760                                 break;
  761                         }
  762                     }
  763                 }
  764                 if (!doneok)
  765                         error = NFSERR_ACCES;
  766             }
  767             if (!error && (clp->lc_flags & LCL_CBDOWN))
  768                 error = NFSERR_CBPATHDOWN;
  769         }
  770         if ((!error || error == NFSERR_CBPATHDOWN) &&
  771              (opflags & CLOPS_RENEW)) {
  772                 clp->lc_expiry = nfsrv_leaseexpiry();
  773         }
  774         if (opflags & CLOPS_CONFIRM) {
  775                 NFSLOCKV4ROOTMUTEX();
  776                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  777                 NFSUNLOCKV4ROOTMUTEX();
  778         } else if (opflags != CLOPS_RENEW) {
  779                 NFSUNLOCKSTATE();
  780         }
  781         if (clpp)
  782                 *clpp = clp;
  783 
  784 out:
  785         NFSEXITCODE2(error, nd);
  786         return (error);
  787 }
  788 
  789 /*
  790  * Perform the NFSv4.1 destroy clientid.
  791  */
  792 int
  793 nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
  794 {
  795         struct nfsclient *clp;
  796         struct nfsclienthashhead *hp;
  797         int error = 0, i, igotlock;
  798 
  799         if (nfsrvboottime != clientid.lval[0]) {
  800                 error = NFSERR_STALECLIENTID;
  801                 goto out;
  802         }
  803 
  804         /* Lock out other nfsd threads */
  805         NFSLOCKV4ROOTMUTEX();
  806         nfsv4_relref(&nfsv4rootfs_lock);
  807         do {
  808                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  809                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  810         } while (igotlock == 0);
  811         NFSUNLOCKV4ROOTMUTEX();
  812 
  813         hp = NFSCLIENTHASH(clientid);
  814         LIST_FOREACH(clp, hp, lc_hash) {
  815                 if (clp->lc_clientid.lval[1] == clientid.lval[1])
  816                         break;
  817         }
  818         if (clp == NULL) {
  819                 NFSLOCKV4ROOTMUTEX();
  820                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  821                 NFSUNLOCKV4ROOTMUTEX();
  822                 /* Just return ok, since it is gone. */
  823                 goto out;
  824         }
  825 
  826         /*
  827          * Free up all layouts on the clientid.  Should the client return the
  828          * layouts?
  829          */
  830         nfsrv_freelayoutlist(clientid);
  831 
  832         /* Scan for state on the clientid. */
  833         for (i = 0; i < nfsrv_statehashsize; i++)
  834                 if (!LIST_EMPTY(&clp->lc_stateid[i])) {
  835                         NFSLOCKV4ROOTMUTEX();
  836                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  837                         NFSUNLOCKV4ROOTMUTEX();
  838                         error = NFSERR_CLIENTIDBUSY;
  839                         goto out;
  840                 }
  841         if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
  842                 NFSLOCKV4ROOTMUTEX();
  843                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  844                 NFSUNLOCKV4ROOTMUTEX();
  845                 error = NFSERR_CLIENTIDBUSY;
  846                 goto out;
  847         }
  848 
  849         /* Destroy the clientid and return ok. */
  850         nfsrv_cleanclient(clp, p);
  851         nfsrv_freedeleglist(&clp->lc_deleg);
  852         nfsrv_freedeleglist(&clp->lc_olddeleg);
  853         LIST_REMOVE(clp, lc_hash);
  854         NFSLOCKV4ROOTMUTEX();
  855         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  856         NFSUNLOCKV4ROOTMUTEX();
  857         nfsrv_zapclient(clp, p);
  858 out:
  859         NFSEXITCODE2(error, nd);
  860         return (error);
  861 }
  862 
  863 /*
  864  * Called from the new nfssvc syscall to admin revoke a clientid.
  865  * Returns 0 for success, error otherwise.
  866  */
  867 int
  868 nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
  869 {
  870         struct nfsclient *clp = NULL;
  871         int i, error = 0;
  872         int gotit, igotlock;
  873 
  874         /*
  875          * First, lock out the nfsd so that state won't change while the
  876          * revocation record is being written to the stable storage restart
  877          * file.
  878          */
  879         NFSLOCKV4ROOTMUTEX();
  880         do {
  881                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  882                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  883         } while (!igotlock);
  884         NFSUNLOCKV4ROOTMUTEX();
  885 
  886         /*
  887          * Search for a match in the client list.
  888          */
  889         gotit = i = 0;
  890         while (i < nfsrv_clienthashsize && !gotit) {
  891             LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
  892                 if (revokep->nclid_idlen == clp->lc_idlen &&
  893                     !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
  894                         gotit = 1;
  895                         break;
  896                 }
  897             }
  898             i++;
  899         }
  900         if (!gotit) {
  901                 NFSLOCKV4ROOTMUTEX();
  902                 nfsv4_unlock(&nfsv4rootfs_lock, 0);
  903                 NFSUNLOCKV4ROOTMUTEX();
  904                 error = EPERM;
  905                 goto out;
  906         }
  907 
  908         /*
  909          * Now, write out the revocation record
  910          */
  911         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
  912         nfsrv_backupstable();
  913 
  914         /*
  915          * and clear out the state, marking the clientid revoked.
  916          */
  917         clp->lc_flags &= ~LCL_CALLBACKSON;
  918         clp->lc_flags |= LCL_ADMINREVOKED;
  919         nfsrv_cleanclient(clp, p);
  920         nfsrv_freedeleglist(&clp->lc_deleg);
  921         nfsrv_freedeleglist(&clp->lc_olddeleg);
  922         NFSLOCKV4ROOTMUTEX();
  923         nfsv4_unlock(&nfsv4rootfs_lock, 0);
  924         NFSUNLOCKV4ROOTMUTEX();
  925 
  926 out:
  927         NFSEXITCODE(error);
  928         return (error);
  929 }
  930 
  931 /*
  932  * Dump out stats for all clients. Called from nfssvc(2), that is used
  933  * nfsstatsv1.
  934  */
  935 void
  936 nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
  937 {
  938         struct nfsclient *clp;
  939         int i = 0, cnt = 0;
  940 
  941         /*
  942          * First, get a reference on the nfsv4rootfs_lock so that an
  943          * exclusive lock cannot be acquired while dumping the clients.
  944          */
  945         NFSLOCKV4ROOTMUTEX();
  946         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
  947         NFSUNLOCKV4ROOTMUTEX();
  948         NFSLOCKSTATE();
  949         /*
  950          * Rattle through the client lists until done.
  951          */
  952         while (i < nfsrv_clienthashsize && cnt < maxcnt) {
  953             clp = LIST_FIRST(&nfsclienthash[i]);
  954             while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
  955                 nfsrv_dumpaclient(clp, &dumpp[cnt]);
  956                 cnt++;
  957                 clp = LIST_NEXT(clp, lc_hash);
  958             }
  959             i++;
  960         }
  961         if (cnt < maxcnt)
  962             dumpp[cnt].ndcl_clid.nclid_idlen = 0;
  963         NFSUNLOCKSTATE();
  964         NFSLOCKV4ROOTMUTEX();
  965         nfsv4_relref(&nfsv4rootfs_lock);
  966         NFSUNLOCKV4ROOTMUTEX();
  967 }
  968 
  969 /*
  970  * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
  971  */
  972 static void
  973 nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
  974 {
  975         struct nfsstate *stp, *openstp, *lckownstp;
  976         struct nfslock *lop;
  977         sa_family_t af;
  978 #ifdef INET
  979         struct sockaddr_in *rin;
  980 #endif
  981 #ifdef INET6
  982         struct sockaddr_in6 *rin6;
  983 #endif
  984 
  985         dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
  986         dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
  987         dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
  988         dumpp->ndcl_flags = clp->lc_flags;
  989         dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
  990         NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
  991         af = clp->lc_req.nr_nam->sa_family;
  992         dumpp->ndcl_addrfam = af;
  993         switch (af) {
  994 #ifdef INET
  995         case AF_INET:
  996                 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
  997                 dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr;
  998                 break;
  999 #endif
 1000 #ifdef INET6
 1001         case AF_INET6:
 1002                 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 1003                 dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr;
 1004                 break;
 1005 #endif
 1006         }
 1007 
 1008         /*
 1009          * Now, scan the state lists and total up the opens and locks.
 1010          */
 1011         LIST_FOREACH(stp, &clp->lc_open, ls_list) {
 1012             dumpp->ndcl_nopenowners++;
 1013             LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
 1014                 dumpp->ndcl_nopens++;
 1015                 LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
 1016                     dumpp->ndcl_nlockowners++;
 1017                     LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
 1018                         dumpp->ndcl_nlocks++;
 1019                     }
 1020                 }
 1021             }
 1022         }
 1023 
 1024         /*
 1025          * and the delegation lists.
 1026          */
 1027         LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
 1028             dumpp->ndcl_ndelegs++;
 1029         }
 1030         LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
 1031             dumpp->ndcl_nolddelegs++;
 1032         }
 1033 }
 1034 
 1035 /*
 1036  * Dump out lock stats for a file.
 1037  */
 1038 void
 1039 nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
 1040     NFSPROC_T *p)
 1041 {
 1042         struct nfsstate *stp;
 1043         struct nfslock *lop;
 1044         int cnt = 0;
 1045         struct nfslockfile *lfp;
 1046         sa_family_t af;
 1047 #ifdef INET
 1048         struct sockaddr_in *rin;
 1049 #endif
 1050 #ifdef INET6
 1051         struct sockaddr_in6 *rin6;
 1052 #endif
 1053         int ret;
 1054         fhandle_t nfh;
 1055 
 1056         ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
 1057         /*
 1058          * First, get a reference on the nfsv4rootfs_lock so that an
 1059          * exclusive lock on it cannot be acquired while dumping the locks.
 1060          */
 1061         NFSLOCKV4ROOTMUTEX();
 1062         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 1063         NFSUNLOCKV4ROOTMUTEX();
 1064         NFSLOCKSTATE();
 1065         if (!ret)
 1066                 ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
 1067         if (ret) {
 1068                 ldumpp[0].ndlck_clid.nclid_idlen = 0;
 1069                 NFSUNLOCKSTATE();
 1070                 NFSLOCKV4ROOTMUTEX();
 1071                 nfsv4_relref(&nfsv4rootfs_lock);
 1072                 NFSUNLOCKV4ROOTMUTEX();
 1073                 return;
 1074         }
 1075 
 1076         /*
 1077          * For each open share on file, dump it out.
 1078          */
 1079         stp = LIST_FIRST(&lfp->lf_open);
 1080         while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
 1081                 ldumpp[cnt].ndlck_flags = stp->ls_flags;
 1082                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1083                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1084                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1085                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1086                 ldumpp[cnt].ndlck_owner.nclid_idlen =
 1087                     stp->ls_openowner->ls_ownerlen;
 1088                 NFSBCOPY(stp->ls_openowner->ls_owner,
 1089                     ldumpp[cnt].ndlck_owner.nclid_id,
 1090                     stp->ls_openowner->ls_ownerlen);
 1091                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1092                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1093                     stp->ls_clp->lc_idlen);
 1094                 af = stp->ls_clp->lc_req.nr_nam->sa_family;
 1095                 ldumpp[cnt].ndlck_addrfam = af;
 1096                 switch (af) {
 1097 #ifdef INET
 1098                 case AF_INET:
 1099                         rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
 1100                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
 1101                         break;
 1102 #endif
 1103 #ifdef INET6
 1104                 case AF_INET6:
 1105                         rin6 = (struct sockaddr_in6 *)
 1106                             stp->ls_clp->lc_req.nr_nam;
 1107                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
 1108                         break;
 1109 #endif
 1110                 }
 1111                 stp = LIST_NEXT(stp, ls_file);
 1112                 cnt++;
 1113         }
 1114 
 1115         /*
 1116          * and all locks.
 1117          */
 1118         lop = LIST_FIRST(&lfp->lf_lock);
 1119         while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
 1120                 stp = lop->lo_stp;
 1121                 ldumpp[cnt].ndlck_flags = lop->lo_flags;
 1122                 ldumpp[cnt].ndlck_first = lop->lo_first;
 1123                 ldumpp[cnt].ndlck_end = lop->lo_end;
 1124                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1125                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1126                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1127                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1128                 ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
 1129                 NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
 1130                     stp->ls_ownerlen);
 1131                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1132                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1133                     stp->ls_clp->lc_idlen);
 1134                 af = stp->ls_clp->lc_req.nr_nam->sa_family;
 1135                 ldumpp[cnt].ndlck_addrfam = af;
 1136                 switch (af) {
 1137 #ifdef INET
 1138                 case AF_INET:
 1139                         rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
 1140                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
 1141                         break;
 1142 #endif
 1143 #ifdef INET6
 1144                 case AF_INET6:
 1145                         rin6 = (struct sockaddr_in6 *)
 1146                             stp->ls_clp->lc_req.nr_nam;
 1147                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
 1148                         break;
 1149 #endif
 1150                 }
 1151                 lop = LIST_NEXT(lop, lo_lckfile);
 1152                 cnt++;
 1153         }
 1154 
 1155         /*
 1156          * and the delegations.
 1157          */
 1158         stp = LIST_FIRST(&lfp->lf_deleg);
 1159         while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
 1160                 ldumpp[cnt].ndlck_flags = stp->ls_flags;
 1161                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1162                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1163                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1164                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1165                 ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
 1166                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1167                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1168                     stp->ls_clp->lc_idlen);
 1169                 af = stp->ls_clp->lc_req.nr_nam->sa_family;
 1170                 ldumpp[cnt].ndlck_addrfam = af;
 1171                 switch (af) {
 1172 #ifdef INET
 1173                 case AF_INET:
 1174                         rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
 1175                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
 1176                         break;
 1177 #endif
 1178 #ifdef INET6
 1179                 case AF_INET6:
 1180                         rin6 = (struct sockaddr_in6 *)
 1181                             stp->ls_clp->lc_req.nr_nam;
 1182                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
 1183                         break;
 1184 #endif
 1185                 }
 1186                 stp = LIST_NEXT(stp, ls_file);
 1187                 cnt++;
 1188         }
 1189 
 1190         /*
 1191          * If list isn't full, mark end of list by setting the client name
 1192          * to zero length.
 1193          */
 1194         if (cnt < maxcnt)
 1195                 ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
 1196         NFSUNLOCKSTATE();
 1197         NFSLOCKV4ROOTMUTEX();
 1198         nfsv4_relref(&nfsv4rootfs_lock);
 1199         NFSUNLOCKV4ROOTMUTEX();
 1200 }
 1201 
 1202 /*
 1203  * Server timer routine. It can scan any linked list, so long
 1204  * as it holds the spin/mutex lock and there is no exclusive lock on
 1205  * nfsv4rootfs_lock.
 1206  * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
 1207  *  to do this from a callout, since the spin locks work. For
 1208  *  Darwin, I'm not sure what will work correctly yet.)
 1209  * Should be called once per second.
 1210  */
 1211 void
 1212 nfsrv_servertimer(void)
 1213 {
 1214         struct nfsclient *clp, *nclp;
 1215         struct nfsstate *stp, *nstp;
 1216         int got_ref, i;
 1217 
 1218         /*
 1219          * Make sure nfsboottime is set. This is used by V3 as well
 1220          * as V4. Note that nfsboottime is not nfsrvboottime, which is
 1221          * only used by the V4 server for leases.
 1222          */
 1223         if (nfsboottime.tv_sec == 0)
 1224                 NFSSETBOOTTIME(nfsboottime);
 1225 
 1226         /*
 1227          * If server hasn't started yet, just return.
 1228          */
 1229         NFSLOCKSTATE();
 1230         if (nfsrv_stablefirst.nsf_eograce == 0) {
 1231                 NFSUNLOCKSTATE();
 1232                 return;
 1233         }
 1234         if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
 1235                 if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
 1236                     NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
 1237                         nfsrv_stablefirst.nsf_flags |=
 1238                             (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
 1239                 NFSUNLOCKSTATE();
 1240                 return;
 1241         }
 1242 
 1243         /*
 1244          * Try and get a reference count on the nfsv4rootfs_lock so that
 1245          * no nfsd thread can acquire an exclusive lock on it before this
 1246          * call is done. If it is already exclusively locked, just return.
 1247          */
 1248         NFSLOCKV4ROOTMUTEX();
 1249         got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
 1250         NFSUNLOCKV4ROOTMUTEX();
 1251         if (got_ref == 0) {
 1252                 NFSUNLOCKSTATE();
 1253                 return;
 1254         }
 1255 
 1256         /*
 1257          * For each client...
 1258          */
 1259         for (i = 0; i < nfsrv_clienthashsize; i++) {
 1260             clp = LIST_FIRST(&nfsclienthash[i]);
 1261             while (clp != LIST_END(&nfsclienthash[i])) {
 1262                 nclp = LIST_NEXT(clp, lc_hash);
 1263                 if (!(clp->lc_flags & LCL_EXPIREIT)) {
 1264                     if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
 1265                          && ((LIST_EMPTY(&clp->lc_deleg)
 1266                               && LIST_EMPTY(&clp->lc_open)) ||
 1267                              nfsrv_clients > nfsrv_clienthighwater)) ||
 1268                         (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
 1269                         (clp->lc_expiry < NFSD_MONOSEC &&
 1270                          (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
 1271                         /*
 1272                          * Lease has expired several nfsrv_lease times ago:
 1273                          * PLUS
 1274                          *    - no state is associated with it
 1275                          *    OR
 1276                          *    - above high water mark for number of clients
 1277                          *      (nfsrv_clienthighwater should be large enough
 1278                          *       that this only occurs when clients fail to
 1279                          *       use the same nfs_client_id4.id. Maybe somewhat
 1280                          *       higher that the maximum number of clients that
 1281                          *       will mount this server?)
 1282                          * OR
 1283                          * Lease has expired a very long time ago
 1284                          * OR
 1285                          * Lease has expired PLUS the number of opens + locks
 1286                          * has exceeded 90% of capacity
 1287                          *
 1288                          * --> Mark for expiry. The actual expiry will be done
 1289                          *     by an nfsd sometime soon.
 1290                          */
 1291                         clp->lc_flags |= LCL_EXPIREIT;
 1292                         nfsrv_stablefirst.nsf_flags |=
 1293                             (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
 1294                     } else {
 1295                         /*
 1296                          * If there are no opens, increment no open tick cnt
 1297                          * If time exceeds NFSNOOPEN, mark it to be thrown away
 1298                          * otherwise, if there is an open, reset no open time
 1299                          * Hopefully, this will avoid excessive re-creation
 1300                          * of open owners and subsequent open confirms.
 1301                          */
 1302                         stp = LIST_FIRST(&clp->lc_open);
 1303                         while (stp != LIST_END(&clp->lc_open)) {
 1304                                 nstp = LIST_NEXT(stp, ls_list);
 1305                                 if (LIST_EMPTY(&stp->ls_open)) {
 1306                                         stp->ls_noopens++;
 1307                                         if (stp->ls_noopens > NFSNOOPEN ||
 1308                                             (nfsrv_openpluslock * 2) >
 1309                                             nfsrv_v4statelimit)
 1310                                                 nfsrv_stablefirst.nsf_flags |=
 1311                                                         NFSNSF_NOOPENS;
 1312                                 } else {
 1313                                         stp->ls_noopens = 0;
 1314                                 }
 1315                                 stp = nstp;
 1316                         }
 1317                     }
 1318                 }
 1319                 clp = nclp;
 1320             }
 1321         }
 1322         NFSUNLOCKSTATE();
 1323         NFSLOCKV4ROOTMUTEX();
 1324         nfsv4_relref(&nfsv4rootfs_lock);
 1325         NFSUNLOCKV4ROOTMUTEX();
 1326 }
 1327 
 1328 /*
 1329  * The following set of functions free up the various data structures.
 1330  */
 1331 /*
 1332  * Clear out all open/lock state related to this nfsclient.
 1333  * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
 1334  * there are no other active nfsd threads.
 1335  */
 1336 void
 1337 nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
 1338 {
 1339         struct nfsstate *stp, *nstp;
 1340         struct nfsdsession *sep, *nsep;
 1341 
 1342         LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
 1343                 nfsrv_freeopenowner(stp, 1, p);
 1344         if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
 1345                 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
 1346                         (void)nfsrv_freesession(sep, NULL);
 1347 }
 1348 
 1349 /*
 1350  * Free a client that has been cleaned. It should also already have been
 1351  * removed from the lists.
 1352  * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
 1353  *  softclock interrupts are enabled.)
 1354  */
 1355 void
 1356 nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
 1357 {
 1358 
 1359 #ifdef notyet
 1360         if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
 1361              (LCL_GSS | LCL_CALLBACKSON) &&
 1362             (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
 1363             clp->lc_handlelen > 0) {
 1364                 clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
 1365                 clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
 1366                 (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
 1367                         NULL, 0, NULL, NULL, NULL, 0, p);
 1368         }
 1369 #endif
 1370         newnfs_disconnect(&clp->lc_req);
 1371         free(clp->lc_req.nr_nam, M_SONAME);
 1372         NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 1373         free(clp->lc_stateid, M_NFSDCLIENT);
 1374         free(clp, M_NFSDCLIENT);
 1375         NFSLOCKSTATE();
 1376         nfsstatsv1.srvclients--;
 1377         nfsrv_openpluslock--;
 1378         nfsrv_clients--;
 1379         NFSUNLOCKSTATE();
 1380 }
 1381 
 1382 /*
 1383  * Free a list of delegation state structures.
 1384  * (This function will also free all nfslockfile structures that no
 1385  *  longer have associated state.)
 1386  */
 1387 void
 1388 nfsrv_freedeleglist(struct nfsstatehead *sthp)
 1389 {
 1390         struct nfsstate *stp, *nstp;
 1391 
 1392         LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
 1393                 nfsrv_freedeleg(stp);
 1394         }
 1395         LIST_INIT(sthp);
 1396 }
 1397 
 1398 /*
 1399  * Free up a delegation.
 1400  */
 1401 static void
 1402 nfsrv_freedeleg(struct nfsstate *stp)
 1403 {
 1404         struct nfslockfile *lfp;
 1405 
 1406         LIST_REMOVE(stp, ls_hash);
 1407         LIST_REMOVE(stp, ls_list);
 1408         LIST_REMOVE(stp, ls_file);
 1409         if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
 1410                 nfsrv_writedelegcnt--;
 1411         lfp = stp->ls_lfp;
 1412         if (LIST_EMPTY(&lfp->lf_open) &&
 1413             LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
 1414             LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
 1415             lfp->lf_usecount == 0 &&
 1416             nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
 1417                 nfsrv_freenfslockfile(lfp);
 1418         free(stp, M_NFSDSTATE);
 1419         nfsstatsv1.srvdelegates--;
 1420         nfsrv_openpluslock--;
 1421         nfsrv_delegatecnt--;
 1422 }
 1423 
 1424 /*
 1425  * This function frees an open owner and all associated opens.
 1426  */
 1427 static void
 1428 nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
 1429 {
 1430         struct nfsstate *nstp, *tstp;
 1431 
 1432         LIST_REMOVE(stp, ls_list);
 1433         /*
 1434          * Now, free all associated opens.
 1435          */
 1436         nstp = LIST_FIRST(&stp->ls_open);
 1437         while (nstp != LIST_END(&stp->ls_open)) {
 1438                 tstp = nstp;
 1439                 nstp = LIST_NEXT(nstp, ls_list);
 1440                 (void) nfsrv_freeopen(tstp, NULL, cansleep, p);
 1441         }
 1442         if (stp->ls_op)
 1443                 nfsrvd_derefcache(stp->ls_op);
 1444         free(stp, M_NFSDSTATE);
 1445         nfsstatsv1.srvopenowners--;
 1446         nfsrv_openpluslock--;
 1447 }
 1448 
 1449 /*
 1450  * This function frees an open (nfsstate open structure) with all associated
 1451  * lock_owners and locks. It also frees the nfslockfile structure iff there
 1452  * are no other opens on the file.
 1453  * Returns 1 if it free'd the nfslockfile, 0 otherwise.
 1454  */
 1455 static int
 1456 nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
 1457 {
 1458         struct nfsstate *nstp, *tstp;
 1459         struct nfslockfile *lfp;
 1460         int ret;
 1461 
 1462         LIST_REMOVE(stp, ls_hash);
 1463         LIST_REMOVE(stp, ls_list);
 1464         LIST_REMOVE(stp, ls_file);
 1465 
 1466         lfp = stp->ls_lfp;
 1467         /*
 1468          * Now, free all lockowners associated with this open.
 1469          */
 1470         LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
 1471                 nfsrv_freelockowner(tstp, vp, cansleep, p);
 1472 
 1473         /*
 1474          * The nfslockfile is freed here if there are no locks
 1475          * associated with the open.
 1476          * If there are locks associated with the open, the
 1477          * nfslockfile structure can be freed via nfsrv_freelockowner().
 1478          * Acquire the state mutex to avoid races with calls to
 1479          * nfsrv_getlockfile().
 1480          */
 1481         if (cansleep != 0)
 1482                 NFSLOCKSTATE();
 1483         if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
 1484             LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
 1485             LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
 1486             lfp->lf_usecount == 0 &&
 1487             (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
 1488                 nfsrv_freenfslockfile(lfp);
 1489                 ret = 1;
 1490         } else
 1491                 ret = 0;
 1492         if (cansleep != 0)
 1493                 NFSUNLOCKSTATE();
 1494         free(stp, M_NFSDSTATE);
 1495         nfsstatsv1.srvopens--;
 1496         nfsrv_openpluslock--;
 1497         return (ret);
 1498 }
 1499 
 1500 /*
 1501  * Frees a lockowner and all associated locks.
 1502  */
 1503 static void
 1504 nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
 1505     NFSPROC_T *p)
 1506 {
 1507 
 1508         LIST_REMOVE(stp, ls_hash);
 1509         LIST_REMOVE(stp, ls_list);
 1510         nfsrv_freeallnfslocks(stp, vp, cansleep, p);
 1511         if (stp->ls_op)
 1512                 nfsrvd_derefcache(stp->ls_op);
 1513         free(stp, M_NFSDSTATE);
 1514         nfsstatsv1.srvlockowners--;
 1515         nfsrv_openpluslock--;
 1516 }
 1517 
 1518 /*
 1519  * Free all the nfs locks on a lockowner.
 1520  */
 1521 static void
 1522 nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
 1523     NFSPROC_T *p)
 1524 {
 1525         struct nfslock *lop, *nlop;
 1526         struct nfsrollback *rlp, *nrlp;
 1527         struct nfslockfile *lfp = NULL;
 1528         int gottvp = 0;
 1529         vnode_t tvp = NULL;
 1530         uint64_t first, end;
 1531 
 1532         if (vp != NULL)
 1533                 ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
 1534         lop = LIST_FIRST(&stp->ls_lock);
 1535         while (lop != LIST_END(&stp->ls_lock)) {
 1536                 nlop = LIST_NEXT(lop, lo_lckowner);
 1537                 /*
 1538                  * Since all locks should be for the same file, lfp should
 1539                  * not change.
 1540                  */
 1541                 if (lfp == NULL)
 1542                         lfp = lop->lo_lfp;
 1543                 else if (lfp != lop->lo_lfp)
 1544                         panic("allnfslocks");
 1545                 /*
 1546                  * If vp is NULL and cansleep != 0, a vnode must be acquired
 1547                  * from the file handle. This only occurs when called from
 1548                  * nfsrv_cleanclient().
 1549                  */
 1550                 if (gottvp == 0) {
 1551                         if (nfsrv_dolocallocks == 0)
 1552                                 tvp = NULL;
 1553                         else if (vp == NULL && cansleep != 0) {
 1554                                 tvp = nfsvno_getvp(&lfp->lf_fh);
 1555                                 if (tvp != NULL)
 1556                                         NFSVOPUNLOCK(tvp);
 1557                         } else
 1558                                 tvp = vp;
 1559                         gottvp = 1;
 1560                 }
 1561 
 1562                 if (tvp != NULL) {
 1563                         if (cansleep == 0)
 1564                                 panic("allnfs2");
 1565                         first = lop->lo_first;
 1566                         end = lop->lo_end;
 1567                         nfsrv_freenfslock(lop);
 1568                         nfsrv_localunlock(tvp, lfp, first, end, p);
 1569                         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
 1570                             nrlp)
 1571                                 free(rlp, M_NFSDROLLBACK);
 1572                         LIST_INIT(&lfp->lf_rollback);
 1573                 } else
 1574                         nfsrv_freenfslock(lop);
 1575                 lop = nlop;
 1576         }
 1577         if (vp == NULL && tvp != NULL)
 1578                 vrele(tvp);
 1579 }
 1580 
 1581 /*
 1582  * Free an nfslock structure.
 1583  */
 1584 static void
 1585 nfsrv_freenfslock(struct nfslock *lop)
 1586 {
 1587 
 1588         if (lop->lo_lckfile.le_prev != NULL) {
 1589                 LIST_REMOVE(lop, lo_lckfile);
 1590                 nfsstatsv1.srvlocks--;
 1591                 nfsrv_openpluslock--;
 1592         }
 1593         LIST_REMOVE(lop, lo_lckowner);
 1594         free(lop, M_NFSDLOCK);
 1595 }
 1596 
 1597 /*
 1598  * This function frees an nfslockfile structure.
 1599  */
 1600 static void
 1601 nfsrv_freenfslockfile(struct nfslockfile *lfp)
 1602 {
 1603 
 1604         LIST_REMOVE(lfp, lf_hash);
 1605         free(lfp, M_NFSDLOCKFILE);
 1606 }
 1607 
 1608 /*
 1609  * This function looks up an nfsstate structure via stateid.
 1610  */
 1611 static int
 1612 nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
 1613     struct nfsstate **stpp)
 1614 {
 1615         struct nfsstate *stp;
 1616         struct nfsstatehead *hp;
 1617         int error = 0;
 1618 
 1619         *stpp = NULL;
 1620         hp = NFSSTATEHASH(clp, *stateidp);
 1621         LIST_FOREACH(stp, hp, ls_hash) {
 1622                 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
 1623                         NFSX_STATEIDOTHER))
 1624                         break;
 1625         }
 1626 
 1627         /*
 1628          * If no state id in list, return NFSERR_BADSTATEID.
 1629          */
 1630         if (stp == LIST_END(hp)) {
 1631                 error = NFSERR_BADSTATEID;
 1632                 goto out;
 1633         }
 1634         *stpp = stp;
 1635 
 1636 out:
 1637         NFSEXITCODE(error);
 1638         return (error);
 1639 }
 1640 
 1641 /*
 1642  * This function gets an nfsstate structure via owner string.
 1643  */
 1644 static void
 1645 nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
 1646     struct nfsstate **stpp)
 1647 {
 1648         struct nfsstate *stp;
 1649 
 1650         *stpp = NULL;
 1651         LIST_FOREACH(stp, hp, ls_list) {
 1652                 if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
 1653                   !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
 1654                         *stpp = stp;
 1655                         return;
 1656                 }
 1657         }
 1658 }
 1659 
 1660 /*
 1661  * Lock control function called to update lock status.
 1662  * Returns 0 upon success, -1 if there is no lock and the flags indicate
 1663  * that one isn't to be created and an NFSERR_xxx for other errors.
 1664  * The structures new_stp and new_lop are passed in as pointers that should
 1665  * be set to NULL if the structure is used and shouldn't be free'd.
 1666  * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
 1667  * never used and can safely be allocated on the stack. For all other
 1668  * cases, *new_stpp and *new_lopp should be malloc'd before the call,
 1669  * in case they are used.
 1670  */
 1671 int
 1672 nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
 1673     struct nfslock **new_lopp, struct nfslockconflict *cfp,
 1674     nfsquad_t clientid, nfsv4stateid_t *stateidp,
 1675     __unused struct nfsexstuff *exp,
 1676     struct nfsrv_descript *nd, NFSPROC_T *p)
 1677 {
 1678         struct nfslock *lop;
 1679         struct nfsstate *new_stp = *new_stpp;
 1680         struct nfslock *new_lop = *new_lopp;
 1681         struct nfsstate *tstp, *mystp, *nstp;
 1682         int specialid = 0;
 1683         struct nfslockfile *lfp;
 1684         struct nfslock *other_lop = NULL;
 1685         struct nfsstate *stp, *lckstp = NULL;
 1686         struct nfsclient *clp = NULL;
 1687         u_int32_t bits;
 1688         int error = 0, haslock = 0, ret, reterr;
 1689         int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
 1690         fhandle_t nfh;
 1691         uint64_t first, end;
 1692         uint32_t lock_flags;
 1693 
 1694         if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
 1695                 /*
 1696                  * Note the special cases of "all 1s" or "all 0s" stateids and
 1697                  * let reads with all 1s go ahead.
 1698                  */
 1699                 if (new_stp->ls_stateid.seqid == 0x0 &&
 1700                     new_stp->ls_stateid.other[0] == 0x0 &&
 1701                     new_stp->ls_stateid.other[1] == 0x0 &&
 1702                     new_stp->ls_stateid.other[2] == 0x0)
 1703                         specialid = 1;
 1704                 else if (new_stp->ls_stateid.seqid == 0xffffffff &&
 1705                     new_stp->ls_stateid.other[0] == 0xffffffff &&
 1706                     new_stp->ls_stateid.other[1] == 0xffffffff &&
 1707                     new_stp->ls_stateid.other[2] == 0xffffffff)
 1708                         specialid = 2;
 1709         }
 1710 
 1711         /*
 1712          * Check for restart conditions (client and server).
 1713          */
 1714         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 1715             &new_stp->ls_stateid, specialid);
 1716         if (error)
 1717                 goto out;
 1718 
 1719         /*
 1720          * Check for state resource limit exceeded.
 1721          */
 1722         if ((new_stp->ls_flags & NFSLCK_LOCK) &&
 1723             nfsrv_openpluslock > nfsrv_v4statelimit) {
 1724                 error = NFSERR_RESOURCE;
 1725                 goto out;
 1726         }
 1727 
 1728         /*
 1729          * For the lock case, get another nfslock structure,
 1730          * just in case we need it.
 1731          * Malloc now, before we start sifting through the linked lists,
 1732          * in case we have to wait for memory.
 1733          */
 1734 tryagain:
 1735         if (new_stp->ls_flags & NFSLCK_LOCK)
 1736                 other_lop = malloc(sizeof (struct nfslock),
 1737                     M_NFSDLOCK, M_WAITOK);
 1738         filestruct_locked = 0;
 1739         reterr = 0;
 1740         lfp = NULL;
 1741 
 1742         /*
 1743          * Get the lockfile structure for CFH now, so we can do a sanity
 1744          * check against the stateid, before incrementing the seqid#, since
 1745          * we want to return NFSERR_BADSTATEID on failure and the seqid#
 1746          * shouldn't be incremented for this case.
 1747          * If nfsrv_getlockfile() returns -1, it means "not found", which
 1748          * will be handled later.
 1749          * If we are doing Lock/LockU and local locking is enabled, sleep
 1750          * lock the nfslockfile structure.
 1751          */
 1752         getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
 1753         NFSLOCKSTATE();
 1754         if (getlckret == 0) {
 1755                 if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
 1756                     nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
 1757                         getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
 1758                             &lfp, &nfh, 1);
 1759                         if (getlckret == 0)
 1760                                 filestruct_locked = 1;
 1761                 } else
 1762                         getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
 1763                             &lfp, &nfh, 0);
 1764         }
 1765         if (getlckret != 0 && getlckret != -1)
 1766                 reterr = getlckret;
 1767 
 1768         if (filestruct_locked != 0) {
 1769                 LIST_INIT(&lfp->lf_rollback);
 1770                 if ((new_stp->ls_flags & NFSLCK_LOCK)) {
 1771                         /*
 1772                          * For local locking, do the advisory locking now, so
 1773                          * that any conflict can be detected. A failure later
 1774                          * can be rolled back locally. If an error is returned,
 1775                          * struct nfslockfile has been unlocked and any local
 1776                          * locking rolled back.
 1777                          */
 1778                         NFSUNLOCKSTATE();
 1779                         if (vnode_unlocked == 0) {
 1780                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
 1781                                 vnode_unlocked = 1;
 1782                                 NFSVOPUNLOCK(vp);
 1783                         }
 1784                         reterr = nfsrv_locallock(vp, lfp,
 1785                             (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
 1786                             new_lop->lo_first, new_lop->lo_end, cfp, p);
 1787                         NFSLOCKSTATE();
 1788                 }
 1789         }
 1790 
 1791         if (specialid == 0) {
 1792             if (new_stp->ls_flags & NFSLCK_TEST) {
 1793                 /*
 1794                  * RFC 3530 does not list LockT as an op that renews a
 1795                  * lease, but the consensus seems to be that it is ok
 1796                  * for a server to do so.
 1797                  */
 1798                 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 1799                     (nfsquad_t)((u_quad_t)0), 0, nd, p);
 1800 
 1801                 /*
 1802                  * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
 1803                  * error returns for LockT, just go ahead and test for a lock,
 1804                  * since there are no locks for this client, but other locks
 1805                  * can conflict. (ie. same client will always be false)
 1806                  */
 1807                 if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
 1808                     error = 0;
 1809                 lckstp = new_stp;
 1810             } else {
 1811               error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 1812                 (nfsquad_t)((u_quad_t)0), 0, nd, p);
 1813               if (error == 0)
 1814                 /*
 1815                  * Look up the stateid
 1816                  */
 1817                 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
 1818                   new_stp->ls_flags, &stp);
 1819               /*
 1820                * do some sanity checks for an unconfirmed open or a
 1821                * stateid that refers to the wrong file, for an open stateid
 1822                */
 1823               if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
 1824                   ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
 1825                    (getlckret == 0 && stp->ls_lfp != lfp))){
 1826                       /*
 1827                        * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID
 1828                        * The only exception is using SETATTR with SIZE.
 1829                        * */
 1830                     if ((new_stp->ls_flags &
 1831                          (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
 1832                              error = NFSERR_BADSTATEID;
 1833               }
 1834               
 1835                 if (error == 0 &&
 1836                   (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
 1837                   getlckret == 0 && stp->ls_lfp != lfp)
 1838                         error = NFSERR_BADSTATEID;
 1839 
 1840               /*
 1841                * If the lockowner stateid doesn't refer to the same file,
 1842                * I believe that is considered ok, since some clients will
 1843                * only create a single lockowner and use that for all locks
 1844                * on all files.
 1845                * For now, log it as a diagnostic, instead of considering it
 1846                * a BadStateid.
 1847                */
 1848               if (error == 0 && (stp->ls_flags &
 1849                   (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
 1850                   getlckret == 0 && stp->ls_lfp != lfp) {
 1851 #ifdef DIAGNOSTIC
 1852                   printf("Got a lock statid for different file open\n");
 1853 #endif
 1854                   /*
 1855                   error = NFSERR_BADSTATEID;
 1856                   */
 1857               }
 1858 
 1859               if (error == 0) {
 1860                     if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
 1861                         /*
 1862                          * If haslock set, we've already checked the seqid.
 1863                          */
 1864                         if (!haslock) {
 1865                             if (stp->ls_flags & NFSLCK_OPEN)
 1866                                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 1867                                     stp->ls_openowner, new_stp->ls_op);
 1868                             else
 1869                                 error = NFSERR_BADSTATEID;
 1870                         }
 1871                         if (!error)
 1872                             nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
 1873                         if (lckstp) {
 1874                             /*
 1875                              * For NFSv4.1 and NFSv4.2 allow an
 1876                              * open_to_lock_owner when the lock_owner already
 1877                              * exists.  Just clear NFSLCK_OPENTOLOCK so that
 1878                              * a new lock_owner will not be created.
 1879                              * RFC7530 states that the error for NFSv4.0
 1880                              * is NFS4ERR_BAD_SEQID.
 1881                              */
 1882                             if ((nd->nd_flag & ND_NFSV41) != 0)
 1883                                 new_stp->ls_flags &= ~NFSLCK_OPENTOLOCK;
 1884                             else
 1885                                 error = NFSERR_BADSEQID;
 1886                         } else
 1887                             lckstp = new_stp;
 1888                     } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
 1889                         /*
 1890                          * If haslock set, ditto above.
 1891                          */
 1892                         if (!haslock) {
 1893                             if (stp->ls_flags & NFSLCK_OPEN)
 1894                                 error = NFSERR_BADSTATEID;
 1895                             else
 1896                                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 1897                                     stp, new_stp->ls_op);
 1898                         }
 1899                         lckstp = stp;
 1900                     } else {
 1901                         lckstp = stp;
 1902                     }
 1903               }
 1904               /*
 1905                * If the seqid part of the stateid isn't the same, return
 1906                * NFSERR_OLDSTATEID for cases other than I/O Ops.
 1907                * For I/O Ops, only return NFSERR_OLDSTATEID if
 1908                * nfsrv_returnoldstateid is set. (The consensus on the email
 1909                * list was that most clients would prefer to not receive
 1910                * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
 1911                * is what will happen, so I use the nfsrv_returnoldstateid to
 1912                * allow for either server configuration.)
 1913                */
 1914               if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
 1915                   (((nd->nd_flag & ND_NFSV41) == 0 &&
 1916                    (!(new_stp->ls_flags & NFSLCK_CHECK) ||
 1917                     nfsrv_returnoldstateid)) ||
 1918                    ((nd->nd_flag & ND_NFSV41) != 0 &&
 1919                     new_stp->ls_stateid.seqid != 0)))
 1920                     error = NFSERR_OLDSTATEID;
 1921             }
 1922         }
 1923 
 1924         /*
 1925          * Now we can check for grace.
 1926          */
 1927         if (!error)
 1928                 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
 1929         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
 1930                 nfsrv_checkstable(clp))
 1931                 error = NFSERR_NOGRACE;
 1932         /*
 1933          * If we successfully Reclaimed state, note that.
 1934          */
 1935         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
 1936                 nfsrv_markstable(clp);
 1937 
 1938         /*
 1939          * At this point, either error == NFSERR_BADSTATEID or the
 1940          * seqid# has been updated, so we can return any error.
 1941          * If error == 0, there may be an error in:
 1942          *    nd_repstat - Set by the calling function.
 1943          *    reterr - Set above, if getting the nfslockfile structure
 1944          *       or acquiring the local lock failed.
 1945          *    (If both of these are set, nd_repstat should probably be
 1946          *     returned, since that error was detected before this
 1947          *     function call.)
 1948          */
 1949         if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
 1950                 if (error == 0) {
 1951                         if (nd->nd_repstat != 0)
 1952                                 error = nd->nd_repstat;
 1953                         else
 1954                                 error = reterr;
 1955                 }
 1956                 if (filestruct_locked != 0) {
 1957                         /* Roll back local locks. */
 1958                         NFSUNLOCKSTATE();
 1959                         if (vnode_unlocked == 0) {
 1960                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
 1961                                 vnode_unlocked = 1;
 1962                                 NFSVOPUNLOCK(vp);
 1963                         }
 1964                         nfsrv_locallock_rollback(vp, lfp, p);
 1965                         NFSLOCKSTATE();
 1966                         nfsrv_unlocklf(lfp);
 1967                 }
 1968                 NFSUNLOCKSTATE();
 1969                 goto out;
 1970         }
 1971 
 1972         /*
 1973          * Check the nfsrv_getlockfile return.
 1974          * Returned -1 if no structure found.
 1975          */
 1976         if (getlckret == -1) {
 1977                 error = NFSERR_EXPIRED;
 1978                 /*
 1979                  * Called from lockt, so no lock is OK.
 1980                  */
 1981                 if (new_stp->ls_flags & NFSLCK_TEST) {
 1982                         error = 0;
 1983                 } else if (new_stp->ls_flags &
 1984                     (NFSLCK_CHECK | NFSLCK_SETATTR)) {
 1985                         /*
 1986                          * Called to check for a lock, OK if the stateid is all
 1987                          * 1s or all 0s, but there should be an nfsstate
 1988                          * otherwise.
 1989                          * (ie. If there is no open, I'll assume no share
 1990                          *  deny bits.)
 1991                          */
 1992                         if (specialid)
 1993                                 error = 0;
 1994                         else
 1995                                 error = NFSERR_BADSTATEID;
 1996                 }
 1997                 NFSUNLOCKSTATE();
 1998                 goto out;
 1999         }
 2000 
 2001         /*
 2002          * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
 2003          * For NFSLCK_CHECK, allow a read if write access is granted,
 2004          * but check for a deny. For NFSLCK_LOCK, require correct access,
 2005          * which implies a conflicting deny can't exist.
 2006          */
 2007         if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
 2008             /*
 2009              * Four kinds of state id:
 2010              * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
 2011              * - stateid for an open
 2012              * - stateid for a delegation
 2013              * - stateid for a lock owner
 2014              */
 2015             if (!specialid) {
 2016                 if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
 2017                     delegation = 1;
 2018                     mystp = stp;
 2019                     nfsrv_delaydelegtimeout(stp);
 2020                 } else if (stp->ls_flags & NFSLCK_OPEN) {
 2021                     mystp = stp;
 2022                 } else {
 2023                     mystp = stp->ls_openstp;
 2024                 }
 2025                 /*
 2026                  * If locking or checking, require correct access
 2027                  * bit set.
 2028                  */
 2029                 if (((new_stp->ls_flags & NFSLCK_LOCK) &&
 2030                      !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
 2031                        mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
 2032                     ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
 2033                       (NFSLCK_CHECK | NFSLCK_READACCESS) &&
 2034                      !(mystp->ls_flags & NFSLCK_READACCESS) &&
 2035                      nfsrv_allowreadforwriteopen == 0) ||
 2036                     ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
 2037                       (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
 2038                      !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
 2039                         if (filestruct_locked != 0) {
 2040                                 /* Roll back local locks. */
 2041                                 NFSUNLOCKSTATE();
 2042                                 if (vnode_unlocked == 0) {
 2043                                         ASSERT_VOP_ELOCKED(vp,
 2044                                             "nfsrv_lockctrl3");
 2045                                         vnode_unlocked = 1;
 2046                                         NFSVOPUNLOCK(vp);
 2047                                 }
 2048                                 nfsrv_locallock_rollback(vp, lfp, p);
 2049                                 NFSLOCKSTATE();
 2050                                 nfsrv_unlocklf(lfp);
 2051                         }
 2052                         NFSUNLOCKSTATE();
 2053                         error = NFSERR_OPENMODE;
 2054                         goto out;
 2055                 }
 2056             } else
 2057                 mystp = NULL;
 2058             if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
 2059                 /*
 2060                  * Check for a conflicting deny bit.
 2061                  */
 2062                 LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
 2063                     if (tstp != mystp) {
 2064                         bits = tstp->ls_flags;
 2065                         bits >>= NFSLCK_SHIFT;
 2066                         if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
 2067                             KASSERT(vnode_unlocked == 0,
 2068                                 ("nfsrv_lockctrl: vnode unlocked1"));
 2069                             ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
 2070                                 vp, p);
 2071                             if (ret == 1) {
 2072                                 /*
 2073                                 * nfsrv_clientconflict unlocks state
 2074                                  * when it returns non-zero.
 2075                                  */
 2076                                 lckstp = NULL;
 2077                                 goto tryagain;
 2078                             }
 2079                             if (ret == 0)
 2080                                 NFSUNLOCKSTATE();
 2081                             if (ret == 2)
 2082                                 error = NFSERR_PERM;
 2083                             else
 2084                                 error = NFSERR_OPENMODE;
 2085                             goto out;
 2086                         }
 2087                     }
 2088                 }
 2089 
 2090                 /* We're outta here */
 2091                 NFSUNLOCKSTATE();
 2092                 goto out;
 2093             }
 2094         }
 2095 
 2096         /*
 2097          * For setattr, just get rid of all the Delegations for other clients.
 2098          */
 2099         if (new_stp->ls_flags & NFSLCK_SETATTR) {
 2100                 KASSERT(vnode_unlocked == 0,
 2101                     ("nfsrv_lockctrl: vnode unlocked2"));
 2102                 ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
 2103                 if (ret) {
 2104                         /*
 2105                          * nfsrv_cleandeleg() unlocks state when it
 2106                          * returns non-zero.
 2107                          */
 2108                         if (ret == -1) {
 2109                                 lckstp = NULL;
 2110                                 goto tryagain;
 2111                         }
 2112                         error = ret;
 2113                         goto out;
 2114                 }
 2115                 if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
 2116                     (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
 2117                      LIST_EMPTY(&lfp->lf_deleg))) {
 2118                         NFSUNLOCKSTATE();
 2119                         goto out;
 2120                 }
 2121         }
 2122 
 2123         /*
 2124          * Check for a conflicting delegation. If one is found, call
 2125          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2126          * been set yet, it will get the lock. Otherwise, it will recall
 2127          * the delegation. Then, we try try again...
 2128          * I currently believe the conflict algorithm to be:
 2129          * For Lock Ops (Lock/LockT/LockU)
 2130          * - there is a conflict iff a different client has a write delegation
 2131          * For Reading (Read Op)
 2132          * - there is a conflict iff a different client has a write delegation
 2133          *   (the specialids are always a different client)
 2134          * For Writing (Write/Setattr of size)
 2135          * - there is a conflict if a different client has any delegation
 2136          * - there is a conflict if the same client has a read delegation
 2137          *   (I don't understand why this isn't allowed, but that seems to be
 2138          *    the current consensus?)
 2139          */
 2140         tstp = LIST_FIRST(&lfp->lf_deleg);
 2141         while (tstp != LIST_END(&lfp->lf_deleg)) {
 2142             nstp = LIST_NEXT(tstp, ls_file);
 2143             if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
 2144                  ((new_stp->ls_flags & NFSLCK_CHECK) &&
 2145                   (new_lop->lo_flags & NFSLCK_READ))) &&
 2146                   clp != tstp->ls_clp &&
 2147                  (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2148                  ((new_stp->ls_flags & NFSLCK_CHECK) &&
 2149                    (new_lop->lo_flags & NFSLCK_WRITE) &&
 2150                   (clp != tstp->ls_clp ||
 2151                    (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
 2152                 ret = 0;
 2153                 if (filestruct_locked != 0) {
 2154                         /* Roll back local locks. */
 2155                         NFSUNLOCKSTATE();
 2156                         if (vnode_unlocked == 0) {
 2157                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
 2158                                 NFSVOPUNLOCK(vp);
 2159                         }
 2160                         nfsrv_locallock_rollback(vp, lfp, p);
 2161                         NFSLOCKSTATE();
 2162                         nfsrv_unlocklf(lfp);
 2163                         NFSUNLOCKSTATE();
 2164                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2165                         vnode_unlocked = 0;
 2166                         if (VN_IS_DOOMED(vp))
 2167                                 ret = NFSERR_SERVERFAULT;
 2168                         NFSLOCKSTATE();
 2169                 }
 2170                 if (ret == 0)
 2171                         ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
 2172                 if (ret) {
 2173                     /*
 2174                      * nfsrv_delegconflict unlocks state when it
 2175                      * returns non-zero, which it always does.
 2176                      */
 2177                     if (other_lop) {
 2178                         free(other_lop, M_NFSDLOCK);
 2179                         other_lop = NULL;
 2180                     }
 2181                     if (ret == -1) {
 2182                         lckstp = NULL;
 2183                         goto tryagain;
 2184                     }
 2185                     error = ret;
 2186                     goto out;
 2187                 }
 2188                 /* Never gets here. */
 2189             }
 2190             tstp = nstp;
 2191         }
 2192 
 2193         /*
 2194          * Handle the unlock case by calling nfsrv_updatelock().
 2195          * (Should I have done some access checking above for unlock? For now,
 2196          *  just let it happen.)
 2197          */
 2198         if (new_stp->ls_flags & NFSLCK_UNLOCK) {
 2199                 first = new_lop->lo_first;
 2200                 end = new_lop->lo_end;
 2201                 nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
 2202                 stateidp->seqid = ++(stp->ls_stateid.seqid);
 2203                 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 2204                         stateidp->seqid = stp->ls_stateid.seqid = 1;
 2205                 stateidp->other[0] = stp->ls_stateid.other[0];
 2206                 stateidp->other[1] = stp->ls_stateid.other[1];
 2207                 stateidp->other[2] = stp->ls_stateid.other[2];
 2208                 if (filestruct_locked != 0) {
 2209                         NFSUNLOCKSTATE();
 2210                         if (vnode_unlocked == 0) {
 2211                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
 2212                                 vnode_unlocked = 1;
 2213                                 NFSVOPUNLOCK(vp);
 2214                         }
 2215                         /* Update the local locks. */
 2216                         nfsrv_localunlock(vp, lfp, first, end, p);
 2217                         NFSLOCKSTATE();
 2218                         nfsrv_unlocklf(lfp);
 2219                 }
 2220                 NFSUNLOCKSTATE();
 2221                 goto out;
 2222         }
 2223 
 2224         /*
 2225          * Search for a conflicting lock. A lock conflicts if:
 2226          * - the lock range overlaps and
 2227          * - at least one lock is a write lock and
 2228          * - it is not owned by the same lock owner
 2229          */
 2230         if (!delegation) {
 2231           LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
 2232             if (new_lop->lo_end > lop->lo_first &&
 2233                 new_lop->lo_first < lop->lo_end &&
 2234                 (new_lop->lo_flags == NFSLCK_WRITE ||
 2235                  lop->lo_flags == NFSLCK_WRITE) &&
 2236                 lckstp != lop->lo_stp &&
 2237                 (clp != lop->lo_stp->ls_clp ||
 2238                  lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
 2239                  NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
 2240                     lckstp->ls_ownerlen))) {
 2241                 if (other_lop) {
 2242                     free(other_lop, M_NFSDLOCK);
 2243                     other_lop = NULL;
 2244                 }
 2245                 if (vnode_unlocked != 0)
 2246                     ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
 2247                         NULL, p);
 2248                 else
 2249                     ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
 2250                         vp, p);
 2251                 if (ret == 1) {
 2252                     if (filestruct_locked != 0) {
 2253                         if (vnode_unlocked == 0) {
 2254                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
 2255                                 NFSVOPUNLOCK(vp);
 2256                         }
 2257                         /* Roll back local locks. */
 2258                         nfsrv_locallock_rollback(vp, lfp, p);
 2259                         NFSLOCKSTATE();
 2260                         nfsrv_unlocklf(lfp);
 2261                         NFSUNLOCKSTATE();
 2262                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2263                         vnode_unlocked = 0;
 2264                         if (VN_IS_DOOMED(vp)) {
 2265                                 error = NFSERR_SERVERFAULT;
 2266                                 goto out;
 2267                         }
 2268                     }
 2269                     /*
 2270                      * nfsrv_clientconflict() unlocks state when it
 2271                      * returns non-zero.
 2272                      */
 2273                     lckstp = NULL;
 2274                     goto tryagain;
 2275                 }
 2276                 /*
 2277                  * Found a conflicting lock, so record the conflict and
 2278                  * return the error.
 2279                  */
 2280                 if (cfp != NULL && ret == 0) {
 2281                     cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
 2282                     cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
 2283                     cfp->cl_first = lop->lo_first;
 2284                     cfp->cl_end = lop->lo_end;
 2285                     cfp->cl_flags = lop->lo_flags;
 2286                     cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
 2287                     NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
 2288                         cfp->cl_ownerlen);
 2289                 }
 2290                 if (ret == 2)
 2291                     error = NFSERR_PERM;
 2292                 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2293                     error = NFSERR_RECLAIMCONFLICT;
 2294                 else if (new_stp->ls_flags & NFSLCK_CHECK)
 2295                     error = NFSERR_LOCKED;
 2296                 else
 2297                     error = NFSERR_DENIED;
 2298                 if (filestruct_locked != 0 && ret == 0) {
 2299                         /* Roll back local locks. */
 2300                         NFSUNLOCKSTATE();
 2301                         if (vnode_unlocked == 0) {
 2302                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
 2303                                 vnode_unlocked = 1;
 2304                                 NFSVOPUNLOCK(vp);
 2305                         }
 2306                         nfsrv_locallock_rollback(vp, lfp, p);
 2307                         NFSLOCKSTATE();
 2308                         nfsrv_unlocklf(lfp);
 2309                 }
 2310                 if (ret == 0)
 2311                         NFSUNLOCKSTATE();
 2312                 goto out;
 2313             }
 2314           }
 2315         }
 2316 
 2317         /*
 2318          * We only get here if there was no lock that conflicted.
 2319          */
 2320         if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
 2321                 NFSUNLOCKSTATE();
 2322                 goto out;
 2323         }
 2324 
 2325         /*
 2326          * We only get here when we are creating or modifying a lock.
 2327          * There are two variants:
 2328          * - exist_lock_owner where lock_owner exists
 2329          * - open_to_lock_owner with new lock_owner
 2330          */
 2331         first = new_lop->lo_first;
 2332         end = new_lop->lo_end;
 2333         lock_flags = new_lop->lo_flags;
 2334         if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
 2335                 nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
 2336                 stateidp->seqid = ++(lckstp->ls_stateid.seqid);
 2337                 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 2338                         stateidp->seqid = lckstp->ls_stateid.seqid = 1;
 2339                 stateidp->other[0] = lckstp->ls_stateid.other[0];
 2340                 stateidp->other[1] = lckstp->ls_stateid.other[1];
 2341                 stateidp->other[2] = lckstp->ls_stateid.other[2];
 2342         } else {
 2343                 /*
 2344                  * The new open_to_lock_owner case.
 2345                  * Link the new nfsstate into the lists.
 2346                  */
 2347                 new_stp->ls_seq = new_stp->ls_opentolockseq;
 2348                 nfsrvd_refcache(new_stp->ls_op);
 2349                 stateidp->seqid = new_stp->ls_stateid.seqid = 1;
 2350                 stateidp->other[0] = new_stp->ls_stateid.other[0] =
 2351                     clp->lc_clientid.lval[0];
 2352                 stateidp->other[1] = new_stp->ls_stateid.other[1] =
 2353                     clp->lc_clientid.lval[1];
 2354                 stateidp->other[2] = new_stp->ls_stateid.other[2] =
 2355                     nfsrv_nextstateindex(clp);
 2356                 new_stp->ls_clp = clp;
 2357                 LIST_INIT(&new_stp->ls_lock);
 2358                 new_stp->ls_openstp = stp;
 2359                 new_stp->ls_lfp = lfp;
 2360                 nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
 2361                     lfp);
 2362                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
 2363                     new_stp, ls_hash);
 2364                 LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
 2365                 *new_lopp = NULL;
 2366                 *new_stpp = NULL;
 2367                 nfsstatsv1.srvlockowners++;
 2368                 nfsrv_openpluslock++;
 2369         }
 2370         if (filestruct_locked != 0) {
 2371                 NFSUNLOCKSTATE();
 2372                 nfsrv_locallock_commit(lfp, lock_flags, first, end);
 2373                 NFSLOCKSTATE();
 2374                 nfsrv_unlocklf(lfp);
 2375         }
 2376         NFSUNLOCKSTATE();
 2377 
 2378 out:
 2379         if (haslock) {
 2380                 NFSLOCKV4ROOTMUTEX();
 2381                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2382                 NFSUNLOCKV4ROOTMUTEX();
 2383         }
 2384         if (vnode_unlocked != 0) {
 2385                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2386                 if (error == 0 && VN_IS_DOOMED(vp))
 2387                         error = NFSERR_SERVERFAULT;
 2388         }
 2389         if (other_lop)
 2390                 free(other_lop, M_NFSDLOCK);
 2391         NFSEXITCODE2(error, nd);
 2392         return (error);
 2393 }
 2394 
 2395 /*
 2396  * Check for state errors for Open.
 2397  * repstat is passed back out as an error if more critical errors
 2398  * are not detected.
 2399  */
 2400 int
 2401 nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
 2402     struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
 2403     NFSPROC_T *p, int repstat)
 2404 {
 2405         struct nfsstate *stp, *nstp;
 2406         struct nfsclient *clp;
 2407         struct nfsstate *ownerstp;
 2408         struct nfslockfile *lfp, *new_lfp;
 2409         int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
 2410 
 2411         if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
 2412                 readonly = 1;
 2413         /*
 2414          * Check for restart conditions (client and server).
 2415          */
 2416         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 2417                 &new_stp->ls_stateid, 0);
 2418         if (error)
 2419                 goto out;
 2420 
 2421         /*
 2422          * Check for state resource limit exceeded.
 2423          * Technically this should be SMP protected, but the worst
 2424          * case error is "out by one or two" on the count when it
 2425          * returns NFSERR_RESOURCE and the limit is just a rather
 2426          * arbitrary high water mark, so no harm is done.
 2427          */
 2428         if (nfsrv_openpluslock > nfsrv_v4statelimit) {
 2429                 error = NFSERR_RESOURCE;
 2430                 goto out;
 2431         }
 2432 
 2433 tryagain:
 2434         new_lfp = malloc(sizeof (struct nfslockfile),
 2435             M_NFSDLOCKFILE, M_WAITOK);
 2436         if (vp)
 2437                 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
 2438                     NULL, p);
 2439         NFSLOCKSTATE();
 2440         /*
 2441          * Get the nfsclient structure.
 2442          */
 2443         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 2444             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 2445 
 2446         /*
 2447          * Look up the open owner. See if it needs confirmation and
 2448          * check the seq#, as required.
 2449          */
 2450         if (!error)
 2451                 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
 2452 
 2453         if (!error && ownerstp) {
 2454                 error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
 2455                     new_stp->ls_op);
 2456                 /*
 2457                  * If the OpenOwner hasn't been confirmed, assume the
 2458                  * old one was a replay and this one is ok.
 2459                  * See: RFC3530 Sec. 14.2.18.
 2460                  */
 2461                 if (error == NFSERR_BADSEQID &&
 2462                     (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
 2463                         error = 0;
 2464         }
 2465 
 2466         /*
 2467          * Check for grace.
 2468          */
 2469         if (!error)
 2470                 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
 2471         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
 2472                 nfsrv_checkstable(clp))
 2473                 error = NFSERR_NOGRACE;
 2474 
 2475         /*
 2476          * If none of the above errors occurred, let repstat be
 2477          * returned.
 2478          */
 2479         if (repstat && !error)
 2480                 error = repstat;
 2481         if (error) {
 2482                 NFSUNLOCKSTATE();
 2483                 if (haslock) {
 2484                         NFSLOCKV4ROOTMUTEX();
 2485                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2486                         NFSUNLOCKV4ROOTMUTEX();
 2487                 }
 2488                 free(new_lfp, M_NFSDLOCKFILE);
 2489                 goto out;
 2490         }
 2491 
 2492         /*
 2493          * If vp == NULL, the file doesn't exist yet, so return ok.
 2494          * (This always happens on the first pass, so haslock must be 0.)
 2495          */
 2496         if (vp == NULL) {
 2497                 NFSUNLOCKSTATE();
 2498                 free(new_lfp, M_NFSDLOCKFILE);
 2499                 goto out;
 2500         }
 2501 
 2502         /*
 2503          * Get the structure for the underlying file.
 2504          */
 2505         if (getfhret)
 2506                 error = getfhret;
 2507         else
 2508                 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
 2509                     NULL, 0);
 2510         if (new_lfp)
 2511                 free(new_lfp, M_NFSDLOCKFILE);
 2512         if (error) {
 2513                 NFSUNLOCKSTATE();
 2514                 if (haslock) {
 2515                         NFSLOCKV4ROOTMUTEX();
 2516                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2517                         NFSUNLOCKV4ROOTMUTEX();
 2518                 }
 2519                 goto out;
 2520         }
 2521 
 2522         /*
 2523          * Search for a conflicting open/share.
 2524          */
 2525         if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
 2526             /*
 2527              * For Delegate_Cur, search for the matching Delegation,
 2528              * which indicates no conflict.
 2529              * An old delegation should have been recovered by the
 2530              * client doing a Claim_DELEGATE_Prev, so I won't let
 2531              * it match and return NFSERR_EXPIRED. Should I let it
 2532              * match?
 2533              */
 2534             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 2535                 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
 2536                     (((nd->nd_flag & ND_NFSV41) != 0 &&
 2537                     stateidp->seqid == 0) ||
 2538                     stateidp->seqid == stp->ls_stateid.seqid) &&
 2539                     !NFSBCMP(stateidp->other, stp->ls_stateid.other,
 2540                           NFSX_STATEIDOTHER))
 2541                         break;
 2542             }
 2543             if (stp == LIST_END(&lfp->lf_deleg) ||
 2544                 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
 2545                  (stp->ls_flags & NFSLCK_DELEGREAD))) {
 2546                 NFSUNLOCKSTATE();
 2547                 if (haslock) {
 2548                         NFSLOCKV4ROOTMUTEX();
 2549                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2550                         NFSUNLOCKV4ROOTMUTEX();
 2551                 }
 2552                 error = NFSERR_EXPIRED;
 2553                 goto out;
 2554             }
 2555         }
 2556 
 2557         /*
 2558          * Check for access/deny bit conflicts. I check for the same
 2559          * owner as well, in case the client didn't bother.
 2560          */
 2561         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 2562                 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
 2563                     (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
 2564                       ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
 2565                      ((stp->ls_flags & NFSLCK_ACCESSBITS) &
 2566                       ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
 2567                         ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
 2568                         if (ret == 1) {
 2569                                 /*
 2570                                  * nfsrv_clientconflict() unlocks
 2571                                  * state when it returns non-zero.
 2572                                  */
 2573                                 goto tryagain;
 2574                         }
 2575                         if (ret == 2)
 2576                                 error = NFSERR_PERM;
 2577                         else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2578                                 error = NFSERR_RECLAIMCONFLICT;
 2579                         else
 2580                                 error = NFSERR_SHAREDENIED;
 2581                         if (ret == 0)
 2582                                 NFSUNLOCKSTATE();
 2583                         if (haslock) {
 2584                                 NFSLOCKV4ROOTMUTEX();
 2585                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2586                                 NFSUNLOCKV4ROOTMUTEX();
 2587                         }
 2588                         goto out;
 2589                 }
 2590         }
 2591 
 2592         /*
 2593          * Check for a conflicting delegation. If one is found, call
 2594          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2595          * been set yet, it will get the lock. Otherwise, it will recall
 2596          * the delegation. Then, we try try again...
 2597          * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
 2598          *  isn't a conflict.)
 2599          * I currently believe the conflict algorithm to be:
 2600          * For Open with Read Access and Deny None
 2601          * - there is a conflict iff a different client has a write delegation
 2602          * For Open with other Write Access or any Deny except None
 2603          * - there is a conflict if a different client has any delegation
 2604          * - there is a conflict if the same client has a read delegation
 2605          *   (The current consensus is that this last case should be
 2606          *    considered a conflict since the client with a read delegation
 2607          *    could have done an Open with ReadAccess and WriteDeny
 2608          *    locally and then not have checked for the WriteDeny.)
 2609          * Don't check for a Reclaim, since that will be dealt with
 2610          * by nfsrv_openctrl().
 2611          */
 2612         if (!(new_stp->ls_flags &
 2613                 (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
 2614             stp = LIST_FIRST(&lfp->lf_deleg);
 2615             while (stp != LIST_END(&lfp->lf_deleg)) {
 2616                 nstp = LIST_NEXT(stp, ls_file);
 2617                 if ((readonly && stp->ls_clp != clp &&
 2618                        (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2619                     (!readonly && (stp->ls_clp != clp ||
 2620                          (stp->ls_flags & NFSLCK_DELEGREAD)))) {
 2621                         ret = nfsrv_delegconflict(stp, &haslock, p, vp);
 2622                         if (ret) {
 2623                             /*
 2624                              * nfsrv_delegconflict() unlocks state
 2625                              * when it returns non-zero.
 2626                              */
 2627                             if (ret == -1)
 2628                                 goto tryagain;
 2629                             error = ret;
 2630                             goto out;
 2631                         }
 2632                 }
 2633                 stp = nstp;
 2634             }
 2635         }
 2636         NFSUNLOCKSTATE();
 2637         if (haslock) {
 2638                 NFSLOCKV4ROOTMUTEX();
 2639                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2640                 NFSUNLOCKV4ROOTMUTEX();
 2641         }
 2642 
 2643 out:
 2644         NFSEXITCODE2(error, nd);
 2645         return (error);
 2646 }
 2647 
 2648 /*
 2649  * Open control function to create/update open state for an open.
 2650  */
 2651 int
 2652 nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
 2653     struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
 2654     nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
 2655     NFSPROC_T *p, u_quad_t filerev)
 2656 {
 2657         struct nfsstate *new_stp = *new_stpp;
 2658         struct nfsstate *stp, *nstp;
 2659         struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
 2660         struct nfslockfile *lfp, *new_lfp;
 2661         struct nfsclient *clp;
 2662         int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
 2663         int readonly = 0, cbret = 1, getfhret = 0;
 2664         int gotstate = 0, len = 0;
 2665         u_char *clidp = NULL;
 2666 
 2667         if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
 2668                 readonly = 1;
 2669         /*
 2670          * Check for restart conditions (client and server).
 2671          * (Paranoia, should have been detected by nfsrv_opencheck().)
 2672          * If an error does show up, return NFSERR_EXPIRED, since the
 2673          * seqid# has already been incremented.
 2674          */
 2675         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 2676             &new_stp->ls_stateid, 0);
 2677         if (error) {
 2678                 printf("Nfsd: openctrl unexpected restart err=%d\n",
 2679                     error);
 2680                 error = NFSERR_EXPIRED;
 2681                 goto out;
 2682         }
 2683 
 2684         clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
 2685 tryagain:
 2686         new_lfp = malloc(sizeof (struct nfslockfile),
 2687             M_NFSDLOCKFILE, M_WAITOK);
 2688         new_open = malloc(sizeof (struct nfsstate),
 2689             M_NFSDSTATE, M_WAITOK);
 2690         new_deleg = malloc(sizeof (struct nfsstate),
 2691             M_NFSDSTATE, M_WAITOK);
 2692         getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
 2693             NULL, p);
 2694         NFSLOCKSTATE();
 2695         /*
 2696          * Get the client structure. Since the linked lists could be changed
 2697          * by other nfsd processes if this process does a tsleep(), one of
 2698          * two things must be done.
 2699          * 1 - don't tsleep()
 2700          * or
 2701          * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
 2702          *     before using the lists, since this lock stops the other
 2703          *     nfsd. This should only be used for rare cases, since it
 2704          *     essentially single threads the nfsd.
 2705          *     At this time, it is only done for cases where the stable
 2706          *     storage file must be written prior to completion of state
 2707          *     expiration.
 2708          */
 2709         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 2710             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 2711         if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
 2712             clp->lc_program) {
 2713                 /*
 2714                  * This happens on the first open for a client
 2715                  * that supports callbacks.
 2716                  */
 2717                 NFSUNLOCKSTATE();
 2718                 /*
 2719                  * Although nfsrv_docallback() will sleep, clp won't
 2720                  * go away, since they are only removed when the
 2721                  * nfsv4_lock() has blocked the nfsd threads. The
 2722                  * fields in clp can change, but having multiple
 2723                  * threads do this Null callback RPC should be
 2724                  * harmless.
 2725                  */
 2726                 cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
 2727                     NULL, 0, NULL, NULL, NULL, 0, p);
 2728                 NFSLOCKSTATE();
 2729                 clp->lc_flags &= ~LCL_NEEDSCBNULL;
 2730                 if (!cbret)
 2731                         clp->lc_flags |= LCL_CALLBACKSON;
 2732         }
 2733 
 2734         /*
 2735          * Look up the open owner. See if it needs confirmation and
 2736          * check the seq#, as required.
 2737          */
 2738         if (!error)
 2739                 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
 2740 
 2741         if (error) {
 2742                 NFSUNLOCKSTATE();
 2743                 printf("Nfsd: openctrl unexpected state err=%d\n",
 2744                         error);
 2745                 free(new_lfp, M_NFSDLOCKFILE);
 2746                 free(new_open, M_NFSDSTATE);
 2747                 free(new_deleg, M_NFSDSTATE);
 2748                 if (haslock) {
 2749                         NFSLOCKV4ROOTMUTEX();
 2750                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2751                         NFSUNLOCKV4ROOTMUTEX();
 2752                 }
 2753                 error = NFSERR_EXPIRED;
 2754                 goto out;
 2755         }
 2756 
 2757         if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2758                 nfsrv_markstable(clp);
 2759 
 2760         /*
 2761          * Get the structure for the underlying file.
 2762          */
 2763         if (getfhret)
 2764                 error = getfhret;
 2765         else
 2766                 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
 2767                     NULL, 0);
 2768         if (new_lfp)
 2769                 free(new_lfp, M_NFSDLOCKFILE);
 2770         if (error) {
 2771                 NFSUNLOCKSTATE();
 2772                 printf("Nfsd openctrl unexpected getlockfile err=%d\n",
 2773                     error);
 2774                 free(new_open, M_NFSDSTATE);
 2775                 free(new_deleg, M_NFSDSTATE);
 2776                 if (haslock) {
 2777                         NFSLOCKV4ROOTMUTEX();
 2778                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2779                         NFSUNLOCKV4ROOTMUTEX();
 2780                 }
 2781                 goto out;
 2782         }
 2783 
 2784         /*
 2785          * Search for a conflicting open/share.
 2786          */
 2787         if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
 2788             /*
 2789              * For Delegate_Cur, search for the matching Delegation,
 2790              * which indicates no conflict.
 2791              * An old delegation should have been recovered by the
 2792              * client doing a Claim_DELEGATE_Prev, so I won't let
 2793              * it match and return NFSERR_EXPIRED. Should I let it
 2794              * match?
 2795              */
 2796             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 2797                 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
 2798                     (((nd->nd_flag & ND_NFSV41) != 0 &&
 2799                     stateidp->seqid == 0) ||
 2800                     stateidp->seqid == stp->ls_stateid.seqid) &&
 2801                     !NFSBCMP(stateidp->other, stp->ls_stateid.other,
 2802                         NFSX_STATEIDOTHER))
 2803                         break;
 2804             }
 2805             if (stp == LIST_END(&lfp->lf_deleg) ||
 2806                 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
 2807                  (stp->ls_flags & NFSLCK_DELEGREAD))) {
 2808                 NFSUNLOCKSTATE();
 2809                 printf("Nfsd openctrl unexpected expiry\n");
 2810                 free(new_open, M_NFSDSTATE);
 2811                 free(new_deleg, M_NFSDSTATE);
 2812                 if (haslock) {
 2813                         NFSLOCKV4ROOTMUTEX();
 2814                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2815                         NFSUNLOCKV4ROOTMUTEX();
 2816                 }
 2817                 error = NFSERR_EXPIRED;
 2818                 goto out;
 2819             }
 2820 
 2821             /*
 2822              * Don't issue a Delegation, since one already exists and
 2823              * delay delegation timeout, as required.
 2824              */
 2825             delegate = 0;
 2826             nfsrv_delaydelegtimeout(stp);
 2827         }
 2828 
 2829         /*
 2830          * Check for access/deny bit conflicts. I also check for the
 2831          * same owner, since the client might not have bothered to check.
 2832          * Also, note an open for the same file and owner, if found,
 2833          * which is all we do here for Delegate_Cur, since conflict
 2834          * checking is already done.
 2835          */
 2836         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 2837                 if (ownerstp && stp->ls_openowner == ownerstp)
 2838                         openstp = stp;
 2839                 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
 2840                     /*
 2841                      * If another client has the file open, the only
 2842                      * delegation that can be issued is a Read delegation
 2843                      * and only if it is a Read open with Deny none.
 2844                      */
 2845                     if (clp != stp->ls_clp) {
 2846                         if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
 2847                             NFSLCK_READACCESS)
 2848                             writedeleg = 0;
 2849                         else
 2850                             delegate = 0;
 2851                     }
 2852                     if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
 2853                         ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
 2854                        ((stp->ls_flags & NFSLCK_ACCESSBITS) &
 2855                         ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
 2856                         ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
 2857                         if (ret == 1) {
 2858                                 /*
 2859                                  * nfsrv_clientconflict() unlocks state
 2860                                  * when it returns non-zero.
 2861                                  */
 2862                                 free(new_open, M_NFSDSTATE);
 2863                                 free(new_deleg, M_NFSDSTATE);
 2864                                 openstp = NULL;
 2865                                 goto tryagain;
 2866                         }
 2867                         if (ret == 2)
 2868                                 error = NFSERR_PERM;
 2869                         else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2870                                 error = NFSERR_RECLAIMCONFLICT;
 2871                         else
 2872                                 error = NFSERR_SHAREDENIED;
 2873                         if (ret == 0)
 2874                                 NFSUNLOCKSTATE();
 2875                         if (haslock) {
 2876                                 NFSLOCKV4ROOTMUTEX();
 2877                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2878                                 NFSUNLOCKV4ROOTMUTEX();
 2879                         }
 2880                         free(new_open, M_NFSDSTATE);
 2881                         free(new_deleg, M_NFSDSTATE);
 2882                         printf("nfsd openctrl unexpected client cnfl\n");
 2883                         goto out;
 2884                     }
 2885                 }
 2886         }
 2887 
 2888         /*
 2889          * Check for a conflicting delegation. If one is found, call
 2890          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2891          * been set yet, it will get the lock. Otherwise, it will recall
 2892          * the delegation. Then, we try try again...
 2893          * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
 2894          *  isn't a conflict.)
 2895          * I currently believe the conflict algorithm to be:
 2896          * For Open with Read Access and Deny None
 2897          * - there is a conflict iff a different client has a write delegation
 2898          * For Open with other Write Access or any Deny except None
 2899          * - there is a conflict if a different client has any delegation
 2900          * - there is a conflict if the same client has a read delegation
 2901          *   (The current consensus is that this last case should be
 2902          *    considered a conflict since the client with a read delegation
 2903          *    could have done an Open with ReadAccess and WriteDeny
 2904          *    locally and then not have checked for the WriteDeny.)
 2905          */
 2906         if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
 2907             stp = LIST_FIRST(&lfp->lf_deleg);
 2908             while (stp != LIST_END(&lfp->lf_deleg)) {
 2909                 nstp = LIST_NEXT(stp, ls_file);
 2910                 if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
 2911                         writedeleg = 0;
 2912                 else
 2913                         delegate = 0;
 2914                 if ((readonly && stp->ls_clp != clp &&
 2915                        (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2916                     (!readonly && (stp->ls_clp != clp ||
 2917                          (stp->ls_flags & NFSLCK_DELEGREAD)))) {
 2918                     if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 2919                         delegate = 2;
 2920                     } else {
 2921                         ret = nfsrv_delegconflict(stp, &haslock, p, vp);
 2922                         if (ret) {
 2923                             /*
 2924                              * nfsrv_delegconflict() unlocks state
 2925                              * when it returns non-zero.
 2926                              */
 2927                             printf("Nfsd openctrl unexpected deleg cnfl\n");
 2928                             free(new_open, M_NFSDSTATE);
 2929                             free(new_deleg, M_NFSDSTATE);
 2930                             if (ret == -1) {
 2931                                 openstp = NULL;
 2932                                 goto tryagain;
 2933                             }
 2934                             error = ret;
 2935                             goto out;
 2936                         }
 2937                     }
 2938                 }
 2939                 stp = nstp;
 2940             }
 2941         }
 2942 
 2943         /*
 2944          * We only get here if there was no open that conflicted.
 2945          * If an open for the owner exists, OR in the access/deny bits.
 2946          * Otherwise it is a new open. If the open_owner hasn't been
 2947          * confirmed, replace the open with the new one needing confirmation,
 2948          * otherwise add the open.
 2949          */
 2950         if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
 2951             /*
 2952              * Handle NFSLCK_DELEGPREV by searching the old delegations for
 2953              * a match. If found, just move the old delegation to the current
 2954              * delegation list and issue open. If not found, return
 2955              * NFSERR_EXPIRED.
 2956              */
 2957             LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
 2958                 if (stp->ls_lfp == lfp) {
 2959                     /* Found it */
 2960                     if (stp->ls_clp != clp)
 2961                         panic("olddeleg clp");
 2962                     LIST_REMOVE(stp, ls_list);
 2963                     LIST_REMOVE(stp, ls_hash);
 2964                     stp->ls_flags &= ~NFSLCK_OLDDELEG;
 2965                     stp->ls_stateid.seqid = delegstateidp->seqid = 1;
 2966                     stp->ls_stateid.other[0] = delegstateidp->other[0] =
 2967                         clp->lc_clientid.lval[0];
 2968                     stp->ls_stateid.other[1] = delegstateidp->other[1] =
 2969                         clp->lc_clientid.lval[1];
 2970                     stp->ls_stateid.other[2] = delegstateidp->other[2] =
 2971                         nfsrv_nextstateindex(clp);
 2972                     stp->ls_compref = nd->nd_compref;
 2973                     LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
 2974                     LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 2975                         stp->ls_stateid), stp, ls_hash);
 2976                     if (stp->ls_flags & NFSLCK_DELEGWRITE)
 2977                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 2978                     else
 2979                         *rflagsp |= NFSV4OPEN_READDELEGATE;
 2980                     clp->lc_delegtime = NFSD_MONOSEC +
 2981                         nfsrv_lease + NFSRV_LEASEDELTA;
 2982 
 2983                     /*
 2984                      * Now, do the associated open.
 2985                      */
 2986                     new_open->ls_stateid.seqid = 1;
 2987                     new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 2988                     new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 2989                     new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 2990                     new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
 2991                         NFSLCK_OPEN;
 2992                     if (stp->ls_flags & NFSLCK_DELEGWRITE)
 2993                         new_open->ls_flags |= (NFSLCK_READACCESS |
 2994                             NFSLCK_WRITEACCESS);
 2995                     else
 2996                         new_open->ls_flags |= NFSLCK_READACCESS;
 2997                     new_open->ls_uid = new_stp->ls_uid;
 2998                     new_open->ls_lfp = lfp;
 2999                     new_open->ls_clp = clp;
 3000                     LIST_INIT(&new_open->ls_open);
 3001                     LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3002                     LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3003                         new_open, ls_hash);
 3004                     /*
 3005                      * and handle the open owner
 3006                      */
 3007                     if (ownerstp) {
 3008                         new_open->ls_openowner = ownerstp;
 3009                         LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
 3010                     } else {
 3011                         new_open->ls_openowner = new_stp;
 3012                         new_stp->ls_flags = 0;
 3013                         nfsrvd_refcache(new_stp->ls_op);
 3014                         new_stp->ls_noopens = 0;
 3015                         LIST_INIT(&new_stp->ls_open);
 3016                         LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3017                         LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3018                         *new_stpp = NULL;
 3019                         nfsstatsv1.srvopenowners++;
 3020                         nfsrv_openpluslock++;
 3021                     }
 3022                     openstp = new_open;
 3023                     new_open = NULL;
 3024                     nfsstatsv1.srvopens++;
 3025                     nfsrv_openpluslock++;
 3026                     break;
 3027                 }
 3028             }
 3029             if (stp == LIST_END(&clp->lc_olddeleg))
 3030                 error = NFSERR_EXPIRED;
 3031         } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
 3032             /*
 3033              * Scan to see that a delegation for this client and file
 3034              * doesn't already exist.
 3035              * There also shouldn't yet be an Open for this file and
 3036              * openowner.
 3037              */
 3038             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 3039                 if (stp->ls_clp == clp)
 3040                     break;
 3041             }
 3042             if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
 3043                 /*
 3044                  * This is the Claim_Previous case with a delegation
 3045                  * type != Delegate_None.
 3046                  */
 3047                 /*
 3048                  * First, add the delegation. (Although we must issue the
 3049                  * delegation, we can also ask for an immediate return.)
 3050                  */
 3051                 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3052                 new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
 3053                     clp->lc_clientid.lval[0];
 3054                 new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
 3055                     clp->lc_clientid.lval[1];
 3056                 new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
 3057                     nfsrv_nextstateindex(clp);
 3058                 if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
 3059                     new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3060                         NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3061                     *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3062                     nfsrv_writedelegcnt++;
 3063                 } else {
 3064                     new_deleg->ls_flags = (NFSLCK_DELEGREAD |
 3065                         NFSLCK_READACCESS);
 3066                     *rflagsp |= NFSV4OPEN_READDELEGATE;
 3067                 }
 3068                 new_deleg->ls_uid = new_stp->ls_uid;
 3069                 new_deleg->ls_lfp = lfp;
 3070                 new_deleg->ls_clp = clp;
 3071                 new_deleg->ls_filerev = filerev;
 3072                 new_deleg->ls_compref = nd->nd_compref;
 3073                 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3074                 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3075                     new_deleg->ls_stateid), new_deleg, ls_hash);
 3076                 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3077                 new_deleg = NULL;
 3078                 if (delegate == 2 || nfsrv_issuedelegs == 0 ||
 3079                     (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3080                      LCL_CALLBACKSON ||
 3081                     NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
 3082                     !NFSVNO_DELEGOK(vp))
 3083                     *rflagsp |= NFSV4OPEN_RECALL;
 3084                 nfsstatsv1.srvdelegates++;
 3085                 nfsrv_openpluslock++;
 3086                 nfsrv_delegatecnt++;
 3087 
 3088                 /*
 3089                  * Now, do the associated open.
 3090                  */
 3091                 new_open->ls_stateid.seqid = 1;
 3092                 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3093                 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3094                 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3095                 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
 3096                     NFSLCK_OPEN;
 3097                 if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
 3098                         new_open->ls_flags |= (NFSLCK_READACCESS |
 3099                             NFSLCK_WRITEACCESS);
 3100                 else
 3101                         new_open->ls_flags |= NFSLCK_READACCESS;
 3102                 new_open->ls_uid = new_stp->ls_uid;
 3103                 new_open->ls_lfp = lfp;
 3104                 new_open->ls_clp = clp;
 3105                 LIST_INIT(&new_open->ls_open);
 3106                 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3107                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3108                    new_open, ls_hash);
 3109                 /*
 3110                  * and handle the open owner
 3111                  */
 3112                 if (ownerstp) {
 3113                     new_open->ls_openowner = ownerstp;
 3114                     LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
 3115                 } else {
 3116                     new_open->ls_openowner = new_stp;
 3117                     new_stp->ls_flags = 0;
 3118                     nfsrvd_refcache(new_stp->ls_op);
 3119                     new_stp->ls_noopens = 0;
 3120                     LIST_INIT(&new_stp->ls_open);
 3121                     LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3122                     LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3123                     *new_stpp = NULL;
 3124                     nfsstatsv1.srvopenowners++;
 3125                     nfsrv_openpluslock++;
 3126                 }
 3127                 openstp = new_open;
 3128                 new_open = NULL;
 3129                 nfsstatsv1.srvopens++;
 3130                 nfsrv_openpluslock++;
 3131             } else {
 3132                 error = NFSERR_RECLAIMCONFLICT;
 3133             }
 3134         } else if (ownerstp) {
 3135                 if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
 3136                     /* Replace the open */
 3137                     if (ownerstp->ls_op)
 3138                         nfsrvd_derefcache(ownerstp->ls_op);
 3139                     ownerstp->ls_op = new_stp->ls_op;
 3140                     nfsrvd_refcache(ownerstp->ls_op);
 3141                     ownerstp->ls_seq = new_stp->ls_seq;
 3142                     *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
 3143                     stp = LIST_FIRST(&ownerstp->ls_open);
 3144                     stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
 3145                         NFSLCK_OPEN;
 3146                     stp->ls_stateid.seqid = 1;
 3147                     stp->ls_uid = new_stp->ls_uid;
 3148                     if (lfp != stp->ls_lfp) {
 3149                         LIST_REMOVE(stp, ls_file);
 3150                         LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
 3151                         stp->ls_lfp = lfp;
 3152                     }
 3153                     openstp = stp;
 3154                 } else if (openstp) {
 3155                     openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
 3156                     openstp->ls_stateid.seqid++;
 3157                     if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3158                         openstp->ls_stateid.seqid == 0)
 3159                         openstp->ls_stateid.seqid = 1;
 3160 
 3161                     /*
 3162                      * This is where we can choose to issue a delegation.
 3163                      */
 3164                     if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
 3165                         *rflagsp |= NFSV4OPEN_WDNOTWANTED;
 3166                     else if (nfsrv_issuedelegs == 0)
 3167                         *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
 3168                     else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
 3169                         *rflagsp |= NFSV4OPEN_WDRESOURCE;
 3170                     else if (delegate == 0 || writedeleg == 0 ||
 3171                         NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
 3172                         nfsrv_writedelegifpos == 0) ||
 3173                         !NFSVNO_DELEGOK(vp) ||
 3174                         (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
 3175                         (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3176                          LCL_CALLBACKSON)
 3177                         *rflagsp |= NFSV4OPEN_WDCONTENTION;
 3178                     else {
 3179                         new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3180                         new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
 3181                             = clp->lc_clientid.lval[0];
 3182                         new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
 3183                             = clp->lc_clientid.lval[1];
 3184                         new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
 3185                             = nfsrv_nextstateindex(clp);
 3186                         new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3187                             NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3188                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3189                         new_deleg->ls_uid = new_stp->ls_uid;
 3190                         new_deleg->ls_lfp = lfp;
 3191                         new_deleg->ls_clp = clp;
 3192                         new_deleg->ls_filerev = filerev;
 3193                         new_deleg->ls_compref = nd->nd_compref;
 3194                         nfsrv_writedelegcnt++;
 3195                         LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3196                         LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3197                             new_deleg->ls_stateid), new_deleg, ls_hash);
 3198                         LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3199                         new_deleg = NULL;
 3200                         nfsstatsv1.srvdelegates++;
 3201                         nfsrv_openpluslock++;
 3202                         nfsrv_delegatecnt++;
 3203                     }
 3204                 } else {
 3205                     new_open->ls_stateid.seqid = 1;
 3206                     new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3207                     new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3208                     new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3209                     new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
 3210                         NFSLCK_OPEN;
 3211                     new_open->ls_uid = new_stp->ls_uid;
 3212                     new_open->ls_openowner = ownerstp;
 3213                     new_open->ls_lfp = lfp;
 3214                     new_open->ls_clp = clp;
 3215                     LIST_INIT(&new_open->ls_open);
 3216                     LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3217                     LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
 3218                     LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3219                         new_open, ls_hash);
 3220                     openstp = new_open;
 3221                     new_open = NULL;
 3222                     nfsstatsv1.srvopens++;
 3223                     nfsrv_openpluslock++;
 3224 
 3225                     /*
 3226                      * This is where we can choose to issue a delegation.
 3227                      */
 3228                     if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
 3229                         *rflagsp |= NFSV4OPEN_WDNOTWANTED;
 3230                     else if (nfsrv_issuedelegs == 0)
 3231                         *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
 3232                     else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
 3233                         *rflagsp |= NFSV4OPEN_WDRESOURCE;
 3234                     else if (delegate == 0 || (writedeleg == 0 &&
 3235                         readonly == 0) || !NFSVNO_DELEGOK(vp) ||
 3236                         (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3237                          LCL_CALLBACKSON)
 3238                         *rflagsp |= NFSV4OPEN_WDCONTENTION;
 3239                     else {
 3240                         new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3241                         new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
 3242                             = clp->lc_clientid.lval[0];
 3243                         new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
 3244                             = clp->lc_clientid.lval[1];
 3245                         new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
 3246                             = nfsrv_nextstateindex(clp);
 3247                         if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
 3248                             (nfsrv_writedelegifpos || !readonly) &&
 3249                             (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
 3250                             new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3251                                 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3252                             *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3253                             nfsrv_writedelegcnt++;
 3254                         } else {
 3255                             new_deleg->ls_flags = (NFSLCK_DELEGREAD |
 3256                                 NFSLCK_READACCESS);
 3257                             *rflagsp |= NFSV4OPEN_READDELEGATE;
 3258                         }
 3259                         new_deleg->ls_uid = new_stp->ls_uid;
 3260                         new_deleg->ls_lfp = lfp;
 3261                         new_deleg->ls_clp = clp;
 3262                         new_deleg->ls_filerev = filerev;
 3263                         new_deleg->ls_compref = nd->nd_compref;
 3264                         LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3265                         LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3266                             new_deleg->ls_stateid), new_deleg, ls_hash);
 3267                         LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3268                         new_deleg = NULL;
 3269                         nfsstatsv1.srvdelegates++;
 3270                         nfsrv_openpluslock++;
 3271                         nfsrv_delegatecnt++;
 3272                     }
 3273                 }
 3274         } else {
 3275                 /*
 3276                  * New owner case. Start the open_owner sequence with a
 3277                  * Needs confirmation (unless a reclaim) and hang the
 3278                  * new open off it.
 3279                  */
 3280                 new_open->ls_stateid.seqid = 1;
 3281                 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3282                 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3283                 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3284                 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
 3285                     NFSLCK_OPEN;
 3286                 new_open->ls_uid = new_stp->ls_uid;
 3287                 LIST_INIT(&new_open->ls_open);
 3288                 new_open->ls_openowner = new_stp;
 3289                 new_open->ls_lfp = lfp;
 3290                 new_open->ls_clp = clp;
 3291                 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3292                 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 3293                         new_stp->ls_flags = 0;
 3294                 } else if ((nd->nd_flag & ND_NFSV41) != 0) {
 3295                         /* NFSv4.1 never needs confirmation. */
 3296                         new_stp->ls_flags = 0;
 3297 
 3298                         /*
 3299                          * This is where we can choose to issue a delegation.
 3300                          */
 3301                         if (delegate && nfsrv_issuedelegs &&
 3302                             (writedeleg || readonly) &&
 3303                             (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
 3304                              LCL_CALLBACKSON &&
 3305                             !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
 3306                             NFSVNO_DELEGOK(vp) &&
 3307                             ((nd->nd_flag & ND_NFSV41) == 0 ||
 3308                              (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
 3309                                 new_deleg->ls_stateid.seqid =
 3310                                     delegstateidp->seqid = 1;
 3311                                 new_deleg->ls_stateid.other[0] =
 3312                                     delegstateidp->other[0]
 3313                                     = clp->lc_clientid.lval[0];
 3314                                 new_deleg->ls_stateid.other[1] =
 3315                                     delegstateidp->other[1]
 3316                                     = clp->lc_clientid.lval[1];
 3317                                 new_deleg->ls_stateid.other[2] =
 3318                                     delegstateidp->other[2]
 3319                                     = nfsrv_nextstateindex(clp);
 3320                                 if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
 3321                                     (nfsrv_writedelegifpos || !readonly) &&
 3322                                     ((nd->nd_flag & ND_NFSV41) == 0 ||
 3323                                      (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
 3324                                      0)) {
 3325                                         new_deleg->ls_flags =
 3326                                             (NFSLCK_DELEGWRITE |
 3327                                              NFSLCK_READACCESS |
 3328                                              NFSLCK_WRITEACCESS);
 3329                                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3330                                         nfsrv_writedelegcnt++;
 3331                                 } else {
 3332                                         new_deleg->ls_flags =
 3333                                             (NFSLCK_DELEGREAD |
 3334                                              NFSLCK_READACCESS);
 3335                                         *rflagsp |= NFSV4OPEN_READDELEGATE;
 3336                                 }
 3337                                 new_deleg->ls_uid = new_stp->ls_uid;
 3338                                 new_deleg->ls_lfp = lfp;
 3339                                 new_deleg->ls_clp = clp;
 3340                                 new_deleg->ls_filerev = filerev;
 3341                                 new_deleg->ls_compref = nd->nd_compref;
 3342                                 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
 3343                                     ls_file);
 3344                                 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3345                                     new_deleg->ls_stateid), new_deleg, ls_hash);
 3346                                 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
 3347                                     ls_list);
 3348                                 new_deleg = NULL;
 3349                                 nfsstatsv1.srvdelegates++;
 3350                                 nfsrv_openpluslock++;
 3351                                 nfsrv_delegatecnt++;
 3352                         }
 3353                         /*
 3354                          * Since NFSv4.1 never does an OpenConfirm, the first
 3355                          * open state will be acquired here.
 3356                          */
 3357                         if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
 3358                                 clp->lc_flags |= LCL_STAMPEDSTABLE;
 3359                                 len = clp->lc_idlen;
 3360                                 NFSBCOPY(clp->lc_id, clidp, len);
 3361                                 gotstate = 1;
 3362                         }
 3363                 } else {
 3364                         *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
 3365                         new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
 3366                 }
 3367                 nfsrvd_refcache(new_stp->ls_op);
 3368                 new_stp->ls_noopens = 0;
 3369                 LIST_INIT(&new_stp->ls_open);
 3370                 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3371                 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3372                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3373                     new_open, ls_hash);
 3374                 openstp = new_open;
 3375                 new_open = NULL;
 3376                 *new_stpp = NULL;
 3377                 nfsstatsv1.srvopens++;
 3378                 nfsrv_openpluslock++;
 3379                 nfsstatsv1.srvopenowners++;
 3380                 nfsrv_openpluslock++;
 3381         }
 3382         if (!error) {
 3383                 stateidp->seqid = openstp->ls_stateid.seqid;
 3384                 stateidp->other[0] = openstp->ls_stateid.other[0];
 3385                 stateidp->other[1] = openstp->ls_stateid.other[1];
 3386                 stateidp->other[2] = openstp->ls_stateid.other[2];
 3387         }
 3388         NFSUNLOCKSTATE();
 3389         if (haslock) {
 3390                 NFSLOCKV4ROOTMUTEX();
 3391                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 3392                 NFSUNLOCKV4ROOTMUTEX();
 3393         }
 3394         if (new_open)
 3395                 free(new_open, M_NFSDSTATE);
 3396         if (new_deleg)
 3397                 free(new_deleg, M_NFSDSTATE);
 3398 
 3399         /*
 3400          * If the NFSv4.1 client just acquired its first open, write a timestamp
 3401          * to the stable storage file.
 3402          */
 3403         if (gotstate != 0) {
 3404                 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
 3405                 nfsrv_backupstable();
 3406         }
 3407 
 3408 out:
 3409         free(clidp, M_TEMP);
 3410         NFSEXITCODE2(error, nd);
 3411         return (error);
 3412 }
 3413 
 3414 /*
 3415  * Open update. Does the confirm, downgrade and close.
 3416  */
 3417 int
 3418 nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
 3419     nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p,
 3420     int *retwriteaccessp)
 3421 {
 3422         struct nfsstate *stp;
 3423         struct nfsclient *clp;
 3424         struct nfslockfile *lfp;
 3425         u_int32_t bits;
 3426         int error = 0, gotstate = 0, len = 0;
 3427         u_char *clidp = NULL;
 3428 
 3429         /*
 3430          * Check for restart conditions (client and server).
 3431          */
 3432         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 3433             &new_stp->ls_stateid, 0);
 3434         if (error)
 3435                 goto out;
 3436 
 3437         clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
 3438         NFSLOCKSTATE();
 3439         /*
 3440          * Get the open structure via clientid and stateid.
 3441          */
 3442         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3443             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 3444         if (!error)
 3445                 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
 3446                     new_stp->ls_flags, &stp);
 3447 
 3448         /*
 3449          * Sanity check the open.
 3450          */
 3451         if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
 3452                 (!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3453                  (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
 3454                 ((new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3455                  (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
 3456                 error = NFSERR_BADSTATEID;
 3457 
 3458         if (!error)
 3459                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 3460                     stp->ls_openowner, new_stp->ls_op);
 3461         if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
 3462             (((nd->nd_flag & ND_NFSV41) == 0 &&
 3463               !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
 3464              ((nd->nd_flag & ND_NFSV41) != 0 &&
 3465               new_stp->ls_stateid.seqid != 0)))
 3466                 error = NFSERR_OLDSTATEID;
 3467         if (!error && vnode_vtype(vp) != VREG) {
 3468                 if (vnode_vtype(vp) == VDIR)
 3469                         error = NFSERR_ISDIR;
 3470                 else
 3471                         error = NFSERR_INVAL;
 3472         }
 3473 
 3474         if (error) {
 3475                 /*
 3476                  * If a client tries to confirm an Open with a bad
 3477                  * seqid# and there are no byte range locks or other Opens
 3478                  * on the openowner, just throw it away, so the next use of the
 3479                  * openowner will start a fresh seq#.
 3480                  */
 3481                 if (error == NFSERR_BADSEQID &&
 3482                     (new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3483                     nfsrv_nootherstate(stp))
 3484                         nfsrv_freeopenowner(stp->ls_openowner, 0, p);
 3485                 NFSUNLOCKSTATE();
 3486                 goto out;
 3487         }
 3488 
 3489         /*
 3490          * Set the return stateid.
 3491          */
 3492         stateidp->seqid = stp->ls_stateid.seqid + 1;
 3493         if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 3494                 stateidp->seqid = 1;
 3495         stateidp->other[0] = stp->ls_stateid.other[0];
 3496         stateidp->other[1] = stp->ls_stateid.other[1];
 3497         stateidp->other[2] = stp->ls_stateid.other[2];
 3498         /*
 3499          * Now, handle the three cases.
 3500          */
 3501         if (new_stp->ls_flags & NFSLCK_CONFIRM) {
 3502                 /*
 3503                  * If the open doesn't need confirmation, it seems to me that
 3504                  * there is a client error, but I'll just log it and keep going?
 3505                  */
 3506                 if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
 3507                         printf("Nfsv4d: stray open confirm\n");
 3508                 stp->ls_openowner->ls_flags = 0;
 3509                 stp->ls_stateid.seqid++;
 3510                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3511                     stp->ls_stateid.seqid == 0)
 3512                         stp->ls_stateid.seqid = 1;
 3513                 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
 3514                         clp->lc_flags |= LCL_STAMPEDSTABLE;
 3515                         len = clp->lc_idlen;
 3516                         NFSBCOPY(clp->lc_id, clidp, len);
 3517                         gotstate = 1;
 3518                 }
 3519                 NFSUNLOCKSTATE();
 3520         } else if (new_stp->ls_flags & NFSLCK_CLOSE) {
 3521                 lfp = stp->ls_lfp;
 3522                 if (retwriteaccessp != NULL) {
 3523                         if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
 3524                                 *retwriteaccessp = 1;
 3525                         else
 3526                                 *retwriteaccessp = 0;
 3527                 }
 3528                 if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
 3529                         /* Get the lf lock */
 3530                         nfsrv_locklf(lfp);
 3531                         NFSUNLOCKSTATE();
 3532                         ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
 3533                         NFSVOPUNLOCK(vp);
 3534                         if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
 3535                                 NFSLOCKSTATE();
 3536                                 nfsrv_unlocklf(lfp);
 3537                                 NFSUNLOCKSTATE();
 3538                         }
 3539                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 3540                 } else {
 3541                         (void) nfsrv_freeopen(stp, NULL, 0, p);
 3542                         NFSUNLOCKSTATE();
 3543                 }
 3544         } else {
 3545                 /*
 3546                  * Update the share bits, making sure that the new set are a
 3547                  * subset of the old ones.
 3548                  */
 3549                 bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
 3550                 if (~(stp->ls_flags) & bits) {
 3551                         NFSUNLOCKSTATE();
 3552                         error = NFSERR_INVAL;
 3553                         goto out;
 3554                 }
 3555                 stp->ls_flags = (bits | NFSLCK_OPEN);
 3556                 stp->ls_stateid.seqid++;
 3557                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3558                     stp->ls_stateid.seqid == 0)
 3559                         stp->ls_stateid.seqid = 1;
 3560                 NFSUNLOCKSTATE();
 3561         }
 3562 
 3563         /*
 3564          * If the client just confirmed its first open, write a timestamp
 3565          * to the stable storage file.
 3566          */
 3567         if (gotstate != 0) {
 3568                 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
 3569                 nfsrv_backupstable();
 3570         }
 3571 
 3572 out:
 3573         free(clidp, M_TEMP);
 3574         NFSEXITCODE2(error, nd);
 3575         return (error);
 3576 }
 3577 
 3578 /*
 3579  * Delegation update. Does the purge and return.
 3580  */
 3581 int
 3582 nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
 3583     nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
 3584     NFSPROC_T *p, int *retwriteaccessp)
 3585 {
 3586         struct nfsstate *stp;
 3587         struct nfsclient *clp;
 3588         int error = 0;
 3589         fhandle_t fh;
 3590 
 3591         /*
 3592          * Do a sanity check against the file handle for DelegReturn.
 3593          */
 3594         if (vp) {
 3595                 error = nfsvno_getfh(vp, &fh, p);
 3596                 if (error)
 3597                         goto out;
 3598         }
 3599         /*
 3600          * Check for restart conditions (client and server).
 3601          */
 3602         if (op == NFSV4OP_DELEGRETURN)
 3603                 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
 3604                         stateidp, 0);
 3605         else
 3606                 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
 3607                         stateidp, 0);
 3608 
 3609         NFSLOCKSTATE();
 3610         /*
 3611          * Get the open structure via clientid and stateid.
 3612          */
 3613         if (!error)
 3614             error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3615                 (nfsquad_t)((u_quad_t)0), 0, nd, p);
 3616         if (error) {
 3617                 if (error == NFSERR_CBPATHDOWN)
 3618                         error = 0;
 3619                 if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
 3620                         error = NFSERR_STALESTATEID;
 3621         }
 3622         if (!error && op == NFSV4OP_DELEGRETURN) {
 3623             error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
 3624             if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
 3625                 ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
 3626                 error = NFSERR_OLDSTATEID;
 3627         }
 3628         /*
 3629          * NFSERR_EXPIRED means that the state has gone away,
 3630          * so Delegations have been purged. Just return ok.
 3631          */
 3632         if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
 3633                 NFSUNLOCKSTATE();
 3634                 error = 0;
 3635                 goto out;
 3636         }
 3637         if (error) {
 3638                 NFSUNLOCKSTATE();
 3639                 goto out;
 3640         }
 3641 
 3642         if (op == NFSV4OP_DELEGRETURN) {
 3643                 if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
 3644                     sizeof (fhandle_t))) {
 3645                         NFSUNLOCKSTATE();
 3646                         error = NFSERR_BADSTATEID;
 3647                         goto out;
 3648                 }
 3649                 if (retwriteaccessp != NULL) {
 3650                         if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
 3651                                 *retwriteaccessp = 1;
 3652                         else
 3653                                 *retwriteaccessp = 0;
 3654                 }
 3655                 nfsrv_freedeleg(stp);
 3656         } else {
 3657                 nfsrv_freedeleglist(&clp->lc_olddeleg);
 3658         }
 3659         NFSUNLOCKSTATE();
 3660         error = 0;
 3661 
 3662 out:
 3663         NFSEXITCODE(error);
 3664         return (error);
 3665 }
 3666 
 3667 /*
 3668  * Release lock owner.
 3669  */
 3670 int
 3671 nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
 3672     NFSPROC_T *p)
 3673 {
 3674         struct nfsstate *stp, *nstp, *openstp, *ownstp;
 3675         struct nfsclient *clp;
 3676         int error = 0;
 3677 
 3678         /*
 3679          * Check for restart conditions (client and server).
 3680          */
 3681         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 3682             &new_stp->ls_stateid, 0);
 3683         if (error)
 3684                 goto out;
 3685 
 3686         NFSLOCKSTATE();
 3687         /*
 3688          * Get the lock owner by name.
 3689          */
 3690         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3691             (nfsquad_t)((u_quad_t)0), 0, NULL, p);
 3692         if (error) {
 3693                 NFSUNLOCKSTATE();
 3694                 goto out;
 3695         }
 3696         LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
 3697             LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
 3698                 stp = LIST_FIRST(&openstp->ls_open);
 3699                 while (stp != LIST_END(&openstp->ls_open)) {
 3700                     nstp = LIST_NEXT(stp, ls_list);
 3701                     /*
 3702                      * If the owner matches, check for locks and
 3703                      * then free or return an error.
 3704                      */
 3705                     if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
 3706                         !NFSBCMP(stp->ls_owner, new_stp->ls_owner,
 3707                          stp->ls_ownerlen)){
 3708                         if (LIST_EMPTY(&stp->ls_lock)) {
 3709                             nfsrv_freelockowner(stp, NULL, 0, p);
 3710                         } else {
 3711                             NFSUNLOCKSTATE();
 3712                             error = NFSERR_LOCKSHELD;
 3713                             goto out;
 3714                         }
 3715                     }
 3716                     stp = nstp;
 3717                 }
 3718             }
 3719         }
 3720         NFSUNLOCKSTATE();
 3721 
 3722 out:
 3723         NFSEXITCODE(error);
 3724         return (error);
 3725 }
 3726 
 3727 /*
 3728  * Get the file handle for a lock structure.
 3729  */
 3730 static int
 3731 nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
 3732     fhandle_t *nfhp, NFSPROC_T *p)
 3733 {
 3734         fhandle_t *fhp = NULL;
 3735         int error;
 3736 
 3737         /*
 3738          * For lock, use the new nfslock structure, otherwise just
 3739          * a fhandle_t on the stack.
 3740          */
 3741         if (flags & NFSLCK_OPEN) {
 3742                 KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
 3743                 fhp = &new_lfp->lf_fh;
 3744         } else if (nfhp) {
 3745                 fhp = nfhp;
 3746         } else {
 3747                 panic("nfsrv_getlockfh");
 3748         }
 3749         error = nfsvno_getfh(vp, fhp, p);
 3750         NFSEXITCODE(error);
 3751         return (error);
 3752 }
 3753 
 3754 /*
 3755  * Get an nfs lock structure. Allocate one, as required, and return a
 3756  * pointer to it.
 3757  * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
 3758  */
 3759 static int
 3760 nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
 3761     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
 3762 {
 3763         struct nfslockfile *lfp;
 3764         fhandle_t *fhp = NULL, *tfhp;
 3765         struct nfslockhashhead *hp;
 3766         struct nfslockfile *new_lfp = NULL;
 3767 
 3768         /*
 3769          * For lock, use the new nfslock structure, otherwise just
 3770          * a fhandle_t on the stack.
 3771          */
 3772         if (flags & NFSLCK_OPEN) {
 3773                 new_lfp = *new_lfpp;
 3774                 fhp = &new_lfp->lf_fh;
 3775         } else if (nfhp) {
 3776                 fhp = nfhp;
 3777         } else {
 3778                 panic("nfsrv_getlockfile");
 3779         }
 3780 
 3781         hp = NFSLOCKHASH(fhp);
 3782         LIST_FOREACH(lfp, hp, lf_hash) {
 3783                 tfhp = &lfp->lf_fh;
 3784                 if (NFSVNO_CMPFH(fhp, tfhp)) {
 3785                         if (lockit)
 3786                                 nfsrv_locklf(lfp);
 3787                         *lfpp = lfp;
 3788                         return (0);
 3789                 }
 3790         }
 3791         if (!(flags & NFSLCK_OPEN))
 3792                 return (-1);
 3793 
 3794         /*
 3795          * No match, so chain the new one into the list.
 3796          */
 3797         LIST_INIT(&new_lfp->lf_open);
 3798         LIST_INIT(&new_lfp->lf_lock);
 3799         LIST_INIT(&new_lfp->lf_deleg);
 3800         LIST_INIT(&new_lfp->lf_locallock);
 3801         LIST_INIT(&new_lfp->lf_rollback);
 3802         new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
 3803         new_lfp->lf_locallock_lck.nfslock_lock = 0;
 3804         new_lfp->lf_usecount = 0;
 3805         LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
 3806         *lfpp = new_lfp;
 3807         *new_lfpp = NULL;
 3808         return (0);
 3809 }
 3810 
 3811 /*
 3812  * This function adds a nfslock lock structure to the list for the associated
 3813  * nfsstate and nfslockfile structures. It will be inserted after the
 3814  * entry pointed at by insert_lop.
 3815  */
 3816 static void
 3817 nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
 3818     struct nfsstate *stp, struct nfslockfile *lfp)
 3819 {
 3820         struct nfslock *lop, *nlop;
 3821 
 3822         new_lop->lo_stp = stp;
 3823         new_lop->lo_lfp = lfp;
 3824 
 3825         if (stp != NULL) {
 3826                 /* Insert in increasing lo_first order */
 3827                 lop = LIST_FIRST(&lfp->lf_lock);
 3828                 if (lop == LIST_END(&lfp->lf_lock) ||
 3829                     new_lop->lo_first <= lop->lo_first) {
 3830                         LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
 3831                 } else {
 3832                         nlop = LIST_NEXT(lop, lo_lckfile);
 3833                         while (nlop != LIST_END(&lfp->lf_lock) &&
 3834                                nlop->lo_first < new_lop->lo_first) {
 3835                                 lop = nlop;
 3836                                 nlop = LIST_NEXT(lop, lo_lckfile);
 3837                         }
 3838                         LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
 3839                 }
 3840         } else {
 3841                 new_lop->lo_lckfile.le_prev = NULL;     /* list not used */
 3842         }
 3843 
 3844         /*
 3845          * Insert after insert_lop, which is overloaded as stp or lfp for
 3846          * an empty list.
 3847          */
 3848         if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
 3849                 LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
 3850         else if ((struct nfsstate *)insert_lop == stp)
 3851                 LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
 3852         else
 3853                 LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
 3854         if (stp != NULL) {
 3855                 nfsstatsv1.srvlocks++;
 3856                 nfsrv_openpluslock++;
 3857         }
 3858 }
 3859 
 3860 /*
 3861  * This function updates the locking for a lock owner and given file. It
 3862  * maintains a list of lock ranges ordered on increasing file offset that
 3863  * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
 3864  * It always adds new_lop to the list and sometimes uses the one pointed
 3865  * at by other_lopp.
 3866  */
 3867 static void
 3868 nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
 3869     struct nfslock **other_lopp, struct nfslockfile *lfp)
 3870 {
 3871         struct nfslock *new_lop = *new_lopp;
 3872         struct nfslock *lop, *tlop, *ilop;
 3873         struct nfslock *other_lop = *other_lopp;
 3874         int unlock = 0, myfile = 0;
 3875         u_int64_t tmp;
 3876 
 3877         /*
 3878          * Work down the list until the lock is merged.
 3879          */
 3880         if (new_lop->lo_flags & NFSLCK_UNLOCK)
 3881                 unlock = 1;
 3882         if (stp != NULL) {
 3883                 ilop = (struct nfslock *)stp;
 3884                 lop = LIST_FIRST(&stp->ls_lock);
 3885         } else {
 3886                 ilop = (struct nfslock *)lfp;
 3887                 lop = LIST_FIRST(&lfp->lf_locallock);
 3888         }
 3889         while (lop != NULL) {
 3890             /*
 3891              * Only check locks for this file that aren't before the start of
 3892              * new lock's range.
 3893              */
 3894             if (lop->lo_lfp == lfp) {
 3895               myfile = 1;
 3896               if (lop->lo_end >= new_lop->lo_first) {
 3897                 if (new_lop->lo_end < lop->lo_first) {
 3898                         /*
 3899                          * If the new lock ends before the start of the
 3900                          * current lock's range, no merge, just insert
 3901                          * the new lock.
 3902                          */
 3903                         break;
 3904                 }
 3905                 if (new_lop->lo_flags == lop->lo_flags ||
 3906                     (new_lop->lo_first <= lop->lo_first &&
 3907                      new_lop->lo_end >= lop->lo_end)) {
 3908                         /*
 3909                          * This lock can be absorbed by the new lock/unlock.
 3910                          * This happens when it covers the entire range
 3911                          * of the old lock or is contiguous
 3912                          * with the old lock and is of the same type or an
 3913                          * unlock.
 3914                          */
 3915                         if (lop->lo_first < new_lop->lo_first)
 3916                                 new_lop->lo_first = lop->lo_first;
 3917                         if (lop->lo_end > new_lop->lo_end)
 3918                                 new_lop->lo_end = lop->lo_end;
 3919                         tlop = lop;
 3920                         lop = LIST_NEXT(lop, lo_lckowner);
 3921                         nfsrv_freenfslock(tlop);
 3922                         continue;
 3923                 }
 3924 
 3925                 /*
 3926                  * All these cases are for contiguous locks that are not the
 3927                  * same type, so they can't be merged.
 3928                  */
 3929                 if (new_lop->lo_first <= lop->lo_first) {
 3930                         /*
 3931                          * This case is where the new lock overlaps with the
 3932                          * first part of the old lock. Move the start of the
 3933                          * old lock to just past the end of the new lock. The
 3934                          * new lock will be inserted in front of the old, since
 3935                          * ilop hasn't been updated. (We are done now.)
 3936                          */
 3937                         lop->lo_first = new_lop->lo_end;
 3938                         break;
 3939                 }
 3940                 if (new_lop->lo_end >= lop->lo_end) {
 3941                         /*
 3942                          * This case is where the new lock overlaps with the
 3943                          * end of the old lock's range. Move the old lock's
 3944                          * end to just before the new lock's first and insert
 3945                          * the new lock after the old lock.
 3946                          * Might not be done yet, since the new lock could
 3947                          * overlap further locks with higher ranges.
 3948                          */
 3949                         lop->lo_end = new_lop->lo_first;
 3950                         ilop = lop;
 3951                         lop = LIST_NEXT(lop, lo_lckowner);
 3952                         continue;
 3953                 }
 3954                 /*
 3955                  * The final case is where the new lock's range is in the
 3956                  * middle of the current lock's and splits the current lock
 3957                  * up. Use *other_lopp to handle the second part of the
 3958                  * split old lock range. (We are done now.)
 3959                  * For unlock, we use new_lop as other_lop and tmp, since
 3960                  * other_lop and new_lop are the same for this case.
 3961                  * We noted the unlock case above, so we don't need
 3962                  * new_lop->lo_flags any longer.
 3963                  */
 3964                 tmp = new_lop->lo_first;
 3965                 if (other_lop == NULL) {
 3966                         if (!unlock)
 3967                                 panic("nfsd srv update unlock");
 3968                         other_lop = new_lop;
 3969                         *new_lopp = NULL;
 3970                 }
 3971                 other_lop->lo_first = new_lop->lo_end;
 3972                 other_lop->lo_end = lop->lo_end;
 3973                 other_lop->lo_flags = lop->lo_flags;
 3974                 other_lop->lo_stp = stp;
 3975                 other_lop->lo_lfp = lfp;
 3976                 lop->lo_end = tmp;
 3977                 nfsrv_insertlock(other_lop, lop, stp, lfp);
 3978                 *other_lopp = NULL;
 3979                 ilop = lop;
 3980                 break;
 3981               }
 3982             }
 3983             ilop = lop;
 3984             lop = LIST_NEXT(lop, lo_lckowner);
 3985             if (myfile && (lop == NULL || lop->lo_lfp != lfp))
 3986                 break;
 3987         }
 3988 
 3989         /*
 3990          * Insert the new lock in the list at the appropriate place.
 3991          */
 3992         if (!unlock) {
 3993                 nfsrv_insertlock(new_lop, ilop, stp, lfp);
 3994                 *new_lopp = NULL;
 3995         }
 3996 }
 3997 
 3998 /*
 3999  * This function handles sequencing of locks, etc.
 4000  * It returns an error that indicates what the caller should do.
 4001  */
 4002 static int
 4003 nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
 4004     struct nfsstate *stp, struct nfsrvcache *op)
 4005 {
 4006         int error = 0;
 4007 
 4008         if ((nd->nd_flag & ND_NFSV41) != 0)
 4009                 /* NFSv4.1 ignores the open_seqid and lock_seqid. */
 4010                 goto out;
 4011         if (op != nd->nd_rp)
 4012                 panic("nfsrvstate checkseqid");
 4013         if (!(op->rc_flag & RC_INPROG))
 4014                 panic("nfsrvstate not inprog");
 4015         if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
 4016                 printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
 4017                 panic("nfsrvstate op refcnt");
 4018         }
 4019 
 4020         /* If ND_ERELOOKUP is set, the seqid has already been handled. */
 4021         if ((nd->nd_flag & ND_ERELOOKUP) != 0)
 4022                 goto out;
 4023 
 4024         if ((stp->ls_seq + 1) == seqid) {
 4025                 if (stp->ls_op)
 4026                         nfsrvd_derefcache(stp->ls_op);
 4027                 stp->ls_op = op;
 4028                 nfsrvd_refcache(op);
 4029                 stp->ls_seq = seqid;
 4030                 goto out;
 4031         } else if (stp->ls_seq == seqid && stp->ls_op &&
 4032                 op->rc_xid == stp->ls_op->rc_xid &&
 4033                 op->rc_refcnt == 0 &&
 4034                 op->rc_reqlen == stp->ls_op->rc_reqlen &&
 4035                 op->rc_cksum == stp->ls_op->rc_cksum) {
 4036                 if (stp->ls_op->rc_flag & RC_INPROG) {
 4037                         error = NFSERR_DONTREPLY;
 4038                         goto out;
 4039                 }
 4040                 nd->nd_rp = stp->ls_op;
 4041                 nd->nd_rp->rc_flag |= RC_INPROG;
 4042                 nfsrvd_delcache(op);
 4043                 error = NFSERR_REPLYFROMCACHE;
 4044                 goto out;
 4045         }
 4046         error = NFSERR_BADSEQID;
 4047 
 4048 out:
 4049         NFSEXITCODE2(error, nd);
 4050         return (error);
 4051 }
 4052 
 4053 /*
 4054  * Get the client ip address for callbacks. If the strings can't be parsed,
 4055  * just set lc_program to 0 to indicate no callbacks are possible.
 4056  * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
 4057  *  the address to the client's transport address. This won't be used
 4058  *  for callbacks, but can be printed out by nfsstats for info.)
 4059  * Return error if the xdr can't be parsed, 0 otherwise.
 4060  */
 4061 int
 4062 nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
 4063 {
 4064         u_int32_t *tl;
 4065         u_char *cp, *cp2;
 4066         int i, j, maxalen = 0, minalen = 0;
 4067         sa_family_t af;
 4068 #ifdef INET
 4069         struct sockaddr_in *rin = NULL, *sin;
 4070 #endif
 4071 #ifdef INET6
 4072         struct sockaddr_in6 *rin6 = NULL, *sin6;
 4073 #endif
 4074         u_char *addr;
 4075         int error = 0, cantparse = 0;
 4076         union {
 4077                 in_addr_t ival;
 4078                 u_char cval[4];
 4079         } ip;
 4080         union {
 4081                 in_port_t sval;
 4082                 u_char cval[2];
 4083         } port;
 4084 
 4085         /* 8 is the maximum length of the port# string. */
 4086         addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK);
 4087         clp->lc_req.nr_client = NULL;
 4088         clp->lc_req.nr_lock = 0;
 4089         af = AF_UNSPEC;
 4090         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 4091         i = fxdr_unsigned(int, *tl);
 4092         if (i >= 3 && i <= 4) {
 4093                 error = nfsrv_mtostr(nd, addr, i);
 4094                 if (error)
 4095                         goto nfsmout;
 4096 #ifdef INET
 4097                 if (!strcmp(addr, "tcp")) {
 4098                         clp->lc_flags |= LCL_TCPCALLBACK;
 4099                         clp->lc_req.nr_sotype = SOCK_STREAM;
 4100                         clp->lc_req.nr_soproto = IPPROTO_TCP;
 4101                         af = AF_INET;
 4102                 } else if (!strcmp(addr, "udp")) {
 4103                         clp->lc_req.nr_sotype = SOCK_DGRAM;
 4104                         clp->lc_req.nr_soproto = IPPROTO_UDP;
 4105                         af = AF_INET;
 4106                 }
 4107 #endif
 4108 #ifdef INET6
 4109                 if (af == AF_UNSPEC) {
 4110                         if (!strcmp(addr, "tcp6")) {
 4111                                 clp->lc_flags |= LCL_TCPCALLBACK;
 4112                                 clp->lc_req.nr_sotype = SOCK_STREAM;
 4113                                 clp->lc_req.nr_soproto = IPPROTO_TCP;
 4114                                 af = AF_INET6;
 4115                         } else if (!strcmp(addr, "udp6")) {
 4116                                 clp->lc_req.nr_sotype = SOCK_DGRAM;
 4117                                 clp->lc_req.nr_soproto = IPPROTO_UDP;
 4118                                 af = AF_INET6;
 4119                         }
 4120                 }
 4121 #endif
 4122                 if (af == AF_UNSPEC) {
 4123                         cantparse = 1;
 4124                 }
 4125         } else {
 4126                 cantparse = 1;
 4127                 if (i > 0) {
 4128                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 4129                         if (error)
 4130                                 goto nfsmout;
 4131                 }
 4132         }
 4133         /*
 4134          * The caller has allocated clp->lc_req.nr_nam to be large enough
 4135          * for either AF_INET or AF_INET6 and zeroed out the contents.
 4136          * maxalen is set to the maximum length of the host IP address string
 4137          * plus 8 for the maximum length of the port#.
 4138          * minalen is set to the minimum length of the host IP address string
 4139          * plus 4 for the minimum length of the port#.
 4140          * These lengths do not include NULL termination,
 4141          * so INET[6]_ADDRSTRLEN - 1 is used in the calculations.
 4142          */
 4143         switch (af) {
 4144 #ifdef INET
 4145         case AF_INET:
 4146                 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
 4147                 rin->sin_family = AF_INET;
 4148                 rin->sin_len = sizeof(struct sockaddr_in);
 4149                 maxalen = INET_ADDRSTRLEN - 1 + 8;
 4150                 minalen = 7 + 4;
 4151                 break;
 4152 #endif
 4153 #ifdef INET6
 4154         case AF_INET6:
 4155                 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 4156                 rin6->sin6_family = AF_INET6;
 4157                 rin6->sin6_len = sizeof(struct sockaddr_in6);
 4158                 maxalen = INET6_ADDRSTRLEN - 1 + 8;
 4159                 minalen = 3 + 4;
 4160                 break;
 4161 #endif
 4162         }
 4163         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 4164         i = fxdr_unsigned(int, *tl);
 4165         if (i < 0) {
 4166                 error = NFSERR_BADXDR;
 4167                 goto nfsmout;
 4168         } else if (i == 0) {
 4169                 cantparse = 1;
 4170         } else if (!cantparse && i <= maxalen && i >= minalen) {
 4171                 error = nfsrv_mtostr(nd, addr, i);
 4172                 if (error)
 4173                         goto nfsmout;
 4174 
 4175                 /*
 4176                  * Parse out the address fields. We expect 6 decimal numbers
 4177                  * separated by '.'s for AF_INET and two decimal numbers
 4178                  * preceeded by '.'s for AF_INET6.
 4179                  */
 4180                 cp = NULL;
 4181                 switch (af) {
 4182 #ifdef INET6
 4183                 /*
 4184                  * For AF_INET6, first parse the host address.
 4185                  */
 4186                 case AF_INET6:
 4187                         cp = strchr(addr, '.');
 4188                         if (cp != NULL) {
 4189                                 *cp++ = '\0';
 4190                                 if (inet_pton(af, addr, &rin6->sin6_addr) == 1)
 4191                                         i = 4;
 4192                                 else {
 4193                                         cp = NULL;
 4194                                         cantparse = 1;
 4195                                 }
 4196                         }
 4197                         break;
 4198 #endif
 4199 #ifdef INET
 4200                 case AF_INET:
 4201                         cp = addr;
 4202                         i = 0;
 4203                         break;
 4204 #endif
 4205                 }
 4206                 while (cp != NULL && *cp && i < 6) {
 4207                         cp2 = cp;
 4208                         while (*cp2 && *cp2 != '.')
 4209                                 cp2++;
 4210                         if (*cp2)
 4211                                 *cp2++ = '\0';
 4212                         else if (i != 5) {
 4213                                 cantparse = 1;
 4214                                 break;
 4215                         }
 4216                         j = nfsrv_getipnumber(cp);
 4217                         if (j >= 0) {
 4218                                 if (i < 4)
 4219                                         ip.cval[3 - i] = j;
 4220                                 else
 4221                                         port.cval[5 - i] = j;
 4222                         } else {
 4223                                 cantparse = 1;
 4224                                 break;
 4225                         }
 4226                         cp = cp2;
 4227                         i++;
 4228                 }
 4229                 if (!cantparse) {
 4230                         /*
 4231                          * The host address INADDR_ANY is (mis)used to indicate
 4232                          * "there is no valid callback address".
 4233                          */
 4234                         switch (af) {
 4235 #ifdef INET6
 4236                         case AF_INET6:
 4237                                 if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr,
 4238                                     &in6addr_any))
 4239                                         rin6->sin6_port = htons(port.sval);
 4240                                 else
 4241                                         cantparse = 1;
 4242                                 break;
 4243 #endif
 4244 #ifdef INET
 4245                         case AF_INET:
 4246                                 if (ip.ival != INADDR_ANY) {
 4247                                         rin->sin_addr.s_addr = htonl(ip.ival);
 4248                                         rin->sin_port = htons(port.sval);
 4249                                 } else {
 4250                                         cantparse = 1;
 4251                                 }
 4252                                 break;
 4253 #endif
 4254                         }
 4255                 }
 4256         } else {
 4257                 cantparse = 1;
 4258                 if (i > 0) {
 4259                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 4260                         if (error)
 4261                                 goto nfsmout;
 4262                 }
 4263         }
 4264         if (cantparse) {
 4265                 switch (nd->nd_nam->sa_family) {
 4266 #ifdef INET
 4267                 case AF_INET:
 4268                         sin = (struct sockaddr_in *)nd->nd_nam;
 4269                         rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
 4270                         rin->sin_family = AF_INET;
 4271                         rin->sin_len = sizeof(struct sockaddr_in);
 4272                         rin->sin_addr.s_addr = sin->sin_addr.s_addr;
 4273                         rin->sin_port = 0x0;
 4274                         break;
 4275 #endif
 4276 #ifdef INET6
 4277                 case AF_INET6:
 4278                         sin6 = (struct sockaddr_in6 *)nd->nd_nam;
 4279                         rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 4280                         rin6->sin6_family = AF_INET6;
 4281                         rin6->sin6_len = sizeof(struct sockaddr_in6);
 4282                         rin6->sin6_addr = sin6->sin6_addr;
 4283                         rin6->sin6_port = 0x0;
 4284                         break;
 4285 #endif
 4286                 }
 4287                 clp->lc_program = 0;
 4288         }
 4289 nfsmout:
 4290         free(addr, M_TEMP);
 4291         NFSEXITCODE2(error, nd);
 4292         return (error);
 4293 }
 4294 
 4295 /*
 4296  * Turn a string of up to three decimal digits into a number. Return -1 upon
 4297  * error.
 4298  */
 4299 static int
 4300 nfsrv_getipnumber(u_char *cp)
 4301 {
 4302         int i = 0, j = 0;
 4303 
 4304         while (*cp) {
 4305                 if (j > 2 || *cp < '' || *cp > '9')
 4306                         return (-1);
 4307                 i *= 10;
 4308                 i += (*cp - '');
 4309                 cp++;
 4310                 j++;
 4311         }
 4312         if (i < 256)
 4313                 return (i);
 4314         return (-1);
 4315 }
 4316 
 4317 /*
 4318  * This function checks for restart conditions.
 4319  */
 4320 static int
 4321 nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
 4322     nfsv4stateid_t *stateidp, int specialid)
 4323 {
 4324         int ret = 0;
 4325 
 4326         /*
 4327          * First check for a server restart. Open, LockT, ReleaseLockOwner
 4328          * and DelegPurge have a clientid, the rest a stateid.
 4329          */
 4330         if (flags &
 4331             (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
 4332                 if (clientid.lval[0] != nfsrvboottime) {
 4333                         ret = NFSERR_STALECLIENTID;
 4334                         goto out;
 4335                 }
 4336         } else if (stateidp->other[0] != nfsrvboottime &&
 4337                 specialid == 0) {
 4338                 ret = NFSERR_STALESTATEID;
 4339                 goto out;
 4340         }
 4341 
 4342         /*
 4343          * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
 4344          * not use a lock/open owner seqid#, so the check can be done now.
 4345          * (The others will be checked, as required, later.)
 4346          */
 4347         if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
 4348                 goto out;
 4349 
 4350         NFSLOCKSTATE();
 4351         ret = nfsrv_checkgrace(NULL, NULL, flags);
 4352         NFSUNLOCKSTATE();
 4353 
 4354 out:
 4355         NFSEXITCODE(ret);
 4356         return (ret);
 4357 }
 4358 
 4359 /*
 4360  * Check for grace.
 4361  */
 4362 static int
 4363 nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
 4364     u_int32_t flags)
 4365 {
 4366         int error = 0, notreclaimed;
 4367         struct nfsrv_stable *sp;
 4368 
 4369         if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_UPDATEDONE |
 4370              NFSNSF_GRACEOVER)) == 0) {
 4371                 /*
 4372                  * First, check to see if all of the clients have done a
 4373                  * ReclaimComplete.  If so, grace can end now.
 4374                  */
 4375                 notreclaimed = 0;
 4376                 LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 4377                         if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
 4378                                 notreclaimed = 1;
 4379                                 break;
 4380                         }
 4381                 }
 4382                 if (notreclaimed == 0)
 4383                         nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER |
 4384                             NFSNSF_NEEDLOCK);
 4385         }
 4386 
 4387         if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
 4388                 if (flags & NFSLCK_RECLAIM) {
 4389                         error = NFSERR_NOGRACE;
 4390                         goto out;
 4391                 }
 4392         } else {
 4393                 if (!(flags & NFSLCK_RECLAIM)) {
 4394                         error = NFSERR_GRACE;
 4395                         goto out;
 4396                 }
 4397                 if (nd != NULL && clp != NULL &&
 4398                     (nd->nd_flag & ND_NFSV41) != 0 &&
 4399                     (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
 4400                         error = NFSERR_NOGRACE;
 4401                         goto out;
 4402                 }
 4403 
 4404                 /*
 4405                  * If grace is almost over and we are still getting Reclaims,
 4406                  * extend grace a bit.
 4407                  */
 4408                 if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
 4409                     nfsrv_stablefirst.nsf_eograce)
 4410                         nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
 4411                                 NFSRV_LEASEDELTA;
 4412         }
 4413 
 4414 out:
 4415         NFSEXITCODE(error);
 4416         return (error);
 4417 }
 4418 
 4419 /*
 4420  * Do a server callback.
 4421  * The "trunc" argument is slightly overloaded and refers to different
 4422  * boolean arguments for CBRECALL and CBLAYOUTRECALL.
 4423  */
 4424 static int
 4425 nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
 4426     int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp,
 4427     int laytype, NFSPROC_T *p)
 4428 {
 4429         struct mbuf *m;
 4430         u_int32_t *tl;
 4431         struct nfsrv_descript *nd;
 4432         struct ucred *cred;
 4433         int error = 0;
 4434         u_int32_t callback;
 4435         struct nfsdsession *sep = NULL;
 4436         uint64_t tval;
 4437         bool dotls;
 4438 
 4439         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 4440         cred = newnfs_getcred();
 4441         NFSLOCKSTATE(); /* mostly for lc_cbref++ */
 4442         if (clp->lc_flags & LCL_NEEDSCONFIRM) {
 4443                 NFSUNLOCKSTATE();
 4444                 panic("docallb");
 4445         }
 4446         clp->lc_cbref++;
 4447 
 4448         /*
 4449          * Fill the callback program# and version into the request
 4450          * structure for newnfs_connect() to use.
 4451          */
 4452         clp->lc_req.nr_prog = clp->lc_program;
 4453 #ifdef notnow
 4454         if ((clp->lc_flags & LCL_NFSV41) != 0)
 4455                 clp->lc_req.nr_vers = NFSV41_CBVERS;
 4456         else
 4457 #endif
 4458                 clp->lc_req.nr_vers = NFSV4_CBVERS;
 4459 
 4460         /*
 4461          * First, fill in some of the fields of nd and cr.
 4462          */
 4463         nd->nd_flag = ND_NFSV4;
 4464         if (clp->lc_flags & LCL_GSS)
 4465                 nd->nd_flag |= ND_KERBV;
 4466         if ((clp->lc_flags & LCL_NFSV41) != 0)
 4467                 nd->nd_flag |= ND_NFSV41;
 4468         if ((clp->lc_flags & LCL_NFSV42) != 0)
 4469                 nd->nd_flag |= ND_NFSV42;
 4470         nd->nd_repstat = 0;
 4471         cred->cr_uid = clp->lc_uid;
 4472         cred->cr_gid = clp->lc_gid;
 4473         callback = clp->lc_callback;
 4474         NFSUNLOCKSTATE();
 4475         cred->cr_ngroups = 1;
 4476 
 4477         /*
 4478          * Get the first mbuf for the request.
 4479          */
 4480         MGET(m, M_WAITOK, MT_DATA);
 4481         m->m_len = 0;
 4482         nd->nd_mreq = nd->nd_mb = m;
 4483         nd->nd_bpos = mtod(m, caddr_t);
 4484 
 4485         /*
 4486          * and build the callback request.
 4487          */
 4488         if (procnum == NFSV4OP_CBGETATTR) {
 4489                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4490                 error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
 4491                     "CB Getattr", &sep);
 4492                 if (error != 0) {
 4493                         m_freem(nd->nd_mreq);
 4494                         goto errout;
 4495                 }
 4496                 (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
 4497                 (void)nfsrv_putattrbit(nd, attrbitp);
 4498         } else if (procnum == NFSV4OP_CBRECALL) {
 4499                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4500                 error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
 4501                     "CB Recall", &sep);
 4502                 if (error != 0) {
 4503                         m_freem(nd->nd_mreq);
 4504                         goto errout;
 4505                 }
 4506                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 4507                 *tl++ = txdr_unsigned(stateidp->seqid);
 4508                 NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
 4509                     NFSX_STATEIDOTHER);
 4510                 tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 4511                 if (trunc)
 4512                         *tl = newnfs_true;
 4513                 else
 4514                         *tl = newnfs_false;
 4515                 (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
 4516         } else if (procnum == NFSV4OP_CBLAYOUTRECALL) {
 4517                 NFSD_DEBUG(4, "docallback layout recall\n");
 4518                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4519                 error = nfsrv_cbcallargs(nd, clp, callback,
 4520                     NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep);
 4521                 NFSD_DEBUG(4, "aft cbcallargs=%d\n", error);
 4522                 if (error != 0) {
 4523                         m_freem(nd->nd_mreq);
 4524                         goto errout;
 4525                 }
 4526                 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 4527                 *tl++ = txdr_unsigned(laytype);
 4528                 *tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY);
 4529                 if (trunc)
 4530                         *tl++ = newnfs_true;
 4531                 else
 4532                         *tl++ = newnfs_false;
 4533                 *tl = txdr_unsigned(NFSV4LAYOUTRET_FILE);
 4534                 nfsm_fhtom(nd, (uint8_t *)fhp, NFSX_MYFH, 0);
 4535                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID);
 4536                 tval = 0;
 4537                 txdr_hyper(tval, tl); tl += 2;
 4538                 tval = UINT64_MAX;
 4539                 txdr_hyper(tval, tl); tl += 2;
 4540                 *tl++ = txdr_unsigned(stateidp->seqid);
 4541                 NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER);
 4542                 tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 4543                 NFSD_DEBUG(4, "aft args\n");
 4544         } else if (procnum == NFSV4PROC_CBNULL) {
 4545                 nd->nd_procnum = NFSV4PROC_CBNULL;
 4546                 if ((clp->lc_flags & LCL_NFSV41) != 0) {
 4547                         error = nfsv4_getcbsession(clp, &sep);
 4548                         if (error != 0) {
 4549                                 m_freem(nd->nd_mreq);
 4550                                 goto errout;
 4551                         }
 4552                 }
 4553         } else {
 4554                 error = NFSERR_SERVERFAULT;
 4555                 m_freem(nd->nd_mreq);
 4556                 goto errout;
 4557         }
 4558 
 4559         /*
 4560          * Call newnfs_connect(), as required, and then newnfs_request().
 4561          */
 4562         dotls = false;
 4563         if ((clp->lc_flags & LCL_TLSCB) != 0)
 4564                 dotls = true;
 4565         (void) newnfs_sndlock(&clp->lc_req.nr_lock);
 4566         if (clp->lc_req.nr_client == NULL) {
 4567                 if ((clp->lc_flags & LCL_NFSV41) != 0) {
 4568                         error = ECONNREFUSED;
 4569                         nfsrv_freesession(sep, NULL);
 4570                 } else if (nd->nd_procnum == NFSV4PROC_CBNULL)
 4571                         error = newnfs_connect(NULL, &clp->lc_req, cred,
 4572                             NULL, 1, dotls);
 4573                 else
 4574                         error = newnfs_connect(NULL, &clp->lc_req, cred,
 4575                             NULL, 3, dotls);
 4576         }
 4577         newnfs_sndunlock(&clp->lc_req.nr_lock);
 4578         NFSD_DEBUG(4, "aft sndunlock=%d\n", error);
 4579         if (!error) {
 4580                 if ((nd->nd_flag & ND_NFSV41) != 0) {
 4581                         KASSERT(sep != NULL, ("sep NULL"));
 4582                         if (sep->sess_cbsess.nfsess_xprt != NULL)
 4583                                 error = newnfs_request(nd, NULL, clp,
 4584                                     &clp->lc_req, NULL, NULL, cred,
 4585                                     clp->lc_program, clp->lc_req.nr_vers, NULL,
 4586                                     1, NULL, &sep->sess_cbsess);
 4587                         else {
 4588                                 /*
 4589                                  * This should probably never occur, but if a
 4590                                  * client somehow does an RPC without a
 4591                                  * SequenceID Op that causes a callback just
 4592                                  * after the nfsd threads have been terminated
 4593                                  * and restared we could conceivably get here
 4594                                  * without a backchannel xprt.
 4595                                  */
 4596                                 printf("nfsrv_docallback: no xprt\n");
 4597                                 error = ECONNREFUSED;
 4598                         }
 4599                         NFSD_DEBUG(4, "aft newnfs_request=%d\n", error);
 4600                         nfsrv_freesession(sep, NULL);
 4601                 } else
 4602                         error = newnfs_request(nd, NULL, clp, &clp->lc_req,
 4603                             NULL, NULL, cred, clp->lc_program,
 4604                             clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
 4605         }
 4606 errout:
 4607         NFSFREECRED(cred);
 4608 
 4609         /*
 4610          * If error is set here, the Callback path isn't working
 4611          * properly, so twiddle the appropriate LCL_ flags.
 4612          * (nd_repstat != 0 indicates the Callback path is working,
 4613          *  but the callback failed on the client.)
 4614          */
 4615         if (error) {
 4616                 /*
 4617                  * Mark the callback pathway down, which disabled issuing
 4618                  * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
 4619                  */
 4620                 NFSLOCKSTATE();
 4621                 clp->lc_flags |= LCL_CBDOWN;
 4622                 NFSUNLOCKSTATE();
 4623         } else {
 4624                 /*
 4625                  * Callback worked. If the callback path was down, disable
 4626                  * callbacks, so no more delegations will be issued. (This
 4627                  * is done on the assumption that the callback pathway is
 4628                  * flakey.)
 4629                  */
 4630                 NFSLOCKSTATE();
 4631                 if (clp->lc_flags & LCL_CBDOWN)
 4632                         clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
 4633                 NFSUNLOCKSTATE();
 4634                 if (nd->nd_repstat) {
 4635                         error = nd->nd_repstat;
 4636                         NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n",
 4637                             procnum, error);
 4638                 } else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
 4639                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 4640                             NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
 4641                             p, NULL);
 4642                 m_freem(nd->nd_mrep);
 4643         }
 4644         NFSLOCKSTATE();
 4645         clp->lc_cbref--;
 4646         if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
 4647                 clp->lc_flags &= ~LCL_WAKEUPWANTED;
 4648                 wakeup(clp);
 4649         }
 4650         NFSUNLOCKSTATE();
 4651 
 4652         free(nd, M_TEMP);
 4653         NFSEXITCODE(error);
 4654         return (error);
 4655 }
 4656 
 4657 /*
 4658  * Set up the compound RPC for the callback.
 4659  */
 4660 static int
 4661 nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
 4662     uint32_t callback, int op, const char *optag, struct nfsdsession **sepp)
 4663 {
 4664         uint32_t *tl;
 4665         int error, len;
 4666 
 4667         len = strlen(optag);
 4668         (void)nfsm_strtom(nd, optag, len);
 4669         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
 4670         if ((nd->nd_flag & ND_NFSV41) != 0) {
 4671                 if ((nd->nd_flag & ND_NFSV42) != 0)
 4672                         *tl++ = txdr_unsigned(NFSV42_MINORVERSION);
 4673                 else
 4674                         *tl++ = txdr_unsigned(NFSV41_MINORVERSION);
 4675                 *tl++ = txdr_unsigned(callback);
 4676                 *tl++ = txdr_unsigned(2);
 4677                 *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
 4678                 error = nfsv4_setcbsequence(nd, clp, 1, sepp);
 4679                 if (error != 0)
 4680                         return (error);
 4681                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 4682                 *tl = txdr_unsigned(op);
 4683         } else {
 4684                 *tl++ = txdr_unsigned(NFSV4_MINORVERSION);
 4685                 *tl++ = txdr_unsigned(callback);
 4686                 *tl++ = txdr_unsigned(1);
 4687                 *tl = txdr_unsigned(op);
 4688         }
 4689         return (0);
 4690 }
 4691 
 4692 /*
 4693  * Return the next index# for a clientid. Mostly just increment and return
 4694  * the next one, but... if the 32bit unsigned does actually wrap around,
 4695  * it should be rebooted.
 4696  * At an average rate of one new client per second, it will wrap around in
 4697  * approximately 136 years. (I think the server will have been shut
 4698  * down or rebooted before then.)
 4699  */
 4700 static u_int32_t
 4701 nfsrv_nextclientindex(void)
 4702 {
 4703         static u_int32_t client_index = 0;
 4704 
 4705         client_index++;
 4706         if (client_index != 0)
 4707                 return (client_index);
 4708 
 4709         printf("%s: out of clientids\n", __func__);
 4710         return (client_index);
 4711 }
 4712 
 4713 /*
 4714  * Return the next index# for a stateid. Mostly just increment and return
 4715  * the next one, but... if the 32bit unsigned does actually wrap around
 4716  * (will a BSD server stay up that long?), find
 4717  * new start and end values.
 4718  */
 4719 static u_int32_t
 4720 nfsrv_nextstateindex(struct nfsclient *clp)
 4721 {
 4722         struct nfsstate *stp;
 4723         int i;
 4724         u_int32_t canuse, min_index, max_index;
 4725 
 4726         if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
 4727                 clp->lc_stateindex++;
 4728                 if (clp->lc_stateindex != clp->lc_statemaxindex)
 4729                         return (clp->lc_stateindex);
 4730         }
 4731 
 4732         /*
 4733          * Yuck, we've hit the end.
 4734          * Look for a new min and max.
 4735          */
 4736         min_index = 0;
 4737         max_index = 0xffffffff;
 4738         for (i = 0; i < nfsrv_statehashsize; i++) {
 4739             LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 4740                 if (stp->ls_stateid.other[2] > 0x80000000) {
 4741                     if (stp->ls_stateid.other[2] < max_index)
 4742                         max_index = stp->ls_stateid.other[2];
 4743                 } else {
 4744                     if (stp->ls_stateid.other[2] > min_index)
 4745                         min_index = stp->ls_stateid.other[2];
 4746                 }
 4747             }
 4748         }
 4749 
 4750         /*
 4751          * Yikes, highly unlikely, but I'll handle it anyhow.
 4752          */
 4753         if (min_index == 0x80000000 && max_index == 0x80000001) {
 4754             canuse = 0;
 4755             /*
 4756              * Loop around until we find an unused entry. Return that
 4757              * and set LCL_INDEXNOTOK, so the search will continue next time.
 4758              * (This is one of those rare cases where a goto is the
 4759              *  cleanest way to code the loop.)
 4760              */
 4761 tryagain:
 4762             for (i = 0; i < nfsrv_statehashsize; i++) {
 4763                 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 4764                     if (stp->ls_stateid.other[2] == canuse) {
 4765                         canuse++;
 4766                         goto tryagain;
 4767                     }
 4768                 }
 4769             }
 4770             clp->lc_flags |= LCL_INDEXNOTOK;
 4771             return (canuse);
 4772         }
 4773 
 4774         /*
 4775          * Ok to start again from min + 1.
 4776          */
 4777         clp->lc_stateindex = min_index + 1;
 4778         clp->lc_statemaxindex = max_index;
 4779         clp->lc_flags &= ~LCL_INDEXNOTOK;
 4780         return (clp->lc_stateindex);
 4781 }
 4782 
 4783 /*
 4784  * The following functions handle the stable storage file that deals with
 4785  * the edge conditions described in RFC3530 Sec. 8.6.3.
 4786  * The file is as follows:
 4787  * - a single record at the beginning that has the lease time of the
 4788  *   previous server instance (before the last reboot) and the nfsrvboottime
 4789  *   values for the previous server boots.
 4790  *   These previous boot times are used to ensure that the current
 4791  *   nfsrvboottime does not, somehow, get set to a previous one.
 4792  *   (This is important so that Stale ClientIDs and StateIDs can
 4793  *    be recognized.)
 4794  *   The number of previous nfsrvboottime values precedes the list.
 4795  * - followed by some number of appended records with:
 4796  *   - client id string
 4797  *   - flag that indicates it is a record revoking state via lease
 4798  *     expiration or similar
 4799  *     OR has successfully acquired state.
 4800  * These structures vary in length, with the client string at the end, up
 4801  * to NFSV4_OPAQUELIMIT in size.
 4802  *
 4803  * At the end of the grace period, the file is truncated, the first
 4804  * record is rewritten with updated information and any acquired state
 4805  * records for successful reclaims of state are written.
 4806  *
 4807  * Subsequent records are appended when the first state is issued to
 4808  * a client and when state is revoked for a client.
 4809  *
 4810  * When reading the file in, state issued records that come later in
 4811  * the file override older ones, since the append log is in chronological order.
 4812  * If, for some reason, the file can't be read, the grace period is
 4813  * immediately terminated and all reclaims get NFSERR_NOGRACE.
 4814  */
 4815 
 4816 /*
 4817  * Read in the stable storage file. Called by nfssvc() before the nfsd
 4818  * processes start servicing requests.
 4819  */
 4820 void
 4821 nfsrv_setupstable(NFSPROC_T *p)
 4822 {
 4823         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 4824         struct nfsrv_stable *sp, *nsp;
 4825         struct nfst_rec *tsp;
 4826         int error, i, tryagain;
 4827         off_t off = 0;
 4828         ssize_t aresid, len;
 4829 
 4830         /*
 4831          * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
 4832          * a reboot, so state has not been lost.
 4833          */
 4834         if (sf->nsf_flags & NFSNSF_UPDATEDONE)
 4835                 return;
 4836         /*
 4837          * Set Grace over just until the file reads successfully.
 4838          */
 4839         nfsrvboottime = time_second;
 4840         LIST_INIT(&sf->nsf_head);
 4841         sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
 4842         sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
 4843         if (sf->nsf_fp == NULL)
 4844                 return;
 4845         error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4846             (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
 4847             0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4848         if (error || aresid || sf->nsf_numboots == 0 ||
 4849                 sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
 4850                 return;
 4851 
 4852         /*
 4853          * Now, read in the boottimes.
 4854          */
 4855         sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
 4856                 sizeof (time_t), M_TEMP, M_WAITOK);
 4857         off = sizeof (struct nfsf_rec);
 4858         error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4859             (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
 4860             UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4861         if (error || aresid) {
 4862                 free(sf->nsf_bootvals, M_TEMP);
 4863                 sf->nsf_bootvals = NULL;
 4864                 return;
 4865         }
 4866 
 4867         /*
 4868          * Make sure this nfsrvboottime is different from all recorded
 4869          * previous ones.
 4870          */
 4871         do {
 4872                 tryagain = 0;
 4873                 for (i = 0; i < sf->nsf_numboots; i++) {
 4874                         if (nfsrvboottime == sf->nsf_bootvals[i]) {
 4875                                 nfsrvboottime++;
 4876                                 tryagain = 1;
 4877                                 break;
 4878                         }
 4879                 }
 4880         } while (tryagain);
 4881 
 4882         sf->nsf_flags |= NFSNSF_OK;
 4883         off += (sf->nsf_numboots * sizeof (time_t));
 4884 
 4885         /*
 4886          * Read through the file, building a list of records for grace
 4887          * checking.
 4888          * Each record is between sizeof (struct nfst_rec) and
 4889          * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
 4890          * and is actually sizeof (struct nfst_rec) + nst_len - 1.
 4891          */
 4892         tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
 4893                 NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
 4894         do {
 4895             error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4896                 (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
 4897                 off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4898             len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
 4899             if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
 4900                 len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
 4901                 /*
 4902                  * Yuck, the file has been corrupted, so just return
 4903                  * after clearing out any restart state, so the grace period
 4904                  * is over.
 4905                  */
 4906                 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
 4907                         LIST_REMOVE(sp, nst_list);
 4908                         free(sp, M_TEMP);
 4909                 }
 4910                 free(tsp, M_TEMP);
 4911                 sf->nsf_flags &= ~NFSNSF_OK;
 4912                 free(sf->nsf_bootvals, M_TEMP);
 4913                 sf->nsf_bootvals = NULL;
 4914                 return;
 4915             }
 4916             if (len > 0) {
 4917                 off += sizeof (struct nfst_rec) + tsp->len - 1;
 4918                 /*
 4919                  * Search the list for a matching client.
 4920                  */
 4921                 LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
 4922                         if (tsp->len == sp->nst_len &&
 4923                             !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
 4924                                 break;
 4925                 }
 4926                 if (sp == LIST_END(&sf->nsf_head)) {
 4927                         sp = (struct nfsrv_stable *)malloc(tsp->len +
 4928                                 sizeof (struct nfsrv_stable) - 1, M_TEMP,
 4929                                 M_WAITOK);
 4930                         NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
 4931                                 sizeof (struct nfst_rec) + tsp->len - 1);
 4932                         LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
 4933                 } else {
 4934                         if (tsp->flag == NFSNST_REVOKE)
 4935                                 sp->nst_flag |= NFSNST_REVOKE;
 4936                         else
 4937                                 /*
 4938                                  * A subsequent timestamp indicates the client
 4939                                  * did a setclientid/confirm and any previous
 4940                                  * revoke is no longer relevant.
 4941                                  */
 4942                                 sp->nst_flag &= ~NFSNST_REVOKE;
 4943                 }
 4944             }
 4945         } while (len > 0);
 4946         free(tsp, M_TEMP);
 4947         sf->nsf_flags = NFSNSF_OK;
 4948         sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
 4949                 NFSRV_LEASEDELTA;
 4950 }
 4951 
 4952 /*
 4953  * Update the stable storage file, now that the grace period is over.
 4954  */
 4955 void
 4956 nfsrv_updatestable(NFSPROC_T *p)
 4957 {
 4958         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 4959         struct nfsrv_stable *sp, *nsp;
 4960         int i;
 4961         struct nfsvattr nva;
 4962         vnode_t vp;
 4963 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
 4964         mount_t mp = NULL;
 4965 #endif
 4966         int error;
 4967 
 4968         if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
 4969                 return;
 4970         sf->nsf_flags |= NFSNSF_UPDATEDONE;
 4971         /*
 4972          * Ok, we need to rewrite the stable storage file.
 4973          * - truncate to 0 length
 4974          * - write the new first structure
 4975          * - loop through the data structures, writing out any that
 4976          *   have timestamps older than the old boot
 4977          */
 4978         if (sf->nsf_bootvals) {
 4979                 sf->nsf_numboots++;
 4980                 for (i = sf->nsf_numboots - 2; i >= 0; i--)
 4981                         sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
 4982         } else {
 4983                 sf->nsf_numboots = 1;
 4984                 sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
 4985                         M_TEMP, M_WAITOK);
 4986         }
 4987         sf->nsf_bootvals[0] = nfsrvboottime;
 4988         sf->nsf_lease = nfsrv_lease;
 4989         NFSVNO_ATTRINIT(&nva);
 4990         NFSVNO_SETATTRVAL(&nva, size, 0);
 4991         vp = NFSFPVNODE(sf->nsf_fp);
 4992         vn_start_write(vp, &mp, V_WAIT);
 4993         if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
 4994                 error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
 4995                     NULL);
 4996                 NFSVOPUNLOCK(vp);
 4997         } else
 4998                 error = EPERM;
 4999         vn_finished_write(mp);
 5000         if (!error)
 5001             error = NFSD_RDWR(UIO_WRITE, vp,
 5002                 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
 5003                 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
 5004         if (!error)
 5005             error = NFSD_RDWR(UIO_WRITE, vp,
 5006                 (caddr_t)sf->nsf_bootvals,
 5007                 sf->nsf_numboots * sizeof (time_t),
 5008                 (off_t)(sizeof (struct nfsf_rec)),
 5009                 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
 5010         free(sf->nsf_bootvals, M_TEMP);
 5011         sf->nsf_bootvals = NULL;
 5012         if (error) {
 5013                 sf->nsf_flags &= ~NFSNSF_OK;
 5014                 printf("EEK! Can't write NfsV4 stable storage file\n");
 5015                 return;
 5016         }
 5017         sf->nsf_flags |= NFSNSF_OK;
 5018 
 5019         /*
 5020          * Loop through the list and write out timestamp records for
 5021          * any clients that successfully reclaimed state.
 5022          */
 5023         LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
 5024                 if (sp->nst_flag & NFSNST_GOTSTATE) {
 5025                         nfsrv_writestable(sp->nst_client, sp->nst_len,
 5026                                 NFSNST_NEWSTATE, p);
 5027                         sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
 5028                 }
 5029                 LIST_REMOVE(sp, nst_list);
 5030                 free(sp, M_TEMP);
 5031         }
 5032         nfsrv_backupstable();
 5033 }
 5034 
 5035 /*
 5036  * Append a record to the stable storage file.
 5037  */
 5038 void
 5039 nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
 5040 {
 5041         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 5042         struct nfst_rec *sp;
 5043         int error;
 5044 
 5045         if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
 5046                 return;
 5047         sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
 5048                 len - 1, M_TEMP, M_WAITOK);
 5049         sp->len = len;
 5050         NFSBCOPY(client, sp->client, len);
 5051         sp->flag = flag;
 5052         error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
 5053             (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
 5054             UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
 5055         free(sp, M_TEMP);
 5056         if (error) {
 5057                 sf->nsf_flags &= ~NFSNSF_OK;
 5058                 printf("EEK! Can't write NfsV4 stable storage file\n");
 5059         }
 5060 }
 5061 
 5062 /*
 5063  * This function is called during the grace period to mark a client
 5064  * that successfully reclaimed state.
 5065  */
 5066 static void
 5067 nfsrv_markstable(struct nfsclient *clp)
 5068 {
 5069         struct nfsrv_stable *sp;
 5070 
 5071         /*
 5072          * First find the client structure.
 5073          */
 5074         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 5075                 if (sp->nst_len == clp->lc_idlen &&
 5076                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 5077                         break;
 5078         }
 5079         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
 5080                 return;
 5081 
 5082         /*
 5083          * Now, just mark it and set the nfsclient back pointer.
 5084          */
 5085         sp->nst_flag |= NFSNST_GOTSTATE;
 5086         sp->nst_clp = clp;
 5087 }
 5088 
 5089 /*
 5090  * This function is called when a NFSv4.1 client does a ReclaimComplete.
 5091  * Very similar to nfsrv_markstable(), except for the flag being set.
 5092  */
 5093 static void
 5094 nfsrv_markreclaim(struct nfsclient *clp)
 5095 {
 5096         struct nfsrv_stable *sp;
 5097 
 5098         /*
 5099          * First find the client structure.
 5100          */
 5101         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 5102                 if (sp->nst_len == clp->lc_idlen &&
 5103                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 5104                         break;
 5105         }
 5106         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
 5107                 return;
 5108 
 5109         /*
 5110          * Now, just set the flag.
 5111          */
 5112         sp->nst_flag |= NFSNST_RECLAIMED;
 5113 }
 5114 
 5115 /*
 5116  * This function is called for a reclaim, to see if it gets grace.
 5117  * It returns 0 if a reclaim is allowed, 1 otherwise.
 5118  */
 5119 static int
 5120 nfsrv_checkstable(struct nfsclient *clp)
 5121 {
 5122         struct nfsrv_stable *sp;
 5123 
 5124         /*
 5125          * First, find the entry for the client.
 5126          */
 5127         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 5128                 if (sp->nst_len == clp->lc_idlen &&
 5129                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 5130                         break;
 5131         }
 5132 
 5133         /*
 5134          * If not in the list, state was revoked or no state was issued
 5135          * since the previous reboot, a reclaim is denied.
 5136          */
 5137         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
 5138             (sp->nst_flag & NFSNST_REVOKE) ||
 5139             !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
 5140                 return (1);
 5141         return (0);
 5142 }
 5143 
 5144 /*
 5145  * Test for and try to clear out a conflicting client. This is called by
 5146  * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
 5147  * a found.
 5148  * The trick here is that it can't revoke a conflicting client with an
 5149  * expired lease unless it holds the v4root lock, so...
 5150  * If no v4root lock, get the lock and return 1 to indicate "try again".
 5151  * Return 0 to indicate the conflict can't be revoked and 1 to indicate
 5152  * the revocation worked and the conflicting client is "bye, bye", so it
 5153  * can be tried again.
 5154  * Return 2 to indicate that the vnode is VIRF_DOOMED after NFSVOPLOCK().
 5155  * Unlocks State before a non-zero value is returned.
 5156  */
 5157 static int
 5158 nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
 5159     NFSPROC_T *p)
 5160 {
 5161         int gotlock, lktype = 0;
 5162 
 5163         /*
 5164          * If lease hasn't expired, we can't fix it.
 5165          */
 5166         if (clp->lc_expiry >= NFSD_MONOSEC ||
 5167             !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
 5168                 return (0);
 5169         if (*haslockp == 0) {
 5170                 NFSUNLOCKSTATE();
 5171                 if (vp != NULL) {
 5172                         lktype = NFSVOPISLOCKED(vp);
 5173                         NFSVOPUNLOCK(vp);
 5174                 }
 5175                 NFSLOCKV4ROOTMUTEX();
 5176                 nfsv4_relref(&nfsv4rootfs_lock);
 5177                 do {
 5178                         gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 5179                             NFSV4ROOTLOCKMUTEXPTR, NULL);
 5180                 } while (!gotlock);
 5181                 NFSUNLOCKV4ROOTMUTEX();
 5182                 *haslockp = 1;
 5183                 if (vp != NULL) {
 5184                         NFSVOPLOCK(vp, lktype | LK_RETRY);
 5185                         if (VN_IS_DOOMED(vp))
 5186                                 return (2);
 5187                 }
 5188                 return (1);
 5189         }
 5190         NFSUNLOCKSTATE();
 5191 
 5192         /*
 5193          * Ok, we can expire the conflicting client.
 5194          */
 5195         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
 5196         nfsrv_backupstable();
 5197         nfsrv_cleanclient(clp, p);
 5198         nfsrv_freedeleglist(&clp->lc_deleg);
 5199         nfsrv_freedeleglist(&clp->lc_olddeleg);
 5200         LIST_REMOVE(clp, lc_hash);
 5201         nfsrv_zapclient(clp, p);
 5202         return (1);
 5203 }
 5204 
 5205 /*
 5206  * Resolve a delegation conflict.
 5207  * Returns 0 to indicate the conflict was resolved without sleeping.
 5208  * Return -1 to indicate that the caller should check for conflicts again.
 5209  * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
 5210  *
 5211  * Also, manipulate the nfsv4root_lock, as required. It isn't changed
 5212  * for a return of 0, since there was no sleep and it could be required
 5213  * later. It is released for a return of NFSERR_DELAY, since the caller
 5214  * will return that error. It is released when a sleep was done waiting
 5215  * for the delegation to be returned or expire (so that other nfsds can
 5216  * handle ops). Then, it must be acquired for the write to stable storage.
 5217  * (This function is somewhat similar to nfsrv_clientconflict(), but
 5218  *  the semantics differ in a couple of subtle ways. The return of 0
 5219  *  indicates the conflict was resolved without sleeping here, not
 5220  *  that the conflict can't be resolved and the handling of nfsv4root_lock
 5221  *  differs, as noted above.)
 5222  * Unlocks State before returning a non-zero value.
 5223  */
 5224 static int
 5225 nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
 5226     vnode_t vp)
 5227 {
 5228         struct nfsclient *clp = stp->ls_clp;
 5229         int gotlock, error, lktype = 0, retrycnt, zapped_clp;
 5230         nfsv4stateid_t tstateid;
 5231         fhandle_t tfh;
 5232 
 5233         /*
 5234          * If the conflict is with an old delegation...
 5235          */
 5236         if (stp->ls_flags & NFSLCK_OLDDELEG) {
 5237                 /*
 5238                  * You can delete it, if it has expired.
 5239                  */
 5240                 if (clp->lc_delegtime < NFSD_MONOSEC) {
 5241                         nfsrv_freedeleg(stp);
 5242                         NFSUNLOCKSTATE();
 5243                         error = -1;
 5244                         goto out;
 5245                 }
 5246                 NFSUNLOCKSTATE();
 5247                 /*
 5248                  * During this delay, the old delegation could expire or it
 5249                  * could be recovered by the client via an Open with
 5250                  * CLAIM_DELEGATE_PREV.
 5251                  * Release the nfsv4root_lock, if held.
 5252                  */
 5253                 if (*haslockp) {
 5254                         *haslockp = 0;
 5255                         NFSLOCKV4ROOTMUTEX();
 5256                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5257                         NFSUNLOCKV4ROOTMUTEX();
 5258                 }
 5259                 error = NFSERR_DELAY;
 5260                 goto out;
 5261         }
 5262 
 5263         /*
 5264          * It's a current delegation, so:
 5265          * - check to see if the delegation has expired
 5266          *   - if so, get the v4root lock and then expire it
 5267          */
 5268         if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
 5269                 /*
 5270                  * - do a recall callback, since not yet done
 5271                  * For now, never allow truncate to be set. To use
 5272                  * truncate safely, it must be guaranteed that the
 5273                  * Remove, Rename or Setattr with size of 0 will
 5274                  * succeed and that would require major changes to
 5275                  * the VFS/Vnode OPs.
 5276                  * Set the expiry time large enough so that it won't expire
 5277                  * until after the callback, then set it correctly, once
 5278                  * the callback is done. (The delegation will now time
 5279                  * out whether or not the Recall worked ok. The timeout
 5280                  * will be extended when ops are done on the delegation
 5281                  * stateid, up to the timelimit.)
 5282                  */
 5283                 stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
 5284                     NFSRV_LEASEDELTA;
 5285                 stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
 5286                     NFSRV_LEASEDELTA;
 5287                 stp->ls_flags |= NFSLCK_DELEGRECALL;
 5288 
 5289                 /*
 5290                  * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
 5291                  * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
 5292                  * in order to try and avoid a race that could happen
 5293                  * when a CBRecall request passed the Open reply with
 5294                  * the delegation in it when transitting the network.
 5295                  * Since nfsrv_docallback will sleep, don't use stp after
 5296                  * the call.
 5297                  */
 5298                 NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
 5299                     sizeof (tstateid));
 5300                 NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
 5301                     sizeof (tfh));
 5302                 NFSUNLOCKSTATE();
 5303                 if (*haslockp) {
 5304                         *haslockp = 0;
 5305                         NFSLOCKV4ROOTMUTEX();
 5306                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5307                         NFSUNLOCKV4ROOTMUTEX();
 5308                 }
 5309                 retrycnt = 0;
 5310                 do {
 5311                     error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
 5312                         &tstateid, 0, &tfh, NULL, NULL, 0, p);
 5313                     retrycnt++;
 5314                 } while ((error == NFSERR_BADSTATEID ||
 5315                     error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
 5316                 error = NFSERR_DELAY;
 5317                 goto out;
 5318         }
 5319 
 5320         if (clp->lc_expiry >= NFSD_MONOSEC &&
 5321             stp->ls_delegtime >= NFSD_MONOSEC) {
 5322                 NFSUNLOCKSTATE();
 5323                 /*
 5324                  * A recall has been done, but it has not yet expired.
 5325                  * So, RETURN_DELAY.
 5326                  */
 5327                 if (*haslockp) {
 5328                         *haslockp = 0;
 5329                         NFSLOCKV4ROOTMUTEX();
 5330                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5331                         NFSUNLOCKV4ROOTMUTEX();
 5332                 }
 5333                 error = NFSERR_DELAY;
 5334                 goto out;
 5335         }
 5336 
 5337         /*
 5338          * If we don't yet have the lock, just get it and then return,
 5339          * since we need that before deleting expired state, such as
 5340          * this delegation.
 5341          * When getting the lock, unlock the vnode, so other nfsds that
 5342          * are in progress, won't get stuck waiting for the vnode lock.
 5343          */
 5344         if (*haslockp == 0) {
 5345                 NFSUNLOCKSTATE();
 5346                 if (vp != NULL) {
 5347                         lktype = NFSVOPISLOCKED(vp);
 5348                         NFSVOPUNLOCK(vp);
 5349                 }
 5350                 NFSLOCKV4ROOTMUTEX();
 5351                 nfsv4_relref(&nfsv4rootfs_lock);
 5352                 do {
 5353                         gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 5354                             NFSV4ROOTLOCKMUTEXPTR, NULL);
 5355                 } while (!gotlock);
 5356                 NFSUNLOCKV4ROOTMUTEX();
 5357                 *haslockp = 1;
 5358                 if (vp != NULL) {
 5359                         NFSVOPLOCK(vp, lktype | LK_RETRY);
 5360                         if (VN_IS_DOOMED(vp)) {
 5361                                 *haslockp = 0;
 5362                                 NFSLOCKV4ROOTMUTEX();
 5363                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5364                                 NFSUNLOCKV4ROOTMUTEX();
 5365                                 error = NFSERR_PERM;
 5366                                 goto out;
 5367                         }
 5368                 }
 5369                 error = -1;
 5370                 goto out;
 5371         }
 5372 
 5373         NFSUNLOCKSTATE();
 5374         /*
 5375          * Ok, we can delete the expired delegation.
 5376          * First, write the Revoke record to stable storage and then
 5377          * clear out the conflict.
 5378          * Since all other nfsd threads are now blocked, we can safely
 5379          * sleep without the state changing.
 5380          */
 5381         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
 5382         nfsrv_backupstable();
 5383         if (clp->lc_expiry < NFSD_MONOSEC) {
 5384                 nfsrv_cleanclient(clp, p);
 5385                 nfsrv_freedeleglist(&clp->lc_deleg);
 5386                 nfsrv_freedeleglist(&clp->lc_olddeleg);
 5387                 LIST_REMOVE(clp, lc_hash);
 5388                 zapped_clp = 1;
 5389         } else {
 5390                 nfsrv_freedeleg(stp);
 5391                 zapped_clp = 0;
 5392         }
 5393         if (zapped_clp)
 5394                 nfsrv_zapclient(clp, p);
 5395         error = -1;
 5396 
 5397 out:
 5398         NFSEXITCODE(error);
 5399         return (error);
 5400 }
 5401 
 5402 /*
 5403  * Check for a remove allowed, if remove is set to 1 and get rid of
 5404  * delegations.
 5405  */
 5406 int
 5407 nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd,
 5408     nfsquad_t clientid, NFSPROC_T *p)
 5409 {
 5410         struct nfsclient *clp;
 5411         struct nfsstate *stp;
 5412         struct nfslockfile *lfp;
 5413         int error, haslock = 0;
 5414         fhandle_t nfh;
 5415 
 5416         clp = NULL;
 5417         /*
 5418          * First, get the lock file structure.
 5419          * (A return of -1 means no associated state, so remove ok.)
 5420          */
 5421         error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
 5422 tryagain:
 5423         NFSLOCKSTATE();
 5424         if (error == 0 && clientid.qval != 0)
 5425                 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 5426                     (nfsquad_t)((u_quad_t)0), 0, nd, p);
 5427         if (!error)
 5428                 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
 5429         if (error) {
 5430                 NFSUNLOCKSTATE();
 5431                 if (haslock) {
 5432                         NFSLOCKV4ROOTMUTEX();
 5433                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5434                         NFSUNLOCKV4ROOTMUTEX();
 5435                 }
 5436                 if (error == -1)
 5437                         error = 0;
 5438                 goto out;
 5439         }
 5440 
 5441         /*
 5442          * Now, we must Recall any delegations.
 5443          */
 5444         error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
 5445         if (error) {
 5446                 /*
 5447                  * nfsrv_cleandeleg() unlocks state for non-zero
 5448                  * return.
 5449                  */
 5450                 if (error == -1)
 5451                         goto tryagain;
 5452                 if (haslock) {
 5453                         NFSLOCKV4ROOTMUTEX();
 5454                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5455                         NFSUNLOCKV4ROOTMUTEX();
 5456                 }
 5457                 goto out;
 5458         }
 5459 
 5460         /*
 5461          * Now, look for a conflicting open share.
 5462          */
 5463         if (remove) {
 5464                 /*
 5465                  * If the entry in the directory was the last reference to the
 5466                  * corresponding filesystem object, the object can be destroyed
 5467                  * */
 5468                 if(lfp->lf_usecount>1)
 5469                         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 5470                                 if (stp->ls_flags & NFSLCK_WRITEDENY) {
 5471                                         error = NFSERR_FILEOPEN;
 5472                                         break;
 5473                                 }
 5474                         }
 5475         }
 5476 
 5477         NFSUNLOCKSTATE();
 5478         if (haslock) {
 5479                 NFSLOCKV4ROOTMUTEX();
 5480                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5481                 NFSUNLOCKV4ROOTMUTEX();
 5482         }
 5483 
 5484 out:
 5485         NFSEXITCODE(error);
 5486         return (error);
 5487 }
 5488 
 5489 /*
 5490  * Clear out all delegations for the file referred to by lfp.
 5491  * May return NFSERR_DELAY, if there will be a delay waiting for
 5492  * delegations to expire.
 5493  * Returns -1 to indicate it slept while recalling a delegation.
 5494  * This function has the side effect of deleting the nfslockfile structure,
 5495  * if it no longer has associated state and didn't have to sleep.
 5496  * Unlocks State before a non-zero value is returned.
 5497  */
 5498 static int
 5499 nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
 5500     struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
 5501 {
 5502         struct nfsstate *stp, *nstp;
 5503         int ret = 0;
 5504 
 5505         stp = LIST_FIRST(&lfp->lf_deleg);
 5506         while (stp != LIST_END(&lfp->lf_deleg)) {
 5507                 nstp = LIST_NEXT(stp, ls_file);
 5508                 if (stp->ls_clp != clp) {
 5509                         ret = nfsrv_delegconflict(stp, haslockp, p, vp);
 5510                         if (ret) {
 5511                                 /*
 5512                                  * nfsrv_delegconflict() unlocks state
 5513                                  * when it returns non-zero.
 5514                                  */
 5515                                 goto out;
 5516                         }
 5517                 }
 5518                 stp = nstp;
 5519         }
 5520 out:
 5521         NFSEXITCODE(ret);
 5522         return (ret);
 5523 }
 5524 
 5525 /*
 5526  * There are certain operations that, when being done outside of NFSv4,
 5527  * require that any NFSv4 delegation for the file be recalled.
 5528  * This function is to be called for those cases:
 5529  * VOP_RENAME() - When a delegation is being recalled for any reason,
 5530  *      the client may have to do Opens against the server, using the file's
 5531  *      final component name. If the file has been renamed on the server,
 5532  *      that component name will be incorrect and the Open will fail.
 5533  * VOP_REMOVE() - Theoretically, a client could Open a file after it has
 5534  *      been removed on the server, if there is a delegation issued to
 5535  *      that client for the file. I say "theoretically" since clients
 5536  *      normally do an Access Op before the Open and that Access Op will
 5537  *      fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
 5538  *      they will detect the file's removal in the same manner. (There is
 5539  *      one case where RFC3530 allows a client to do an Open without first
 5540  *      doing an Access Op, which is passage of a check against the ACE
 5541  *      returned with a Write delegation, but current practice is to ignore
 5542  *      the ACE and always do an Access Op.)
 5543  *      Since the functions can only be called with an unlocked vnode, this
 5544  *      can't be done at this time.
 5545  * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
 5546  *      locks locally in the client, which are not visible to the server. To
 5547  *      deal with this, issuing of delegations for a vnode must be disabled
 5548  *      and all delegations for the vnode recalled. This is done via the
 5549  *      second function, using the VV_DISABLEDELEG vflag on the vnode.
 5550  */
 5551 void
 5552 nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
 5553 {
 5554         time_t starttime;
 5555         int error;
 5556 
 5557         /*
 5558          * First, check to see if the server is currently running and it has
 5559          * been called for a regular file when issuing delegations.
 5560          */
 5561         if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
 5562             nfsrv_issuedelegs == 0)
 5563                 return;
 5564 
 5565         KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
 5566         /*
 5567          * First, get a reference on the nfsv4rootfs_lock so that an
 5568          * exclusive lock cannot be acquired by another thread.
 5569          */
 5570         NFSLOCKV4ROOTMUTEX();
 5571         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 5572         NFSUNLOCKV4ROOTMUTEX();
 5573 
 5574         /*
 5575          * Now, call nfsrv_checkremove() in a loop while it returns
 5576          * NFSERR_DELAY. Return upon any other error or when timed out.
 5577          */
 5578         starttime = NFSD_MONOSEC;
 5579         do {
 5580                 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
 5581                         error = nfsrv_checkremove(vp, 0, NULL,
 5582                             (nfsquad_t)((u_quad_t)0), p);
 5583                         NFSVOPUNLOCK(vp);
 5584                 } else
 5585                         error = EPERM;
 5586                 if (error == NFSERR_DELAY) {
 5587                         if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
 5588                                 break;
 5589                         /* Sleep for a short period of time */
 5590                         (void) nfs_catnap(PZERO, 0, "nfsremove");
 5591                 }
 5592         } while (error == NFSERR_DELAY);
 5593         NFSLOCKV4ROOTMUTEX();
 5594         nfsv4_relref(&nfsv4rootfs_lock);
 5595         NFSUNLOCKV4ROOTMUTEX();
 5596 }
 5597 
/*
 * Disable the issuing of delegations for a vnode and then recall any that
 * have already been issued for it.
 * The disabling step is only compiled in when VV_DISABLEDELEG is defined;
 * without it this is equivalent to calling nfsd_recalldelegation().
 */
void
nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
{

#ifdef VV_DISABLEDELEG
        /*
         * First, flag issuance of delegations disabled.
         * (The flag is set before the recall is started.)
         */
        atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
#endif

        /*
         * Then call nfsd_recalldelegation() to get rid of all extant
         * delegations.
         */
        nfsd_recalldelegation(vp, p);
}
 5615 
 5616 /*
 5617  * Check for conflicting locks, etc. and then get rid of delegations.
 5618  * (At one point I thought that I should get rid of delegations for any
 5619  *  Setattr, since it could potentially disallow the I/O op (read or write)
 5620  *  allowed by the delegation. However, Setattr Ops that aren't changing
 5621  *  the size get a stateid of all 0s, so you can't tell if it is a delegation
 5622  *  for the same client or a different one, so I decided to only get rid
 5623  *  of delegations for other clients when the size is being changed.)
 5624  * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
 5625  * as Write backs, even if there is no delegation, so it really isn't any
 5626  * different?)
 5627  */
 5628 int
 5629 nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
 5630     nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
 5631     struct nfsexstuff *exp, NFSPROC_T *p)
 5632 {
 5633         struct nfsstate st, *stp = &st;
 5634         struct nfslock lo, *lop = &lo;
 5635         int error = 0;
 5636         nfsquad_t clientid;
 5637 
 5638         if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
 5639                 stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
 5640                 lop->lo_first = nvap->na_size;
 5641         } else {
 5642                 stp->ls_flags = 0;
 5643                 lop->lo_first = 0;
 5644         }
 5645         if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
 5646             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
 5647             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
 5648             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
 5649                 stp->ls_flags |= NFSLCK_SETATTR;
 5650         if (stp->ls_flags == 0)
 5651                 goto out;
 5652         lop->lo_end = NFS64BITSSET;
 5653         lop->lo_flags = NFSLCK_WRITE;
 5654         stp->ls_ownerlen = 0;
 5655         stp->ls_op = NULL;
 5656         stp->ls_uid = nd->nd_cred->cr_uid;
 5657         stp->ls_stateid.seqid = stateidp->seqid;
 5658         clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
 5659         clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
 5660         stp->ls_stateid.other[2] = stateidp->other[2];
 5661         error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 5662             stateidp, exp, nd, p);
 5663 
 5664 out:
 5665         NFSEXITCODE2(error, nd);
 5666         return (error);
 5667 }
 5668 
/*
 * Check for a write delegation and do a CBGETATTR if there is one, updating
 * the attributes, as required.
 * Should I return an error if I can't get the attributes? (For now, I'll
 * just return ok.)
 *
 * nd - descriptor for the RPC being serviced
 * vp - vnode of the file the Getattr is for
 * nvap - attributes for the file; na_size may be replaced by the size the
 *     delegation holder reports and nfsvno_updfilerev() may be applied
 * attrbitp - attributes requested; only the subset selected by
 *     NFSCBGETATTR_ATTRBIT() is asked for in the callback
 * p - passed through to the functions called
 *
 * Returns 0, except for a failure (other than -1) of nfsrv_getlockfh() or
 * nfsrv_getlockfile(), or a failure of nfsvno_updfilerev().  A failure of
 * the callback itself is ignored.
 */
int
nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
{
        struct nfsstate *stp;
        struct nfslockfile *lfp;
        struct nfsclient *clp;
        struct nfsvattr nva;
        fhandle_t nfh;
        int error = 0;
        nfsattrbit_t cbbits;
        u_quad_t delegfilerev;

        /*
         * There is nothing to do if none of the requested attributes can be
         * acquired via a callback or if no write delegations are issued.
         */
        NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
        if (!NFSNONZERO_ATTRBIT(&cbbits))
                goto out;
        if (nfsrv_writedelegcnt == 0)
                goto out;

        /*
         * Get the lock file structure.
         * (A return of -1 means no associated state, so return ok.)
         * The file handle is acquired before the state lock is taken; from
         * here on every exit path must release the state lock.
         */
        error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
        NFSLOCKSTATE();
        if (!error)
                error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
        if (error) {
                NFSUNLOCKSTATE();
                if (error == -1)
                        error = 0;
                goto out;
        }

        /*
         * Now, look for a write delegation.
         * (The loop stops at the first one found; stp is left at the list
         * end marker when there is none.)
         */
        LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
                if (stp->ls_flags & NFSLCK_DELEGWRITE)
                        break;
        }
        if (stp == LIST_END(&lfp->lf_deleg)) {
                NFSUNLOCKSTATE();
                goto out;
        }
        clp = stp->ls_clp;

        /* If the clientid is not confirmed, ignore the delegation. */
        if (clp->lc_flags & LCL_NEEDSCONFIRM) {
                NFSUNLOCKSTATE();
                goto out;
        }

        /* Saved now, since stp is not referenced once the lock is dropped. */
        delegfilerev = stp->ls_filerev;
        /*
         * If the Write delegation was issued as a part of this Compound RPC
         * or if we have an Implied Clientid (used in a previous Op in this
         * compound) and it is the client the delegation was issued to,
         * just return ok.
         * I also assume that it is from the same client iff the network
         * host IP address is the same as the callback address. (Not
         * exactly correct by the RFC, but avoids a lot of Getattr
         * callbacks.)
         */
        if (nd->nd_compref == stp->ls_compref ||
            ((nd->nd_flag & ND_IMPLIEDCLID) &&
             clp->lc_clientid.qval == nd->nd_clientid.qval) ||
             nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
                NFSUNLOCKSTATE();
                goto out;
        }

        /*
         * We are now done with the delegation state structure, so the
         * state lock is released on both branches below, after which it
         * is ok to tsleep().
         */

        /*
         * Now, we must do the CB Getattr callback, to see if Change or Size
         * has changed.  The callback is only attempted while the client's
         * lease has not expired.
         * NOTE(review): clp is still used by nfsrv_docallback() after the
         * state lock has been dropped; presumably that is safe here, but
         * confirm against nfsrv_docallback().
         */
        if (clp->lc_expiry >= NFSD_MONOSEC) {
                NFSUNLOCKSTATE();
                NFSVNO_ATTRINIT(&nva);
                /* All bits set marks "no change value returned". */
                nva.na_filerev = NFS64BITSSET;
                error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
                    0, &nfh, &nva, &cbbits, 0, p);
                if (!error) {
                        /*
                         * Update when the client reports a newer change
                         * value than the delegation's or a different size.
                         */
                        if ((nva.na_filerev != NFS64BITSSET &&
                            nva.na_filerev > delegfilerev) ||
                            (NFSVNO_ISSETSIZE(&nva) &&
                             nva.na_size != nvap->na_size)) {
                                error = nfsvno_updfilerev(vp, nvap, nd, p);
                                if (NFSVNO_ISSETSIZE(&nva))
                                        nvap->na_size = nva.na_size;
                        }
                } else
                        error = 0;      /* Ignore callback errors for now. */
        } else {
                NFSUNLOCKSTATE();
        }

out:
        NFSEXITCODE2(error, nd);
        return (error);
}
 5781 
 5782 /*
 5783  * This function looks for openowners that haven't had any opens for
 5784  * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
 5785  * is set.
 5786  */
 5787 void
 5788 nfsrv_throwawayopens(NFSPROC_T *p)
 5789 {
 5790         struct nfsclient *clp, *nclp;
 5791         struct nfsstate *stp, *nstp;
 5792         int i;
 5793 
 5794         NFSLOCKSTATE();
 5795         nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
 5796         /*
 5797          * For each client...
 5798          */
 5799         for (i = 0; i < nfsrv_clienthashsize; i++) {
 5800             LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
 5801                 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
 5802                         if (LIST_EMPTY(&stp->ls_open) &&
 5803                             (stp->ls_noopens > NFSNOOPEN ||
 5804                              (nfsrv_openpluslock * 2) >
 5805                              nfsrv_v4statelimit))
 5806                                 nfsrv_freeopenowner(stp, 0, p);
 5807                 }
 5808             }
 5809         }
 5810         NFSUNLOCKSTATE();
 5811 }
 5812 
 5813 /*
 5814  * This function checks to see if the credentials are the same.
 5815  * Returns 1 for not same, 0 otherwise.
 5816  */
 5817 static int
 5818 nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
 5819 {
 5820 
 5821         if (nd->nd_flag & ND_GSS) {
 5822                 if (!(clp->lc_flags & LCL_GSS))
 5823                         return (1);
 5824                 if (clp->lc_flags & LCL_NAME) {
 5825                         if (nd->nd_princlen != clp->lc_namelen ||
 5826                             NFSBCMP(nd->nd_principal, clp->lc_name,
 5827                                 clp->lc_namelen))
 5828                                 return (1);
 5829                         else
 5830                                 return (0);
 5831                 }
 5832                 if (nd->nd_cred->cr_uid == clp->lc_uid)
 5833                         return (0);
 5834                 else
 5835                         return (1);
 5836         } else if (clp->lc_flags & LCL_GSS)
 5837                 return (1);
 5838         /*
 5839          * For AUTH_SYS, allow the same uid or root. (This is underspecified
 5840          * in RFC3530, which talks about principals, but doesn't say anything
 5841          * about uids for AUTH_SYS.)
 5842          */
 5843         if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
 5844                 return (0);
 5845         else
 5846                 return (1);
 5847 }
 5848 
 5849 /*
 5850  * Calculate the lease expiry time.
 5851  */
 5852 static time_t
 5853 nfsrv_leaseexpiry(void)
 5854 {
 5855 
 5856         if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
 5857                 return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
 5858         return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
 5859 }
 5860 
 5861 /*
 5862  * Delay the delegation timeout as far as ls_delegtimelimit, as required.
 5863  */
 5864 static void
 5865 nfsrv_delaydelegtimeout(struct nfsstate *stp)
 5866 {
 5867 
 5868         if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
 5869                 return;
 5870 
 5871         if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
 5872             stp->ls_delegtime < stp->ls_delegtimelimit) {
 5873                 stp->ls_delegtime += nfsrv_lease;
 5874                 if (stp->ls_delegtime > stp->ls_delegtimelimit)
 5875                         stp->ls_delegtime = stp->ls_delegtimelimit;
 5876         }
 5877 }
 5878 
 5879 /*
 5880  * This function checks to see if there is any other state associated
 5881  * with the openowner for this Open.
 5882  * It returns 1 if there is no other state, 0 otherwise.
 5883  */
 5884 static int
 5885 nfsrv_nootherstate(struct nfsstate *stp)
 5886 {
 5887         struct nfsstate *tstp;
 5888 
 5889         LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
 5890                 if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
 5891                         return (0);
 5892         }
 5893         return (1);
 5894 }
 5895 
 5896 /*
 5897  * Create a list of lock deltas (changes to local byte range locking
 5898  * that can be rolled back using the list) and apply the changes via
 5899  * nfsvno_advlock(). Optionally, lock the list. It is expected that either
 5900  * the rollback or update function will be called after this.
 5901  * It returns an error (and rolls back, as required), if any nfsvno_advlock()
 5902  * call fails. If it returns an error, it will unlock the list.
 5903  */
 5904 static int
 5905 nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
 5906     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
 5907 {
 5908         struct nfslock *lop, *nlop;
 5909         int error = 0;
 5910 
 5911         /* Loop through the list of locks. */
 5912         lop = LIST_FIRST(&lfp->lf_locallock);
 5913         while (first < end && lop != NULL) {
 5914                 nlop = LIST_NEXT(lop, lo_lckowner);
 5915                 if (first >= lop->lo_end) {
 5916                         /* not there yet */
 5917                         lop = nlop;
 5918                 } else if (first < lop->lo_first) {
 5919                         /* new one starts before entry in list */
 5920                         if (end <= lop->lo_first) {
 5921                                 /* no overlap between old and new */
 5922                                 error = nfsrv_dolocal(vp, lfp, flags,
 5923                                     NFSLCK_UNLOCK, first, end, cfp, p);
 5924                                 if (error != 0)
 5925                                         break;
 5926                                 first = end;
 5927                         } else {
 5928                                 /* handle fragment overlapped with new one */
 5929                                 error = nfsrv_dolocal(vp, lfp, flags,
 5930                                     NFSLCK_UNLOCK, first, lop->lo_first, cfp,
 5931                                     p);
 5932                                 if (error != 0)
 5933                                         break;
 5934                                 first = lop->lo_first;
 5935                         }
 5936                 } else {
 5937                         /* new one overlaps this entry in list */
 5938                         if (end <= lop->lo_end) {
 5939                                 /* overlaps all of new one */
 5940                                 error = nfsrv_dolocal(vp, lfp, flags,
 5941                                     lop->lo_flags, first, end, cfp, p);
 5942                                 if (error != 0)
 5943                                         break;
 5944                                 first = end;
 5945                         } else {
 5946                                 /* handle fragment overlapped with new one */
 5947                                 error = nfsrv_dolocal(vp, lfp, flags,
 5948                                     lop->lo_flags, first, lop->lo_end, cfp, p);
 5949                                 if (error != 0)
 5950                                         break;
 5951                                 first = lop->lo_end;
 5952                                 lop = nlop;
 5953                         }
 5954                 }
 5955         }
 5956         if (first < end && error == 0)
 5957                 /* handle fragment past end of list */
 5958                 error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
 5959                     end, cfp, p);
 5960 
 5961         NFSEXITCODE(error);
 5962         return (error);
 5963 }
 5964 
/*
 * Local lock unlock. Unlock all byte ranges that are no longer locked
 * by NFSv4. To do this, unlock any subranges of first-->end that
 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
 * list. This list has all locks for the file held by other
 * <clientid, lockowner> tuples. The list is ordered by increasing
 * lo_first value, but may have entries that overlap each other, for
 * the case of read locks.
 *
 * vp - vnode of the file
 * lfp - lock file structure for the file
 * init_first, init_end - the byte range to be considered for unlocking
 * p - passed through to nfsrv_dolocal()
 *
 * Each pass of the outer loop finds at most one segment to unlock and
 * commits it immediately. Errors from nfsrv_dolocal() are ignored.
 */
static void
nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
    uint64_t init_end, NFSPROC_T *p)
{
        struct nfslock *lop;
        /* prevfirst is only read by the KASSERT() below, hence __unused. */
        uint64_t first, end, prevfirst __unused;

        first = init_first;
        end = init_end;
        while (first < init_end) {
                /* Loop through all nfs locks, adjusting first and end */
                prevfirst = 0;
                LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
                        /* Sanity check the list ordering and the entry. */
                        KASSERT(prevfirst <= lop->lo_first,
                            ("nfsv4 locks out of order"));
                        KASSERT(lop->lo_first < lop->lo_end,
                            ("nfsv4 bogus lock"));
                        prevfirst = lop->lo_first;
                        if (first >= lop->lo_first &&
                            first < lop->lo_end)
                                /*
                                 * Overlaps with initial part, so trim
                                 * off that initial part by moving first past
                                 * it.
                                 */
                                first = lop->lo_end;
                        else if (end > lop->lo_first &&
                            lop->lo_first > first) {
                                /*
                                 * This lock defines the end of the
                                 * segment to unlock, so set end to the
                                 * start of it and break out of the loop.
                                 */
                                end = lop->lo_first;
                                break;
                        }
                        if (first >= end)
                                /*
                                 * There is no segment left to do, so
                                 * break out of this loop and then exit
                                 * the outer while() since first will be set
                                 * to end, which must equal init_end here.
                                 */
                                break;
                }
                if (first < end) {
                        /*
                         * Unlock this segment.
                         * (NFSLCK_READ is given as the old lock type, so
                         * nfsrv_dolocal() sees a change and does the unlock.)
                         */
                        (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
                            NFSLCK_READ, first, end, NULL, p);
                        nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
                            first, end);
                }
                /*
                 * Now move past this segment and look for any further
                 * segment in the range, if there is one.
                 */
                first = end;
                end = init_end;
        }
}
 6034 
 6035 /*
 6036  * Do the local lock operation and update the rollback list, as required.
 6037  * Perform the rollback and return the error if nfsvno_advlock() fails.
 6038  */
 6039 static int
 6040 nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
 6041     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
 6042 {
 6043         struct nfsrollback *rlp;
 6044         int error = 0, ltype, oldltype;
 6045 
 6046         if (flags & NFSLCK_WRITE)
 6047                 ltype = F_WRLCK;
 6048         else if (flags & NFSLCK_READ)
 6049                 ltype = F_RDLCK;
 6050         else
 6051                 ltype = F_UNLCK;
 6052         if (oldflags & NFSLCK_WRITE)
 6053                 oldltype = F_WRLCK;
 6054         else if (oldflags & NFSLCK_READ)
 6055                 oldltype = F_RDLCK;
 6056         else
 6057                 oldltype = F_UNLCK;
 6058         if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
 6059                 /* nothing to do */
 6060                 goto out;
 6061         error = nfsvno_advlock(vp, ltype, first, end, p);
 6062         if (error != 0) {
 6063                 if (cfp != NULL) {
 6064                         cfp->cl_clientid.lval[0] = 0;
 6065                         cfp->cl_clientid.lval[1] = 0;
 6066                         cfp->cl_first = 0;
 6067                         cfp->cl_end = NFS64BITSSET;
 6068                         cfp->cl_flags = NFSLCK_WRITE;
 6069                         cfp->cl_ownerlen = 5;
 6070                         NFSBCOPY("LOCAL", cfp->cl_owner, 5);
 6071                 }
 6072                 nfsrv_locallock_rollback(vp, lfp, p);
 6073         } else if (ltype != F_UNLCK) {
 6074                 rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
 6075                     M_WAITOK);
 6076                 rlp->rlck_first = first;
 6077                 rlp->rlck_end = end;
 6078                 rlp->rlck_type = oldltype;
 6079                 LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
 6080         }
 6081 
 6082 out:
 6083         NFSEXITCODE(error);
 6084         return (error);
 6085 }
 6086 
 6087 /*
 6088  * Roll back local lock changes and free up the rollback list.
 6089  */
 6090 static void
 6091 nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
 6092 {
 6093         struct nfsrollback *rlp, *nrlp;
 6094 
 6095         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
 6096                 (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
 6097                     rlp->rlck_end, p);
 6098                 free(rlp, M_NFSDROLLBACK);
 6099         }
 6100         LIST_INIT(&lfp->lf_rollback);
 6101 }
 6102 
 6103 /*
 6104  * Update local lock list and delete rollback list (ie now committed to the
 6105  * local locks). Most of the work is done by the internal function.
 6106  */
 6107 static void
 6108 nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
 6109     uint64_t end)
 6110 {
 6111         struct nfsrollback *rlp, *nrlp;
 6112         struct nfslock *new_lop, *other_lop;
 6113 
 6114         new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
 6115         if (flags & (NFSLCK_READ | NFSLCK_WRITE))
 6116                 other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
 6117                     M_WAITOK);
 6118         else
 6119                 other_lop = NULL;
 6120         new_lop->lo_flags = flags;
 6121         new_lop->lo_first = first;
 6122         new_lop->lo_end = end;
 6123         nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
 6124         if (new_lop != NULL)
 6125                 free(new_lop, M_NFSDLOCK);
 6126         if (other_lop != NULL)
 6127                 free(other_lop, M_NFSDLOCK);
 6128 
 6129         /* and get rid of the rollback list */
 6130         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
 6131                 free(rlp, M_NFSDROLLBACK);
 6132         LIST_INIT(&lfp->lf_rollback);
 6133 }
 6134 
 6135 /*
 6136  * Lock the struct nfslockfile for local lock updating.
 6137  */
 6138 static void
 6139 nfsrv_locklf(struct nfslockfile *lfp)
 6140 {
 6141         int gotlock;
 6142 
 6143         /* lf_usecount ensures *lfp won't be free'd */
 6144         lfp->lf_usecount++;
 6145         do {
 6146                 gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
 6147                     NFSSTATEMUTEXPTR, NULL);
 6148         } while (gotlock == 0);
 6149         lfp->lf_usecount--;
 6150 }
 6151 
/*
 * Unlock the struct nfslockfile after local lock updating.
 * This releases lf_locallock_lck, the lock acquired by nfsrv_locklf().
 */
static void
nfsrv_unlocklf(struct nfslockfile *lfp)
{

        nfsv4_unlock(&lfp->lf_locallock_lck, 0);
}
 6161 
 6162 /*
 6163  * Clear out all state for the NFSv4 server.
 6164  * Must be called by a thread that can sleep when no nfsds are running.
 6165  */
 6166 void
 6167 nfsrv_throwawayallstate(NFSPROC_T *p)
 6168 {
 6169         struct nfsclient *clp, *nclp;
 6170         struct nfslockfile *lfp, *nlfp;
 6171         int i;
 6172 
 6173         /*
 6174          * For each client, clean out the state and then free the structure.
 6175          */
 6176         for (i = 0; i < nfsrv_clienthashsize; i++) {
 6177                 LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
 6178                         nfsrv_cleanclient(clp, p);
 6179                         nfsrv_freedeleglist(&clp->lc_deleg);
 6180                         nfsrv_freedeleglist(&clp->lc_olddeleg);
 6181                         free(clp->lc_stateid, M_NFSDCLIENT);
 6182                         free(clp, M_NFSDCLIENT);
 6183                 }
 6184         }
 6185 
 6186         /*
 6187          * Also, free up any remaining lock file structures.
 6188          */
 6189         for (i = 0; i < nfsrv_lockhashsize; i++) {
 6190                 LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
 6191                         printf("nfsd unload: fnd a lock file struct\n");
 6192                         nfsrv_freenfslockfile(lfp);
 6193                 }
 6194         }
 6195 
 6196         /* And get rid of the deviceid structures and layouts. */
 6197         nfsrv_freealllayoutsanddevids();
 6198 }
 6199 
 6200 /*
 6201  * Check the sequence# for the session and slot provided as an argument.
 6202  * Also, renew the lease if the session will return NFS_OK.
 6203  */
 6204 int
 6205 nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
 6206     uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
 6207     uint32_t *sflagsp, NFSPROC_T *p)
 6208 {
 6209         struct nfsdsession *sep;
 6210         struct nfssessionhash *shp;
 6211         int error;
 6212         SVCXPRT *savxprt;
 6213 
 6214         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6215         NFSLOCKSESSION(shp);
 6216         sep = nfsrv_findsession(nd->nd_sessionid);
 6217         if (sep == NULL) {
 6218                 NFSUNLOCKSESSION(shp);
 6219                 return (NFSERR_BADSESSION);
 6220         }
 6221         error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
 6222             sep->sess_slots, NULL, NFSV4_SLOTS - 1);
 6223         if (error != 0) {
 6224                 NFSUNLOCKSESSION(shp);
 6225                 return (error);
 6226         }
 6227         if (cache_this != 0)
 6228                 nd->nd_flag |= ND_SAVEREPLY;
 6229         /* Renew the lease. */
 6230         sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
 6231         nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
 6232         nd->nd_flag |= ND_IMPLIEDCLID;
 6233 
 6234         /* Save maximum request and reply sizes. */
 6235         nd->nd_maxreq = sep->sess_maxreq;
 6236         nd->nd_maxresp = sep->sess_maxresp;
 6237 
 6238         /*
 6239          * If this session handles the backchannel, save the nd_xprt for this
 6240          * RPC, since this is the one being used.
 6241          * RFC-5661 specifies that the fore channel will be implicitly
 6242          * bound by a Sequence operation.  However, since some NFSv4.1 clients
 6243          * erroneously assumed that the back channel would be implicitly
 6244          * bound as well, do the implicit binding unless a
 6245          * BindConnectiontoSession has already been done on the session.
 6246          */
 6247         savxprt = NULL;
 6248         if (sep->sess_clp->lc_req.nr_client != NULL &&
 6249             sep->sess_cbsess.nfsess_xprt != nd->nd_xprt &&
 6250             (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0 &&
 6251             (sep->sess_clp->lc_flags & LCL_DONEBINDCONN) == 0) {
 6252                 NFSD_DEBUG(2,
 6253                     "nfsrv_checksequence: implicit back channel bind\n");
 6254                 savxprt = sep->sess_cbsess.nfsess_xprt;
 6255                 SVC_ACQUIRE(nd->nd_xprt);
 6256                 nd->nd_xprt->xp_p2 =
 6257                     sep->sess_clp->lc_req.nr_client->cl_private;
 6258                 nd->nd_xprt->xp_idletimeout = 0;        /* Disable timeout. */
 6259                 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
 6260         }
 6261 
 6262         *sflagsp = 0;
 6263         if (sep->sess_clp->lc_req.nr_client == NULL)
 6264                 *sflagsp |= NFSV4SEQ_CBPATHDOWN;
 6265         NFSUNLOCKSESSION(shp);
 6266         if (savxprt != NULL)
 6267                 SVC_RELEASE(savxprt);
 6268         if (error == NFSERR_EXPIRED) {
 6269                 *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
 6270                 error = 0;
 6271         } else if (error == NFSERR_ADMINREVOKED) {
 6272                 *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
 6273                 error = 0;
 6274         }
 6275         *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
 6276         return (0);
 6277 }
 6278 
 6279 /*
 6280  * Check/set reclaim complete for this session/clientid.
 6281  */
 6282 int
 6283 nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs)
 6284 {
 6285         struct nfsdsession *sep;
 6286         struct nfssessionhash *shp;
 6287         int error = 0;
 6288 
 6289         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6290         NFSLOCKSTATE();
 6291         NFSLOCKSESSION(shp);
 6292         sep = nfsrv_findsession(nd->nd_sessionid);
 6293         if (sep == NULL) {
 6294                 NFSUNLOCKSESSION(shp);
 6295                 NFSUNLOCKSTATE();
 6296                 return (NFSERR_BADSESSION);
 6297         }
 6298 
 6299         if (onefs != 0)
 6300                 sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS;
 6301                 /* Check to see if reclaim complete has already happened. */
 6302         else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
 6303                 error = NFSERR_COMPLETEALREADY;
 6304         else {
 6305                 sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
 6306                 nfsrv_markreclaim(sep->sess_clp);
 6307         }
 6308         NFSUNLOCKSESSION(shp);
 6309         NFSUNLOCKSTATE();
 6310         return (error);
 6311 }
 6312 
 6313 /*
 6314  * Cache the reply in a session slot.
 6315  */
 6316 void
 6317 nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m)
 6318 {
 6319         struct nfsdsession *sep;
 6320         struct nfssessionhash *shp;
 6321         char *buf, *cp;
 6322 #ifdef INET
 6323         struct sockaddr_in *sin;
 6324 #endif
 6325 #ifdef INET6
 6326         struct sockaddr_in6 *sin6;
 6327 #endif
 6328 
 6329         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6330         NFSLOCKSESSION(shp);
 6331         sep = nfsrv_findsession(nd->nd_sessionid);
 6332         if (sep == NULL) {
 6333                 NFSUNLOCKSESSION(shp);
 6334                 if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
 6335                         buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK);
 6336                         switch (nd->nd_nam->sa_family) {
 6337 #ifdef INET
 6338                         case AF_INET:
 6339                                 sin = (struct sockaddr_in *)nd->nd_nam;
 6340                                 cp = inet_ntop(sin->sin_family,
 6341                                     &sin->sin_addr.s_addr, buf,
 6342                                     INET6_ADDRSTRLEN);
 6343                                 break;
 6344 #endif
 6345 #ifdef INET6
 6346                         case AF_INET6:
 6347                                 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
 6348                                 cp = inet_ntop(sin6->sin6_family,
 6349                                     &sin6->sin6_addr, buf, INET6_ADDRSTRLEN);
 6350                                 break;
 6351 #endif
 6352                         default:
 6353                                 cp = NULL;
 6354                         }
 6355                         if (cp != NULL)
 6356                                 printf("nfsrv_cache_session: no session "
 6357                                     "IPaddr=%s\n", cp);
 6358                         else
 6359                                 printf("nfsrv_cache_session: no session\n");
 6360                         free(buf, M_TEMP);
 6361                 }
 6362                 m_freem(*m);
 6363                 return;
 6364         }
 6365         nfsv4_seqsess_cacherep(nd->nd_slotid, sep->sess_slots, nd->nd_repstat,
 6366             m);
 6367         NFSUNLOCKSESSION(shp);
 6368 }
 6369 
 6370 /*
 6371  * Search for a session that matches the sessionid.
 6372  */
 6373 static struct nfsdsession *
 6374 nfsrv_findsession(uint8_t *sessionid)
 6375 {
 6376         struct nfsdsession *sep;
 6377         struct nfssessionhash *shp;
 6378 
 6379         shp = NFSSESSIONHASH(sessionid);
 6380         LIST_FOREACH(sep, &shp->list, sess_hash) {
 6381                 if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
 6382                         break;
 6383         }
 6384         return (sep);
 6385 }
 6386 
 6387 /*
 6388  * Destroy a session.
 6389  */
 6390 int
 6391 nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
 6392 {
 6393         int error, igotlock, samesess;
 6394 
 6395         samesess = 0;
 6396         if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) &&
 6397             (nd->nd_flag & ND_HASSEQUENCE) != 0) {
 6398                 samesess = 1;
 6399                 if ((nd->nd_flag & ND_LASTOP) == 0)
 6400                         return (NFSERR_BADSESSION);
 6401         }
 6402 
 6403         /* Lock out other nfsd threads */
 6404         NFSLOCKV4ROOTMUTEX();
 6405         nfsv4_relref(&nfsv4rootfs_lock);
 6406         do {
 6407                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 6408                     NFSV4ROOTLOCKMUTEXPTR, NULL);
 6409         } while (igotlock == 0);
 6410         NFSUNLOCKV4ROOTMUTEX();
 6411 
 6412         error = nfsrv_freesession(NULL, sessionid);
 6413         if (error == 0 && samesess != 0)
 6414                 nd->nd_flag &= ~ND_HASSEQUENCE;
 6415 
 6416         NFSLOCKV4ROOTMUTEX();
 6417         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 6418         NFSUNLOCKV4ROOTMUTEX();
 6419         return (error);
 6420 }
 6421 
 6422 /*
 6423  * Bind a connection to a session.
 6424  * For now, only certain variants are supported, since the current session
 6425  * structure can only handle a single backchannel entry, which will be
 6426  * applied to all connections if it is set.
 6427  */
 6428 int
 6429 nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp)
 6430 {
 6431         struct nfssessionhash *shp;
 6432         struct nfsdsession *sep;
 6433         struct nfsclient *clp;
 6434         SVCXPRT *savxprt;
 6435         int error;
 6436 
 6437         error = 0;
 6438         savxprt = NULL;
 6439         shp = NFSSESSIONHASH(sessionid);
 6440         NFSLOCKSTATE();
 6441         NFSLOCKSESSION(shp);
 6442         sep = nfsrv_findsession(sessionid);
 6443         if (sep != NULL) {
 6444                 clp = sep->sess_clp;
 6445                 if (*foreaftp == NFSCDFC4_BACK ||
 6446                     *foreaftp == NFSCDFC4_BACK_OR_BOTH ||
 6447                     *foreaftp == NFSCDFC4_FORE_OR_BOTH) {
 6448                         /* Try to set up a backchannel. */
 6449                         if (clp->lc_req.nr_client == NULL) {
 6450                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire "
 6451                                     "backchannel\n");
 6452                                 clp->lc_req.nr_client = (struct __rpc_client *)
 6453                                     clnt_bck_create(nd->nd_xprt->xp_socket,
 6454                                     sep->sess_cbprogram, NFSV4_CBVERS);
 6455                         }
 6456                         if (clp->lc_req.nr_client != NULL) {
 6457                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: set up "
 6458                                     "backchannel\n");
 6459                                 savxprt = sep->sess_cbsess.nfsess_xprt;
 6460                                 SVC_ACQUIRE(nd->nd_xprt);
 6461                                 nd->nd_xprt->xp_p2 =
 6462                                     clp->lc_req.nr_client->cl_private;
 6463                                 /* Disable idle timeout. */
 6464                                 nd->nd_xprt->xp_idletimeout = 0;
 6465                                 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
 6466                                 sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN;
 6467                                 clp->lc_flags |= LCL_DONEBINDCONN;
 6468                                 if (*foreaftp == NFSCDFS4_BACK)
 6469                                         *foreaftp = NFSCDFS4_BACK;
 6470                                 else
 6471                                         *foreaftp = NFSCDFS4_BOTH;
 6472                         } else if (*foreaftp != NFSCDFC4_BACK) {
 6473                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set "
 6474                                     "up backchannel\n");
 6475                                 sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
 6476                                 clp->lc_flags |= LCL_DONEBINDCONN;
 6477                                 *foreaftp = NFSCDFS4_FORE;
 6478                         } else {
 6479                                 error = NFSERR_NOTSUPP;
 6480                                 printf("nfsrv_bindconnsess: Can't add "
 6481                                     "backchannel\n");
 6482                         }
 6483                 } else {
 6484                         NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n");
 6485                         clp->lc_flags |= LCL_DONEBINDCONN;
 6486                         *foreaftp = NFSCDFS4_FORE;
 6487                 }
 6488         } else
 6489                 error = NFSERR_BADSESSION;
 6490         NFSUNLOCKSESSION(shp);
 6491         NFSUNLOCKSTATE();
 6492         if (savxprt != NULL)
 6493                 SVC_RELEASE(savxprt);
 6494         return (error);
 6495 }
 6496 
 6497 /*
 6498  * Free up a session structure.
 6499  */
 6500 static int
 6501 nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
 6502 {
 6503         struct nfssessionhash *shp;
 6504         int i;
 6505 
 6506         NFSLOCKSTATE();
 6507         if (sep == NULL) {
 6508                 shp = NFSSESSIONHASH(sessionid);
 6509                 NFSLOCKSESSION(shp);
 6510                 sep = nfsrv_findsession(sessionid);
 6511         } else {
 6512                 shp = NFSSESSIONHASH(sep->sess_sessionid);
 6513                 NFSLOCKSESSION(shp);
 6514         }
 6515         if (sep != NULL) {
 6516                 sep->sess_refcnt--;
 6517                 if (sep->sess_refcnt > 0) {
 6518                         NFSUNLOCKSESSION(shp);
 6519                         NFSUNLOCKSTATE();
 6520                         return (NFSERR_BACKCHANBUSY);
 6521                 }
 6522                 LIST_REMOVE(sep, sess_hash);
 6523                 LIST_REMOVE(sep, sess_list);
 6524         }
 6525         NFSUNLOCKSESSION(shp);
 6526         NFSUNLOCKSTATE();
 6527         if (sep == NULL)
 6528                 return (NFSERR_BADSESSION);
 6529         for (i = 0; i < NFSV4_SLOTS; i++)
 6530                 if (sep->sess_slots[i].nfssl_reply != NULL)
 6531                         m_freem(sep->sess_slots[i].nfssl_reply);
 6532         if (sep->sess_cbsess.nfsess_xprt != NULL)
 6533                 SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
 6534         free(sep, M_NFSDSESSION);
 6535         return (0);
 6536 }
 6537 
 6538 /*
 6539  * Free a stateid.
 6540  * RFC5661 says that it should fail when there are associated opens, locks
 6541  * or delegations. Since stateids represent opens, I don't see how you can
 6542  * free an open stateid (it will be free'd when closed), so this function
 6543  * only works for lock stateids (freeing the lock_owner) or delegations.
 6544  */
 6545 int
 6546 nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
 6547     NFSPROC_T *p)
 6548 {
 6549         struct nfsclient *clp;
 6550         struct nfsstate *stp;
 6551         int error;
 6552 
 6553         NFSLOCKSTATE();
 6554         /*
 6555          * Look up the stateid
 6556          */
 6557         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 6558             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 6559         if (error == 0) {
 6560                 /* First, check for a delegation. */
 6561                 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
 6562                         if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
 6563                             NFSX_STATEIDOTHER))
 6564                                 break;
 6565                 }
 6566                 if (stp != NULL) {
 6567                         nfsrv_freedeleg(stp);
 6568                         NFSUNLOCKSTATE();
 6569                         return (error);
 6570                 }
 6571         }
 6572         /* Not a delegation, try for a lock_owner. */
 6573         if (error == 0)
 6574                 error = nfsrv_getstate(clp, stateidp, 0, &stp);
 6575         if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
 6576             NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
 6577                 /* Not a lock_owner stateid. */
 6578                 error = NFSERR_LOCKSHELD;
 6579         if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
 6580                 error = NFSERR_LOCKSHELD;
 6581         if (error == 0)
 6582                 nfsrv_freelockowner(stp, NULL, 0, p);
 6583         NFSUNLOCKSTATE();
 6584         return (error);
 6585 }
 6586 
 6587 /*
 6588  * Test a stateid.
 6589  */
 6590 int
 6591 nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
 6592     NFSPROC_T *p)
 6593 {
 6594         struct nfsclient *clp;
 6595         struct nfsstate *stp;
 6596         int error;
 6597 
 6598         NFSLOCKSTATE();
 6599         /*
 6600          * Look up the stateid
 6601          */
 6602         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 6603             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 6604         if (error == 0)
 6605                 error = nfsrv_getstate(clp, stateidp, 0, &stp);
 6606         if (error == 0 && stateidp->seqid != 0 &&
 6607             SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid))
 6608                 error = NFSERR_OLDSTATEID;
 6609         NFSUNLOCKSTATE();
 6610         return (error);
 6611 }
 6612 
 6613 /*
 6614  * Generate the xdr for an NFSv4.1 CBSequence Operation.
 6615  */
 6616 static int
 6617 nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
 6618     int dont_replycache, struct nfsdsession **sepp)
 6619 {
 6620         struct nfsdsession *sep;
 6621         uint32_t *tl, slotseq = 0;
 6622         int maxslot, slotpos;
 6623         uint8_t sessionid[NFSX_V4SESSIONID];
 6624         int error;
 6625 
 6626         error = nfsv4_getcbsession(clp, sepp);
 6627         if (error != 0)
 6628                 return (error);
 6629         sep = *sepp;
 6630         (void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot,
 6631             &slotseq, sessionid);
 6632         KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
 6633 
 6634         /* Build the Sequence arguments. */
 6635         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
 6636         bcopy(sessionid, tl, NFSX_V4SESSIONID);
 6637         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
 6638         nd->nd_slotseq = tl;
 6639         *tl++ = txdr_unsigned(slotseq);
 6640         *tl++ = txdr_unsigned(slotpos);
 6641         *tl++ = txdr_unsigned(maxslot);
 6642         if (dont_replycache == 0)
 6643                 *tl++ = newnfs_true;
 6644         else
 6645                 *tl++ = newnfs_false;
 6646         *tl = 0;                        /* No referring call list, for now. */
 6647         nd->nd_flag |= ND_HASSEQUENCE;
 6648         return (0);
 6649 }
 6650 
 6651 /*
 6652  * Get a session for the callback.
 6653  */
 6654 static int
 6655 nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
 6656 {
 6657         struct nfsdsession *sep;
 6658 
 6659         NFSLOCKSTATE();
 6660         LIST_FOREACH(sep, &clp->lc_session, sess_list) {
 6661                 if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
 6662                         break;
 6663         }
 6664         if (sep == NULL) {
 6665                 NFSUNLOCKSTATE();
 6666                 return (NFSERR_BADSESSION);
 6667         }
 6668         sep->sess_refcnt++;
 6669         *sepp = sep;
 6670         NFSUNLOCKSTATE();
 6671         return (0);
 6672 }
 6673 
 6674 /*
 6675  * Free up all backchannel xprts.  This needs to be done when the nfsd threads
 6676  * exit, since those transports will all be going away.
 6677  * This is only called after all the nfsd threads are done performing RPCs,
 6678  * so locking shouldn't be an issue.
 6679  */
 6680 void
 6681 nfsrv_freeallbackchannel_xprts(void)
 6682 {
 6683         struct nfsdsession *sep;
 6684         struct nfsclient *clp;
 6685         SVCXPRT *xprt;
 6686         int i;
 6687 
 6688         for (i = 0; i < nfsrv_clienthashsize; i++) {
 6689                 LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
 6690                         LIST_FOREACH(sep, &clp->lc_session, sess_list) {
 6691                                 xprt = sep->sess_cbsess.nfsess_xprt;
 6692                                 sep->sess_cbsess.nfsess_xprt = NULL;
 6693                                 if (xprt != NULL)
 6694                                         SVC_RELEASE(xprt);
 6695                         }
 6696                 }
 6697         }
 6698 }
 6699 
 6700 /*
 6701  * Do a layout commit.  Actually just call nfsrv_updatemdsattr().
 6702  * I have no idea if the rest of these arguments will ever be useful?
 6703  */
 6704 int
 6705 nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype,
 6706     int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len,
 6707     int hasnewmtime, struct timespec *newmtimep, int reclaim,
 6708     nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep,
 6709     uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p)
 6710 {
 6711         struct nfsvattr na;
 6712         int error;
 6713 
 6714         error = nfsrv_updatemdsattr(vp, &na, p);
 6715         if (error == 0) {
 6716                 *hasnewsizep = 1;
 6717                 *newsizep = na.na_size;
 6718         }
 6719         return (error);
 6720 }
 6721 
 6722 /*
 6723  * Try and get a layout.
 6724  */
 6725 int
 6726 nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp,
 6727     int layouttype, int *iomode, uint64_t *offset, uint64_t *len,
 6728     uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose,
 6729     int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p)
 6730 {
 6731         struct nfslayouthash *lhyp;
 6732         struct nfslayout *lyp;
 6733         char *devid;
 6734         fhandle_t fh, *dsfhp;
 6735         int error, mirrorcnt;
 6736 
 6737         if (nfsrv_devidcnt == 0)
 6738                 return (NFSERR_UNKNLAYOUTTYPE);
 6739 
 6740         if (*offset != 0)
 6741                 printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset,
 6742                     (uintmax_t)*len);
 6743         error = nfsvno_getfh(vp, &fh, p);
 6744         NFSD_DEBUG(4, "layoutget getfh=%d\n", error);
 6745         if (error != 0)
 6746                 return (error);
 6747 
 6748         /*
 6749          * For now, all layouts are for entire files.
 6750          * Only issue Read/Write layouts if requested for a non-readonly fs.
 6751          */
 6752         if (NFSVNO_EXRDONLY(exp)) {
 6753                 if (*iomode == NFSLAYOUTIOMODE_RW)
 6754                         return (NFSERR_LAYOUTTRYLATER);
 6755                 *iomode = NFSLAYOUTIOMODE_READ;
 6756         }
 6757         if (*iomode != NFSLAYOUTIOMODE_RW)
 6758                 *iomode = NFSLAYOUTIOMODE_READ;
 6759 
 6760         /*
 6761          * Check to see if a write layout can be issued for this file.
 6762          * This is used during mirror recovery to avoid RW layouts being
 6763          * issued for a file while it is being copied to the recovered
 6764          * mirror.
 6765          */
 6766         if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0)
 6767                 return (NFSERR_LAYOUTTRYLATER);
 6768 
 6769         *retonclose = 0;
 6770         *offset = 0;
 6771         *len = UINT64_MAX;
 6772 
 6773         /* First, see if a layout already exists and return if found. */
 6774         lhyp = NFSLAYOUTHASH(&fh);
 6775         NFSLOCKLAYOUT(lhyp);
 6776         error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp);
 6777         NFSD_DEBUG(4, "layoutget findlay=%d\n", error);
 6778         /*
 6779          * Not sure if the seqid must be the same, so I won't check it.
 6780          */
 6781         if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] ||
 6782             stateidp->other[1] != lyp->lay_stateid.other[1] ||
 6783             stateidp->other[2] != lyp->lay_stateid.other[2])) {
 6784                 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
 6785                         NFSUNLOCKLAYOUT(lhyp);
 6786                         NFSD_DEBUG(1, "ret bad stateid\n");
 6787                         return (NFSERR_BADSTATEID);
 6788                 }
 6789                 /*
 6790                  * I believe we get here because there is a race between
 6791                  * the client processing the CBLAYOUTRECALL and the layout
 6792                  * being deleted here on the server.
 6793                  * The client has now done a LayoutGet with a non-layout
 6794                  * stateid, as it would when there is no layout.
 6795                  * As such, free this layout and set error == NFSERR_BADSTATEID
 6796                  * so the code below will create a new layout structure as
 6797                  * would happen if no layout was found.
 6798                  * "lyp" will be set before being used below, but set it NULL
 6799                  * as a safety belt.
 6800                  */
 6801                 nfsrv_freelayout(&lhyp->list, lyp);
 6802                 lyp = NULL;
 6803                 error = NFSERR_BADSTATEID;
 6804         }
 6805         if (error == 0) {
 6806                 if (lyp->lay_layoutlen > maxcnt) {
 6807                         NFSUNLOCKLAYOUT(lhyp);
 6808                         NFSD_DEBUG(1, "ret layout too small\n");
 6809                         return (NFSERR_TOOSMALL);
 6810                 }
 6811                 if (*iomode == NFSLAYOUTIOMODE_RW)
 6812                         lyp->lay_flags |= NFSLAY_RW;
 6813                 else
 6814                         lyp->lay_flags |= NFSLAY_READ;
 6815                 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
 6816                 *layoutlenp = lyp->lay_layoutlen;
 6817                 if (++lyp->lay_stateid.seqid == 0)
 6818                         lyp->lay_stateid.seqid = 1;
 6819                 stateidp->seqid = lyp->lay_stateid.seqid;
 6820                 NFSUNLOCKLAYOUT(lhyp);
 6821                 NFSD_DEBUG(4, "ret fnd layout\n");
 6822                 return (0);
 6823         }
 6824         NFSUNLOCKLAYOUT(lhyp);
 6825 
 6826         /* Find the device id and file handle. */
 6827         dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
 6828         devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
 6829         error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid);
 6830         NFSD_DEBUG(4, "layoutget devandfh=%d\n", error);
 6831         if (error == 0) {
 6832                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
 6833                         if (NFSX_V4FILELAYOUT > maxcnt)
 6834                                 error = NFSERR_TOOSMALL;
 6835                         else
 6836                                 lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp,
 6837                                     devid, vp->v_mount->mnt_stat.f_fsid);
 6838                 } else {
 6839                         if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt)
 6840                                 error = NFSERR_TOOSMALL;
 6841                         else
 6842                                 lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt,
 6843                                     &fh, dsfhp, devid,
 6844                                     vp->v_mount->mnt_stat.f_fsid);
 6845                 }
 6846         }
 6847         free(dsfhp, M_TEMP);
 6848         free(devid, M_TEMP);
 6849         if (error != 0)
 6850                 return (error);
 6851 
 6852         /*
 6853          * Now, add this layout to the list.
 6854          */
 6855         error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p);
 6856         NFSD_DEBUG(4, "layoutget addl=%d\n", error);
 6857         /*
 6858          * The lyp will be set to NULL by nfsrv_addlayout() if it
 6859          * linked the new structure into the lists.
 6860          */
 6861         free(lyp, M_NFSDSTATE);
 6862         return (error);
 6863 }
 6864 
 6865 /*
 6866  * Generate a File Layout.
 6867  */
 6868 static struct nfslayout *
 6869 nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
 6870     fhandle_t *dsfhp, char *devid, fsid_t fs)
 6871 {
 6872         uint32_t *tl;
 6873         struct nfslayout *lyp;
 6874         uint64_t pattern_offset;
 6875 
 6876         lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE,
 6877             M_WAITOK | M_ZERO);
 6878         lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES;
 6879         if (iomode == NFSLAYOUTIOMODE_RW)
 6880                 lyp->lay_flags = NFSLAY_RW;
 6881         else
 6882                 lyp->lay_flags = NFSLAY_READ;
 6883         NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
 6884         lyp->lay_clientid.qval = nd->nd_clientid.qval;
 6885         lyp->lay_fsid = fs;
 6886 
 6887         /* Fill in the xdr for the files layout. */
 6888         tl = (uint32_t *)lyp->lay_xdr;
 6889         NFSBCOPY(devid, tl, NFSX_V4DEVICEID);           /* Device ID. */
 6890         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 6891 
 6892         /*
 6893          * Make the stripe size as many 64K blocks as will fit in the stripe
 6894          * mask. Since there is only one stripe, the stripe size doesn't really
 6895          * matter, except that the Linux client will only handle an exact
 6896          * multiple of their PAGE_SIZE (usually 4K).  I chose 64K as a value
 6897          * that should cover most/all arches w.r.t. PAGE_SIZE.
 6898          */
 6899         *tl++ = txdr_unsigned(NFSFLAYUTIL_STRIPE_MASK & ~0xffff);
 6900         *tl++ = 0;                                      /* 1st stripe index. */
 6901         pattern_offset = 0;
 6902         txdr_hyper(pattern_offset, tl); tl += 2;        /* Pattern offset. */
 6903         *tl++ = txdr_unsigned(1);                       /* 1 file handle. */
 6904         *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
 6905         NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
 6906         lyp->lay_layoutlen = NFSX_V4FILELAYOUT;
 6907         return (lyp);
 6908 }
 6909 
 6910 #define FLEX_OWNERID    "999"
 6911 #define FLEX_UID0       ""
 6912 /*
 6913  * Generate a Flex File Layout.
 6914  * The FLEX_OWNERID can be any string of 3 decimal digits. Although this
 6915  * string goes on the wire, it isn't supposed to be used by the client,
 6916  * since this server uses tight coupling.
 6917  * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use
 6918  * a string of "". This works around the Linux Flex File Layout driver bug
 6919  * which uses the synthetic uid/gid strings for the "tightly coupled" case.
 6920  */
 6921 static struct nfslayout *
 6922 nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt,
 6923     fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs)
 6924 {
 6925         uint32_t *tl;
 6926         struct nfslayout *lyp;
 6927         uint64_t lenval;
 6928         int i;
 6929 
 6930         lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt),
 6931             M_NFSDSTATE, M_WAITOK | M_ZERO);
 6932         lyp->lay_type = NFSLAYOUT_FLEXFILE;
 6933         if (iomode == NFSLAYOUTIOMODE_RW)
 6934                 lyp->lay_flags = NFSLAY_RW;
 6935         else
 6936                 lyp->lay_flags = NFSLAY_READ;
 6937         NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
 6938         lyp->lay_clientid.qval = nd->nd_clientid.qval;
 6939         lyp->lay_fsid = fs;
 6940         lyp->lay_mirrorcnt = mirrorcnt;
 6941 
 6942         /* Fill in the xdr for the files layout. */
 6943         tl = (uint32_t *)lyp->lay_xdr;
 6944         lenval = 0;
 6945         txdr_hyper(lenval, tl); tl += 2;                /* Stripe unit. */
 6946         *tl++ = txdr_unsigned(mirrorcnt);               /* # of mirrors. */
 6947         for (i = 0; i < mirrorcnt; i++) {
 6948                 *tl++ = txdr_unsigned(1);               /* One stripe. */
 6949                 NFSBCOPY(devid, tl, NFSX_V4DEVICEID);   /* Device ID. */
 6950                 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 6951                 devid += NFSX_V4DEVICEID;
 6952                 *tl++ = txdr_unsigned(1);               /* Efficiency. */
 6953                 *tl++ = 0;                              /* Proxy Stateid. */
 6954                 *tl++ = 0x55555555;
 6955                 *tl++ = 0x55555555;
 6956                 *tl++ = 0x55555555;
 6957                 *tl++ = txdr_unsigned(1);               /* 1 file handle. */
 6958                 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
 6959                 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
 6960                 tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED);
 6961                 dsfhp++;
 6962                 if (nfsrv_flexlinuxhack != 0) {
 6963                         *tl++ = txdr_unsigned(strlen(FLEX_UID0));
 6964                         *tl = 0;                /* 0 pad string. */
 6965                         NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
 6966                         *tl++ = txdr_unsigned(strlen(FLEX_UID0));
 6967                         *tl = 0;                /* 0 pad string. */
 6968                         NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
 6969                 } else {
 6970                         *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
 6971                         NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
 6972                         *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
 6973                         NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
 6974                 }
 6975         }
 6976         *tl++ = txdr_unsigned(0);               /* ff_flags. */
 6977         *tl = txdr_unsigned(60);                /* Status interval hint. */
 6978         lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt);
 6979         return (lyp);
 6980 }
 6981 
 6982 /*
 6983  * Parse and process Flex File errors returned via LayoutReturn.
 6984  */
 6985 static void
 6986 nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt,
 6987     NFSPROC_T *p)
 6988 {
 6989         uint32_t *tl;
 6990         int cnt, errcnt, i, j, opnum, stat;
 6991         char devid[NFSX_V4DEVICEID];
 6992 
 6993         tl = layp;
 6994         cnt = fxdr_unsigned(int, *tl++);
 6995         NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt);
 6996         for (i = 0; i < cnt; i++) {
 6997                 /* Skip offset, length and stateid for now. */
 6998                 tl += (4 + NFSX_STATEID / NFSX_UNSIGNED);
 6999                 errcnt = fxdr_unsigned(int, *tl++);
 7000                 NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt);
 7001                 for (j = 0; j < errcnt; j++) {
 7002                         NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
 7003                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 7004                         stat = fxdr_unsigned(int, *tl++);
 7005                         opnum = fxdr_unsigned(int, *tl++);
 7006                         NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum,
 7007                             stat);
 7008                         /*
 7009                          * Except for NFSERR_ACCES and NFSERR_STALE errors,
 7010                          * disable the mirror.
 7011                          */
 7012                         if (stat != NFSERR_ACCES && stat != NFSERR_STALE)
 7013                                 nfsrv_delds(devid, p);
 7014                 }
 7015         }
 7016 }
 7017 
 7018 /*
 7019  * This function removes all flex file layouts which has a mirror with
 7020  * a device id that matches the argument.
 7021  * Called when the DS represented by the device id has failed.
 7022  */
 7023 void
 7024 nfsrv_flexmirrordel(char *devid, NFSPROC_T *p)
 7025 {
 7026         uint32_t *tl;
 7027         struct nfslayout *lyp, *nlyp;
 7028         struct nfslayouthash *lhyp;
 7029         struct nfslayouthead loclyp;
 7030         int i, j;
 7031 
 7032         NFSD_DEBUG(4, "flexmirrordel\n");
 7033         /* Move all layouts found onto a local list. */
 7034         TAILQ_INIT(&loclyp);
 7035         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7036                 lhyp = &nfslayouthash[i];
 7037                 NFSLOCKLAYOUT(lhyp);
 7038                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7039                         if (lyp->lay_type == NFSLAYOUT_FLEXFILE &&
 7040                             lyp->lay_mirrorcnt > 1) {
 7041                                 NFSD_DEBUG(4, "possible match\n");
 7042                                 tl = lyp->lay_xdr;
 7043                                 tl += 3;
 7044                                 for (j = 0; j < lyp->lay_mirrorcnt; j++) {
 7045                                         tl++;
 7046                                         if (NFSBCMP(devid, tl, NFSX_V4DEVICEID)
 7047                                             == 0) {
 7048                                                 /* Found one. */
 7049                                                 NFSD_DEBUG(4, "fnd one\n");
 7050                                                 TAILQ_REMOVE(&lhyp->list, lyp,
 7051                                                     lay_list);
 7052                                                 TAILQ_INSERT_HEAD(&loclyp, lyp,
 7053                                                     lay_list);
 7054                                                 break;
 7055                                         }
 7056                                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED +
 7057                                             NFSM_RNDUP(NFSX_V4PNFSFH) /
 7058                                             NFSX_UNSIGNED + 11 * NFSX_UNSIGNED);
 7059                                 }
 7060                         }
 7061                 }
 7062                 NFSUNLOCKLAYOUT(lhyp);
 7063         }
 7064 
 7065         /* Now, try to do a Layout recall for each one found. */
 7066         TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) {
 7067                 NFSD_DEBUG(4, "do layout recall\n");
 7068                 /*
 7069                  * The layout stateid.seqid needs to be incremented
 7070                  * before doing a LAYOUT_RECALL callback.
 7071                  */
 7072                 if (++lyp->lay_stateid.seqid == 0)
 7073                         lyp->lay_stateid.seqid = 1;
 7074                 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
 7075                     &lyp->lay_fh, lyp, 1, lyp->lay_type, p);
 7076                 nfsrv_freelayout(&loclyp, lyp);
 7077         }
 7078 }
 7079 
 7080 /*
 7081  * Do a recall callback to the client for this layout.
 7082  */
 7083 static int
 7084 nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp,
 7085     struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p)
 7086 {
 7087         struct nfsclient *clp;
 7088         int error;
 7089 
 7090         NFSD_DEBUG(4, "nfsrv_recalllayout\n");
 7091         error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0),
 7092             0, NULL, p);
 7093         NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error);
 7094         if (error != 0) {
 7095                 printf("nfsrv_recalllayout: getclient err=%d\n", error);
 7096                 return (error);
 7097         }
 7098         if ((clp->lc_flags & LCL_NFSV41) != 0) {
 7099                 error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL,
 7100                     stateidp, changed, fhp, NULL, NULL, laytype, p);
 7101                 /* If lyp != NULL, handle an error return here. */
 7102                 if (error != 0 && lyp != NULL) {
 7103                         NFSDRECALLLOCK();
 7104                         /*
 7105                          * Mark it returned, since no layout recall
 7106                          * has been done.
 7107                          * All errors seem to be non-recoverable, although
 7108                          * NFSERR_NOMATCHLAYOUT is a normal event.
 7109                          */
 7110                         if ((lyp->lay_flags & NFSLAY_RECALL) != 0) {
 7111                                 lyp->lay_flags |= NFSLAY_RETURNED;
 7112                                 wakeup(lyp);
 7113                         }
 7114                         NFSDRECALLUNLOCK();
 7115                         if (error != NFSERR_NOMATCHLAYOUT)
 7116                                 printf("nfsrv_recalllayout: err=%d\n", error);
 7117                 }
 7118         } else
 7119                 printf("nfsrv_recalllayout: clp not NFSv4.1\n");
 7120         return (error);
 7121 }
 7122 
 7123 /*
 7124  * Find a layout to recall when we exceed our high water mark.
 7125  */
 7126 void
 7127 nfsrv_recalloldlayout(NFSPROC_T *p)
 7128 {
 7129         struct nfslayouthash *lhyp;
 7130         struct nfslayout *lyp;
 7131         nfsquad_t clientid;
 7132         nfsv4stateid_t stateid;
 7133         fhandle_t fh;
 7134         int error, laytype = 0, ret;
 7135 
 7136         lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize];
 7137         NFSLOCKLAYOUT(lhyp);
 7138         TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) {
 7139                 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
 7140                         lyp->lay_flags |= NFSLAY_CALLB;
 7141                         /*
 7142                          * The layout stateid.seqid needs to be incremented
 7143                          * before doing a LAYOUT_RECALL callback.
 7144                          */
 7145                         if (++lyp->lay_stateid.seqid == 0)
 7146                                 lyp->lay_stateid.seqid = 1;
 7147                         clientid = lyp->lay_clientid;
 7148                         stateid = lyp->lay_stateid;
 7149                         NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh));
 7150                         laytype = lyp->lay_type;
 7151                         break;
 7152                 }
 7153         }
 7154         NFSUNLOCKLAYOUT(lhyp);
 7155         if (lyp != NULL) {
 7156                 error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0,
 7157                     laytype, p);
 7158                 if (error != 0 && error != NFSERR_NOMATCHLAYOUT)
 7159                         NFSD_DEBUG(4, "recallold=%d\n", error);
 7160                 if (error != 0) {
 7161                         NFSLOCKLAYOUT(lhyp);
 7162                         /*
 7163                          * Since the hash list was unlocked, we need to
 7164                          * find it again.
 7165                          */
 7166                         ret = nfsrv_findlayout(&clientid, &fh, laytype, p,
 7167                             &lyp);
 7168                         if (ret == 0 &&
 7169                             (lyp->lay_flags & NFSLAY_CALLB) != 0 &&
 7170                             lyp->lay_stateid.other[0] == stateid.other[0] &&
 7171                             lyp->lay_stateid.other[1] == stateid.other[1] &&
 7172                             lyp->lay_stateid.other[2] == stateid.other[2]) {
 7173                                 /*
 7174                                  * The client no longer knows this layout, so
 7175                                  * it can be free'd now.
 7176                                  */
 7177                                 if (error == NFSERR_NOMATCHLAYOUT)
 7178                                         nfsrv_freelayout(&lhyp->list, lyp);
 7179                                 else {
 7180                                         /*
 7181                                          * Leave it to be tried later by
 7182                                          * clearing NFSLAY_CALLB and moving
 7183                                          * it to the head of the list, so it
 7184                                          * won't be tried again for a while.
 7185                                          */
 7186                                         lyp->lay_flags &= ~NFSLAY_CALLB;
 7187                                         TAILQ_REMOVE(&lhyp->list, lyp,
 7188                                             lay_list);
 7189                                         TAILQ_INSERT_HEAD(&lhyp->list, lyp,
 7190                                             lay_list);
 7191                                 }
 7192                         }
 7193                         NFSUNLOCKLAYOUT(lhyp);
 7194                 }
 7195         }
 7196 }
 7197 
 7198 /*
 7199  * Try and return layout(s).
 7200  */
 7201 int
 7202 nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp,
 7203     int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim,
 7204     int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp,
 7205     struct ucred *cred, NFSPROC_T *p)
 7206 {
 7207         struct nfsvattr na;
 7208         struct nfslayouthash *lhyp;
 7209         struct nfslayout *lyp;
 7210         fhandle_t fh;
 7211         int error = 0;
 7212 
 7213         *fndp = 0;
 7214         if (kind == NFSV4LAYOUTRET_FILE) {
 7215                 error = nfsvno_getfh(vp, &fh, p);
 7216                 if (error == 0) {
 7217                         error = nfsrv_updatemdsattr(vp, &na, p);
 7218                         if (error != 0)
 7219                                 printf("nfsrv_layoutreturn: updatemdsattr"
 7220                                     " failed=%d\n", error);
 7221                 }
 7222                 if (error == 0) {
 7223                         if (reclaim == newnfs_true) {
 7224                                 error = nfsrv_checkgrace(NULL, NULL,
 7225                                     NFSLCK_RECLAIM);
 7226                                 if (error != NFSERR_NOGRACE)
 7227                                         error = 0;
 7228                                 return (error);
 7229                         }
 7230                         lhyp = NFSLAYOUTHASH(&fh);
 7231                         NFSDRECALLLOCK();
 7232                         NFSLOCKLAYOUT(lhyp);
 7233                         error = nfsrv_findlayout(&nd->nd_clientid, &fh,
 7234                             layouttype, p, &lyp);
 7235                         NFSD_DEBUG(4, "layoutret findlay=%d\n", error);
 7236                         if (error == 0 &&
 7237                             stateidp->other[0] == lyp->lay_stateid.other[0] &&
 7238                             stateidp->other[1] == lyp->lay_stateid.other[1] &&
 7239                             stateidp->other[2] == lyp->lay_stateid.other[2]) {
 7240                                 NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d"
 7241                                     " %x %x %x laystateid %d %x %x %x"
 7242                                     " off=%ju len=%ju flgs=0x%x\n",
 7243                                     stateidp->seqid, stateidp->other[0],
 7244                                     stateidp->other[1], stateidp->other[2],
 7245                                     lyp->lay_stateid.seqid,
 7246                                     lyp->lay_stateid.other[0],
 7247                                     lyp->lay_stateid.other[1],
 7248                                     lyp->lay_stateid.other[2],
 7249                                     (uintmax_t)offset, (uintmax_t)len,
 7250                                     lyp->lay_flags);
 7251                                 if (++lyp->lay_stateid.seqid == 0)
 7252                                         lyp->lay_stateid.seqid = 1;
 7253                                 stateidp->seqid = lyp->lay_stateid.seqid;
 7254                                 if (offset == 0 && len == UINT64_MAX) {
 7255                                         if ((iomode & NFSLAYOUTIOMODE_READ) !=
 7256                                             0)
 7257                                                 lyp->lay_flags &= ~NFSLAY_READ;
 7258                                         if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
 7259                                                 lyp->lay_flags &= ~NFSLAY_RW;
 7260                                         if ((lyp->lay_flags & (NFSLAY_READ |
 7261                                             NFSLAY_RW)) == 0)
 7262                                                 nfsrv_freelayout(&lhyp->list,
 7263                                                     lyp);
 7264                                         else
 7265                                                 *fndp = 1;
 7266                                 } else
 7267                                         *fndp = 1;
 7268                         }
 7269                         NFSUNLOCKLAYOUT(lhyp);
 7270                         /* Search the nfsrv_recalllist for a match. */
 7271                         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 7272                                 if (NFSBCMP(&lyp->lay_fh, &fh,
 7273                                     sizeof(fh)) == 0 &&
 7274                                     lyp->lay_clientid.qval ==
 7275                                     nd->nd_clientid.qval &&
 7276                                     stateidp->other[0] ==
 7277                                     lyp->lay_stateid.other[0] &&
 7278                                     stateidp->other[1] ==
 7279                                     lyp->lay_stateid.other[1] &&
 7280                                     stateidp->other[2] ==
 7281                                     lyp->lay_stateid.other[2]) {
 7282                                         lyp->lay_flags |= NFSLAY_RETURNED;
 7283                                         wakeup(lyp);
 7284                                         error = 0;
 7285                                 }
 7286                         }
 7287                         NFSDRECALLUNLOCK();
 7288                 }
 7289                 if (layouttype == NFSLAYOUT_FLEXFILE)
 7290                         nfsrv_flexlayouterr(nd, layp, maxcnt, p);
 7291         } else if (kind == NFSV4LAYOUTRET_FSID)
 7292                 nfsrv_freelayouts(&nd->nd_clientid,
 7293                     &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode);
 7294         else if (kind == NFSV4LAYOUTRET_ALL)
 7295                 nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode);
 7296         else
 7297                 error = NFSERR_INVAL;
 7298         if (error == -1)
 7299                 error = 0;
 7300         return (error);
 7301 }
 7302 
 7303 /*
 7304  * Look for an existing layout.
 7305  */
 7306 static int
 7307 nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
 7308     NFSPROC_T *p, struct nfslayout **lypp)
 7309 {
 7310         struct nfslayouthash *lhyp;
 7311         struct nfslayout *lyp;
 7312         int ret;
 7313 
 7314         *lypp = NULL;
 7315         ret = 0;
 7316         lhyp = NFSLAYOUTHASH(fhp);
 7317         TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
 7318                 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
 7319                     lyp->lay_clientid.qval == clientidp->qval &&
 7320                     lyp->lay_type == laytype)
 7321                         break;
 7322         }
 7323         if (lyp != NULL)
 7324                 *lypp = lyp;
 7325         else
 7326                 ret = -1;
 7327         return (ret);
 7328 }
 7329 
 7330 /*
 7331  * Add the new layout, as required.
 7332  */
 7333 static int
 7334 nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
 7335     nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p)
 7336 {
 7337         struct nfsclient *clp;
 7338         struct nfslayouthash *lhyp;
 7339         struct nfslayout *lyp, *nlyp;
 7340         fhandle_t *fhp;
 7341         int error;
 7342 
 7343         KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0,
 7344             ("nfsrv_layoutget: no nd_clientid\n"));
 7345         lyp = *lypp;
 7346         fhp = &lyp->lay_fh;
 7347         NFSLOCKSTATE();
 7348         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 7349             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 7350         if (error != 0) {
 7351                 NFSUNLOCKSTATE();
 7352                 return (error);
 7353         }
 7354         lyp->lay_stateid.seqid = stateidp->seqid = 1;
 7355         lyp->lay_stateid.other[0] = stateidp->other[0] =
 7356             clp->lc_clientid.lval[0];
 7357         lyp->lay_stateid.other[1] = stateidp->other[1] =
 7358             clp->lc_clientid.lval[1];
 7359         lyp->lay_stateid.other[2] = stateidp->other[2] =
 7360             nfsrv_nextstateindex(clp);
 7361         NFSUNLOCKSTATE();
 7362 
 7363         lhyp = NFSLAYOUTHASH(fhp);
 7364         NFSLOCKLAYOUT(lhyp);
 7365         TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) {
 7366                 if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
 7367                     nlyp->lay_clientid.qval == nd->nd_clientid.qval)
 7368                         break;
 7369         }
 7370         if (nlyp != NULL) {
 7371                 /* A layout already exists, so use it. */
 7372                 nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW));
 7373                 NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen);
 7374                 *layoutlenp = nlyp->lay_layoutlen;
 7375                 if (++nlyp->lay_stateid.seqid == 0)
 7376                         nlyp->lay_stateid.seqid = 1;
 7377                 stateidp->seqid = nlyp->lay_stateid.seqid;
 7378                 stateidp->other[0] = nlyp->lay_stateid.other[0];
 7379                 stateidp->other[1] = nlyp->lay_stateid.other[1];
 7380                 stateidp->other[2] = nlyp->lay_stateid.other[2];
 7381                 NFSUNLOCKLAYOUT(lhyp);
 7382                 return (0);
 7383         }
 7384 
 7385         /* Insert the new layout in the lists. */
 7386         *lypp = NULL;
 7387         atomic_add_int(&nfsrv_layoutcnt, 1);
 7388         NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
 7389         *layoutlenp = lyp->lay_layoutlen;
 7390         TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list);
 7391         NFSUNLOCKLAYOUT(lhyp);
 7392         return (0);
 7393 }
 7394 
 7395 /*
 7396  * Get the devinfo for a deviceid.
 7397  */
 7398 int
 7399 nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt,
 7400     uint32_t *notify, int *devaddrlen, char **devaddr)
 7401 {
 7402         struct nfsdevice *ds;
 7403 
 7404         if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype !=
 7405              NFSLAYOUT_FLEXFILE) ||
 7406             (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES))
 7407                 return (NFSERR_UNKNLAYOUTTYPE);
 7408 
 7409         /*
 7410          * Now, search for the device id.  Note that the structures won't go
 7411          * away, but the order changes in the list.  As such, the lock only
 7412          * needs to be held during the search through the list.
 7413          */
 7414         NFSDDSLOCK();
 7415         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7416                 if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 &&
 7417                     ds->nfsdev_nmp != NULL)
 7418                         break;
 7419         }
 7420         NFSDDSUNLOCK();
 7421         if (ds == NULL)
 7422                 return (NFSERR_NOENT);
 7423 
 7424         /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */
 7425         *devaddrlen = 0;
 7426         if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
 7427                 *devaddrlen = ds->nfsdev_fileaddrlen;
 7428                 *devaddr = ds->nfsdev_fileaddr;
 7429         } else if (layouttype == NFSLAYOUT_FLEXFILE) {
 7430                 *devaddrlen = ds->nfsdev_flexaddrlen;
 7431                 *devaddr = ds->nfsdev_flexaddr;
 7432         }
 7433         if (*devaddrlen == 0)
 7434                 return (NFSERR_UNKNLAYOUTTYPE);
 7435 
 7436         /*
 7437          * The XDR overhead is 3 unsigned values: layout_type,
 7438          * length_of_address and notify bitmap.
 7439          * If the notify array is changed to not all zeros, the
 7440          * count of unsigned values must be increased.
 7441          */
 7442         if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) +
 7443             3 * NFSX_UNSIGNED) {
 7444                 *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED;
 7445                 return (NFSERR_TOOSMALL);
 7446         }
 7447         return (0);
 7448 }
 7449 
 7450 /*
 7451  * Free a list of layout state structures.
 7452  */
 7453 static void
 7454 nfsrv_freelayoutlist(nfsquad_t clientid)
 7455 {
 7456         struct nfslayouthash *lhyp;
 7457         struct nfslayout *lyp, *nlyp;
 7458         int i;
 7459 
 7460         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7461                 lhyp = &nfslayouthash[i];
 7462                 NFSLOCKLAYOUT(lhyp);
 7463                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7464                         if (lyp->lay_clientid.qval == clientid.qval)
 7465                                 nfsrv_freelayout(&lhyp->list, lyp);
 7466                 }
 7467                 NFSUNLOCKLAYOUT(lhyp);
 7468         }
 7469 }
 7470 
 7471 /*
 7472  * Free up a layout.
 7473  */
 7474 static void
 7475 nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp)
 7476 {
 7477 
 7478         NFSD_DEBUG(4, "Freelayout=%p\n", lyp);
 7479         atomic_add_int(&nfsrv_layoutcnt, -1);
 7480         TAILQ_REMOVE(lhp, lyp, lay_list);
 7481         free(lyp, M_NFSDSTATE);
 7482 }
 7483 
 7484 /*
 7485  * Free up a device id.
 7486  */
 7487 void
 7488 nfsrv_freeonedevid(struct nfsdevice *ds)
 7489 {
 7490         int i;
 7491 
 7492         atomic_add_int(&nfsrv_devidcnt, -1);
 7493         vrele(ds->nfsdev_dvp);
 7494         for (i = 0; i < nfsrv_dsdirsize; i++)
 7495                 if (ds->nfsdev_dsdir[i] != NULL)
 7496                         vrele(ds->nfsdev_dsdir[i]);
 7497         free(ds->nfsdev_fileaddr, M_NFSDSTATE);
 7498         free(ds->nfsdev_flexaddr, M_NFSDSTATE);
 7499         free(ds->nfsdev_host, M_NFSDSTATE);
 7500         free(ds, M_NFSDSTATE);
 7501 }
 7502 
 7503 /*
 7504  * Free up a device id and its mirrors.
 7505  */
 7506 static void
 7507 nfsrv_freedevid(struct nfsdevice *ds)
 7508 {
 7509 
 7510         TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
 7511         nfsrv_freeonedevid(ds);
 7512 }
 7513 
 7514 /*
 7515  * Free all layouts and device ids.
 7516  * Done when the nfsd threads are shut down since there may be a new
 7517  * modified device id list created when the nfsd is restarted.
 7518  */
 7519 void
 7520 nfsrv_freealllayoutsanddevids(void)
 7521 {
 7522         struct nfsdontlist *mrp, *nmrp;
 7523         struct nfslayout *lyp, *nlyp;
 7524 
 7525         /* Get rid of the deviceid structures. */
 7526         nfsrv_freealldevids();
 7527         TAILQ_INIT(&nfsrv_devidhead);
 7528         nfsrv_devidcnt = 0;
 7529 
 7530         /* Get rid of all layouts. */
 7531         nfsrv_freealllayouts();
 7532 
 7533         /* Get rid of any nfsdontlist entries. */
 7534         LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp)
 7535                 free(mrp, M_NFSDSTATE);
 7536         LIST_INIT(&nfsrv_dontlisthead);
 7537         nfsrv_dontlistlen = 0;
 7538 
 7539         /* Free layouts in the recall list. */
 7540         TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp)
 7541                 nfsrv_freelayout(&nfsrv_recalllisthead, lyp);
 7542         TAILQ_INIT(&nfsrv_recalllisthead);
 7543 }
 7544 
 7545 /*
 7546  * Free layouts that match the arguments.
 7547  */
 7548 static void
 7549 nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode)
 7550 {
 7551         struct nfslayouthash *lhyp;
 7552         struct nfslayout *lyp, *nlyp;
 7553         int i;
 7554 
 7555         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7556                 lhyp = &nfslayouthash[i];
 7557                 NFSLOCKLAYOUT(lhyp);
 7558                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7559                         if (clid->qval != lyp->lay_clientid.qval)
 7560                                 continue;
 7561                         if (fs != NULL && fsidcmp(fs, &lyp->lay_fsid) != 0)
 7562                                 continue;
 7563                         if (laytype != lyp->lay_type)
 7564                                 continue;
 7565                         if ((iomode & NFSLAYOUTIOMODE_READ) != 0)
 7566                                 lyp->lay_flags &= ~NFSLAY_READ;
 7567                         if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
 7568                                 lyp->lay_flags &= ~NFSLAY_RW;
 7569                         if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0)
 7570                                 nfsrv_freelayout(&lhyp->list, lyp);
 7571                 }
 7572                 NFSUNLOCKLAYOUT(lhyp);
 7573         }
 7574 }
 7575 
 7576 /*
 7577  * Free all layouts for the argument file.
 7578  */
 7579 void
 7580 nfsrv_freefilelayouts(fhandle_t *fhp)
 7581 {
 7582         struct nfslayouthash *lhyp;
 7583         struct nfslayout *lyp, *nlyp;
 7584 
 7585         lhyp = NFSLAYOUTHASH(fhp);
 7586         NFSLOCKLAYOUT(lhyp);
 7587         TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7588                 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0)
 7589                         nfsrv_freelayout(&lhyp->list, lyp);
 7590         }
 7591         NFSUNLOCKLAYOUT(lhyp);
 7592 }
 7593 
 7594 /*
 7595  * Free all layouts.
 7596  */
 7597 static void
 7598 nfsrv_freealllayouts(void)
 7599 {
 7600         struct nfslayouthash *lhyp;
 7601         struct nfslayout *lyp, *nlyp;
 7602         int i;
 7603 
 7604         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7605                 lhyp = &nfslayouthash[i];
 7606                 NFSLOCKLAYOUT(lhyp);
 7607                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp)
 7608                         nfsrv_freelayout(&lhyp->list, lyp);
 7609                 NFSUNLOCKLAYOUT(lhyp);
 7610         }
 7611 }
 7612 
 7613 /*
 7614  * Look up the mount path for the DS server.
 7615  */
 7616 static int
 7617 nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
 7618     struct nfsdevice **dsp)
 7619 {
 7620         struct nameidata nd;
 7621         struct nfsdevice *ds;
 7622         struct mount *mp;
 7623         int error, i;
 7624         char *dsdirpath;
 7625         size_t dsdirsize;
 7626 
 7627         NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp);
 7628         *dsp = NULL;
 7629         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
 7630             dspathp, p);
 7631         error = namei(&nd);
 7632         NFSD_DEBUG(4, "lookup=%d\n", error);
 7633         if (error != 0)
 7634                 return (error);
 7635         if (nd.ni_vp->v_type != VDIR) {
 7636                 vput(nd.ni_vp);
 7637                 NFSD_DEBUG(4, "dspath not dir\n");
 7638                 return (ENOTDIR);
 7639         }
 7640         if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 7641                 vput(nd.ni_vp);
 7642                 NFSD_DEBUG(4, "dspath not an NFS mount\n");
 7643                 return (ENXIO);
 7644         }
 7645 
 7646         /*
 7647          * Allocate a DS server structure with the NFS mounted directory
 7648          * vnode reference counted, so that a non-forced dismount will
 7649          * fail with EBUSY.
 7650          * This structure is always linked into the list, even if an error
 7651          * is being returned.  The caller will free the entire list upon
 7652          * an error return.
 7653          */
 7654         *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t),
 7655             M_NFSDSTATE, M_WAITOK | M_ZERO);
 7656         ds->nfsdev_dvp = nd.ni_vp;
 7657         ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount);
 7658         NFSVOPUNLOCK(nd.ni_vp);
 7659 
 7660         dsdirsize = strlen(dspathp) + 16;
 7661         dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK);
 7662         /* Now, create the DS directory structures. */
 7663         for (i = 0; i < nfsrv_dsdirsize; i++) {
 7664                 snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i);
 7665                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 7666                     UIO_SYSSPACE, dsdirpath, p);
 7667                 error = namei(&nd);
 7668                 NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error);
 7669                 if (error != 0)
 7670                         break;
 7671                 if (nd.ni_vp->v_type != VDIR) {
 7672                         vput(nd.ni_vp);
 7673                         error = ENOTDIR;
 7674                         NFSD_DEBUG(4, "dsdirpath not a VDIR\n");
 7675                         break;
 7676                 }
 7677                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 7678                         vput(nd.ni_vp);
 7679                         error = ENXIO;
 7680                         NFSD_DEBUG(4, "dsdirpath not an NFS mount\n");
 7681                         break;
 7682                 }
 7683                 ds->nfsdev_dsdir[i] = nd.ni_vp;
 7684                 NFSVOPUNLOCK(nd.ni_vp);
 7685         }
 7686         free(dsdirpath, M_TEMP);
 7687 
 7688         if (strlen(mdspathp) > 0) {
 7689                 /*
 7690                  * This DS stores file for a specific MDS exported file
 7691                  * system.
 7692                  */
 7693                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 7694                     UIO_SYSSPACE, mdspathp, p);
 7695                 error = namei(&nd);
 7696                 NFSD_DEBUG(4, "mds lookup=%d\n", error);
 7697                 if (error != 0)
 7698                         goto out;
 7699                 if (nd.ni_vp->v_type != VDIR) {
 7700                         vput(nd.ni_vp);
 7701                         error = ENOTDIR;
 7702                         NFSD_DEBUG(4, "mdspath not dir\n");
 7703                         goto out;
 7704                 }
 7705                 mp = nd.ni_vp->v_mount;
 7706                 if ((mp->mnt_flag & MNT_EXPORTED) == 0) {
 7707                         vput(nd.ni_vp);
 7708                         error = ENXIO;
 7709                         NFSD_DEBUG(4, "mdspath not an exported fs\n");
 7710                         goto out;
 7711                 }
 7712                 ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
 7713                 ds->nfsdev_mdsisset = 1;
 7714                 vput(nd.ni_vp);
 7715         }
 7716 
 7717 out:
 7718         TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
 7719         atomic_add_int(&nfsrv_devidcnt, 1);
 7720         return (error);
 7721 }
 7722 
/*
 * Look up the mount path for the DS server and delete it.
 *
 * op      - passed through to nfsrv_deldsnmp().
 * dspathp - mount point path of the DS; compared against each mount's
 *           f_mntonname.
 * p       - passed through to nfsrv_deldsnmp().
 *
 * Returns 0 on success, EINVAL if no usable "nfs" mount with that path
 * was found (including the case where NFSMNTP_FORCEDISM or
 * NFSMNTP_CANCELRPCS was already set on it) and ENXIO if
 * nfsrv_deldsnmp() did not return a DS for the mount.
 */
int
nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p)
{
        struct mount *mp;
        struct nfsmount *nmp;
        struct nfsdevice *ds;
        int error;

        NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp);
        /*
         * Search for the path in the mount list.  Avoid looking the path
         * up, since this mount point may be hung, with associated locked
         * vnodes, etc.
         * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked
         * until this completes.
         * As noted in the man page, this should be done before any forced
         * dismount on the mount point, but at least the handshake on
         * NFSMNTP_CANCELRPCS should make it safe.
         */
        error = 0;
        ds = NULL;
        nmp = NULL;
        mtx_lock(&mountlist_mtx);
        TAILQ_FOREACH(mp, &mountlist, mnt_list) {
                if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 &&
                    strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 &&
                    mp->mnt_data != NULL) {
                        nmp = VFSTONFS(mp);
                        NFSLOCKMNT(nmp);
                        /*
                         * Only claim the mount if neither a forced dismount
                         * nor another RPC cancel is already in progress.
                         */
                        if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
                             NFSMNTP_CANCELRPCS)) == 0) {
                                nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
                                NFSUNLOCKMNT(nmp);
                        } else {
                                NFSUNLOCKMNT(nmp);
                                /* Not claimed, so EINVAL is returned below. */
                                nmp = NULL;
                        }
                        /* Only the first matching mount is considered. */
                        break;
                }
        }
        mtx_unlock(&mountlist_mtx);

        if (nmp != NULL) {
                ds = nfsrv_deldsnmp(op, nmp, p);
                NFSD_DEBUG(4, "deldsnmp=%p\n", ds);
                /*
                 * NOTE(review): the returned ds is not freed here;
                 * presumably the structure stays allocated on purpose -
                 * confirm against nfsrv_deldsnmp().
                 */
                if (ds != NULL) {
                        nfsrv_killrpcs(nmp);
                        NFSD_DEBUG(4, "aft killrpcs\n");
                } else
                        error = ENXIO;
                /*
                 * Done with the mount: clear NFSMNTP_CANCELRPCS and wake up
                 * anything sleeping on nmp.
                 */
                NFSLOCKMNT(nmp);
                nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
                wakeup(nmp);
                NFSUNLOCKMNT(nmp);
        } else
                error = EINVAL;
        return (error);
}
 7784 
 7785 /*
 7786  * Search for and remove a DS entry which matches the "nmp" argument.
 7787  * The nfsdevice structure pointer is returned so that the caller can
 7788  * free it via nfsrv_freeonedevid().
 7789  * For the forced case, do not try to do LayoutRecalls, since the server
 7790  * must be shut down now anyhow.
 7791  */
 7792 struct nfsdevice *
 7793 nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p)
 7794 {
 7795         struct nfsdevice *fndds;
 7796 
 7797         NFSD_DEBUG(4, "deldsdvp\n");
 7798         NFSDDSLOCK();
 7799         if (op == PNFSDOP_FORCEDELDS)
 7800                 fndds = nfsv4_findmirror(nmp);
 7801         else
 7802                 fndds = nfsrv_findmirroredds(nmp);
 7803         if (fndds != NULL)
 7804                 nfsrv_deleteds(fndds);
 7805         NFSDDSUNLOCK();
 7806         if (fndds != NULL) {
 7807                 if (op != PNFSDOP_FORCEDELDS)
 7808                         nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
 7809                 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
 7810         }
 7811         return (fndds);
 7812 }
 7813 
 7814 /*
 7815  * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid.
 7816  * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount
 7817  * point.
 7818  * Also, returns an error instead of the nfsdevice found.
 7819  */
 7820 int
 7821 nfsrv_delds(char *devid, NFSPROC_T *p)
 7822 {
 7823         struct nfsdevice *ds, *fndds;
 7824         struct nfsmount *nmp;
 7825         int fndmirror;
 7826 
 7827         NFSD_DEBUG(4, "delds\n");
 7828         /*
 7829          * Search the DS server list for a match with devid.
 7830          * Remove the DS entry if found and there is a mirror.
 7831          */
 7832         fndds = NULL;
 7833         nmp = NULL;
 7834         fndmirror = 0;
 7835         NFSDDSLOCK();
 7836         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7837                 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 &&
 7838                     ds->nfsdev_nmp != NULL) {
 7839                         NFSD_DEBUG(4, "fnd main ds\n");
 7840                         fndds = ds;
 7841                         break;
 7842                 }
 7843         }
 7844         if (fndds == NULL) {
 7845                 NFSDDSUNLOCK();
 7846                 return (ENXIO);
 7847         }
 7848         if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
 7849                 fndmirror = 1;
 7850         else if (fndds->nfsdev_mdsisset != 0) {
 7851                 /* For the fsid is set case, search for a mirror. */
 7852                 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7853                         if (ds != fndds && ds->nfsdev_nmp != NULL &&
 7854                             ds->nfsdev_mdsisset != 0 &&
 7855                             fsidcmp(&ds->nfsdev_mdsfsid,
 7856                             &fndds->nfsdev_mdsfsid) == 0) {
 7857                                 fndmirror = 1;
 7858                                 break;
 7859                         }
 7860                 }
 7861         }
 7862         if (fndmirror != 0) {
 7863                 nmp = fndds->nfsdev_nmp;
 7864                 NFSLOCKMNT(nmp);
 7865                 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 7866                      NFSMNTP_CANCELRPCS)) == 0) {
 7867                         nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 7868                         NFSUNLOCKMNT(nmp);
 7869                         nfsrv_deleteds(fndds);
 7870                 } else {
 7871                         NFSUNLOCKMNT(nmp);
 7872                         nmp = NULL;
 7873                 }
 7874         }
 7875         NFSDDSUNLOCK();
 7876         if (nmp != NULL) {
 7877                 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
 7878                 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
 7879                 nfsrv_killrpcs(nmp);
 7880                 NFSLOCKMNT(nmp);
 7881                 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 7882                 wakeup(nmp);
 7883                 NFSUNLOCKMNT(nmp);
 7884                 return (0);
 7885         }
 7886         return (ENXIO);
 7887 }
 7888 
 7889 /*
 7890  * Mark a DS as disabled by setting nfsdev_nmp = NULL.
 7891  */
 7892 static void
 7893 nfsrv_deleteds(struct nfsdevice *fndds)
 7894 {
 7895 
 7896         NFSD_DEBUG(4, "deleteds: deleting a mirror\n");
 7897         fndds->nfsdev_nmp = NULL;
 7898         if (fndds->nfsdev_mdsisset == 0)
 7899                 nfsrv_faildscnt--;
 7900 }
 7901 
 7902 /*
 7903  * Fill in the addr structures for the File and Flex File layouts.
 7904  */
 7905 static void
 7906 nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
 7907 {
 7908         uint32_t *tl;
 7909         char *netprot;
 7910         int addrlen;
 7911         static uint64_t new_devid = 0;
 7912 
 7913         if (strchr(addr, ':') != NULL)
 7914                 netprot = "tcp6";
 7915         else
 7916                 netprot = "tcp";
 7917 
 7918         /* Fill in the device id. */
 7919         NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time));
 7920         new_devid++;
 7921         NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)],
 7922             sizeof(new_devid));
 7923 
 7924         /*
 7925          * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4
 7926          * as defined in RFC5661) in XDR.
 7927          */
 7928         addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
 7929             6 * NFSX_UNSIGNED;
 7930         NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot);
 7931         ds->nfsdev_fileaddrlen = addrlen;
 7932         tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
 7933         ds->nfsdev_fileaddr = (char *)tl;
 7934         *tl++ = txdr_unsigned(1);               /* One stripe with index 0. */
 7935         *tl++ = 0;
 7936         *tl++ = txdr_unsigned(1);               /* One multipath list */
 7937         *tl++ = txdr_unsigned(1);               /* with one entry in it. */
 7938         /* The netaddr for this one entry. */
 7939         *tl++ = txdr_unsigned(strlen(netprot));
 7940         NFSBCOPY(netprot, tl, strlen(netprot));
 7941         tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
 7942         *tl++ = txdr_unsigned(strlen(addr));
 7943         NFSBCOPY(addr, tl, strlen(addr));
 7944 
 7945         /*
 7946          * Fill in the flex file addr (actually the ff_device_addr4
 7947          * as defined for Flexible File Layout) in XDR.
 7948          */
 7949         addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
 7950             14 * NFSX_UNSIGNED;
 7951         ds->nfsdev_flexaddrlen = addrlen;
 7952         tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
 7953         ds->nfsdev_flexaddr = (char *)tl;
 7954         *tl++ = txdr_unsigned(1);               /* One multipath entry. */
 7955         /* The netaddr for this one entry. */
 7956         *tl++ = txdr_unsigned(strlen(netprot));
 7957         NFSBCOPY(netprot, tl, strlen(netprot));
 7958         tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
 7959         *tl++ = txdr_unsigned(strlen(addr));
 7960         NFSBCOPY(addr, tl, strlen(addr));
 7961         tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED);
 7962         *tl++ = txdr_unsigned(2);               /* Two NFS Versions. */
 7963         *tl++ = txdr_unsigned(NFS_VER4);        /* NFSv4. */
 7964         *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */
 7965         *tl++ = txdr_unsigned(NFS_SRVMAXIO);    /* DS max rsize. */
 7966         *tl++ = txdr_unsigned(NFS_SRVMAXIO);    /* DS max wsize. */
 7967         *tl++ = newnfs_true;                    /* Tightly coupled. */
 7968         *tl++ = txdr_unsigned(NFS_VER4);        /* NFSv4. */
 7969         *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
 7970         *tl++ = txdr_unsigned(NFS_SRVMAXIO);    /* DS max rsize. */
 7971         *tl++ = txdr_unsigned(NFS_SRVMAXIO);    /* DS max wsize. */
 7972         *tl = newnfs_true;                      /* Tightly coupled. */
 7973 
 7974         ds->nfsdev_hostnamelen = strlen(dnshost);
 7975         ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE,
 7976             M_WAITOK);
 7977         NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1);
 7978 }
 7979 
 7980 /*
 7981  * Create the device id list.
 7982  * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument
 7983  * is misconfigured.
 7984  */
 7985 int
 7986 nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p)
 7987 {
 7988         struct nfsdevice *ds;
 7989         char *addrp, *dnshostp, *dspathp, *mdspathp;
 7990         int error, i;
 7991 
 7992         addrp = args->addr;
 7993         dnshostp = args->dnshost;
 7994         dspathp = args->dspath;
 7995         mdspathp = args->mdspath;
 7996         nfsrv_maxpnfsmirror = args->mirrorcnt;
 7997         if (addrp == NULL || dnshostp == NULL || dspathp == NULL ||
 7998             mdspathp == NULL)
 7999                 return (0);
 8000 
 8001         /*
 8002          * Loop around for each nul-terminated string in args->addr,
 8003          * args->dnshost, args->dnspath and args->mdspath.
 8004          */
 8005         while (addrp < (args->addr + args->addrlen) &&
 8006             dnshostp < (args->dnshost + args->dnshostlen) &&
 8007             dspathp < (args->dspath + args->dspathlen) &&
 8008             mdspathp < (args->mdspath + args->mdspathlen)) {
 8009                 error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds);
 8010                 if (error != 0) {
 8011                         /* Free all DS servers. */
 8012                         nfsrv_freealldevids();
 8013                         nfsrv_devidcnt = 0;
 8014                         return (ENXIO);
 8015                 }
 8016                 nfsrv_allocdevid(ds, addrp, dnshostp);
 8017                 addrp += (strlen(addrp) + 1);
 8018                 dnshostp += (strlen(dnshostp) + 1);
 8019                 dspathp += (strlen(dspathp) + 1);
 8020                 mdspathp += (strlen(mdspathp) + 1);
 8021         }
 8022         if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) {
 8023                 /* Free all DS servers. */
 8024                 nfsrv_freealldevids();
 8025                 nfsrv_devidcnt = 0;
 8026                 nfsrv_maxpnfsmirror = 1;
 8027                 return (ENXIO);
 8028         }
 8029         /* We can fail at most one less DS than the mirror level. */
 8030         nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1;
 8031 
 8032         /*
 8033          * Allocate the nfslayout hash table now, since this is a pNFS server.
 8034          * Make it 1% of the high water mark and at least 100.
 8035          */
 8036         if (nfslayouthash == NULL) {
 8037                 nfsrv_layouthashsize = nfsrv_layouthighwater / 100;
 8038                 if (nfsrv_layouthashsize < 100)
 8039                         nfsrv_layouthashsize = 100;
 8040                 nfslayouthash = mallocarray(nfsrv_layouthashsize,
 8041                     sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK |
 8042                     M_ZERO);
 8043                 for (i = 0; i < nfsrv_layouthashsize; i++) {
 8044                         mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF);
 8045                         TAILQ_INIT(&nfslayouthash[i].list);
 8046                 }
 8047         }
 8048         return (0);
 8049 }
 8050 
 8051 /*
 8052  * Free all device ids.
 8053  */
 8054 static void
 8055 nfsrv_freealldevids(void)
 8056 {
 8057         struct nfsdevice *ds, *nds;
 8058 
 8059         TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds)
 8060                 nfsrv_freedevid(ds);
 8061 }
 8062 
 8063 /*
 8064  * Check to see if there is a Read/Write Layout plus either:
 8065  * - A Write Delegation
 8066  * or
 8067  * - An Open with Write_access.
 8068  * Return 1 if this is the case and 0 otherwise.
 8069  * This function is used by nfsrv_proxyds() to decide if doing a Proxy
 8070  * Getattr RPC to the Data Server (DS) is necessary.
 8071  */
 8072 #define NFSCLIDVECSIZE  6
 8073 int
 8074 nfsrv_checkdsattr(vnode_t vp, NFSPROC_T *p)
 8075 {
 8076         fhandle_t fh, *tfhp;
 8077         struct nfsstate *stp;
 8078         struct nfslayout *lyp;
 8079         struct nfslayouthash *lhyp;
 8080         struct nfslockhashhead *hp;
 8081         struct nfslockfile *lfp;
 8082         nfsquad_t clid[NFSCLIDVECSIZE];
 8083         int clidcnt, ret;
 8084 
 8085         ret = nfsvno_getfh(vp, &fh, p);
 8086         if (ret != 0)
 8087                 return (0);
 8088 
 8089         /* First check for a Read/Write Layout. */
 8090         clidcnt = 0;
 8091         lhyp = NFSLAYOUTHASH(&fh);
 8092         NFSLOCKLAYOUT(lhyp);
 8093         TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
 8094                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8095                     ((lyp->lay_flags & NFSLAY_RW) != 0 ||
 8096                      ((lyp->lay_flags & NFSLAY_READ) != 0 &&
 8097                       nfsrv_pnfsatime != 0))) {
 8098                         if (clidcnt < NFSCLIDVECSIZE)
 8099                                 clid[clidcnt].qval = lyp->lay_clientid.qval;
 8100                         clidcnt++;
 8101                 }
 8102         }
 8103         NFSUNLOCKLAYOUT(lhyp);
 8104         if (clidcnt == 0) {
 8105                 /* None found, so return 0. */
 8106                 return (0);
 8107         }
 8108 
 8109         /* Get the nfslockfile for this fh. */
 8110         NFSLOCKSTATE();
 8111         hp = NFSLOCKHASH(&fh);
 8112         LIST_FOREACH(lfp, hp, lf_hash) {
 8113                 tfhp = &lfp->lf_fh;
 8114                 if (NFSVNO_CMPFH(&fh, tfhp))
 8115                         break;
 8116         }
 8117         if (lfp == NULL) {
 8118                 /* None found, so return 0. */
 8119                 NFSUNLOCKSTATE();
 8120                 return (0);
 8121         }
 8122 
 8123         /* Now, look for a Write delegation for this clientid. */
 8124         LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 8125                 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
 8126                     nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
 8127                         break;
 8128         }
 8129         if (stp != NULL) {
 8130                 /* Found one, so return 1. */
 8131                 NFSUNLOCKSTATE();
 8132                 return (1);
 8133         }
 8134 
 8135         /* No Write delegation, so look for an Open with Write_access. */
 8136         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 8137                 KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0,
 8138                     ("nfsrv_checkdsattr: Non-open in Open list\n"));
 8139                 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 &&
 8140                     nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
 8141                         break;
 8142         }
 8143         NFSUNLOCKSTATE();
 8144         if (stp != NULL)
 8145                 return (1);
 8146         return (0);
 8147 }
 8148 
 8149 /*
 8150  * Look for a matching clientid in the vector. Return 1 if one might match.
 8151  */
 8152 static int
 8153 nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt)
 8154 {
 8155         int i;
 8156 
 8157         /* If too many for the vector, return 1 since there might be a match. */
 8158         if (clidcnt > NFSCLIDVECSIZE)
 8159                 return (1);
 8160 
 8161         for (i = 0; i < clidcnt; i++)
 8162                 if (clidvec[i].qval == clid.qval)
 8163                         return (1);
 8164         return (0);
 8165 }
 8166 
 8167 /*
 8168  * Check the don't list for "vp" and see if issuing an rw layout is allowed.
 8169  * Return 1 if issuing an rw layout isn't allowed, 0 otherwise.
 8170  */
 8171 static int
 8172 nfsrv_dontlayout(fhandle_t *fhp)
 8173 {
 8174         struct nfsdontlist *mrp;
 8175         int ret;
 8176 
 8177         if (nfsrv_dontlistlen == 0)
 8178                 return (0);
 8179         ret = 0;
 8180         NFSDDONTLISTLOCK();
 8181         LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
 8182                 if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 &&
 8183                     (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) {
 8184                         ret = 1;
 8185                         break;
 8186                 }
 8187         }
 8188         NFSDDONTLISTUNLOCK();
 8189         return (ret);
 8190 }
 8191 
 8192 #define PNFSDS_COPYSIZ  65536
 8193 /*
 8194  * Create a new file on a DS and copy the contents of an extant DS file to it.
 8195  * This can be used for recovery of a DS file onto a recovered DS.
 8196  * The steps are:
 8197  * - When called, the MDS file's vnode is locked, blocking LayoutGet operations.
 8198  * - Disable issuing of read/write layouts for the file via the nfsdontlist,
 8199  *   so that they will be disabled after the MDS file's vnode is unlocked.
 8200  * - Set up the nfsrv_recalllist so that recall of read/write layouts can
 8201  *   be done.
 8202  * - Unlock the MDS file's vnode, so that the client(s) can perform proxied
 8203  *   writes, LayoutCommits and LayoutReturns for the file when completing the
 8204  *   LayoutReturn requested by the LayoutRecall callback.
 8205  * - Issue a LayoutRecall callback for all read/write layouts and wait for
 8206  *   them to be returned. (If the LayoutRecall callback replies
 8207  *   NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.)
 8208  * - Exclusively lock the MDS file's vnode.  This ensures that no proxied
 8209  *   writes are in progress or can occur during the DS file copy.
 8210  *   It also blocks Setattr operations.
 8211  * - Create the file on the recovered mirror.
 8212  * - Copy the file from the operational DS.
 8213  * - Copy any ACL from the MDS file to the new DS file.
 8214  * - Set the modify time of the new DS file to that of the MDS file.
 8215  * - Update the extended attribute for the MDS file.
 8216  * - Enable issuing of rw layouts by deleting the nfsdontlist entry.
 8217  * - The caller will unlock the MDS file's vnode allowing operations
 8218  *   to continue normally, since it is now on the mirror again.
 8219  */
 8220 int
 8221 nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds,
 8222     struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt,
 8223     struct ucred *cred, NFSPROC_T *p)
 8224 {
 8225         struct nfsdontlist *mrp, *nmrp;
 8226         struct nfslayouthash *lhyp;
 8227         struct nfslayout *lyp, *nlyp;
 8228         struct nfslayouthead thl;
 8229         struct mount *mp, *tvmp;
 8230         struct acl *aclp;
 8231         struct vattr va;
 8232         struct timespec mtime;
 8233         fhandle_t fh;
 8234         vnode_t tvp;
 8235         off_t rdpos, wrpos;
 8236         ssize_t aresid;
 8237         char *dat;
 8238         int didprintf, ret, retacl, xfer;
 8239 
 8240         ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp");
 8241         ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp");
 8242         /*
 8243          * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag
 8244          * so that no more RW layouts will get issued.
 8245          */
 8246         ret = nfsvno_getfh(vp, &fh, p);
 8247         if (ret != 0) {
 8248                 NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret);
 8249                 return (ret);
 8250         }
 8251         nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK);
 8252         nmrp->nfsmr_flags = NFSMR_DONTLAYOUT;
 8253         NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh));
 8254         NFSDDONTLISTLOCK();
 8255         LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
 8256                 if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0)
 8257                         break;
 8258         }
 8259         if (mrp == NULL) {
 8260                 LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list);
 8261                 mrp = nmrp;
 8262                 nmrp = NULL;
 8263                 nfsrv_dontlistlen++;
 8264                 NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n");
 8265         } else {
 8266                 NFSDDONTLISTUNLOCK();
 8267                 free(nmrp, M_NFSDSTATE);
 8268                 NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n");
 8269                 return (ENXIO);
 8270         }
 8271         NFSDDONTLISTUNLOCK();
 8272 
 8273         /*
 8274          * Search for all RW layouts for this file.  Move them to the
 8275          * recall list, so they can be recalled and their return noted.
 8276          */
 8277         lhyp = NFSLAYOUTHASH(&fh);
 8278         NFSDRECALLLOCK();
 8279         NFSLOCKLAYOUT(lhyp);
 8280         TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 8281                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8282                     (lyp->lay_flags & NFSLAY_RW) != 0) {
 8283                         TAILQ_REMOVE(&lhyp->list, lyp, lay_list);
 8284                         TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list);
 8285                         lyp->lay_trycnt = 0;
 8286                 }
 8287         }
 8288         NFSUNLOCKLAYOUT(lhyp);
 8289         NFSDRECALLUNLOCK();
 8290 
 8291         ret = 0;
 8292         mp = tvmp = NULL;
 8293         didprintf = 0;
 8294         TAILQ_INIT(&thl);
 8295         /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */
 8296         NFSVOPUNLOCK(vp);
 8297         /* Now, do a recall for all layouts not yet recalled. */
 8298 tryagain:
 8299         NFSDRECALLLOCK();
 8300         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 8301                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8302                     (lyp->lay_flags & NFSLAY_RECALL) == 0) {
 8303                         lyp->lay_flags |= NFSLAY_RECALL;
 8304                         /*
 8305                          * The layout stateid.seqid needs to be incremented
 8306                          * before doing a LAYOUT_RECALL callback.
 8307                          */
 8308                         if (++lyp->lay_stateid.seqid == 0)
 8309                                 lyp->lay_stateid.seqid = 1;
 8310                         NFSDRECALLUNLOCK();
 8311                         nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
 8312                             &lyp->lay_fh, lyp, 0, lyp->lay_type, p);
 8313                         NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n");
 8314                         goto tryagain;
 8315                 }
 8316         }
 8317 
 8318         /* Now wait for them to be returned. */
 8319 tryagain2:
 8320         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 8321                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) {
 8322                         if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) {
 8323                                 TAILQ_REMOVE(&nfsrv_recalllisthead, lyp,
 8324                                     lay_list);
 8325                                 TAILQ_INSERT_HEAD(&thl, lyp, lay_list);
 8326                                 NFSD_DEBUG(4,
 8327                                     "nfsrv_copymr: layout returned\n");
 8328                         } else {
 8329                                 lyp->lay_trycnt++;
 8330                                 ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR,
 8331                                     PVFS | PCATCH, "nfsmrl", hz);
 8332                                 NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n",
 8333                                     ret);
 8334                                 if (ret == EINTR || ret == ERESTART)
 8335                                         break;
 8336                                 if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) {
 8337                                         /*
 8338                                          * Give up after 60sec and return
 8339                                          * ENXIO, failing the copymr.
 8340                                          * This layout will remain on the
 8341                                          * recalllist.  It can only be cleared
 8342                                          * by restarting the nfsd.
 8343                                          * This seems the safe way to handle
 8344                                          * it, since it cannot be safely copied
 8345                                          * with an outstanding RW layout.
 8346                                          */
 8347                                         if (lyp->lay_trycnt >= 60) {
 8348                                                 ret = ENXIO;
 8349                                                 break;
 8350                                         }
 8351                                         if (didprintf == 0) {
 8352                                                 printf("nfsrv_copymr: layout "
 8353                                                     "not returned\n");
 8354                                                 didprintf = 1;
 8355                                         }
 8356                                 }
 8357                         }
 8358                         goto tryagain2;
 8359                 }
 8360         }
 8361         NFSDRECALLUNLOCK();
 8362         /* We can now get rid of the layouts that have been returned. */
 8363         TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp)
 8364                 nfsrv_freelayout(&thl, lyp);
 8365 
 8366         /*
 8367          * Do the vn_start_write() calls here, before the MDS vnode is
 8368          * locked and the tvp is created (locked) in the NFS file system
 8369          * that dvp is in.
 8370          * For tvmp, this probably isn't necessary, since it will be an
 8371          * NFS mount and they are not suspendable at this time.
 8372          */
 8373         if (ret == 0)
 8374                 ret = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 8375         if (ret == 0) {
 8376                 tvmp = dvp->v_mount;
 8377                 ret = vn_start_write(NULL, &tvmp, V_WAIT | PCATCH);
 8378         }
 8379 
 8380         /*
 8381          * LK_EXCLUSIVE lock the MDS vnode, so that any
 8382          * proxied writes through the MDS will be blocked until we have
 8383          * completed the copy and update of the extended attributes.
 8384          * This will also ensure that any attributes and ACL will not be
 8385          * changed until the copy is complete.
 8386          */
 8387         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 8388         if (ret == 0 && VN_IS_DOOMED(vp)) {
 8389                 NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n");
 8390                 ret = ESTALE;
 8391         }
 8392 
 8393         /* Create the data file on the recovered DS. */
 8394         if (ret == 0)
 8395                 ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp);
 8396 
 8397         /* Copy the DS file, if created successfully. */
 8398         if (ret == 0) {
 8399                 /*
 8400                  * Get any NFSv4 ACL on the MDS file, so that it can be set
 8401                  * on the new DS file.
 8402                  */
 8403                 aclp = acl_alloc(M_WAITOK | M_ZERO);
 8404                 retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
 8405                 if (retacl != 0 && retacl != ENOATTR)
 8406                         NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl);
 8407                 dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK);
 8408                 /* Malloc a block of 0s used to check for holes. */
 8409                 if (nfsrv_zeropnfsdat == NULL)
 8410                         nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP,
 8411                             M_WAITOK | M_ZERO);
 8412                 rdpos = wrpos = 0;
 8413                 ret = VOP_GETATTR(fvp, &va, cred);
 8414                 aresid = 0;
 8415                 while (ret == 0 && aresid == 0) {
 8416                         ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ,
 8417                             rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
 8418                             &aresid, p);
 8419                         xfer = PNFSDS_COPYSIZ - aresid;
 8420                         if (ret == 0 && xfer > 0) {
 8421                                 rdpos += xfer;
 8422                                 /*
 8423                                  * Skip the write for holes, except for the
 8424                                  * last block.
 8425                                  */
 8426                                 if (xfer < PNFSDS_COPYSIZ || rdpos ==
 8427                                     va.va_size || NFSBCMP(dat,
 8428                                     nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0)
 8429                                         ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer,
 8430                                             wrpos, UIO_SYSSPACE, IO_NODELOCKED,
 8431                                             cred, NULL, NULL, p);
 8432                                 if (ret == 0)
 8433                                         wrpos += xfer;
 8434                         }
 8435                 }
 8436 
 8437                 /* If there is an ACL and the copy succeeded, set the ACL. */
 8438                 if (ret == 0 && retacl == 0) {
 8439                         ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p);
 8440                         /*
 8441                          * Don't consider these as errors, since VOP_GETACL()
 8442                          * can return an ACL when they are not actually
 8443                          * supported.  For example, for UFS, VOP_GETACL()
 8444                          * will return a trivial ACL based on the uid/gid/mode
 8445                          * when there is no ACL on the file.
 8446                          * This case should be recognized as a trivial ACL
 8447                          * by UFS's VOP_SETACL() and succeed, but...
 8448                          */
 8449                         if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM)
 8450                                 ret = 0;
 8451                 }
 8452 
 8453                 if (ret == 0)
 8454                         ret = VOP_FSYNC(tvp, MNT_WAIT, p);
 8455 
 8456                 /* Set the DS data file's modify time that of the MDS file. */
 8457                 if (ret == 0)
 8458                         ret = VOP_GETATTR(vp, &va, cred);
 8459                 if (ret == 0) {
 8460                         mtime = va.va_mtime;
 8461                         VATTR_NULL(&va);
 8462                         va.va_mtime = mtime;
 8463                         ret = VOP_SETATTR(tvp, &va, cred);
 8464                 }
 8465 
 8466                 vput(tvp);
 8467                 acl_free(aclp);
 8468                 free(dat, M_TEMP);
 8469         }
 8470         if (tvmp != NULL)
 8471                 vn_finished_write(tvmp);
 8472 
 8473         /* Update the extended attributes for the newly created DS file. */
 8474         if (ret == 0)
 8475                 ret = vn_extattr_set(vp, IO_NODELOCKED,
 8476                     EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
 8477                     sizeof(*wpf) * mirrorcnt, (char *)wpf, p);
 8478         if (mp != NULL)
 8479                 vn_finished_write(mp);
 8480 
 8481         /* Get rid of the dontlist entry, so that Layouts can be issued. */
 8482         NFSDDONTLISTLOCK();
 8483         LIST_REMOVE(mrp, nfsmr_list);
 8484         NFSDDONTLISTUNLOCK();
 8485         free(mrp, M_NFSDSTATE);
 8486         return (ret);
 8487 }
 8488 
 8489 /*
 8490  * Create a data storage file on the recovered DS.
 8491  */
 8492 static int
 8493 nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
 8494     vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
 8495     vnode_t *tvpp)
 8496 {
 8497         struct vattr va, nva;
 8498         int error;
 8499 
 8500         /* Make data file name based on FH. */
 8501         error = VOP_GETATTR(vp, &va, cred);
 8502         if (error == 0) {
 8503                 /* Set the attributes for "vp" to Setattr the DS vp. */
 8504                 VATTR_NULL(&nva);
 8505                 nva.va_uid = va.va_uid;
 8506                 nva.va_gid = va.va_gid;
 8507                 nva.va_mode = va.va_mode;
 8508                 nva.va_size = 0;
 8509                 VATTR_NULL(&va);
 8510                 va.va_type = VREG;
 8511                 va.va_mode = nva.va_mode;
 8512                 NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf);
 8513                 error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL,
 8514                     pf->dsf_filename, cred, p, tvpp);
 8515         }
 8516         return (error);
 8517 }
 8518 
 8519 /*
 8520  * Look up the MDS file shared locked, and then get the extended attribute
 8521  * to find the extant DS file to be copied to the new mirror.
 8522  * If successful, *vpp is set to the MDS file's vp and *nvpp is
 8523  * set to a DS data file for the MDS file, both exclusively locked.
 8524  * The "buf" argument has the pnfsdsfile structure from the MDS file
 8525  * in it and buflen is set to its length.
 8526  */
 8527 int
 8528 nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf,
 8529     int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp,
 8530     struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp,
 8531     struct nfsdevice **fdsp)
 8532 {
 8533         struct nameidata nd;
 8534         struct vnode *vp, *curvp;
 8535         struct pnfsdsfile *pf;
 8536         struct nfsmount *nmp, *curnmp;
 8537         int dsdir, error, mirrorcnt, ippos;
 8538 
 8539         vp = NULL;
 8540         curvp = NULL;
 8541         curnmp = NULL;
 8542         *dsp = NULL;
 8543         *fdsp = NULL;
 8544         if (dspathp == NULL && curdspathp != NULL)
 8545                 return (EPERM);
 8546 
 8547         /*
 8548          * Look up the MDS file shared locked.  The lock will be upgraded
 8549          * to an exclusive lock after any rw layouts have been returned.
 8550          */
 8551         NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp);
 8552         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
 8553             mdspathp, p);
 8554         error = namei(&nd);
 8555         NFSD_DEBUG(4, "lookup=%d\n", error);
 8556         if (error != 0)
 8557                 return (error);
 8558         if (nd.ni_vp->v_type != VREG) {
 8559                 vput(nd.ni_vp);
 8560                 NFSD_DEBUG(4, "mdspath not reg\n");
 8561                 return (EISDIR);
 8562         }
 8563         vp = nd.ni_vp;
 8564 
 8565         if (curdspathp != NULL) {
 8566                 /*
 8567                  * Look up the current DS path and find the nfsdev structure for
 8568                  * it.
 8569                  */
 8570                 NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp);
 8571                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 8572                     UIO_SYSSPACE, curdspathp, p);
 8573                 error = namei(&nd);
 8574                 NFSD_DEBUG(4, "ds lookup=%d\n", error);
 8575                 if (error != 0) {
 8576                         vput(vp);
 8577                         return (error);
 8578                 }
 8579                 if (nd.ni_vp->v_type != VDIR) {
 8580                         vput(nd.ni_vp);
 8581                         vput(vp);
 8582                         NFSD_DEBUG(4, "curdspath not dir\n");
 8583                         return (ENOTDIR);
 8584                 }
 8585                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 8586                         vput(nd.ni_vp);
 8587                         vput(vp);
 8588                         NFSD_DEBUG(4, "curdspath not an NFS mount\n");
 8589                         return (ENXIO);
 8590                 }
 8591                 curnmp = VFSTONFS(nd.ni_vp->v_mount);
 8592 
 8593                 /* Search the nfsdev list for a match. */
 8594                 NFSDDSLOCK();
 8595                 *fdsp = nfsv4_findmirror(curnmp);
 8596                 NFSDDSUNLOCK();
 8597                 if (*fdsp == NULL)
 8598                         curnmp = NULL;
 8599                 if (curnmp == NULL) {
 8600                         vput(nd.ni_vp);
 8601                         vput(vp);
 8602                         NFSD_DEBUG(4, "mdscopymr: no current ds\n");
 8603                         return (ENXIO);
 8604                 }
 8605                 curvp = nd.ni_vp;
 8606         }
 8607 
 8608         if (dspathp != NULL) {
 8609                 /* Look up the nfsdev path and find the nfsdev structure. */
 8610                 NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp);
 8611                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 8612                     UIO_SYSSPACE, dspathp, p);
 8613                 error = namei(&nd);
 8614                 NFSD_DEBUG(4, "ds lookup=%d\n", error);
 8615                 if (error != 0) {
 8616                         vput(vp);
 8617                         if (curvp != NULL)
 8618                                 vput(curvp);
 8619                         return (error);
 8620                 }
 8621                 if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) {
 8622                         vput(nd.ni_vp);
 8623                         vput(vp);
 8624                         if (curvp != NULL)
 8625                                 vput(curvp);
 8626                         NFSD_DEBUG(4, "dspath not dir\n");
 8627                         if (nd.ni_vp == curvp)
 8628                                 return (EPERM);
 8629                         return (ENOTDIR);
 8630                 }
 8631                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 8632                         vput(nd.ni_vp);
 8633                         vput(vp);
 8634                         if (curvp != NULL)
 8635                                 vput(curvp);
 8636                         NFSD_DEBUG(4, "dspath not an NFS mount\n");
 8637                         return (ENXIO);
 8638                 }
 8639                 nmp = VFSTONFS(nd.ni_vp->v_mount);
 8640 
 8641                 /*
 8642                  * Search the nfsdevice list for a match.  If curnmp == NULL,
 8643                  * this is a recovery and there must be a mirror.
 8644                  */
 8645                 NFSDDSLOCK();
 8646                 if (curnmp == NULL)
 8647                         *dsp = nfsrv_findmirroredds(nmp);
 8648                 else
 8649                         *dsp = nfsv4_findmirror(nmp);
 8650                 NFSDDSUNLOCK();
 8651                 if (*dsp == NULL) {
 8652                         vput(nd.ni_vp);
 8653                         vput(vp);
 8654                         if (curvp != NULL)
 8655                                 vput(curvp);
 8656                         NFSD_DEBUG(4, "mdscopymr: no ds\n");
 8657                         return (ENXIO);
 8658                 }
 8659         } else {
 8660                 nd.ni_vp = NULL;
 8661                 nmp = NULL;
 8662         }
 8663 
 8664         /*
 8665          * Get a vp for an available DS data file using the extended
 8666          * attribute on the MDS file.
 8667          * If there is a valid entry for the new DS in the extended attribute
 8668          * on the MDS file (as checked via the nmp argument),
 8669          * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur.
 8670          */
 8671         error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p,
 8672             NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir);
 8673         if (curvp != NULL)
 8674                 vput(curvp);
 8675         if (nd.ni_vp == NULL) {
 8676                 if (error == 0 && nmp != NULL) {
 8677                         /* Search the nfsdev list for a match. */
 8678                         NFSDDSLOCK();
 8679                         *dsp = nfsrv_findmirroredds(nmp);
 8680                         NFSDDSUNLOCK();
 8681                 }
 8682                 if (error == 0 && (nmp == NULL || *dsp == NULL)) {
 8683                         if (nvpp != NULL && *nvpp != NULL) {
 8684                                 vput(*nvpp);
 8685                                 *nvpp = NULL;
 8686                         }
 8687                         error = ENXIO;
 8688                 }
 8689         } else
 8690                 vput(nd.ni_vp);
 8691 
 8692         /*
 8693          * When dspathp != NULL and curdspathp == NULL, this is a recovery
 8694          * and is only allowed if there is a 0.0.0.0 IP address entry.
 8695          * When curdspathp != NULL, the ippos will be set to that entry.
 8696          */
 8697         if (error == 0 && dspathp != NULL && ippos == -1) {
 8698                 if (nvpp != NULL && *nvpp != NULL) {
 8699                         vput(*nvpp);
 8700                         *nvpp = NULL;
 8701                 }
 8702                 error = ENXIO;
 8703         }
 8704         if (error == 0) {
 8705                 *vpp = vp;
 8706 
 8707                 pf = (struct pnfsdsfile *)buf;
 8708                 if (ippos == -1) {
 8709                         /* If no zeroip pnfsdsfile, add one. */
 8710                         ippos = *buflenp / sizeof(*pf);
 8711                         *buflenp += sizeof(*pf);
 8712                         pf += ippos;
 8713                         pf->dsf_dir = dsdir;
 8714                         strlcpy(pf->dsf_filename, fname,
 8715                             sizeof(pf->dsf_filename));
 8716                 } else
 8717                         pf += ippos;
 8718                 *pfp = pf;
 8719         } else
 8720                 vput(vp);
 8721         return (error);
 8722 }
 8723 
 8724 /*
 8725  * Search for a matching pnfsd mirror device structure, base on the nmp arg.
 8726  * Return one if found, NULL otherwise.
 8727  */
 8728 static struct nfsdevice *
 8729 nfsrv_findmirroredds(struct nfsmount *nmp)
 8730 {
 8731         struct nfsdevice *ds, *fndds;
 8732         int fndmirror;
 8733 
 8734         mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
 8735         /*
 8736          * Search the DS server list for a match with nmp.
 8737          * Remove the DS entry if found and there is a mirror.
 8738          */
 8739         fndds = NULL;
 8740         fndmirror = 0;
 8741         if (nfsrv_devidcnt == 0)
 8742                 return (fndds);
 8743         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 8744                 if (ds->nfsdev_nmp == nmp) {
 8745                         NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n");
 8746                         fndds = ds;
 8747                         break;
 8748                 }
 8749         }
 8750         if (fndds == NULL)
 8751                 return (fndds);
 8752         if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
 8753                 fndmirror = 1;
 8754         else if (fndds->nfsdev_mdsisset != 0) {
 8755                 /* For the fsid is set case, search for a mirror. */
 8756                 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 8757                         if (ds != fndds && ds->nfsdev_nmp != NULL &&
 8758                             ds->nfsdev_mdsisset != 0 &&
 8759                             fsidcmp(&ds->nfsdev_mdsfsid,
 8760                             &fndds->nfsdev_mdsfsid) == 0) {
 8761                                 fndmirror = 1;
 8762                                 break;
 8763                         }
 8764                 }
 8765         }
 8766         if (fndmirror == 0) {
 8767                 NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n");
 8768                 return (NULL);
 8769         }
 8770         return (fndds);
 8771 }
Cache object: 9d75de58375cb87068c7f36f890604d7
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/fs/nfsserver/nfs_nfsdstate.c

FreeBSD/Linux Kernel Cross Reference
sys/fs/nfsserver/nfs_nfsdstate.c