The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/nfsserver/nfs_nfsdstate.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2009 Rick Macklem, University of Guelph
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  *
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include "opt_inet.h"
   34 #include "opt_inet6.h"
   35 #include <sys/extattr.h>
   36 #include <fs/nfs/nfsport.h>
   37 
   38 struct nfsrv_stablefirst nfsrv_stablefirst;
      /*
       * The definitions in this group are not static, so they have external
       * linkage.
       *
       * nfsrv_issuedelegs: while this is 0, nfsrv_setclient() clears lc_program
       * on the new client (its comment there: no callbacks when delegations are
       * disabled).
       */
   39 int nfsrv_issuedelegs = 0;
   40 int nfsrv_dolocallocks = 0;
      /*
       * Acquired through nfsv4_lock() by nfsrv_setclient() and, for
       * CLOPS_CONFIRM, by nfsrv_getclient(), to lock out other nfsd threads.
       */
   41 struct nfsv4lock nfsv4rootfs_lock;
   42 time_t nfsdev_time = 0;
   43 int nfsrv_layouthashsize;
   44 volatile int nfsrv_layoutcnt = 0;
   45 
   46 extern int newnfs_numnfsd;
      /* Everything in this group is only declared here; no definition is in view. */
   47 extern struct nfsstatsv1 nfsstatsv1;
   48 extern int nfsrv_lease;
   49 extern struct timeval nfsboottime;
   50 extern u_int32_t newnfs_true, newnfs_false;
   51 extern struct mtx nfsrv_dslock_mtx;
   52 extern struct mtx nfsrv_recalllock_mtx;
   53 extern struct mtx nfsrv_dontlistlock_mtx;
   54 extern int nfsd_debuglevel;
   55 extern u_int nfsrv_dsdirsize;
   56 extern struct nfsdevicehead nfsrv_devidhead;
   57 extern int nfsrv_doflexfile;
   58 extern int nfsrv_maxpnfsmirror;
      /*
       * NOTE(review): these two macros presumably expand to the declarations
       * of the mutexes used by NFSLOCKV4ROOTMUTEX() and NFSLOCKSTATE() below;
       * their expansion is not visible here -- confirm in nfsport.h.
       */
   59 NFSV4ROOTLOCKMUTEX;
   60 NFSSTATESPINLOCK;
   61 extern struct nfsdontlisthead nfsrv_dontlisthead;
   62 extern volatile int nfsrv_devidcnt;
   63 extern struct nfslayouthead nfsrv_recalllisthead;
   64 extern char *nfsrv_zeropnfsdat;
   65 
   66 SYSCTL_DECL(_vfs_nfsd);
      /*
       * Knobs registered under the _vfs_nfsd sysctl node.  Each SYSCTL_INT()
       * binds an OID name to the int variable defined just above it.  The
       * CTLFLAG_RDTUN entries are, per their own description strings, meant
       * to be set via loader.conf.
       */

      /* Upper bound of the lc_stateid[] loops in nfsrv_setclient(). */
   67 int     nfsrv_statehashsize = NFSSTATEHASHSIZE;
   68 SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
   69     &nfsrv_statehashsize, 0,
   70     "Size of state hash table set via loader.conf");
   71 
      /* Number of chains in nfsclienthash[] scanned by nfsrv_setclient(). */
   72 int     nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
   73 SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
   74     &nfsrv_clienthashsize, 0,
   75     "Size of client hash table set via loader.conf");
   76 
      /*
       * Note that the OID name is "fhhashsize" while the variable is
       * nfsrv_lockhashsize.
       */
   77 int     nfsrv_lockhashsize = NFSLOCKHASHSIZE;
   78 SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
   79     &nfsrv_lockhashsize, 0,
   80     "Size of file handle hash table set via loader.conf");
   81 
   82 int     nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
   83 SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
   84     &nfsrv_sessionhashsize, 0,
   85     "Size of session hash table set via loader.conf");
   86 
   87 int     nfsrv_layouthighwater = NFSLAYOUTHIGHWATER;
   88 SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN,
   89     &nfsrv_layouthighwater, 0,
   90     "High water mark for number of layouts set via loader.conf");
   91 
      /*
       * nfsrv_setclient() fails with NFSERR_RESOURCE once nfsrv_openpluslock
       * exceeds this value.
       */
   92 static int      nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
   93 SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
   94     &nfsrv_v4statelimit, 0,
   95     "High water limit for NFSv4 opens+locks+delegations");
   96 
   97 static int      nfsrv_writedelegifpos = 0;
   98 SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
   99     &nfsrv_writedelegifpos, 0,
  100     "Issue a write delegation for read opens if possible");
  101 
  102 static int      nfsrv_allowreadforwriteopen = 1;
  103 SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
  104     &nfsrv_allowreadforwriteopen, 0,
  105     "Allow Reads to be done with Write Access StateIDs");
  106 
      /*
       * Note that the OID name is "pnfsstrictatime" while the variable is
       * nfsrv_pnfsatime.
       */
  107 int     nfsrv_pnfsatime = 0;
  108 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW,
  109     &nfsrv_pnfsatime, 0,
  110     "For pNFS service, do Getattr ops to keep atime up-to-date");
  111 
  112 int     nfsrv_flexlinuxhack = 0;
  113 SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
  114     &nfsrv_flexlinuxhack, 0,
  115     "For Linux clients, hack around Flex File Layout bug");
  116 
  117 /*
  118  * Hash lists for nfs V4.
       * nfsclienthash is indexed 0 .. nfsrv_clienthashsize - 1 by the scan in
       * nfsrv_setclient() and through NFSCLIENTHASH(clientid) elsewhere.
  119  */
  120 struct nfsclienthashhead        *nfsclienthash;
  121 struct nfslockhashhead          *nfslockhash;
  122 struct nfssessionhash           *nfssessionhash;
  123 struct nfslayouthash            *nfslayouthash;
  124 volatile int nfsrv_dontlistlen = 0;
  125 
      /*
       * File-local state.
       * nfsrv_openpluslock is compared against nfsrv_v4statelimit and is
       * incremented each time nfsrv_setclient() links a client structure.
       */
  126 static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
      /* Used as lval[0] of every clientid issued by nfsrv_setclient(). */
  127 static time_t nfsrvboottime;
      /* nfsrv_clients is incremented each time nfsrv_setclient() links a client. */
  128 static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
  129 static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
      /*
       * When non-zero, nfsrv_setclient() clears lc_program for requests that
       * have ND_GSS set.
       */
  130 static int nfsrv_nogsscallback = 0;
  131 static volatile int nfsrv_writedelegcnt = 0;
  132 static int nfsrv_faildscnt;
  133 
  134 /* Prototypes for the static functions that are local to this file. */
  135 static void nfsrv_dumpaclient(struct nfsclient *clp,
  136     struct nfsd_dumpclients *dumpp);
  137 static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
  138     NFSPROC_T *p);
  139 static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
  140     NFSPROC_T *p);
  141 static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
  142     NFSPROC_T *p);
  143 static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
  144     int cansleep, NFSPROC_T *p);
  145 static void nfsrv_freenfslock(struct nfslock *lop);
  146 static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
  147 static void nfsrv_freedeleg(struct nfsstate *);
  148 static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, 
  149     u_int32_t flags, struct nfsstate **stpp);
  150 static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
  151     struct nfsstate **stpp);
  152 static int nfsrv_getlockfh(vnode_t vp, u_short flags,
  153     struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
  154 static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
  155     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
  156 static void nfsrv_insertlock(struct nfslock *new_lop,
  157     struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
  158 static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
  159     struct nfslock **other_lopp, struct nfslockfile *lfp);
  160 static int nfsrv_getipnumber(u_char *cp);
  161 static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
  162     nfsv4stateid_t *stateidp, int specialid);
  163 static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
  164     u_int32_t flags);
  165 static int nfsrv_docallback(struct nfsclient *clp, int procnum,
  166     nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
  167     struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p);
  168 static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
  169     uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
  170     int *slotposp);
  171 static u_int32_t nfsrv_nextclientindex(void);
  172 static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
  173 static void nfsrv_markstable(struct nfsclient *clp);
  174 static void nfsrv_markreclaim(struct nfsclient *clp);
  175 static int nfsrv_checkstable(struct nfsclient *clp);
  176 static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct 
  177     vnode *vp, NFSPROC_T *p);
  178 static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
  179     NFSPROC_T *p, vnode_t vp);
  180 static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
  181     struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
  182 static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
  183     struct nfsclient *clp);
  184 static time_t nfsrv_leaseexpiry(void);
  185 static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
  186 static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
  187     struct nfsstate *stp, struct nfsrvcache *op);
  188 static int nfsrv_nootherstate(struct nfsstate *stp);
  189 static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
  190     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
  191 static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
  192     uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
  193 static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
  194     int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
  195     NFSPROC_T *p);
  196 static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
  197     NFSPROC_T *p);
  198 static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
  199     uint64_t first, uint64_t end);
  200 static void nfsrv_locklf(struct nfslockfile *lfp);
  201 static void nfsrv_unlocklf(struct nfslockfile *lfp);
  202 static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
  203 static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
  204 static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
  205     int dont_replycache, struct nfsdsession **sepp, int *slotposp);
  206 static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
  207 static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
  208     nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p);
  209 static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp);
  210 static void nfsrv_freelayoutlist(nfsquad_t clientid);
  211 static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype,
  212     int iomode);
  213 static void nfsrv_freealllayouts(void);
  214 static void nfsrv_freedevid(struct nfsdevice *ds);
  215 static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
  216     struct nfsdevice **dsp);
  217 static int nfsrv_delds(char *devid, NFSPROC_T *p);
  218 static void nfsrv_deleteds(struct nfsdevice *fndds);
  219 static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost);
  220 static void nfsrv_freealldevids(void);
  221 static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp,
  222     int maxcnt, NFSPROC_T *p);
  223 static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp,
  224     fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype,
  225     NFSPROC_T *p);
  226 static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
  227     NFSPROC_T *, struct nfslayout **lypp);
  228 static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt);
  229 static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode,
  230     fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
  231 static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode,
  232     int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
  233 static int nfsrv_dontlayout(fhandle_t *fhp);
  234 static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
  235     vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
  236     vnode_t *tvpp);
  237 static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
  238 
  239 /*
  240  * Scan the client list for a match and either return the current one,
  241  * create a new entry or return an error.
  242  * If returning a non-error, the clp structure must either be linked into
  243  * the client list or free'd.
       *
       * Parameters:
       *   nd        - request descriptor; nd_flag is tested for ND_NFSV41 and
       *               ND_GSS.
       *   new_clpp  - in/out; *new_clpp is the candidate client structure.  It
       *               is set to NULL whenever that structure has been linked
       *               into the client hash table and is left untouched
       *               otherwise (all error returns, and the NFSv4.1 case where
       *               a confirmed clientid with a matching verifier is found).
       *   clientidp - out; filled in with the clientid on every successful
       *               return.
       *   confirmp  - in/out; receives the confirm value.  For NFSv4.1, a
       *               non-zero lval[1] on entry means the client is trying to
       *               update a confirmed clientid, and lval[1] is set to 1 on
       *               return when such a confirmed clientid was found.
       *   p         - handed on to nfsrv_cleanclient() and nfsrv_zapclient().
       *
       * Returns 0 on success, otherwise NFSERR_RESOURCE, NFSERR_NOENT or
       * NFSERR_CLIDINUSE.
       *
       * Locking: after the state limit check, nfsv4rootfs_lock is acquired
       * through nfsv4_lock() and it is released through nfsv4_unlock() on
       * every later return path.  The waits for outstanding callbacks on the
       * old client happen after that release, under NFSLOCKSTATE().
  244  */
  245 int
  246 nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
  247     nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
  248 {
  249         struct nfsclient *clp = NULL, *new_clp = *new_clpp;
  250         int i, error = 0, ret;
  251         struct nfsstate *stp, *tstp;
  252 #ifdef INET
  253         struct sockaddr_in *sin, *rin;
  254 #endif
  255 #ifdef INET6
  256         struct sockaddr_in6 *sin6, *rin6;
  257 #endif
  258         struct nfsdsession *sep, *nsep;
  259         int zapit = 0, gotit, hasstate = 0, igotlock;
              /*
               * Source of confirm values.  It persists across calls and is only
               * incremented while nfsv4rootfs_lock is held.
               */
  260         static u_int64_t confirm_index = 0;
  261 
  262         /*
  263          * Check for state resource limit exceeded.
               * This is done before any lock is taken, so the early exit has
               * nothing to release.
  264          */
  265         if (nfsrv_openpluslock > nfsrv_v4statelimit) {
  266                 error = NFSERR_RESOURCE;
  267                 goto out;
  268         }
  269 
  270         if (nfsrv_issuedelegs == 0 ||
  271             ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
  272                 /*
  273                  * Don't do callbacks when delegations are disabled or
  274                  * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
  275                  * If establishing a callback connection is attempted
  276                  * when a firewall is blocking the callback path, the
  277                  * server may wait too long for the connect attempt to
  278                  * succeed during the Open. Some clients, such as Linux,
  279                  * may timeout and give up on the Open before the server
  280                  * replies. Also, since AUTH_GSS callbacks are not
  281                  * yet interoperability tested, they might cause the
  282                  * server to crap out, if they get past the Init call to
  283                  * the client.
  284                  */
  285                 new_clp->lc_program = 0;
  286 
  287         /* Lock out other nfsd threads */
              /*
               * NOTE(review): nfsv4_relref() presumably drops a reference this
               * thread already holds on the lock before asking for it via
               * nfsv4_lock() -- confirm against its definition.  The loop retries
               * until nfsv4_lock() reports success.
               */
  288         NFSLOCKV4ROOTMUTEX();
  289         nfsv4_relref(&nfsv4rootfs_lock);
  290         do {
  291                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  292                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  293         } while (!igotlock);
  294         NFSUNLOCKV4ROOTMUTEX();
  295 
  296         /*
  297          * Search for a match in the client list.
               * Hash chains are walked in index order, comparing the id length
               * and then the id bytes.  On exit, i == nfsrv_clienthashsize exactly
               * when nothing matched; otherwise clp is the matching entry and i
               * is the index of its chain.
  298          */
  299         gotit = i = 0;
  300         while (i < nfsrv_clienthashsize && !gotit) {
  301             LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
  302                 if (new_clp->lc_idlen == clp->lc_idlen &&
  303                     !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
  304                         gotit = 1;
  305                         break;
  306                 }
  307             }
  308             if (gotit == 0)
  309                 i++;
  310         }
              /*
               * Case 1: no match, or the match still needs confirmation or was
               * admin revoked.  A brand new clientid is issued.
               */
  311         if (!gotit ||
  312             (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
  313                 if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
  314                         /*
  315                          * For NFSv4.1, if confirmp->lval[1] is non-zero, the
  316                          * client is trying to update a confirmed clientid.
                               * No confirmed clientid exists here, so the update
                               * fails with NFSERR_NOENT.
  317                          */
  318                         NFSLOCKV4ROOTMUTEX();
  319                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  320                         NFSUNLOCKV4ROOTMUTEX();
  321                         confirmp->lval[1] = 0;
  322                         error = NFSERR_NOENT;
  323                         goto out;
  324                 }
  325                 /*
  326                  * Get rid of the old one.
                       * (i != nfsrv_clienthashsize means the scan above found a
                       * match, so clp is valid here.)
  327                  */
  328                 if (i != nfsrv_clienthashsize) {
  329                         LIST_REMOVE(clp, lc_hash);
  330                         nfsrv_cleanclient(clp, p);
  331                         nfsrv_freedeleglist(&clp->lc_deleg);
  332                         nfsrv_freedeleglist(&clp->lc_olddeleg);
  333                         zapit = 1;
  334                 }
  335                 /*
  336                  * Add it after assigning a client id to it.
                       * The clientid is (nfsrvboottime, next client index).  For
                       * NFSv4.1 only lval[0] of the confirm value is assigned.
  337                  */
  338                 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
  339                 if ((nd->nd_flag & ND_NFSV41) != 0)
  340                         new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
  341                             ++confirm_index;
  342                 else
  343                         confirmp->qval = new_clp->lc_confirm.qval =
  344                             ++confirm_index;
  345                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  346                     (u_int32_t)nfsrvboottime;
  347                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  348                     nfsrv_nextclientindex();
  349                 new_clp->lc_stateindex = 0;
  350                 new_clp->lc_statemaxindex = 0;
  351                 new_clp->lc_cbref = 0;
  352                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  353                 LIST_INIT(&new_clp->lc_open);
  354                 LIST_INIT(&new_clp->lc_deleg);
  355                 LIST_INIT(&new_clp->lc_olddeleg);
  356                 LIST_INIT(&new_clp->lc_session);
                      /* i is reused as a plain loop index from here on. */
  357                 for (i = 0; i < nfsrv_statehashsize; i++)
  358                         LIST_INIT(&new_clp->lc_stateid[i]);
  359                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  360                     lc_hash);
  361                 nfsstatsv1.srvclients++;
  362                 nfsrv_openpluslock++;
  363                 nfsrv_clients++;
  364                 NFSLOCKV4ROOTMUTEX();
  365                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  366                 NFSUNLOCKV4ROOTMUTEX();
                      /*
                       * The old entry was unhashed above; it is passed to
                       * nfsrv_zapclient() only after the lock has been dropped.
                       */
  367                 if (zapit)
  368                         nfsrv_zapclient(clp, p);
                      /* new_clp is now in the hash table; clear the caller's pointer. */
  369                 *new_clpp = NULL;
  370                 goto out;
  371         }
  372 
  373         /*
  374          * Now, handle the cases where the id is already issued.
               * From here on clp is a confirmed entry that has not been admin
               * revoked.
  375          */
  376         if (nfsrv_notsamecredname(nd, clp)) {
  377             /*
  378              * Check to see if there is expired state that should go away.
  379              */
  380             if (clp->lc_expiry < NFSD_MONOSEC &&
  381                 (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
  382                 nfsrv_cleanclient(clp, p);
  383                 nfsrv_freedeleglist(&clp->lc_deleg);
  384             }
  385 
  386             /*
  387              * If there is outstanding state, then reply NFSERR_CLIDINUSE per
  388              * RFC3530 Sec. 8.1.2 last para.
                   * State counts as outstanding when there is any delegation, or
                   * when any entry on lc_open has a non-empty ls_open list.
  389              */
  390             if (!LIST_EMPTY(&clp->lc_deleg)) {
  391                 hasstate = 1;
  392             } else if (LIST_EMPTY(&clp->lc_open)) {
  393                 hasstate = 0;
  394             } else {
  395                 hasstate = 0;
  396                 /* Look for an Open on the OpenOwner */
  397                 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
  398                     if (!LIST_EMPTY(&stp->ls_open)) {
  399                         hasstate = 1;
  400                         break;
  401                     }
  402                 }
  403             }
  404             if (hasstate) {
  405                 /*
  406                  * If the uid doesn't match, return NFSERR_CLIDINUSE after
  407                  * filling out the correct ipaddr and portnum.
                       * The address and port of the existing client are copied
                       * into new_clp's lc_req.nr_nam.  Address families other
                       * than the compiled-in cases are left unchanged.
  408                  */
  409                 switch (clp->lc_req.nr_nam->sa_family) {
  410 #ifdef INET
  411                 case AF_INET:
  412                         sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam;
  413                         rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
  414                         sin->sin_addr.s_addr = rin->sin_addr.s_addr;
  415                         sin->sin_port = rin->sin_port;
  416                         break;
  417 #endif
  418 #ifdef INET6
  419                 case AF_INET6:
  420                         sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam;
  421                         rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
  422                         sin6->sin6_addr = rin6->sin6_addr;
  423                         sin6->sin6_port = rin6->sin6_port;
  424                         break;
  425 #endif
  426                 }
  427                 NFSLOCKV4ROOTMUTEX();
  428                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  429                 NFSUNLOCKV4ROOTMUTEX();
  430                 error = NFSERR_CLIDINUSE;
  431                 goto out;
  432             }
  433         }
  434 
              /* Case 2: same id but a different verifier. */
  435         if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
  436                 /*
  437                  * If the verifier has changed, the client has rebooted
  438                  * and a new client id is issued. The old state info
  439                  * can be thrown away once the SETCLIENTID_CONFIRM occurs.
  440                  */
  441                 LIST_REMOVE(clp, lc_hash);
  442 
  443                 /* Get rid of all sessions on this clientid. */
  444                 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) {
  445                         ret = nfsrv_freesession(sep, NULL);
  446                         if (ret != 0)
  447                                 printf("nfsrv_setclient: verifier changed free"
  448                                     " session failed=%d\n", ret);
  449                 }
  450 
  451                 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
  452                 if ((nd->nd_flag & ND_NFSV41) != 0)
  453                         new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
  454                             ++confirm_index;
  455                 else
  456                         confirmp->qval = new_clp->lc_confirm.qval =
  457                             ++confirm_index;
                      /*
                       * NOTE(review): unlike the same assignment in the no-match
                       * path above, nfsrvboottime is stored here without the
                       * explicit (u_int32_t) cast.
                       */
  458                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  459                     nfsrvboottime;
  460                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  461                     nfsrv_nextclientindex();
  462                 new_clp->lc_stateindex = 0;
  463                 new_clp->lc_statemaxindex = 0;
  464                 new_clp->lc_cbref = 0;
  465                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  466 
  467                 /*
  468                  * Save the state until confirmed.
                       * The open, delegation, old delegation and stateid hash
                       * list heads of the old client are handed to new_clp with
                       * LIST_NEWHEAD(), and the ls_clp back pointer of every
                       * state on those lists is retargeted to new_clp.
  469                  */
  470                 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
  471                 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
  472                         tstp->ls_clp = new_clp;
  473                 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
  474                 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
  475                         tstp->ls_clp = new_clp;
  476                 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
  477                     ls_list);
  478                 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
  479                         tstp->ls_clp = new_clp;
  480                 for (i = 0; i < nfsrv_statehashsize; i++) {
  481                         LIST_NEWHEAD(&new_clp->lc_stateid[i],
  482                             &clp->lc_stateid[i], ls_hash);
  483                         LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
  484                                 tstp->ls_clp = new_clp;
  485                 }
  486                 LIST_INIT(&new_clp->lc_session);
  487                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  488                     lc_hash);
  489                 nfsstatsv1.srvclients++;
  490                 nfsrv_openpluslock++;
  491                 nfsrv_clients++;
  492                 NFSLOCKV4ROOTMUTEX();
  493                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  494                 NFSUNLOCKV4ROOTMUTEX();
  495 
  496                 /*
  497                  * Must wait until any outstanding callback on the old clp
  498                  * completes.
                       * Each sleep is bounded by 10 * hz ticks, after which
                       * lc_cbref is checked again.
  499                  */
  500                 NFSLOCKSTATE();
  501                 while (clp->lc_cbref) {
  502                         clp->lc_flags |= LCL_WAKEUPWANTED;
  503                         (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
  504                             "nfsd clp", 10 * hz);
  505                 }
  506                 NFSUNLOCKSTATE();
  507                 nfsrv_zapclient(clp, p);
  508                 *new_clpp = NULL;
  509                 goto out;
  510         }
  511 
              /* Case 3: same id and same verifier. */
  512         /* For NFSv4.1, mark that we found a confirmed clientid. */
  513         if ((nd->nd_flag & ND_NFSV41) != 0) {
                      /*
                       * The existing clp stays in the hash table and new_clp is
                       * not linked, so *new_clpp is left set for the caller.
                       */
  514                 clientidp->lval[0] = clp->lc_clientid.lval[0];
  515                 clientidp->lval[1] = clp->lc_clientid.lval[1];
  516                 confirmp->lval[0] = 0;  /* Ignored by client */
  517                 confirmp->lval[1] = 1;
  518         } else {
  519                 /*
  520                  * id and verifier match, so update the net address info
  521                  * and get rid of any existing callback authentication
  522                  * handle, so a new one will be acquired.
                       * new_clp replaces clp in the hash table under the same
                       * clientid, taking over its state lists and state indices.
  523                  */
  524                 LIST_REMOVE(clp, lc_hash);
  525                 new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
  526                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  527                 confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
  528                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  529                     clp->lc_clientid.lval[0];
  530                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  531                     clp->lc_clientid.lval[1];
  532                 new_clp->lc_delegtime = clp->lc_delegtime;
  533                 new_clp->lc_stateindex = clp->lc_stateindex;
  534                 new_clp->lc_statemaxindex = clp->lc_statemaxindex;
  535                 new_clp->lc_cbref = 0;
  536                 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
  537                 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
  538                         tstp->ls_clp = new_clp;
  539                 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
  540                 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
  541                         tstp->ls_clp = new_clp;
  542                 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
  543                 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
  544                         tstp->ls_clp = new_clp;
  545                 for (i = 0; i < nfsrv_statehashsize; i++) {
  546                         LIST_NEWHEAD(&new_clp->lc_stateid[i],
  547                             &clp->lc_stateid[i], ls_hash);
  548                         LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
  549                                 tstp->ls_clp = new_clp;
  550                 }
  551                 LIST_INIT(&new_clp->lc_session);
  552                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  553                     lc_hash);
  554                 nfsstatsv1.srvclients++;
  555                 nfsrv_openpluslock++;
  556                 nfsrv_clients++;
  557         }
  558         NFSLOCKV4ROOTMUTEX();
  559         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  560         NFSUNLOCKV4ROOTMUTEX();
  561 
              /*
               * Only the non-NFSv4.1 branch above replaced clp with new_clp, so
               * only that case has an old clp to wait on and zap.
               */
  562         if ((nd->nd_flag & ND_NFSV41) == 0) {
  563                 /*
  564                  * Must wait until any outstanding callback on the old clp
  565                  * completes.
  566                  */
  567                 NFSLOCKSTATE();
  568                 while (clp->lc_cbref) {
  569                         clp->lc_flags |= LCL_WAKEUPWANTED;
  570                         (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
  571                             "nfsdclp", 10 * hz);
  572                 }
  573                 NFSUNLOCKSTATE();
  574                 nfsrv_zapclient(clp, p);
  575                 *new_clpp = NULL;
  576         }
  577 
  578 out:
  579         NFSEXITCODE2(error, nd);
  580         return (error);
  581 }
  582 
  583 /*
  584  * Check to see if the client id exists and optionally confirm it.
  585  */
  586 int
  587 nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
  588     struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
  589     struct nfsrv_descript *nd, NFSPROC_T *p)
  590 {
              /*
               * Look up the nfsclient for clientid and apply the operations
               * selected by opflags (confirm and/or renew).
               * If clpp is non-NULL, *clpp is set to NULL on entry and to the
               * client found once the lookup has succeeded.
               * Returns 0 or an NFSERR_* value.
               */
  591         struct nfsclient *clp;
  592         struct nfsstate *stp;
  593         int i;
  594         struct nfsclienthashhead *hp;
  595         int error = 0, igotlock, doneok;
  596         struct nfssessionhash *shp;
  597         struct nfsdsession *sep;
  598         uint64_t sessid[2];
              /* Source of sessid[0]; only bumped with the V4 root mutex held. */
  599         static uint64_t next_sess = 0;
  600 
  601         if (clpp)
  602                 *clpp = NULL;
              /*
               * Unless this is an NFSv4.1 request with opflags == CLOPS_RENEW,
               * the first word of the clientid must match nfsrvboottime,
               * otherwise the clientid is reported as stale.
               */
  603         if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
  604             opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
  605                 error = NFSERR_STALECLIENTID;
  606                 goto out;
  607         }
  608 
  609         /*
  610          * If called with opflags == CLOPS_RENEW, the State Lock is
  611          * already held. Otherwise, we need to get either that or,
  612          * for the case of Confirm, lock out the nfsd threads.
  613          */
  614         if (opflags & CLOPS_CONFIRM) {
  615                 NFSLOCKV4ROOTMUTEX();
                      /*
                       * Release a reference on nfsv4rootfs_lock (presumably the
                       * one held by the calling nfsd thread -- confirm against
                       * the callers) and then retry until the lock is obtained.
                       */
  616                 nfsv4_relref(&nfsv4rootfs_lock);
  617                 do {
  618                         igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  619                             NFSV4ROOTLOCKMUTEXPTR, NULL);
  620                 } while (!igotlock);
  621                 /*
  622                  * Create a new sessionid here, since we need to do it where
  623                  * there is a mutex held to serialize update of next_sess.
  624                  */
  625                 if ((nd->nd_flag & ND_NFSV41) != 0) {
  626                         sessid[0] = ++next_sess;
  627                         sessid[1] = clientid.qval;
  628                 }
  629                 NFSUNLOCKV4ROOTMUTEX();
  630         } else if (opflags != CLOPS_RENEW) {
  631                 NFSLOCKSTATE();
  632         }
  633 
  634         /* For NFSv4.1, the clp is acquired from the associated session. */
  635         if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
  636             opflags == CLOPS_RENEW) {
                      /*
                       * clp stays NULL when the request has no Sequence
                       * (ND_HASSEQUENCE clear) or the session is not found.
                       */
  637                 clp = NULL;
  638                 if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
  639                         shp = NFSSESSIONHASH(nd->nd_sessionid);
  640                         NFSLOCKSESSION(shp);
  641                         sep = nfsrv_findsession(nd->nd_sessionid);
  642                         if (sep != NULL)
  643                                 clp = sep->sess_clp;
  644                         NFSUNLOCKSESSION(shp);
  645                 }
  646         } else {
                      /*
                       * Search the clientid hash chain on lval[1]; the loop
                       * leaves clp NULL when nothing on the chain matches.
                       */
  647                 hp = NFSCLIENTHASH(clientid);
  648                 LIST_FOREACH(clp, hp, lc_hash) {
  649                         if (clp->lc_clientid.lval[1] == clientid.lval[1])
  650                                 break;
  651                 }
  652         }
  653         if (clp == NULL) {
  654                 if (opflags & CLOPS_CONFIRM)
  655                         error = NFSERR_STALECLIENTID;
  656                 else
  657                         error = NFSERR_EXPIRED;
  658         } else if (clp->lc_flags & LCL_ADMINREVOKED) {
  659                 /*
  660                  * If marked admin revoked, just return the error.
  661                  */
  662                 error = NFSERR_ADMINREVOKED;
  663         }
              /* Lookup failed: undo whichever lock was taken above and leave. */
  664         if (error) {
  665                 if (opflags & CLOPS_CONFIRM) {
  666                         NFSLOCKV4ROOTMUTEX();
  667                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  668                         NFSUNLOCKV4ROOTMUTEX();
  669                 } else if (opflags != CLOPS_RENEW) {
  670                         NFSUNLOCKSTATE();
  671                 }
  672                 goto out;
  673         }
  674 
  675         /*
  676          * Perform any operations specified by the opflags.
  677          */
  678         if (opflags & CLOPS_CONFIRM) {
                      /*
                       * NFSv4.1 compares only lval[0] of the confirm value and
                       * fails with SEQMISORDERED; otherwise the whole 64-bit
                       * value is compared and a mismatch is STALECLIENTID.
                       */
  679                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
  680                      clp->lc_confirm.lval[0] != confirm.lval[0])
  681                         error = NFSERR_SEQMISORDERED;
  682                 else if ((nd->nd_flag & ND_NFSV41) == 0 &&
  683                      clp->lc_confirm.qval != confirm.qval)
  684                         error = NFSERR_STALECLIENTID;
  685                 else if (nfsrv_notsamecredname(nd, clp))
  686                         error = NFSERR_CLIDINUSE;
  687 
  688                 if (!error) {
                          /* Only when NEEDSCONFIRM is set and DONTCLEAN is clear. */
  689                     if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
  690                         LCL_NEEDSCONFIRM) {
  691                         /*
  692                          * Hang onto the delegations (as old delegations)
  693                          * for an Open with CLAIM_DELEGATE_PREV unless in
  694                          * grace, but get rid of the rest of the state.
  695                          */
  696                         nfsrv_cleanclient(clp, p);
  697                         nfsrv_freedeleglist(&clp->lc_olddeleg);
  698                         if (nfsrv_checkgrace(nd, clp, 0)) {
  699                             /* In grace, so just delete delegations */
  700                             nfsrv_freedeleglist(&clp->lc_deleg);
  701                         } else {
  702                             LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
  703                                 stp->ls_flags |= NFSLCK_OLDDELEG;
  704                             clp->lc_delegtime = NFSD_MONOSEC +
  705                                 nfsrv_lease + NFSRV_LEASEDELTA;
  706                             LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
  707                                 ls_list);
  708                         }
  709                         if ((nd->nd_flag & ND_NFSV41) != 0)
  710                             clp->lc_program = cbprogram;
  711                     }
  712                     clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
  713                     if (clp->lc_program)
  714                         clp->lc_flags |= LCL_NEEDSCBNULL;
  715                     /* For NFSv4.1, link the session onto the client. */
  716                     if (nsep != NULL) {
  717                         /* Hold a reference on the xprt for a backchannel. */
  718                         if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
  719                             != 0) {
  720                             if (clp->lc_req.nr_client == NULL)
  721                                 clp->lc_req.nr_client = (struct __rpc_client *)
  722                                     clnt_bck_create(nd->nd_xprt->xp_socket,
  723                                     cbprogram, NFSV4_CBVERS);
  724                             if (clp->lc_req.nr_client != NULL) {
  725                                 SVC_ACQUIRE(nd->nd_xprt);
  726                                 CLNT_ACQUIRE(clp->lc_req.nr_client);
  727                                 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
  728                                 /* Disable idle timeout. */
  729                                 nd->nd_xprt->xp_idletimeout = 0;
  730                                 nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
  731                             } else
                                      /* No client handle, so drop the backchannel flag. */
  732                                 nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
  733                         }
                              /*
                               * Install the session id built above into both the
                               * session and its callback session, then hash the
                               * session and link it onto the client with the
                               * state lock and the session hash lock held.
                               */
  734                         NFSBCOPY(sessid, nsep->sess_sessionid,
  735                             NFSX_V4SESSIONID);
  736                         NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
  737                             NFSX_V4SESSIONID);
  738                         shp = NFSSESSIONHASH(nsep->sess_sessionid);
  739                         NFSLOCKSTATE();
  740                         NFSLOCKSESSION(shp);
  741                         LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
  742                         LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
  743                         nsep->sess_clp = clp;
  744                         NFSUNLOCKSESSION(shp);
  745                         NFSUNLOCKSTATE();
  746                     }
  747                 }
  748         } else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
                      /* Not a confirm, and the client was never confirmed. */
  749                 error = NFSERR_EXPIRED;
  750         }
  751 
  752         /*
  753          * If called by the Renew Op, we must check the principal.
  754          */
  755         if (!error && (opflags & CLOPS_RENEWOP)) {
  756             if (nfsrv_notsamecredname(nd, clp)) {
                      /*
                       * Principal differs: accept the renew only if some open
                       * state on this client belongs to the caller's uid.
                       */
  757                 doneok = 0;
  758                 for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
  759                     LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
  760                         if ((stp->ls_flags & NFSLCK_OPEN) &&
  761                             stp->ls_uid == nd->nd_cred->cr_uid) {
  762                                 doneok = 1;
  763                                 break;
  764                         }
  765                     }
  766                 }
  767                 if (!doneok)
  768                         error = NFSERR_ACCES;
  769             }
  770             if (!error && (clp->lc_flags & LCL_CBDOWN))
  771                 error = NFSERR_CBPATHDOWN;
  772         }
              /*
               * The lease is refreshed on success and also when the only
               * problem found is a down callback path.
               */
  773         if ((!error || error == NFSERR_CBPATHDOWN) &&
  774              (opflags & CLOPS_RENEW)) {
  775                 clp->lc_expiry = nfsrv_leaseexpiry();
  776         }
  777         if (opflags & CLOPS_CONFIRM) {
  778                 NFSLOCKV4ROOTMUTEX();
  779                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  780                 NFSUNLOCKV4ROOTMUTEX();
  781         } else if (opflags != CLOPS_RENEW) {
  782                 NFSUNLOCKSTATE();
  783         }
              /*
               * Errors set after the lookup do not jump to out, so *clpp is
               * filled in here for those error returns as well.
               */
  784         if (clpp)
  785                 *clpp = clp;
  786 
  787 out:
  788         NFSEXITCODE2(error, nd);
  789         return (error);
  790 }
  791 
  792 /*
  793  * Perform the NFSv4.1 destroy clientid.
       *
       * clientid - the clientid to destroy
       * p        - passed through to nfsrv_cleanclient() and nfsrv_zapclient()
       *
       * Returns 0 when the client was destroyed or is not found,
       * NFSERR_STALECLIENTID when clientid.lval[0] does not match
       * nfsrvboottime and NFSERR_CLIENTIDBUSY when the client still has
       * entries on its stateid hash lists, sessions or delegations.
  794  */
  795 int
  796 nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
  797 {
  798         struct nfsclient *clp;
  799         struct nfsclienthashhead *hp;
  800         int error = 0, i, igotlock;
  801 
  802         if (nfsrvboottime != clientid.lval[0]) {
  803                 error = NFSERR_STALECLIENTID;
  804                 goto out;
  805         }
  806 
  807         /* Lock out other nfsd threads */
  808         NFSLOCKV4ROOTMUTEX();
  809         nfsv4_relref(&nfsv4rootfs_lock);
  810         do {
  811                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  812                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  813         } while (igotlock == 0);
  814         NFSUNLOCKV4ROOTMUTEX();
  815 
              /* Find the client on its hash chain; clp is NULL if not found. */
  816         hp = NFSCLIENTHASH(clientid);
  817         LIST_FOREACH(clp, hp, lc_hash) {
  818                 if (clp->lc_clientid.lval[1] == clientid.lval[1])
  819                         break;
  820         }
  821         if (clp == NULL) {
  822                 NFSLOCKV4ROOTMUTEX();
  823                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  824                 NFSUNLOCKV4ROOTMUTEX();
  825                 /* Just return ok, since it is gone. */
  826                 goto out;
  827         }
  828 
  829         /*
  830          * Free up all layouts on the clientid.  Should the client return the
  831          * layouts?
               * Note that this is done before the busy checks below, so the
               * layouts are freed even when NFSERR_CLIENTIDBUSY is returned.
  832          */
  833         nfsrv_freelayoutlist(clientid);
  834 
  835         /* Scan for state on the clientid. */
  836         for (i = 0; i < nfsrv_statehashsize; i++)
  837                 if (!LIST_EMPTY(&clp->lc_stateid[i])) {
  838                         NFSLOCKV4ROOTMUTEX();
  839                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  840                         NFSUNLOCKV4ROOTMUTEX();
  841                         error = NFSERR_CLIENTIDBUSY;
  842                         goto out;
  843                 }
  844         if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
  845                 NFSLOCKV4ROOTMUTEX();
  846                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  847                 NFSUNLOCKV4ROOTMUTEX();
  848                 error = NFSERR_CLIENTIDBUSY;
  849                 goto out;
  850         }
  851 
  852         /* Destroy the clientid and return ok. */
  853         nfsrv_cleanclient(clp, p);
  854         nfsrv_freedeleglist(&clp->lc_deleg);
  855         nfsrv_freedeleglist(&clp->lc_olddeleg);
  856         LIST_REMOVE(clp, lc_hash);
  857         NFSLOCKV4ROOTMUTEX();
  858         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  859         NFSUNLOCKV4ROOTMUTEX();
              /* The client is freed only after the exclusive lock is released. */
  860         nfsrv_zapclient(clp, p);
  861 out:
              /*
               * There is no nfsrv_descript in this function, so use the exit
               * trace macro that takes only the error.
               */
  862         NFSEXITCODE(error);
  863         return (error);
  864 }
  865 
  866 /*
  867  * Called from the new nfssvc syscall to admin revoke a clientid.
  868  * Returns 0 for success, error otherwise.
       *
       * revokep - holds the client id string and its length to match on
       * p       - passed through to nfsrv_writestable() and
       *           nfsrv_cleanclient()
       *
       * The only error set here is EPERM, when no client matches.
  869  */
  870 int
  871 nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
  872 {
  873         struct nfsclient *clp = NULL;
  874         int i, error = 0;
  875         int gotit, igotlock;
  876 
  877         /*
  878          * First, lock out the nfsd so that state won't change while the
  879          * revocation record is being written to the stable storage restart
  880          * file.
               * Unlike nfsrv_getclient() and nfsrv_destroyclient(), no
               * nfsv4_relref() is done first and the matching nfsv4_unlock()
               * calls below pass 0 rather than 1.
  881          */
  882         NFSLOCKV4ROOTMUTEX();
  883         do {
  884                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  885                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  886         } while (!igotlock);
  887         NFSUNLOCKV4ROOTMUTEX();
  888 
  889         /*
  890          * Search for a match in the client list.
               * On a match the break leaves clp pointing at the client and
               * gotit stops the outer loop over the hash table.
  891          */
  892         gotit = i = 0;
  893         while (i < nfsrv_clienthashsize && !gotit) {
  894             LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
  895                 if (revokep->nclid_idlen == clp->lc_idlen &&
  896                     !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
  897                         gotit = 1;
  898                         break;
  899                 }
  900             }
  901             i++;
  902         }
  903         if (!gotit) {
  904                 NFSLOCKV4ROOTMUTEX();
  905                 nfsv4_unlock(&nfsv4rootfs_lock, 0);
  906                 NFSUNLOCKV4ROOTMUTEX();
  907                 error = EPERM;
  908                 goto out;
  909         }
  910 
  911         /*
  912          * Now, write out the revocation record
  913          */
  914         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
  915         nfsrv_backupstable();
  916 
  917         /*
  918          * and clear out the state, marking the clientid revoked.
               * The client structure itself is left on its hash list.
  919          */
  920         clp->lc_flags &= ~LCL_CALLBACKSON;
  921         clp->lc_flags |= LCL_ADMINREVOKED;
  922         nfsrv_cleanclient(clp, p);
  923         nfsrv_freedeleglist(&clp->lc_deleg);
  924         nfsrv_freedeleglist(&clp->lc_olddeleg);
  925         NFSLOCKV4ROOTMUTEX();
  926         nfsv4_unlock(&nfsv4rootfs_lock, 0);
  927         NFSUNLOCKV4ROOTMUTEX();
  928 
  929 out:
  930         NFSEXITCODE(error);
  931         return (error);
  932 }
  933 
  934 /*
  935  * Dump out stats for all clients. Called from nfssvc(2), that is used
  936  * nfsstatsv1.
       *
       * dumpp  - array that receives one entry per client
       * maxcnt - number of entries in dumpp; at most this many are written
       *
       * When fewer than maxcnt clients are dumped, the entry after the last
       * one gets a zero nclid_idlen to mark the end of the list.
  937  */
  938 void
  939 nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
  940 {
  941         struct nfsclient *clp;
  942         int i = 0, cnt = 0;
  943 
  944         /*
  945          * First, get a reference on the nfsv4rootfs_lock so that an
  946          * exclusive lock cannot be acquired while dumping the clients.
  947          */
  948         NFSLOCKV4ROOTMUTEX();
  949         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
  950         NFSUNLOCKV4ROOTMUTEX();
  951         NFSLOCKSTATE();
  952         /*
  953          * Rattle through the client lists until done.
               * Both loops also stop as soon as maxcnt entries are filled.
  954          */
  955         while (i < nfsrv_clienthashsize && cnt < maxcnt) {
  956             clp = LIST_FIRST(&nfsclienthash[i]);
  957             while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
  958                 nfsrv_dumpaclient(clp, &dumpp[cnt]);
  959                 cnt++;
  960                 clp = LIST_NEXT(clp, lc_hash);
  961             }
  962             i++;
  963         }
              /* Mark the end of the list when there is room left. */
  964         if (cnt < maxcnt)
  965             dumpp[cnt].ndcl_clid.nclid_idlen = 0;
  966         NFSUNLOCKSTATE();
  967         NFSLOCKV4ROOTMUTEX();
  968         nfsv4_relref(&nfsv4rootfs_lock);
  969         NFSUNLOCKV4ROOTMUTEX();
  970 }
  971 
  972 /*
  973  * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
       *
       * clp   - the client to report on
       * dumpp - the entry to fill in: flags, client id, callback address
       *         family/address and counts of open owners, opens, lock
       *         owners, locks, delegations and old delegations
  974  */
  975 static void
  976 nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
  977 {
  978         struct nfsstate *stp, *openstp, *lckownstp;
  979         struct nfslock *lop;
  980         sa_family_t af;
  981 #ifdef INET
  982         struct sockaddr_in *rin;
  983 #endif
  984 #ifdef INET6
  985         struct sockaddr_in6 *rin6;
  986 #endif
  987 
              /* Zero all counters before the lists are walked. */
  988         dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
  989         dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
  990         dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
  991         dumpp->ndcl_flags = clp->lc_flags;
  992         dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
  993         NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
              /*
               * The address family is always recorded; the address itself is
               * only copied for the families handled in the switch.
               */
  994         af = clp->lc_req.nr_nam->sa_family;
  995         dumpp->ndcl_addrfam = af;
  996         switch (af) {
  997 #ifdef INET
  998         case AF_INET:
  999                 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
 1000                 dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr;
 1001                 break;
 1002 #endif
 1003 #ifdef INET6
 1004         case AF_INET6:
 1005                 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 1006                 dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr;
 1007                 break;
 1008 #endif
 1009         }
 1010 
 1011         /*
 1012          * Now, scan the state lists and total up the opens and locks.
               * The nesting is: open owners on the client, opens on each
               * open owner, lock owners on each open, locks on each lock owner.
 1013          */
 1014         LIST_FOREACH(stp, &clp->lc_open, ls_list) {
 1015             dumpp->ndcl_nopenowners++;
 1016             LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
 1017                 dumpp->ndcl_nopens++;
 1018                 LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
 1019                     dumpp->ndcl_nlockowners++;
 1020                     LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
 1021                         dumpp->ndcl_nlocks++;
 1022                     }
 1023                 }
 1024             }
 1025         }
 1026 
 1027         /*
 1028          * and the delegation lists.
 1029          */
 1030         LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
 1031             dumpp->ndcl_ndelegs++;
 1032         }
 1033         LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
 1034             dumpp->ndcl_nolddelegs++;
 1035         }
 1036 }
 1037 
 1038 /*
 1039  * Dump out lock stats for a file.
       *
       * vp     - the file whose state is dumped
       * ldumpp - array that receives one entry per open, lock and delegation
       * maxcnt - number of entries in ldumpp; at most this many are written
       * p      - passed through to nfsrv_getlockfh()
       *
       * Entries are written in the order opens, byte range locks, then
       * delegations.  When fewer than maxcnt entries are written, the entry
       * after the last one gets a zero client id length to end the list.
       * When the file handle or the lock file structure cannot be obtained,
       * only ldumpp[0] is written, as an end of list marker.
 1040  */
 1041 void
 1042 nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
 1043     NFSPROC_T *p)
 1044 {
 1045         struct nfsstate *stp;
 1046         struct nfslock *lop;
 1047         int cnt = 0;
 1048         struct nfslockfile *lfp;
 1049         sa_family_t af;
 1050 #ifdef INET
 1051         struct sockaddr_in *rin;
 1052 #endif
 1053 #ifdef INET6
 1054         struct sockaddr_in6 *rin6;
 1055 #endif
 1056         int ret;
 1057         fhandle_t nfh;
 1058 
              /* The file handle is fetched before any of the locks are taken. */
 1059         ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
 1060         /*
 1061          * First, get a reference on the nfsv4rootfs_lock so that an
 1062          * exclusive lock on it cannot be acquired while dumping the locks.
 1063          */
 1064         NFSLOCKV4ROOTMUTEX();
 1065         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 1066         NFSUNLOCKV4ROOTMUTEX();
 1067         NFSLOCKSTATE();
 1068         if (!ret)
 1069                 ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
 1070         if (ret) {
                      /* Nothing to report: return an empty list. */
 1071                 ldumpp[0].ndlck_clid.nclid_idlen = 0;
 1072                 NFSUNLOCKSTATE();
 1073                 NFSLOCKV4ROOTMUTEX();
 1074                 nfsv4_relref(&nfsv4rootfs_lock);
 1075                 NFSUNLOCKV4ROOTMUTEX();
 1076                 return;
 1077         }
 1078 
 1079         /*
 1080          * For each open share on file, dump it out.
               * The owner recorded is that of the open's open owner.
 1081          */
 1082         stp = LIST_FIRST(&lfp->lf_open);
 1083         while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
 1084                 ldumpp[cnt].ndlck_flags = stp->ls_flags;
 1085                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1086                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1087                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1088                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1089                 ldumpp[cnt].ndlck_owner.nclid_idlen =
 1090                     stp->ls_openowner->ls_ownerlen;
 1091                 NFSBCOPY(stp->ls_openowner->ls_owner,
 1092                     ldumpp[cnt].ndlck_owner.nclid_id,
 1093                     stp->ls_openowner->ls_ownerlen);
 1094                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1095                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1096                     stp->ls_clp->lc_idlen);
 1097                 af = stp->ls_clp->lc_req.nr_nam->sa_family;
 1098                 ldumpp[cnt].ndlck_addrfam = af;
 1099                 switch (af) {
 1100 #ifdef INET
 1101                 case AF_INET:
 1102                         rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
 1103                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
 1104                         break;
 1105 #endif
 1106 #ifdef INET6
 1107                 case AF_INET6:
 1108                         rin6 = (struct sockaddr_in6 *)
 1109                             stp->ls_clp->lc_req.nr_nam;
 1110                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
 1111                         break;
 1112 #endif
 1113                 }
 1114                 stp = LIST_NEXT(stp, ls_file);
 1115                 cnt++;
 1116         }
 1117 
 1118         /*
 1119          * and all locks.
               * cnt carries on from the opens, so the entries follow them.
               * The stateid, owner and client come from the lock's lo_stp;
               * the byte range is also recorded for these entries.
 1120          */
 1121         lop = LIST_FIRST(&lfp->lf_lock);
 1122         while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
 1123                 stp = lop->lo_stp;
 1124                 ldumpp[cnt].ndlck_flags = lop->lo_flags;
 1125                 ldumpp[cnt].ndlck_first = lop->lo_first;
 1126                 ldumpp[cnt].ndlck_end = lop->lo_end;
 1127                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1128                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1129                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1130                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1131                 ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
 1132                 NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
 1133                     stp->ls_ownerlen);
 1134                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1135                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1136                     stp->ls_clp->lc_idlen);
 1137                 af = stp->ls_clp->lc_req.nr_nam->sa_family;
 1138                 ldumpp[cnt].ndlck_addrfam = af;
 1139                 switch (af) {
 1140 #ifdef INET
 1141                 case AF_INET:
 1142                         rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
 1143                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
 1144                         break;
 1145 #endif
 1146 #ifdef INET6
 1147                 case AF_INET6:
 1148                         rin6 = (struct sockaddr_in6 *)
 1149                             stp->ls_clp->lc_req.nr_nam;
 1150                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
 1151                         break;
 1152 #endif
 1153                 }
 1154                 lop = LIST_NEXT(lop, lo_lckfile);
 1155                 cnt++;
 1156         }
 1157 
 1158         /*
 1159          * and the delegations.
               * No owner is recorded for these (the owner length is zero).
 1160          */
 1161         stp = LIST_FIRST(&lfp->lf_deleg);
 1162         while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
 1163                 ldumpp[cnt].ndlck_flags = stp->ls_flags;
 1164                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1165                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1166                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1167                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1168                 ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
 1169                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1170                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1171                     stp->ls_clp->lc_idlen);
 1172                 af = stp->ls_clp->lc_req.nr_nam->sa_family;
 1173                 ldumpp[cnt].ndlck_addrfam = af;
 1174                 switch (af) {
 1175 #ifdef INET
 1176                 case AF_INET:
 1177                         rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
 1178                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
 1179                         break;
 1180 #endif
 1181 #ifdef INET6
 1182                 case AF_INET6:
 1183                         rin6 = (struct sockaddr_in6 *)
 1184                             stp->ls_clp->lc_req.nr_nam;
 1185                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
 1186                         break;
 1187 #endif
 1188                 }
 1189                 stp = LIST_NEXT(stp, ls_file);
 1190                 cnt++;
 1191         }
 1192 
 1193         /*
 1194          * If list isn't full, mark end of list by setting the client name
 1195          * to zero length.
 1196          */
 1197         if (cnt < maxcnt)
 1198                 ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
 1199         NFSUNLOCKSTATE();
 1200         NFSLOCKV4ROOTMUTEX();
 1201         nfsv4_relref(&nfsv4rootfs_lock);
 1202         NFSUNLOCKV4ROOTMUTEX();
 1203 }
 1204 
 1205 /*
 1206  * Server timer routine. It can scan any linked list, so long
 1207  * as it holds the spin/mutex lock and there is no exclusive lock on
 1208  * nfsv4rootfs_lock.
 1209  * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
 1210  *  to do this from a callout, since the spin locks work. For
 1211  *  Darwin, I'm not sure what will work correctly yet.)
 1212  * Should be called once per second.
       *
       * This routine only sets flags (LCL_EXPIREIT on clients and bits in
       * nfsrv_stablefirst.nsf_flags) and updates the ls_noopens counters;
       * it does not free anything itself.
 1213  */
 1214 void
 1215 nfsrv_servertimer(void)
 1216 {
 1217         struct nfsclient *clp, *nclp;
 1218         struct nfsstate *stp, *nstp;
 1219         int got_ref, i;
 1220 
 1221         /*
 1222          * Make sure nfsboottime is set. This is used by V3 as well
 1223          * as V4. Note that nfsboottime is not nfsrvboottime, which is
 1224          * only used by the V4 server for leases.
 1225          */
 1226         if (nfsboottime.tv_sec == 0)
 1227                 NFSSETBOOTTIME(nfsboottime);
 1228 
 1229         /*
 1230          * If server hasn't started yet, just return.
 1231          */
 1232         NFSLOCKSTATE();
 1233         if (nfsrv_stablefirst.nsf_eograce == 0) {
 1234                 NFSUNLOCKSTATE();
 1235                 return;
 1236         }
              /*
               * Until NFSNSF_UPDATEDONE is set, the only work done is to flag
               * the end of grace once nsf_eograce has passed.
               */
 1237         if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
 1238                 if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
 1239                     NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
 1240                         nfsrv_stablefirst.nsf_flags |=
 1241                             (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
 1242                 NFSUNLOCKSTATE();
 1243                 return;
 1244         }
 1245 
 1246         /*
 1247          * Try and get a reference count on the nfsv4rootfs_lock so that
 1248          * no nfsd thread can acquire an exclusive lock on it before this
 1249          * call is done. If it is already exclusively locked, just return.
               * The state lock taken above is still held at this point.
 1250          */
 1251         NFSLOCKV4ROOTMUTEX();
 1252         got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
 1253         NFSUNLOCKV4ROOTMUTEX();
 1254         if (got_ref == 0) {
 1255                 NFSUNLOCKSTATE();
 1256                 return;
 1257         }
 1258 
 1259         /*
 1260          * For each client...
               * Clients already marked LCL_EXPIREIT are skipped.
 1261          */
 1262         for (i = 0; i < nfsrv_clienthashsize; i++) {
 1263             clp = LIST_FIRST(&nfsclienthash[i]);
 1264             while (clp != LIST_END(&nfsclienthash[i])) {
 1265                 nclp = LIST_NEXT(clp, lc_hash);
 1266                 if (!(clp->lc_flags & LCL_EXPIREIT)) {
 1267                     if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
 1268                          && ((LIST_EMPTY(&clp->lc_deleg)
 1269                               && LIST_EMPTY(&clp->lc_open)) ||
 1270                              nfsrv_clients > nfsrv_clienthighwater)) ||
 1271                         (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
 1272                         (clp->lc_expiry < NFSD_MONOSEC &&
 1273                          (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
 1274                         /*
 1275                          * Lease has expired several nfsrv_lease times ago:
 1276                          * PLUS
 1277                          *    - no state is associated with it
 1278                          *    OR
 1279                          *    - above high water mark for number of clients
 1280                          *      (nfsrv_clienthighwater should be large enough
 1281                          *       that this only occurs when clients fail to
 1282                          *       use the same nfs_client_id4.id. Maybe somewhat
 1283                          *       higher that the maximum number of clients that
 1284                          *       will mount this server?)
 1285                          * OR
 1286                          * Lease has expired a very long time ago
 1287                          * OR
 1288                          * Lease has expired PLUS the number of opens + locks
 1289                          * has exceeded 90% of capacity
 1290                          *
 1291                          * --> Mark for expiry. The actual expiry will be done
 1292                          *     by an nfsd sometime soon.
 1293                          */
 1294                         clp->lc_flags |= LCL_EXPIREIT;
 1295                         nfsrv_stablefirst.nsf_flags |=
 1296                             (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
 1297                     } else {
 1298                         /*
 1299                          * If there are no opens, increment no open tick cnt
 1300                          * If time exceeds NFSNOOPEN, mark it to be thrown away
 1301                          * otherwise, if there is an open, reset no open time
 1302                          * Hopefully, this will avoid excessive re-creation
 1303                          * of open owners and subsequent open confirms.
                               * NFSNSF_NOOPENS is also set when opens + locks
                               * exceed half of nfsrv_v4statelimit.
 1304                          */
 1305                         stp = LIST_FIRST(&clp->lc_open);
 1306                         while (stp != LIST_END(&clp->lc_open)) {
 1307                                 nstp = LIST_NEXT(stp, ls_list);
 1308                                 if (LIST_EMPTY(&stp->ls_open)) {
 1309                                         stp->ls_noopens++;
 1310                                         if (stp->ls_noopens > NFSNOOPEN ||
 1311                                             (nfsrv_openpluslock * 2) >
 1312                                             nfsrv_v4statelimit)
 1313                                                 nfsrv_stablefirst.nsf_flags |=
 1314                                                         NFSNSF_NOOPENS;
 1315                                 } else {
 1316                                         stp->ls_noopens = 0;
 1317                                 }
 1318                                 stp = nstp;
 1319                         }
 1320                     }
 1321                 }
 1322                 clp = nclp;
 1323             }
 1324         }
 1325         NFSUNLOCKSTATE();
 1326         NFSLOCKV4ROOTMUTEX();
 1327         nfsv4_relref(&nfsv4rootfs_lock);
 1328         NFSUNLOCKV4ROOTMUTEX();
 1329 }
 1330 
 1331 /*
 1332  * The following set of functions free up the various data structures.
 1333  */
 1334 /*
 1335  * Clear out all open/lock state related to this nfsclient.
 1336  * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
 1337  * there are no other active nfsd threads.
       *
       * clp - the client whose open owners (and possibly sessions) are freed
       * p   - passed through to nfsrv_freeopenowner()
       *
       * Delegations are not touched here.
 1338  */
 1339 void
 1340 nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
 1341 {
 1342         struct nfsstate *stp, *nstp;
 1343         struct nfsdsession *sep, *nsep;
 1344 
              /* The _SAFE form is used since each entry is freed while iterating. */
 1345         LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
 1346                 nfsrv_freeopenowner(stp, 1, p);
              /* Sessions are kept when the client is marked admin revoked. */
 1347         if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
 1348                 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
 1349                         (void)nfsrv_freesession(sep, NULL);
 1350 }
 1351 
 1352 /*
 1353  * Free a client that has been cleaned. It should also already have been
 1354  * removed from the lists.
 1355  * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
 1356  *  softclock interrupts are enabled.)
       *
       * clp - the client to free; it must not be used after this returns
       * p   - only used by the code under "notyet"
 1357  */
 1358 void
 1359 nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
 1360 {
 1361 
      /* This block is only compiled when "notyet" is defined. */
 1362 #ifdef notyet
 1363         if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
 1364              (LCL_GSS | LCL_CALLBACKSON) &&
 1365             (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
 1366             clp->lc_handlelen > 0) {
 1367                 clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
 1368                 clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
 1369                 (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
 1370                         NULL, 0, NULL, NULL, NULL, 0, p);
 1371         }
 1372 #endif
              /*
               * Disconnect, then free the socket address, the request mutex,
               * the stateid hash array and finally the client itself.
               */
 1373         newnfs_disconnect(&clp->lc_req);
 1374         free(clp->lc_req.nr_nam, M_SONAME);
 1375         NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 1376         free(clp->lc_stateid, M_NFSDCLIENT);
 1377         free(clp, M_NFSDCLIENT);
              /* The counters are adjusted under the state lock; clp is gone. */
 1378         NFSLOCKSTATE();
 1379         nfsstatsv1.srvclients--;
 1380         nfsrv_openpluslock--;
 1381         nfsrv_clients--;
 1382         NFSUNLOCKSTATE();
 1383 }
 1384 
 1385 /*
 1386  * Free a list of delegation state structures.
 1387  * (This function will also free all nfslockfile structures that no
 1388  *  longer have associated state.)
 1389  */
 1390 void
 1391 nfsrv_freedeleglist(struct nfsstatehead *sthp)
 1392 {
 1393         struct nfsstate *stp, *nstp;
 1394 
 1395         LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
 1396                 nfsrv_freedeleg(stp);
 1397         }
 1398         LIST_INIT(sthp);
 1399 }
 1400 
 1401 /*
 1402  * Free up a delegation.
 1403  */
 1404 static void
 1405 nfsrv_freedeleg(struct nfsstate *stp)
 1406 {
 1407         struct nfslockfile *lfp;
 1408 
 1409         LIST_REMOVE(stp, ls_hash);
 1410         LIST_REMOVE(stp, ls_list);
 1411         LIST_REMOVE(stp, ls_file);
 1412         if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
 1413                 nfsrv_writedelegcnt--;
 1414         lfp = stp->ls_lfp;
 1415         if (LIST_EMPTY(&lfp->lf_open) &&
 1416             LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
 1417             LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
 1418             lfp->lf_usecount == 0 &&
 1419             nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
 1420                 nfsrv_freenfslockfile(lfp);
 1421         free(stp, M_NFSDSTATE);
 1422         nfsstatsv1.srvdelegates--;
 1423         nfsrv_openpluslock--;
 1424         nfsrv_delegatecnt--;
 1425 }
 1426 
 1427 /*
 1428  * This function frees an open owner and all associated opens.
 1429  */
 1430 static void
 1431 nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
 1432 {
 1433         struct nfsstate *nstp, *tstp;
 1434 
 1435         LIST_REMOVE(stp, ls_list);
 1436         /*
 1437          * Now, free all associated opens.
 1438          */
 1439         nstp = LIST_FIRST(&stp->ls_open);
 1440         while (nstp != LIST_END(&stp->ls_open)) {
 1441                 tstp = nstp;
 1442                 nstp = LIST_NEXT(nstp, ls_list);
 1443                 (void) nfsrv_freeopen(tstp, NULL, cansleep, p);
 1444         }
 1445         if (stp->ls_op)
 1446                 nfsrvd_derefcache(stp->ls_op);
 1447         free(stp, M_NFSDSTATE);
 1448         nfsstatsv1.srvopenowners--;
 1449         nfsrv_openpluslock--;
 1450 }
 1451 
 1452 /*
 1453  * This function frees an open (nfsstate open structure) with all associated
 1454  * lock_owners and locks. It also frees the nfslockfile structure iff there
 1455  * are no other opens on the file.
 1456  * Returns 1 if it free'd the nfslockfile, 0 otherwise.
 1457  */
 1458 static int
 1459 nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
 1460 {
 1461         struct nfsstate *nstp, *tstp;
 1462         struct nfslockfile *lfp;
 1463         int ret;
 1464 
 1465         LIST_REMOVE(stp, ls_hash);
 1466         LIST_REMOVE(stp, ls_list);
 1467         LIST_REMOVE(stp, ls_file);
 1468 
 1469         lfp = stp->ls_lfp;
 1470         /*
 1471          * Now, free all lockowners associated with this open.
 1472          */
 1473         LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
 1474                 nfsrv_freelockowner(tstp, vp, cansleep, p);
 1475 
 1476         /*
 1477          * The nfslockfile is freed here if there are no locks
 1478          * associated with the open.
 1479          * If there are locks associated with the open, the
 1480          * nfslockfile structure can be freed via nfsrv_freelockowner().
 1481          * Acquire the state mutex to avoid races with calls to
 1482          * nfsrv_getlockfile().
 1483          */
 1484         if (cansleep != 0)
 1485                 NFSLOCKSTATE();
 1486         if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
 1487             LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
 1488             LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
 1489             lfp->lf_usecount == 0 &&
 1490             (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
 1491                 nfsrv_freenfslockfile(lfp);
 1492                 ret = 1;
 1493         } else
 1494                 ret = 0;
 1495         if (cansleep != 0)
 1496                 NFSUNLOCKSTATE();
 1497         free(stp, M_NFSDSTATE);
 1498         nfsstatsv1.srvopens--;
 1499         nfsrv_openpluslock--;
 1500         return (ret);
 1501 }
 1502 
 1503 /*
 1504  * Frees a lockowner and all associated locks.
 1505  */
 1506 static void
 1507 nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
 1508     NFSPROC_T *p)
 1509 {
 1510 
 1511         LIST_REMOVE(stp, ls_hash);
 1512         LIST_REMOVE(stp, ls_list);
 1513         nfsrv_freeallnfslocks(stp, vp, cansleep, p);
 1514         if (stp->ls_op)
 1515                 nfsrvd_derefcache(stp->ls_op);
 1516         free(stp, M_NFSDSTATE);
 1517         nfsstatsv1.srvlockowners--;
 1518         nfsrv_openpluslock--;
 1519 }
 1520 
 1521 /*
 1522  * Free all the nfs locks on a lockowner.
 1523  */
 1524 static void
 1525 nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
 1526     NFSPROC_T *p)
 1527 {
 1528         struct nfslock *lop, *nlop;
 1529         struct nfsrollback *rlp, *nrlp;
 1530         struct nfslockfile *lfp = NULL;
 1531         int gottvp = 0;
 1532         vnode_t tvp = NULL;
 1533         uint64_t first, end;
 1534 
 1535         if (vp != NULL)
 1536                 ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
 1537         lop = LIST_FIRST(&stp->ls_lock);
 1538         while (lop != LIST_END(&stp->ls_lock)) {
 1539                 nlop = LIST_NEXT(lop, lo_lckowner);
 1540                 /*
 1541                  * Since all locks should be for the same file, lfp should
 1542                  * not change.
 1543                  */
 1544                 if (lfp == NULL)
 1545                         lfp = lop->lo_lfp;
 1546                 else if (lfp != lop->lo_lfp)
 1547                         panic("allnfslocks");
 1548                 /*
 1549                  * If vp is NULL and cansleep != 0, a vnode must be acquired
 1550                  * from the file handle. This only occurs when called from
 1551                  * nfsrv_cleanclient().
 1552                  */
 1553                 if (gottvp == 0) {
 1554                         if (nfsrv_dolocallocks == 0)
 1555                                 tvp = NULL;
 1556                         else if (vp == NULL && cansleep != 0) {
 1557                                 tvp = nfsvno_getvp(&lfp->lf_fh);
 1558                                 if (tvp != NULL)
 1559                                         NFSVOPUNLOCK(tvp, 0);
 1560                         } else
 1561                                 tvp = vp;
 1562                         gottvp = 1;
 1563                 }
 1564 
 1565                 if (tvp != NULL) {
 1566                         if (cansleep == 0)
 1567                                 panic("allnfs2");
 1568                         first = lop->lo_first;
 1569                         end = lop->lo_end;
 1570                         nfsrv_freenfslock(lop);
 1571                         nfsrv_localunlock(tvp, lfp, first, end, p);
 1572                         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
 1573                             nrlp)
 1574                                 free(rlp, M_NFSDROLLBACK);
 1575                         LIST_INIT(&lfp->lf_rollback);
 1576                 } else
 1577                         nfsrv_freenfslock(lop);
 1578                 lop = nlop;
 1579         }
 1580         if (vp == NULL && tvp != NULL)
 1581                 vrele(tvp);
 1582 }
 1583 
 1584 /*
 1585  * Free an nfslock structure.
 1586  */
 1587 static void
 1588 nfsrv_freenfslock(struct nfslock *lop)
 1589 {
 1590 
 1591         if (lop->lo_lckfile.le_prev != NULL) {
 1592                 LIST_REMOVE(lop, lo_lckfile);
 1593                 nfsstatsv1.srvlocks--;
 1594                 nfsrv_openpluslock--;
 1595         }
 1596         LIST_REMOVE(lop, lo_lckowner);
 1597         free(lop, M_NFSDLOCK);
 1598 }
 1599 
 1600 /*
 1601  * This function frees an nfslockfile structure.
 1602  */
 1603 static void
 1604 nfsrv_freenfslockfile(struct nfslockfile *lfp)
 1605 {
 1606 
 1607         LIST_REMOVE(lfp, lf_hash);
 1608         free(lfp, M_NFSDLOCKFILE);
 1609 }
 1610 
 1611 /*
 1612  * This function looks up an nfsstate structure via stateid.
 1613  */
 1614 static int
 1615 nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
 1616     struct nfsstate **stpp)
 1617 {
 1618         struct nfsstate *stp;
 1619         struct nfsstatehead *hp;
 1620         int error = 0;
 1621 
 1622         *stpp = NULL;
 1623         hp = NFSSTATEHASH(clp, *stateidp);
 1624         LIST_FOREACH(stp, hp, ls_hash) {
 1625                 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
 1626                         NFSX_STATEIDOTHER))
 1627                         break;
 1628         }
 1629 
 1630         /*
 1631          * If no state id in list, return NFSERR_BADSTATEID.
 1632          */
 1633         if (stp == LIST_END(hp)) {
 1634                 error = NFSERR_BADSTATEID;
 1635                 goto out;
 1636         }
 1637         *stpp = stp;
 1638 
 1639 out:
 1640         NFSEXITCODE(error);
 1641         return (error);
 1642 }
 1643 
 1644 /*
 1645  * This function gets an nfsstate structure via owner string.
 1646  */
 1647 static void
 1648 nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
 1649     struct nfsstate **stpp)
 1650 {
 1651         struct nfsstate *stp;
 1652 
 1653         *stpp = NULL;
 1654         LIST_FOREACH(stp, hp, ls_list) {
 1655                 if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
 1656                   !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
 1657                         *stpp = stp;
 1658                         return;
 1659                 }
 1660         }
 1661 }
 1662 
 1663 /*
 1664  * Lock control function called to update lock status.
 1665  * Returns 0 upon success, -1 if there is no lock and the flags indicate
 1666  * that one isn't to be created and an NFSERR_xxx for other errors.
 1667  * The structures new_stp and new_lop are passed in as pointers that should
 1668  * be set to NULL if the structure is used and shouldn't be free'd.
 1669  * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
 1670  * never used and can safely be allocated on the stack. For all other
 1671  * cases, *new_stpp and *new_lopp should be malloc'd before the call,
 1672  * in case they are used.
 1673  */
 1674 int
 1675 nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
 1676     struct nfslock **new_lopp, struct nfslockconflict *cfp,
 1677     nfsquad_t clientid, nfsv4stateid_t *stateidp,
 1678     __unused struct nfsexstuff *exp,
 1679     struct nfsrv_descript *nd, NFSPROC_T *p)
 1680 {
 1681         struct nfslock *lop;
 1682         struct nfsstate *new_stp = *new_stpp;
 1683         struct nfslock *new_lop = *new_lopp;
 1684         struct nfsstate *tstp, *mystp, *nstp;
 1685         int specialid = 0;
 1686         struct nfslockfile *lfp;
 1687         struct nfslock *other_lop = NULL;
 1688         struct nfsstate *stp, *lckstp = NULL;
 1689         struct nfsclient *clp = NULL;
 1690         u_int32_t bits;
 1691         int error = 0, haslock = 0, ret, reterr;
 1692         int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
 1693         fhandle_t nfh;
 1694         uint64_t first, end;
 1695         uint32_t lock_flags;
 1696 
 1697         if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
 1698                 /*
 1699                  * Note the special cases of "all 1s" or "all 0s" stateids and
 1700                  * let reads with all 1s go ahead.
 1701                  */
 1702                 if (new_stp->ls_stateid.seqid == 0x0 &&
 1703                     new_stp->ls_stateid.other[0] == 0x0 &&
 1704                     new_stp->ls_stateid.other[1] == 0x0 &&
 1705                     new_stp->ls_stateid.other[2] == 0x0)
 1706                         specialid = 1;
 1707                 else if (new_stp->ls_stateid.seqid == 0xffffffff &&
 1708                     new_stp->ls_stateid.other[0] == 0xffffffff &&
 1709                     new_stp->ls_stateid.other[1] == 0xffffffff &&
 1710                     new_stp->ls_stateid.other[2] == 0xffffffff)
 1711                         specialid = 2;
 1712         }
 1713 
 1714         /*
 1715          * Check for restart conditions (client and server).
 1716          */
 1717         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 1718             &new_stp->ls_stateid, specialid);
 1719         if (error)
 1720                 goto out;
 1721 
 1722         /*
 1723          * Check for state resource limit exceeded.
 1724          */
 1725         if ((new_stp->ls_flags & NFSLCK_LOCK) &&
 1726             nfsrv_openpluslock > nfsrv_v4statelimit) {
 1727                 error = NFSERR_RESOURCE;
 1728                 goto out;
 1729         }
 1730 
 1731         /*
 1732          * For the lock case, get another nfslock structure,
 1733          * just in case we need it.
 1734          * Malloc now, before we start sifting through the linked lists,
 1735          * in case we have to wait for memory.
 1736          */
 1737 tryagain:
 1738         if (new_stp->ls_flags & NFSLCK_LOCK)
 1739                 other_lop = malloc(sizeof (struct nfslock),
 1740                     M_NFSDLOCK, M_WAITOK);
 1741         filestruct_locked = 0;
 1742         reterr = 0;
 1743         lfp = NULL;
 1744 
 1745         /*
 1746          * Get the lockfile structure for CFH now, so we can do a sanity
 1747          * check against the stateid, before incrementing the seqid#, since
 1748          * we want to return NFSERR_BADSTATEID on failure and the seqid#
 1749          * shouldn't be incremented for this case.
 1750          * If nfsrv_getlockfile() returns -1, it means "not found", which
 1751          * will be handled later.
 1752          * If we are doing Lock/LockU and local locking is enabled, sleep
 1753          * lock the nfslockfile structure.
 1754          */
 1755         getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
 1756         NFSLOCKSTATE();
 1757         if (getlckret == 0) {
 1758                 if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
 1759                     nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
 1760                         getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
 1761                             &lfp, &nfh, 1);
 1762                         if (getlckret == 0)
 1763                                 filestruct_locked = 1;
 1764                 } else
 1765                         getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
 1766                             &lfp, &nfh, 0);
 1767         }
 1768         if (getlckret != 0 && getlckret != -1)
 1769                 reterr = getlckret;
 1770 
 1771         if (filestruct_locked != 0) {
 1772                 LIST_INIT(&lfp->lf_rollback);
 1773                 if ((new_stp->ls_flags & NFSLCK_LOCK)) {
 1774                         /*
 1775                          * For local locking, do the advisory locking now, so
 1776                          * that any conflict can be detected. A failure later
 1777                          * can be rolled back locally. If an error is returned,
 1778                          * struct nfslockfile has been unlocked and any local
 1779                          * locking rolled back.
 1780                          */
 1781                         NFSUNLOCKSTATE();
 1782                         if (vnode_unlocked == 0) {
 1783                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
 1784                                 vnode_unlocked = 1;
 1785                                 NFSVOPUNLOCK(vp, 0);
 1786                         }
 1787                         reterr = nfsrv_locallock(vp, lfp,
 1788                             (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
 1789                             new_lop->lo_first, new_lop->lo_end, cfp, p);
 1790                         NFSLOCKSTATE();
 1791                 }
 1792         }
 1793 
 1794         if (specialid == 0) {
 1795             if (new_stp->ls_flags & NFSLCK_TEST) {
 1796                 /*
 1797                  * RFC 3530 does not list LockT as an op that renews a
 1798                  * lease, but the consensus seems to be that it is ok
 1799                  * for a server to do so.
 1800                  */
 1801                 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 1802                     (nfsquad_t)((u_quad_t)0), 0, nd, p);
 1803 
 1804                 /*
 1805                  * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
 1806                  * error returns for LockT, just go ahead and test for a lock,
 1807                  * since there are no locks for this client, but other locks
 1808                  * can conflict. (ie. same client will always be false)
 1809                  */
 1810                 if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
 1811                     error = 0;
 1812                 lckstp = new_stp;
 1813             } else {
 1814               error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 1815                 (nfsquad_t)((u_quad_t)0), 0, nd, p);
 1816               if (error == 0)
 1817                 /*
 1818                  * Look up the stateid
 1819                  */
 1820                 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
 1821                   new_stp->ls_flags, &stp);
 1822               /*
 1823                * do some sanity checks for an unconfirmed open or a
 1824                * stateid that refers to the wrong file, for an open stateid
 1825                */
 1826               if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
 1827                   ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
 1828                    (getlckret == 0 && stp->ls_lfp != lfp))){
 1829                       /*
 1830                        * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID
 1831                        * The only exception is using SETATTR with SIZE.
 1832                        * */
 1833                     if ((new_stp->ls_flags &
 1834                          (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
 1835                              error = NFSERR_BADSTATEID;
 1836               }
 1837               
 1838                 if (error == 0 &&
 1839                   (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
 1840                   getlckret == 0 && stp->ls_lfp != lfp)
 1841                         error = NFSERR_BADSTATEID;
 1842 
 1843               /*
 1844                * If the lockowner stateid doesn't refer to the same file,
 1845                * I believe that is considered ok, since some clients will
 1846                * only create a single lockowner and use that for all locks
 1847                * on all files.
 1848                * For now, log it as a diagnostic, instead of considering it
 1849                * a BadStateid.
 1850                */
 1851               if (error == 0 && (stp->ls_flags &
 1852                   (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
 1853                   getlckret == 0 && stp->ls_lfp != lfp) {
 1854 #ifdef DIAGNOSTIC
 1855                   printf("Got a lock statid for different file open\n");
 1856 #endif
 1857                   /*
 1858                   error = NFSERR_BADSTATEID;
 1859                   */
 1860               }
 1861 
 1862               if (error == 0) {
 1863                     if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
 1864                         /*
 1865                          * If haslock set, we've already checked the seqid.
 1866                          */
 1867                         if (!haslock) {
 1868                             if (stp->ls_flags & NFSLCK_OPEN)
 1869                                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 1870                                     stp->ls_openowner, new_stp->ls_op);
 1871                             else
 1872                                 error = NFSERR_BADSTATEID;
 1873                         }
 1874                         if (!error)
 1875                             nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
 1876                         if (lckstp) {
 1877                             /*
 1878                              * For NFSv4.1 and NFSv4.2 allow an
 1879                              * open_to_lock_owner when the lock_owner already
 1880                              * exists.  Just clear NFSLCK_OPENTOLOCK so that
 1881                              * a new lock_owner will not be created.
 1882                              * RFC7530 states that the error for NFSv4.0
 1883                              * is NFS4ERR_BAD_SEQID.
 1884                              */
 1885                             if ((nd->nd_flag & ND_NFSV41) != 0)
 1886                                 new_stp->ls_flags &= ~NFSLCK_OPENTOLOCK;
 1887                             else
 1888                                 error = NFSERR_BADSEQID;
 1889                         } else
 1890                             lckstp = new_stp;
 1891                     } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
 1892                         /*
 1893                          * If haslock set, ditto above.
 1894                          */
 1895                         if (!haslock) {
 1896                             if (stp->ls_flags & NFSLCK_OPEN)
 1897                                 error = NFSERR_BADSTATEID;
 1898                             else
 1899                                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 1900                                     stp, new_stp->ls_op);
 1901                         }
 1902                         lckstp = stp;
 1903                     } else {
 1904                         lckstp = stp;
 1905                     }
 1906               }
 1907               /*
 1908                * If the seqid part of the stateid isn't the same, return
 1909                * NFSERR_OLDSTATEID for cases other than I/O Ops.
 1910                * For I/O Ops, only return NFSERR_OLDSTATEID if
 1911                * nfsrv_returnoldstateid is set. (The consensus on the email
 1912                * list was that most clients would prefer to not receive
 1913                * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
 1914                * is what will happen, so I use the nfsrv_returnoldstateid to
 1915                * allow for either server configuration.)
 1916                */
 1917               if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
 1918                   (((nd->nd_flag & ND_NFSV41) == 0 &&
 1919                    (!(new_stp->ls_flags & NFSLCK_CHECK) ||
 1920                     nfsrv_returnoldstateid)) ||
 1921                    ((nd->nd_flag & ND_NFSV41) != 0 &&
 1922                     new_stp->ls_stateid.seqid != 0)))
 1923                     error = NFSERR_OLDSTATEID;
 1924             }
 1925         }
 1926 
 1927         /*
 1928          * Now we can check for grace.
 1929          */
 1930         if (!error)
 1931                 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
 1932         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
 1933                 nfsrv_checkstable(clp))
 1934                 error = NFSERR_NOGRACE;
 1935         /*
 1936          * If we successfully Reclaimed state, note that.
 1937          */
 1938         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
 1939                 nfsrv_markstable(clp);
 1940 
 1941         /*
 1942          * At this point, either error == NFSERR_BADSTATEID or the
 1943          * seqid# has been updated, so we can return any error.
 1944          * If error == 0, there may be an error in:
 1945          *    nd_repstat - Set by the calling function.
 1946          *    reterr - Set above, if getting the nfslockfile structure
 1947          *       or acquiring the local lock failed.
 1948          *    (If both of these are set, nd_repstat should probably be
 1949          *     returned, since that error was detected before this
 1950          *     function call.)
 1951          */
 1952         if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
 1953                 if (error == 0) {
 1954                         if (nd->nd_repstat != 0)
 1955                                 error = nd->nd_repstat;
 1956                         else
 1957                                 error = reterr;
 1958                 }
 1959                 if (filestruct_locked != 0) {
 1960                         /* Roll back local locks. */
 1961                         NFSUNLOCKSTATE();
 1962                         if (vnode_unlocked == 0) {
 1963                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
 1964                                 vnode_unlocked = 1;
 1965                                 NFSVOPUNLOCK(vp, 0);
 1966                         }
 1967                         nfsrv_locallock_rollback(vp, lfp, p);
 1968                         NFSLOCKSTATE();
 1969                         nfsrv_unlocklf(lfp);
 1970                 }
 1971                 NFSUNLOCKSTATE();
 1972                 goto out;
 1973         }
 1974 
 1975         /*
 1976          * Check the nfsrv_getlockfile return.
 1977          * Returned -1 if no structure found.
 1978          */
 1979         if (getlckret == -1) {
 1980                 error = NFSERR_EXPIRED;
 1981                 /*
 1982                  * Called from lockt, so no lock is OK.
 1983                  */
 1984                 if (new_stp->ls_flags & NFSLCK_TEST) {
 1985                         error = 0;
 1986                 } else if (new_stp->ls_flags &
 1987                     (NFSLCK_CHECK | NFSLCK_SETATTR)) {
 1988                         /*
 1989                          * Called to check for a lock, OK if the stateid is all
 1990                          * 1s or all 0s, but there should be an nfsstate
 1991                          * otherwise.
 1992                          * (ie. If there is no open, I'll assume no share
 1993                          *  deny bits.)
 1994                          */
 1995                         if (specialid)
 1996                                 error = 0;
 1997                         else
 1998                                 error = NFSERR_BADSTATEID;
 1999                 }
 2000                 NFSUNLOCKSTATE();
 2001                 goto out;
 2002         }
 2003 
 2004         /*
 2005          * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
 2006          * For NFSLCK_CHECK, allow a read if write access is granted,
 2007          * but check for a deny. For NFSLCK_LOCK, require correct access,
 2008          * which implies a conflicting deny can't exist.
 2009          */
 2010         if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
 2011             /*
 2012              * Four kinds of state id:
 2013              * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
 2014              * - stateid for an open
 2015              * - stateid for a delegation
 2016              * - stateid for a lock owner
 2017              */
 2018             if (!specialid) {
 2019                 if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
 2020                     delegation = 1;
 2021                     mystp = stp;
 2022                     nfsrv_delaydelegtimeout(stp);
 2023                 } else if (stp->ls_flags & NFSLCK_OPEN) {
 2024                     mystp = stp;
 2025                 } else {
 2026                     mystp = stp->ls_openstp;
 2027                 }
 2028                 /*
 2029                  * If locking or checking, require correct access
 2030                  * bit set.
 2031                  */
 2032                 if (((new_stp->ls_flags & NFSLCK_LOCK) &&
 2033                      !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
 2034                        mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
 2035                     ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
 2036                       (NFSLCK_CHECK | NFSLCK_READACCESS) &&
 2037                      !(mystp->ls_flags & NFSLCK_READACCESS) &&
 2038                      nfsrv_allowreadforwriteopen == 0) ||
 2039                     ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
 2040                       (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
 2041                      !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
 2042                         if (filestruct_locked != 0) {
 2043                                 /* Roll back local locks. */
 2044                                 NFSUNLOCKSTATE();
 2045                                 if (vnode_unlocked == 0) {
 2046                                         ASSERT_VOP_ELOCKED(vp,
 2047                                             "nfsrv_lockctrl3");
 2048                                         vnode_unlocked = 1;
 2049                                         NFSVOPUNLOCK(vp, 0);
 2050                                 }
 2051                                 nfsrv_locallock_rollback(vp, lfp, p);
 2052                                 NFSLOCKSTATE();
 2053                                 nfsrv_unlocklf(lfp);
 2054                         }
 2055                         NFSUNLOCKSTATE();
 2056                         error = NFSERR_OPENMODE;
 2057                         goto out;
 2058                 }
 2059             } else
 2060                 mystp = NULL;
 2061             if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
 2062                 /*
 2063                  * Check for a conflicting deny bit.
 2064                  */
 2065                 LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
 2066                     if (tstp != mystp) {
 2067                         bits = tstp->ls_flags;
 2068                         bits >>= NFSLCK_SHIFT;
 2069                         if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
 2070                             KASSERT(vnode_unlocked == 0,
 2071                                 ("nfsrv_lockctrl: vnode unlocked1"));
 2072                             ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
 2073                                 vp, p);
 2074                             if (ret == 1) {
 2075                                 /*
 2076                                 * nfsrv_clientconflict unlocks state
 2077                                  * when it returns non-zero.
 2078                                  */
 2079                                 lckstp = NULL;
 2080                                 goto tryagain;
 2081                             }
 2082                             if (ret == 0)
 2083                                 NFSUNLOCKSTATE();
 2084                             if (ret == 2)
 2085                                 error = NFSERR_PERM;
 2086                             else
 2087                                 error = NFSERR_OPENMODE;
 2088                             goto out;
 2089                         }
 2090                     }
 2091                 }
 2092 
 2093                 /* We're outta here */
 2094                 NFSUNLOCKSTATE();
 2095                 goto out;
 2096             }
 2097         }
 2098 
 2099         /*
 2100          * For setattr, just get rid of all the Delegations for other clients.
 2101          */
 2102         if (new_stp->ls_flags & NFSLCK_SETATTR) {
 2103                 KASSERT(vnode_unlocked == 0,
 2104                     ("nfsrv_lockctrl: vnode unlocked2"));
 2105                 ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
 2106                 if (ret) {
 2107                         /*
 2108                          * nfsrv_cleandeleg() unlocks state when it
 2109                          * returns non-zero.
 2110                          */
 2111                         if (ret == -1) {
 2112                                 lckstp = NULL;
 2113                                 goto tryagain;
 2114                         }
 2115                         error = ret;
 2116                         goto out;
 2117                 }
 2118                 if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
 2119                     (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
 2120                      LIST_EMPTY(&lfp->lf_deleg))) {
 2121                         NFSUNLOCKSTATE();
 2122                         goto out;
 2123                 }
 2124         }
 2125 
 2126         /*
 2127          * Check for a conflicting delegation. If one is found, call
 2128          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2129          * been set yet, it will get the lock. Otherwise, it will recall
 2130          * the delegation. Then, we try try again...
 2131          * I currently believe the conflict algorithm to be:
 2132          * For Lock Ops (Lock/LockT/LockU)
 2133          * - there is a conflict iff a different client has a write delegation
 2134          * For Reading (Read Op)
 2135          * - there is a conflict iff a different client has a write delegation
 2136          *   (the specialids are always a different client)
 2137          * For Writing (Write/Setattr of size)
 2138          * - there is a conflict if a different client has any delegation
 2139          * - there is a conflict if the same client has a read delegation
 2140          *   (I don't understand why this isn't allowed, but that seems to be
 2141          *    the current consensus?)
 2142          */
 2143         tstp = LIST_FIRST(&lfp->lf_deleg);
 2144         while (tstp != LIST_END(&lfp->lf_deleg)) {
 2145             nstp = LIST_NEXT(tstp, ls_file);
 2146             if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
 2147                  ((new_stp->ls_flags & NFSLCK_CHECK) &&
 2148                   (new_lop->lo_flags & NFSLCK_READ))) &&
 2149                   clp != tstp->ls_clp &&
 2150                  (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2151                  ((new_stp->ls_flags & NFSLCK_CHECK) &&
 2152                    (new_lop->lo_flags & NFSLCK_WRITE) &&
 2153                   (clp != tstp->ls_clp ||
 2154                    (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
 2155                 ret = 0;
 2156                 if (filestruct_locked != 0) {
 2157                         /* Roll back local locks. */
 2158                         NFSUNLOCKSTATE();
 2159                         if (vnode_unlocked == 0) {
 2160                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
 2161                                 NFSVOPUNLOCK(vp, 0);
 2162                         }
 2163                         nfsrv_locallock_rollback(vp, lfp, p);
 2164                         NFSLOCKSTATE();
 2165                         nfsrv_unlocklf(lfp);
 2166                         NFSUNLOCKSTATE();
 2167                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2168                         vnode_unlocked = 0;
 2169                         if ((vp->v_iflag & VI_DOOMED) != 0)
 2170                                 ret = NFSERR_SERVERFAULT;
 2171                         NFSLOCKSTATE();
 2172                 }
 2173                 if (ret == 0)
 2174                         ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
 2175                 if (ret) {
 2176                     /*
 2177                      * nfsrv_delegconflict unlocks state when it
 2178                      * returns non-zero, which it always does.
 2179                      */
 2180                     if (other_lop) {
 2181                         free(other_lop, M_NFSDLOCK);
 2182                         other_lop = NULL;
 2183                     }
 2184                     if (ret == -1) {
 2185                         lckstp = NULL;
 2186                         goto tryagain;
 2187                     }
 2188                     error = ret;
 2189                     goto out;
 2190                 }
 2191                 /* Never gets here. */
 2192             }
 2193             tstp = nstp;
 2194         }
 2195 
 2196         /*
 2197          * Handle the unlock case by calling nfsrv_updatelock().
 2198          * (Should I have done some access checking above for unlock? For now,
 2199          *  just let it happen.)
 2200          */
 2201         if (new_stp->ls_flags & NFSLCK_UNLOCK) {
 2202                 first = new_lop->lo_first;
 2203                 end = new_lop->lo_end;
 2204                 nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
 2205                 stateidp->seqid = ++(stp->ls_stateid.seqid);
 2206                 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 2207                         stateidp->seqid = stp->ls_stateid.seqid = 1;
 2208                 stateidp->other[0] = stp->ls_stateid.other[0];
 2209                 stateidp->other[1] = stp->ls_stateid.other[1];
 2210                 stateidp->other[2] = stp->ls_stateid.other[2];
 2211                 if (filestruct_locked != 0) {
 2212                         NFSUNLOCKSTATE();
 2213                         if (vnode_unlocked == 0) {
 2214                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
 2215                                 vnode_unlocked = 1;
 2216                                 NFSVOPUNLOCK(vp, 0);
 2217                         }
 2218                         /* Update the local locks. */
 2219                         nfsrv_localunlock(vp, lfp, first, end, p);
 2220                         NFSLOCKSTATE();
 2221                         nfsrv_unlocklf(lfp);
 2222                 }
 2223                 NFSUNLOCKSTATE();
 2224                 goto out;
 2225         }
 2226 
 2227         /*
 2228          * Search for a conflicting lock. A lock conflicts if:
 2229          * - the lock range overlaps and
 2230          * - at least one lock is a write lock and
 2231          * - it is not owned by the same lock owner
 2232          */
 2233         if (!delegation) {
 2234           LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
 2235             if (new_lop->lo_end > lop->lo_first &&
 2236                 new_lop->lo_first < lop->lo_end &&
 2237                 (new_lop->lo_flags == NFSLCK_WRITE ||
 2238                  lop->lo_flags == NFSLCK_WRITE) &&
 2239                 lckstp != lop->lo_stp &&
 2240                 (clp != lop->lo_stp->ls_clp ||
 2241                  lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
 2242                  NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
 2243                     lckstp->ls_ownerlen))) {
 2244                 if (other_lop) {
 2245                     free(other_lop, M_NFSDLOCK);
 2246                     other_lop = NULL;
 2247                 }
 2248                 if (vnode_unlocked != 0)
 2249                     ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
 2250                         NULL, p);
 2251                 else
 2252                     ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
 2253                         vp, p);
 2254                 if (ret == 1) {
 2255                     if (filestruct_locked != 0) {
 2256                         if (vnode_unlocked == 0) {
 2257                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
 2258                                 NFSVOPUNLOCK(vp, 0);
 2259                         }
 2260                         /* Roll back local locks. */
 2261                         nfsrv_locallock_rollback(vp, lfp, p);
 2262                         NFSLOCKSTATE();
 2263                         nfsrv_unlocklf(lfp);
 2264                         NFSUNLOCKSTATE();
 2265                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2266                         vnode_unlocked = 0;
 2267                         if ((vp->v_iflag & VI_DOOMED) != 0) {
 2268                                 error = NFSERR_SERVERFAULT;
 2269                                 goto out;
 2270                         }
 2271                     }
 2272                     /*
 2273                      * nfsrv_clientconflict() unlocks state when it
 2274                      * returns non-zero.
 2275                      */
 2276                     lckstp = NULL;
 2277                     goto tryagain;
 2278                 }
 2279                 /*
 2280                  * Found a conflicting lock, so record the conflict and
 2281                  * return the error.
 2282                  */
 2283                 if (cfp != NULL && ret == 0) {
 2284                     cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
 2285                     cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
 2286                     cfp->cl_first = lop->lo_first;
 2287                     cfp->cl_end = lop->lo_end;
 2288                     cfp->cl_flags = lop->lo_flags;
 2289                     cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
 2290                     NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
 2291                         cfp->cl_ownerlen);
 2292                 }
 2293                 if (ret == 2)
 2294                     error = NFSERR_PERM;
 2295                 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2296                     error = NFSERR_RECLAIMCONFLICT;
 2297                 else if (new_stp->ls_flags & NFSLCK_CHECK)
 2298                     error = NFSERR_LOCKED;
 2299                 else
 2300                     error = NFSERR_DENIED;
 2301                 if (filestruct_locked != 0 && ret == 0) {
 2302                         /* Roll back local locks. */
 2303                         NFSUNLOCKSTATE();
 2304                         if (vnode_unlocked == 0) {
 2305                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
 2306                                 vnode_unlocked = 1;
 2307                                 NFSVOPUNLOCK(vp, 0);
 2308                         }
 2309                         nfsrv_locallock_rollback(vp, lfp, p);
 2310                         NFSLOCKSTATE();
 2311                         nfsrv_unlocklf(lfp);
 2312                 }
 2313                 if (ret == 0)
 2314                         NFSUNLOCKSTATE();
 2315                 goto out;
 2316             }
 2317           }
 2318         }
 2319 
 2320         /*
 2321          * We only get here if there was no lock that conflicted.
 2322          */
 2323         if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
 2324                 NFSUNLOCKSTATE();
 2325                 goto out;
 2326         }
 2327 
 2328         /*
 2329          * We only get here when we are creating or modifying a lock.
 2330          * There are two variants:
 2331          * - exist_lock_owner where lock_owner exists
 2332          * - open_to_lock_owner with new lock_owner
 2333          */
 2334         first = new_lop->lo_first;
 2335         end = new_lop->lo_end;
 2336         lock_flags = new_lop->lo_flags;
 2337         if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
 2338                 nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
 2339                 stateidp->seqid = ++(lckstp->ls_stateid.seqid);
 2340                 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 2341                         stateidp->seqid = lckstp->ls_stateid.seqid = 1;
 2342                 stateidp->other[0] = lckstp->ls_stateid.other[0];
 2343                 stateidp->other[1] = lckstp->ls_stateid.other[1];
 2344                 stateidp->other[2] = lckstp->ls_stateid.other[2];
 2345         } else {
 2346                 /*
 2347                  * The new open_to_lock_owner case.
 2348                  * Link the new nfsstate into the lists.
 2349                  */
 2350                 new_stp->ls_seq = new_stp->ls_opentolockseq;
 2351                 nfsrvd_refcache(new_stp->ls_op);
 2352                 stateidp->seqid = new_stp->ls_stateid.seqid = 1;
 2353                 stateidp->other[0] = new_stp->ls_stateid.other[0] =
 2354                     clp->lc_clientid.lval[0];
 2355                 stateidp->other[1] = new_stp->ls_stateid.other[1] =
 2356                     clp->lc_clientid.lval[1];
 2357                 stateidp->other[2] = new_stp->ls_stateid.other[2] =
 2358                     nfsrv_nextstateindex(clp);
 2359                 new_stp->ls_clp = clp;
 2360                 LIST_INIT(&new_stp->ls_lock);
 2361                 new_stp->ls_openstp = stp;
 2362                 new_stp->ls_lfp = lfp;
 2363                 nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
 2364                     lfp);
 2365                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
 2366                     new_stp, ls_hash);
 2367                 LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
 2368                 *new_lopp = NULL;
 2369                 *new_stpp = NULL;
 2370                 nfsstatsv1.srvlockowners++;
 2371                 nfsrv_openpluslock++;
 2372         }
 2373         if (filestruct_locked != 0) {
 2374                 NFSUNLOCKSTATE();
 2375                 nfsrv_locallock_commit(lfp, lock_flags, first, end);
 2376                 NFSLOCKSTATE();
 2377                 nfsrv_unlocklf(lfp);
 2378         }
 2379         NFSUNLOCKSTATE();
 2380 
 2381 out:
 2382         if (haslock) {
 2383                 NFSLOCKV4ROOTMUTEX();
 2384                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2385                 NFSUNLOCKV4ROOTMUTEX();
 2386         }
 2387         if (vnode_unlocked != 0) {
 2388                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2389                 if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
 2390                         error = NFSERR_SERVERFAULT;
 2391         }
 2392         if (other_lop)
 2393                 free(other_lop, M_NFSDLOCK);
 2394         NFSEXITCODE2(error, nd);
 2395         return (error);
 2396 }
 2397 
 2398 /*
 2399  * Check for state errors for Open.
 2400  * repstat is passed back out as an error if more critical errors
 2401  * are not detected.
       *
       * Arguments, as used by this function:
       * clientid - identifies the client; handed to nfsrv_checkrestart() and
       *            nfsrv_getclient().
       * stateidp - only examined when NFSLCK_DELEGCUR is set, where it must
       *            match a delegation on the file.
       * new_stp  - the state for the open being checked; its ls_flags,
       *            ls_stateid, ls_seq and ls_op fields are used.
       * vp       - vnode of the file, or NULL when the file does not exist
       *            yet, in which case only the client, open owner, grace and
       *            repstat checks are done.
       * nd, p    - handed through to the helper functions; nd->nd_flag is
       *            also tested for ND_NFSV41.
       * repstat  - error to return when none of the earlier checks fail.
       *
       * Returns 0 when no error is found, otherwise a non-zero error
       * (NFSERR_xxx values are set directly here; non-zero results from
       * nfsrv_checkrestart(), nfsrv_getclient(), nfsrv_checkseqid(),
       * nfsrv_checkgrace(), nfsrv_getlockfh(), nfsrv_getlockfile() and
       * nfsrv_delegconflict() are passed through).
       *
       * The state lock is taken with NFSLOCKSTATE() after the tryagain label
       * and is dropped before returning, either here or, per the comments
       * below, by the helper that returned non-zero.  haslock is passed by
       * reference to nfsrv_clientconflict() and nfsrv_delegconflict(); where
       * it is non-zero on the exit paths that test it, nfsv4rootfs_lock is
       * released via nfsv4_unlock().
 2402  */
 2403 int
 2404 nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
 2405     struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
 2406     NFSPROC_T *p, int repstat)
 2407 {
 2408         struct nfsstate *stp, *nstp;
 2409         struct nfsclient *clp;
 2410         struct nfsstate *ownerstp;
 2411         struct nfslockfile *lfp, *new_lfp;
 2412         int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
 2413 
              /*
               * readonly is set when the only share bit requested is read
               * access; it selects which delegation conflict test is applied
               * near the end of the function.
               */
 2414         if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
 2415                 readonly = 1;
 2416         /*
 2417          * Check for restart conditions (client and server).
 2418          */
 2419         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 2420                 &new_stp->ls_stateid, 0);
 2421         if (error)
 2422                 goto out;
 2423 
 2424         /*
 2425          * Check for state resource limit exceeded.
 2426          * Technically this should be SMP protected, but the worst
 2427          * case error is "out by one or two" on the count when it
 2428          * returns NFSERR_RESOURCE and the limit is just a rather
 2429          * arbitrary high water mark, so no harm is done.
 2430          */
 2431         if (nfsrv_openpluslock > nfsrv_v4statelimit) {
 2432                 error = NFSERR_RESOURCE;
 2433                 goto out;
 2434         }
 2435 
 2436 tryagain:
              /*
               * new_lfp is allocated, and the file handle fetched (only when
               * vp is non-NULL), before the state lock is acquired.  Every
               * "goto tryagain" below comes after the "if (new_lfp) free()"
               * further down and, per the comments at those sites, with the
               * state lock already dropped by the helper.
               */
 2437         new_lfp = malloc(sizeof (struct nfslockfile),
 2438             M_NFSDLOCKFILE, M_WAITOK);
 2439         if (vp)
 2440                 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
 2441                     NULL, p);
 2442         NFSLOCKSTATE();
 2443         /*
 2444          * Get the nfsclient structure.
 2445          */
 2446         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 2447             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 2448 
 2449         /*
 2450          * Look up the open owner. See if it needs confirmation and
 2451          * check the seq#, as required.
               * (ownerstp is only assigned, and only read, when error is 0.)
 2452          */
 2453         if (!error)
 2454                 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
 2455 
 2456         if (!error && ownerstp) {
 2457                 error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
 2458                     new_stp->ls_op);
 2459                 /*
 2460                  * If the OpenOwner hasn't been confirmed, assume the
 2461                  * old one was a replay and this one is ok.
 2462                  * See: RFC3530 Sec. 14.2.18.
 2463                  */
 2464                 if (error == NFSERR_BADSEQID &&
 2465                     (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
 2466                         error = 0;
 2467         }
 2468 
 2469         /*
 2470          * Check for grace.
               * A reclaim for which nfsrv_checkstable() returns non-zero is
               * failed with NFSERR_NOGRACE.
 2471          */
 2472         if (!error)
 2473                 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
 2474         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
 2475                 nfsrv_checkstable(clp))
 2476                 error = NFSERR_NOGRACE;
 2477 
 2478         /*
 2479          * If none of the above errors occurred, let repstat be
 2480          * returned.
 2481          */
 2482         if (repstat && !error)
 2483                 error = repstat;
 2484         if (error) {
 2485                 NFSUNLOCKSTATE();
 2486                 if (haslock) {
 2487                         NFSLOCKV4ROOTMUTEX();
 2488                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2489                         NFSUNLOCKV4ROOTMUTEX();
 2490                 }
 2491                 free(new_lfp, M_NFSDLOCKFILE);
 2492                 goto out;
 2493         }
 2494 
 2495         /*
 2496          * If vp == NULL, the file doesn't exist yet, so return ok.
 2497          * (This always happens on the first pass, so haslock must be 0.)
 2498          */
 2499         if (vp == NULL) {
 2500                 NFSUNLOCKSTATE();
 2501                 free(new_lfp, M_NFSDLOCKFILE);
 2502                 goto out;
 2503         }
 2504 
 2505         /*
 2506          * Get the structure for the underlying file.
               * A failure recorded earlier by nfsrv_getlockfh() is reported
               * here, in which case nfsrv_getlockfile() is not called.
               * new_lfp is passed by reference and is freed afterwards only if
               * it is still non-NULL.
               * NOTE(review): presumably nfsrv_getlockfile() NULLs new_lfp when
               * it keeps the structure - confirm against its definition.
 2507          */
 2508         if (getfhret)
 2509                 error = getfhret;
 2510         else
 2511                 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
 2512                     NULL, 0);
 2513         if (new_lfp)
 2514                 free(new_lfp, M_NFSDLOCKFILE);
 2515         if (error) {
 2516                 NFSUNLOCKSTATE();
 2517                 if (haslock) {
 2518                         NFSLOCKV4ROOTMUTEX();
 2519                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2520                         NFSUNLOCKV4ROOTMUTEX();
 2521                 }
 2522                 goto out;
 2523         }
 2524 
 2525         /*
 2526          * Search for a conflicting open/share.
 2527          */
 2528         if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
 2529             /*
 2530              * For Delegate_Cur, search for the matching Delegation,
 2531              * which indicates no conflict.
 2532              * An old delegation should have been recovered by the
 2533              * client doing a Claim_DELEGATE_Prev, so I won't let
 2534              * it match and return NFSERR_EXPIRED. Should I let it
 2535              * match?
                   * A delegation matches when its "other" field equals that of
                   * stateidp and the seqids are equal; for ND_NFSV41 a
                   * stateidp seqid of 0 is accepted in place of an equal seqid.
 2536              */
 2537             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 2538                 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
 2539                     (((nd->nd_flag & ND_NFSV41) != 0 &&
 2540                     stateidp->seqid == 0) ||
 2541                     stateidp->seqid == stp->ls_stateid.seqid) &&
 2542                     !NFSBCMP(stateidp->other, stp->ls_stateid.other,
 2543                           NFSX_STATEIDOTHER))
 2544                         break;
 2545             }
                  /*
                   * Fail with NFSERR_EXPIRED when no delegation matched, or when
                   * write access is requested but the match is a read delegation.
                   */
 2546             if (stp == LIST_END(&lfp->lf_deleg) ||
 2547                 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
 2548                  (stp->ls_flags & NFSLCK_DELEGREAD))) {
 2549                 NFSUNLOCKSTATE();
 2550                 if (haslock) {
 2551                         NFSLOCKV4ROOTMUTEX();
 2552                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2553                         NFSUNLOCKV4ROOTMUTEX();
 2554                 }
 2555                 error = NFSERR_EXPIRED;
 2556                 goto out;
 2557             }
 2558         }
 2559 
 2560         /*
 2561          * Check for access/deny bit conflicts. I check for the same
 2562          * owner as well, in case the client didn't bother.
               * The test is never true when NFSLCK_DELEGCUR is set.  Each
               * side's access bits are compared against the other side's flags
               * shifted right by NFSLCK_SHIFT (the deny bits, going by the
               * comment above).
               * nfsrv_clientconflict() results as handled here:
               * 1 - retry from tryagain,
               * 2 - return NFSERR_PERM,
               * 0 - the conflict stands and the state lock is still held, so
               *     it is released here.
 2563          */
 2564         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 2565                 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
 2566                     (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
 2567                       ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
 2568                      ((stp->ls_flags & NFSLCK_ACCESSBITS) &
 2569                       ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
 2570                         ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
 2571                         if (ret == 1) {
 2572                                 /*
 2573                                  * nfsrv_clientconflict() unlocks
 2574                                  * state when it returns non-zero.
 2575                                  */
 2576                                 goto tryagain;
 2577                         }
 2578                         if (ret == 2)
 2579                                 error = NFSERR_PERM;
 2580                         else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2581                                 error = NFSERR_RECLAIMCONFLICT;
 2582                         else
 2583                                 error = NFSERR_SHAREDENIED;
 2584                         if (ret == 0)
 2585                                 NFSUNLOCKSTATE();
 2586                         if (haslock) {
 2587                                 NFSLOCKV4ROOTMUTEX();
 2588                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2589                                 NFSUNLOCKV4ROOTMUTEX();
 2590                         }
 2591                         goto out;
 2592                 }
 2593         }
 2594 
 2595         /*
 2596          * Check for a conflicting delegation. If one is found, call
 2597          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2598          * been set yet, it will get the lock. Otherwise, it will recall
 2599          * the delegation. Then, we try try again...
 2600          * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
 2601          *  isn't a conflict.)
 2602          * I currently believe the conflict algorithm to be:
 2603          * For Open with Read Access and Deny None
 2604          * - there is a conflict iff a different client has a write delegation
 2605          * For Open with other Write Access or any Deny except None
 2606          * - there is a conflict if a different client has any delegation
 2607          * - there is a conflict if the same client has a read delegation
 2608          *   (The current consensus is that this last case should be
 2609          *    considered a conflict since the client with a read delegation
 2610          *    could have done an Open with ReadAccess and WriteDeny
 2611          *    locally and then not have checked for the WriteDeny.)
 2612          * Don't check for a Reclaim, since that will be dealt with
 2613          * by nfsrv_openctrl().
               * The scan is also skipped when NFSLCK_DELEGPREV is set.
 2614          */
 2615         if (!(new_stp->ls_flags &
 2616                 (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
 2617             stp = LIST_FIRST(&lfp->lf_deleg);
 2618             while (stp != LIST_END(&lfp->lf_deleg)) {
                      /* The next entry is fetched before stp is handed to the helper. */
 2619                 nstp = LIST_NEXT(stp, ls_file);
 2620                 if ((readonly && stp->ls_clp != clp &&
 2621                        (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2622                     (!readonly && (stp->ls_clp != clp ||
 2623                          (stp->ls_flags & NFSLCK_DELEGREAD)))) {
 2624                         ret = nfsrv_delegconflict(stp, &haslock, p, vp);
 2625                         if (ret) {
 2626                             /*
 2627                              * nfsrv_delegconflict() unlocks state
 2628                              * when it returns non-zero.
 2629                              */
 2630                             if (ret == -1)
 2631                                 goto tryagain;
                                  /*
                                   * NOTE(review): unlike the other error exits,
                                   * the v4root lock is not released here when
                                   * haslock is set; presumably
                                   * nfsrv_delegconflict() has already dropped
                                   * it before returning an error - confirm.
                                   */
 2632                             error = ret;
 2633                             goto out;
 2634                         }
 2635                 }
 2636                 stp = nstp;
 2637             }
 2638         }
              /* No conflict found: drop the state lock and, if held, the v4root lock. */
 2639         NFSUNLOCKSTATE();
 2640         if (haslock) {
 2641                 NFSLOCKV4ROOTMUTEX();
 2642                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2643                 NFSUNLOCKV4ROOTMUTEX();
 2644         }
 2645 
 2646 out:
 2647         NFSEXITCODE2(error, nd);
 2648         return (error);
 2649 }
 2650 
 2651 /*
 2652  * Open control function to create/update open state for an open.
 2653  */
 2654 int
 2655 nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
 2656     struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
 2657     nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
 2658     NFSPROC_T *p, u_quad_t filerev)
 2659 {
 2660         struct nfsstate *new_stp = *new_stpp;
 2661         struct nfsstate *stp, *nstp;
 2662         struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
 2663         struct nfslockfile *lfp, *new_lfp;
 2664         struct nfsclient *clp;
 2665         int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
 2666         int readonly = 0, cbret = 1, getfhret = 0;
 2667         int gotstate = 0, len = 0;
 2668         u_char *clidp = NULL;
 2669 
 2670         if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
 2671                 readonly = 1;
 2672         /*
 2673          * Check for restart conditions (client and server).
 2674          * (Paranoia, should have been detected by nfsrv_opencheck().)
 2675          * If an error does show up, return NFSERR_EXPIRED, since the
 2676          * seqid# has already been incremented.
 2677          */
 2678         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 2679             &new_stp->ls_stateid, 0);
 2680         if (error) {
 2681                 printf("Nfsd: openctrl unexpected restart err=%d\n",
 2682                     error);
 2683                 error = NFSERR_EXPIRED;
 2684                 goto out;
 2685         }
 2686 
 2687         clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
 2688 tryagain:
 2689         new_lfp = malloc(sizeof (struct nfslockfile),
 2690             M_NFSDLOCKFILE, M_WAITOK);
 2691         new_open = malloc(sizeof (struct nfsstate),
 2692             M_NFSDSTATE, M_WAITOK);
 2693         new_deleg = malloc(sizeof (struct nfsstate),
 2694             M_NFSDSTATE, M_WAITOK);
 2695         getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
 2696             NULL, p);
 2697         NFSLOCKSTATE();
 2698         /*
 2699          * Get the client structure. Since the linked lists could be changed
 2700          * by other nfsd processes if this process does a tsleep(), one of
 2701          * two things must be done.
 2702          * 1 - don't tsleep()
 2703          * or
 2704          * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
 2705          *     before using the lists, since this lock stops the other
 2706          *     nfsd. This should only be used for rare cases, since it
 2707          *     essentially single threads the nfsd.
 2708          *     At this time, it is only done for cases where the stable
 2709          *     storage file must be written prior to completion of state
 2710          *     expiration.
 2711          */
 2712         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 2713             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 2714         if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
 2715             clp->lc_program) {
 2716                 /*
 2717                  * This happens on the first open for a client
 2718                  * that supports callbacks.
 2719                  */
 2720                 NFSUNLOCKSTATE();
 2721                 /*
 2722                  * Although nfsrv_docallback() will sleep, clp won't
 2723                  * go away, since they are only removed when the
 2724                  * nfsv4_lock() has blocked the nfsd threads. The
 2725                  * fields in clp can change, but having multiple
 2726                  * threads do this Null callback RPC should be
 2727                  * harmless.
 2728                  */
 2729                 cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
 2730                     NULL, 0, NULL, NULL, NULL, 0, p);
 2731                 NFSLOCKSTATE();
 2732                 clp->lc_flags &= ~LCL_NEEDSCBNULL;
 2733                 if (!cbret)
 2734                         clp->lc_flags |= LCL_CALLBACKSON;
 2735         }
 2736 
 2737         /*
 2738          * Look up the open owner. See if it needs confirmation and
 2739          * check the seq#, as required.
 2740          */
 2741         if (!error)
 2742                 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
 2743 
 2744         if (error) {
 2745                 NFSUNLOCKSTATE();
 2746                 printf("Nfsd: openctrl unexpected state err=%d\n",
 2747                         error);
 2748                 free(new_lfp, M_NFSDLOCKFILE);
 2749                 free(new_open, M_NFSDSTATE);
 2750                 free(new_deleg, M_NFSDSTATE);
 2751                 if (haslock) {
 2752                         NFSLOCKV4ROOTMUTEX();
 2753                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2754                         NFSUNLOCKV4ROOTMUTEX();
 2755                 }
 2756                 error = NFSERR_EXPIRED;
 2757                 goto out;
 2758         }
 2759 
 2760         if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2761                 nfsrv_markstable(clp);
 2762 
 2763         /*
 2764          * Get the structure for the underlying file.
 2765          */
 2766         if (getfhret)
 2767                 error = getfhret;
 2768         else
 2769                 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
 2770                     NULL, 0);
 2771         if (new_lfp)
 2772                 free(new_lfp, M_NFSDLOCKFILE);
 2773         if (error) {
 2774                 NFSUNLOCKSTATE();
 2775                 printf("Nfsd openctrl unexpected getlockfile err=%d\n",
 2776                     error);
 2777                 free(new_open, M_NFSDSTATE);
 2778                 free(new_deleg, M_NFSDSTATE);
 2779                 if (haslock) {
 2780                         NFSLOCKV4ROOTMUTEX();
 2781                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2782                         NFSUNLOCKV4ROOTMUTEX();
 2783                 }
 2784                 goto out;
 2785         }
 2786 
 2787         /*
 2788          * Search for a conflicting open/share.
 2789          */
 2790         if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
 2791             /*
 2792              * For Delegate_Cur, search for the matching Delegation,
 2793              * which indicates no conflict.
 2794              * An old delegation should have been recovered by the
 2795              * client doing a Claim_DELEGATE_Prev, so I won't let
 2796              * it match and return NFSERR_EXPIRED. Should I let it
 2797              * match?
 2798              */
 2799             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 2800                 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
 2801                     (((nd->nd_flag & ND_NFSV41) != 0 &&
 2802                     stateidp->seqid == 0) ||
 2803                     stateidp->seqid == stp->ls_stateid.seqid) &&
 2804                     !NFSBCMP(stateidp->other, stp->ls_stateid.other,
 2805                         NFSX_STATEIDOTHER))
 2806                         break;
 2807             }
 2808             if (stp == LIST_END(&lfp->lf_deleg) ||
 2809                 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
 2810                  (stp->ls_flags & NFSLCK_DELEGREAD))) {
 2811                 NFSUNLOCKSTATE();
 2812                 printf("Nfsd openctrl unexpected expiry\n");
 2813                 free(new_open, M_NFSDSTATE);
 2814                 free(new_deleg, M_NFSDSTATE);
 2815                 if (haslock) {
 2816                         NFSLOCKV4ROOTMUTEX();
 2817                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2818                         NFSUNLOCKV4ROOTMUTEX();
 2819                 }
 2820                 error = NFSERR_EXPIRED;
 2821                 goto out;
 2822             }
 2823 
 2824             /*
 2825              * Don't issue a Delegation, since one already exists and
 2826              * delay delegation timeout, as required.
 2827              */
 2828             delegate = 0;
 2829             nfsrv_delaydelegtimeout(stp);
 2830         }
 2831 
 2832         /*
 2833          * Check for access/deny bit conflicts. I also check for the
 2834          * same owner, since the client might not have bothered to check.
 2835          * Also, note an open for the same file and owner, if found,
 2836          * which is all we do here for Delegate_Cur, since conflict
 2837          * checking is already done.
 2838          */
 2839         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 2840                 if (ownerstp && stp->ls_openowner == ownerstp)
 2841                         openstp = stp;
 2842                 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
 2843                     /*
 2844                      * If another client has the file open, the only
 2845                      * delegation that can be issued is a Read delegation
 2846                      * and only if it is a Read open with Deny none.
 2847                      */
 2848                     if (clp != stp->ls_clp) {
 2849                         if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
 2850                             NFSLCK_READACCESS)
 2851                             writedeleg = 0;
 2852                         else
 2853                             delegate = 0;
 2854                     }
 2855                     if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
 2856                         ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
 2857                        ((stp->ls_flags & NFSLCK_ACCESSBITS) &
 2858                         ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
 2859                         ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
 2860                         if (ret == 1) {
 2861                                 /*
 2862                                  * nfsrv_clientconflict() unlocks state
 2863                                  * when it returns non-zero.
 2864                                  */
 2865                                 free(new_open, M_NFSDSTATE);
 2866                                 free(new_deleg, M_NFSDSTATE);
 2867                                 openstp = NULL;
 2868                                 goto tryagain;
 2869                         }
 2870                         if (ret == 2)
 2871                                 error = NFSERR_PERM;
 2872                         else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2873                                 error = NFSERR_RECLAIMCONFLICT;
 2874                         else
 2875                                 error = NFSERR_SHAREDENIED;
 2876                         if (ret == 0)
 2877                                 NFSUNLOCKSTATE();
 2878                         if (haslock) {
 2879                                 NFSLOCKV4ROOTMUTEX();
 2880                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2881                                 NFSUNLOCKV4ROOTMUTEX();
 2882                         }
 2883                         free(new_open, M_NFSDSTATE);
 2884                         free(new_deleg, M_NFSDSTATE);
 2885                         printf("nfsd openctrl unexpected client cnfl\n");
 2886                         goto out;
 2887                     }
 2888                 }
 2889         }
 2890 
 2891         /*
 2892          * Check for a conflicting delegation. If one is found, call
 2893          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2894          * been set yet, it will get the lock. Otherwise, it will recall
 2895          * the delegation. Then, we try try again...
 2896          * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
 2897          *  isn't a conflict.)
 2898          * I currently believe the conflict algorithm to be:
 2899          * For Open with Read Access and Deny None
 2900          * - there is a conflict iff a different client has a write delegation
 2901          * For Open with other Write Access or any Deny except None
 2902          * - there is a conflict if a different client has any delegation
 2903          * - there is a conflict if the same client has a read delegation
 2904          *   (The current consensus is that this last case should be
 2905          *    considered a conflict since the client with a read delegation
 2906          *    could have done an Open with ReadAccess and WriteDeny
 2907          *    locally and then not have checked for the WriteDeny.)
 2908          */
 2909         if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
 2910             stp = LIST_FIRST(&lfp->lf_deleg);
 2911             while (stp != LIST_END(&lfp->lf_deleg)) {
 2912                 nstp = LIST_NEXT(stp, ls_file);
 2913                 if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
 2914                         writedeleg = 0;
 2915                 else
 2916                         delegate = 0;
 2917                 if ((readonly && stp->ls_clp != clp &&
 2918                        (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2919                     (!readonly && (stp->ls_clp != clp ||
 2920                          (stp->ls_flags & NFSLCK_DELEGREAD)))) {
 2921                     if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 2922                         delegate = 2;
 2923                     } else {
 2924                         ret = nfsrv_delegconflict(stp, &haslock, p, vp);
 2925                         if (ret) {
 2926                             /*
 2927                              * nfsrv_delegconflict() unlocks state
 2928                              * when it returns non-zero.
 2929                              */
 2930                             printf("Nfsd openctrl unexpected deleg cnfl\n");
 2931                             free(new_open, M_NFSDSTATE);
 2932                             free(new_deleg, M_NFSDSTATE);
 2933                             if (ret == -1) {
 2934                                 openstp = NULL;
 2935                                 goto tryagain;
 2936                             }
 2937                             error = ret;
 2938                             goto out;
 2939                         }
 2940                     }
 2941                 }
 2942                 stp = nstp;
 2943             }
 2944         }
 2945 
 2946         /*
 2947          * We only get here if there was no open that conflicted.
 2948          * If an open for the owner exists, OR the new access/deny bits into it.
 2949          * Otherwise it is a new open. If the open_owner hasn't been
 2950          * confirmed, replace the open with the new one needing confirmation,
 2951          * otherwise add the open.
 2952          */
 2953         if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
 2954             /*
 2955              * Handle NFSLCK_DELEGPREV by searching the old delegations for
 2956              * a match. If found, just move the old delegation to the current
 2957              * delegation list and issue open. If not found, return
 2958              * NFSERR_EXPIRED.
 2959              */
 2960             LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
 2961                 if (stp->ls_lfp == lfp) {
 2962                     /* Found it */
 2963                     if (stp->ls_clp != clp)
 2964                         panic("olddeleg clp");
 2965                     LIST_REMOVE(stp, ls_list);
 2966                     LIST_REMOVE(stp, ls_hash);
 2967                     stp->ls_flags &= ~NFSLCK_OLDDELEG;
 2968                     stp->ls_stateid.seqid = delegstateidp->seqid = 1;
 2969                     stp->ls_stateid.other[0] = delegstateidp->other[0] =
 2970                         clp->lc_clientid.lval[0];
 2971                     stp->ls_stateid.other[1] = delegstateidp->other[1] =
 2972                         clp->lc_clientid.lval[1];
 2973                     stp->ls_stateid.other[2] = delegstateidp->other[2] =
 2974                         nfsrv_nextstateindex(clp);
 2975                     stp->ls_compref = nd->nd_compref;
 2976                     LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
 2977                     LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 2978                         stp->ls_stateid), stp, ls_hash);
 2979                     if (stp->ls_flags & NFSLCK_DELEGWRITE)
 2980                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 2981                     else
 2982                         *rflagsp |= NFSV4OPEN_READDELEGATE;
 2983                     clp->lc_delegtime = NFSD_MONOSEC +
 2984                         nfsrv_lease + NFSRV_LEASEDELTA;
 2985 
 2986                     /*
 2987                      * Now, do the associated open.
 2988                      */
 2989                     new_open->ls_stateid.seqid = 1;
 2990                     new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 2991                     new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 2992                     new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 2993                     new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
 2994                         NFSLCK_OPEN;
 2995                     if (stp->ls_flags & NFSLCK_DELEGWRITE)
 2996                         new_open->ls_flags |= (NFSLCK_READACCESS |
 2997                             NFSLCK_WRITEACCESS);
 2998                     else
 2999                         new_open->ls_flags |= NFSLCK_READACCESS;
 3000                     new_open->ls_uid = new_stp->ls_uid;
 3001                     new_open->ls_lfp = lfp;
 3002                     new_open->ls_clp = clp;
 3003                     LIST_INIT(&new_open->ls_open);
 3004                     LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3005                     LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3006                         new_open, ls_hash);
 3007                     /*
 3008                      * and handle the open owner
 3009                      */
 3010                     if (ownerstp) {
 3011                         new_open->ls_openowner = ownerstp;
 3012                         LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
 3013                     } else {
 3014                         new_open->ls_openowner = new_stp;
 3015                         new_stp->ls_flags = 0;
 3016                         nfsrvd_refcache(new_stp->ls_op);
 3017                         new_stp->ls_noopens = 0;
 3018                         LIST_INIT(&new_stp->ls_open);
 3019                         LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3020                         LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3021                         *new_stpp = NULL;
 3022                         nfsstatsv1.srvopenowners++;
 3023                         nfsrv_openpluslock++;
 3024                     }
 3025                     openstp = new_open;
 3026                     new_open = NULL;
 3027                     nfsstatsv1.srvopens++;
 3028                     nfsrv_openpluslock++;
 3029                     break;
 3030                 }
 3031             }
 3032             if (stp == LIST_END(&clp->lc_olddeleg))
 3033                 error = NFSERR_EXPIRED;
 3034         } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
 3035             /*
 3036              * Scan to see that a delegation for this client and file
 3037              * doesn't already exist.
 3038              * There also shouldn't yet be an Open for this file and
 3039              * openowner.
 3040              */
 3041             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 3042                 if (stp->ls_clp == clp)
 3043                     break;
 3044             }
 3045             if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
 3046                 /*
 3047                  * This is the Claim_Previous case with a delegation
 3048                  * type != Delegate_None.
 3049                  */
 3050                 /*
 3051                  * First, add the delegation. (Although we must issue the
 3052                  * delegation, we can also ask for an immediate return.)
 3053                  */
 3054                 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3055                 new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
 3056                     clp->lc_clientid.lval[0];
 3057                 new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
 3058                     clp->lc_clientid.lval[1];
 3059                 new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
 3060                     nfsrv_nextstateindex(clp);
 3061                 if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
 3062                     new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3063                         NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3064                     *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3065                     nfsrv_writedelegcnt++;
 3066                 } else {
 3067                     new_deleg->ls_flags = (NFSLCK_DELEGREAD |
 3068                         NFSLCK_READACCESS);
 3069                     *rflagsp |= NFSV4OPEN_READDELEGATE;
 3070                 }
 3071                 new_deleg->ls_uid = new_stp->ls_uid;
 3072                 new_deleg->ls_lfp = lfp;
 3073                 new_deleg->ls_clp = clp;
 3074                 new_deleg->ls_filerev = filerev;
 3075                 new_deleg->ls_compref = nd->nd_compref;
 3076                 new_deleg->ls_lastrecall = 0;
 3077                 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3078                 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3079                     new_deleg->ls_stateid), new_deleg, ls_hash);
 3080                 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3081                 new_deleg = NULL;
 3082                 if (delegate == 2 || nfsrv_issuedelegs == 0 ||
 3083                     (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3084                      LCL_CALLBACKSON ||
 3085                     NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
 3086                     !NFSVNO_DELEGOK(vp))
 3087                     *rflagsp |= NFSV4OPEN_RECALL;
 3088                 nfsstatsv1.srvdelegates++;
 3089                 nfsrv_openpluslock++;
 3090                 nfsrv_delegatecnt++;
 3091 
 3092                 /*
 3093                  * Now, do the associated open.
 3094                  */
 3095                 new_open->ls_stateid.seqid = 1;
 3096                 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3097                 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3098                 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3099                 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
 3100                     NFSLCK_OPEN;
 3101                 if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
 3102                         new_open->ls_flags |= (NFSLCK_READACCESS |
 3103                             NFSLCK_WRITEACCESS);
 3104                 else
 3105                         new_open->ls_flags |= NFSLCK_READACCESS;
 3106                 new_open->ls_uid = new_stp->ls_uid;
 3107                 new_open->ls_lfp = lfp;
 3108                 new_open->ls_clp = clp;
 3109                 LIST_INIT(&new_open->ls_open);
 3110                 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3111                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3112                    new_open, ls_hash);
 3113                 /*
 3114                  * and handle the open owner
 3115                  */
 3116                 if (ownerstp) {
 3117                     new_open->ls_openowner = ownerstp;
 3118                     LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
 3119                 } else {
 3120                     new_open->ls_openowner = new_stp;
 3121                     new_stp->ls_flags = 0;
 3122                     nfsrvd_refcache(new_stp->ls_op);
 3123                     new_stp->ls_noopens = 0;
 3124                     LIST_INIT(&new_stp->ls_open);
 3125                     LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3126                     LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3127                     *new_stpp = NULL;
 3128                     nfsstatsv1.srvopenowners++;
 3129                     nfsrv_openpluslock++;
 3130                 }
 3131                 openstp = new_open;
 3132                 new_open = NULL;
 3133                 nfsstatsv1.srvopens++;
 3134                 nfsrv_openpluslock++;
 3135             } else {
 3136                 error = NFSERR_RECLAIMCONFLICT;
 3137             }
 3138         } else if (ownerstp) {
 3139                 if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
 3140                     /* Replace the open */
 3141                     if (ownerstp->ls_op)
 3142                         nfsrvd_derefcache(ownerstp->ls_op);
 3143                     ownerstp->ls_op = new_stp->ls_op;
 3144                     nfsrvd_refcache(ownerstp->ls_op);
 3145                     ownerstp->ls_seq = new_stp->ls_seq;
 3146                     *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
 3147                     stp = LIST_FIRST(&ownerstp->ls_open);
 3148                     stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
 3149                         NFSLCK_OPEN;
 3150                     stp->ls_stateid.seqid = 1;
 3151                     stp->ls_uid = new_stp->ls_uid;
 3152                     if (lfp != stp->ls_lfp) {
 3153                         LIST_REMOVE(stp, ls_file);
 3154                         LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
 3155                         stp->ls_lfp = lfp;
 3156                     }
 3157                     openstp = stp;
 3158                 } else if (openstp) {
 3159                     openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
 3160                     openstp->ls_stateid.seqid++;
 3161                     if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3162                         openstp->ls_stateid.seqid == 0)
 3163                         openstp->ls_stateid.seqid = 1;
 3164 
 3165                     /*
 3166                      * This is where we can choose to issue a delegation.
 3167                      */
 3168                     if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
 3169                         *rflagsp |= NFSV4OPEN_WDNOTWANTED;
 3170                     else if (nfsrv_issuedelegs == 0)
 3171                         *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
 3172                     else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
 3173                         *rflagsp |= NFSV4OPEN_WDRESOURCE;
 3174                     else if (delegate == 0 || writedeleg == 0 ||
 3175                         NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
 3176                         nfsrv_writedelegifpos == 0) ||
 3177                         !NFSVNO_DELEGOK(vp) ||
 3178                         (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
 3179                         (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3180                          LCL_CALLBACKSON)
 3181                         *rflagsp |= NFSV4OPEN_WDCONTENTION;
 3182                     else {
 3183                         new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3184                         new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
 3185                             = clp->lc_clientid.lval[0];
 3186                         new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
 3187                             = clp->lc_clientid.lval[1];
 3188                         new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
 3189                             = nfsrv_nextstateindex(clp);
 3190                         new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3191                             NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3192                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3193                         new_deleg->ls_uid = new_stp->ls_uid;
 3194                         new_deleg->ls_lfp = lfp;
 3195                         new_deleg->ls_clp = clp;
 3196                         new_deleg->ls_filerev = filerev;
 3197                         new_deleg->ls_compref = nd->nd_compref;
 3198                         new_deleg->ls_lastrecall = 0;
 3199                         nfsrv_writedelegcnt++;
 3200                         LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3201                         LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3202                             new_deleg->ls_stateid), new_deleg, ls_hash);
 3203                         LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3204                         new_deleg = NULL;
 3205                         nfsstatsv1.srvdelegates++;
 3206                         nfsrv_openpluslock++;
 3207                         nfsrv_delegatecnt++;
 3208                     }
 3209                 } else {
 3210                     new_open->ls_stateid.seqid = 1;
 3211                     new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3212                     new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3213                     new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3214                     new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
 3215                         NFSLCK_OPEN;
 3216                     new_open->ls_uid = new_stp->ls_uid;
 3217                     new_open->ls_openowner = ownerstp;
 3218                     new_open->ls_lfp = lfp;
 3219                     new_open->ls_clp = clp;
 3220                     LIST_INIT(&new_open->ls_open);
 3221                     LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3222                     LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
 3223                     LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3224                         new_open, ls_hash);
 3225                     openstp = new_open;
 3226                     new_open = NULL;
 3227                     nfsstatsv1.srvopens++;
 3228                     nfsrv_openpluslock++;
 3229 
 3230                     /*
 3231                      * This is where we can choose to issue a delegation.
 3232                      */
 3233                     if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
 3234                         *rflagsp |= NFSV4OPEN_WDNOTWANTED;
 3235                     else if (nfsrv_issuedelegs == 0)
 3236                         *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
 3237                     else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
 3238                         *rflagsp |= NFSV4OPEN_WDRESOURCE;
 3239                     else if (delegate == 0 || (writedeleg == 0 &&
 3240                         readonly == 0) || !NFSVNO_DELEGOK(vp) ||
 3241                         (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3242                          LCL_CALLBACKSON)
 3243                         *rflagsp |= NFSV4OPEN_WDCONTENTION;
 3244                     else {
 3245                         new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3246                         new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
 3247                             = clp->lc_clientid.lval[0];
 3248                         new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
 3249                             = clp->lc_clientid.lval[1];
 3250                         new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
 3251                             = nfsrv_nextstateindex(clp);
 3252                         if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
 3253                             (nfsrv_writedelegifpos || !readonly) &&
 3254                             (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
 3255                             new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3256                                 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3257                             *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3258                             nfsrv_writedelegcnt++;
 3259                         } else {
 3260                             new_deleg->ls_flags = (NFSLCK_DELEGREAD |
 3261                                 NFSLCK_READACCESS);
 3262                             *rflagsp |= NFSV4OPEN_READDELEGATE;
 3263                         }
 3264                         new_deleg->ls_uid = new_stp->ls_uid;
 3265                         new_deleg->ls_lfp = lfp;
 3266                         new_deleg->ls_clp = clp;
 3267                         new_deleg->ls_filerev = filerev;
 3268                         new_deleg->ls_compref = nd->nd_compref;
 3269                         new_deleg->ls_lastrecall = 0;
 3270                         LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3271                         LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3272                             new_deleg->ls_stateid), new_deleg, ls_hash);
 3273                         LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3274                         new_deleg = NULL;
 3275                         nfsstatsv1.srvdelegates++;
 3276                         nfsrv_openpluslock++;
 3277                         nfsrv_delegatecnt++;
 3278                     }
 3279                 }
 3280         } else {
 3281                 /*
 3282                  * New owner case. Start the open_owner sequence with a
 3283                  * Needs confirmation (unless a reclaim) and hang the
 3284                  * new open off it.
 3285                  */
 3286                 new_open->ls_stateid.seqid = 1;
 3287                 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3288                 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3289                 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3290                 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
 3291                     NFSLCK_OPEN;
 3292                 new_open->ls_uid = new_stp->ls_uid;
 3293                 LIST_INIT(&new_open->ls_open);
 3294                 new_open->ls_openowner = new_stp;
 3295                 new_open->ls_lfp = lfp;
 3296                 new_open->ls_clp = clp;
 3297                 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3298                 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 3299                         new_stp->ls_flags = 0;
 3300                 } else if ((nd->nd_flag & ND_NFSV41) != 0) {
 3301                         /* NFSv4.1 never needs confirmation. */
 3302                         new_stp->ls_flags = 0;
 3303 
 3304                         /*
 3305                          * This is where we can choose to issue a delegation.
 3306                          */
 3307                         if (delegate && nfsrv_issuedelegs &&
 3308                             (writedeleg || readonly) &&
 3309                             (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
 3310                              LCL_CALLBACKSON &&
 3311                             !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
 3312                             NFSVNO_DELEGOK(vp) &&
 3313                             ((nd->nd_flag & ND_NFSV41) == 0 ||
 3314                              (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
 3315                                 new_deleg->ls_stateid.seqid =
 3316                                     delegstateidp->seqid = 1;
 3317                                 new_deleg->ls_stateid.other[0] =
 3318                                     delegstateidp->other[0]
 3319                                     = clp->lc_clientid.lval[0];
 3320                                 new_deleg->ls_stateid.other[1] =
 3321                                     delegstateidp->other[1]
 3322                                     = clp->lc_clientid.lval[1];
 3323                                 new_deleg->ls_stateid.other[2] =
 3324                                     delegstateidp->other[2]
 3325                                     = nfsrv_nextstateindex(clp);
 3326                                 if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
 3327                                     (nfsrv_writedelegifpos || !readonly) &&
 3328                                     ((nd->nd_flag & ND_NFSV41) == 0 ||
 3329                                      (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
 3330                                      0)) {
 3331                                         new_deleg->ls_flags =
 3332                                             (NFSLCK_DELEGWRITE |
 3333                                              NFSLCK_READACCESS |
 3334                                              NFSLCK_WRITEACCESS);
 3335                                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3336                                         nfsrv_writedelegcnt++;
 3337                                 } else {
 3338                                         new_deleg->ls_flags =
 3339                                             (NFSLCK_DELEGREAD |
 3340                                              NFSLCK_READACCESS);
 3341                                         *rflagsp |= NFSV4OPEN_READDELEGATE;
 3342                                 }
 3343                                 new_deleg->ls_uid = new_stp->ls_uid;
 3344                                 new_deleg->ls_lfp = lfp;
 3345                                 new_deleg->ls_clp = clp;
 3346                                 new_deleg->ls_filerev = filerev;
 3347                                 new_deleg->ls_compref = nd->nd_compref;
 3348                                 new_deleg->ls_lastrecall = 0;
 3349                                 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
 3350                                     ls_file);
 3351                                 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3352                                     new_deleg->ls_stateid), new_deleg, ls_hash);
 3353                                 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
 3354                                     ls_list);
 3355                                 new_deleg = NULL;
 3356                                 nfsstatsv1.srvdelegates++;
 3357                                 nfsrv_openpluslock++;
 3358                                 nfsrv_delegatecnt++;
 3359                         }
 3360                         /*
 3361                          * Since NFSv4.1 never does an OpenConfirm, the first
 3362                          * open state will be acquired here.
 3363                          */
 3364                         if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
 3365                                 clp->lc_flags |= LCL_STAMPEDSTABLE;
 3366                                 len = clp->lc_idlen;
 3367                                 NFSBCOPY(clp->lc_id, clidp, len);
 3368                                 gotstate = 1;
 3369                         }
 3370                 } else {
 3371                         *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
 3372                         new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
 3373                 }
 3374                 nfsrvd_refcache(new_stp->ls_op);
 3375                 new_stp->ls_noopens = 0;
 3376                 LIST_INIT(&new_stp->ls_open);
 3377                 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3378                 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3379                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3380                     new_open, ls_hash);
 3381                 openstp = new_open;
 3382                 new_open = NULL;
 3383                 *new_stpp = NULL;
 3384                 nfsstatsv1.srvopens++;
 3385                 nfsrv_openpluslock++;
 3386                 nfsstatsv1.srvopenowners++;
 3387                 nfsrv_openpluslock++;
 3388         }
 3389         if (!error) {
 3390                 stateidp->seqid = openstp->ls_stateid.seqid;
 3391                 stateidp->other[0] = openstp->ls_stateid.other[0];
 3392                 stateidp->other[1] = openstp->ls_stateid.other[1];
 3393                 stateidp->other[2] = openstp->ls_stateid.other[2];
 3394         }
 3395         NFSUNLOCKSTATE();
 3396         if (haslock) {
 3397                 NFSLOCKV4ROOTMUTEX();
 3398                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 3399                 NFSUNLOCKV4ROOTMUTEX();
 3400         }
 3401         if (new_open)
 3402                 free(new_open, M_NFSDSTATE);
 3403         if (new_deleg)
 3404                 free(new_deleg, M_NFSDSTATE);
 3405 
 3406         /*
 3407          * If the NFSv4.1 client just acquired its first open, write a timestamp
 3408          * to the stable storage file.
 3409          */
 3410         if (gotstate != 0) {
 3411                 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
 3412                 nfsrv_backupstable();
 3413         }
 3414 
 3415 out:
 3416         free(clidp, M_TEMP);
 3417         NFSEXITCODE2(error, nd);
 3418         return (error);
 3419 }
 3420 
 3421 /*
 3422  * Open update. Does the confirm, downgrade and close.
       *
       * new_stp->ls_flags selects the operation: NFSLCK_CONFIRM confirms the
       * open, NFSLCK_CLOSE closes it, and anything else downgrades the open's
       * share bits to (new_stp->ls_flags & NFSLCK_SHAREBITS).
       * Once the sanity checks pass, *stateidp gets the open's stateid with the
       * seqid advanced by one.  For a close, *retwriteaccessp (when not NULL)
       * is set to 1 if the open had NFSLCK_WRITEACCESS set, otherwise 0.
       * Returns 0 or an NFSERR_xxx value.
 3423  */
 3424 int
 3425 nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
 3426     nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p,
 3427     int *retwriteaccessp)
 3428 {
 3429         struct nfsstate *stp;
 3430         struct nfsclient *clp;
 3431         struct nfslockfile *lfp;
 3432         u_int32_t bits;
 3433         int error = 0, gotstate = 0, len = 0;
 3434         u_char *clidp = NULL;
 3435 
 3436         /*
 3437          * Check for restart conditions (client and server).
               * clidp is still NULL on this path, so the free() at "out" is
               * handed a NULL pointer.
 3438          */
 3439         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 3440             &new_stp->ls_stateid, 0);
 3441         if (error)
 3442                 goto out;
 3443 
              /*
               * Buffer for a copy of the client id.  It is filled in while the
               * state lock is held and passed to nfsrv_writestable() after the
               * lock has been dropped.
               */
 3444         clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
 3445         NFSLOCKSTATE();
 3446         /*
 3447          * Get the open structure via clientid and stateid.
 3448          */
 3449         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3450             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 3451         if (!error)
 3452                 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
 3453                     new_stp->ls_flags, &stp);
 3454 
 3455         /*
 3456          * Sanity check the open.
               * The state must be an open, and NFSLCK_CONFIRM must be requested
               * exactly when the openowner is still marked NFSLCK_NEEDSCONFIRM.
 3457          */
 3458         if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
 3459                 (!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3460                  (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
 3461                 ((new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3462                  (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
 3463                 error = NFSERR_BADSTATEID;
 3464 
 3465         if (!error)
 3466                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 3467                     stp->ls_openowner, new_stp->ls_op);
              /*
               * A stateid seqid mismatch is an error, except for an NFSv4.0
               * confirm and for an NFSv4.1 request that passed a seqid of 0.
               */
 3468         if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
 3469             (((nd->nd_flag & ND_NFSV41) == 0 &&
 3470               !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
 3471              ((nd->nd_flag & ND_NFSV41) != 0 &&
 3472               new_stp->ls_stateid.seqid != 0)))
 3473                 error = NFSERR_OLDSTATEID;
              /* The vnode must be a regular file. */
 3474         if (!error && vnode_vtype(vp) != VREG) {
 3475                 if (vnode_vtype(vp) == VDIR)
 3476                         error = NFSERR_ISDIR;
 3477                 else
 3478                         error = NFSERR_INVAL;
 3479         }
 3480 
 3481         if (error) {
 3482                 /*
 3483                  * If a client tries to confirm an Open with a bad
 3484                  * seqid# and there are no byte range locks or other Opens
 3485                  * on the openowner, just throw it away, so the next use of the
 3486                  * openowner will start a fresh seq#.
 3487                  */
 3488                 if (error == NFSERR_BADSEQID &&
 3489                     (new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3490                     nfsrv_nootherstate(stp))
 3491                         nfsrv_freeopenowner(stp->ls_openowner, 0, p);
 3492                 NFSUNLOCKSTATE();
 3493                 goto out;
 3494         }
 3495 
 3496         /*
 3497          * Set the return stateid.
               * The seqid returned is one more than the open's current seqid;
               * for NFSv4.1 a value that wraps to 0 is replaced by 1.
 3498          */
 3499         stateidp->seqid = stp->ls_stateid.seqid + 1;
 3500         if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 3501                 stateidp->seqid = 1;
 3502         stateidp->other[0] = stp->ls_stateid.other[0];
 3503         stateidp->other[1] = stp->ls_stateid.other[1];
 3504         stateidp->other[2] = stp->ls_stateid.other[2];
 3505         /*
 3506          * Now, handle the three cases.
               * Each case is responsible for dropping the state lock itself.
 3507          */
 3508         if (new_stp->ls_flags & NFSLCK_CONFIRM) {
 3509                 /*
 3510                  * If the open doesn't need confirmation, it seems to me that
 3511                  * there is a client error, but I'll just log it and keep going?
 3512                  */
 3513                 if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
 3514                         printf("Nfsv4d: stray open confirm\n");
                      /*
                       * Clear all of the openowner's flags, NFSLCK_NEEDSCONFIRM
                       * included, and advance the open's seqid the same way the
                       * returned stateid was advanced above.
                       */
 3515                 stp->ls_openowner->ls_flags = 0;
 3516                 stp->ls_stateid.seqid++;
 3517                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3518                     stp->ls_stateid.seqid == 0)
 3519                         stp->ls_stateid.seqid = 1;
                      /*
                       * If the client has not been stamped yet
                       * (LCL_STAMPEDSTABLE clear), mark it and copy its id so
                       * that the stable storage record can be written once the
                       * state lock has been released.
                       */
 3520                 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
 3521                         clp->lc_flags |= LCL_STAMPEDSTABLE;
 3522                         len = clp->lc_idlen;
 3523                         NFSBCOPY(clp->lc_id, clidp, len);
 3524                         gotstate = 1;
 3525                 }
 3526                 NFSUNLOCKSTATE();
 3527         } else if (new_stp->ls_flags & NFSLCK_CLOSE) {
 3528                 lfp = stp->ls_lfp;
                      /* Report whether the open being closed had write access. */
 3529                 if (retwriteaccessp != NULL) {
 3530                         if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
 3531                                 *retwriteaccessp = 1;
 3532                         else
 3533                                 *retwriteaccessp = 0;
 3534                 }
                      /*
                       * When nfsrv_dolocallocks is set and the open's ls_open
                       * list is not empty, take the lf lock, then release the
                       * state lock and the vnode lock around nfsrv_freeopen();
                       * the vnode lock is reacquired afterwards.  The lf lock
                       * is released here only when nfsrv_freeopen() returns 0.
                       * NOTE(review): presumably a non-zero return means lfp was
                       * freed along with the open; confirm in nfsrv_freeopen().
                       */
 3535                 if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
 3536                         /* Get the lf lock */
 3537                         nfsrv_locklf(lfp);
 3538                         NFSUNLOCKSTATE();
 3539                         ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
 3540                         NFSVOPUNLOCK(vp, 0);
 3541                         if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
 3542                                 NFSLOCKSTATE();
 3543                                 nfsrv_unlocklf(lfp);
 3544                                 NFSUNLOCKSTATE();
 3545                         }
 3546                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 3547                 } else {
 3548                         (void) nfsrv_freeopen(stp, NULL, 0, p);
 3549                         NFSUNLOCKSTATE();
 3550                 }
 3551         } else {
 3552                 /*
 3553                  * Update the share bits, making sure that the new set are a
 3554                  * subset of the old ones.
                       * A requested bit that the open does not already hold
                       * makes the request fail with NFSERR_INVAL.
 3555                  */
 3556                 bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
 3557                 if (~(stp->ls_flags) & bits) {
 3558                         NFSUNLOCKSTATE();
 3559                         error = NFSERR_INVAL;
 3560                         goto out;
 3561                 }
 3562                 stp->ls_flags = (bits | NFSLCK_OPEN);
 3563                 stp->ls_stateid.seqid++;
 3564                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3565                     stp->ls_stateid.seqid == 0)
 3566                         stp->ls_stateid.seqid = 1;
 3567                 NFSUNLOCKSTATE();
 3568         }
 3569 
 3570         /*
 3571          * If the client just confirmed its first open, write a timestamp
 3572          * to the stable storage file.
               * This is done with the state lock released, using the copy of
               * the client id made above.
 3573          */
 3574         if (gotstate != 0) {
 3575                 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
 3576                 nfsrv_backupstable();
 3577         }
 3578 
 3579 out:
 3580         free(clidp, M_TEMP);
 3581         NFSEXITCODE2(error, nd);
 3582         return (error);
 3583 }
 3584 
 3585 /*
 3586  * Delegation update. Does the purge and return.
 3587  * For NFSV4OP_DELEGRETURN the delegation named by stateidp is freed, after
 3588  * checking that it belongs to the file vp refers to; *retwriteaccessp (when
 3589  * not NULL) reports whether it was a write delegation.  For any other op
 3590  * the client's lc_olddeleg list is freed.
 3591  * Returns 0 or an NFSERR_xxx value.
 3592  */
 3593 int
 3594 nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
 3595     nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
 3596     NFSPROC_T *p, int *retwriteaccessp)
 3597 {
 3598         struct nfsclient *clp;
 3599         struct nfsstate *stp;
 3600         fhandle_t fh;
 3601         int error = 0, isreturn;
 3602 
 3603         isreturn = (op == NFSV4OP_DELEGRETURN);
 3604 
 3605         /*
 3606          * Get the file handle for vp, for the sanity check done for
 3607          * DelegReturn.
 3608          */
 3609         if (vp != NULL) {
 3610                 error = nfsvno_getfh(vp, &fh, p);
 3611                 if (error != 0)
 3612                         goto out;
 3613         }
 3614 
 3615         /* Check for restart conditions (client and server). */
 3616         error = nfsrv_checkrestart(clientid,
 3617             isreturn ? NFSLCK_DELEGRETURN : NFSLCK_DELEGPURGE, stateidp, 0);
 3618 
 3619         NFSLOCKSTATE();
 3620         /* Look up the client, then (for DelegReturn) the delegation. */
 3621         if (error == 0)
 3622                 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3623                     (nfsquad_t)((u_quad_t)0), 0, nd, p);
 3624         /*
 3625          * NFSERR_CBPATHDOWN is not treated as a failure here, and a
 3626          * stale clientid is reported as a stale stateid for DelegReturn.
 3627          */
 3628         if (error == NFSERR_CBPATHDOWN)
 3629                 error = 0;
 3630         else if (error == NFSERR_STALECLIENTID && isreturn)
 3631                 error = NFSERR_STALESTATEID;
 3632         if (error == 0 && isreturn) {
 3633                 error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN,
 3634                     &stp);
 3635                 /* NFSv4.1 accepts a seqid of 0 in place of the current one. */
 3636                 if (error == 0 && stp->ls_stateid.seqid != stateidp->seqid &&
 3637                     ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
 3638                         error = NFSERR_OLDSTATEID;
 3639         }
 3640         if (error != 0) {
 3641                 NFSUNLOCKSTATE();
 3642                 /*
 3643                  * NFSERR_EXPIRED means that the state has gone away,
 3644                  * so Delegations have been purged. Just return ok.
 3645                  */
 3646                 if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE)
 3647                         error = 0;
 3648                 goto out;
 3649         }
 3650 
 3651         if (isreturn) {
 3652                 /* The delegation must be for the file vp refers to. */
 3653                 if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
 3654                     sizeof (fhandle_t))) {
 3655                         NFSUNLOCKSTATE();
 3656                         error = NFSERR_BADSTATEID;
 3657                         goto out;
 3658                 }
 3659                 if (retwriteaccessp != NULL)
 3660                         *retwriteaccessp =
 3661                             ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ? 1 : 0;
 3662                 nfsrv_freedeleg(stp);
 3663         } else {
 3664                 nfsrv_freedeleglist(&clp->lc_olddeleg);
 3665         }
 3666         NFSUNLOCKSTATE();
 3667         error = 0;
 3668 
 3669 out:
 3670         NFSEXITCODE(error);
 3671         return (error);
 3672 }
 3673 
 3674 /*
 3675  * Release lock owner.
 3676  * Lock owners of the client whose owner string matches new_stp's are freed
 3677  * as they are found; one that still has entries on its ls_lock list ends
 3678  * the search with NFSERR_LOCKSHELD.
 3679  */
 3680 int
 3681 nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
 3682     NFSPROC_T *p)
 3683 {
 3684         struct nfsstate *ownstp, *openstp, *lckstp, *nextstp;
 3685         struct nfsclient *clp;
 3686         int error;
 3687 
 3688         /* Check for restart conditions (client and server). */
 3689         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 3690             &new_stp->ls_stateid, 0);
 3691         if (error != 0)
 3692                 goto out;
 3693 
 3694         NFSLOCKSTATE();
 3695         /* Find the client; its lock owners are searched by name below. */
 3696         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3697             (nfsquad_t)((u_quad_t)0), 0, NULL, p);
 3698         if (error != 0)
 3699                 goto unlock;
 3700 
 3701         /*
 3702          * Walk every state on each open of each openowner.  The successor
 3703          * is fetched before a lock owner is freed.
 3704          */
 3705         LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
 3706                 LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
 3707                         for (lckstp = LIST_FIRST(&openstp->ls_open);
 3708                             lckstp != LIST_END(&openstp->ls_open);
 3709                             lckstp = nextstp) {
 3710                                 nextstp = LIST_NEXT(lckstp, ls_list);
 3711                                 /* Skip lock owners with another name. */
 3712                                 if (lckstp->ls_ownerlen !=
 3713                                     new_stp->ls_ownerlen ||
 3714                                     NFSBCMP(lckstp->ls_owner,
 3715                                     new_stp->ls_owner, lckstp->ls_ownerlen))
 3716                                         continue;
 3717                                 if (!LIST_EMPTY(&lckstp->ls_lock)) {
 3718                                         /* Locks are still held. */
 3719                                         error = NFSERR_LOCKSHELD;
 3720                                         goto unlock;
 3721                                 }
 3722                                 nfsrv_freelockowner(lckstp, NULL, 0, p);
 3723                         }
 3724                 }
 3725         }
 3726 
 3727 unlock:
 3728         NFSUNLOCKSTATE();
 3729 out:
 3730         NFSEXITCODE(error);
 3731         return (error);
 3732 }
 3733 
 3734 /*
 3735  * Fetch the file handle of vp into the buffer a lock operation will use:
 3736  * new_lfp->lf_fh when NFSLCK_OPEN is set in flags, otherwise *nfhp.
 3737  * Panics if NFSLCK_OPEN is clear and nfhp is NULL.  Returns the value
 3738  * returned by nfsvno_getfh().
 3739  */
 3740 static int
 3741 nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
 3742     fhandle_t *nfhp, NFSPROC_T *p)
 3743 {
 3744         fhandle_t *dstfhp = NULL;
 3745         int error;
 3746 
 3747         if ((flags & NFSLCK_OPEN) == 0) {
 3748                 /* Not an open: the caller must supply the buffer. */
 3749                 if (nfhp == NULL)
 3750                         panic("nfsrv_getlockfh");
 3751                 dstfhp = nfhp;
 3752         } else {
 3753                 KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
 3754                 dstfhp = &new_lfp->lf_fh;
 3755         }
 3756         error = nfsvno_getfh(vp, dstfhp, p);
 3757         NFSEXITCODE(error);
 3758         return (error);
 3759 }
 3760 
 3761 /*
 3762  * Look up the nfslockfile for a file handle, creating the entry for an open.
 3763  * The handle is (*new_lfpp)->lf_fh when NFSLCK_OPEN is set in flags and
 3764  * *nfhp otherwise.  A match is returned via *lfpp, locked with
 3765  * nfsrv_locklf() when lockit is non-zero.  Without a match, an open hashes
 3766  * *new_lfpp in, returns it via *lfpp and sets *new_lfpp to NULL.
 3767  * Returns 0 on success or -1 to indicate no current lock.
 3768  */
 3769 static int
 3770 nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
 3771     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
 3772 {
 3773         struct nfslockhashhead *bucket;
 3774         struct nfslockfile *cur, *fresh = NULL;
 3775         fhandle_t *wantfhp = NULL, *curfhp;
 3776         int isopen;
 3777 
 3778         /* Decide which file handle is being looked up. */
 3779         isopen = (flags & NFSLCK_OPEN) != 0;
 3780         if (isopen) {
 3781                 fresh = *new_lfpp;
 3782                 wantfhp = &fresh->lf_fh;
 3783         } else {
 3784                 if (nfhp == NULL)
 3785                         panic("nfsrv_getlockfile");
 3786                 wantfhp = nfhp;
 3787         }
 3788 
 3789         /* Search the hash chain for an existing entry. */
 3790         bucket = NFSLOCKHASH(wantfhp);
 3791         LIST_FOREACH(cur, bucket, lf_hash) {
 3792                 curfhp = &cur->lf_fh;
 3793                 if (NFSVNO_CMPFH(wantfhp, curfhp)) {
 3794                         if (lockit)
 3795                                 nfsrv_locklf(cur);
 3796                         *lfpp = cur;
 3797                         return (0);
 3798                 }
 3799         }
 3800         if (!isopen)
 3801                 return (-1);
 3802 
 3803         /* Not found: initialize the caller's structure and hash it in. */
 3804         fresh->lf_usecount = 0;
 3805         fresh->lf_locallock_lck.nfslock_lock = 0;
 3806         fresh->lf_locallock_lck.nfslock_usecnt = 0;
 3807         LIST_INIT(&fresh->lf_rollback);
 3808         LIST_INIT(&fresh->lf_locallock);
 3809         LIST_INIT(&fresh->lf_deleg);
 3810         LIST_INIT(&fresh->lf_lock);
 3811         LIST_INIT(&fresh->lf_open);
 3812         LIST_INSERT_HEAD(bucket, fresh, lf_hash);
 3813         *lfpp = fresh;
 3814         *new_lfpp = NULL;
 3815         return (0);
 3816 }
 3817 
 3818 /*
 3819  * Link new_lop into the lock lists.  For a lock with state (stp != NULL)
 3820  * it is placed on lfp's lf_lock list, kept in increasing lo_first order.
 3821  * It is also placed on the owner list (stp->ls_lock, or lfp->lf_locallock
 3822  * when stp is NULL) after the entry pointed at by insert_lop.
 3823  */
 3824 static void
 3825 nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
 3826     struct nfsstate *stp, struct nfslockfile *lfp)
 3827 {
 3828         struct nfslock *cur, *prev;
 3829 
 3830         new_lop->lo_lfp = lfp;
 3831         new_lop->lo_stp = stp;
 3832 
 3833         if (stp == NULL) {
 3834                 new_lop->lo_lckfile.le_prev = NULL;     /* list not used */
 3835         } else {
 3836                 /* Skip the leading entries that start below the new lock. */
 3837                 prev = NULL;
 3838                 cur = LIST_FIRST(&lfp->lf_lock);
 3839                 while (cur != LIST_END(&lfp->lf_lock) &&
 3840                     cur->lo_first < new_lop->lo_first) {
 3841                         prev = cur;
 3842                         cur = LIST_NEXT(cur, lo_lckfile);
 3843                 }
 3844                 if (prev == NULL)
 3845                         LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
 3846                 else
 3847                         LIST_INSERT_AFTER(prev, new_lop, lo_lckfile);
 3848         }
 3849 
 3850         /*
 3851          * insert_lop is overloaded: when it is lfp or stp (cast to a lock
 3852          * pointer) the new lock goes at the head of the owner list.
 3853          */
 3854         if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
 3855                 LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
 3856         else if ((struct nfsstate *)insert_lop == stp)
 3857                 LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
 3858         else
 3859                 LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
 3860         /* Only locks with state are counted. */
 3861         if (stp == NULL)
 3862                 return;
 3863         nfsstatsv1.srvlocks++;
 3864         nfsrv_openpluslock++;
 3865 }
 3866 
 3867 /*
 3868  * This function updates the locking for a lock owner and given file. It
 3869  * maintains a list of lock ranges ordered on increasing file offset that
 3870  * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
 3871  * It always adds new_lop to the list and sometimes uses the one pointed
 3872  * at by other_lopp.
       *
       * The list walked is stp->ls_lock, or lfp->lf_locallock when stp is NULL.
       * For an unlock (NFSLCK_UNLOCK set in new_lop->lo_flags) new_lop itself
       * is not inserted; it is only consumed when it stands in for a missing
       * *other_lopp to hold the second half of a split lock.
       * *new_lopp and *other_lopp are set to NULL when the structure they
       * point at has been linked into the lists.
 3873  */
 3874 static void
 3875 nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
 3876     struct nfslock **other_lopp, struct nfslockfile *lfp)
 3877 {
 3878         struct nfslock *new_lop = *new_lopp;
 3879         struct nfslock *lop, *tlop, *ilop;
 3880         struct nfslock *other_lop = *other_lopp;
 3881         int unlock = 0, myfile = 0;
 3882         u_int64_t tmp;
 3883 
 3884         /*
 3885          * Work down the list until the lock is merged.
 3886          */
 3887         if (new_lop->lo_flags & NFSLCK_UNLOCK)
 3888                 unlock = 1;
              /*
               * ilop tracks the entry after which new_lop will be inserted.
               * It starts out as stp or lfp cast to a lock pointer, which
               * nfsrv_insertlock() takes to mean "insert at the head".
               */
 3889         if (stp != NULL) {
 3890                 ilop = (struct nfslock *)stp;
 3891                 lop = LIST_FIRST(&stp->ls_lock);
 3892         } else {
 3893                 ilop = (struct nfslock *)lfp;
 3894                 lop = LIST_FIRST(&lfp->lf_locallock);
 3895         }
 3896         while (lop != NULL) {
 3897             /*
 3898              * Only check locks for this file that aren't before the start of
 3899              * new lock's range.
 3900              */
 3901             if (lop->lo_lfp == lfp) {
                    /* Remember that a lock for this file has been seen. */
 3902               myfile = 1;
 3903               if (lop->lo_end >= new_lop->lo_first) {
 3904                 if (new_lop->lo_end < lop->lo_first) {
 3905                         /*
 3906                          * If the new lock ends before the start of the
 3907                          * current lock's range, no merge, just insert
 3908                          * the new lock.
 3909                          */
 3910                         break;
 3911                 }
 3912                 if (new_lop->lo_flags == lop->lo_flags ||
 3913                     (new_lop->lo_first <= lop->lo_first &&
 3914                      new_lop->lo_end >= lop->lo_end)) {
 3915                         /*
 3916                          * This lock can be absorbed by the new lock/unlock.
 3917                          * This happens when it covers the entire range
 3918                          * of the old lock or is contiguous
 3919                          * with the old lock and is of the same type or an
 3920                          * unlock.
                               * new_lop is widened to cover the old range, the
                               * old lock is freed and the walk continues; ilop
                               * is left unchanged.
 3921                          */
 3922                         if (lop->lo_first < new_lop->lo_first)
 3923                                 new_lop->lo_first = lop->lo_first;
 3924                         if (lop->lo_end > new_lop->lo_end)
 3925                                 new_lop->lo_end = lop->lo_end;
 3926                         tlop = lop;
 3927                         lop = LIST_NEXT(lop, lo_lckowner);
 3928                         nfsrv_freenfslock(tlop);
 3929                         continue;
 3930                 }
 3931 
 3932                 /*
 3933                  * All these cases are for contiguous locks that are not the
 3934                  * same type, so they can't be merged.
 3935                  */
 3936                 if (new_lop->lo_first <= lop->lo_first) {
 3937                         /*
 3938                          * This case is where the new lock overlaps with the
 3939                          * first part of the old lock. Move the start of the
 3940                          * old lock to just past the end of the new lock. The
 3941                          * new lock will be inserted in front of the old, since
 3942                          * ilop hasn't been updated. (We are done now.)
                               * NOTE(review): lo_end is assigned to lo_first
                               * unchanged, so lo_end appears to be exclusive
                               * (one past the last byte); confirm.
 3943                          */
 3944                         lop->lo_first = new_lop->lo_end;
 3945                         break;
 3946                 }
 3947                 if (new_lop->lo_end >= lop->lo_end) {
 3948                         /*
 3949                          * This case is where the new lock overlaps with the
 3950                          * end of the old lock's range. Move the old lock's
 3951                          * end to just before the new lock's first and insert
 3952                          * the new lock after the old lock.
 3953                          * Might not be done yet, since the new lock could
 3954                          * overlap further locks with higher ranges.
 3955                          */
 3956                         lop->lo_end = new_lop->lo_first;
 3957                         ilop = lop;
 3958                         lop = LIST_NEXT(lop, lo_lckowner);
 3959                         continue;
 3960                 }
 3961                 /*
 3962                  * The final case is where the new lock's range is in the
 3963                  * middle of the current lock's and splits the current lock
 3964                  * up. Use *other_lopp to handle the second part of the
 3965                  * split old lock range. (We are done now.)
 3966                  * For unlock, we use new_lop as other_lop and tmp, since
 3967                  * other_lop and new_lop are the same for this case.
 3968                  * We noted the unlock case above, so we don't need
 3969                  * new_lop->lo_flags any longer.
 3970                  */
 3971                 tmp = new_lop->lo_first;
 3972                 if (other_lop == NULL) {
                              /* A missing other_lop is only allowed for unlock. */
 3973                         if (!unlock)
 3974                                 panic("nfsd srv update unlock");
 3975                         other_lop = new_lop;
 3976                         *new_lopp = NULL;
 3977                 }
                      /*
                       * other_lop becomes the part of the old lock that follows
                       * the new range; the old lock keeps the part before it.
                       */
 3978                 other_lop->lo_first = new_lop->lo_end;
 3979                 other_lop->lo_end = lop->lo_end;
 3980                 other_lop->lo_flags = lop->lo_flags;
 3981                 other_lop->lo_stp = stp;
 3982                 other_lop->lo_lfp = lfp;
 3983                 lop->lo_end = tmp;
 3984                 nfsrv_insertlock(other_lop, lop, stp, lfp);
 3985                 *other_lopp = NULL;
 3986                 ilop = lop;
 3987                 break;
 3988               }
 3989             }
                  /*
                   * Advance.  Once a lock for this file has been seen, stop at
                   * the end of the list or at the first lock for another file.
                   */
 3990             ilop = lop;
 3991             lop = LIST_NEXT(lop, lo_lckowner);
 3992             if (myfile && (lop == NULL || lop->lo_lfp != lfp))
 3993                 break;
 3994         }
 3995 
 3996         /*
 3997          * Insert the new lock in the list at the appropriate place.
               * Nothing is inserted for an unlock.
 3998          */
 3999         if (!unlock) {
 4000                 nfsrv_insertlock(new_lop, ilop, stp, lfp);
 4001                 *new_lopp = NULL;
 4002         }
 4003 }
 4004 
 4005 /*
 4006  * Check the request's seqid against the owner's.  Returns 0 when it is next
 4007  * in sequence (always for NFSv4.1), NFSERR_REPLYFROMCACHE or NFSERR_DONTREPLY
 4008  * when it repeats the previous request, and NFSERR_BADSEQID otherwise.
 4009  */
 4010 static int
 4011 nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
 4012     struct nfsstate *stp, struct nfsrvcache *op)
 4013 {
 4014         struct nfsrvcache *lastop;
 4015         int error = 0;
 4016 
 4017         /* NFSv4.1 ignores the open_seqid and lock_seqid. */
 4018         if ((nd->nd_flag & ND_NFSV41) != 0)
 4019                 goto out;
 4020         if (nd->nd_rp != op)
 4021                 panic("nfsrvstate checkseqid");
 4022         if ((op->rc_flag & RC_INPROG) == 0)
 4023                 panic("nfsrvstate not inprog");
 4024         lastop = stp->ls_op;
 4025         if (lastop != NULL && lastop->rc_refcnt <= 0) {
 4026                 printf("refcnt=%d\n", lastop->rc_refcnt);
 4027                 panic("nfsrvstate op refcnt");
 4028         }
 4029         if (seqid == stp->ls_seq + 1) {
 4030                 /* Next in sequence: swap the owner's cache reference. */
 4031                 if (lastop != NULL)
 4032                         nfsrvd_derefcache(lastop);
 4033                 stp->ls_op = op;
 4034                 nfsrvd_refcache(op);
 4035                 stp->ls_seq = seqid;
 4036         } else if (seqid == stp->ls_seq && lastop != NULL &&
 4037             lastop->rc_xid == op->rc_xid && op->rc_refcnt == 0 &&
 4038             lastop->rc_reqlen == op->rc_reqlen &&
 4039             lastop->rc_cksum == op->rc_cksum) {
 4040                 if ((lastop->rc_flag & RC_INPROG) != 0) {
 4041                         error = NFSERR_DONTREPLY;
 4042                 } else {
 4043                         nd->nd_rp = lastop;
 4044                         lastop->rc_flag |= RC_INPROG;
 4045                         nfsrvd_delcache(op);
 4046                         error = NFSERR_REPLYFROMCACHE;
 4047                 }
 4048         } else
 4049                 error = NFSERR_BADSEQID;
 4050 
 4051 out:
 4052         NFSEXITCODE2(error, nd);
 4053         return (error);
 4054 }
 4054 
 4055 /*
 4056  * Get the client ip address for callbacks. If the strings can't be parsed,
 4057  * just set lc_program to 0 to indicate no callbacks are possible.
 4058  * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
 4059  *  the address to the client's transport address. This won't be used
 4060  *  for callbacks, but can be printed out by nfsstats for info.)
 4061  * Return error if the xdr can't be parsed, 0 otherwise.
 4062  */
 4063 int
 4064 nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
 4065 {
 4066         u_int32_t *tl;
 4067         u_char *cp, *cp2;
 4068         int i, j, maxalen = 0, minalen = 0;
 4069         sa_family_t af;
 4070 #ifdef INET
 4071         struct sockaddr_in *rin, *sin;
 4072 #endif
 4073 #ifdef INET6
 4074         struct sockaddr_in6 *rin6, *sin6;
 4075 #endif
 4076         u_char *addr;
 4077         int error = 0, cantparse = 0;
 4078         union {
 4079                 in_addr_t ival;
 4080                 u_char cval[4];
 4081         } ip;
 4082         union {
 4083                 in_port_t sval;
 4084                 u_char cval[2];
 4085         } port;
 4086 
 4087         /* 8 is the maximum length of the port# string. */
 4088         addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK);
 4089         clp->lc_req.nr_client = NULL;
 4090         clp->lc_req.nr_lock = 0;
 4091         af = AF_UNSPEC;
 4092         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 4093         i = fxdr_unsigned(int, *tl);
 4094         if (i >= 3 && i <= 4) {
 4095                 error = nfsrv_mtostr(nd, addr, i);
 4096                 if (error)
 4097                         goto nfsmout;
 4098 #ifdef INET
 4099                 if (!strcmp(addr, "tcp")) {
 4100                         clp->lc_flags |= LCL_TCPCALLBACK;
 4101                         clp->lc_req.nr_sotype = SOCK_STREAM;
 4102                         clp->lc_req.nr_soproto = IPPROTO_TCP;
 4103                         af = AF_INET;
 4104                 } else if (!strcmp(addr, "udp")) {
 4105                         clp->lc_req.nr_sotype = SOCK_DGRAM;
 4106                         clp->lc_req.nr_soproto = IPPROTO_UDP;
 4107                         af = AF_INET;
 4108                 }
 4109 #endif
 4110 #ifdef INET6
 4111                 if (af == AF_UNSPEC) {
 4112                         if (!strcmp(addr, "tcp6")) {
 4113                                 clp->lc_flags |= LCL_TCPCALLBACK;
 4114                                 clp->lc_req.nr_sotype = SOCK_STREAM;
 4115                                 clp->lc_req.nr_soproto = IPPROTO_TCP;
 4116                                 af = AF_INET6;
 4117                         } else if (!strcmp(addr, "udp6")) {
 4118                                 clp->lc_req.nr_sotype = SOCK_DGRAM;
 4119                                 clp->lc_req.nr_soproto = IPPROTO_UDP;
 4120                                 af = AF_INET6;
 4121                         }
 4122                 }
 4123 #endif
 4124                 if (af == AF_UNSPEC) {
 4125                         cantparse = 1;
 4126                 }
 4127         } else {
 4128                 cantparse = 1;
 4129                 if (i > 0) {
 4130                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 4131                         if (error)
 4132                                 goto nfsmout;
 4133                 }
 4134         }
 4135         /*
 4136          * The caller has allocated clp->lc_req.nr_nam to be large enough
 4137          * for either AF_INET or AF_INET6 and zeroed out the contents.
 4138          * maxalen is set to the maximum length of the host IP address string
 4139          * plus 8 for the maximum length of the port#.
 4140          * minalen is set to the minimum length of the host IP address string
 4141          * plus 4 for the minimum length of the port#.
 4142          * These lengths do not include NULL termination,
 4143          * so INET[6]_ADDRSTRLEN - 1 is used in the calculations.
 4144          */
 4145         switch (af) {
 4146 #ifdef INET
 4147         case AF_INET:
 4148                 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
 4149                 rin->sin_family = AF_INET;
 4150                 rin->sin_len = sizeof(struct sockaddr_in);
 4151                 maxalen = INET_ADDRSTRLEN - 1 + 8;
 4152                 minalen = 7 + 4;
 4153                 break;
 4154 #endif
 4155 #ifdef INET6
 4156         case AF_INET6:
 4157                 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 4158                 rin6->sin6_family = AF_INET6;
 4159                 rin6->sin6_len = sizeof(struct sockaddr_in6);
 4160                 maxalen = INET6_ADDRSTRLEN - 1 + 8;
 4161                 minalen = 3 + 4;
 4162                 break;
 4163 #endif
 4164         }
 4165         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 4166         i = fxdr_unsigned(int, *tl);
 4167         if (i < 0) {
 4168                 error = NFSERR_BADXDR;
 4169                 goto nfsmout;
 4170         } else if (i == 0) {
 4171                 cantparse = 1;
 4172         } else if (!cantparse && i <= maxalen && i >= minalen) {
 4173                 error = nfsrv_mtostr(nd, addr, i);
 4174                 if (error)
 4175                         goto nfsmout;
 4176 
 4177                 /*
 4178                  * Parse out the address fields. We expect 6 decimal numbers
 4179                  * separated by '.'s for AF_INET and two decimal numbers
 4180                  * preceeded by '.'s for AF_INET6.
 4181                  */
 4182                 cp = NULL;
 4183                 switch (af) {
 4184 #ifdef INET6
 4185                 /*
 4186                  * For AF_INET6, first parse the host address.
 4187                  */
 4188                 case AF_INET6:
 4189                         cp = strchr(addr, '.');
 4190                         if (cp != NULL) {
 4191                                 *cp++ = '\0';
 4192                                 if (inet_pton(af, addr, &rin6->sin6_addr) == 1)
 4193                                         i = 4;
 4194                                 else {
 4195                                         cp = NULL;
 4196                                         cantparse = 1;
 4197                                 }
 4198                         }
 4199                         break;
 4200 #endif
 4201 #ifdef INET
 4202                 case AF_INET:
 4203                         cp = addr;
 4204                         i = 0;
 4205                         break;
 4206 #endif
 4207                 }
 4208                 while (cp != NULL && *cp && i < 6) {
 4209                         cp2 = cp;
 4210                         while (*cp2 && *cp2 != '.')
 4211                                 cp2++;
 4212                         if (*cp2)
 4213                                 *cp2++ = '\0';
 4214                         else if (i != 5) {
 4215                                 cantparse = 1;
 4216                                 break;
 4217                         }
 4218                         j = nfsrv_getipnumber(cp);
 4219                         if (j >= 0) {
 4220                                 if (i < 4)
 4221                                         ip.cval[3 - i] = j;
 4222                                 else
 4223                                         port.cval[5 - i] = j;
 4224                         } else {
 4225                                 cantparse = 1;
 4226                                 break;
 4227                         }
 4228                         cp = cp2;
 4229                         i++;
 4230                 }
 4231                 if (!cantparse) {
 4232                         /*
 4233                          * The host address INADDR_ANY is (mis)used to indicate
 4234                          * "there is no valid callback address".
 4235                          */
 4236                         switch (af) {
 4237 #ifdef INET6
 4238                         case AF_INET6:
 4239                                 if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr,
 4240                                     &in6addr_any))
 4241                                         rin6->sin6_port = htons(port.sval);
 4242                                 else
 4243                                         cantparse = 1;
 4244                                 break;
 4245 #endif
 4246 #ifdef INET
 4247                         case AF_INET:
 4248                                 if (ip.ival != INADDR_ANY) {
 4249                                         rin->sin_addr.s_addr = htonl(ip.ival);
 4250                                         rin->sin_port = htons(port.sval);
 4251                                 } else {
 4252                                         cantparse = 1;
 4253                                 }
 4254                                 break;
 4255 #endif
 4256                         }
 4257                 }
 4258         } else {
 4259                 cantparse = 1;
 4260                 if (i > 0) {
 4261                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 4262                         if (error)
 4263                                 goto nfsmout;
 4264                 }
 4265         }
 4266         if (cantparse) {
 4267                 switch (nd->nd_nam->sa_family) {
 4268 #ifdef INET
 4269                 case AF_INET:
 4270                         sin = (struct sockaddr_in *)nd->nd_nam;
 4271                         rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
 4272                         rin->sin_family = AF_INET;
 4273                         rin->sin_len = sizeof(struct sockaddr_in);
 4274                         rin->sin_addr.s_addr = sin->sin_addr.s_addr;
 4275                         rin->sin_port = 0x0;
 4276                         break;
 4277 #endif
 4278 #ifdef INET6
 4279                 case AF_INET6:
 4280                         sin6 = (struct sockaddr_in6 *)nd->nd_nam;
 4281                         rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 4282                         rin6->sin6_family = AF_INET6;
 4283                         rin6->sin6_len = sizeof(struct sockaddr_in6);
 4284                         rin6->sin6_addr = sin6->sin6_addr;
 4285                         rin6->sin6_port = 0x0;
 4286                         break;
 4287 #endif
 4288                 }
 4289                 clp->lc_program = 0;
 4290         }
 4291 nfsmout:
 4292         free(addr, M_TEMP);
 4293         NFSEXITCODE2(error, nd);
 4294         return (error);
 4295 }
 4296 
 4297 /*
 4298  * Turn a string of up to three decimal digits into a number. Return -1 upon
 4299  * error.
 4300  */
 4301 static int
 4302 nfsrv_getipnumber(u_char *cp)
 4303 {
 4304         int i = 0, j = 0;
 4305 
 4306         while (*cp) {
 4307                 if (j > 2 || *cp < '' || *cp > '9')
 4308                         return (-1);
 4309                 i *= 10;
 4310                 i += (*cp - '');
 4311                 cp++;
 4312                 j++;
 4313         }
 4314         if (i < 256)
 4315                 return (i);
 4316         return (-1);
 4317 }
 4318 
 4319 /*
 4320  * This function checks for restart conditions.
 4321  */
 4322 static int
 4323 nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
 4324     nfsv4stateid_t *stateidp, int specialid)
 4325 {
 4326         int ret = 0;
 4327 
 4328         /*
 4329          * First check for a server restart. Open, LockT, ReleaseLockOwner
 4330          * and DelegPurge have a clientid, the rest a stateid.
 4331          */
 4332         if (flags &
 4333             (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
 4334                 if (clientid.lval[0] != nfsrvboottime) {
 4335                         ret = NFSERR_STALECLIENTID;
 4336                         goto out;
 4337                 }
 4338         } else if (stateidp->other[0] != nfsrvboottime &&
 4339                 specialid == 0) {
 4340                 ret = NFSERR_STALESTATEID;
 4341                 goto out;
 4342         }
 4343 
 4344         /*
 4345          * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
 4346          * not use a lock/open owner seqid#, so the check can be done now.
 4347          * (The others will be checked, as required, later.)
 4348          */
 4349         if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
 4350                 goto out;
 4351 
 4352         NFSLOCKSTATE();
 4353         ret = nfsrv_checkgrace(NULL, NULL, flags);
 4354         NFSUNLOCKSTATE();
 4355 
 4356 out:
 4357         NFSEXITCODE(ret);
 4358         return (ret);
 4359 }
 4360 
 4361 /*
 4362  * Check for grace.
 4363  */
 4364 static int
 4365 nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
 4366     u_int32_t flags)
 4367 {
 4368         int error = 0, notreclaimed;
 4369         struct nfsrv_stable *sp;
 4370 
 4371         if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_UPDATEDONE |
 4372              NFSNSF_GRACEOVER)) == 0) {
 4373                 /*
 4374                  * First, check to see if all of the clients have done a
 4375                  * ReclaimComplete.  If so, grace can end now.
 4376                  */
 4377                 notreclaimed = 0;
 4378                 LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 4379                         if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
 4380                                 notreclaimed = 1;
 4381                                 break;
 4382                         }
 4383                 }
 4384                 if (notreclaimed == 0)
 4385                         nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER |
 4386                             NFSNSF_NEEDLOCK);
 4387         }
 4388 
 4389         if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
 4390                 if (flags & NFSLCK_RECLAIM) {
 4391                         error = NFSERR_NOGRACE;
 4392                         goto out;
 4393                 }
 4394         } else {
 4395                 if (!(flags & NFSLCK_RECLAIM)) {
 4396                         error = NFSERR_GRACE;
 4397                         goto out;
 4398                 }
 4399                 if (nd != NULL && clp != NULL &&
 4400                     (nd->nd_flag & ND_NFSV41) != 0 &&
 4401                     (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
 4402                         error = NFSERR_NOGRACE;
 4403                         goto out;
 4404                 }
 4405 
 4406                 /*
 4407                  * If grace is almost over and we are still getting Reclaims,
 4408                  * extend grace a bit.
 4409                  */
 4410                 if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
 4411                     nfsrv_stablefirst.nsf_eograce)
 4412                         nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
 4413                                 NFSRV_LEASEDELTA;
 4414         }
 4415 
 4416 out:
 4417         NFSEXITCODE(error);
 4418         return (error);
 4419 }
 4420 
 4421 /*
 4422  * Do a server callback.
 4423  * The "trunc" argument is slightly overloaded and refers to different
 4424  * boolean arguments for CBRECALL and CBLAYOUTRECALL.
 4425  */
 4426 static int
 4427 nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
 4428     int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp,
 4429     int laytype, NFSPROC_T *p)
 4430 {
 4431         mbuf_t m;
 4432         u_int32_t *tl;
 4433         struct nfsrv_descript *nd;
 4434         struct ucred *cred;
 4435         int error = 0, slotpos;
 4436         u_int32_t callback;
 4437         struct nfsdsession *sep = NULL;
 4438         uint64_t tval;
 4439 
 4440         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 4441         cred = newnfs_getcred();
 4442         NFSLOCKSTATE(); /* mostly for lc_cbref++ */
 4443         if (clp->lc_flags & LCL_NEEDSCONFIRM) {
 4444                 NFSUNLOCKSTATE();
 4445                 panic("docallb");
 4446         }
 4447         clp->lc_cbref++;
 4448 
 4449         /*
 4450          * Fill the callback program# and version into the request
 4451          * structure for newnfs_connect() to use.
 4452          */
 4453         clp->lc_req.nr_prog = clp->lc_program;
 4454 #ifdef notnow
 4455         if ((clp->lc_flags & LCL_NFSV41) != 0)
 4456                 clp->lc_req.nr_vers = NFSV41_CBVERS;
 4457         else
 4458 #endif
 4459                 clp->lc_req.nr_vers = NFSV4_CBVERS;
 4460 
 4461         /*
 4462          * First, fill in some of the fields of nd and cr.
 4463          */
 4464         nd->nd_flag = ND_NFSV4;
 4465         if (clp->lc_flags & LCL_GSS)
 4466                 nd->nd_flag |= ND_KERBV;
 4467         if ((clp->lc_flags & LCL_NFSV41) != 0)
 4468                 nd->nd_flag |= ND_NFSV41;
 4469         nd->nd_repstat = 0;
 4470         cred->cr_uid = clp->lc_uid;
 4471         cred->cr_gid = clp->lc_gid;
 4472         callback = clp->lc_callback;
 4473         NFSUNLOCKSTATE();
 4474         cred->cr_ngroups = 1;
 4475 
 4476         /*
 4477          * Get the first mbuf for the request.
 4478          */
 4479         MGET(m, M_WAITOK, MT_DATA);
 4480         mbuf_setlen(m, 0);
 4481         nd->nd_mreq = nd->nd_mb = m;
 4482         nd->nd_bpos = NFSMTOD(m, caddr_t);
 4483         
 4484         /*
 4485          * and build the callback request.
 4486          */
 4487         if (procnum == NFSV4OP_CBGETATTR) {
 4488                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4489                 error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
 4490                     "CB Getattr", &sep, &slotpos);
 4491                 if (error != 0) {
 4492                         mbuf_freem(nd->nd_mreq);
 4493                         goto errout;
 4494                 }
 4495                 (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
 4496                 (void)nfsrv_putattrbit(nd, attrbitp);
 4497         } else if (procnum == NFSV4OP_CBRECALL) {
 4498                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4499                 error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
 4500                     "CB Recall", &sep, &slotpos);
 4501                 if (error != 0) {
 4502                         mbuf_freem(nd->nd_mreq);
 4503                         goto errout;
 4504                 }
 4505                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 4506                 *tl++ = txdr_unsigned(stateidp->seqid);
 4507                 NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
 4508                     NFSX_STATEIDOTHER);
 4509                 tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 4510                 if (trunc)
 4511                         *tl = newnfs_true;
 4512                 else
 4513                         *tl = newnfs_false;
 4514                 (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
 4515         } else if (procnum == NFSV4OP_CBLAYOUTRECALL) {
 4516                 NFSD_DEBUG(4, "docallback layout recall\n");
 4517                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4518                 error = nfsrv_cbcallargs(nd, clp, callback,
 4519                     NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep, &slotpos);
 4520                 NFSD_DEBUG(4, "aft cbcallargs=%d\n", error);
 4521                 if (error != 0) {
 4522                         mbuf_freem(nd->nd_mreq);
 4523                         goto errout;
 4524                 }
 4525                 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 4526                 *tl++ = txdr_unsigned(laytype);
 4527                 *tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY);
 4528                 if (trunc)
 4529                         *tl++ = newnfs_true;
 4530                 else
 4531                         *tl++ = newnfs_false;
 4532                 *tl = txdr_unsigned(NFSV4LAYOUTRET_FILE);
 4533                 nfsm_fhtom(nd, (uint8_t *)fhp, NFSX_MYFH, 0);
 4534                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID);
 4535                 tval = 0;
 4536                 txdr_hyper(tval, tl); tl += 2;
 4537                 tval = UINT64_MAX;
 4538                 txdr_hyper(tval, tl); tl += 2;
 4539                 *tl++ = txdr_unsigned(stateidp->seqid);
 4540                 NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER);
 4541                 tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 4542                 NFSD_DEBUG(4, "aft args\n");
 4543         } else if (procnum == NFSV4PROC_CBNULL) {
 4544                 nd->nd_procnum = NFSV4PROC_CBNULL;
 4545                 if ((clp->lc_flags & LCL_NFSV41) != 0) {
 4546                         error = nfsv4_getcbsession(clp, &sep);
 4547                         if (error != 0) {
 4548                                 mbuf_freem(nd->nd_mreq);
 4549                                 goto errout;
 4550                         }
 4551                 }
 4552         } else {
 4553                 error = NFSERR_SERVERFAULT;
 4554                 mbuf_freem(nd->nd_mreq);
 4555                 goto errout;
 4556         }
 4557 
 4558         /*
 4559          * Call newnfs_connect(), as required, and then newnfs_request().
 4560          */
 4561         (void) newnfs_sndlock(&clp->lc_req.nr_lock);
 4562         if (clp->lc_req.nr_client == NULL) {
 4563                 if ((clp->lc_flags & LCL_NFSV41) != 0) {
 4564                         error = ECONNREFUSED;
 4565                         if (procnum != NFSV4PROC_CBNULL)
 4566                                 nfsv4_freeslot(&sep->sess_cbsess, slotpos,
 4567                                     true);
 4568                         nfsrv_freesession(sep, NULL);
 4569                 } else if (nd->nd_procnum == NFSV4PROC_CBNULL)
 4570                         error = newnfs_connect(NULL, &clp->lc_req, cred,
 4571                             NULL, 1);
 4572                 else
 4573                         error = newnfs_connect(NULL, &clp->lc_req, cred,
 4574                             NULL, 3);
 4575         }
 4576         newnfs_sndunlock(&clp->lc_req.nr_lock);
 4577         NFSD_DEBUG(4, "aft sndunlock=%d\n", error);
 4578         if (!error) {
 4579                 if ((nd->nd_flag & ND_NFSV41) != 0) {
 4580                         KASSERT(sep != NULL, ("sep NULL"));
 4581                         if (sep->sess_cbsess.nfsess_xprt != NULL)
 4582                                 error = newnfs_request(nd, NULL, clp,
 4583                                     &clp->lc_req, NULL, NULL, cred,
 4584                                     clp->lc_program, clp->lc_req.nr_vers, NULL,
 4585                                     1, NULL, &sep->sess_cbsess);
 4586                         else {
 4587                                 /*
 4588                                  * This should probably never occur, but if a
 4589                                  * client somehow does an RPC without a
 4590                                  * SequenceID Op that causes a callback just
 4591                                  * after the nfsd threads have been terminated
 4592                                  * and restared we could conceivably get here
 4593                                  * without a backchannel xprt.
 4594                                  */
 4595                                 printf("nfsrv_docallback: no xprt\n");
 4596                                 error = ECONNREFUSED;
 4597                         }
 4598                         NFSD_DEBUG(4, "aft newnfs_request=%d\n", error);
 4599                         if (error != 0 && procnum != NFSV4PROC_CBNULL) {
 4600                                 /*
 4601                                  * It is likely that the callback was never
 4602                                  * processed by the client and, as such,
 4603                                  * the sequence# for the session slot needs
 4604                                  * to be backed up by one to avoid a
 4605                                  * NFSERR_SEQMISORDERED error reply.
 4606                                  * For the unlikely case where the callback
 4607                                  * was processed by the client, this will
 4608                                  * make the next callback on the slot
 4609                                  * appear to be a retry.
 4610                                  * Since callbacks never specify that the
 4611                                  * reply be cached, this "apparent retry"
 4612                                  * should not be a problem.
 4613                                  */
 4614                                 nfsv4_freeslot(&sep->sess_cbsess, slotpos,
 4615                                     true);
 4616                         }
 4617                         nfsrv_freesession(sep, NULL);
 4618                 } else
 4619                         error = newnfs_request(nd, NULL, clp, &clp->lc_req,
 4620                             NULL, NULL, cred, clp->lc_program,
 4621                             clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
 4622         }
 4623 errout:
 4624         NFSFREECRED(cred);
 4625 
 4626         /*
 4627          * If error is set here, the Callback path isn't working
 4628          * properly, so twiddle the appropriate LCL_ flags.
 4629          * (nd_repstat != 0 indicates the Callback path is working,
 4630          *  but the callback failed on the client.)
 4631          */
 4632         if (error) {
 4633                 /*
 4634                  * Mark the callback pathway down, which disabled issuing
 4635                  * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
 4636                  */
 4637                 NFSLOCKSTATE();
 4638                 clp->lc_flags |= LCL_CBDOWN;
 4639                 NFSUNLOCKSTATE();
 4640         } else {
 4641                 /*
 4642                  * Callback worked. If the callback path was down, disable
 4643                  * callbacks, so no more delegations will be issued. (This
 4644                  * is done on the assumption that the callback pathway is
 4645                  * flakey.)
 4646                  */
 4647                 NFSLOCKSTATE();
 4648                 if (clp->lc_flags & LCL_CBDOWN)
 4649                         clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
 4650                 NFSUNLOCKSTATE();
 4651                 if (nd->nd_repstat) {
 4652                         error = nd->nd_repstat;
 4653                         NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n",
 4654                             procnum, error);
 4655                 } else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
 4656                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 4657                             NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
 4658                             p, NULL);
 4659                 mbuf_freem(nd->nd_mrep);
 4660         }
 4661         NFSLOCKSTATE();
 4662         clp->lc_cbref--;
 4663         if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
 4664                 clp->lc_flags &= ~LCL_WAKEUPWANTED;
 4665                 wakeup(clp);
 4666         }
 4667         NFSUNLOCKSTATE();
 4668 
 4669         free(nd, M_TEMP);
 4670         NFSEXITCODE(error);
 4671         return (error);
 4672 }
 4673 
 4674 /*
 4675  * Set up the compound RPC for the callback.
 4676  */
 4677 static int
 4678 nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
 4679     uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
 4680     int *slotposp)
 4681 {
 4682         uint32_t *tl;
 4683         int error, len;
 4684 
 4685         len = strlen(optag);
 4686         (void)nfsm_strtom(nd, optag, len);
 4687         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
 4688         if ((nd->nd_flag & ND_NFSV41) != 0) {
 4689                 *tl++ = txdr_unsigned(NFSV41_MINORVERSION);
 4690                 *tl++ = txdr_unsigned(callback);
 4691                 *tl++ = txdr_unsigned(2);
 4692                 *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
 4693                 error = nfsv4_setcbsequence(nd, clp, 1, sepp, slotposp);
 4694                 if (error != 0)
 4695                         return (error);
 4696                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 4697                 *tl = txdr_unsigned(op);
 4698         } else {
 4699                 *tl++ = txdr_unsigned(NFSV4_MINORVERSION);
 4700                 *tl++ = txdr_unsigned(callback);
 4701                 *tl++ = txdr_unsigned(1);
 4702                 *tl = txdr_unsigned(op);
 4703         }
 4704         return (0);
 4705 }
 4706 
 4707 /*
 4708  * Return the next index# for a clientid. Mostly just increment and return
 4709  * the next one, but... if the 32bit unsigned does actually wrap around,
 4710  * it should be rebooted.
 4711  * At an average rate of one new client per second, it will wrap around in
 4712  * approximately 136 years. (I think the server will have been shut
 4713  * down or rebooted before then.)
 4714  */
 4715 static u_int32_t
 4716 nfsrv_nextclientindex(void)
 4717 {
 4718         static u_int32_t client_index = 0;
 4719 
 4720         client_index++;
 4721         if (client_index != 0)
 4722                 return (client_index);
 4723 
 4724         printf("%s: out of clientids\n", __func__);
 4725         return (client_index);
 4726 }
 4727 
 4728 /*
 4729  * Return the next index# for a stateid. Mostly just increment and return
 4730  * the next one, but... if the 32bit unsigned does actually wrap around
 4731  * (will a BSD server stay up that long?), find
 4732  * new start and end values.
 4733  */
 4734 static u_int32_t
 4735 nfsrv_nextstateindex(struct nfsclient *clp)
 4736 {
 4737         struct nfsstate *stp;
 4738         int i;
 4739         u_int32_t canuse, min_index, max_index;
 4740 
 4741         if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
 4742                 clp->lc_stateindex++;
 4743                 if (clp->lc_stateindex != clp->lc_statemaxindex)
 4744                         return (clp->lc_stateindex);
 4745         }
 4746 
 4747         /*
 4748          * Yuck, we've hit the end.
 4749          * Look for a new min and max.
 4750          */
 4751         min_index = 0;
 4752         max_index = 0xffffffff;
 4753         for (i = 0; i < nfsrv_statehashsize; i++) {
 4754             LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 4755                 if (stp->ls_stateid.other[2] > 0x80000000) {
 4756                     if (stp->ls_stateid.other[2] < max_index)
 4757                         max_index = stp->ls_stateid.other[2];
 4758                 } else {
 4759                     if (stp->ls_stateid.other[2] > min_index)
 4760                         min_index = stp->ls_stateid.other[2];
 4761                 }
 4762             }
 4763         }
 4764 
 4765         /*
 4766          * Yikes, highly unlikely, but I'll handle it anyhow.
 4767          */
 4768         if (min_index == 0x80000000 && max_index == 0x80000001) {
 4769             canuse = 0;
 4770             /*
 4771              * Loop around until we find an unused entry. Return that
 4772              * and set LCL_INDEXNOTOK, so the search will continue next time.
 4773              * (This is one of those rare cases where a goto is the
 4774              *  cleanest way to code the loop.)
 4775              */
 4776 tryagain:
 4777             for (i = 0; i < nfsrv_statehashsize; i++) {
 4778                 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 4779                     if (stp->ls_stateid.other[2] == canuse) {
 4780                         canuse++;
 4781                         goto tryagain;
 4782                     }
 4783                 }
 4784             }
 4785             clp->lc_flags |= LCL_INDEXNOTOK;
 4786             return (canuse);
 4787         }
 4788 
 4789         /*
 4790          * Ok to start again from min + 1.
 4791          */
 4792         clp->lc_stateindex = min_index + 1;
 4793         clp->lc_statemaxindex = max_index;
 4794         clp->lc_flags &= ~LCL_INDEXNOTOK;
 4795         return (clp->lc_stateindex);
 4796 }
 4797 
 4798 /*
 4799  * The following functions handle the stable storage file that deals with
 4800  * the edge conditions described in RFC3530 Sec. 8.6.3.
 4801  * The file is as follows:
 4802  * - a single record at the beginning that has the lease time of the
 4803  *   previous server instance (before the last reboot) and the nfsrvboottime
 4804  *   values for the previous server boots.
 4805  *   These previous boot times are used to ensure that the current
 4806  *   nfsrvboottime does not, somehow, get set to a previous one.
 4807  *   (This is important so that Stale ClientIDs and StateIDs can
 4808  *    be recognized.)
 4809  *   The number of previous nfsrvboottime values precedes the list.
 4810  * - followed by some number of appended records with:
 4811  *   - client id string
 4812  *   - flag that indicates it is a record revoking state via lease
 4813  *     expiration or similar
 4814  *     OR has successfully acquired state.
 4815  * These structures vary in length, with the client string at the end, up
 4816  * to NFSV4_OPAQUELIMIT in size.
 4817  *
 4818  * At the end of the grace period, the file is truncated, the first
 4819  * record is rewritten with updated information and any acquired state
 4820  * records for successful reclaims of state are written.
 4821  *
 4822  * Subsequent records are appended when the first state is issued to
 4823  * a client and when state is revoked for a client.
 4824  *
 4825  * When reading the file in, state issued records that come later in
 4826  * the file override older ones, since the append log is in chronological order.
 4827  * If, for some reason, the file can't be read, the grace period is
 4828  * immediately terminated and all reclaims get NFSERR_NOGRACE.
 4829  */
 4830 
 4831 /*
 4832  * Read in the stable storage file. Called by nfssvc() before the nfsd
 4833  * processes start servicing requests.
 4834  */
 4835 void
 4836 nfsrv_setupstable(NFSPROC_T *p)
 4837 {
 4838         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 4839         struct nfsrv_stable *sp, *nsp;
 4840         struct nfst_rec *tsp;
 4841         int error, i, tryagain;
 4842         off_t off = 0;
 4843         ssize_t aresid, len;
 4844 
 4845         /*
 4846          * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
 4847          * a reboot, so state has not been lost.
 4848          */
 4849         if (sf->nsf_flags & NFSNSF_UPDATEDONE)
 4850                 return;
 4851         /*
 4852          * Set Grace over just until the file reads successfully.
 4853          */
 4854         nfsrvboottime = time_second;
 4855         LIST_INIT(&sf->nsf_head);
 4856         sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
 4857         sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
 4858         if (sf->nsf_fp == NULL)
 4859                 return;
 4860         error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4861             (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
 4862             0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4863         if (error || aresid || sf->nsf_numboots == 0 ||
 4864                 sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
 4865                 return;
 4866 
 4867         /*
 4868          * Now, read in the boottimes.
 4869          */
 4870         sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
 4871                 sizeof (time_t), M_TEMP, M_WAITOK);
 4872         off = sizeof (struct nfsf_rec);
 4873         error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4874             (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
 4875             UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4876         if (error || aresid) {
 4877                 free(sf->nsf_bootvals, M_TEMP);
 4878                 sf->nsf_bootvals = NULL;
 4879                 return;
 4880         }
 4881 
 4882         /*
 4883          * Make sure this nfsrvboottime is different from all recorded
 4884          * previous ones.
 4885          */
 4886         do {
 4887                 tryagain = 0;
 4888                 for (i = 0; i < sf->nsf_numboots; i++) {
 4889                         if (nfsrvboottime == sf->nsf_bootvals[i]) {
 4890                                 nfsrvboottime++;
 4891                                 tryagain = 1;
 4892                                 break;
 4893                         }
 4894                 }
 4895         } while (tryagain);
 4896 
 4897         sf->nsf_flags |= NFSNSF_OK;
 4898         off += (sf->nsf_numboots * sizeof (time_t));
 4899 
 4900         /*
 4901          * Read through the file, building a list of records for grace
 4902          * checking.
 4903          * Each record is between sizeof (struct nfst_rec) and
 4904          * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
 4905          * and is actually sizeof (struct nfst_rec) + nst_len - 1.
 4906          */
 4907         tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
 4908                 NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
 4909         do {
 4910             error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4911                 (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
 4912                 off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4913             len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
 4914             if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
 4915                 len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
 4916                 /*
 4917                  * Yuck, the file has been corrupted, so just return
 4918                  * after clearing out any restart state, so the grace period
 4919                  * is over.
 4920                  */
 4921                 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
 4922                         LIST_REMOVE(sp, nst_list);
 4923                         free(sp, M_TEMP);
 4924                 }
 4925                 free(tsp, M_TEMP);
 4926                 sf->nsf_flags &= ~NFSNSF_OK;
 4927                 free(sf->nsf_bootvals, M_TEMP);
 4928                 sf->nsf_bootvals = NULL;
 4929                 return;
 4930             }
 4931             if (len > 0) {
 4932                 off += sizeof (struct nfst_rec) + tsp->len - 1;
 4933                 /*
 4934                  * Search the list for a matching client.
 4935                  */
 4936                 LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
 4937                         if (tsp->len == sp->nst_len &&
 4938                             !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
 4939                                 break;
 4940                 }
 4941                 if (sp == LIST_END(&sf->nsf_head)) {
 4942                         sp = (struct nfsrv_stable *)malloc(tsp->len +
 4943                                 sizeof (struct nfsrv_stable) - 1, M_TEMP,
 4944                                 M_WAITOK);
 4945                         NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
 4946                                 sizeof (struct nfst_rec) + tsp->len - 1);
 4947                         LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
 4948                 } else {
 4949                         if (tsp->flag == NFSNST_REVOKE)
 4950                                 sp->nst_flag |= NFSNST_REVOKE;
 4951                         else
 4952                                 /*
 4953                                  * A subsequent timestamp indicates the client
 4954                                  * did a setclientid/confirm and any previous
 4955                                  * revoke is no longer relevant.
 4956                                  */
 4957                                 sp->nst_flag &= ~NFSNST_REVOKE;
 4958                 }
 4959             }
 4960         } while (len > 0);
 4961         free(tsp, M_TEMP);
 4962         sf->nsf_flags = NFSNSF_OK;
 4963         sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
 4964                 NFSRV_LEASEDELTA;
 4965 }
 4966 
 4967 /*
 4968  * Update the stable storage file, now that the grace period is over.
 4969  */
 4970 void
 4971 nfsrv_updatestable(NFSPROC_T *p)
 4972 {
 4973         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 4974         struct nfsrv_stable *sp, *nsp;
 4975         int i;
 4976         struct nfsvattr nva;
 4977         vnode_t vp;
 4978 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
 4979         mount_t mp = NULL;
 4980 #endif
 4981         int error;
 4982 
 4983         if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
 4984                 return;
 4985         sf->nsf_flags |= NFSNSF_UPDATEDONE;
 4986         /*
 4987          * Ok, we need to rewrite the stable storage file.
 4988          * - truncate to 0 length
 4989          * - write the new first structure
 4990          * - loop through the data structures, writing out any that
 4991          *   have timestamps older than the old boot
 4992          */
 4993         if (sf->nsf_bootvals) {
 4994                 sf->nsf_numboots++;
 4995                 for (i = sf->nsf_numboots - 2; i >= 0; i--)
 4996                         sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
 4997         } else {
 4998                 sf->nsf_numboots = 1;
 4999                 sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
 5000                         M_TEMP, M_WAITOK);
 5001         }
 5002         sf->nsf_bootvals[0] = nfsrvboottime;
 5003         sf->nsf_lease = nfsrv_lease;
 5004         NFSVNO_ATTRINIT(&nva);
 5005         NFSVNO_SETATTRVAL(&nva, size, 0);
 5006         vp = NFSFPVNODE(sf->nsf_fp);
 5007         vn_start_write(vp, &mp, V_WAIT);
 5008         if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
 5009                 error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
 5010                     NULL);
 5011                 NFSVOPUNLOCK(vp, 0);
 5012         } else
 5013                 error = EPERM;
 5014         vn_finished_write(mp);
 5015         if (!error)
 5016             error = NFSD_RDWR(UIO_WRITE, vp,
 5017                 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
 5018                 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
 5019         if (!error)
 5020             error = NFSD_RDWR(UIO_WRITE, vp,
 5021                 (caddr_t)sf->nsf_bootvals,
 5022                 sf->nsf_numboots * sizeof (time_t),
 5023                 (off_t)(sizeof (struct nfsf_rec)),
 5024                 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
 5025         free(sf->nsf_bootvals, M_TEMP);
 5026         sf->nsf_bootvals = NULL;
 5027         if (error) {
 5028                 sf->nsf_flags &= ~NFSNSF_OK;
 5029                 printf("EEK! Can't write NfsV4 stable storage file\n");
 5030                 return;
 5031         }
 5032         sf->nsf_flags |= NFSNSF_OK;
 5033 
 5034         /*
 5035          * Loop through the list and write out timestamp records for
 5036          * any clients that successfully reclaimed state.
 5037          */
 5038         LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
 5039                 if (sp->nst_flag & NFSNST_GOTSTATE) {
 5040                         nfsrv_writestable(sp->nst_client, sp->nst_len,
 5041                                 NFSNST_NEWSTATE, p);
 5042                         sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
 5043                 }
 5044                 LIST_REMOVE(sp, nst_list);
 5045                 free(sp, M_TEMP);
 5046         }
 5047         nfsrv_backupstable();
 5048 }
 5049 
 5050 /*
 5051  * Append a record to the stable storage file.
 5052  */
 5053 void
 5054 nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
 5055 {
 5056         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 5057         struct nfst_rec *sp;
 5058         int error;
 5059 
 5060         if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
 5061                 return;
 5062         sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
 5063                 len - 1, M_TEMP, M_WAITOK);
 5064         sp->len = len;
 5065         NFSBCOPY(client, sp->client, len);
 5066         sp->flag = flag;
 5067         error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
 5068             (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
 5069             UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
 5070         free(sp, M_TEMP);
 5071         if (error) {
 5072                 sf->nsf_flags &= ~NFSNSF_OK;
 5073                 printf("EEK! Can't write NfsV4 stable storage file\n");
 5074         }
 5075 }
 5076 
 5077 /*
 5078  * This function is called during the grace period to mark a client
 5079  * that successfully reclaimed state.
 5080  */
 5081 static void
 5082 nfsrv_markstable(struct nfsclient *clp)
 5083 {
 5084         struct nfsrv_stable *sp;
 5085 
 5086         /*
 5087          * First find the client structure.
 5088          */
 5089         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 5090                 if (sp->nst_len == clp->lc_idlen &&
 5091                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 5092                         break;
 5093         }
 5094         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
 5095                 return;
 5096 
 5097         /*
 5098          * Now, just mark it and set the nfsclient back pointer.
 5099          */
 5100         sp->nst_flag |= NFSNST_GOTSTATE;
 5101         sp->nst_clp = clp;
 5102 }
 5103 
 5104 /*
 5105  * This function is called when a NFSv4.1 client does a ReclaimComplete.
 5106  * Very similar to nfsrv_markstable(), except for the flag being set.
 5107  */
 5108 static void
 5109 nfsrv_markreclaim(struct nfsclient *clp)
 5110 {
 5111         struct nfsrv_stable *sp;
 5112 
 5113         /*
 5114          * First find the client structure.
 5115          */
 5116         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 5117                 if (sp->nst_len == clp->lc_idlen &&
 5118                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 5119                         break;
 5120         }
 5121         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
 5122                 return;
 5123 
 5124         /*
 5125          * Now, just set the flag.
 5126          */
 5127         sp->nst_flag |= NFSNST_RECLAIMED;
 5128 }
 5129 
 5130 /*
 5131  * This function is called for a reclaim, to see if it gets grace.
 5132  * It returns 0 if a reclaim is allowed, 1 otherwise.
 5133  */
 5134 static int
 5135 nfsrv_checkstable(struct nfsclient *clp)
 5136 {
 5137         struct nfsrv_stable *sp;
 5138 
 5139         /*
 5140          * First, find the entry for the client.
 5141          */
 5142         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 5143                 if (sp->nst_len == clp->lc_idlen &&
 5144                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 5145                         break;
 5146         }
 5147 
 5148         /*
 5149          * If not in the list, state was revoked or no state was issued
 5150          * since the previous reboot, a reclaim is denied.
 5151          */
 5152         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
 5153             (sp->nst_flag & NFSNST_REVOKE) ||
 5154             !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
 5155                 return (1);
 5156         return (0);
 5157 }
 5158 
 5159 /*
 5160  * Test for and try to clear out a conflicting client. This is called by
 5161  * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
 5162  * a found.
 5163  * The trick here is that it can't revoke a conflicting client with an
 5164  * expired lease unless it holds the v4root lock, so...
 5165  * If no v4root lock, get the lock and return 1 to indicate "try again".
 5166  * Return 0 to indicate the conflict can't be revoked and 1 to indicate
 5167  * the revocation worked and the conflicting client is "bye, bye", so it
 5168  * can be tried again.
 5169  * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
 5170  * Unlocks State before a non-zero value is returned.
 5171  */
 5172 static int
 5173 nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
 5174     NFSPROC_T *p)
 5175 {
 5176         int gotlock, lktype = 0;
 5177 
 5178         /*
 5179          * If lease hasn't expired, we can't fix it.
 5180          */
 5181         if (clp->lc_expiry >= NFSD_MONOSEC ||
 5182             !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
 5183                 return (0);
 5184         if (*haslockp == 0) {
 5185                 NFSUNLOCKSTATE();
 5186                 if (vp != NULL) {
 5187                         lktype = NFSVOPISLOCKED(vp);
 5188                         NFSVOPUNLOCK(vp, 0);
 5189                 }
 5190                 NFSLOCKV4ROOTMUTEX();
 5191                 nfsv4_relref(&nfsv4rootfs_lock);
 5192                 do {
 5193                         gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 5194                             NFSV4ROOTLOCKMUTEXPTR, NULL);
 5195                 } while (!gotlock);
 5196                 NFSUNLOCKV4ROOTMUTEX();
 5197                 *haslockp = 1;
 5198                 if (vp != NULL) {
 5199                         NFSVOPLOCK(vp, lktype | LK_RETRY);
 5200                         if ((vp->v_iflag & VI_DOOMED) != 0)
 5201                                 return (2);
 5202                 }
 5203                 return (1);
 5204         }
 5205         NFSUNLOCKSTATE();
 5206 
 5207         /*
 5208          * Ok, we can expire the conflicting client.
 5209          */
 5210         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
 5211         nfsrv_backupstable();
 5212         nfsrv_cleanclient(clp, p);
 5213         nfsrv_freedeleglist(&clp->lc_deleg);
 5214         nfsrv_freedeleglist(&clp->lc_olddeleg);
 5215         LIST_REMOVE(clp, lc_hash);
 5216         nfsrv_zapclient(clp, p);
 5217         return (1);
 5218 }
 5219 
 5220 /*
 5221  * Resolve a delegation conflict.
 5222  * Returns 0 to indicate the conflict was resolved without sleeping.
 5223  * Return -1 to indicate that the caller should check for conflicts again.
 5224  * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
 5225  *
 5226  * Also, manipulate the nfsv4root_lock, as required. It isn't changed
 5227  * for a return of 0, since there was no sleep and it could be required
 5228  * later. It is released for a return of NFSERR_DELAY, since the caller
 5229  * will return that error. It is released when a sleep was done waiting
 5230  * for the delegation to be returned or expire (so that other nfsds can
 5231  * handle ops). Then, it must be acquired for the write to stable storage.
 5232  * (This function is somewhat similar to nfsrv_clientconflict(), but
 5233  *  the semantics differ in a couple of subtle ways. The return of 0
 5234  *  indicates the conflict was resolved without sleeping here, not
 5235  *  that the conflict can't be resolved and the handling of nfsv4root_lock
 5236  *  differs, as noted above.)
 5237  * Unlocks State before returning a non-zero value.
 5238  */
 5239 static int
 5240 nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
 5241     vnode_t vp)
 5242 {
 5243         struct nfsclient *clp = stp->ls_clp;
 5244         int gotlock, error, lktype = 0, retrycnt, zapped_clp;
 5245         nfsv4stateid_t tstateid;
 5246         fhandle_t tfh;
 5247 
 5248         /*
 5249          * If the conflict is with an old delegation...
 5250          */
 5251         if (stp->ls_flags & NFSLCK_OLDDELEG) {
 5252                 /*
 5253                  * You can delete it, if it has expired.
 5254                  */
 5255                 if (clp->lc_delegtime < NFSD_MONOSEC) {
 5256                         nfsrv_freedeleg(stp);
 5257                         NFSUNLOCKSTATE();
 5258                         error = -1;
 5259                         goto out;
 5260                 }
 5261                 NFSUNLOCKSTATE();
 5262                 /*
 5263                  * During this delay, the old delegation could expire or it
 5264                  * could be recovered by the client via an Open with
 5265                  * CLAIM_DELEGATE_PREV.
 5266                  * Release the nfsv4root_lock, if held.
 5267                  */
 5268                 if (*haslockp) {
 5269                         *haslockp = 0;
 5270                         NFSLOCKV4ROOTMUTEX();
 5271                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5272                         NFSUNLOCKV4ROOTMUTEX();
 5273                 }
 5274                 error = NFSERR_DELAY;
 5275                 goto out;
 5276         }
 5277 
 5278         /*
 5279          * It's a current delegation, so:
 5280          * - check to see if the delegation has expired
 5281          *   - if so, get the v4root lock and then expire it
 5282          */
 5283         if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0 || (stp->ls_lastrecall <
 5284             NFSD_MONOSEC && clp->lc_expiry >= NFSD_MONOSEC &&
 5285             stp->ls_delegtime >= NFSD_MONOSEC)) {
 5286                 /*
 5287                  * - do a recall callback, since not yet done
 5288                  * For now, never allow truncate to be set. To use
 5289                  * truncate safely, it must be guaranteed that the
 5290                  * Remove, Rename or Setattr with size of 0 will
 5291                  * succeed and that would require major changes to
 5292                  * the VFS/Vnode OPs.
 5293                  * Set the expiry time large enough so that it won't expire
 5294                  * until after the callback, then set it correctly, once
 5295                  * the callback is done. (The delegation will now time
 5296                  * out whether or not the Recall worked ok. The timeout
 5297                  * will be extended when ops are done on the delegation
 5298                  * stateid, up to the timelimit.)
 5299                  */
 5300                 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) {
 5301                         stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
 5302                             NFSRV_LEASEDELTA;
 5303                         stp->ls_delegtimelimit = NFSD_MONOSEC + (6 *
 5304                             nfsrv_lease) + NFSRV_LEASEDELTA;
 5305                         stp->ls_flags |= NFSLCK_DELEGRECALL;
 5306                 }
 5307                 stp->ls_lastrecall = time_uptime + 1;
 5308 
 5309                 /*
 5310                  * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
 5311                  * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
 5312                  * in order to try and avoid a race that could happen
 5313                  * when a CBRecall request passed the Open reply with
 5314                  * the delegation in it when transitting the network.
 5315                  * Since nfsrv_docallback will sleep, don't use stp after
 5316                  * the call.
 5317                  */
 5318                 NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
 5319                     sizeof (tstateid));
 5320                 NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
 5321                     sizeof (tfh));
 5322                 NFSUNLOCKSTATE();
 5323                 if (*haslockp) {
 5324                         *haslockp = 0;
 5325                         NFSLOCKV4ROOTMUTEX();
 5326                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5327                         NFSUNLOCKV4ROOTMUTEX();
 5328                 }
 5329                 retrycnt = 0;
 5330                 do {
 5331                     error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
 5332                         &tstateid, 0, &tfh, NULL, NULL, 0, p);
 5333                     retrycnt++;
 5334                 } while ((error == NFSERR_BADSTATEID ||
 5335                     error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
 5336                 error = NFSERR_DELAY;
 5337                 goto out;
 5338         }
 5339 
 5340         if (clp->lc_expiry >= NFSD_MONOSEC &&
 5341             stp->ls_delegtime >= NFSD_MONOSEC) {
 5342                 NFSUNLOCKSTATE();
 5343                 /*
 5344                  * A recall has been done, but it has not yet expired.
 5345                  * So, RETURN_DELAY.
 5346                  */
 5347                 if (*haslockp) {
 5348                         *haslockp = 0;
 5349                         NFSLOCKV4ROOTMUTEX();
 5350                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5351                         NFSUNLOCKV4ROOTMUTEX();
 5352                 }
 5353                 error = NFSERR_DELAY;
 5354                 goto out;
 5355         }
 5356 
 5357         /*
 5358          * If we don't yet have the lock, just get it and then return,
 5359          * since we need that before deleting expired state, such as
 5360          * this delegation.
 5361          * When getting the lock, unlock the vnode, so other nfsds that
 5362          * are in progress, won't get stuck waiting for the vnode lock.
 5363          */
 5364         if (*haslockp == 0) {
 5365                 NFSUNLOCKSTATE();
 5366                 if (vp != NULL) {
 5367                         lktype = NFSVOPISLOCKED(vp);
 5368                         NFSVOPUNLOCK(vp, 0);
 5369                 }
 5370                 NFSLOCKV4ROOTMUTEX();
 5371                 nfsv4_relref(&nfsv4rootfs_lock);
 5372                 do {
 5373                         gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 5374                             NFSV4ROOTLOCKMUTEXPTR, NULL);
 5375                 } while (!gotlock);
 5376                 NFSUNLOCKV4ROOTMUTEX();
 5377                 *haslockp = 1;
 5378                 if (vp != NULL) {
 5379                         NFSVOPLOCK(vp, lktype | LK_RETRY);
 5380                         if ((vp->v_iflag & VI_DOOMED) != 0) {
 5381                                 *haslockp = 0;
 5382                                 NFSLOCKV4ROOTMUTEX();
 5383                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5384                                 NFSUNLOCKV4ROOTMUTEX();
 5385                                 error = NFSERR_PERM;
 5386                                 goto out;
 5387                         }
 5388                 }
 5389                 error = -1;
 5390                 goto out;
 5391         }
 5392 
 5393         NFSUNLOCKSTATE();
 5394         /*
 5395          * Ok, we can delete the expired delegation.
 5396          * First, write the Revoke record to stable storage and then
 5397          * clear out the conflict.
 5398          * Since all other nfsd threads are now blocked, we can safely
 5399          * sleep without the state changing.
 5400          */
 5401         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
 5402         nfsrv_backupstable();
 5403         if (clp->lc_expiry < NFSD_MONOSEC) {
 5404                 nfsrv_cleanclient(clp, p);
 5405                 nfsrv_freedeleglist(&clp->lc_deleg);
 5406                 nfsrv_freedeleglist(&clp->lc_olddeleg);
 5407                 LIST_REMOVE(clp, lc_hash);
 5408                 zapped_clp = 1;
 5409         } else {
 5410                 nfsrv_freedeleg(stp);
 5411                 zapped_clp = 0;
 5412         }
 5413         if (zapped_clp)
 5414                 nfsrv_zapclient(clp, p);
 5415         error = -1;
 5416 
 5417 out:
 5418         NFSEXITCODE(error);
 5419         return (error);
 5420 }
 5421 
 5422 /*
 5423  * Check for a remove allowed, if remove is set to 1 and get rid of
 5424  * delegations.
 5425  */
 5426 int
 5427 nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
 5428 {
 5429         struct nfsstate *stp;
 5430         struct nfslockfile *lfp;
 5431         int error, haslock = 0;
 5432         fhandle_t nfh;
 5433 
 5434         /*
 5435          * First, get the lock file structure.
 5436          * (A return of -1 means no associated state, so remove ok.)
 5437          */
 5438         error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
 5439 tryagain:
 5440         NFSLOCKSTATE();
 5441         if (!error)
 5442                 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
 5443         if (error) {
 5444                 NFSUNLOCKSTATE();
 5445                 if (haslock) {
 5446                         NFSLOCKV4ROOTMUTEX();
 5447                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5448                         NFSUNLOCKV4ROOTMUTEX();
 5449                 }
 5450                 if (error == -1)
 5451                         error = 0;
 5452                 goto out;
 5453         }
 5454 
 5455         /*
 5456          * Now, we must Recall any delegations.
 5457          */
 5458         error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
 5459         if (error) {
 5460                 /*
 5461                  * nfsrv_cleandeleg() unlocks state for non-zero
 5462                  * return.
 5463                  */
 5464                 if (error == -1)
 5465                         goto tryagain;
 5466                 if (haslock) {
 5467                         NFSLOCKV4ROOTMUTEX();
 5468                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5469                         NFSUNLOCKV4ROOTMUTEX();
 5470                 }
 5471                 goto out;
 5472         }
 5473 
 5474         /*
 5475          * Now, look for a conflicting open share.
 5476          */
 5477         if (remove) {
 5478                 /*
 5479                  * If the entry in the directory was the last reference to the
 5480                  * corresponding filesystem object, the object can be destroyed
 5481                  * */
 5482                 if(lfp->lf_usecount>1)
 5483                         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 5484                                 if (stp->ls_flags & NFSLCK_WRITEDENY) {
 5485                                         error = NFSERR_FILEOPEN;
 5486                                         break;
 5487                                 }
 5488                         }
 5489         }
 5490 
 5491         NFSUNLOCKSTATE();
 5492         if (haslock) {
 5493                 NFSLOCKV4ROOTMUTEX();
 5494                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5495                 NFSUNLOCKV4ROOTMUTEX();
 5496         }
 5497 
 5498 out:
 5499         NFSEXITCODE(error);
 5500         return (error);
 5501 }
 5502 
 5503 /*
 5504  * Clear out all delegations for the file referred to by lfp.
 5505  * May return NFSERR_DELAY, if there will be a delay waiting for
 5506  * delegations to expire.
 5507  * Returns -1 to indicate it slept while recalling a delegation.
 5508  * This function has the side effect of deleting the nfslockfile structure,
 5509  * if it no longer has associated state and didn't have to sleep.
 5510  * Unlocks State before a non-zero value is returned.
 5511  */
 5512 static int
 5513 nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
 5514     struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
 5515 {
 5516         struct nfsstate *stp, *nstp;
 5517         int ret = 0;
 5518 
 5519         stp = LIST_FIRST(&lfp->lf_deleg);
 5520         while (stp != LIST_END(&lfp->lf_deleg)) {
 5521                 nstp = LIST_NEXT(stp, ls_file);
 5522                 if (stp->ls_clp != clp) {
 5523                         ret = nfsrv_delegconflict(stp, haslockp, p, vp);
 5524                         if (ret) {
 5525                                 /*
 5526                                  * nfsrv_delegconflict() unlocks state
 5527                                  * when it returns non-zero.
 5528                                  */
 5529                                 goto out;
 5530                         }
 5531                 }
 5532                 stp = nstp;
 5533         }
 5534 out:
 5535         NFSEXITCODE(ret);
 5536         return (ret);
 5537 }
 5538 
 5539 /*
 5540  * There are certain operations that, when being done outside of NFSv4,
 5541  * require that any NFSv4 delegation for the file be recalled.
 5542  * This function is to be called for those cases:
 5543  * VOP_RENAME() - When a delegation is being recalled for any reason,
 5544  *      the client may have to do Opens against the server, using the file's
 5545  *      final component name. If the file has been renamed on the server,
 5546  *      that component name will be incorrect and the Open will fail.
 5547  * VOP_REMOVE() - Theoretically, a client could Open a file after it has
 5548  *      been removed on the server, if there is a delegation issued to
 5549  *      that client for the file. I say "theoretically" since clients
 5550  *      normally do an Access Op before the Open and that Access Op will
 5551  *      fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
 5552  *      they will detect the file's removal in the same manner. (There is
 5553  *      one case where RFC3530 allows a client to do an Open without first
 5554  *      doing an Access Op, which is passage of a check against the ACE
 5555  *      returned with a Write delegation, but current practice is to ignore
 5556  *      the ACE and always do an Access Op.)
 5557  *      Since the functions can only be called with an unlocked vnode, this
 5558  *      can't be done at this time.
 5559  * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
 5560  *      locks locally in the client, which are not visible to the server. To
 5561  *      deal with this, issuing of delegations for a vnode must be disabled
 5562  *      and all delegations for the vnode recalled. This is done via the
 5563  *      second function, using the VV_DISABLEDELEG vflag on the vnode.
 5564  */
 5565 void
 5566 nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
 5567 {
 5568         time_t starttime;
 5569         int error;
 5570 
 5571         /*
 5572          * First, check to see if the server is currently running and it has
 5573          * been called for a regular file when issuing delegations.
 5574          */
 5575         if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
 5576             nfsrv_issuedelegs == 0)
 5577                 return;
 5578 
 5579         KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
 5580         /*
 5581          * First, get a reference on the nfsv4rootfs_lock so that an
 5582          * exclusive lock cannot be acquired by another thread.
 5583          */
 5584         NFSLOCKV4ROOTMUTEX();
 5585         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 5586         NFSUNLOCKV4ROOTMUTEX();
 5587 
 5588         /*
 5589          * Now, call nfsrv_checkremove() in a loop while it returns
 5590          * NFSERR_DELAY. Return upon any other error or when timed out.
 5591          */
 5592         starttime = NFSD_MONOSEC;
 5593         do {
 5594                 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
 5595                         error = nfsrv_checkremove(vp, 0, p);
 5596                         NFSVOPUNLOCK(vp, 0);
 5597                 } else
 5598                         error = EPERM;
 5599                 if (error == NFSERR_DELAY) {
 5600                         if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
 5601                                 break;
 5602                         /* Sleep for a short period of time */
 5603                         (void) nfs_catnap(PZERO, 0, "nfsremove");
 5604                 }
 5605         } while (error == NFSERR_DELAY);
 5606         NFSLOCKV4ROOTMUTEX();
 5607         nfsv4_relref(&nfsv4rootfs_lock);
 5608         NFSUNLOCKV4ROOTMUTEX();
 5609 }
 5610 
/*
 * Disable the issuing of delegations for vp and then recall the ones that
 * are already outstanding.
 * The vnode flag is only set when VV_DISABLEDELEG is defined at compile
 * time; the call to nfsd_recalldelegation() is made either way.
 */
void
nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
{

#ifdef VV_DISABLEDELEG
	/*
	 * First, flag issuance of delegations disabled.
	 */
	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
#endif

	/*
	 * Then call nfsd_recalldelegation() to get rid of all extant
	 * delegations.
	 */
	nfsd_recalldelegation(vp, p);
}
 5628 
 5629 /*
 5630  * Check for conflicting locks, etc. and then get rid of delegations.
 5631  * (At one point I thought that I should get rid of delegations for any
 5632  *  Setattr, since it could potentially disallow the I/O op (read or write)
 5633  *  allowed by the delegation. However, Setattr Ops that aren't changing
 5634  *  the size get a stateid of all 0s, so you can't tell if it is a delegation
 5635  *  for the same client or a different one, so I decided to only get rid
 5636  *  of delegations for other clients when the size is being changed.)
 5637  * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
 5638  * as Write backs, even if there is no delegation, so it really isn't any
 5639  * different?)
 5640  */
 5641 int
 5642 nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
 5643     nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
 5644     struct nfsexstuff *exp, NFSPROC_T *p)
 5645 {
 5646         struct nfsstate st, *stp = &st;
 5647         struct nfslock lo, *lop = &lo;
 5648         int error = 0;
 5649         nfsquad_t clientid;
 5650 
 5651         if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
 5652                 stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
 5653                 lop->lo_first = nvap->na_size;
 5654         } else {
 5655                 stp->ls_flags = 0;
 5656                 lop->lo_first = 0;
 5657         }
 5658         if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
 5659             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
 5660             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
 5661             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
 5662                 stp->ls_flags |= NFSLCK_SETATTR;
 5663         if (stp->ls_flags == 0)
 5664                 goto out;
 5665         lop->lo_end = NFS64BITSSET;
 5666         lop->lo_flags = NFSLCK_WRITE;
 5667         stp->ls_ownerlen = 0;
 5668         stp->ls_op = NULL;
 5669         stp->ls_uid = nd->nd_cred->cr_uid;
 5670         stp->ls_stateid.seqid = stateidp->seqid;
 5671         clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
 5672         clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
 5673         stp->ls_stateid.other[2] = stateidp->other[2];
 5674         error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 5675             stateidp, exp, nd, p);
 5676 
 5677 out:
 5678         NFSEXITCODE2(error, nd);
 5679         return (error);
 5680 }
 5681 
 5682 /*
 5683  * Check for a write delegation and do a CBGETATTR if there is one, updating
 5684  * the attributes, as required.
 5685  * Should I return an error if I can't get the attributes? (For now, I'll
 5686  * just return ok.
 5687  */
 5688 int
 5689 nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
 5690     struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
 5691 {
 5692         struct nfsstate *stp;
 5693         struct nfslockfile *lfp;
 5694         struct nfsclient *clp;
 5695         struct nfsvattr nva;
 5696         fhandle_t nfh;
 5697         int error = 0;
 5698         nfsattrbit_t cbbits;
 5699         u_quad_t delegfilerev;
 5700 
 5701         NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
 5702         if (!NFSNONZERO_ATTRBIT(&cbbits))
 5703                 goto out;
 5704         if (nfsrv_writedelegcnt == 0)
 5705                 goto out;
 5706 
 5707         /*
 5708          * Get the lock file structure.
 5709          * (A return of -1 means no associated state, so return ok.)
 5710          */
 5711         error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
 5712         NFSLOCKSTATE();
 5713         if (!error)
 5714                 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
 5715         if (error) {
 5716                 NFSUNLOCKSTATE();
 5717                 if (error == -1)
 5718                         error = 0;
 5719                 goto out;
 5720         }
 5721 
 5722         /*
 5723          * Now, look for a write delegation.
 5724          */
 5725         LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 5726                 if (stp->ls_flags & NFSLCK_DELEGWRITE)
 5727                         break;
 5728         }
 5729         if (stp == LIST_END(&lfp->lf_deleg)) {
 5730                 NFSUNLOCKSTATE();
 5731                 goto out;
 5732         }
 5733         clp = stp->ls_clp;
 5734 
 5735         /* If the clientid is not confirmed, ignore the delegation. */
 5736         if (clp->lc_flags & LCL_NEEDSCONFIRM) {
 5737                 NFSUNLOCKSTATE();
 5738                 goto out;
 5739         }
 5740 
 5741         delegfilerev = stp->ls_filerev;
 5742         /*
 5743          * If the Write delegation was issued as a part of this Compound RPC
 5744          * or if we have an Implied Clientid (used in a previous Op in this
 5745          * compound) and it is the client the delegation was issued to,
 5746          * just return ok.
 5747          * I also assume that it is from the same client iff the network
 5748          * host IP address is the same as the callback address. (Not
 5749          * exactly correct by the RFC, but avoids a lot of Getattr
 5750          * callbacks.)
 5751          */
 5752         if (nd->nd_compref == stp->ls_compref ||
 5753             ((nd->nd_flag & ND_IMPLIEDCLID) &&
 5754              clp->lc_clientid.qval == nd->nd_clientid.qval) ||
 5755              nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
 5756                 NFSUNLOCKSTATE();
 5757                 goto out;
 5758         }
 5759 
 5760         /*
 5761          * We are now done with the delegation state structure,
 5762          * so the statelock can be released and we can now tsleep().
 5763          */
 5764 
 5765         /*
 5766          * Now, we must do the CB Getattr callback, to see if Change or Size
 5767          * has changed.
 5768          */
 5769         if (clp->lc_expiry >= NFSD_MONOSEC) {
 5770                 NFSUNLOCKSTATE();
 5771                 NFSVNO_ATTRINIT(&nva);
 5772                 nva.na_filerev = NFS64BITSSET;
 5773                 error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
 5774                     0, &nfh, &nva, &cbbits, 0, p);
 5775                 if (!error) {
 5776                         if ((nva.na_filerev != NFS64BITSSET &&
 5777                             nva.na_filerev > delegfilerev) ||
 5778                             (NFSVNO_ISSETSIZE(&nva) &&
 5779                              nva.na_size != nvap->na_size)) {
 5780                                 error = nfsvno_updfilerev(vp, nvap, nd, p);
 5781                                 if (NFSVNO_ISSETSIZE(&nva))
 5782                                         nvap->na_size = nva.na_size;
 5783                         }
 5784                 } else
 5785                         error = 0;      /* Ignore callback errors for now. */
 5786         } else {
 5787                 NFSUNLOCKSTATE();
 5788         }
 5789 
 5790 out:
 5791         NFSEXITCODE2(error, nd);
 5792         return (error);
 5793 }
 5794 
 5795 /*
 5796  * This function looks for openowners that haven't had any opens for
 5797  * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
 5798  * is set.
 5799  */
 5800 void
 5801 nfsrv_throwawayopens(NFSPROC_T *p)
 5802 {
 5803         struct nfsclient *clp, *nclp;
 5804         struct nfsstate *stp, *nstp;
 5805         int i;
 5806 
 5807         NFSLOCKSTATE();
 5808         nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
 5809         /*
 5810          * For each client...
 5811          */
 5812         for (i = 0; i < nfsrv_clienthashsize; i++) {
 5813             LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
 5814                 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
 5815                         if (LIST_EMPTY(&stp->ls_open) &&
 5816                             (stp->ls_noopens > NFSNOOPEN ||
 5817                              (nfsrv_openpluslock * 2) >
 5818                              nfsrv_v4statelimit))
 5819                                 nfsrv_freeopenowner(stp, 0, p);
 5820                 }
 5821             }
 5822         }
 5823         NFSUNLOCKSTATE();
 5824 }
 5825 
 5826 /*
 5827  * This function checks to see if the credentials are the same.
 5828  * Returns 1 for not same, 0 otherwise.
 5829  */
 5830 static int
 5831 nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
 5832 {
 5833 
 5834         if (nd->nd_flag & ND_GSS) {
 5835                 if (!(clp->lc_flags & LCL_GSS))
 5836                         return (1);
 5837                 if (clp->lc_flags & LCL_NAME) {
 5838                         if (nd->nd_princlen != clp->lc_namelen ||
 5839                             NFSBCMP(nd->nd_principal, clp->lc_name,
 5840                                 clp->lc_namelen))
 5841                                 return (1);
 5842                         else
 5843                                 return (0);
 5844                 }
 5845                 if (nd->nd_cred->cr_uid == clp->lc_uid)
 5846                         return (0);
 5847                 else
 5848                         return (1);
 5849         } else if (clp->lc_flags & LCL_GSS)
 5850                 return (1);
 5851         /*
 5852          * For AUTH_SYS, allow the same uid or root. (This is underspecified
 5853          * in RFC3530, which talks about principals, but doesn't say anything
 5854          * about uids for AUTH_SYS.)
 5855          */
 5856         if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
 5857                 return (0);
 5858         else
 5859                 return (1);
 5860 }
 5861 
 5862 /*
 5863  * Calculate the lease expiry time.
 5864  */
 5865 static time_t
 5866 nfsrv_leaseexpiry(void)
 5867 {
 5868 
 5869         if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
 5870                 return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
 5871         return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
 5872 }
 5873 
 5874 /*
 5875  * Delay the delegation timeout as far as ls_delegtimelimit, as required.
 5876  */
 5877 static void
 5878 nfsrv_delaydelegtimeout(struct nfsstate *stp)
 5879 {
 5880 
 5881         if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
 5882                 return;
 5883 
 5884         if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
 5885             stp->ls_delegtime < stp->ls_delegtimelimit) {
 5886                 stp->ls_delegtime += nfsrv_lease;
 5887                 if (stp->ls_delegtime > stp->ls_delegtimelimit)
 5888                         stp->ls_delegtime = stp->ls_delegtimelimit;
 5889         }
 5890 }
 5891 
 5892 /*
 5893  * This function checks to see if there is any other state associated
 5894  * with the openowner for this Open.
 5895  * It returns 1 if there is no other state, 0 otherwise.
 5896  */
 5897 static int
 5898 nfsrv_nootherstate(struct nfsstate *stp)
 5899 {
 5900         struct nfsstate *tstp;
 5901 
 5902         LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
 5903                 if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
 5904                         return (0);
 5905         }
 5906         return (1);
 5907 }
 5908 
 5909 /*
 5910  * Create a list of lock deltas (changes to local byte range locking
 5911  * that can be rolled back using the list) and apply the changes via
 5912  * nfsvno_advlock(). Optionally, lock the list. It is expected that either
 5913  * the rollback or update function will be called after this.
 5914  * It returns an error (and rolls back, as required), if any nfsvno_advlock()
 5915  * call fails. If it returns an error, it will unlock the list.
 5916  */
 5917 static int
 5918 nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
 5919     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
 5920 {
 5921         struct nfslock *lop, *nlop;
 5922         int error = 0;
 5923 
 5924         /* Loop through the list of locks. */
 5925         lop = LIST_FIRST(&lfp->lf_locallock);
 5926         while (first < end && lop != NULL) {
 5927                 nlop = LIST_NEXT(lop, lo_lckowner);
 5928                 if (first >= lop->lo_end) {
 5929                         /* not there yet */
 5930                         lop = nlop;
 5931                 } else if (first < lop->lo_first) {
 5932                         /* new one starts before entry in list */
 5933                         if (end <= lop->lo_first) {
 5934                                 /* no overlap between old and new */
 5935                                 error = nfsrv_dolocal(vp, lfp, flags,
 5936                                     NFSLCK_UNLOCK, first, end, cfp, p);
 5937                                 if (error != 0)
 5938                                         break;
 5939                                 first = end;
 5940                         } else {
 5941                                 /* handle fragment overlapped with new one */
 5942                                 error = nfsrv_dolocal(vp, lfp, flags,
 5943                                     NFSLCK_UNLOCK, first, lop->lo_first, cfp,
 5944                                     p);
 5945                                 if (error != 0)
 5946                                         break;
 5947                                 first = lop->lo_first;
 5948                         }
 5949                 } else {
 5950                         /* new one overlaps this entry in list */
 5951                         if (end <= lop->lo_end) {
 5952                                 /* overlaps all of new one */
 5953                                 error = nfsrv_dolocal(vp, lfp, flags,
 5954                                     lop->lo_flags, first, end, cfp, p);
 5955                                 if (error != 0)
 5956                                         break;
 5957                                 first = end;
 5958                         } else {
 5959                                 /* handle fragment overlapped with new one */
 5960                                 error = nfsrv_dolocal(vp, lfp, flags,
 5961                                     lop->lo_flags, first, lop->lo_end, cfp, p);
 5962                                 if (error != 0)
 5963                                         break;
 5964                                 first = lop->lo_end;
 5965                                 lop = nlop;
 5966                         }
 5967                 }
 5968         }
 5969         if (first < end && error == 0)
 5970                 /* handle fragment past end of list */
 5971                 error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
 5972                     end, cfp, p);
 5973 
 5974         NFSEXITCODE(error);
 5975         return (error);
 5976 }
 5977 
 5978 /*
 5979  * Local lock unlock. Unlock all byte ranges that are no longer locked
 5980  * by NFSv4. To do this, unlock any subranges of first-->end that
 5981  * do not overlap with the byte ranges of any lock in the lfp->lf_lock
 5982  * list. This list has all locks for the file held by other
 5983  * <clientid, lockowner> tuples. The list is ordered by increasing
 5984  * lo_first value, but may have entries that overlap each other, for
 5985  * the case of read locks.
 5986  */
 5987 static void
 5988 nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
 5989     uint64_t init_end, NFSPROC_T *p)
 5990 {
 5991         struct nfslock *lop;
 5992         uint64_t first, end, prevfirst __unused;
 5993 
 5994         first = init_first;
 5995         end = init_end;
 5996         while (first < init_end) {
 5997                 /* Loop through all nfs locks, adjusting first and end */
 5998                 prevfirst = 0;
 5999                 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
 6000                         KASSERT(prevfirst <= lop->lo_first,
 6001                             ("nfsv4 locks out of order"));
 6002                         KASSERT(lop->lo_first < lop->lo_end,
 6003                             ("nfsv4 bogus lock"));
 6004                         prevfirst = lop->lo_first;
 6005                         if (first >= lop->lo_first &&
 6006                             first < lop->lo_end)
 6007                                 /*
 6008                                  * Overlaps with initial part, so trim
 6009                                  * off that initial part by moving first past
 6010                                  * it.
 6011                                  */
 6012                                 first = lop->lo_end;
 6013                         else if (end > lop->lo_first &&
 6014                             lop->lo_first > first) {
 6015                                 /*
 6016                                  * This lock defines the end of the
 6017                                  * segment to unlock, so set end to the
 6018                                  * start of it and break out of the loop.
 6019                                  */
 6020                                 end = lop->lo_first;
 6021                                 break;
 6022                         }
 6023                         if (first >= end)
 6024                                 /*
 6025                                  * There is no segment left to do, so
 6026                                  * break out of this loop and then exit
 6027                                  * the outer while() since first will be set
 6028                                  * to end, which must equal init_end here.
 6029                                  */
 6030                                 break;
 6031                 }
 6032                 if (first < end) {
 6033                         /* Unlock this segment */
 6034                         (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
 6035                             NFSLCK_READ, first, end, NULL, p);
 6036                         nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
 6037                             first, end);
 6038                 }
 6039                 /*
 6040                  * Now move past this segment and look for any further
 6041                  * segment in the range, if there is one.
 6042                  */
 6043                 first = end;
 6044                 end = init_end;
 6045         }
 6046 }
 6047 
 6048 /*
 6049  * Do the local lock operation and update the rollback list, as required.
 6050  * Perform the rollback and return the error if nfsvno_advlock() fails.
 6051  */
 6052 static int
 6053 nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
 6054     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
 6055 {
 6056         struct nfsrollback *rlp;
 6057         int error = 0, ltype, oldltype;
 6058 
 6059         if (flags & NFSLCK_WRITE)
 6060                 ltype = F_WRLCK;
 6061         else if (flags & NFSLCK_READ)
 6062                 ltype = F_RDLCK;
 6063         else
 6064                 ltype = F_UNLCK;
 6065         if (oldflags & NFSLCK_WRITE)
 6066                 oldltype = F_WRLCK;
 6067         else if (oldflags & NFSLCK_READ)
 6068                 oldltype = F_RDLCK;
 6069         else
 6070                 oldltype = F_UNLCK;
 6071         if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
 6072                 /* nothing to do */
 6073                 goto out;
 6074         error = nfsvno_advlock(vp, ltype, first, end, p);
 6075         if (error != 0) {
 6076                 if (cfp != NULL) {
 6077                         cfp->cl_clientid.lval[0] = 0;
 6078                         cfp->cl_clientid.lval[1] = 0;
 6079                         cfp->cl_first = 0;
 6080                         cfp->cl_end = NFS64BITSSET;
 6081                         cfp->cl_flags = NFSLCK_WRITE;
 6082                         cfp->cl_ownerlen = 5;
 6083                         NFSBCOPY("LOCAL", cfp->cl_owner, 5);
 6084                 }
 6085                 nfsrv_locallock_rollback(vp, lfp, p);
 6086         } else if (ltype != F_UNLCK) {
 6087                 rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
 6088                     M_WAITOK);
 6089                 rlp->rlck_first = first;
 6090                 rlp->rlck_end = end;
 6091                 rlp->rlck_type = oldltype;
 6092                 LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
 6093         }
 6094 
 6095 out:
 6096         NFSEXITCODE(error);
 6097         return (error);
 6098 }
 6099 
 6100 /*
 6101  * Roll back local lock changes and free up the rollback list.
 6102  */
 6103 static void
 6104 nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
 6105 {
 6106         struct nfsrollback *rlp, *nrlp;
 6107 
 6108         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
 6109                 (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
 6110                     rlp->rlck_end, p);
 6111                 free(rlp, M_NFSDROLLBACK);
 6112         }
 6113         LIST_INIT(&lfp->lf_rollback);
 6114 }
 6115 
 6116 /*
 6117  * Update local lock list and delete rollback list (ie now committed to the
 6118  * local locks). Most of the work is done by the internal function.
 6119  */
 6120 static void
 6121 nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
 6122     uint64_t end)
 6123 {
 6124         struct nfsrollback *rlp, *nrlp;
 6125         struct nfslock *new_lop, *other_lop;
 6126 
 6127         new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
 6128         if (flags & (NFSLCK_READ | NFSLCK_WRITE))
 6129                 other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
 6130                     M_WAITOK);
 6131         else
 6132                 other_lop = NULL;
 6133         new_lop->lo_flags = flags;
 6134         new_lop->lo_first = first;
 6135         new_lop->lo_end = end;
 6136         nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
 6137         if (new_lop != NULL)
 6138                 free(new_lop, M_NFSDLOCK);
 6139         if (other_lop != NULL)
 6140                 free(other_lop, M_NFSDLOCK);
 6141 
 6142         /* and get rid of the rollback list */
 6143         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
 6144                 free(rlp, M_NFSDROLLBACK);
 6145         LIST_INIT(&lfp->lf_rollback);
 6146 }
 6147 
 6148 /*
 6149  * Lock the struct nfslockfile for local lock updating.
 6150  */
 6151 static void
 6152 nfsrv_locklf(struct nfslockfile *lfp)
 6153 {
 6154         int gotlock;
 6155 
 6156         /* lf_usecount ensures *lfp won't be free'd */
 6157         lfp->lf_usecount++;
 6158         do {
 6159                 gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
 6160                     NFSSTATEMUTEXPTR, NULL);
 6161         } while (gotlock == 0);
 6162         lfp->lf_usecount--;
 6163 }
 6164 
/*
 * Unlock the struct nfslockfile after local lock updating.
 * Releases lfp->lf_locallock_lck through nfsv4_unlock(), passing 0 as
 * its second argument.
 */
static void
nfsrv_unlocklf(struct nfslockfile *lfp)
{

	nfsv4_unlock(&lfp->lf_locallock_lck, 0);
}
 6174 
 6175 /*
 6176  * Clear out all state for the NFSv4 server.
 6177  * Must be called by a thread that can sleep when no nfsds are running.
 6178  */
 6179 void
 6180 nfsrv_throwawayallstate(NFSPROC_T *p)
 6181 {
 6182         struct nfsclient *clp, *nclp;
 6183         struct nfslockfile *lfp, *nlfp;
 6184         int i;
 6185 
 6186         /*
 6187          * For each client, clean out the state and then free the structure.
 6188          */
 6189         for (i = 0; i < nfsrv_clienthashsize; i++) {
 6190                 LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
 6191                         nfsrv_cleanclient(clp, p);
 6192                         nfsrv_freedeleglist(&clp->lc_deleg);
 6193                         nfsrv_freedeleglist(&clp->lc_olddeleg);
 6194                         free(clp->lc_stateid, M_NFSDCLIENT);
 6195                         free(clp, M_NFSDCLIENT);
 6196                 }
 6197         }
 6198 
 6199         /*
 6200          * Also, free up any remaining lock file structures.
 6201          */
 6202         for (i = 0; i < nfsrv_lockhashsize; i++) {
 6203                 LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
 6204                         printf("nfsd unload: fnd a lock file struct\n");
 6205                         nfsrv_freenfslockfile(lfp);
 6206                 }
 6207         }
 6208 
 6209         /* And get rid of the deviceid structures and layouts. */
 6210         nfsrv_freealllayoutsanddevids();
 6211 }
 6212 
 6213 /*
 6214  * Check the sequence# for the session and slot provided as an argument.
 6215  * Also, renew the lease if the session will return NFS_OK.
 6216  */
 6217 int
 6218 nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
 6219     uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
 6220     uint32_t *sflagsp, NFSPROC_T *p)
 6221 {
 6222         struct nfsdsession *sep;
 6223         struct nfssessionhash *shp;
 6224         int error;
 6225 
 6226         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6227         NFSLOCKSESSION(shp);
 6228         sep = nfsrv_findsession(nd->nd_sessionid);
 6229         if (sep == NULL) {
 6230                 NFSUNLOCKSESSION(shp);
 6231                 return (NFSERR_BADSESSION);
 6232         }
 6233         error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
 6234             sep->sess_slots, NULL, NFSV4_SLOTS - 1);
 6235         if (error != 0) {
 6236                 NFSUNLOCKSESSION(shp);
 6237                 return (error);
 6238         }
 6239         if (cache_this != 0)
 6240                 nd->nd_flag |= ND_SAVEREPLY;
 6241         /* Renew the lease. */
 6242         sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
 6243         nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
 6244         nd->nd_flag |= ND_IMPLIEDCLID;
 6245 
 6246         *sflagsp = 0;
 6247         if (sep->sess_clp->lc_req.nr_client == NULL ||
 6248             (sep->sess_clp->lc_flags & LCL_CBDOWN) != 0)
 6249                 *sflagsp |= NFSV4SEQ_CBPATHDOWN;
 6250         NFSUNLOCKSESSION(shp);
 6251         if (error == NFSERR_EXPIRED) {
 6252                 *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
 6253                 error = 0;
 6254         } else if (error == NFSERR_ADMINREVOKED) {
 6255                 *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
 6256                 error = 0;
 6257         }
 6258         *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
 6259         return (0);
 6260 }
 6261 
 6262 /*
 6263  * Check/set reclaim complete for this session/clientid.
 6264  */
 6265 int
 6266 nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs)
 6267 {
 6268         struct nfsdsession *sep;
 6269         struct nfssessionhash *shp;
 6270         int error = 0;
 6271 
 6272         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6273         NFSLOCKSTATE();
 6274         NFSLOCKSESSION(shp);
 6275         sep = nfsrv_findsession(nd->nd_sessionid);
 6276         if (sep == NULL) {
 6277                 NFSUNLOCKSESSION(shp);
 6278                 NFSUNLOCKSTATE();
 6279                 return (NFSERR_BADSESSION);
 6280         }
 6281 
 6282         if (onefs != 0)
 6283                 sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS;
 6284                 /* Check to see if reclaim complete has already happened. */
 6285         else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
 6286                 error = NFSERR_COMPLETEALREADY;
 6287         else {
 6288                 sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
 6289                 nfsrv_markreclaim(sep->sess_clp);
 6290         }
 6291         NFSUNLOCKSESSION(shp);
 6292         NFSUNLOCKSTATE();
 6293         return (error);
 6294 }
 6295 
 6296 /*
 6297  * Cache the reply in a session slot.
 6298  */
 6299 void
 6300 nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m)
 6301 {
 6302         struct nfsdsession *sep;
 6303         struct nfssessionhash *shp;
 6304         char *buf, *cp;
 6305 #ifdef INET
 6306         struct sockaddr_in *sin;
 6307 #endif
 6308 #ifdef INET6
 6309         struct sockaddr_in6 *sin6;
 6310 #endif
 6311 
 6312         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6313         NFSLOCKSESSION(shp);
 6314         sep = nfsrv_findsession(nd->nd_sessionid);
 6315         if (sep == NULL) {
 6316                 NFSUNLOCKSESSION(shp);
 6317                 if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
 6318                         buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK);
 6319                         switch (nd->nd_nam->sa_family) {
 6320 #ifdef INET
 6321                         case AF_INET:
 6322                                 sin = (struct sockaddr_in *)nd->nd_nam;
 6323                                 cp = inet_ntop(sin->sin_family,
 6324                                     &sin->sin_addr.s_addr, buf,
 6325                                     INET6_ADDRSTRLEN);
 6326                                 break;
 6327 #endif
 6328 #ifdef INET6
 6329                         case AF_INET6:
 6330                                 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
 6331                                 cp = inet_ntop(sin6->sin6_family,
 6332                                     &sin6->sin6_addr, buf, INET6_ADDRSTRLEN);
 6333                                 break;
 6334 #endif
 6335                         default:
 6336                                 cp = NULL;
 6337                         }
 6338                         if (cp != NULL)
 6339                                 printf("nfsrv_cache_session: no session "
 6340                                     "IPaddr=%s, check NFS clients for unique "
 6341                                     "/etc/hostid's\n", cp);
 6342                         else
 6343                                 printf("nfsrv_cache_session: no session, "
 6344                                     "check NFS clients for unique "
 6345                                     "/etc/hostid's\n");
 6346                         free(buf, M_TEMP);
 6347                 }
 6348                 m_freem(*m);
 6349                 return;
 6350         }
 6351         nfsv4_seqsess_cacherep(nd->nd_slotid, sep->sess_slots, nd->nd_repstat,
 6352             m);
 6353         NFSUNLOCKSESSION(shp);
 6354 }
 6355 
 6356 /*
 6357  * Search for a session that matches the sessionid.
 6358  */
 6359 static struct nfsdsession *
 6360 nfsrv_findsession(uint8_t *sessionid)
 6361 {
 6362         struct nfsdsession *sep;
 6363         struct nfssessionhash *shp;
 6364 
 6365         shp = NFSSESSIONHASH(sessionid);
 6366         LIST_FOREACH(sep, &shp->list, sess_hash) {
 6367                 if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
 6368                         break;
 6369         }
 6370         return (sep);
 6371 }
 6372 
 6373 /*
 6374  * Destroy a session.
 6375  */
 6376 int
 6377 nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
 6378 {
 6379         int error, igotlock, samesess;
 6380 
 6381         samesess = 0;
 6382         if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) &&
 6383             (nd->nd_flag & ND_HASSEQUENCE) != 0) {
 6384                 samesess = 1;
 6385                 if ((nd->nd_flag & ND_LASTOP) == 0)
 6386                         return (NFSERR_BADSESSION);
 6387         }
 6388 
 6389         /* Lock out other nfsd threads */
 6390         NFSLOCKV4ROOTMUTEX();
 6391         nfsv4_relref(&nfsv4rootfs_lock);
 6392         do {
 6393                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 6394                     NFSV4ROOTLOCKMUTEXPTR, NULL);
 6395         } while (igotlock == 0);
 6396         NFSUNLOCKV4ROOTMUTEX();
 6397 
 6398         error = nfsrv_freesession(NULL, sessionid);
 6399         if (error == 0 && samesess != 0)
 6400                 nd->nd_flag &= ~ND_HASSEQUENCE;
 6401 
 6402         NFSLOCKV4ROOTMUTEX();
 6403         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 6404         NFSUNLOCKV4ROOTMUTEX();
 6405         return (error);
 6406 }
 6407 
 6408 /*
 6409  * Bind a connection to a session.
 6410  * For now, only certain variants are supported, since the current session
 6411  * structure can only handle a single backchannel entry, which will be
 6412  * applied to all connections if it is set.
 6413  */
 6414 int
 6415 nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp)
 6416 {
 6417         struct nfssessionhash *shp;
 6418         struct nfsdsession *sep;
 6419         struct nfsclient *clp;
 6420         SVCXPRT *savxprt;
 6421         int error;
 6422 
 6423         error = 0;
 6424         savxprt = NULL;
 6425         shp = NFSSESSIONHASH(sessionid);
 6426         NFSLOCKSTATE();
 6427         NFSLOCKSESSION(shp);
 6428         sep = nfsrv_findsession(sessionid);
 6429         if (sep != NULL) {
 6430                 clp = sep->sess_clp;
 6431                 if (*foreaftp == NFSCDFC4_BACK ||
 6432                     *foreaftp == NFSCDFC4_BACK_OR_BOTH ||
 6433                     *foreaftp == NFSCDFC4_FORE_OR_BOTH) {
 6434                         /* Try to set up a backchannel. */
 6435                         if (clp->lc_req.nr_client == NULL) {
 6436                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire "
 6437                                     "backchannel\n");
 6438                                 clp->lc_req.nr_client = (struct __rpc_client *)
 6439                                     clnt_bck_create(nd->nd_xprt->xp_socket,
 6440                                     sep->sess_cbprogram, NFSV4_CBVERS);
 6441                         }
 6442                         if (clp->lc_req.nr_client != NULL) {
 6443                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: set up "
 6444                                     "backchannel\n");
 6445                                 savxprt = sep->sess_cbsess.nfsess_xprt;
 6446                                 SVC_ACQUIRE(nd->nd_xprt);
 6447                                 CLNT_ACQUIRE(clp->lc_req.nr_client);
 6448                                 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
 6449                                 /* Disable idle timeout. */
 6450                                 nd->nd_xprt->xp_idletimeout = 0;
 6451                                 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
 6452                                 sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN;
 6453                                 clp->lc_flags |= LCL_DONEBINDCONN |
 6454                                     LCL_NEEDSCBNULL;
 6455                                 clp->lc_flags &= ~LCL_CBDOWN;
 6456                                 if (*foreaftp == NFSCDFS4_BACK)
 6457                                         *foreaftp = NFSCDFS4_BACK;
 6458                                 else
 6459                                         *foreaftp = NFSCDFS4_BOTH;
 6460                         } else if (*foreaftp != NFSCDFC4_BACK) {
 6461                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set "
 6462                                     "up backchannel\n");
 6463                                 sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
 6464                                 clp->lc_flags |= LCL_DONEBINDCONN;
 6465                                 *foreaftp = NFSCDFS4_FORE;
 6466                         } else {
 6467                                 error = NFSERR_NOTSUPP;
 6468                                 printf("nfsrv_bindconnsess: Can't add "
 6469                                     "backchannel\n");
 6470                         }
 6471                 } else {
 6472                         NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n");
 6473                         clp->lc_flags |= LCL_DONEBINDCONN;
 6474                         *foreaftp = NFSCDFS4_FORE;
 6475                 }
 6476         } else
 6477                 error = NFSERR_BADSESSION;
 6478         NFSUNLOCKSESSION(shp);
 6479         NFSUNLOCKSTATE();
 6480         if (savxprt != NULL)
 6481                 SVC_RELEASE(savxprt);
 6482         return (error);
 6483 }
 6484 
 6485 /*
 6486  * Free up a session structure.
 6487  */
 6488 static int
 6489 nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
 6490 {
 6491         struct nfssessionhash *shp;
 6492         int i;
 6493 
 6494         NFSLOCKSTATE();
 6495         if (sep == NULL) {
 6496                 shp = NFSSESSIONHASH(sessionid);
 6497                 NFSLOCKSESSION(shp);
 6498                 sep = nfsrv_findsession(sessionid);
 6499         } else {
 6500                 shp = NFSSESSIONHASH(sep->sess_sessionid);
 6501                 NFSLOCKSESSION(shp);
 6502         }
 6503         if (sep != NULL) {
 6504                 sep->sess_refcnt--;
 6505                 if (sep->sess_refcnt > 0) {
 6506                         NFSUNLOCKSESSION(shp);
 6507                         NFSUNLOCKSTATE();
 6508                         return (NFSERR_BACKCHANBUSY);
 6509                 }
 6510                 LIST_REMOVE(sep, sess_hash);
 6511                 LIST_REMOVE(sep, sess_list);
 6512         }
 6513         NFSUNLOCKSESSION(shp);
 6514         NFSUNLOCKSTATE();
 6515         if (sep == NULL)
 6516                 return (NFSERR_BADSESSION);
 6517         for (i = 0; i < NFSV4_SLOTS; i++)
 6518                 if (sep->sess_slots[i].nfssl_reply != NULL)
 6519                         m_freem(sep->sess_slots[i].nfssl_reply);
 6520         if (sep->sess_cbsess.nfsess_xprt != NULL)
 6521                 SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
 6522         free(sep, M_NFSDSESSION);
 6523         return (0);
 6524 }
 6525 
 6526 /*
 6527  * Free a stateid.
 6528  * RFC5661 says that it should fail when there are associated opens, locks
 6529  * or delegations. Since stateids represent opens, I don't see how you can
 6530  * free an open stateid (it will be free'd when closed), so this function
 6531  * only works for lock stateids (freeing the lock_owner) or delegations.
 6532  */
 6533 int
 6534 nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
 6535     NFSPROC_T *p)
 6536 {
 6537         struct nfsclient *clp;
 6538         struct nfsstate *stp;
 6539         int error;
 6540 
 6541         NFSLOCKSTATE();
 6542         /*
 6543          * Look up the stateid
 6544          */
 6545         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 6546             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 6547         if (error == 0) {
 6548                 /* First, check for a delegation. */
 6549                 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
 6550                         if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
 6551                             NFSX_STATEIDOTHER))
 6552                                 break;
 6553                 }
 6554                 if (stp != NULL) {
 6555                         nfsrv_freedeleg(stp);
 6556                         NFSUNLOCKSTATE();
 6557                         return (error);
 6558                 }
 6559         }
 6560         /* Not a delegation, try for a lock_owner. */
 6561         if (error == 0)
 6562                 error = nfsrv_getstate(clp, stateidp, 0, &stp);
 6563         if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
 6564             NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
 6565                 /* Not a lock_owner stateid. */
 6566                 error = NFSERR_LOCKSHELD;
 6567         if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
 6568                 error = NFSERR_LOCKSHELD;
 6569         if (error == 0)
 6570                 nfsrv_freelockowner(stp, NULL, 0, p);
 6571         NFSUNLOCKSTATE();
 6572         return (error);
 6573 }
 6574 
 6575 /*
 6576  * Test a stateid.
 6577  */
 6578 int
 6579 nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
 6580     NFSPROC_T *p)
 6581 {
 6582         struct nfsclient *clp;
 6583         struct nfsstate *stp;
 6584         int error;
 6585 
 6586         NFSLOCKSTATE();
 6587         /*
 6588          * Look up the stateid
 6589          */
 6590         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 6591             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 6592         if (error == 0)
 6593                 error = nfsrv_getstate(clp, stateidp, 0, &stp);
 6594         if (error == 0 && stateidp->seqid != 0 &&
 6595             SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid))
 6596                 error = NFSERR_OLDSTATEID;
 6597         NFSUNLOCKSTATE();
 6598         return (error);
 6599 }
 6600 
 6601 /*
 6602  * Generate the xdr for an NFSv4.1 CBSequence Operation.
 6603  */
 6604 static int
 6605 nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
 6606     int dont_replycache, struct nfsdsession **sepp, int *slotposp)
 6607 {
 6608         struct nfsdsession *sep;
 6609         uint32_t *tl, slotseq = 0;
 6610         int maxslot;
 6611         uint8_t sessionid[NFSX_V4SESSIONID];
 6612         int error;
 6613 
 6614         error = nfsv4_getcbsession(clp, sepp);
 6615         if (error != 0)
 6616                 return (error);
 6617         sep = *sepp;
 6618         (void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, slotposp, &maxslot,
 6619             &slotseq, sessionid);
 6620         KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
 6621 
 6622         /* Build the Sequence arguments. */
 6623         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
 6624         bcopy(sessionid, tl, NFSX_V4SESSIONID);
 6625         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
 6626         nd->nd_slotseq = tl;
 6627         nd->nd_slotid = *slotposp;
 6628         nd->nd_flag |= ND_HASSLOTID;
 6629         *tl++ = txdr_unsigned(slotseq);
 6630         *tl++ = txdr_unsigned(*slotposp);
 6631         *tl++ = txdr_unsigned(maxslot);
 6632         if (dont_replycache == 0)
 6633                 *tl++ = newnfs_true;
 6634         else
 6635                 *tl++ = newnfs_false;
 6636         *tl = 0;                        /* No referring call list, for now. */
 6637         nd->nd_flag |= ND_HASSEQUENCE;
 6638         return (0);
 6639 }
 6640 
 6641 /*
 6642  * Get a session for the callback.
 6643  */
 6644 static int
 6645 nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
 6646 {
 6647         struct nfsdsession *sep;
 6648 
 6649         NFSLOCKSTATE();
 6650         LIST_FOREACH(sep, &clp->lc_session, sess_list) {
 6651                 if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
 6652                         break;
 6653         }
 6654         if (sep == NULL) {
 6655                 NFSUNLOCKSTATE();
 6656                 return (NFSERR_BADSESSION);
 6657         }
 6658         sep->sess_refcnt++;
 6659         *sepp = sep;
 6660         NFSUNLOCKSTATE();
 6661         return (0);
 6662 }
 6663 
 6664 /*
 6665  * Free up all backchannel xprts.  This needs to be done when the nfsd threads
 6666  * exit, since those transports will all be going away.
 6667  * This is only called after all the nfsd threads are done performing RPCs,
 6668  * so locking shouldn't be an issue.
 6669  */
 6670 void
 6671 nfsrv_freeallbackchannel_xprts(void)
 6672 {
 6673         struct nfsdsession *sep;
 6674         struct nfsclient *clp;
 6675         SVCXPRT *xprt;
 6676         int i;
 6677 
 6678         for (i = 0; i < nfsrv_clienthashsize; i++) {
 6679                 LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
 6680                         LIST_FOREACH(sep, &clp->lc_session, sess_list) {
 6681                                 xprt = sep->sess_cbsess.nfsess_xprt;
 6682                                 sep->sess_cbsess.nfsess_xprt = NULL;
 6683                                 if (xprt != NULL)
 6684                                         SVC_RELEASE(xprt);
 6685                         }
 6686                 }
 6687         }
 6688 }
 6689 
 6690 /*
 6691  * Do a layout commit.  Actually just call nfsrv_updatemdsattr().
 6692  * I have no idea if the rest of these arguments will ever be useful?
 6693  */
 6694 int
 6695 nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype,
 6696     int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len,
 6697     int hasnewmtime, struct timespec *newmtimep, int reclaim,
 6698     nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep,
 6699     uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p)
 6700 {
 6701         struct nfsvattr na;
 6702         int error;
 6703 
 6704         error = nfsrv_updatemdsattr(vp, &na, p);
 6705         if (error == 0) {
 6706                 *hasnewsizep = 1;
 6707                 *newsizep = na.na_size;
 6708         }
 6709         return (error);
 6710 }
 6711 
 6712 /*
 6713  * Try and get a layout.
 6714  */
 6715 int
 6716 nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp,
 6717     int layouttype, int *iomode, uint64_t *offset, uint64_t *len,
 6718     uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose,
 6719     int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p)
 6720 {
 6721         struct nfslayouthash *lhyp;
 6722         struct nfslayout *lyp;
 6723         char *devid;
 6724         fhandle_t fh, *dsfhp;
 6725         int error, mirrorcnt;
 6726 
 6727         if (nfsrv_devidcnt == 0)
 6728                 return (NFSERR_UNKNLAYOUTTYPE);
 6729 
 6730         if (*offset != 0)
 6731                 printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset,
 6732                     (uintmax_t)*len);
 6733         error = nfsvno_getfh(vp, &fh, p);
 6734         NFSD_DEBUG(4, "layoutget getfh=%d\n", error);
 6735         if (error != 0)
 6736                 return (error);
 6737 
 6738         /*
 6739          * For now, all layouts are for entire files.
 6740          * Only issue Read/Write layouts if requested for a non-readonly fs.
 6741          */
 6742         if (NFSVNO_EXRDONLY(exp)) {
 6743                 if (*iomode == NFSLAYOUTIOMODE_RW)
 6744                         return (NFSERR_LAYOUTTRYLATER);
 6745                 *iomode = NFSLAYOUTIOMODE_READ;
 6746         }
 6747         if (*iomode != NFSLAYOUTIOMODE_RW)
 6748                 *iomode = NFSLAYOUTIOMODE_READ;
 6749 
 6750         /*
 6751          * Check to see if a write layout can be issued for this file.
 6752          * This is used during mirror recovery to avoid RW layouts being
 6753          * issued for a file while it is being copied to the recovered
 6754          * mirror.
 6755          */
 6756         if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0)
 6757                 return (NFSERR_LAYOUTTRYLATER);
 6758 
 6759         *retonclose = 0;
 6760         *offset = 0;
 6761         *len = UINT64_MAX;
 6762 
 6763         /* First, see if a layout already exists and return if found. */
 6764         lhyp = NFSLAYOUTHASH(&fh);
 6765         NFSLOCKLAYOUT(lhyp);
 6766         error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp);
 6767         NFSD_DEBUG(4, "layoutget findlay=%d\n", error);
 6768         /*
 6769          * Not sure if the seqid must be the same, so I won't check it.
 6770          */
 6771         if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] ||
 6772             stateidp->other[1] != lyp->lay_stateid.other[1] ||
 6773             stateidp->other[2] != lyp->lay_stateid.other[2])) {
 6774                 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
 6775                         NFSUNLOCKLAYOUT(lhyp);
 6776                         NFSD_DEBUG(1, "ret bad stateid\n");
 6777                         return (NFSERR_BADSTATEID);
 6778                 }
 6779                 /*
 6780                  * I believe we get here because there is a race between
 6781                  * the client processing the CBLAYOUTRECALL and the layout
 6782                  * being deleted here on the server.
 6783                  * The client has now done a LayoutGet with a non-layout
 6784                  * stateid, as it would when there is no layout.
 6785                  * As such, free this layout and set error == NFSERR_BADSTATEID
 6786                  * so the code below will create a new layout structure as
 6787                  * would happen if no layout was found.
 6788                  * "lyp" will be set before being used below, but set it NULL
 6789                  * as a safety belt.
 6790                  */
 6791                 nfsrv_freelayout(&lhyp->list, lyp);
 6792                 lyp = NULL;
 6793                 error = NFSERR_BADSTATEID;
 6794         }
 6795         if (error == 0) {
 6796                 if (lyp->lay_layoutlen > maxcnt) {
 6797                         NFSUNLOCKLAYOUT(lhyp);
 6798                         NFSD_DEBUG(1, "ret layout too small\n");
 6799                         return (NFSERR_TOOSMALL);
 6800                 }
 6801                 if (*iomode == NFSLAYOUTIOMODE_RW)
 6802                         lyp->lay_flags |= NFSLAY_RW;
 6803                 else
 6804                         lyp->lay_flags |= NFSLAY_READ;
 6805                 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
 6806                 *layoutlenp = lyp->lay_layoutlen;
 6807                 if (++lyp->lay_stateid.seqid == 0)
 6808                         lyp->lay_stateid.seqid = 1;
 6809                 stateidp->seqid = lyp->lay_stateid.seqid;
 6810                 NFSUNLOCKLAYOUT(lhyp);
 6811                 NFSD_DEBUG(4, "ret fnd layout\n");
 6812                 return (0);
 6813         }
 6814         NFSUNLOCKLAYOUT(lhyp);
 6815 
 6816         /* Find the device id and file handle. */
 6817         dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
 6818         devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
 6819         error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid);
 6820         NFSD_DEBUG(4, "layoutget devandfh=%d\n", error);
 6821         if (error == 0) {
 6822                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
 6823                         if (NFSX_V4FILELAYOUT > maxcnt)
 6824                                 error = NFSERR_TOOSMALL;
 6825                         else
 6826                                 lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp,
 6827                                     devid, vp->v_mount->mnt_stat.f_fsid);
 6828                 } else {
 6829                         if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt)
 6830                                 error = NFSERR_TOOSMALL;
 6831                         else
 6832                                 lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt,
 6833                                     &fh, dsfhp, devid,
 6834                                     vp->v_mount->mnt_stat.f_fsid);
 6835                 }
 6836         }
 6837         free(dsfhp, M_TEMP);
 6838         free(devid, M_TEMP);
 6839         if (error != 0)
 6840                 return (error);
 6841 
 6842         /*
 6843          * Now, add this layout to the list.
 6844          */
 6845         error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p);
 6846         NFSD_DEBUG(4, "layoutget addl=%d\n", error);
 6847         /*
 6848          * The lyp will be set to NULL by nfsrv_addlayout() if it
 6849          * linked the new structure into the lists.
 6850          */
 6851         free(lyp, M_NFSDSTATE);
 6852         return (error);
 6853 }
 6854 
 6855 /*
 6856  * Generate a File Layout.
 6857  */
 6858 static struct nfslayout *
 6859 nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
 6860     fhandle_t *dsfhp, char *devid, fsid_t fs)
 6861 {
 6862         uint32_t *tl;
 6863         struct nfslayout *lyp;
 6864         uint64_t pattern_offset;
 6865 
 6866         lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE,
 6867             M_WAITOK | M_ZERO);
 6868         lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES;
 6869         if (iomode == NFSLAYOUTIOMODE_RW)
 6870                 lyp->lay_flags = NFSLAY_RW;
 6871         else
 6872                 lyp->lay_flags = NFSLAY_READ;
 6873         NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
 6874         lyp->lay_clientid.qval = nd->nd_clientid.qval;
 6875         lyp->lay_fsid = fs;
 6876 
 6877         /* Fill in the xdr for the files layout. */
 6878         tl = (uint32_t *)lyp->lay_xdr;
 6879         NFSBCOPY(devid, tl, NFSX_V4DEVICEID);           /* Device ID. */
 6880         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 6881 
 6882         /* Set the stripe size to the maximum I/O size. */
 6883         *tl++ = txdr_unsigned(NFS_SRVMAXIO & NFSFLAYUTIL_STRIPE_MASK);
 6884         *tl++ = 0;                                      /* 1st stripe index. */
 6885         pattern_offset = 0;
 6886         txdr_hyper(pattern_offset, tl); tl += 2;        /* Pattern offset. */
 6887         *tl++ = txdr_unsigned(1);                       /* 1 file handle. */
 6888         *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
 6889         NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
 6890         lyp->lay_layoutlen = NFSX_V4FILELAYOUT;
 6891         return (lyp);
 6892 }
 6893 
 6894 #define FLEX_OWNERID    "999"
 6895 #define FLEX_UID0       ""
 6896 /*
 6897  * Generate a Flex File Layout.
 6898  * The FLEX_OWNERID can be any string of 3 decimal digits. Although this
 6899  * string goes on the wire, it isn't supposed to be used by the client,
 6900  * since this server uses tight coupling.
 6901  * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use
 6902  * a string of "". This works around the Linux Flex File Layout driver bug
 6903  * which uses the synthetic uid/gid strings for the "tightly coupled" case.
 6904  */
 6905 static struct nfslayout *
 6906 nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt,
 6907     fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs)
 6908 {
 6909         uint32_t *tl;
 6910         struct nfslayout *lyp;
 6911         uint64_t lenval;
 6912         int i;
 6913 
 6914         lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt),
 6915             M_NFSDSTATE, M_WAITOK | M_ZERO);
 6916         lyp->lay_type = NFSLAYOUT_FLEXFILE;
 6917         if (iomode == NFSLAYOUTIOMODE_RW)
 6918                 lyp->lay_flags = NFSLAY_RW;
 6919         else
 6920                 lyp->lay_flags = NFSLAY_READ;
 6921         NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
 6922         lyp->lay_clientid.qval = nd->nd_clientid.qval;
 6923         lyp->lay_fsid = fs;
 6924         lyp->lay_mirrorcnt = mirrorcnt;
 6925 
 6926         /* Fill in the xdr for the files layout. */
 6927         tl = (uint32_t *)lyp->lay_xdr;
 6928         lenval = 0;
 6929         txdr_hyper(lenval, tl); tl += 2;                /* Stripe unit. */
 6930         *tl++ = txdr_unsigned(mirrorcnt);               /* # of mirrors. */
 6931         for (i = 0; i < mirrorcnt; i++) {
 6932                 *tl++ = txdr_unsigned(1);               /* One stripe. */
 6933                 NFSBCOPY(devid, tl, NFSX_V4DEVICEID);   /* Device ID. */
 6934                 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 6935                 devid += NFSX_V4DEVICEID;
 6936                 *tl++ = txdr_unsigned(1);               /* Efficiency. */
 6937                 *tl++ = 0;                              /* Proxy Stateid. */
 6938                 *tl++ = 0x55555555;
 6939                 *tl++ = 0x55555555;
 6940                 *tl++ = 0x55555555;
 6941                 *tl++ = txdr_unsigned(1);               /* 1 file handle. */
 6942                 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
 6943                 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
 6944                 tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED);
 6945                 dsfhp++;
 6946                 if (nfsrv_flexlinuxhack != 0) {
 6947                         *tl++ = txdr_unsigned(strlen(FLEX_UID0));
 6948                         *tl = 0;                /* 0 pad string. */
 6949                         NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
 6950                         *tl++ = txdr_unsigned(strlen(FLEX_UID0));
 6951                         *tl = 0;                /* 0 pad string. */
 6952                         NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
 6953                 } else {
 6954                         *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
 6955                         NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
 6956                         *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
 6957                         NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
 6958                 }
 6959         }
 6960         *tl++ = txdr_unsigned(0);               /* ff_flags. */
 6961         *tl = txdr_unsigned(60);                /* Status interval hint. */
 6962         lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt);
 6963         return (lyp);
 6964 }
 6965 
 6966 /*
 6967  * Parse and process Flex File errors returned via LayoutReturn.
 6968  */
 6969 static void
 6970 nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt,
 6971     NFSPROC_T *p)
 6972 {
 6973         uint32_t *tl;
 6974         int cnt, errcnt, i, j, opnum, stat;
 6975         char devid[NFSX_V4DEVICEID];
 6976 
 6977         tl = layp;
 6978         maxcnt -= NFSX_UNSIGNED;
 6979         if (maxcnt > 0)
 6980                 cnt = fxdr_unsigned(int, *tl++);
 6981         else
 6982                 cnt = 0;
 6983         NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt);
 6984         for (i = 0; i < cnt; i++) {
 6985                 maxcnt -= NFSX_STATEID + 2 * NFSX_HYPER +
 6986                     NFSX_UNSIGNED;
 6987                 if (maxcnt <= 0)
 6988                         break;
 6989                 /* Skip offset, length and stateid for now. */
 6990                 tl += (4 + NFSX_STATEID / NFSX_UNSIGNED);
 6991                 errcnt = fxdr_unsigned(int, *tl++);
 6992                 NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt);
 6993                 for (j = 0; j < errcnt; j++) {
 6994                         maxcnt -= NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED;
 6995                         if (maxcnt < 0)
 6996                                 break;
 6997                         NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
 6998                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 6999                         stat = fxdr_unsigned(int, *tl++);
 7000                         opnum = fxdr_unsigned(int, *tl++);
 7001                         NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum,
 7002                             stat);
 7003                         /*
 7004                          * Except for NFSERR_ACCES and NFSERR_STALE errors,
 7005                          * disable the mirror.
 7006                          */
 7007                         if (stat != NFSERR_ACCES && stat != NFSERR_STALE)
 7008                                 nfsrv_delds(devid, p);
 7009                 }
 7010         }
 7011 }
 7012 
 7013 /*
 7014  * This function removes all flex file layouts which has a mirror with
 7015  * a device id that matches the argument.
 7016  * Called when the DS represented by the device id has failed.
 7017  */
 7018 void
 7019 nfsrv_flexmirrordel(char *devid, NFSPROC_T *p)
 7020 {
 7021         uint32_t *tl;
 7022         struct nfslayout *lyp, *nlyp;
 7023         struct nfslayouthash *lhyp;
 7024         struct nfslayouthead loclyp;
 7025         int i, j;
 7026 
 7027         NFSD_DEBUG(4, "flexmirrordel\n");
 7028         /* Move all layouts found onto a local list. */
 7029         TAILQ_INIT(&loclyp);
 7030         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7031                 lhyp = &nfslayouthash[i];
 7032                 NFSLOCKLAYOUT(lhyp);
 7033                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7034                         if (lyp->lay_type == NFSLAYOUT_FLEXFILE &&
 7035                             lyp->lay_mirrorcnt > 1) {
 7036                                 NFSD_DEBUG(4, "possible match\n");
 7037                                 tl = lyp->lay_xdr;
 7038                                 tl += 3;
 7039                                 for (j = 0; j < lyp->lay_mirrorcnt; j++) {
 7040                                         tl++;
 7041                                         if (NFSBCMP(devid, tl, NFSX_V4DEVICEID)
 7042                                             == 0) {
 7043                                                 /* Found one. */
 7044                                                 NFSD_DEBUG(4, "fnd one\n");
 7045                                                 TAILQ_REMOVE(&lhyp->list, lyp,
 7046                                                     lay_list);
 7047                                                 TAILQ_INSERT_HEAD(&loclyp, lyp,
 7048                                                     lay_list);
 7049                                                 break;
 7050                                         }
 7051                                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED +
 7052                                             NFSM_RNDUP(NFSX_V4PNFSFH) /
 7053                                             NFSX_UNSIGNED + 11 * NFSX_UNSIGNED);
 7054                                 }
 7055                         }
 7056                 }
 7057                 NFSUNLOCKLAYOUT(lhyp);
 7058         }
 7059 
 7060         /* Now, try to do a Layout recall for each one found. */
 7061         TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) {
 7062                 NFSD_DEBUG(4, "do layout recall\n");
 7063                 /*
 7064                  * The layout stateid.seqid needs to be incremented
 7065                  * before doing a LAYOUT_RECALL callback.
 7066                  */
 7067                 if (++lyp->lay_stateid.seqid == 0)
 7068                         lyp->lay_stateid.seqid = 1;
 7069                 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
 7070                     &lyp->lay_fh, lyp, 1, lyp->lay_type, p);
 7071                 nfsrv_freelayout(&loclyp, lyp);
 7072         }
 7073 }
 7074 
 7075 /*
 7076  * Do a recall callback to the client for this layout.
 7077  */
 7078 static int
 7079 nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp,
 7080     struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p)
 7081 {
 7082         struct nfsclient *clp;
 7083         int error;
 7084 
 7085         NFSD_DEBUG(4, "nfsrv_recalllayout\n");
 7086         error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0),
 7087             0, NULL, p);
 7088         NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error);
 7089         if (error != 0) {
 7090                 printf("nfsrv_recalllayout: getclient err=%d\n", error);
 7091                 return (error);
 7092         }
 7093         if ((clp->lc_flags & LCL_NFSV41) != 0) {
 7094                 error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL,
 7095                     stateidp, changed, fhp, NULL, NULL, laytype, p);
 7096                 /* If lyp != NULL, handle an error return here. */
 7097                 if (error != 0 && lyp != NULL) {
 7098                         NFSDRECALLLOCK();
 7099                         /*
 7100                          * Mark it returned, since no layout recall
 7101                          * has been done.
 7102                          * All errors seem to be non-recoverable, although
 7103                          * NFSERR_NOMATCHLAYOUT is a normal event.
 7104                          */
 7105                         if ((lyp->lay_flags & NFSLAY_RECALL) != 0) {
 7106                                 lyp->lay_flags |= NFSLAY_RETURNED;
 7107                                 wakeup(lyp);
 7108                         }
 7109                         NFSDRECALLUNLOCK();
 7110                         if (error != NFSERR_NOMATCHLAYOUT)
 7111                                 printf("nfsrv_recalllayout: err=%d\n", error);
 7112                 }
 7113         } else
 7114                 printf("nfsrv_recalllayout: clp not NFSv4.1\n");
 7115         return (error);
 7116 }
 7117 
 7118 /*
 7119  * Find a layout to recall when we exceed our high water mark.
 7120  */
 7121 void
 7122 nfsrv_recalloldlayout(NFSPROC_T *p)
 7123 {
 7124         struct nfslayouthash *lhyp;
 7125         struct nfslayout *lyp;
 7126         nfsquad_t clientid;
 7127         nfsv4stateid_t stateid;
 7128         fhandle_t fh;
 7129         int error, laytype, ret;
 7130 
 7131         lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize];
 7132         NFSLOCKLAYOUT(lhyp);
 7133         TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) {
 7134                 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
 7135                         lyp->lay_flags |= NFSLAY_CALLB;
 7136                         /*
 7137                          * The layout stateid.seqid needs to be incremented
 7138                          * before doing a LAYOUT_RECALL callback.
 7139                          */
 7140                         if (++lyp->lay_stateid.seqid == 0)
 7141                                 lyp->lay_stateid.seqid = 1;
 7142                         clientid = lyp->lay_clientid;
 7143                         stateid = lyp->lay_stateid;
 7144                         NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh));
 7145                         laytype = lyp->lay_type;
 7146                         break;
 7147                 }
 7148         }
 7149         NFSUNLOCKLAYOUT(lhyp);
 7150         if (lyp != NULL) {
 7151                 error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0,
 7152                     laytype, p);
 7153                 if (error != 0 && error != NFSERR_NOMATCHLAYOUT)
 7154                         NFSD_DEBUG(4, "recallold=%d\n", error);
 7155                 if (error != 0) {
 7156                         NFSLOCKLAYOUT(lhyp);
 7157                         /*
 7158                          * Since the hash list was unlocked, we need to
 7159                          * find it again.
 7160                          */
 7161                         ret = nfsrv_findlayout(&clientid, &fh, laytype, p,
 7162                             &lyp);
 7163                         if (ret == 0 &&
 7164                             (lyp->lay_flags & NFSLAY_CALLB) != 0 &&
 7165                             lyp->lay_stateid.other[0] == stateid.other[0] &&
 7166                             lyp->lay_stateid.other[1] == stateid.other[1] &&
 7167                             lyp->lay_stateid.other[2] == stateid.other[2]) {
 7168                                 /*
 7169                                  * The client no longer knows this layout, so
 7170                                  * it can be free'd now.
 7171                                  */
 7172                                 if (error == NFSERR_NOMATCHLAYOUT)
 7173                                         nfsrv_freelayout(&lhyp->list, lyp);
 7174                                 else {
 7175                                         /*
 7176                                          * Leave it to be tried later by
 7177                                          * clearing NFSLAY_CALLB and moving
 7178                                          * it to the head of the list, so it
 7179                                          * won't be tried again for a while.
 7180                                          */
 7181                                         lyp->lay_flags &= ~NFSLAY_CALLB;
 7182                                         TAILQ_REMOVE(&lhyp->list, lyp,
 7183                                             lay_list);
 7184                                         TAILQ_INSERT_HEAD(&lhyp->list, lyp,
 7185                                             lay_list);
 7186                                 }
 7187                         }
 7188                         NFSUNLOCKLAYOUT(lhyp);
 7189                 }
 7190         }
 7191 }
 7192 
 7193 /*
 7194  * Try and return layout(s).
 7195  */
 7196 int
 7197 nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp,
 7198     int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim,
 7199     int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp,
 7200     struct ucred *cred, NFSPROC_T *p)
 7201 {
 7202         struct nfsvattr na;
 7203         struct nfslayouthash *lhyp;
 7204         struct nfslayout *lyp;
 7205         fhandle_t fh;
 7206         int error = 0;
 7207 
 7208         *fndp = 0;
 7209         if (kind == NFSV4LAYOUTRET_FILE) {
 7210                 error = nfsvno_getfh(vp, &fh, p);
 7211                 if (error == 0) {
 7212                         error = nfsrv_updatemdsattr(vp, &na, p);
 7213                         if (error != 0)
 7214                                 printf("nfsrv_layoutreturn: updatemdsattr"
 7215                                     " failed=%d\n", error);
 7216                 }
 7217                 if (error == 0) {
 7218                         if (reclaim == newnfs_true) {
 7219                                 error = nfsrv_checkgrace(NULL, NULL,
 7220                                     NFSLCK_RECLAIM);
 7221                                 if (error != NFSERR_NOGRACE)
 7222                                         error = 0;
 7223                                 return (error);
 7224                         }
 7225                         lhyp = NFSLAYOUTHASH(&fh);
 7226                         NFSDRECALLLOCK();
 7227                         NFSLOCKLAYOUT(lhyp);
 7228                         error = nfsrv_findlayout(&nd->nd_clientid, &fh,
 7229                             layouttype, p, &lyp);
 7230                         NFSD_DEBUG(4, "layoutret findlay=%d\n", error);
 7231                         if (error == 0 &&
 7232                             stateidp->other[0] == lyp->lay_stateid.other[0] &&
 7233                             stateidp->other[1] == lyp->lay_stateid.other[1] &&
 7234                             stateidp->other[2] == lyp->lay_stateid.other[2]) {
 7235                                 NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d"
 7236                                     " %x %x %x laystateid %d %x %x %x"
 7237                                     " off=%ju len=%ju flgs=0x%x\n",
 7238                                     stateidp->seqid, stateidp->other[0],
 7239                                     stateidp->other[1], stateidp->other[2],
 7240                                     lyp->lay_stateid.seqid,
 7241                                     lyp->lay_stateid.other[0],
 7242                                     lyp->lay_stateid.other[1],
 7243                                     lyp->lay_stateid.other[2],
 7244                                     (uintmax_t)offset, (uintmax_t)len,
 7245                                     lyp->lay_flags);
 7246                                 if (++lyp->lay_stateid.seqid == 0)
 7247                                         lyp->lay_stateid.seqid = 1;
 7248                                 stateidp->seqid = lyp->lay_stateid.seqid;
 7249                                 if (offset == 0 && len == UINT64_MAX) {
 7250                                         if ((iomode & NFSLAYOUTIOMODE_READ) !=
 7251                                             0)
 7252                                                 lyp->lay_flags &= ~NFSLAY_READ;
 7253                                         if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
 7254                                                 lyp->lay_flags &= ~NFSLAY_RW;
 7255                                         if ((lyp->lay_flags & (NFSLAY_READ |
 7256                                             NFSLAY_RW)) == 0)
 7257                                                 nfsrv_freelayout(&lhyp->list,
 7258                                                     lyp);
 7259                                         else
 7260                                                 *fndp = 1;
 7261                                 } else
 7262                                         *fndp = 1;
 7263                         }
 7264                         NFSUNLOCKLAYOUT(lhyp);
 7265                         /* Search the nfsrv_recalllist for a match. */
 7266                         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 7267                                 if (NFSBCMP(&lyp->lay_fh, &fh,
 7268                                     sizeof(fh)) == 0 &&
 7269                                     lyp->lay_clientid.qval ==
 7270                                     nd->nd_clientid.qval &&
 7271                                     stateidp->other[0] ==
 7272                                     lyp->lay_stateid.other[0] &&
 7273                                     stateidp->other[1] ==
 7274                                     lyp->lay_stateid.other[1] &&
 7275                                     stateidp->other[2] ==
 7276                                     lyp->lay_stateid.other[2]) {
 7277                                         lyp->lay_flags |= NFSLAY_RETURNED;
 7278                                         wakeup(lyp);
 7279                                         error = 0;
 7280                                 }
 7281                         }
 7282                         NFSDRECALLUNLOCK();
 7283                 }
 7284                 if (layouttype == NFSLAYOUT_FLEXFILE && layp != NULL)
 7285                         nfsrv_flexlayouterr(nd, layp, maxcnt, p);
 7286         } else if (kind == NFSV4LAYOUTRET_FSID)
 7287                 nfsrv_freelayouts(&nd->nd_clientid,
 7288                     &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode);
 7289         else if (kind == NFSV4LAYOUTRET_ALL)
 7290                 nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode);
 7291         else
 7292                 error = NFSERR_INVAL;
 7293         if (error == -1)
 7294                 error = 0;
 7295         return (error);
 7296 }
 7297 
 7298 /*
 7299  * Look for an existing layout.
 7300  */
 7301 static int
 7302 nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
 7303     NFSPROC_T *p, struct nfslayout **lypp)
 7304 {
 7305         struct nfslayouthash *lhyp;
 7306         struct nfslayout *lyp;
 7307         int ret;
 7308 
 7309         *lypp = NULL;
 7310         ret = 0;
 7311         lhyp = NFSLAYOUTHASH(fhp);
 7312         TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
 7313                 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
 7314                     lyp->lay_clientid.qval == clientidp->qval &&
 7315                     lyp->lay_type == laytype)
 7316                         break;
 7317         }
 7318         if (lyp != NULL)
 7319                 *lypp = lyp;
 7320         else
 7321                 ret = -1;
 7322         return (ret);
 7323 }
 7324 
 7325 /*
 7326  * Add the new layout, as required.
 7327  */
 7328 static int
 7329 nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
 7330     nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p)
 7331 {
 7332         struct nfsclient *clp;
 7333         struct nfslayouthash *lhyp;
 7334         struct nfslayout *lyp, *nlyp;
 7335         fhandle_t *fhp;
 7336         int error;
 7337 
 7338         KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0,
 7339             ("nfsrv_layoutget: no nd_clientid\n"));
 7340         lyp = *lypp;
 7341         fhp = &lyp->lay_fh;
 7342         NFSLOCKSTATE();
 7343         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 7344             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 7345         if (error != 0) {
 7346                 NFSUNLOCKSTATE();
 7347                 return (error);
 7348         }
 7349         lyp->lay_stateid.seqid = stateidp->seqid = 1;
 7350         lyp->lay_stateid.other[0] = stateidp->other[0] =
 7351             clp->lc_clientid.lval[0];
 7352         lyp->lay_stateid.other[1] = stateidp->other[1] =
 7353             clp->lc_clientid.lval[1];
 7354         lyp->lay_stateid.other[2] = stateidp->other[2] =
 7355             nfsrv_nextstateindex(clp);
 7356         NFSUNLOCKSTATE();
 7357 
 7358         lhyp = NFSLAYOUTHASH(fhp);
 7359         NFSLOCKLAYOUT(lhyp);
 7360         TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) {
 7361                 if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
 7362                     nlyp->lay_clientid.qval == nd->nd_clientid.qval)
 7363                         break;
 7364         }
 7365         if (nlyp != NULL) {
 7366                 /* A layout already exists, so use it. */
 7367                 nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW));
 7368                 NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen);
 7369                 *layoutlenp = nlyp->lay_layoutlen;
 7370                 if (++nlyp->lay_stateid.seqid == 0)
 7371                         nlyp->lay_stateid.seqid = 1;
 7372                 stateidp->seqid = nlyp->lay_stateid.seqid;
 7373                 stateidp->other[0] = nlyp->lay_stateid.other[0];
 7374                 stateidp->other[1] = nlyp->lay_stateid.other[1];
 7375                 stateidp->other[2] = nlyp->lay_stateid.other[2];
 7376                 NFSUNLOCKLAYOUT(lhyp);
 7377                 return (0);
 7378         }
 7379 
 7380         /* Insert the new layout in the lists. */
 7381         *lypp = NULL;
 7382         atomic_add_int(&nfsrv_layoutcnt, 1);
 7383         NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
 7384         *layoutlenp = lyp->lay_layoutlen;
 7385         TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list);
 7386         NFSUNLOCKLAYOUT(lhyp);
 7387         return (0);
 7388 }
 7389 
 7390 /*
 7391  * Get the devinfo for a deviceid.
 7392  */
 7393 int
 7394 nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt,
 7395     uint32_t *notify, int *devaddrlen, char **devaddr)
 7396 {
 7397         struct nfsdevice *ds;
 7398 
 7399         if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype !=
 7400              NFSLAYOUT_FLEXFILE) ||
 7401             (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES))
 7402                 return (NFSERR_UNKNLAYOUTTYPE);
 7403 
 7404         /*
 7405          * Now, search for the device id.  Note that the structures won't go
 7406          * away, but the order changes in the list.  As such, the lock only
 7407          * needs to be held during the search through the list.
 7408          */
 7409         NFSDDSLOCK();
 7410         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7411                 if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 &&
 7412                     ds->nfsdev_nmp != NULL)
 7413                         break;
 7414         }
 7415         NFSDDSUNLOCK();
 7416         if (ds == NULL)
 7417                 return (NFSERR_NOENT);
 7418 
 7419         /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */
 7420         *devaddrlen = 0;
 7421         if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
 7422                 *devaddrlen = ds->nfsdev_fileaddrlen;
 7423                 *devaddr = ds->nfsdev_fileaddr;
 7424         } else if (layouttype == NFSLAYOUT_FLEXFILE) {
 7425                 *devaddrlen = ds->nfsdev_flexaddrlen;
 7426                 *devaddr = ds->nfsdev_flexaddr;
 7427         }
 7428         if (*devaddrlen == 0)
 7429                 return (NFSERR_UNKNLAYOUTTYPE);
 7430 
 7431         /*
 7432          * The XDR overhead is 3 unsigned values: layout_type,
 7433          * length_of_address and notify bitmap.
 7434          * If the notify array is changed to not all zeros, the
 7435          * count of unsigned values must be increased.
 7436          */
 7437         if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) +
 7438             3 * NFSX_UNSIGNED) {
 7439                 *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED;
 7440                 return (NFSERR_TOOSMALL);
 7441         }
 7442         return (0);
 7443 }
 7444 
 7445 /*
 7446  * Free a list of layout state structures.
 7447  */
 7448 static void
 7449 nfsrv_freelayoutlist(nfsquad_t clientid)
 7450 {
 7451         struct nfslayouthash *lhyp;
 7452         struct nfslayout *lyp, *nlyp;
 7453         int i;
 7454 
 7455         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7456                 lhyp = &nfslayouthash[i];
 7457                 NFSLOCKLAYOUT(lhyp);
 7458                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7459                         if (lyp->lay_clientid.qval == clientid.qval)
 7460                                 nfsrv_freelayout(&lhyp->list, lyp);
 7461                 }
 7462                 NFSUNLOCKLAYOUT(lhyp);
 7463         }
 7464 }
 7465 
 7466 /*
 7467  * Free up a layout.
 7468  */
 7469 static void
 7470 nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp)
 7471 {
 7472 
 7473         NFSD_DEBUG(4, "Freelayout=%p\n", lyp);
 7474         atomic_add_int(&nfsrv_layoutcnt, -1);
 7475         TAILQ_REMOVE(lhp, lyp, lay_list);
 7476         free(lyp, M_NFSDSTATE);
 7477 }
 7478 
 7479 /*
 7480  * Free up a device id.
 7481  */
 7482 void
 7483 nfsrv_freeonedevid(struct nfsdevice *ds)
 7484 {
 7485         int i;
 7486 
 7487         atomic_add_int(&nfsrv_devidcnt, -1);
 7488         vrele(ds->nfsdev_dvp);
 7489         for (i = 0; i < nfsrv_dsdirsize; i++)
 7490                 if (ds->nfsdev_dsdir[i] != NULL)
 7491                         vrele(ds->nfsdev_dsdir[i]);
 7492         free(ds->nfsdev_fileaddr, M_NFSDSTATE);
 7493         free(ds->nfsdev_flexaddr, M_NFSDSTATE);
 7494         free(ds->nfsdev_host, M_NFSDSTATE);
 7495         free(ds, M_NFSDSTATE);
 7496 }
 7497 
 7498 /*
 7499  * Free up a device id and its mirrors.
 7500  */
 7501 static void
 7502 nfsrv_freedevid(struct nfsdevice *ds)
 7503 {
 7504 
 7505         TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
 7506         nfsrv_freeonedevid(ds);
 7507 }
 7508 
 7509 /*
 7510  * Free all layouts and device ids.
 7511  * Done when the nfsd threads are shut down since there may be a new
 7512  * modified device id list created when the nfsd is restarted.
 7513  */
 7514 void
 7515 nfsrv_freealllayoutsanddevids(void)
 7516 {
 7517         struct nfsdontlist *mrp, *nmrp;
 7518         struct nfslayout *lyp, *nlyp;
 7519 
 7520         /* Get rid of the deviceid structures. */
 7521         nfsrv_freealldevids();
 7522         TAILQ_INIT(&nfsrv_devidhead);
 7523         nfsrv_devidcnt = 0;
 7524 
 7525         /* Get rid of all layouts. */
 7526         nfsrv_freealllayouts();
 7527 
 7528         /* Get rid of any nfsdontlist entries. */
 7529         LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp)
 7530                 free(mrp, M_NFSDSTATE);
 7531         LIST_INIT(&nfsrv_dontlisthead);
 7532         nfsrv_dontlistlen = 0;
 7533 
 7534         /* Free layouts in the recall list. */
 7535         TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp)
 7536                 nfsrv_freelayout(&nfsrv_recalllisthead, lyp);
 7537         TAILQ_INIT(&nfsrv_recalllisthead);
 7538 }
 7539 
 7540 /*
 7541  * Free layouts that match the arguments.
 7542  */
 7543 static void
 7544 nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode)
 7545 {
 7546         struct nfslayouthash *lhyp;
 7547         struct nfslayout *lyp, *nlyp;
 7548         int i;
 7549 
 7550         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7551                 lhyp = &nfslayouthash[i];
 7552                 NFSLOCKLAYOUT(lhyp);
 7553                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7554                         if (clid->qval != lyp->lay_clientid.qval)
 7555                                 continue;
 7556                         if (fs != NULL && fsidcmp(fs, &lyp->lay_fsid) != 0)
 7557                                 continue;
 7558                         if (laytype != lyp->lay_type)
 7559                                 continue;
 7560                         if ((iomode & NFSLAYOUTIOMODE_READ) != 0)
 7561                                 lyp->lay_flags &= ~NFSLAY_READ;
 7562                         if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
 7563                                 lyp->lay_flags &= ~NFSLAY_RW;
 7564                         if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0)
 7565                                 nfsrv_freelayout(&lhyp->list, lyp);
 7566                 }
 7567                 NFSUNLOCKLAYOUT(lhyp);
 7568         }
 7569 }
 7570 
 7571 /*
 7572  * Free all layouts for the argument file.
 7573  */
 7574 void
 7575 nfsrv_freefilelayouts(fhandle_t *fhp)
 7576 {
 7577         struct nfslayouthash *lhyp;
 7578         struct nfslayout *lyp, *nlyp;
 7579 
 7580         lhyp = NFSLAYOUTHASH(fhp);
 7581         NFSLOCKLAYOUT(lhyp);
 7582         TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7583                 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0)
 7584                         nfsrv_freelayout(&lhyp->list, lyp);
 7585         }
 7586         NFSUNLOCKLAYOUT(lhyp);
 7587 }
 7588 
 7589 /*
 7590  * Free all layouts.
 7591  */
 7592 static void
 7593 nfsrv_freealllayouts(void)
 7594 {
 7595         struct nfslayouthash *lhyp;
 7596         struct nfslayout *lyp, *nlyp;
 7597         int i;
 7598 
 7599         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7600                 lhyp = &nfslayouthash[i];
 7601                 NFSLOCKLAYOUT(lhyp);
 7602                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp)
 7603                         nfsrv_freelayout(&lhyp->list, lyp);
 7604                 NFSUNLOCKLAYOUT(lhyp);
 7605         }
 7606 }
 7607 
 7608 /*
 7609  * Look up the mount path for the DS server.
 7610  */
 7611 static int
 7612 nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
 7613     struct nfsdevice **dsp)
 7614 {
 7615         struct nameidata nd;
 7616         struct nfsdevice *ds;
 7617         struct mount *mp;
 7618         int error, i;
 7619         char *dsdirpath;
 7620         size_t dsdirsize;
 7621 
 7622         NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp);
 7623         *dsp = NULL;
 7624         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
 7625             dspathp, p);
 7626         error = namei(&nd);
 7627         NFSD_DEBUG(4, "lookup=%d\n", error);
 7628         if (error != 0)
 7629                 return (error);
 7630         if (nd.ni_vp->v_type != VDIR) {
 7631                 vput(nd.ni_vp);
 7632                 NFSD_DEBUG(4, "dspath not dir\n");
 7633                 return (ENOTDIR);
 7634         }
 7635         if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 7636                 vput(nd.ni_vp);
 7637                 NFSD_DEBUG(4, "dspath not an NFS mount\n");
 7638                 return (ENXIO);
 7639         }
 7640 
 7641         /*
 7642          * Allocate a DS server structure with the NFS mounted directory
 7643          * vnode reference counted, so that a non-forced dismount will
 7644          * fail with EBUSY.
 7645          * This structure is always linked into the list, even if an error
 7646          * is being returned.  The caller will free the entire list upon
 7647          * an error return.
 7648          */
 7649         *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t),
 7650             M_NFSDSTATE, M_WAITOK | M_ZERO);
 7651         ds->nfsdev_dvp = nd.ni_vp;
 7652         ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount);
 7653         NFSVOPUNLOCK(nd.ni_vp, 0);
 7654 
 7655         dsdirsize = strlen(dspathp) + 16;
 7656         dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK);
 7657         /* Now, create the DS directory structures. */
 7658         for (i = 0; i < nfsrv_dsdirsize; i++) {
 7659                 snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i);
 7660                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 7661                     UIO_SYSSPACE, dsdirpath, p);
 7662                 error = namei(&nd);
 7663                 NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error);
 7664                 if (error != 0)
 7665                         break;
 7666                 if (nd.ni_vp->v_type != VDIR) {
 7667                         vput(nd.ni_vp);
 7668                         error = ENOTDIR;
 7669                         NFSD_DEBUG(4, "dsdirpath not a VDIR\n");
 7670                         break;
 7671                 }
 7672                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 7673                         vput(nd.ni_vp);
 7674                         error = ENXIO;
 7675                         NFSD_DEBUG(4, "dsdirpath not an NFS mount\n");
 7676                         break;
 7677                 }
 7678                 ds->nfsdev_dsdir[i] = nd.ni_vp;
 7679                 NFSVOPUNLOCK(nd.ni_vp, 0);
 7680         }
 7681         free(dsdirpath, M_TEMP);
 7682 
 7683         if (strlen(mdspathp) > 0) {
 7684                 /*
 7685                  * This DS stores file for a specific MDS exported file
 7686                  * system.
 7687                  */
 7688                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 7689                     UIO_SYSSPACE, mdspathp, p);
 7690                 error = namei(&nd);
 7691                 NFSD_DEBUG(4, "mds lookup=%d\n", error);
 7692                 if (error != 0)
 7693                         goto out;
 7694                 if (nd.ni_vp->v_type != VDIR) {
 7695                         vput(nd.ni_vp);
 7696                         error = ENOTDIR;
 7697                         NFSD_DEBUG(4, "mdspath not dir\n");
 7698                         goto out;
 7699                 }
 7700                 mp = nd.ni_vp->v_mount;
 7701                 if ((mp->mnt_flag & MNT_EXPORTED) == 0) {
 7702                         vput(nd.ni_vp);
 7703                         error = ENXIO;
 7704                         NFSD_DEBUG(4, "mdspath not an exported fs\n");
 7705                         goto out;
 7706                 }
 7707                 ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
 7708                 ds->nfsdev_mdsisset = 1;
 7709                 vput(nd.ni_vp);
 7710         }
 7711 
 7712 out:
 7713         TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
 7714         atomic_add_int(&nfsrv_devidcnt, 1);
 7715         return (error);
 7716 }
 7717 
 7718 /*
 7719  * Look up the mount path for the DS server and delete it.
 7720  */
 7721 int
 7722 nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p)
 7723 {
 7724         struct mount *mp;
 7725         struct nfsmount *nmp;
 7726         struct nfsdevice *ds;
 7727         int error;
 7728 
 7729         NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp);
 7730         /*
 7731          * Search for the path in the mount list.  Avoid looking the path
 7732          * up, since this mount point may be hung, with associated locked
 7733          * vnodes, etc.
 7734          * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked
 7735          * until this completes.
 7736          * As noted in the man page, this should be done before any forced
 7737          * dismount on the mount point, but at least the handshake on
 7738          * NFSMNTP_CANCELRPCS should make it safe.
 7739          */
 7740         error = 0;
 7741         ds = NULL;
 7742         nmp = NULL;
 7743         mtx_lock(&mountlist_mtx);
 7744         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 7745                 if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 &&
 7746                     strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 &&
 7747                     mp->mnt_data != NULL) {
 7748                         nmp = VFSTONFS(mp);
 7749                         NFSLOCKMNT(nmp);
 7750                         if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 7751                              NFSMNTP_CANCELRPCS)) == 0) {
 7752                                 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 7753                                 NFSUNLOCKMNT(nmp);
 7754                         } else {
 7755                                 NFSUNLOCKMNT(nmp);
 7756                                 nmp = NULL;
 7757                         }
 7758                         break;
 7759                 }
 7760         }
 7761         mtx_unlock(&mountlist_mtx);
 7762 
 7763         if (nmp != NULL) {
 7764                 ds = nfsrv_deldsnmp(op, nmp, p);
 7765                 NFSD_DEBUG(4, "deldsnmp=%p\n", ds);
 7766                 if (ds != NULL) {
 7767                         nfsrv_killrpcs(nmp);
 7768                         NFSD_DEBUG(4, "aft killrpcs\n");
 7769                 } else
 7770                         error = ENXIO;
 7771                 NFSLOCKMNT(nmp);
 7772                 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 7773                 wakeup(nmp);
 7774                 NFSUNLOCKMNT(nmp);
 7775         } else
 7776                 error = EINVAL;
 7777         return (error);
 7778 }
 7779 
 7780 /*
 7781  * Search for and remove a DS entry which matches the "nmp" argument.
 7782  * The nfsdevice structure pointer is returned so that the caller can
 7783  * free it via nfsrv_freeonedevid().
 7784  * For the forced case, do not try to do LayoutRecalls, since the server
 7785  * must be shut down now anyhow.
 7786  */
 7787 struct nfsdevice *
 7788 nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p)
 7789 {
 7790         struct nfsdevice *fndds;
 7791 
 7792         NFSD_DEBUG(4, "deldsdvp\n");
 7793         NFSDDSLOCK();
 7794         if (op == PNFSDOP_FORCEDELDS)
 7795                 fndds = nfsv4_findmirror(nmp);
 7796         else
 7797                 fndds = nfsrv_findmirroredds(nmp);
 7798         if (fndds != NULL)
 7799                 nfsrv_deleteds(fndds);
 7800         NFSDDSUNLOCK();
 7801         if (fndds != NULL) {
 7802                 if (op != PNFSDOP_FORCEDELDS)
 7803                         nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
 7804                 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
 7805         }
 7806         return (fndds);
 7807 }
 7808 
 7809 /*
 7810  * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid.
 7811  * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount
 7812  * point.
 7813  * Also, returns an error instead of the nfsdevice found.
 7814  */
 7815 static int
 7816 nfsrv_delds(char *devid, NFSPROC_T *p)
 7817 {
 7818         struct nfsdevice *ds, *fndds;
 7819         struct nfsmount *nmp;
 7820         int fndmirror;
 7821 
 7822         NFSD_DEBUG(4, "delds\n");
 7823         /*
 7824          * Search the DS server list for a match with devid.
 7825          * Remove the DS entry if found and there is a mirror.
 7826          */
 7827         fndds = NULL;
 7828         nmp = NULL;
 7829         fndmirror = 0;
 7830         NFSDDSLOCK();
 7831         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7832                 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 &&
 7833                     ds->nfsdev_nmp != NULL) {
 7834                         NFSD_DEBUG(4, "fnd main ds\n");
 7835                         fndds = ds;
 7836                         break;
 7837                 }
 7838         }
 7839         if (fndds == NULL) {
 7840                 NFSDDSUNLOCK();
 7841                 return (ENXIO);
 7842         }
 7843         if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
 7844                 fndmirror = 1;
 7845         else if (fndds->nfsdev_mdsisset != 0) {
 7846                 /* For the fsid is set case, search for a mirror. */
 7847                 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7848                         if (ds != fndds && ds->nfsdev_nmp != NULL &&
 7849                             ds->nfsdev_mdsisset != 0 &&
 7850                             fsidcmp(&ds->nfsdev_mdsfsid,
 7851                             &fndds->nfsdev_mdsfsid) == 0) {
 7852                                 fndmirror = 1;
 7853                                 break;
 7854                         }
 7855                 }
 7856         }
 7857         if (fndmirror != 0) {
 7858                 nmp = fndds->nfsdev_nmp;
 7859                 NFSLOCKMNT(nmp);
 7860                 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 7861                      NFSMNTP_CANCELRPCS)) == 0) {
 7862                         nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 7863                         NFSUNLOCKMNT(nmp);
 7864                         nfsrv_deleteds(fndds);
 7865                 } else {
 7866                         NFSUNLOCKMNT(nmp);
 7867                         nmp = NULL;
 7868                 }
 7869         }
 7870         NFSDDSUNLOCK();
 7871         if (nmp != NULL) {
 7872                 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
 7873                 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
 7874                 nfsrv_killrpcs(nmp);
 7875                 NFSLOCKMNT(nmp);
 7876                 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 7877                 wakeup(nmp);
 7878                 NFSUNLOCKMNT(nmp);
 7879                 return (0);
 7880         }
 7881         return (ENXIO);
 7882 }
 7883 
 7884 /*
 7885  * Mark a DS as disabled by setting nfsdev_nmp = NULL.
 7886  */
 7887 static void
 7888 nfsrv_deleteds(struct nfsdevice *fndds)
 7889 {
 7890 
 7891         NFSD_DEBUG(4, "deleteds: deleting a mirror\n");
 7892         fndds->nfsdev_nmp = NULL;
 7893         if (fndds->nfsdev_mdsisset == 0)
 7894                 nfsrv_faildscnt--;
 7895 }
 7896 
 7897 /*
 7898  * Fill in the addr structures for the File and Flex File layouts.
 7899  */
 7900 static void
 7901 nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
 7902 {
 7903         uint32_t *tl;
 7904         char *netprot;
 7905         int addrlen;
 7906         static uint64_t new_devid = 0;
 7907 
 7908         if (strchr(addr, ':') != NULL)
 7909                 netprot = "tcp6";
 7910         else
 7911                 netprot = "tcp";
 7912 
 7913         /* Fill in the device id. */
 7914         NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time));
 7915         new_devid++;
 7916         NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)],
 7917             sizeof(new_devid));
 7918 
 7919         /*
 7920          * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4
 7921          * as defined in RFC5661) in XDR.
 7922          */
 7923         addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
 7924             6 * NFSX_UNSIGNED;
 7925         NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot);
 7926         ds->nfsdev_fileaddrlen = addrlen;
 7927         tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
 7928         ds->nfsdev_fileaddr = (char *)tl;
 7929         *tl++ = txdr_unsigned(1);               /* One stripe with index 0. */
 7930         *tl++ = 0;
 7931         *tl++ = txdr_unsigned(1);               /* One multipath list */
 7932         *tl++ = txdr_unsigned(1);               /* with one entry in it. */
 7933         /* The netaddr for this one entry. */
 7934         *tl++ = txdr_unsigned(strlen(netprot));
 7935         NFSBCOPY(netprot, tl, strlen(netprot));
 7936         tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
 7937         *tl++ = txdr_unsigned(strlen(addr));
 7938         NFSBCOPY(addr, tl, strlen(addr));
 7939 
 7940         /*
 7941          * Fill in the flex file addr (actually the ff_device_addr4
 7942          * as defined for Flexible File Layout) in XDR.
 7943          */
 7944         addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
 7945             9 * NFSX_UNSIGNED;
 7946         ds->nfsdev_flexaddrlen = addrlen;
 7947         tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
 7948         ds->nfsdev_flexaddr = (char *)tl;
 7949         *tl++ = txdr_unsigned(1);               /* One multipath entry. */
 7950         /* The netaddr for this one entry. */
 7951         *tl++ = txdr_unsigned(strlen(netprot));
 7952         NFSBCOPY(netprot, tl, strlen(netprot));
 7953         tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
 7954         *tl++ = txdr_unsigned(strlen(addr));
 7955         NFSBCOPY(addr, tl, strlen(addr));
 7956         tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED);
 7957         *tl++ = txdr_unsigned(1);               /* One NFS Version. */
 7958         *tl++ = txdr_unsigned(NFS_VER4);        /* NFSv4. */
 7959         *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
 7960         *tl++ = txdr_unsigned(NFS_SRVMAXIO);    /* DS max rsize. */
 7961         *tl++ = txdr_unsigned(NFS_SRVMAXIO);    /* DS max wsize. */
 7962         *tl = newnfs_true;                      /* Tightly coupled. */
 7963 
 7964         ds->nfsdev_hostnamelen = strlen(dnshost);
 7965         ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE,
 7966             M_WAITOK);
 7967         NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1);
 7968 }
 7969 
 7970 
 7971 /*
 7972  * Create the device id list.
 7973  * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument
 7974  * is misconfigured.
 7975  */
 7976 int
 7977 nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p)
 7978 {
 7979         struct nfsdevice *ds;
 7980         char *addrp, *dnshostp, *dspathp, *mdspathp;
 7981         int error, i;
 7982 
 7983         addrp = args->addr;
 7984         dnshostp = args->dnshost;
 7985         dspathp = args->dspath;
 7986         mdspathp = args->mdspath;
 7987         nfsrv_maxpnfsmirror = args->mirrorcnt;
 7988         if (addrp == NULL || dnshostp == NULL || dspathp == NULL ||
 7989             mdspathp == NULL)
 7990                 return (0);
 7991 
 7992         /*
 7993          * Loop around for each nul-terminated string in args->addr,
 7994          * args->dnshost, args->dnspath and args->mdspath.
 7995          */
 7996         while (addrp < (args->addr + args->addrlen) &&
 7997             dnshostp < (args->dnshost + args->dnshostlen) &&
 7998             dspathp < (args->dspath + args->dspathlen) &&
 7999             mdspathp < (args->mdspath + args->mdspathlen)) {
 8000                 error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds);
 8001                 if (error != 0) {
 8002                         /* Free all DS servers. */
 8003                         nfsrv_freealldevids();
 8004                         nfsrv_devidcnt = 0;
 8005                         return (ENXIO);
 8006                 }
 8007                 nfsrv_allocdevid(ds, addrp, dnshostp);
 8008                 addrp += (strlen(addrp) + 1);
 8009                 dnshostp += (strlen(dnshostp) + 1);
 8010                 dspathp += (strlen(dspathp) + 1);
 8011                 mdspathp += (strlen(mdspathp) + 1);
 8012         }
 8013         if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) {
 8014                 /* Free all DS servers. */
 8015                 nfsrv_freealldevids();
 8016                 nfsrv_devidcnt = 0;
 8017                 nfsrv_maxpnfsmirror = 1;
 8018                 return (ENXIO);
 8019         }
 8020         /* We can fail at most one less DS than the mirror level. */
 8021         nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1;
 8022 
 8023         /*
 8024          * Allocate the nfslayout hash table now, since this is a pNFS server.
 8025          * Make it 1% of the high water mark and at least 100.
 8026          */
 8027         if (nfslayouthash == NULL) {
 8028                 nfsrv_layouthashsize = nfsrv_layouthighwater / 100;
 8029                 if (nfsrv_layouthashsize < 100)
 8030                         nfsrv_layouthashsize = 100;
 8031                 nfslayouthash = mallocarray(nfsrv_layouthashsize,
 8032                     sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK |
 8033                     M_ZERO);
 8034                 for (i = 0; i < nfsrv_layouthashsize; i++) {
 8035                         mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF);
 8036                         TAILQ_INIT(&nfslayouthash[i].list);
 8037                 }
 8038         }
 8039         return (0);
 8040 }
 8041 
 8042 /*
 8043  * Free all device ids.
 8044  */
 8045 static void
 8046 nfsrv_freealldevids(void)
 8047 {
 8048         struct nfsdevice *ds, *nds;
 8049 
 8050         TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds)
 8051                 nfsrv_freedevid(ds);
 8052 }
 8053 
 8054 /*
 8055  * Check to see if there is a Read/Write Layout plus either:
 8056  * - A Write Delegation
 8057  * or
 8058  * - An Open with Write_access.
 8059  * Return 1 if this is the case and 0 otherwise.
 8060  * This function is used by nfsrv_proxyds() to decide if doing a Proxy
 8061  * Getattr RPC to the Data Server (DS) is necessary.
 8062  */
 8063 #define NFSCLIDVECSIZE  6
 8064 int
 8065 nfsrv_checkdsattr(struct nfsrv_descript *nd, vnode_t vp, NFSPROC_T *p)
 8066 {
 8067         fhandle_t fh, *tfhp;
 8068         struct nfsstate *stp;
 8069         struct nfslayout *lyp;
 8070         struct nfslayouthash *lhyp;
 8071         struct nfslockhashhead *hp;
 8072         struct nfslockfile *lfp;
 8073         nfsquad_t clid[NFSCLIDVECSIZE];
 8074         int clidcnt, ret;
 8075 
 8076         ret = nfsvno_getfh(vp, &fh, p);
 8077         if (ret != 0)
 8078                 return (0);
 8079 
 8080         /* First check for a Read/Write Layout. */
 8081         clidcnt = 0;
 8082         lhyp = NFSLAYOUTHASH(&fh);
 8083         NFSLOCKLAYOUT(lhyp);
 8084         TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
 8085                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8086                     ((lyp->lay_flags & NFSLAY_RW) != 0 ||
 8087                      ((lyp->lay_flags & NFSLAY_READ) != 0 &&
 8088                       nfsrv_pnfsatime != 0))) {
 8089                         if (clidcnt < NFSCLIDVECSIZE)
 8090                                 clid[clidcnt].qval = lyp->lay_clientid.qval;
 8091                         clidcnt++;
 8092                 }
 8093         }
 8094         NFSUNLOCKLAYOUT(lhyp);
 8095         if (clidcnt == 0) {
 8096                 /* None found, so return 0. */
 8097                 return (0);
 8098         }
 8099 
 8100         /* Get the nfslockfile for this fh. */
 8101         NFSLOCKSTATE();
 8102         hp = NFSLOCKHASH(&fh);
 8103         LIST_FOREACH(lfp, hp, lf_hash) {
 8104                 tfhp = &lfp->lf_fh;
 8105                 if (NFSVNO_CMPFH(&fh, tfhp))
 8106                         break;
 8107         }
 8108         if (lfp == NULL) {
 8109                 /* None found, so return 0. */
 8110                 NFSUNLOCKSTATE();
 8111                 return (0);
 8112         }
 8113 
 8114         /* Now, look for a Write delegation for this clientid. */
 8115         LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 8116                 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
 8117                     nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
 8118                         break;
 8119         }
 8120         if (stp != NULL) {
 8121                 /* Found one, so return 1. */
 8122                 NFSUNLOCKSTATE();
 8123                 return (1);
 8124         }
 8125 
 8126         /* No Write delegation, so look for an Open with Write_access. */
 8127         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 8128                 KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0,
 8129                     ("nfsrv_checkdsattr: Non-open in Open list\n"));
 8130                 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 &&
 8131                     nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
 8132                         break;
 8133         }
 8134         NFSUNLOCKSTATE();
 8135         if (stp != NULL)
 8136                 return (1);
 8137         return (0);
 8138 }
 8139 
 8140 /*
 8141  * Look for a matching clientid in the vector. Return 1 if one might match.
 8142  */
 8143 static int
 8144 nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt)
 8145 {
 8146         int i;
 8147 
 8148         /* If too many for the vector, return 1 since there might be a match. */
 8149         if (clidcnt > NFSCLIDVECSIZE)
 8150                 return (1);
 8151 
 8152         for (i = 0; i < clidcnt; i++)
 8153                 if (clidvec[i].qval == clid.qval)
 8154                         return (1);
 8155         return (0);
 8156 }
 8157 
 8158 /*
 8159  * Check the don't list for "vp" and see if issuing an rw layout is allowed.
 8160  * Return 1 if issuing an rw layout isn't allowed, 0 otherwise.
 8161  */
 8162 static int
 8163 nfsrv_dontlayout(fhandle_t *fhp)
 8164 {
 8165         struct nfsdontlist *mrp;
 8166         int ret;
 8167 
 8168         if (nfsrv_dontlistlen == 0)
 8169                 return (0);
 8170         ret = 0;
 8171         NFSDDONTLISTLOCK();
 8172         LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
 8173                 if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 &&
 8174                     (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) {
 8175                         ret = 1;
 8176                         break;
 8177                 }
 8178         }
 8179         NFSDDONTLISTUNLOCK();
 8180         return (ret);
 8181 }
 8182 
 8183 #define PNFSDS_COPYSIZ  65536
 8184 /*
 8185  * Create a new file on a DS and copy the contents of an extant DS file to it.
 8186  * This can be used for recovery of a DS file onto a recovered DS.
 8187  * The steps are:
 8188  * - When called, the MDS file's vnode is locked, blocking LayoutGet operations.
 8189  * - Disable issuing of read/write layouts for the file via the nfsdontlist,
 8190  *   so that they will be disabled after the MDS file's vnode is unlocked.
 8191  * - Set up the nfsrv_recalllist so that recall of read/write layouts can
 8192  *   be done.
 8193  * - Unlock the MDS file's vnode, so that the client(s) can perform proxied
 8194  *   writes, LayoutCommits and LayoutReturns for the file when completing the
 8195  *   LayoutReturn requested by the LayoutRecall callback.
 8196  * - Issue a LayoutRecall callback for all read/write layouts and wait for
 8197  *   them to be returned. (If the LayoutRecall callback replies
 8198  *   NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.)
 8199  * - Exclusively lock the MDS file's vnode.  This ensures that no proxied
 8200  *   writes are in progress or can occur during the DS file copy.
 8201  *   It also blocks Setattr operations.
 8202  * - Create the file on the recovered mirror.
 8203  * - Copy the file from the operational DS.
 8204  * - Copy any ACL from the MDS file to the new DS file.
 8205  * - Set the modify time of the new DS file to that of the MDS file.
 8206  * - Update the extended attribute for the MDS file.
 8207  * - Enable issuing of rw layouts by deleting the nfsdontlist entry.
 8208  * - The caller will unlock the MDS file's vnode allowing operations
 8209  *   to continue normally, since it is now on the mirror again.
 8210  */
 8211 int
 8212 nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds,
 8213     struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt,
 8214     struct ucred *cred, NFSPROC_T *p)
 8215 {
 8216         struct nfsdontlist *mrp, *nmrp;
 8217         struct nfslayouthash *lhyp;
 8218         struct nfslayout *lyp, *nlyp;
 8219         struct nfslayouthead thl;
 8220         struct mount *mp, *tvmp;
 8221         struct acl *aclp;
 8222         struct vattr va;
 8223         struct timespec mtime;
 8224         fhandle_t fh;
 8225         vnode_t tvp;
 8226         off_t rdpos, wrpos;
 8227         ssize_t aresid;
 8228         char *dat;
 8229         int didprintf, ret, retacl, xfer;
 8230 
 8231         ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp");
 8232         ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp");
 8233         /*
 8234          * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag
 8235          * so that no more RW layouts will get issued.
 8236          */
 8237         ret = nfsvno_getfh(vp, &fh, p);
 8238         if (ret != 0) {
 8239                 NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret);
 8240                 return (ret);
 8241         }
 8242         nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK);
 8243         nmrp->nfsmr_flags = NFSMR_DONTLAYOUT;
 8244         NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh));
 8245         NFSDDONTLISTLOCK();
 8246         LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
 8247                 if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0)
 8248                         break;
 8249         }
 8250         if (mrp == NULL) {
 8251                 LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list);
 8252                 mrp = nmrp;
 8253                 nmrp = NULL;
 8254                 nfsrv_dontlistlen++;
 8255                 NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n");
 8256         } else {
 8257                 NFSDDONTLISTUNLOCK();
 8258                 free(nmrp, M_NFSDSTATE);
 8259                 NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n");
 8260                 return (ENXIO);
 8261         }
 8262         NFSDDONTLISTUNLOCK();
 8263 
 8264         /*
 8265          * Search for all RW layouts for this file.  Move them to the
 8266          * recall list, so they can be recalled and their return noted.
 8267          */
 8268         lhyp = NFSLAYOUTHASH(&fh);
 8269         NFSDRECALLLOCK();
 8270         NFSLOCKLAYOUT(lhyp);
 8271         TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 8272                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8273                     (lyp->lay_flags & NFSLAY_RW) != 0) {
 8274                         TAILQ_REMOVE(&lhyp->list, lyp, lay_list);
 8275                         TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list);
 8276                         lyp->lay_trycnt = 0;
 8277                 }
 8278         }
 8279         NFSUNLOCKLAYOUT(lhyp);
 8280         NFSDRECALLUNLOCK();
 8281 
 8282         ret = 0;
 8283         mp = tvmp = NULL;
 8284         didprintf = 0;
 8285         TAILQ_INIT(&thl);
 8286         /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */
 8287         NFSVOPUNLOCK(vp, 0);
 8288         /* Now, do a recall for all layouts not yet recalled. */
 8289 tryagain:
 8290         NFSDRECALLLOCK();
 8291         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 8292                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8293                     (lyp->lay_flags & NFSLAY_RECALL) == 0) {
 8294                         lyp->lay_flags |= NFSLAY_RECALL;
 8295                         /*
 8296                          * The layout stateid.seqid needs to be incremented
 8297                          * before doing a LAYOUT_RECALL callback.
 8298                          */
 8299                         if (++lyp->lay_stateid.seqid == 0)
 8300                                 lyp->lay_stateid.seqid = 1;
 8301                         NFSDRECALLUNLOCK();
 8302                         nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
 8303                             &lyp->lay_fh, lyp, 0, lyp->lay_type, p);
 8304                         NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n");
 8305                         goto tryagain;
 8306                 }
 8307         }
 8308 
 8309         /* Now wait for them to be returned. */
 8310 tryagain2:
 8311         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 8312                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) {
 8313                         if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) {
 8314                                 TAILQ_REMOVE(&nfsrv_recalllisthead, lyp,
 8315                                     lay_list);
 8316                                 TAILQ_INSERT_HEAD(&thl, lyp, lay_list);
 8317                                 NFSD_DEBUG(4,
 8318                                     "nfsrv_copymr: layout returned\n");
 8319                         } else {
 8320                                 lyp->lay_trycnt++;
 8321                                 ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR,
 8322                                     PVFS | PCATCH, "nfsmrl", hz);
 8323                                 NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n",
 8324                                     ret);
 8325                                 if (ret == EINTR || ret == ERESTART)
 8326                                         break;
 8327                                 if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) {
 8328                                         /*
 8329                                          * Give up after 60sec and return
 8330                                          * ENXIO, failing the copymr.
 8331                                          * This layout will remain on the
 8332                                          * recalllist.  It can only be cleared
 8333                                          * by restarting the nfsd.
 8334                                          * This seems the safe way to handle
 8335                                          * it, since it cannot be safely copied
 8336                                          * with an outstanding RW layout.
 8337                                          */
 8338                                         if (lyp->lay_trycnt >= 60) {
 8339                                                 ret = ENXIO;
 8340                                                 break;
 8341                                         }
 8342                                         if (didprintf == 0) {
 8343                                                 printf("nfsrv_copymr: layout "
 8344                                                     "not returned\n");
 8345                                                 didprintf = 1;
 8346                                         }
 8347                                 }
 8348                         }
 8349                         goto tryagain2;
 8350                 }
 8351         }
 8352         NFSDRECALLUNLOCK();
 8353         /* We can now get rid of the layouts that have been returned. */
 8354         TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp)
 8355                 nfsrv_freelayout(&thl, lyp);
 8356 
 8357         /*
 8358          * Do the vn_start_write() calls here, before the MDS vnode is
 8359          * locked and the tvp is created (locked) in the NFS file system
 8360          * that dvp is in.
 8361          * For tvmp, this probably isn't necessary, since it will be an
 8362          * NFS mount and they are not suspendable at this time.
 8363          */
 8364         if (ret == 0)
 8365                 ret = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 8366         if (ret == 0) {
 8367                 tvmp = dvp->v_mount;
 8368                 ret = vn_start_write(NULL, &tvmp, V_WAIT | PCATCH);
 8369         }
 8370 
 8371         /*
 8372          * LK_EXCLUSIVE lock the MDS vnode, so that any
 8373          * proxied writes through the MDS will be blocked until we have
 8374          * completed the copy and update of the extended attributes.
 8375          * This will also ensure that any attributes and ACL will not be
 8376          * changed until the copy is complete.
 8377          */
 8378         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 8379         if (ret == 0 && (vp->v_iflag & VI_DOOMED) != 0) {
 8380                 NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n");
 8381                 ret = ESTALE;
 8382         }
 8383 
 8384         /* Create the data file on the recovered DS. */
 8385         if (ret == 0)
 8386                 ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp);
 8387 
 8388         /* Copy the DS file, if created successfully. */
 8389         if (ret == 0) {
 8390                 /*
 8391                  * Get any NFSv4 ACL on the MDS file, so that it can be set
 8392                  * on the new DS file.
 8393                  */
 8394                 aclp = acl_alloc(M_WAITOK | M_ZERO);
 8395                 retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
 8396                 if (retacl != 0 && retacl != ENOATTR)
 8397                         NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl);
 8398                 dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK);
 8399                 /* Malloc a block of 0s used to check for holes. */
 8400                 if (nfsrv_zeropnfsdat == NULL)
 8401                         nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP,
 8402                             M_WAITOK | M_ZERO);
 8403                 rdpos = wrpos = 0;
 8404                 ret = VOP_GETATTR(fvp, &va, cred);
 8405                 aresid = 0;
 8406                 while (ret == 0 && aresid == 0) {
 8407                         ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ,
 8408                             rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
 8409                             &aresid, p);
 8410                         xfer = PNFSDS_COPYSIZ - aresid;
 8411                         if (ret == 0 && xfer > 0) {
 8412                                 rdpos += xfer;
 8413                                 /*
 8414                                  * Skip the write for holes, except for the
 8415                                  * last block.
 8416                                  */
 8417                                 if (xfer < PNFSDS_COPYSIZ || rdpos ==
 8418                                     va.va_size || NFSBCMP(dat,
 8419                                     nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0)
 8420                                         ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer,
 8421                                             wrpos, UIO_SYSSPACE, IO_NODELOCKED,
 8422                                             cred, NULL, NULL, p);
 8423                                 if (ret == 0)
 8424                                         wrpos += xfer;
 8425                         }
 8426                 }
 8427 
 8428                 /* If there is an ACL and the copy succeeded, set the ACL. */
 8429                 if (ret == 0 && retacl == 0) {
 8430                         ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p);
 8431                         /*
 8432                          * Don't consider these as errors, since VOP_GETACL()
 8433                          * can return an ACL when they are not actually
 8434                          * supported.  For example, for UFS, VOP_GETACL()
 8435                          * will return a trivial ACL based on the uid/gid/mode
 8436                          * when there is no ACL on the file.
 8437                          * This case should be recognized as a trivial ACL
 8438                          * by UFS's VOP_SETACL() and succeed, but...
 8439                          */
 8440                         if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM)
 8441                                 ret = 0;
 8442                 }
 8443 
 8444                 if (ret == 0)
 8445                         ret = VOP_FSYNC(tvp, MNT_WAIT, p);
 8446 
 8447                 /* Set the DS data file's modify time that of the MDS file. */
 8448                 if (ret == 0)
 8449                         ret = VOP_GETATTR(vp, &va, cred);
 8450                 if (ret == 0) {
 8451                         mtime = va.va_mtime;
 8452                         VATTR_NULL(&va);
 8453                         va.va_mtime = mtime;
 8454                         ret = VOP_SETATTR(tvp, &va, cred);
 8455                 }
 8456 
 8457                 vput(tvp);
 8458                 acl_free(aclp);
 8459                 free(dat, M_TEMP);
 8460         }
 8461         if (tvmp != NULL)
 8462                 vn_finished_write(tvmp);
 8463 
 8464         /* Update the extended attributes for the newly created DS file. */
 8465         if (ret == 0)
 8466                 ret = vn_extattr_set(vp, IO_NODELOCKED,
 8467                     EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
 8468                     sizeof(*wpf) * mirrorcnt, (char *)wpf, p);
 8469         if (mp != NULL)
 8470                 vn_finished_write(mp);
 8471 
 8472         /* Get rid of the dontlist entry, so that Layouts can be issued. */
 8473         NFSDDONTLISTLOCK();
 8474         LIST_REMOVE(mrp, nfsmr_list);
 8475         NFSDDONTLISTUNLOCK();
 8476         free(mrp, M_NFSDSTATE);
 8477         return (ret);
 8478 }
 8479 
 8480 /*
 8481  * Create a data storage file on the recovered DS.
       *
       * The uid, gid and mode of the MDS file "vp" are read with VOP_GETATTR()
       * and handed to nfsrv_dscreate() in "nva", together with a size of 0.
       * A second vattr ("va") describing a regular file with the same mode is
       * passed as the creation attributes.  nfsrv_dscreate() is called with
       * the directory vnode "dvp" and the name pf->dsf_filename.
       *
       * "fhp", "pf", "cred", "p" and "tvpp" are passed through to
       * nfsrv_dscreate() unchanged; "ds" is not referenced in this function.
       * NOTE(review): tvpp presumably receives the vnode of the new DS file;
       * confirm against nfsrv_dscreate().
       *
       * Returns the error from VOP_GETATTR() if that call fails, otherwise
       * whatever nfsrv_dscreate() returns.
 8482  */
 8483 static int
 8484 nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
 8485     vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
 8486     vnode_t *tvpp)
 8487 {
 8488         struct vattr va, nva;
 8489         int error;
 8490 
 8491         /* Fetch the attributes of the MDS file; uid/gid/mode are reused below. */
 8492         error = VOP_GETATTR(vp, &va, cred);
 8493         if (error == 0) {
 8494                 /*
                       * Build "nva" from the attributes of "vp": same owner,
                       * group and mode, with the size forced to 0.
                       * NOTE(review): presumably applied to the DS vnode via
                       * Setattr inside nfsrv_dscreate(); confirm there.
                       */
 8495                 VATTR_NULL(&nva);
 8496                 nva.va_uid = va.va_uid;
 8497                 nva.va_gid = va.va_gid;
 8498                 nva.va_mode = va.va_mode;
 8499                 nva.va_size = 0;
                      /* Reuse "va" as the create attributes: regular file, same mode. */
 8500                 VATTR_NULL(&va);
 8501                 va.va_type = VREG;
 8502                 va.va_mode = nva.va_mode;
                      /* The debug tag below does not match this function's name. */
 8503                 NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf);
 8504                 error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL,
 8505                     pf->dsf_filename, cred, p, tvpp);
 8506         }
 8507         return (error);
 8508 }
 8509 
 8510 /*
 8511  * Look up the MDS file shared locked, and then get the extended attribute
 8512  * to find the extant DS file to be copied to the new mirror.
 8513  * If successful, *vpp is set to the MDS file's vp and *nvpp is
 8514  * set to a DS data file for the MDS file, both exclusively locked.
 8515  * The "buf" argument has the pnfsdsfile structure from the MDS file
 8516  * in it and buflen is set to its length.
       *
       * NOTE(review): the lookup of the MDS file in this function uses
       * LOCKSHARED and no lock upgrade is visible here; the "exclusively
       * locked" statement above should be confirmed against
       * nfsrv_dsgetsockmnt() and the caller.
       *
       * Arguments:
       *   mdspathp    path of the MDS file; must resolve to a regular file.
       *   dspathp     optional path of a directory on the NFS mount of the
       *               new/recovered DS.
       *   curdspathp  optional path of a directory on the NFS mount of the
       *               current DS; only accepted together with dspathp.
       *   buf/buflenp passed to nfsrv_dsgetsockmnt().  When no entry position
       *               is reported (ippos == -1) a new pnfsdsfile entry is
       *               appended to buf and *buflenp grows by sizeof(*pf).
       *               NOTE(review): this assumes the caller's buffer has room
       *               for one more entry; confirm at the call site.
       *   fname       passed to nfsrv_dsgetsockmnt() and copied into the
       *               dsf_filename of an appended entry.
       *   pfp         on success, set to the selected entry inside buf.
       *   dsp, fdsp   both cleared on entry.  *fdsp is the nfsdevice found for
       *               curdspathp, *dsp the one found for the new DS.
       *
       * Errors returned directly by this function:
       *   EPERM    curdspathp given without dspathp, or both paths resolve
       *            to the same vnode.
       *   EISDIR   mdspathp is not a regular file.
       *   ENOTDIR  dspathp or curdspathp is not a directory.
       *   ENXIO    a DS path is not on an "nfs" mount, no matching nfsdevice
       *            was found, or dspathp was given but no entry position was
       *            reported.
       * Errors from namei() and nfsrv_dsgetsockmnt() are returned unchanged.
       * On every error path the MDS vnode is released with vput(); the two DS
       * directory vnodes are always released before returning.
 8517  */
 8518 int
 8519 nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf,
 8520     int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp,
 8521     struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp,
 8522     struct nfsdevice **fdsp)
 8523 {
 8524         struct nameidata nd;
 8525         struct vnode *vp, *curvp;
 8526         struct pnfsdsfile *pf;
 8527         struct nfsmount *nmp, *curnmp;
 8528         int dsdir, error, mirrorcnt, ippos;
 8529 
 8530         vp = NULL;
 8531         curvp = NULL;
 8532         curnmp = NULL;
 8533         *dsp = NULL;
 8534         *fdsp = NULL;
              /* A current DS path is rejected unless a new DS path is also given. */
 8535         if (dspathp == NULL && curdspathp != NULL)
 8536                 return (EPERM);
 8537 
 8538         /*
 8539          * Look up the MDS file shared locked.  The lock will be upgraded
 8540          * to an exclusive lock after any rw layouts have been returned.
 8541          */
 8542         NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp);
 8543         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
 8544             mdspathp, p);
 8545         error = namei(&nd);
 8546         NFSD_DEBUG(4, "lookup=%d\n", error);
 8547         if (error != 0)
 8548                 return (error);
              /* Any non-regular file is reported as EISDIR, not only directories. */
 8549         if (nd.ni_vp->v_type != VREG) {
 8550                 vput(nd.ni_vp);
 8551                 NFSD_DEBUG(4, "mdspath not reg\n");
 8552                 return (EISDIR);
 8553         }
              /* From here on every error return must vput(vp). */
 8554         vp = nd.ni_vp;
 8555 
 8556         if (curdspathp != NULL) {
 8557                 /*
 8558                  * Look up the current DS path and find the nfsdev structure for
 8559                  * it.
 8560                  */
 8561                 NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp);
 8562                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 8563                     UIO_SYSSPACE, curdspathp, p);
 8564                 error = namei(&nd);
 8565                 NFSD_DEBUG(4, "ds lookup=%d\n", error);
 8566                 if (error != 0) {
 8567                         vput(vp);
 8568                         return (error);
 8569                 }
 8570                 if (nd.ni_vp->v_type != VDIR) {
 8571                         vput(nd.ni_vp);
 8572                         vput(vp);
 8573                         NFSD_DEBUG(4, "curdspath not dir\n");
 8574                         return (ENOTDIR);
 8575                 }
                      /* The directory must live on a file system of type "nfs". */
 8576                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 8577                         vput(nd.ni_vp);
 8578                         vput(vp);
 8579                         NFSD_DEBUG(4, "curdspath not an NFS mount\n");
 8580                         return (ENXIO);
 8581                 }
 8582                 curnmp = VFSTONFS(nd.ni_vp->v_mount);
 8583         
 8584                 /* Search the nfsdev list for a match. */
 8585                 NFSDDSLOCK();
 8586                 *fdsp = nfsv4_findmirror(curnmp);
 8587                 NFSDDSUNLOCK();
                      /* No nfsdevice for the current DS mount: treat as no current DS. */
 8588                 if (*fdsp == NULL)
 8589                         curnmp = NULL;
 8590                 if (curnmp == NULL) {
 8591                         vput(nd.ni_vp);
 8592                         vput(vp);
 8593                         NFSD_DEBUG(4, "mdscopymr: no current ds\n");
 8594                         return (ENXIO);
 8595                 }
                      /* Keep the current DS directory vnode until after the lookup below. */
 8596                 curvp = nd.ni_vp;
 8597         }
 8598 
 8599         if (dspathp != NULL) {
 8600                 /* Look up the nfsdev path and find the nfsdev structure. */
 8601                 NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp);
 8602                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 8603                     UIO_SYSSPACE, dspathp, p);
 8604                 error = namei(&nd);
 8605                 NFSD_DEBUG(4, "ds lookup=%d\n", error);
 8606                 if (error != 0) {
 8607                         vput(vp);
 8608                         if (curvp != NULL)
 8609                                 vput(curvp);
 8610                         return (error);
 8611                 }
                      /*
                       * nd.ni_vp == curvp means dspathp and curdspathp resolved
                       * to the same vnode, which is reported as EPERM.  The
                       * comparison after the vput() calls below only compares
                       * pointer values; the vnode is not dereferenced.  Note
                       * that the "not dir" debug message is printed for the
                       * EPERM case as well.
                       */
 8612                 if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) {
 8613                         vput(nd.ni_vp);
 8614                         vput(vp);
 8615                         if (curvp != NULL)
 8616                                 vput(curvp);
 8617                         NFSD_DEBUG(4, "dspath not dir\n");
 8618                         if (nd.ni_vp == curvp)
 8619                                 return (EPERM);
 8620                         return (ENOTDIR);
 8621                 }
 8622                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 8623                         vput(nd.ni_vp);
 8624                         vput(vp);
 8625                         if (curvp != NULL)
 8626                                 vput(curvp);
 8627                         NFSD_DEBUG(4, "dspath not an NFS mount\n");
 8628                         return (ENXIO);
 8629                 }
 8630                 nmp = VFSTONFS(nd.ni_vp->v_mount);
 8631         
 8632                 /*
 8633                  * Search the nfsdevice list for a match.  If curnmp == NULL,
 8634                  * this is a recovery and there must be a mirror.
 8635                  */
 8636                 NFSDDSLOCK();
 8637                 if (curnmp == NULL)
 8638                         *dsp = nfsrv_findmirroredds(nmp);
 8639                 else
 8640                         *dsp = nfsv4_findmirror(nmp);
 8641                 NFSDDSUNLOCK();
 8642                 if (*dsp == NULL) {
 8643                         vput(nd.ni_vp);
 8644                         vput(vp);
 8645                         if (curvp != NULL)
 8646                                 vput(curvp);
 8647                         NFSD_DEBUG(4, "mdscopymr: no ds\n");
 8648                         return (ENXIO);
 8649                 }
 8650         } else {
                      /*
                       * No DS path was given.  nd.ni_vp == NULL is used below
                       * as the marker for this case, and nmp starts out NULL.
                       * NOTE(review): nmp is passed by reference to
                       * nfsrv_dsgetsockmnt(), which presumably fills it in;
                       * confirm there.
                       */
 8651                 nd.ni_vp = NULL;
 8652                 nmp = NULL;
 8653         }
 8654 
 8655         /*
 8656          * Get a vp for an available DS data file using the extended
 8657          * attribute on the MDS file.
 8658          * If there is a valid entry for the new DS in the extended attribute
 8659          * on the MDS file (as checked via the nmp argument),
 8660          * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur.
 8661          */
 8662         error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p,
 8663             NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir);
              /* The current DS directory vnode is no longer needed. */
 8664         if (curvp != NULL)
 8665                 vput(curvp);
 8666         if (nd.ni_vp == NULL) {
                      /* dspathp was NULL: find the nfsdevice from the returned nmp. */
 8667                 if (error == 0 && nmp != NULL) {
 8668                         /* Search the nfsdev list for a match. */
 8669                         NFSDDSLOCK();
 8670                         *dsp = nfsrv_findmirroredds(nmp);
 8671                         NFSDDSUNLOCK();
 8672                 }
                      /* Without a mount or a mirrored device, drop *nvpp and fail. */
 8673                 if (error == 0 && (nmp == NULL || *dsp == NULL)) {
 8674                         if (nvpp != NULL && *nvpp != NULL) {
 8675                                 vput(*nvpp);
 8676                                 *nvpp = NULL;
 8677                         }
 8678                         error = ENXIO;
 8679                 }
 8680         } else
                      /* Release the new DS directory vnode looked up via dspathp. */
 8681                 vput(nd.ni_vp);
 8682 
 8683         /*
 8684          * When dspathp != NULL and curdspathp == NULL, this is a recovery
 8685          * and is only allowed if there is a 0.0.0.0 IP address entry.
 8686          * When curdspathp != NULL, the ippos will be set to that entry.
 8687          */
              /* ippos is only examined when error == 0 (short-circuit evaluation). */
 8688         if (error == 0 && dspathp != NULL && ippos == -1) {
 8689                 if (nvpp != NULL && *nvpp != NULL) {
 8690                         vput(*nvpp);
 8691                         *nvpp = NULL;
 8692                 }
 8693                 error = ENXIO;
 8694         }
 8695         if (error == 0) {
                      /* Success: hand the still-locked MDS vnode to the caller. */
 8696                 *vpp = vp;
 8697 
 8698                 pf = (struct pnfsdsfile *)buf;
 8699                 if (ippos == -1) {
 8700                         /* If no zeroip pnfsdsfile, add one. */
                              /* The new entry goes directly after the existing ones. */
 8701                         ippos = *buflenp / sizeof(*pf);
 8702                         *buflenp += sizeof(*pf);
 8703                         pf += ippos;
 8704                         pf->dsf_dir = dsdir;
 8705                         strlcpy(pf->dsf_filename, fname,
 8706                             sizeof(pf->dsf_filename));
 8707                 } else
 8708                         pf += ippos;
 8709                 *pfp = pf;
 8710         } else
                      /* Failure: the MDS vnode is released here; *vpp is not set. */
 8711                 vput(vp);
 8712         return (error);
 8713 }
 8714 
 8715 /*
 8716  * Search for a matching pnfsd mirror device structure, based on the nmp arg.
 8717  * Return one if found, NULL otherwise.
       *
       * The caller must hold the mutex referred to by NFSDDSMUTEXPTR; this is
       * asserted on entry.
       *
       * The entry whose nfsdev_nmp equals nmp is returned only when a mirror
       * exists for it:
       *  - if the entry has nfsdev_mdsisset == 0, a mirror is assumed when
       *    nfsrv_faildscnt > 0;
       *  - otherwise another list entry must have a non-NULL nfsdev_nmp,
       *    nfsdev_mdsisset != 0 and an nfsdev_mdsfsid for which fsidcmp()
       *    against the found entry returns 0.
       * NULL is returned when nfsrv_devidcnt is 0, when no entry matches nmp,
       * or when no mirror is found.  The list itself is not modified.
 8718  */
 8719 static struct nfsdevice *
 8720 nfsrv_findmirroredds(struct nfsmount *nmp)
 8721 {
 8722         struct nfsdevice *ds, *fndds;
 8723         int fndmirror;
 8724 
 8725         mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
 8726         /*
 8727          * Search the DS server list for a match with nmp.
 8728          * The entry is only looked up here, never removed; it is
              * returned to the caller if there is a mirror for it.
 8729          */
 8730         fndds = NULL;
 8731         fndmirror = 0;
              /* An empty device list cannot contain a match. */
 8732         if (nfsrv_devidcnt == 0)
 8733                 return (fndds);
 8734         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 8735                 if (ds->nfsdev_nmp == nmp) {
 8736                         NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n");
 8737                         fndds = ds;
 8738                         break;
 8739                 }
 8740         }
 8741         if (fndds == NULL)
 8742                 return (fndds);
              /*
               * With no MDS fsid set on the entry, nfsrv_faildscnt > 0 is
               * taken as the indication that a mirror exists.
               */
 8743         if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
 8744                 fndmirror = 1;
 8745         else if (fndds->nfsdev_mdsisset != 0) {
 8746                 /*
                       * When the MDS fsid is set, a mirror is another entry
                       * with a mount and the same MDS fsid.
                       */
 8747                 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 8748                         if (ds != fndds && ds->nfsdev_nmp != NULL &&
 8749                             ds->nfsdev_mdsisset != 0 &&
 8750                             fsidcmp(&ds->nfsdev_mdsfsid,
 8751                             &fndds->nfsdev_mdsfsid) == 0) {
 8752                                 fndmirror = 1;
 8753                                 break;
 8754                         }
 8755                 }
 8756         }
 8757         if (fndmirror == 0) {
 8758                 NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n");
 8759                 return (NULL);
 8760         }
 8761         return (fndds);
 8762 }
 8763 

Cache object: 767fbd199c6c1baa13ddca70b2628a0b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.