The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/nfsserver/nfs_nfsdstate.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2009 Rick Macklem, University of Guelph
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  *
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include "opt_inet.h"
   34 #include "opt_inet6.h"
   35 #include <sys/extattr.h>
   36 #include <fs/nfs/nfsport.h>
   37 
   38 struct nfsrv_stablefirst nfsrv_stablefirst;
      /*
       * Global variables defined by this file (the non-extern lines of this
       * group).
       *
       * nfsrv_issuedelegs: when 0, nfsrv_setclient() clears the new client's
       * lc_program so that no callbacks are attempted.
       */
   39 int nfsrv_issuedelegs = 0;
   40 int nfsrv_dolocallocks = 0;
      /*
       * Acquired with nfsv4_lock() by nfsrv_setclient() and by the
       * CLOPS_CONFIRM case of nfsrv_getclient() to lock out other nfsd threads.
       */
   41 struct nfsv4lock nfsv4rootfs_lock;
   42 time_t nfsdev_time = 0;
   43 int nfsrv_layouthashsize;
   44 volatile int nfsrv_layoutcnt = 0;
   45 extern uint32_t nfs_srvmaxio;
   46 
      /* Declarations of variables that are defined in other source files. */
   47 extern int newnfs_numnfsd;
   48 extern struct nfsstatsv1 nfsstatsv1;
   49 extern int nfsrv_lease;
   50 extern struct timeval nfsboottime;
   51 extern u_int32_t newnfs_true, newnfs_false;
   52 extern struct mtx nfsrv_dslock_mtx;
   53 extern struct mtx nfsrv_recalllock_mtx;
   54 extern struct mtx nfsrv_dontlistlock_mtx;
   55 extern int nfsd_debuglevel;
   56 extern u_int nfsrv_dsdirsize;
   57 extern struct nfsdevicehead nfsrv_devidhead;
   58 extern int nfsrv_doflexfile;
   59 extern int nfsrv_maxpnfsmirror;
      /*
       * Macros whose definitions are not visible in this file; presumably they
       * declare the mutexes behind NFSLOCKV4ROOTMUTEX() and NFSLOCKSTATE() -
       * TODO confirm against nfsport.h.
       */
   60 NFSV4ROOTLOCKMUTEX;
   61 NFSSTATESPINLOCK;
   62 extern struct nfsdontlisthead nfsrv_dontlisthead;
   63 extern volatile int nfsrv_devidcnt;
   64 extern struct nfslayouthead nfsrv_recalllisthead;
   65 extern char *nfsrv_zeropnfsdat;
   66 
   67 SYSCTL_DECL(_vfs_nfsd);
      /*
       * Knobs published under the _vfs_nfsd sysctl node.  The five entries
       * declared with CTLFLAG_RDTUN are described by their own help strings as
       * being "set via loader.conf"; see sysctl(9) for the exact flag semantics.
       */
   68 int     nfsrv_statehashsize = NFSSTATEHASHSIZE;
   69 SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
   70     &nfsrv_statehashsize, 0,
   71     "Size of state hash table set via loader.conf");
   72 
   73 int     nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
   74 SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
   75     &nfsrv_clienthashsize, 0,
   76     "Size of client hash table set via loader.conf");
   77 
      /*
       * NOTE: the OID is named "fhhashsize" although the variable behind it is
       * nfsrv_lockhashsize.
       */
   78 int     nfsrv_lockhashsize = NFSLOCKHASHSIZE;
   79 SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
   80     &nfsrv_lockhashsize, 0,
   81     "Size of file handle hash table set via loader.conf");
   82 
   83 int     nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
   84 SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
   85     &nfsrv_sessionhashsize, 0,
   86     "Size of session hash table set via loader.conf");
   87 
   88 int     nfsrv_layouthighwater = NFSLAYOUTHIGHWATER;
   89 SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN,
   90     &nfsrv_layouthighwater, 0,
   91     "High water mark for number of layouts set via loader.conf");
   92 
      /*
       * nfsrv_setclient() compares nfsrv_openpluslock against this limit and
       * replies NFSERR_RESOURCE once the count is above it.
       */
   93 static int      nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
   94 SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
   95     &nfsrv_v4statelimit, 0,
   96     "High water limit for NFSv4 opens+locks+delegations");
   97 
   98 static int      nfsrv_writedelegifpos = 0;
   99 SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
  100     &nfsrv_writedelegifpos, 0,
  101     "Issue a write delegation for read opens if possible");
  102 
  103 static int      nfsrv_allowreadforwriteopen = 1;
  104 SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
  105     &nfsrv_allowreadforwriteopen, 0,
  106     "Allow Reads to be done with Write Access StateIDs");
  107 
      /*
       * NOTE: the OID is named "pnfsstrictatime" although the variable behind
       * it is nfsrv_pnfsatime.
       */
  108 int     nfsrv_pnfsatime = 0;
  109 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW,
  110     &nfsrv_pnfsatime, 0,
  111     "For pNFS service, do Getattr ops to keep atime up-to-date");
  112 
  113 int     nfsrv_flexlinuxhack = 0;
  114 SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
  115     &nfsrv_flexlinuxhack, 0,
  116     "For Linux clients, hack around Flex File Layout bug");
  117 
  118 /*
  119  * Hash lists for nfs V4.
       *
       * nfsclienthash is walked bucket by bucket (0 .. nfsrv_clienthashsize - 1)
       * by nfsrv_setclient(), and addressed through NFSCLIENTHASH(clientid) when
       * a client is inserted or looked up by clientid.
  120  */
  121 struct nfsclienthashhead        *nfsclienthash;
  122 struct nfslockhashhead          *nfslockhash;
  123 struct nfssessionhash           *nfssessionhash;
  124 struct nfslayouthash            *nfslayouthash;
  125 volatile int nfsrv_dontlistlen = 0;
  126 
      /*
       * File-local state.
       *
       * nfsrv_openpluslock is checked against nfsrv_v4statelimit on entry to
       * nfsrv_setclient() and incremented there each time a client structure
       * is linked in; nfsrv_clients is incremented at the same points.
       *
       * nfsrvboottime supplies lval[0] of every newly issued clientid and is
       * compared with clientid.lval[0] in nfsrv_getclient() to detect stale
       * clientids.
       *
       * nfsrv_nogsscallback: when non-zero, nfsrv_setclient() clears lc_program
       * for requests that have ND_GSS set.
       */
  127 static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
  128 static time_t nfsrvboottime;
  129 static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
  130 static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
  131 static int nfsrv_nogsscallback = 0;
  132 static volatile int nfsrv_writedelegcnt = 0;
  133 static int nfsrv_faildscnt;
  134 
  135 /* local functions: forward declarations of the static helpers used in this file */
  136 static void nfsrv_dumpaclient(struct nfsclient *clp,
  137     struct nfsd_dumpclients *dumpp);
  138 static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
  139     NFSPROC_T *p);
  140 static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
  141     NFSPROC_T *p);
  142 static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
  143     NFSPROC_T *p);
  144 static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
  145     int cansleep, NFSPROC_T *p);
  146 static void nfsrv_freenfslock(struct nfslock *lop);
  147 static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
  148 static void nfsrv_freedeleg(struct nfsstate *);
  149 static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, 
  150     u_int32_t flags, struct nfsstate **stpp);
  151 static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
  152     struct nfsstate **stpp);
  153 static int nfsrv_getlockfh(vnode_t vp, u_short flags,
  154     struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
  155 static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
  156     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
  157 static void nfsrv_insertlock(struct nfslock *new_lop,
  158     struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
  159 static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
  160     struct nfslock **other_lopp, struct nfslockfile *lfp);
  161 static int nfsrv_getipnumber(u_char *cp);
  162 static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
  163     nfsv4stateid_t *stateidp, int specialid);
  164 static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
  165     u_int32_t flags);
  166 static int nfsrv_docallback(struct nfsclient *clp, int procnum,
  167     nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
  168     struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p);
  169 static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
  170     uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
  171     int *slotposp);
  172 static u_int32_t nfsrv_nextclientindex(void);
  173 static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
  174 static void nfsrv_markstable(struct nfsclient *clp);
  175 static void nfsrv_markreclaim(struct nfsclient *clp);
  176 static int nfsrv_checkstable(struct nfsclient *clp);
  177 static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct 
  178     vnode *vp, NFSPROC_T *p);
  179 static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
  180     NFSPROC_T *p, vnode_t vp);
  181 static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
  182     struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
  183 static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
  184     struct nfsclient *clp);
  185 static time_t nfsrv_leaseexpiry(void);
  186 static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
  187 static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
  188     struct nfsstate *stp, struct nfsrvcache *op);
  189 static int nfsrv_nootherstate(struct nfsstate *stp);
  190 static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
  191     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
  192 static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
  193     uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
  194 static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
  195     int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
  196     NFSPROC_T *p);
  197 static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
  198     NFSPROC_T *p);
  199 static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
  200     uint64_t first, uint64_t end);
  201 static void nfsrv_locklf(struct nfslockfile *lfp);
  202 static void nfsrv_unlocklf(struct nfslockfile *lfp);
  203 static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
  204 static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
  205 static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
  206     int dont_replycache, struct nfsdsession **sepp, int *slotposp);
  207 static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
  208 static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
  209     nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p);
  210 static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp);
  211 static void nfsrv_freelayoutlist(nfsquad_t clientid);
  212 static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype,
  213     int iomode);
  214 static void nfsrv_freealllayouts(void);
  215 static void nfsrv_freedevid(struct nfsdevice *ds);
  216 static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
  217     struct nfsdevice **dsp);
  218 static void nfsrv_deleteds(struct nfsdevice *fndds);
  219 static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost);
  220 static void nfsrv_freealldevids(void);
  221 static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp,
  222     int maxcnt, NFSPROC_T *p);
  223 static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp,
  224     fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype,
  225     NFSPROC_T *p);
  226 static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
  227     NFSPROC_T *, struct nfslayout **lypp);
  228 static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt);
  229 static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode,
  230     fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
  231 static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode,
  232     int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
  233 static int nfsrv_dontlayout(fhandle_t *fhp);
  234 static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
  235     vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
  236     vnode_t *tvpp);
  237 static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
  238 
  239 /*
  240  * Scan the client list for a match and either return the current one,
  241  * create a new entry or return an error.
  242  * If returning a non-error, the clp structure must either be linked into
  243  * the client list or free'd.
       *
       * nd        - request descriptor; nd_flag is tested for ND_GSS and
       *             ND_NFSV41, and nd is handed to nfsrv_notsamecredname().
       * new_clpp  - in/out: *new_clpp is the candidate client structure.  It is
       *             set to NULL whenever that structure has been inserted into
       *             nfsclienthash; otherwise it is left untouched (presumably
       *             for the caller to free - confirm at the call sites).
       * clientidp - out: the clientid that was assigned, or that of the
       *             matching confirmed client.
       * confirmp  - in/out: receives the new confirmation value.  For NFSv4.1,
       *             a non-zero lval[1] on entry means the client is updating a
       *             confirmed clientid; lval[1] is set to 1 on return when a
       *             confirmed client with the same verifier was found and to 0
       *             when NFSERR_NOENT is returned.
       * p         - passed through to nfsrv_cleanclient() and nfsrv_zapclient().
       *
       * Returns 0 on success, or NFSERR_RESOURCE, NFSERR_NOENT or
       * NFSERR_CLIDINUSE.
       *
       * Three outcomes follow the hash scan:
       *  1. no match, or the match is unconfirmed/admin-revoked: link new_clp
       *     in as a fresh client (discarding the old one, if any);
       *  2. match with a different verifier: link new_clp in with a new
       *     clientid and move the old client's state onto it;
       *  3. match with the same verifier: for NFSv4.1 just report the existing
       *     clientid, otherwise replace the old structure but keep its clientid.
  244  */
  245 int
  246 nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
  247     nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
  248 {
  249         struct nfsclient *clp = NULL, *new_clp = *new_clpp;
  250         int i, error = 0, ret;
  251         struct nfsstate *stp, *tstp;
  252 #ifdef INET
  253         struct sockaddr_in *sin, *rin;
  254 #endif
  255 #ifdef INET6
  256         struct sockaddr_in6 *sin6, *rin6;
  257 #endif
  258         struct nfsdsession *sep, *nsep;
  259         int zapit = 0, gotit, hasstate = 0, igotlock;
              /* Source of confirmation values; persists across calls. */
  260         static u_int64_t confirm_index = 0;
  261 
  262         /*
  263          * Check for state resource limit exceeded.
  264          */
  265         if (nfsrv_openpluslock > nfsrv_v4statelimit) {
  266                 error = NFSERR_RESOURCE;
  267                 goto out;
  268         }
  269 
  270         if (nfsrv_issuedelegs == 0 ||
  271             ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
  272                 /*
  273                  * Don't do callbacks when delegations are disabled or
  274                  * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
  275                  * If establishing a callback connection is attempted
  276                  * when a firewall is blocking the callback path, the
  277                  * server may wait too long for the connect attempt to
  278                  * succeed during the Open. Some clients, such as Linux,
  279                  * may timeout and give up on the Open before the server
  280                  * replies. Also, since AUTH_GSS callbacks are not
  281                  * yet interoperability tested, they might cause the
  282                  * server to crap out, if they get past the Init call to
  283                  * the client.
                       *
                       * NOTE(review): as coded, AUTH_GSS callbacks are turned
                       * off when nfsrv_nogsscallback is non-zero.
  284                  */
  285                 new_clp->lc_program = 0;
  286 
  287         /* Lock out other nfsd threads */
  288         NFSLOCKV4ROOTMUTEX();
  289         nfsv4_relref(&nfsv4rootfs_lock);
  290         do {
  291                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  292                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  293         } while (!igotlock);
  294         NFSUNLOCKV4ROOTMUTEX();
  295 
  296         /*
  297          * Search for a match in the client list.
               *
               * Every bucket is scanned because the comparison is on the
               * client's id string (lc_id/lc_idlen), whereas entries are
               * inserted below under NFSCLIENTHASH(lc_clientid).  On exit,
               * i == nfsrv_clienthashsize exactly when no match was found;
               * otherwise clp points at the matching entry.
  298          */
  299         gotit = i = 0;
  300         while (i < nfsrv_clienthashsize && !gotit) {
  301             LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
  302                 if (new_clp->lc_idlen == clp->lc_idlen &&
  303                     !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
  304                         gotit = 1;
  305                         break;
  306                 }
  307             }
  308             if (gotit == 0)
  309                 i++;
  310         }
              /* clp is only dereferenced here when gotit is set (short-circuit). */
  311         if (!gotit ||
  312             (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
  313                 if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
  314                         /*
  315                          * For NFSv4.1, if confirmp->lval[1] is non-zero, the
  316                          * client is trying to update a confirmed clientid.
  317                          */
  318                         NFSLOCKV4ROOTMUTEX();
  319                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  320                         NFSUNLOCKV4ROOTMUTEX();
  321                         confirmp->lval[1] = 0;
  322                         error = NFSERR_NOENT;
  323                         goto out;
  324                 }
  325                 /*
  326                  * Get rid of the old one.
                       * (i != nfsrv_clienthashsize means the scan found a match.)
  327                  */
  328                 if (i != nfsrv_clienthashsize) {
  329                         LIST_REMOVE(clp, lc_hash);
  330                         nfsrv_cleanclient(clp, p);
  331                         nfsrv_freedeleglist(&clp->lc_deleg);
  332                         nfsrv_freedeleglist(&clp->lc_olddeleg);
  333                         zapit = 1;
  334                 }
  335                 /*
  336                  * Add it after assigning a client id to it.
  337                  */
  338                 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
  339                 if ((nd->nd_flag & ND_NFSV41) != 0)
  340                         new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
  341                             ++confirm_index;
  342                 else
  343                         confirmp->qval = new_clp->lc_confirm.qval =
  344                             ++confirm_index;
  345                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  346                     (u_int32_t)nfsrvboottime;
  347                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  348                     nfsrv_nextclientindex();
  349                 new_clp->lc_stateindex = 0;
  350                 new_clp->lc_statemaxindex = 0;
  351                 new_clp->lc_cbref = 0;
  352                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  353                 LIST_INIT(&new_clp->lc_open);
  354                 LIST_INIT(&new_clp->lc_deleg);
  355                 LIST_INIT(&new_clp->lc_olddeleg);
  356                 LIST_INIT(&new_clp->lc_session);
                      /* i is reused as a plain loop index from here on. */
  357                 for (i = 0; i < nfsrv_statehashsize; i++)
  358                         LIST_INIT(&new_clp->lc_stateid[i]);
  359                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  360                     lc_hash);
  361                 nfsstatsv1.srvclients++;
  362                 nfsrv_openpluslock++;
  363                 nfsrv_clients++;
  364                 NFSLOCKV4ROOTMUTEX();
  365                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  366                 NFSUNLOCKV4ROOTMUTEX();
                      /* The old client is zapped only after the lock is dropped. */
  367                 if (zapit)
  368                         nfsrv_zapclient(clp, p);
                      /* new_clp is now linked into the hash table. */
  369                 *new_clpp = NULL;
  370                 goto out;
  371         }
  372 
  373         /*
  374          * Now, handle the cases where the id is already issued.
  375          */
  376         if (nfsrv_notsamecredname(nd, clp)) {
  377             /*
  378              * Check to see if there is expired state that should go away.
  379              */
  380             if (clp->lc_expiry < NFSD_MONOSEC &&
  381                 (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
  382                 nfsrv_cleanclient(clp, p);
  383                 nfsrv_freedeleglist(&clp->lc_deleg);
  384             }
  385 
  386             /*
  387              * If there is outstanding state, then reply NFSERR_CLIDINUSE per
  388              * RFC3530 Sec. 8.1.2 last para.
                   *
                   * "State" here means any delegation, or any open owner on
                   * lc_open whose own ls_open list is non-empty.
  389              */
  390             if (!LIST_EMPTY(&clp->lc_deleg)) {
  391                 hasstate = 1;
  392             } else if (LIST_EMPTY(&clp->lc_open)) {
  393                 hasstate = 0;
  394             } else {
  395                 hasstate = 0;
  396                 /* Look for an Open on the OpenOwner */
  397                 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
  398                     if (!LIST_EMPTY(&stp->ls_open)) {
  399                         hasstate = 1;
  400                         break;
  401                     }
  402                 }
  403             }
  404             if (hasstate) {
  405                 /*
  406                  * If the uid doesn't match, return NFSERR_CLIDINUSE after
  407                  * filling out the correct ipaddr and portnum.
                       *
                       * The existing client's address and port are copied into
                       * new_clp's nr_nam; other address families are left as is.
  408                  */
  409                 switch (clp->lc_req.nr_nam->sa_family) {
  410 #ifdef INET
  411                 case AF_INET:
  412                         sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam;
  413                         rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
  414                         sin->sin_addr.s_addr = rin->sin_addr.s_addr;
  415                         sin->sin_port = rin->sin_port;
  416                         break;
  417 #endif
  418 #ifdef INET6
  419                 case AF_INET6:
  420                         sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam;
  421                         rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
  422                         sin6->sin6_addr = rin6->sin6_addr;
  423                         sin6->sin6_port = rin6->sin6_port;
  424                         break;
  425 #endif
  426                 }
  427                 NFSLOCKV4ROOTMUTEX();
  428                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  429                 NFSUNLOCKV4ROOTMUTEX();
  430                 error = NFSERR_CLIDINUSE;
  431                 goto out;
  432             }
  433         }
  434 
  435         if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
  436                 /*
  437                  * If the verifier has changed, the client has rebooted
  438                  * and a new client id is issued. The old state info
  439                  * can be thrown away once the SETCLIENTID_CONFIRM occurs.
  440                  */
  441                 LIST_REMOVE(clp, lc_hash);
  442 
  443                 /* Get rid of all sessions on this clientid. */
  444                 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) {
  445                         ret = nfsrv_freesession(sep, NULL);
  446                         if (ret != 0)
  447                                 printf("nfsrv_setclient: verifier changed free"
  448                                     " session failed=%d\n", ret);
  449                 }
  450 
  451                 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
  452                 if ((nd->nd_flag & ND_NFSV41) != 0)
  453                         new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
  454                             ++confirm_index;
  455                 else
  456                         confirmp->qval = new_clp->lc_confirm.qval =
  457                             ++confirm_index;
  458                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  459                     nfsrvboottime;
  460                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  461                     nfsrv_nextclientindex();
  462                 new_clp->lc_stateindex = 0;
  463                 new_clp->lc_statemaxindex = 0;
  464                 new_clp->lc_cbref = 0;
  465                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  466 
  467                 /*
  468                  * Save the state until confirmed.
                       *
                       * Each LIST_NEWHEAD hands an old list over to new_clp
                       * (macro not visible here - confirm its exact semantics);
                       * the loop after it repoints every entry's ls_clp back
                       * pointer at new_clp.
  469                  */
  470                 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
  471                 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
  472                         tstp->ls_clp = new_clp;
  473                 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
  474                 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
  475                         tstp->ls_clp = new_clp;
  476                 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
  477                     ls_list);
  478                 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
  479                         tstp->ls_clp = new_clp;
  480                 for (i = 0; i < nfsrv_statehashsize; i++) {
  481                         LIST_NEWHEAD(&new_clp->lc_stateid[i],
  482                             &clp->lc_stateid[i], ls_hash);
  483                         LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
  484                                 tstp->ls_clp = new_clp;
  485                 }
  486                 LIST_INIT(&new_clp->lc_session);
  487                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  488                     lc_hash);
  489                 nfsstatsv1.srvclients++;
  490                 nfsrv_openpluslock++;
  491                 nfsrv_clients++;
  492                 NFSLOCKV4ROOTMUTEX();
  493                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  494                 NFSUNLOCKV4ROOTMUTEX();
  495 
  496                 /*
  497                  * Must wait until any outstanding callback on the old clp
  498                  * completes.
                       *
                       * lc_cbref is re-tested after every wakeup or timeout
                       * (the mtx_sleep() timeout argument is 10 * hz).
  499                  */
  500                 NFSLOCKSTATE();
  501                 while (clp->lc_cbref) {
  502                         clp->lc_flags |= LCL_WAKEUPWANTED;
  503                         (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
  504                             "nfsd clp", 10 * hz);
  505                 }
  506                 NFSUNLOCKSTATE();
  507                 nfsrv_zapclient(clp, p);
  508                 *new_clpp = NULL;
  509                 goto out;
  510         }
  511 
  512         /* For NFSv4.1, mark that we found a confirmed clientid. */
  513         if ((nd->nd_flag & ND_NFSV41) != 0) {
                      /*
                       * Nothing is linked in on this path and *new_clpp is
                       * left non-NULL.
                       */
  514                 clientidp->lval[0] = clp->lc_clientid.lval[0];
  515                 clientidp->lval[1] = clp->lc_clientid.lval[1];
  516                 confirmp->lval[0] = 0;  /* Ignored by client */
  517                 confirmp->lval[1] = 1;
  518         } else {
  519                 /*
  520                  * id and verifier match, so update the net address info
  521                  * and get rid of any existing callback authentication
  522                  * handle, so a new one will be acquired.
                       *
                       * new_clp takes over the old client's clientid, state
                       * indices, delegation time and state lists.
  523                  */
  524                 LIST_REMOVE(clp, lc_hash);
  525                 new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
  526                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  527                 confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
  528                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  529                     clp->lc_clientid.lval[0];
  530                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  531                     clp->lc_clientid.lval[1];
  532                 new_clp->lc_delegtime = clp->lc_delegtime;
  533                 new_clp->lc_stateindex = clp->lc_stateindex;
  534                 new_clp->lc_statemaxindex = clp->lc_statemaxindex;
  535                 new_clp->lc_cbref = 0;
  536                 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
  537                 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
  538                         tstp->ls_clp = new_clp;
  539                 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
  540                 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
  541                         tstp->ls_clp = new_clp;
  542                 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
  543                 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
  544                         tstp->ls_clp = new_clp;
  545                 for (i = 0; i < nfsrv_statehashsize; i++) {
  546                         LIST_NEWHEAD(&new_clp->lc_stateid[i],
  547                             &clp->lc_stateid[i], ls_hash);
  548                         LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
  549                                 tstp->ls_clp = new_clp;
  550                 }
  551                 LIST_INIT(&new_clp->lc_session);
  552                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  553                     lc_hash);
  554                 nfsstatsv1.srvclients++;
  555                 nfsrv_openpluslock++;
  556                 nfsrv_clients++;
  557         }
  558         NFSLOCKV4ROOTMUTEX();
  559         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  560         NFSUNLOCKV4ROOTMUTEX();
  561 
              /* Same condition as the else branch above: the old clp was replaced. */
  562         if ((nd->nd_flag & ND_NFSV41) == 0) {
  563                 /*
  564                  * Must wait until any outstanding callback on the old clp
  565                  * completes.
  566                  */
  567                 NFSLOCKSTATE();
  568                 while (clp->lc_cbref) {
  569                         clp->lc_flags |= LCL_WAKEUPWANTED;
  570                         (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
  571                             "nfsdclp", 10 * hz);
  572                 }
  573                 NFSUNLOCKSTATE();
  574                 nfsrv_zapclient(clp, p);
  575                 *new_clpp = NULL;
  576         }
  577 
  578 out:
  579         NFSEXITCODE2(error, nd);
  580         return (error);
  581 }
  582 
  583 /*
  584  * Check to see if the client id exists and optionally confirm it.
  585  */
  586 int
  587 nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
  588     struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
  589     struct nfsrv_descript *nd, NFSPROC_T *p)
  590 {
  591         struct nfsclient *clp;
  592         struct nfsstate *stp;
  593         int i;
  594         struct nfsclienthashhead *hp;
  595         int error = 0, igotlock, doneok;
  596         struct nfssessionhash *shp;
  597         struct nfsdsession *sep;
  598         uint64_t sessid[2];
  599         static uint64_t next_sess = 0;
  600 
              /*
               * Overview: locate the nfsclient for clientid (or, for an NFSv4.1
               * request with opflags == CLOPS_RENEW, through the request's
               * session), carry out whatever opflags asks for, and hand the
               * client back through clpp when clpp is non-NULL.
               * Returns 0 or an NFSERR_* value.
               *
               * Start with no client returned; the "goto out" exits below leave
               * *clpp NULL.
               */
  601         if (clpp)
  602                 *clpp = NULL;
              /*
               * The boot-time half of the clientid must match this server
               * instance.  The check is skipped only when nd is non-NULL, the
               * request is flagged ND_NFSV41 and opflags is exactly CLOPS_RENEW.
               */
  603         if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
  604             opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
  605                 error = NFSERR_STALECLIENTID;
  606                 goto out;
  607         }
  608 
  609         /*
  610          * If called with opflags == CLOPS_RENEW, the State Lock is
  611          * already held. Otherwise, we need to get either that or,
  612          * for the case of Confirm, lock out the nfsd threads.
  613          */
  614         if (opflags & CLOPS_CONFIRM) {
  615                 NFSLOCKV4ROOTMUTEX();
                      /*
                       * Release the reference and then loop until nfsv4_lock()
                       * reports success (presumably this thread's own reference
                       * would otherwise block the exclusive lock -- TODO confirm).
                       */
  616                 nfsv4_relref(&nfsv4rootfs_lock);
  617                 do {
  618                         igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  619                             NFSV4ROOTLOCKMUTEXPTR, NULL);
  620                 } while (!igotlock);
  621                 /*
  622                  * Create a new sessionid here, since we need to do it where
  623                  * there is a mutex held to serialize update of next_sess.
  624                  */
                      /*
                       * NOTE(review): nd is dereferenced here without the NULL
                       * test used at the top of the function; this looks like it
                       * relies on CLOPS_CONFIRM callers always supplying nd --
                       * confirm.  sessid is left uninitialized when ND_NFSV41 is
                       * clear.
                       */
  625                 if ((nd->nd_flag & ND_NFSV41) != 0) {
  626                         sessid[0] = ++next_sess;
  627                         sessid[1] = clientid.qval;
  628                 }
  629                 NFSUNLOCKV4ROOTMUTEX();
  630         } else if (opflags != CLOPS_RENEW) {
  631                 NFSLOCKSTATE();
  632         }
  633 
  634         /* For NFSv4.1, the clp is acquired from the associated session. */
  635         if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
  636             opflags == CLOPS_RENEW) {
  637                 clp = NULL;
                      /* Without ND_HASSEQUENCE no lookup is done and clp stays NULL. */
  638                 if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
  639                         shp = NFSSESSIONHASH(nd->nd_sessionid);
  640                         NFSLOCKSESSION(shp);
  641                         sep = nfsrv_findsession(nd->nd_sessionid);
  642                         if (sep != NULL)
  643                                 clp = sep->sess_clp;
  644                         NFSUNLOCKSESSION(shp);
  645                 }
  646         } else {
                      /*
                       * Walk the hash chain for clientid looking for an entry
                       * whose lc_clientid.lval[1] matches; clp ends up NULL when
                       * the chain is exhausted.
                       */
  647                 hp = NFSCLIENTHASH(clientid);
  648                 LIST_FOREACH(clp, hp, lc_hash) {
  649                         if (clp->lc_clientid.lval[1] == clientid.lval[1])
  650                                 break;
  651                 }
  652         }
              /*
               * No client found: a confirm reports a stale clientid, every
               * other operation reports an expired one.
               */
  653         if (clp == NULL) {
  654                 if (opflags & CLOPS_CONFIRM)
  655                         error = NFSERR_STALECLIENTID;
  656                 else
  657                         error = NFSERR_EXPIRED;
  658         } else if (clp->lc_flags & LCL_ADMINREVOKED) {
  659                 /*
  660                  * If marked admin revoked, just return the error.
  661                  */
  662                 error = NFSERR_ADMINREVOKED;
  663         }
              /* On failure, undo whichever lock was taken above before leaving. */
  664         if (error) {
  665                 if (opflags & CLOPS_CONFIRM) {
  666                         NFSLOCKV4ROOTMUTEX();
  667                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  668                         NFSUNLOCKV4ROOTMUTEX();
  669                 } else if (opflags != CLOPS_RENEW) {
  670                         NFSUNLOCKSTATE();
  671                 }
  672                 goto out;
  673         }
  674 
  675         /*
  676          * Perform any operations specified by the opflags.
  677          */
  678         if (opflags & CLOPS_CONFIRM) {
                      /*
                       * The confirm verifier must match: only lval[0] is compared
                       * for NFSv4.1, the whole 64-bit value otherwise.
                       */
  679                 if (((nd->nd_flag & ND_NFSV41) != 0 &&
  680                      clp->lc_confirm.lval[0] != confirm.lval[0]) ||
  681                     ((nd->nd_flag & ND_NFSV41) == 0 &&
  682                      clp->lc_confirm.qval != confirm.qval))
  683                         error = NFSERR_STALECLIENTID;
  684                 else if (nfsrv_notsamecredname(nd, clp))
  685                         error = NFSERR_CLIDINUSE;
  686 
  687                 if (!error) {
                          /*
                           * Old state is discarded only when LCL_NEEDSCONFIRM is
                           * set and LCL_DONTCLEAN is clear.
                           */
  688                     if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
  689                         LCL_NEEDSCONFIRM) {
  690                         /*
  691                          * Hang onto the delegations (as old delegations)
  692                          * for an Open with CLAIM_DELEGATE_PREV unless in
  693                          * grace, but get rid of the rest of the state.
  694                          */
  695                         nfsrv_cleanclient(clp, p);
  696                         nfsrv_freedeleglist(&clp->lc_olddeleg);
  697                         if (nfsrv_checkgrace(nd, clp, 0)) {
  698                             /* In grace, so just delete delegations */
  699                             nfsrv_freedeleglist(&clp->lc_deleg);
  700                         } else {
                                  /*
                                   * Tag each delegation as old, set the time
                                   * limit for them, then move the whole list
                                   * from lc_deleg to lc_olddeleg.
                                   */
  701                             LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
  702                                 stp->ls_flags |= NFSLCK_OLDDELEG;
  703                             clp->lc_delegtime = NFSD_MONOSEC +
  704                                 nfsrv_lease + NFSRV_LEASEDELTA;
  705                             LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
  706                                 ls_list);
  707                         }
  708                         if ((nd->nd_flag & ND_NFSV41) != 0)
  709                             clp->lc_program = cbprogram;
  710                     }
                          /*
                           * The client is confirmed now: clear both flags and,
                           * when a callback program is set, flag
                           * LCL_NEEDSCBNULL.
                           */
  711                     clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
  712                     if (clp->lc_program)
  713                         clp->lc_flags |= LCL_NEEDSCBNULL;
  714                     /* For NFSv4.1, link the session onto the client. */
  715                     if (nsep != NULL) {
  716                         /* Hold a reference on the xprt for a backchannel. */
  717                         if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
  718                             != 0) {
                                  /* Create a client handle only if none exists yet. */
  719                             if (clp->lc_req.nr_client == NULL)
  720                                 clp->lc_req.nr_client = (struct __rpc_client *)
  721                                     clnt_bck_create(nd->nd_xprt->xp_socket,
  722                                     cbprogram, NFSV4_CBVERS);
  723                             if (clp->lc_req.nr_client != NULL) {
  724                                 SVC_ACQUIRE(nd->nd_xprt);
  725                                 CLNT_ACQUIRE(clp->lc_req.nr_client);
  726                                 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
  727                                 /* Disable idle timeout. */
  728                                 nd->nd_xprt->xp_idletimeout = 0;
  729                                 nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
  730                             } else
                                      /* No handle: drop the backchannel flag. */
  731                                 nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
  732                         }
                              /*
                               * Stamp both copies of the session id with the
                               * value generated under the root mutex above, then
                               * hash the session and link it to the client with
                               * the state and session locks held.
                               * NOTE(review): sessid is only filled in for
                               * ND_NFSV41 requests -- confirm nsep is non-NULL
                               * only in that case.
                               */
  733                         NFSBCOPY(sessid, nsep->sess_sessionid,
  734                             NFSX_V4SESSIONID);
  735                         NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
  736                             NFSX_V4SESSIONID);
  737                         shp = NFSSESSIONHASH(nsep->sess_sessionid);
  738                         NFSLOCKSTATE();
  739                         NFSLOCKSESSION(shp);
  740                         LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
  741                         LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
  742                         nsep->sess_clp = clp;
  743                         NFSUNLOCKSESSION(shp);
  744                         NFSUNLOCKSTATE();
  745                     }
  746                 }
  747         } else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
                      /* Any non-confirm use of an unconfirmed client fails. */
  748                 error = NFSERR_EXPIRED;
  749         }
  750 
  751         /*
  752          * If called by the Renew Op, we must check the principal.
  753          */
  754         if (!error && (opflags & CLOPS_RENEWOP)) {
  755             if (nfsrv_notsamecredname(nd, clp)) {
                      /*
                       * A different principal is still accepted if any open
                       * state of this client has the caller's uid.
                       */
  756                 doneok = 0;
  757                 for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
  758                     LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
  759                         if ((stp->ls_flags & NFSLCK_OPEN) &&
  760                             stp->ls_uid == nd->nd_cred->cr_uid) {
  761                                 doneok = 1;
  762                                 break;
  763                         }
  764                     }
  765                 }
  766                 if (!doneok)
  767                         error = NFSERR_ACCES;
  768             }
  769             if (!error && (clp->lc_flags & LCL_CBDOWN))
  770                 error = NFSERR_CBPATHDOWN;
  771         }
              /*
               * The lease is renewed on success and also when the only problem
               * is NFSERR_CBPATHDOWN.
               */
  772         if ((!error || error == NFSERR_CBPATHDOWN) &&
  773              (opflags & CLOPS_RENEW)) {
  774                 clp->lc_expiry = nfsrv_leaseexpiry();
  775         }
              /* Release whichever lock was taken on entry. */
  776         if (opflags & CLOPS_CONFIRM) {
  777                 NFSLOCKV4ROOTMUTEX();
  778                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  779                 NFSUNLOCKV4ROOTMUTEX();
  780         } else if (opflags != CLOPS_RENEW) {
  781                 NFSUNLOCKSTATE();
  782         }
              /*
               * This store is reached even when error was set after the lookup
               * succeeded, so *clpp can be non-NULL on an error return.
               */
  783         if (clpp)
  784                 *clpp = clp;
  785 
  786 out:
  787         NFSEXITCODE2(error, nd);
  788         return (error);
  789 }
  790 
  791 /*
  792  * Perform the NFSv4.1 destroy clientid.
       *
       * clientid - identifies the client; lval[0] must equal nfsrvboottime and
       *            lval[1] selects the entry on the client hash chain.
       * p        - passed through to nfsrv_cleanclient() and nfsrv_zapclient().
       *
       * Returns 0 when the client was destroyed or no matching client exists,
       * NFSERR_STALECLIENTID when the boot-time half does not match, and
       * NFSERR_CLIENTIDBUSY when the client still has entries on any
       * lc_stateid[] chain, on lc_session or on lc_deleg.
       *
       * The exclusive nfsv4rootfs_lock is held from the lookup until just
       * before the return; every exit after acquisition releases it with
       * nfsv4_unlock(..., 1).  Layouts for the clientid are freed before the
       * busy checks, so they are released even when NFSERR_CLIENTIDBUSY is
       * returned.
  793  */
  794 int
  795 nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
  796 {
  797         struct nfsclient *clp;
  798         struct nfsclienthashhead *hp;
  799         int error = 0, i, igotlock;
  800 
              /* Nothing is locked yet, so this failure can leave directly. */
  801         if (nfsrvboottime != clientid.lval[0]) {
  802                 error = NFSERR_STALECLIENTID;
  803                 goto out;
  804         }
  805 
  806         /* Lock out other nfsd threads */
  807         NFSLOCKV4ROOTMUTEX();
  808         nfsv4_relref(&nfsv4rootfs_lock);
  809         do {
  810                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  811                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  812         } while (igotlock == 0);
  813         NFSUNLOCKV4ROOTMUTEX();
  814 
              /* clp is NULL after the loop when no entry matched. */
  815         hp = NFSCLIENTHASH(clientid);
  816         LIST_FOREACH(clp, hp, lc_hash) {
  817                 if (clp->lc_clientid.lval[1] == clientid.lval[1])
  818                         break;
  819         }
  820         if (clp == NULL) {
  821                 NFSLOCKV4ROOTMUTEX();
  822                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  823                 NFSUNLOCKV4ROOTMUTEX();
  824                 /* Just return ok, since it is gone. */
  825                 goto out;
  826         }
  827 
  828         /*
  829          * Free up all layouts on the clientid.  Should the client return the
  830          * layouts?
  831          */
  832         nfsrv_freelayoutlist(clientid);
  833 
  834         /* Scan for state on the clientid. */
  835         for (i = 0; i < nfsrv_statehashsize; i++)
  836                 if (!LIST_EMPTY(&clp->lc_stateid[i])) {
  837                         NFSLOCKV4ROOTMUTEX();
  838                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  839                         NFSUNLOCKV4ROOTMUTEX();
  840                         error = NFSERR_CLIENTIDBUSY;
  841                         goto out;
  842                 }
              /* Remaining sessions or delegations also keep the clientid busy. */
  843         if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
  844                 NFSLOCKV4ROOTMUTEX();
  845                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  846                 NFSUNLOCKV4ROOTMUTEX();
  847                 error = NFSERR_CLIENTIDBUSY;
  848                 goto out;
  849         }
  850 
  851         /* Destroy the clientid and return ok. */
  852         nfsrv_cleanclient(clp, p);
  853         nfsrv_freedeleglist(&clp->lc_deleg);
  854         nfsrv_freedeleglist(&clp->lc_olddeleg);
              /* Unhash while still exclusive; the final free follows the unlock. */
  855         LIST_REMOVE(clp, lc_hash);
  856         NFSLOCKV4ROOTMUTEX();
  857         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  858         NFSUNLOCKV4ROOTMUTEX();
  859         nfsrv_zapclient(clp, p);
  860 out:
              /*
               * No nfsrv_descript is in scope in this function, so use the
               * one-argument exit macro rather than NFSEXITCODE2(error, nd).
               */
  861         NFSEXITCODE(error);
  862         return (error);
  863 }
  864 
  865 /*
  866  * Called from the new nfssvc syscall to admin revoke a clientid.
  867  * Returns 0 for success, error otherwise.
       *
       * revokep - carries the client id (nclid_id, nclid_idlen) to revoke.
       * p       - passed through to nfsrv_writestable() and
       *           nfsrv_cleanclient().
       *
       * The client is found by comparing the id length and bytes against
       * every entry of nfsclienthash[]; EPERM is returned when none matches.
       * On a match an NFSNST_REVOKE record is written before the client is
       * flagged LCL_ADMINREVOKED and its state is released.
  868  */
  869 int
  870 nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
  871 {
  872         struct nfsclient *clp = NULL;
  873         int i, error = 0;
  874         int gotit, igotlock;
  875 
  876         /*
  877          * First, lock out the nfsd so that state won't change while the
  878          * revocation record is being written to the stable storage restart
  879          * file.
  880          */
  881         NFSLOCKV4ROOTMUTEX();
  882         do {
  883                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  884                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  885         } while (!igotlock);
  886         NFSUNLOCKV4ROOTMUTEX();
  887 
  888         /*
  889          * Search for a match in the client list.
  890          */
              /*
               * The outer loop stops at the first hash chain that yields a
               * match; the break leaves clp pointing at the matching entry.
               */
  891         gotit = i = 0;
  892         while (i < nfsrv_clienthashsize && !gotit) {
  893             LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
  894                 if (revokep->nclid_idlen == clp->lc_idlen &&
  895                     !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
  896                         gotit = 1;
  897                         break;
  898                 }
  899             }
  900             i++;
  901         }
              /*
               * NOTE(review): nfsv4_unlock() is given 0 here, whereas
               * nfsrv_getclient() and nfsrv_destroyclient() pass 1; presumably
               * this is because this path never released a reference before
               * locking -- confirm against nfsv4_unlock().
               */
  902         if (!gotit) {
  903                 NFSLOCKV4ROOTMUTEX();
  904                 nfsv4_unlock(&nfsv4rootfs_lock, 0);
  905                 NFSUNLOCKV4ROOTMUTEX();
  906                 error = EPERM;
  907                 goto out;
  908         }
  909 
  910         /*
  911          * Now, write out the revocation record
  912          */
  913         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
  914         nfsrv_backupstable();
  915 
  916         /*
  917          * and clear out the state, marking the clientid revoked.
  918          */
              /*
               * LCL_ADMINREVOKED is set before nfsrv_cleanclient() runs; that
               * routine frees the client's sessions only when the flag is
               * clear, so the sessions stay in place here.
               */
  919         clp->lc_flags &= ~LCL_CALLBACKSON;
  920         clp->lc_flags |= LCL_ADMINREVOKED;
  921         nfsrv_cleanclient(clp, p);
  922         nfsrv_freedeleglist(&clp->lc_deleg);
  923         nfsrv_freedeleglist(&clp->lc_olddeleg);
  924         NFSLOCKV4ROOTMUTEX();
  925         nfsv4_unlock(&nfsv4rootfs_lock, 0);
  926         NFSUNLOCKV4ROOTMUTEX();
  927 
  928 out:
  929         NFSEXITCODE(error);
  930         return (error);
  931 }
  932 
  933 /*
  934  * Dump out stats for all clients. Called from nfssvc(2), that is used
  935  * nfsstatsv1.
       *
       * dumpp  - array that receives one entry per client, filled in by
       *          nfsrv_dumpaclient().
       * maxcnt - upper bound on the number of entries written.
       *
       * When fewer than maxcnt clients exist, the entry following the last
       * one written gets nclid_idlen set to 0 to mark the end of the list.
  936  */
  937 void
  938 nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
  939 {
  940         struct nfsclient *clp;
  941         int i = 0, cnt = 0;
  942 
  943         /*
  944          * First, get a reference on the nfsv4rootfs_lock so that an
  945          * exclusive lock cannot be acquired while dumping the clients.
  946          */
  947         NFSLOCKV4ROOTMUTEX();
  948         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
  949         NFSUNLOCKV4ROOTMUTEX();
  950         NFSLOCKSTATE();
  951         /*
  952          * Rattle through the client lists until done.
  953          */
              /* i indexes the hash chains, cnt the next free slot in dumpp[]. */
  954         while (i < nfsrv_clienthashsize && cnt < maxcnt) {
  955             clp = LIST_FIRST(&nfsclienthash[i]);
  956             while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
  957                 nfsrv_dumpaclient(clp, &dumpp[cnt]);
  958                 cnt++;
  959                 clp = LIST_NEXT(clp, lc_hash);
  960             }
  961             i++;
  962         }
              /* Room left over: terminate the list with a zero-length id. */
  963         if (cnt < maxcnt)
  964             dumpp[cnt].ndcl_clid.nclid_idlen = 0;
              /* Release in the reverse order of acquisition. */
  965         NFSUNLOCKSTATE();
  966         NFSLOCKV4ROOTMUTEX();
  967         nfsv4_relref(&nfsv4rootfs_lock);
  968         NFSUNLOCKV4ROOTMUTEX();
  969 }
  970 
  971 /*
  972  * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
       *
       * clp   - client whose flags, id, callback address and state counts are
       *         reported.
       * dumpp - single output record; all six counters are reset to 0 here
       *         before counting.
  973  */
  974 static void
  975 nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
  976 {
  977         struct nfsstate *stp, *openstp, *lckownstp;
  978         struct nfslock *lop;
  979         sa_family_t af;
  980 #ifdef INET
  981         struct sockaddr_in *rin;
  982 #endif
  983 #ifdef INET6
  984         struct sockaddr_in6 *rin6;
  985 #endif
  986 
  987         dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
  988         dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
  989         dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
  990         dumpp->ndcl_flags = clp->lc_flags;
              /* assumes nclid_id can hold lc_idlen bytes -- TODO confirm */
  991         dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
  992         NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
              /*
               * Report the address family and, for the families compiled in,
               * the address.  For any other family ndcl_cbaddr is left as the
               * caller supplied it.
               */
  993         af = clp->lc_req.nr_nam->sa_family;
  994         dumpp->ndcl_addrfam = af;
  995         switch (af) {
  996 #ifdef INET
  997         case AF_INET:
  998                 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
  999                 dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr;
 1000                 break;
 1001 #endif
 1002 #ifdef INET6
 1003         case AF_INET6:
 1004                 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 1005                 dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr;
 1006                 break;
 1007 #endif
 1008         }
 1009 
 1010         /*
 1011          * Now, scan the state lists and total up the opens and locks.
 1012          */
              /*
               * Four nested levels: open owners on lc_open, opens under each
               * owner, lock owners under each open, locks under each lock owner.
               */
 1013         LIST_FOREACH(stp, &clp->lc_open, ls_list) {
 1014             dumpp->ndcl_nopenowners++;
 1015             LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
 1016                 dumpp->ndcl_nopens++;
 1017                 LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
 1018                     dumpp->ndcl_nlockowners++;
 1019                     LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
 1020                         dumpp->ndcl_nlocks++;
 1021                     }
 1022                 }
 1023             }
 1024         }
 1025 
 1026         /*
 1027          * and the delegation lists.
 1028          */
 1029         LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
 1030             dumpp->ndcl_ndelegs++;
 1031         }
 1032         LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
 1033             dumpp->ndcl_nolddelegs++;
 1034         }
 1035 }
 1036 
 1037 /*
 1038  * Dump out lock stats for a file.
       *
       * vp     - vnode of the file; its file handle is obtained with
       *          nfsrv_getlockfh().
       * ldumpp - output array; entries are written for the file's opens, then
       *          its byte-range locks, then its delegations.
       * maxcnt - upper bound on the number of entries written.
       * p      - passed through to nfsrv_getlockfh().
       *
       * When fewer than maxcnt entries are produced, the entry after the last
       * one written gets ndlck_clid.nclid_idlen set to 0 as an end marker.
 1039  */
 1040 void
 1041 nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
 1042     NFSPROC_T *p)
 1043 {
 1044         struct nfsstate *stp;
 1045         struct nfslock *lop;
 1046         int cnt = 0;
 1047         struct nfslockfile *lfp;
 1048         sa_family_t af;
 1049 #ifdef INET
 1050         struct sockaddr_in *rin;
 1051 #endif
 1052 #ifdef INET6
 1053         struct sockaddr_in6 *rin6;
 1054 #endif
 1055         int ret;
 1056         fhandle_t nfh;
 1057 
              /*
               * The file handle is fetched before any lock is taken; its
               * result is only examined once the state lock is held.
               */
 1058         ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
 1059         /*
 1060          * First, get a reference on the nfsv4rootfs_lock so that an
 1061          * exclusive lock on it cannot be acquired while dumping the locks.
 1062          */
 1063         NFSLOCKV4ROOTMUTEX();
 1064         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 1065         NFSUNLOCKV4ROOTMUTEX();
 1066         NFSLOCKSTATE();
 1067         if (!ret)
 1068                 ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
              /*
               * Either lookup failed: report an empty list and drop the lock
               * and the reference.
               * NOTE(review): ldumpp[0] is written here without consulting
               * maxcnt -- confirm callers never pass maxcnt == 0.
               */
 1069         if (ret) {
 1070                 ldumpp[0].ndlck_clid.nclid_idlen = 0;
 1071                 NFSUNLOCKSTATE();
 1072                 NFSLOCKV4ROOTMUTEX();
 1073                 nfsv4_relref(&nfsv4rootfs_lock);
 1074                 NFSUNLOCKV4ROOTMUTEX();
 1075                 return;
 1076         }
 1077 
 1078         /*
 1079          * For each open share on file, dump it out.
 1080          */
              /*
               * The owner reported for an open is that of its open owner
               * (ls_openowner).  ndlck_first and ndlck_end are not written for
               * opens.
               */
 1081         stp = LIST_FIRST(&lfp->lf_open);
 1082         while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
 1083                 ldumpp[cnt].ndlck_flags = stp->ls_flags;
 1084                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1085                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1086                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1087                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1088                 ldumpp[cnt].ndlck_owner.nclid_idlen =
 1089                     stp->ls_openowner->ls_ownerlen;
 1090                 NFSBCOPY(stp->ls_openowner->ls_owner,
 1091                     ldumpp[cnt].ndlck_owner.nclid_id,
 1092                     stp->ls_openowner->ls_ownerlen);
 1093                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1094                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1095                     stp->ls_clp->lc_idlen);
                      /* Address is copied only for families compiled in. */
 1096                 af = stp->ls_clp->lc_req.nr_nam->sa_family;
 1097                 ldumpp[cnt].ndlck_addrfam = af;
 1098                 switch (af) {
 1099 #ifdef INET
 1100                 case AF_INET:
 1101                         rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
 1102                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
 1103                         break;
 1104 #endif
 1105 #ifdef INET6
 1106                 case AF_INET6:
 1107                         rin6 = (struct sockaddr_in6 *)
 1108                             stp->ls_clp->lc_req.nr_nam;
 1109                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
 1110                         break;
 1111 #endif
 1112                 }
 1113                 stp = LIST_NEXT(stp, ls_file);
 1114                 cnt++;
 1115         }
 1116 
 1117         /*
 1118          * and all locks.
 1119          */
              /*
               * cnt carries on from the opens, so locks fill whatever room is
               * left.  Flags and the byte range come from the lock itself; the
               * stateid, owner and client come from its state (lo_stp).
               */
 1120         lop = LIST_FIRST(&lfp->lf_lock);
 1121         while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
 1122                 stp = lop->lo_stp;
 1123                 ldumpp[cnt].ndlck_flags = lop->lo_flags;
 1124                 ldumpp[cnt].ndlck_first = lop->lo_first;
 1125                 ldumpp[cnt].ndlck_end = lop->lo_end;
 1126                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1127                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1128                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1129                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1130                 ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
 1131                 NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
 1132                     stp->ls_ownerlen);
 1133                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1134                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1135                     stp->ls_clp->lc_idlen);
 1136                 af = stp->ls_clp->lc_req.nr_nam->sa_family;
 1137                 ldumpp[cnt].ndlck_addrfam = af;
 1138                 switch (af) {
 1139 #ifdef INET
 1140                 case AF_INET:
 1141                         rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
 1142                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
 1143                         break;
 1144 #endif
 1145 #ifdef INET6
 1146                 case AF_INET6:
 1147                         rin6 = (struct sockaddr_in6 *)
 1148                             stp->ls_clp->lc_req.nr_nam;
 1149                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
 1150                         break;
 1151 #endif
 1152                 }
 1153                 lop = LIST_NEXT(lop, lo_lckfile);
 1154                 cnt++;
 1155         }
 1156 
 1157         /*
 1158          * and the delegations.
 1159          */
              /*
               * Delegations are reported with a zero-length owner;
               * ndlck_first and ndlck_end are not written for them.
               */
 1160         stp = LIST_FIRST(&lfp->lf_deleg);
 1161         while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
 1162                 ldumpp[cnt].ndlck_flags = stp->ls_flags;
 1163                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1164                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1165                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1166                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1167                 ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
 1168                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1169                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1170                     stp->ls_clp->lc_idlen);
 1171                 af = stp->ls_clp->lc_req.nr_nam->sa_family;
 1172                 ldumpp[cnt].ndlck_addrfam = af;
 1173                 switch (af) {
 1174 #ifdef INET
 1175                 case AF_INET:
 1176                         rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
 1177                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
 1178                         break;
 1179 #endif
 1180 #ifdef INET6
 1181                 case AF_INET6:
 1182                         rin6 = (struct sockaddr_in6 *)
 1183                             stp->ls_clp->lc_req.nr_nam;
 1184                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
 1185                         break;
 1186 #endif
 1187                 }
 1188                 stp = LIST_NEXT(stp, ls_file);
 1189                 cnt++;
 1190         }
 1191 
 1192         /*
 1193          * If list isn't full, mark end of list by setting the client name
 1194          * to zero length.
 1195          */
 1196         if (cnt < maxcnt)
 1197                 ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
 1198         NFSUNLOCKSTATE();
 1199         NFSLOCKV4ROOTMUTEX();
 1200         nfsv4_relref(&nfsv4rootfs_lock);
 1201         NFSUNLOCKV4ROOTMUTEX();
 1202 }
 1203 
 1204 /*
 1205  * Server timer routine. It can scan any linked list, so long
 1206  * as it holds the spin/mutex lock and there is no exclusive lock on
 1207  * nfsv4rootfs_lock.
 1208  * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
 1209  *  to do this from a callout, since the spin locks work. For
 1210  *  Darwin, I'm not sure what will work correctly yet.)
 1211  * Should be called once per second.
       *
       * The routine itself frees nothing: it sets LCL_EXPIREIT on clients and
       * bits in nfsrv_stablefirst.nsf_flags, and maintains the per-open-owner
       * ls_noopens tick counters.
 1212  */
 1213 void
 1214 nfsrv_servertimer(void)
 1215 {
 1216         struct nfsclient *clp, *nclp;
 1217         struct nfsstate *stp, *nstp;
 1218         int got_ref, i;
 1219 
 1220         /*
 1221          * Make sure nfsboottime is set. This is used by V3 as well
 1222          * as V4. Note that nfsboottime is not nfsrvboottime, which is
 1223          * only used by the V4 server for leases.
 1224          */
 1225         if (nfsboottime.tv_sec == 0)
 1226                 NFSSETBOOTTIME(nfsboottime);
 1227 
 1228         /*
 1229          * If server hasn't started yet, just return.
 1230          */
 1231         NFSLOCKSTATE();
 1232         if (nfsrv_stablefirst.nsf_eograce == 0) {
 1233                 NFSUNLOCKSTATE();
 1234                 return;
 1235         }
              /*
               * Until NFSNSF_UPDATEDONE is set the only work done is noticing
               * the end of grace: once NFSD_MONOSEC has passed nsf_eograce,
               * NFSNSF_GRACEOVER and NFSNSF_NEEDLOCK are set.
               */
 1236         if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
 1237                 if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
 1238                     NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
 1239                         nfsrv_stablefirst.nsf_flags |=
 1240                             (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
 1241                 NFSUNLOCKSTATE();
 1242                 return;
 1243         }
 1244 
 1245         /*
 1246          * Try and get a reference count on the nfsv4rootfs_lock so that
 1247          * no nfsd thread can acquire an exclusive lock on it before this
 1248          * call is done. If it is already exclusively locked, just return.
 1249          */
              /* The state lock taken above is still held across this attempt. */
 1250         NFSLOCKV4ROOTMUTEX();
 1251         got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
 1252         NFSUNLOCKV4ROOTMUTEX();
 1253         if (got_ref == 0) {
 1254                 NFSUNLOCKSTATE();
 1255                 return;
 1256         }
 1257 
 1258         /*
 1259          * For each client...
 1260          */
 1261         for (i = 0; i < nfsrv_clienthashsize; i++) {
 1262             clp = LIST_FIRST(&nfsclienthash[i]);
 1263             while (clp != LIST_END(&nfsclienthash[i])) {
 1264                 nclp = LIST_NEXT(clp, lc_hash);
                      /* Clients already marked for expiry are skipped. */
 1265                 if (!(clp->lc_flags & LCL_EXPIREIT)) {
 1266                     if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
 1267                          && ((LIST_EMPTY(&clp->lc_deleg)
 1268                               && LIST_EMPTY(&clp->lc_open)) ||
 1269                              nfsrv_clients > nfsrv_clienthighwater)) ||
 1270                         (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
 1271                         (clp->lc_expiry < NFSD_MONOSEC &&
 1272                          (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
 1273                         /*
 1274                          * Lease has expired several nfsrv_lease times ago:
 1275                          * PLUS
 1276                          *    - no state is associated with it
 1277                          *    OR
 1278                          *    - above high water mark for number of clients
 1279                          *      (nfsrv_clienthighwater should be large enough
 1280                          *       that this only occurs when clients fail to
 1281                          *       use the same nfs_client_id4.id. Maybe somewhat
 1282                          *       higher that the maximum number of clients that
 1283                          *       will mount this server?)
 1284                          * OR
 1285                          * Lease has expired a very long time ago
 1286                          * OR
 1287                          * Lease has expired PLUS the number of opens + locks
 1288                          * has exceeded 90% of capacity
 1289                          *
 1290                          * --> Mark for expiry. The actual expiry will be done
 1291                          *     by an nfsd sometime soon.
 1292                          */
 1293                         clp->lc_flags |= LCL_EXPIREIT;
 1294                         nfsrv_stablefirst.nsf_flags |=
 1295                             (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
 1296                     } else {
 1297                         /*
 1298                          * If there are no opens, increment no open tick cnt
 1299                          * If time exceeds NFSNOOPEN, mark it to be thrown away
 1300                          * otherwise, if there is an open, reset no open time
 1301                          * Hopefully, this will avoid excessive re-creation
 1302                          * of open owners and subsequent open confirms.
 1303                          */
                              /*
                               * NFSNSF_NOOPENS is a server-wide flag; it is also
                               * set, for any idle open owner, once opens + locks
                               * exceed half of nfsrv_v4statelimit.
                               */
 1304                         stp = LIST_FIRST(&clp->lc_open);
 1305                         while (stp != LIST_END(&clp->lc_open)) {
 1306                                 nstp = LIST_NEXT(stp, ls_list);
 1307                                 if (LIST_EMPTY(&stp->ls_open)) {
 1308                                         stp->ls_noopens++;
 1309                                         if (stp->ls_noopens > NFSNOOPEN ||
 1310                                             (nfsrv_openpluslock * 2) >
 1311                                             nfsrv_v4statelimit)
 1312                                                 nfsrv_stablefirst.nsf_flags |=
 1313                                                         NFSNSF_NOOPENS;
 1314                                 } else {
 1315                                         stp->ls_noopens = 0;
 1316                                 }
 1317                                 stp = nstp;
 1318                         }
 1319                     }
 1320                 }
 1321                 clp = nclp;
 1322             }
 1323         }
              /* Drop the state lock first, then give back the reference. */
 1324         NFSUNLOCKSTATE();
 1325         NFSLOCKV4ROOTMUTEX();
 1326         nfsv4_relref(&nfsv4rootfs_lock);
 1327         NFSUNLOCKV4ROOTMUTEX();
 1328 }
 1329 
 1330 /*
 1331  * The following set of functions free up the various data structures.
 1332  */
 1333 /*
 1334  * Clear out all open/lock state related to this nfsclient.
 1335  * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
 1336  * there are no other active nfsd threads.
       *
       * clp - client whose open owners (lc_open) and sessions (lc_session)
       *       are released.
       * p   - passed through to nfsrv_freeopenowner().
       *
       * Sessions are left in place when the client is flagged
       * LCL_ADMINREVOKED.  The client structure itself is not freed here.
 1337  */
 1338 void
 1339 nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
 1340 {
 1341         struct nfsstate *stp, *nstp;
 1342         struct nfsdsession *sep, *nsep;
 1343 
              /* The _SAFE iterators are used because entries go away mid-walk. */
 1344         LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
 1345                 nfsrv_freeopenowner(stp, 1, p);
 1346         if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
 1347                 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
                              /* The result of nfsrv_freesession() is ignored. */
 1348                         (void)nfsrv_freesession(sep, NULL);
 1349 }
 1350 
 1351 /*
 1352  * Free a client that has been cleaned. It should also already have been
 1353  * removed from the lists.
 1354  * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
 1355  *  softclock interrupts are enabled.)
       *
       * clp - client to free; the pointer is invalid once this returns.
       * p   - referenced only inside the disabled "notyet" block.
       *
       * The state lock is taken here for the counter updates, so the caller
       * must not already hold it (presumably it is not recursive -- TODO
       * confirm).
 1356  */
 1357 void
 1358 nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
 1359 {
 1360 
              /* Not compiled unless "notyet" is defined. */
 1361 #ifdef notyet
 1362         if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
 1363              (LCL_GSS | LCL_CALLBACKSON) &&
 1364             (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
 1365             clp->lc_handlelen > 0) {
 1366                 clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
 1367                 clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
 1368                 (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
 1369                         NULL, 0, NULL, NULL, NULL, 0, p);
 1370         }
 1371 #endif
              /*
               * Tear down the callback connection, then free the address, the
               * request mutex, the stateid hash table and finally the client.
               */
 1372         newnfs_disconnect(NULL, &clp->lc_req);
 1373         free(clp->lc_req.nr_nam, M_SONAME);
 1374         NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 1375         free(clp->lc_stateid, M_NFSDCLIENT);
 1376         free(clp, M_NFSDCLIENT);
              /* clp is not touched below; only global counters are updated. */
 1377         NFSLOCKSTATE();
 1378         nfsstatsv1.srvclients--;
 1379         nfsrv_openpluslock--;
 1380         nfsrv_clients--;
 1381         NFSUNLOCKSTATE();
 1382 }
 1383 
 1384 /*
 1385  * Free a list of delegation state structures.
 1386  * (This function will also free all nfslockfile structures that no
 1387  *  longer have associated state.)
 1388  */
 1389 void
 1390 nfsrv_freedeleglist(struct nfsstatehead *sthp)
 1391 {
 1392         struct nfsstate *stp, *nstp;
 1393 
 1394         LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
 1395                 nfsrv_freedeleg(stp);
 1396         }
 1397         LIST_INIT(sthp);
 1398 }
 1399 
 1400 /*
 1401  * Free up a delegation.
 1402  */
 1403 static void
 1404 nfsrv_freedeleg(struct nfsstate *stp)
 1405 {
 1406         struct nfslockfile *lfp;
 1407 
 1408         LIST_REMOVE(stp, ls_hash);
 1409         LIST_REMOVE(stp, ls_list);
 1410         LIST_REMOVE(stp, ls_file);
 1411         if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
 1412                 nfsrv_writedelegcnt--;
 1413         lfp = stp->ls_lfp;
 1414         if (LIST_EMPTY(&lfp->lf_open) &&
 1415             LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
 1416             LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
 1417             lfp->lf_usecount == 0 &&
 1418             nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
 1419                 nfsrv_freenfslockfile(lfp);
 1420         free(stp, M_NFSDSTATE);
 1421         nfsstatsv1.srvdelegates--;
 1422         nfsrv_openpluslock--;
 1423         nfsrv_delegatecnt--;
 1424 }
 1425 
 1426 /*
 1427  * This function frees an open owner and all associated opens.
 1428  */
 1429 static void
 1430 nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
 1431 {
 1432         struct nfsstate *nstp, *tstp;
 1433 
 1434         LIST_REMOVE(stp, ls_list);
 1435         /*
 1436          * Now, free all associated opens.
 1437          */
 1438         nstp = LIST_FIRST(&stp->ls_open);
 1439         while (nstp != LIST_END(&stp->ls_open)) {
 1440                 tstp = nstp;
 1441                 nstp = LIST_NEXT(nstp, ls_list);
 1442                 (void) nfsrv_freeopen(tstp, NULL, cansleep, p);
 1443         }
 1444         if (stp->ls_op)
 1445                 nfsrvd_derefcache(stp->ls_op);
 1446         free(stp, M_NFSDSTATE);
 1447         nfsstatsv1.srvopenowners--;
 1448         nfsrv_openpluslock--;
 1449 }
 1450 
 1451 /*
 1452  * This function frees an open (nfsstate open structure) with all associated
 1453  * lock_owners and locks. It also frees the nfslockfile structure iff there
 1454  * are no other opens on the file.
 1455  * Returns 1 if it free'd the nfslockfile, 0 otherwise.
 1456  */
 1457 static int
 1458 nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
 1459 {
 1460         struct nfsstate *nstp, *tstp;
 1461         struct nfslockfile *lfp;
 1462         int ret;
 1463 
 1464         LIST_REMOVE(stp, ls_hash);
 1465         LIST_REMOVE(stp, ls_list);
 1466         LIST_REMOVE(stp, ls_file);
 1467 
 1468         lfp = stp->ls_lfp;
 1469         /*
 1470          * Now, free all lockowners associated with this open.
 1471          */
 1472         LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
 1473                 nfsrv_freelockowner(tstp, vp, cansleep, p);
 1474 
 1475         /*
 1476          * The nfslockfile is freed here if there are no locks
 1477          * associated with the open.
 1478          * If there are locks associated with the open, the
 1479          * nfslockfile structure can be freed via nfsrv_freelockowner().
 1480          * Acquire the state mutex to avoid races with calls to
 1481          * nfsrv_getlockfile().
 1482          */
 1483         if (cansleep != 0)
 1484                 NFSLOCKSTATE();
 1485         if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
 1486             LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
 1487             LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
 1488             lfp->lf_usecount == 0 &&
 1489             (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
 1490                 nfsrv_freenfslockfile(lfp);
 1491                 ret = 1;
 1492         } else
 1493                 ret = 0;
 1494         if (cansleep != 0)
 1495                 NFSUNLOCKSTATE();
 1496         free(stp, M_NFSDSTATE);
 1497         nfsstatsv1.srvopens--;
 1498         nfsrv_openpluslock--;
 1499         return (ret);
 1500 }
 1501 
 1502 /*
 1503  * Frees a lockowner and all associated locks.
 1504  */
 1505 static void
 1506 nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
 1507     NFSPROC_T *p)
 1508 {
 1509 
 1510         LIST_REMOVE(stp, ls_hash);
 1511         LIST_REMOVE(stp, ls_list);
 1512         nfsrv_freeallnfslocks(stp, vp, cansleep, p);
 1513         if (stp->ls_op)
 1514                 nfsrvd_derefcache(stp->ls_op);
 1515         free(stp, M_NFSDSTATE);
 1516         nfsstatsv1.srvlockowners--;
 1517         nfsrv_openpluslock--;
 1518 }
 1519 
 1520 /*
 1521  * Free all the nfs locks on a lockowner.
 1522  */
 1523 static void
 1524 nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
 1525     NFSPROC_T *p)
 1526 {
 1527         struct nfslock *lop, *nlop;
 1528         struct nfsrollback *rlp, *nrlp;
 1529         struct nfslockfile *lfp = NULL;
 1530         int gottvp = 0;
 1531         vnode_t tvp = NULL;
 1532         uint64_t first, end;
 1533 
 1534         if (vp != NULL)
 1535                 ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
 1536         lop = LIST_FIRST(&stp->ls_lock);
 1537         while (lop != LIST_END(&stp->ls_lock)) {
 1538                 nlop = LIST_NEXT(lop, lo_lckowner);
 1539                 /*
 1540                  * Since all locks should be for the same file, lfp should
 1541                  * not change.
 1542                  */
 1543                 if (lfp == NULL)
 1544                         lfp = lop->lo_lfp;
 1545                 else if (lfp != lop->lo_lfp)
 1546                         panic("allnfslocks");
 1547                 /*
 1548                  * If vp is NULL and cansleep != 0, a vnode must be acquired
 1549                  * from the file handle. This only occurs when called from
 1550                  * nfsrv_cleanclient().
 1551                  */
 1552                 if (gottvp == 0) {
 1553                         if (nfsrv_dolocallocks == 0)
 1554                                 tvp = NULL;
 1555                         else if (vp == NULL && cansleep != 0) {
 1556                                 tvp = nfsvno_getvp(&lfp->lf_fh);
 1557                                 if (tvp != NULL)
 1558                                         NFSVOPUNLOCK(tvp);
 1559                         } else
 1560                                 tvp = vp;
 1561                         gottvp = 1;
 1562                 }
 1563 
 1564                 if (tvp != NULL) {
 1565                         if (cansleep == 0)
 1566                                 panic("allnfs2");
 1567                         first = lop->lo_first;
 1568                         end = lop->lo_end;
 1569                         nfsrv_freenfslock(lop);
 1570                         nfsrv_localunlock(tvp, lfp, first, end, p);
 1571                         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
 1572                             nrlp)
 1573                                 free(rlp, M_NFSDROLLBACK);
 1574                         LIST_INIT(&lfp->lf_rollback);
 1575                 } else
 1576                         nfsrv_freenfslock(lop);
 1577                 lop = nlop;
 1578         }
 1579         if (vp == NULL && tvp != NULL)
 1580                 vrele(tvp);
 1581 }
 1582 
 1583 /*
 1584  * Free an nfslock structure.
 1585  */
 1586 static void
 1587 nfsrv_freenfslock(struct nfslock *lop)
 1588 {
 1589 
 1590         if (lop->lo_lckfile.le_prev != NULL) {
 1591                 LIST_REMOVE(lop, lo_lckfile);
 1592                 nfsstatsv1.srvlocks--;
 1593                 nfsrv_openpluslock--;
 1594         }
 1595         LIST_REMOVE(lop, lo_lckowner);
 1596         free(lop, M_NFSDLOCK);
 1597 }
 1598 
 1599 /*
 1600  * This function frees an nfslockfile structure.
 1601  */
 1602 static void
 1603 nfsrv_freenfslockfile(struct nfslockfile *lfp)
 1604 {
 1605 
 1606         LIST_REMOVE(lfp, lf_hash);
 1607         free(lfp, M_NFSDLOCKFILE);
 1608 }
 1609 
 1610 /*
 1611  * This function looks up an nfsstate structure via stateid.
 1612  */
 1613 static int
 1614 nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
 1615     struct nfsstate **stpp)
 1616 {
 1617         struct nfsstate *stp;
 1618         struct nfsstatehead *hp;
 1619         int error = 0;
 1620 
 1621         *stpp = NULL;
 1622         hp = NFSSTATEHASH(clp, *stateidp);
 1623         LIST_FOREACH(stp, hp, ls_hash) {
 1624                 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
 1625                         NFSX_STATEIDOTHER))
 1626                         break;
 1627         }
 1628 
 1629         /*
 1630          * If no state id in list, return NFSERR_BADSTATEID.
 1631          */
 1632         if (stp == LIST_END(hp)) {
 1633                 error = NFSERR_BADSTATEID;
 1634                 goto out;
 1635         }
 1636         *stpp = stp;
 1637 
 1638 out:
 1639         NFSEXITCODE(error);
 1640         return (error);
 1641 }
 1642 
 1643 /*
 1644  * This function gets an nfsstate structure via owner string.
 1645  */
 1646 static void
 1647 nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
 1648     struct nfsstate **stpp)
 1649 {
 1650         struct nfsstate *stp;
 1651 
 1652         *stpp = NULL;
 1653         LIST_FOREACH(stp, hp, ls_list) {
 1654                 if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
 1655                   !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
 1656                         *stpp = stp;
 1657                         return;
 1658                 }
 1659         }
 1660 }
 1661 
 1662 /*
 1663  * Lock control function called to update lock status.
 1664  * Returns 0 upon success, -1 if there is no lock and the flags indicate
 1665  * that one isn't to be created and an NFSERR_xxx for other errors.
 1666  * The structures new_stp and new_lop are passed in as pointers that should
 1667  * be set to NULL if the structure is used and shouldn't be free'd.
 1668  * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
 1669  * never used and can safely be allocated on the stack. For all other
 1670  * cases, *new_stpp and *new_lopp should be malloc'd before the call,
 1671  * in case they are used.
 1672  */
 1673 int
 1674 nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
 1675     struct nfslock **new_lopp, struct nfslockconflict *cfp,
 1676     nfsquad_t clientid, nfsv4stateid_t *stateidp,
 1677     __unused struct nfsexstuff *exp,
 1678     struct nfsrv_descript *nd, NFSPROC_T *p)
 1679 {
 1680         struct nfslock *lop;
 1681         struct nfsstate *new_stp = *new_stpp;
 1682         struct nfslock *new_lop = *new_lopp;
 1683         struct nfsstate *tstp, *mystp, *nstp;
 1684         int specialid = 0;
 1685         struct nfslockfile *lfp;
 1686         struct nfslock *other_lop = NULL;
 1687         struct nfsstate *stp, *lckstp = NULL;
 1688         struct nfsclient *clp = NULL;
 1689         u_int32_t bits;
 1690         int error = 0, haslock = 0, ret, reterr;
 1691         int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
 1692         fhandle_t nfh;
 1693         uint64_t first, end;
 1694         uint32_t lock_flags;
 1695 
 1696         if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
 1697                 /*
 1698                  * Note the special cases of "all 1s" or "all 0s" stateids and
 1699                  * let reads with all 1s go ahead.
 1700                  */
 1701                 if (new_stp->ls_stateid.seqid == 0x0 &&
 1702                     new_stp->ls_stateid.other[0] == 0x0 &&
 1703                     new_stp->ls_stateid.other[1] == 0x0 &&
 1704                     new_stp->ls_stateid.other[2] == 0x0)
 1705                         specialid = 1;
 1706                 else if (new_stp->ls_stateid.seqid == 0xffffffff &&
 1707                     new_stp->ls_stateid.other[0] == 0xffffffff &&
 1708                     new_stp->ls_stateid.other[1] == 0xffffffff &&
 1709                     new_stp->ls_stateid.other[2] == 0xffffffff)
 1710                         specialid = 2;
 1711         }
 1712 
 1713         /*
 1714          * Check for restart conditions (client and server).
 1715          */
 1716         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 1717             &new_stp->ls_stateid, specialid);
 1718         if (error)
 1719                 goto out;
 1720 
 1721         /*
 1722          * Check for state resource limit exceeded.
 1723          */
 1724         if ((new_stp->ls_flags & NFSLCK_LOCK) &&
 1725             nfsrv_openpluslock > nfsrv_v4statelimit) {
 1726                 error = NFSERR_RESOURCE;
 1727                 goto out;
 1728         }
 1729 
 1730         /*
 1731          * For the lock case, get another nfslock structure,
 1732          * just in case we need it.
 1733          * Malloc now, before we start sifting through the linked lists,
 1734          * in case we have to wait for memory.
 1735          */
 1736 tryagain:
 1737         if (new_stp->ls_flags & NFSLCK_LOCK)
 1738                 other_lop = malloc(sizeof (struct nfslock),
 1739                     M_NFSDLOCK, M_WAITOK);
 1740         filestruct_locked = 0;
 1741         reterr = 0;
 1742         lfp = NULL;
 1743 
 1744         /*
 1745          * Get the lockfile structure for CFH now, so we can do a sanity
 1746          * check against the stateid, before incrementing the seqid#, since
 1747          * we want to return NFSERR_BADSTATEID on failure and the seqid#
 1748          * shouldn't be incremented for this case.
 1749          * If nfsrv_getlockfile() returns -1, it means "not found", which
 1750          * will be handled later.
 1751          * If we are doing Lock/LockU and local locking is enabled, sleep
 1752          * lock the nfslockfile structure.
 1753          */
 1754         getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
 1755         NFSLOCKSTATE();
 1756         if (getlckret == 0) {
 1757                 if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
 1758                     nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
 1759                         getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
 1760                             &lfp, &nfh, 1);
 1761                         if (getlckret == 0)
 1762                                 filestruct_locked = 1;
 1763                 } else
 1764                         getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
 1765                             &lfp, &nfh, 0);
 1766         }
 1767         if (getlckret != 0 && getlckret != -1)
 1768                 reterr = getlckret;
 1769 
 1770         if (filestruct_locked != 0) {
 1771                 LIST_INIT(&lfp->lf_rollback);
 1772                 if ((new_stp->ls_flags & NFSLCK_LOCK)) {
 1773                         /*
 1774                          * For local locking, do the advisory locking now, so
 1775                          * that any conflict can be detected. A failure later
 1776                          * can be rolled back locally. If an error is returned,
 1777                          * struct nfslockfile has been unlocked and any local
 1778                          * locking rolled back.
 1779                          */
 1780                         NFSUNLOCKSTATE();
 1781                         if (vnode_unlocked == 0) {
 1782                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
 1783                                 vnode_unlocked = 1;
 1784                                 NFSVOPUNLOCK(vp);
 1785                         }
 1786                         reterr = nfsrv_locallock(vp, lfp,
 1787                             (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
 1788                             new_lop->lo_first, new_lop->lo_end, cfp, p);
 1789                         NFSLOCKSTATE();
 1790                 }
 1791         }
 1792 
 1793         if (specialid == 0) {
 1794             if (new_stp->ls_flags & NFSLCK_TEST) {
 1795                 /*
 1796                  * RFC 3530 does not list LockT as an op that renews a
 1797                  * lease, but the consensus seems to be that it is ok
 1798                  * for a server to do so.
 1799                  */
 1800                 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 1801                     (nfsquad_t)((u_quad_t)0), 0, nd, p);
 1802 
 1803                 /*
 1804                  * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
 1805                  * error returns for LockT, just go ahead and test for a lock,
 1806                  * since there are no locks for this client, but other locks
 1807                  * can conflict. (ie. same client will always be false)
 1808                  */
 1809                 if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
 1810                     error = 0;
 1811                 lckstp = new_stp;
 1812             } else {
 1813               error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 1814                 (nfsquad_t)((u_quad_t)0), 0, nd, p);
 1815               if (error == 0)
 1816                 /*
 1817                  * Look up the stateid
 1818                  */
 1819                 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
 1820                   new_stp->ls_flags, &stp);
 1821               /*
 1822                * do some sanity checks for an unconfirmed open or a
 1823                * stateid that refers to the wrong file, for an open stateid
 1824                */
 1825               if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
 1826                   ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
 1827                    (getlckret == 0 && stp->ls_lfp != lfp))){
 1828                       /*
 1829                        * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID
 1830                        * The only exception is using SETATTR with SIZE.
 1831                        * */
 1832                     if ((new_stp->ls_flags &
 1833                          (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
 1834                              error = NFSERR_BADSTATEID;
 1835               }
 1836               
 1837                 if (error == 0 &&
 1838                   (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
 1839                   getlckret == 0 && stp->ls_lfp != lfp)
 1840                         error = NFSERR_BADSTATEID;
 1841 
 1842               /*
 1843                * If the lockowner stateid doesn't refer to the same file,
 1844                * I believe that is considered ok, since some clients will
 1845                * only create a single lockowner and use that for all locks
 1846                * on all files.
 1847                * For now, log it as a diagnostic, instead of considering it
 1848                * a BadStateid.
 1849                */
 1850               if (error == 0 && (stp->ls_flags &
 1851                   (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
 1852                   getlckret == 0 && stp->ls_lfp != lfp) {
 1853 #ifdef DIAGNOSTIC
 1854                   printf("Got a lock statid for different file open\n");
 1855 #endif
 1856                   /*
 1857                   error = NFSERR_BADSTATEID;
 1858                   */
 1859               }
 1860 
 1861               if (error == 0) {
 1862                     if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
 1863                         /*
 1864                          * If haslock set, we've already checked the seqid.
 1865                          */
 1866                         if (!haslock) {
 1867                             if (stp->ls_flags & NFSLCK_OPEN)
 1868                                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 1869                                     stp->ls_openowner, new_stp->ls_op);
 1870                             else
 1871                                 error = NFSERR_BADSTATEID;
 1872                         }
 1873                         if (!error)
 1874                             nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
 1875                         if (lckstp) {
 1876                             /*
 1877                              * For NFSv4.1 and NFSv4.2 allow an
 1878                              * open_to_lock_owner when the lock_owner already
 1879                              * exists.  Just clear NFSLCK_OPENTOLOCK so that
 1880                              * a new lock_owner will not be created.
 1881                              * RFC7530 states that the error for NFSv4.0
 1882                              * is NFS4ERR_BAD_SEQID.
 1883                              */
 1884                             if ((nd->nd_flag & ND_NFSV41) != 0)
 1885                                 new_stp->ls_flags &= ~NFSLCK_OPENTOLOCK;
 1886                             else
 1887                                 error = NFSERR_BADSEQID;
 1888                         } else
 1889                             lckstp = new_stp;
 1890                     } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
 1891                         /*
 1892                          * If haslock set, ditto above.
 1893                          */
 1894                         if (!haslock) {
 1895                             if (stp->ls_flags & NFSLCK_OPEN)
 1896                                 error = NFSERR_BADSTATEID;
 1897                             else
 1898                                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 1899                                     stp, new_stp->ls_op);
 1900                         }
 1901                         lckstp = stp;
 1902                     } else {
 1903                         lckstp = stp;
 1904                     }
 1905               }
 1906               /*
 1907                * If the seqid part of the stateid isn't the same, return
 1908                * NFSERR_OLDSTATEID for cases other than I/O Ops.
 1909                * For I/O Ops, only return NFSERR_OLDSTATEID if
 1910                * nfsrv_returnoldstateid is set. (The consensus on the email
 1911                * list was that most clients would prefer to not receive
 1912                * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
 1913                * is what will happen, so I use the nfsrv_returnoldstateid to
 1914                * allow for either server configuration.)
 1915                */
 1916               if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
 1917                   (((nd->nd_flag & ND_NFSV41) == 0 &&
 1918                    (!(new_stp->ls_flags & NFSLCK_CHECK) ||
 1919                     nfsrv_returnoldstateid)) ||
 1920                    ((nd->nd_flag & ND_NFSV41) != 0 &&
 1921                     new_stp->ls_stateid.seqid != 0)))
 1922                     error = NFSERR_OLDSTATEID;
 1923             }
 1924         }
 1925 
 1926         /*
 1927          * Now we can check for grace.
 1928          */
 1929         if (!error)
 1930                 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
 1931         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
 1932                 nfsrv_checkstable(clp))
 1933                 error = NFSERR_NOGRACE;
 1934         /*
 1935          * If we successfully Reclaimed state, note that.
 1936          */
 1937         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
 1938                 nfsrv_markstable(clp);
 1939 
 1940         /*
 1941          * At this point, either error == NFSERR_BADSTATEID or the
 1942          * seqid# has been updated, so we can return any error.
 1943          * If error == 0, there may be an error in:
 1944          *    nd_repstat - Set by the calling function.
 1945          *    reterr - Set above, if getting the nfslockfile structure
 1946          *       or acquiring the local lock failed.
 1947          *    (If both of these are set, nd_repstat should probably be
 1948          *     returned, since that error was detected before this
 1949          *     function call.)
 1950          */
 1951         if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
 1952                 if (error == 0) {
 1953                         if (nd->nd_repstat != 0)
 1954                                 error = nd->nd_repstat;
 1955                         else
 1956                                 error = reterr;
 1957                 }
 1958                 if (filestruct_locked != 0) {
 1959                         /* Roll back local locks. */
 1960                         NFSUNLOCKSTATE();
 1961                         if (vnode_unlocked == 0) {
 1962                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
 1963                                 vnode_unlocked = 1;
 1964                                 NFSVOPUNLOCK(vp);
 1965                         }
 1966                         nfsrv_locallock_rollback(vp, lfp, p);
 1967                         NFSLOCKSTATE();
 1968                         nfsrv_unlocklf(lfp);
 1969                 }
 1970                 NFSUNLOCKSTATE();
 1971                 goto out;
 1972         }
 1973 
 1974         /*
 1975          * Check the nfsrv_getlockfile return.
 1976          * Returned -1 if no structure found.
 1977          */
 1978         if (getlckret == -1) {
 1979                 error = NFSERR_EXPIRED;
 1980                 /*
 1981                  * Called from lockt, so no lock is OK.
 1982                  */
 1983                 if (new_stp->ls_flags & NFSLCK_TEST) {
 1984                         error = 0;
 1985                 } else if (new_stp->ls_flags &
 1986                     (NFSLCK_CHECK | NFSLCK_SETATTR)) {
 1987                         /*
 1988                          * Called to check for a lock, OK if the stateid is all
 1989                          * 1s or all 0s, but there should be an nfsstate
 1990                          * otherwise.
 1991                          * (ie. If there is no open, I'll assume no share
 1992                          *  deny bits.)
 1993                          */
 1994                         if (specialid)
 1995                                 error = 0;
 1996                         else
 1997                                 error = NFSERR_BADSTATEID;
 1998                 }
 1999                 NFSUNLOCKSTATE();
 2000                 goto out;
 2001         }
 2002 
 2003         /*
 2004          * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
 2005          * For NFSLCK_CHECK, allow a read if write access is granted,
 2006          * but check for a deny. For NFSLCK_LOCK, require correct access,
 2007          * which implies a conflicting deny can't exist.
 2008          */
 2009         if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
 2010             /*
 2011              * Four kinds of state id:
 2012              * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
 2013              * - stateid for an open
 2014              * - stateid for a delegation
 2015              * - stateid for a lock owner
 2016              */
 2017             if (!specialid) {
 2018                 if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
 2019                     delegation = 1;
 2020                     mystp = stp;
 2021                     nfsrv_delaydelegtimeout(stp);
 2022                 } else if (stp->ls_flags & NFSLCK_OPEN) {
 2023                     mystp = stp;
 2024                 } else {
 2025                     mystp = stp->ls_openstp;
 2026                 }
 2027                 /*
 2028                  * If locking or checking, require correct access
 2029                  * bit set.
 2030                  */
 2031                 if (((new_stp->ls_flags & NFSLCK_LOCK) &&
 2032                      !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
 2033                        mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
 2034                     ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
 2035                       (NFSLCK_CHECK | NFSLCK_READACCESS) &&
 2036                      !(mystp->ls_flags & NFSLCK_READACCESS) &&
 2037                      nfsrv_allowreadforwriteopen == 0) ||
 2038                     ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
 2039                       (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
 2040                      !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
 2041                         if (filestruct_locked != 0) {
 2042                                 /* Roll back local locks. */
 2043                                 NFSUNLOCKSTATE();
 2044                                 if (vnode_unlocked == 0) {
 2045                                         ASSERT_VOP_ELOCKED(vp,
 2046                                             "nfsrv_lockctrl3");
 2047                                         vnode_unlocked = 1;
 2048                                         NFSVOPUNLOCK(vp);
 2049                                 }
 2050                                 nfsrv_locallock_rollback(vp, lfp, p);
 2051                                 NFSLOCKSTATE();
 2052                                 nfsrv_unlocklf(lfp);
 2053                         }
 2054                         NFSUNLOCKSTATE();
 2055                         error = NFSERR_OPENMODE;
 2056                         goto out;
 2057                 }
 2058             } else
 2059                 mystp = NULL;
 2060             if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
 2061                 /*
 2062                  * Check for a conflicting deny bit.
 2063                  */
 2064                 LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
 2065                     if (tstp != mystp) {
 2066                         bits = tstp->ls_flags;
 2067                         bits >>= NFSLCK_SHIFT;
 2068                         if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
 2069                             KASSERT(vnode_unlocked == 0,
 2070                                 ("nfsrv_lockctrl: vnode unlocked1"));
 2071                             ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
 2072                                 vp, p);
 2073                             if (ret == 1) {
 2074                                 /*
 2075                                 * nfsrv_clientconflict unlocks state
 2076                                  * when it returns non-zero.
 2077                                  */
 2078                                 lckstp = NULL;
 2079                                 goto tryagain;
 2080                             }
 2081                             if (ret == 0)
 2082                                 NFSUNLOCKSTATE();
 2083                             if (ret == 2)
 2084                                 error = NFSERR_PERM;
 2085                             else
 2086                                 error = NFSERR_OPENMODE;
 2087                             goto out;
 2088                         }
 2089                     }
 2090                 }
 2091 
 2092                 /* We're outta here */
 2093                 NFSUNLOCKSTATE();
 2094                 goto out;
 2095             }
 2096         }
 2097 
 2098         /*
 2099          * For setattr, just get rid of all the Delegations for other clients.
 2100          */
 2101         if (new_stp->ls_flags & NFSLCK_SETATTR) {
 2102                 KASSERT(vnode_unlocked == 0,
 2103                     ("nfsrv_lockctrl: vnode unlocked2"));
 2104                 ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
 2105                 if (ret) {
 2106                         /*
 2107                          * nfsrv_cleandeleg() unlocks state when it
 2108                          * returns non-zero.
 2109                          */
 2110                         if (ret == -1) {
 2111                                 lckstp = NULL;
 2112                                 goto tryagain;
 2113                         }
 2114                         error = ret;
 2115                         goto out;
 2116                 }
 2117                 if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
 2118                     (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
 2119                      LIST_EMPTY(&lfp->lf_deleg))) {
 2120                         NFSUNLOCKSTATE();
 2121                         goto out;
 2122                 }
 2123         }
 2124 
 2125         /*
 2126          * Check for a conflicting delegation. If one is found, call
 2127          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2128          * been set yet, it will get the lock. Otherwise, it will recall
 2129          * the delegation. Then, we try try again...
 2130          * I currently believe the conflict algorithm to be:
 2131          * For Lock Ops (Lock/LockT/LockU)
 2132          * - there is a conflict iff a different client has a write delegation
 2133          * For Reading (Read Op)
 2134          * - there is a conflict iff a different client has a write delegation
 2135          *   (the specialids are always a different client)
 2136          * For Writing (Write/Setattr of size)
 2137          * - there is a conflict if a different client has any delegation
 2138          * - there is a conflict if the same client has a read delegation
 2139          *   (I don't understand why this isn't allowed, but that seems to be
 2140          *    the current consensus?)
 2141          */
 2142         tstp = LIST_FIRST(&lfp->lf_deleg);
 2143         while (tstp != LIST_END(&lfp->lf_deleg)) {
 2144             nstp = LIST_NEXT(tstp, ls_file);
 2145             if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
 2146                  ((new_stp->ls_flags & NFSLCK_CHECK) &&
 2147                   (new_lop->lo_flags & NFSLCK_READ))) &&
 2148                   clp != tstp->ls_clp &&
 2149                  (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2150                  ((new_stp->ls_flags & NFSLCK_CHECK) &&
 2151                    (new_lop->lo_flags & NFSLCK_WRITE) &&
 2152                   (clp != tstp->ls_clp ||
 2153                    (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
 2154                 ret = 0;
 2155                 if (filestruct_locked != 0) {
 2156                         /* Roll back local locks. */
 2157                         NFSUNLOCKSTATE();
 2158                         if (vnode_unlocked == 0) {
 2159                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
 2160                                 NFSVOPUNLOCK(vp);
 2161                         }
 2162                         nfsrv_locallock_rollback(vp, lfp, p);
 2163                         NFSLOCKSTATE();
 2164                         nfsrv_unlocklf(lfp);
 2165                         NFSUNLOCKSTATE();
 2166                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2167                         vnode_unlocked = 0;
 2168                         if (VN_IS_DOOMED(vp))
 2169                                 ret = NFSERR_SERVERFAULT;
 2170                         NFSLOCKSTATE();
 2171                 }
 2172                 if (ret == 0)
 2173                         ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
 2174                 if (ret) {
 2175                     /*
 2176                      * nfsrv_delegconflict unlocks state when it
 2177                      * returns non-zero, which it always does.
 2178                      */
 2179                     if (other_lop) {
 2180                         free(other_lop, M_NFSDLOCK);
 2181                         other_lop = NULL;
 2182                     }
 2183                     if (ret == -1) {
 2184                         lckstp = NULL;
 2185                         goto tryagain;
 2186                     }
 2187                     error = ret;
 2188                     goto out;
 2189                 }
 2190                 /* Never gets here. */
 2191             }
 2192             tstp = nstp;
 2193         }
 2194 
 2195         /*
 2196          * Handle the unlock case by calling nfsrv_updatelock().
 2197          * (Should I have done some access checking above for unlock? For now,
 2198          *  just let it happen.)
 2199          */
 2200         if (new_stp->ls_flags & NFSLCK_UNLOCK) {
 2201                 first = new_lop->lo_first;
 2202                 end = new_lop->lo_end;
 2203                 nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
 2204                 stateidp->seqid = ++(stp->ls_stateid.seqid);
 2205                 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 2206                         stateidp->seqid = stp->ls_stateid.seqid = 1;
 2207                 stateidp->other[0] = stp->ls_stateid.other[0];
 2208                 stateidp->other[1] = stp->ls_stateid.other[1];
 2209                 stateidp->other[2] = stp->ls_stateid.other[2];
 2210                 if (filestruct_locked != 0) {
 2211                         NFSUNLOCKSTATE();
 2212                         if (vnode_unlocked == 0) {
 2213                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
 2214                                 vnode_unlocked = 1;
 2215                                 NFSVOPUNLOCK(vp);
 2216                         }
 2217                         /* Update the local locks. */
 2218                         nfsrv_localunlock(vp, lfp, first, end, p);
 2219                         NFSLOCKSTATE();
 2220                         nfsrv_unlocklf(lfp);
 2221                 }
 2222                 NFSUNLOCKSTATE();
 2223                 goto out;
 2224         }
 2225 
 2226         /*
 2227          * Search for a conflicting lock. A lock conflicts if:
 2228          * - the lock range overlaps and
 2229          * - at least one lock is a write lock and
 2230          * - it is not owned by the same lock owner
 2231          */
 2232         if (!delegation) {
 2233           LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
 2234             if (new_lop->lo_end > lop->lo_first &&
 2235                 new_lop->lo_first < lop->lo_end &&
 2236                 (new_lop->lo_flags == NFSLCK_WRITE ||
 2237                  lop->lo_flags == NFSLCK_WRITE) &&
 2238                 lckstp != lop->lo_stp &&
 2239                 (clp != lop->lo_stp->ls_clp ||
 2240                  lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
 2241                  NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
 2242                     lckstp->ls_ownerlen))) {
 2243                 if (other_lop) {
 2244                     free(other_lop, M_NFSDLOCK);
 2245                     other_lop = NULL;
 2246                 }
 2247                 if (vnode_unlocked != 0)
 2248                     ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
 2249                         NULL, p);
 2250                 else
 2251                     ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
 2252                         vp, p);
 2253                 if (ret == 1) {
 2254                     if (filestruct_locked != 0) {
 2255                         if (vnode_unlocked == 0) {
 2256                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
 2257                                 NFSVOPUNLOCK(vp);
 2258                         }
 2259                         /* Roll back local locks. */
 2260                         nfsrv_locallock_rollback(vp, lfp, p);
 2261                         NFSLOCKSTATE();
 2262                         nfsrv_unlocklf(lfp);
 2263                         NFSUNLOCKSTATE();
 2264                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2265                         vnode_unlocked = 0;
 2266                         if (VN_IS_DOOMED(vp)) {
 2267                                 error = NFSERR_SERVERFAULT;
 2268                                 goto out;
 2269                         }
 2270                     }
 2271                     /*
 2272                      * nfsrv_clientconflict() unlocks state when it
 2273                      * returns non-zero.
 2274                      */
 2275                     lckstp = NULL;
 2276                     goto tryagain;
 2277                 }
 2278                 /*
 2279                  * Found a conflicting lock, so record the conflict and
 2280                  * return the error.
 2281                  */
 2282                 if (cfp != NULL && ret == 0) {
 2283                     cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
 2284                     cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
 2285                     cfp->cl_first = lop->lo_first;
 2286                     cfp->cl_end = lop->lo_end;
 2287                     cfp->cl_flags = lop->lo_flags;
 2288                     cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
 2289                     NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
 2290                         cfp->cl_ownerlen);
 2291                 }
 2292                 if (ret == 2)
 2293                     error = NFSERR_PERM;
 2294                 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2295                     error = NFSERR_RECLAIMCONFLICT;
 2296                 else if (new_stp->ls_flags & NFSLCK_CHECK)
 2297                     error = NFSERR_LOCKED;
 2298                 else
 2299                     error = NFSERR_DENIED;
 2300                 if (filestruct_locked != 0 && ret == 0) {
 2301                         /* Roll back local locks. */
 2302                         NFSUNLOCKSTATE();
 2303                         if (vnode_unlocked == 0) {
 2304                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
 2305                                 vnode_unlocked = 1;
 2306                                 NFSVOPUNLOCK(vp);
 2307                         }
 2308                         nfsrv_locallock_rollback(vp, lfp, p);
 2309                         NFSLOCKSTATE();
 2310                         nfsrv_unlocklf(lfp);
 2311                 }
 2312                 if (ret == 0)
 2313                         NFSUNLOCKSTATE();
 2314                 goto out;
 2315             }
 2316           }
 2317         }
 2318 
 2319         /*
 2320          * We only get here if there was no lock that conflicted.
 2321          */
 2322         if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
 2323                 NFSUNLOCKSTATE();
 2324                 goto out;
 2325         }
 2326 
 2327         /*
 2328          * We only get here when we are creating or modifying a lock.
 2329          * There are two variants:
 2330          * - exist_lock_owner where lock_owner exists
 2331          * - open_to_lock_owner with new lock_owner
 2332          */
 2333         first = new_lop->lo_first;
 2334         end = new_lop->lo_end;
 2335         lock_flags = new_lop->lo_flags;
 2336         if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
 2337                 nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
 2338                 stateidp->seqid = ++(lckstp->ls_stateid.seqid);
 2339                 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 2340                         stateidp->seqid = lckstp->ls_stateid.seqid = 1;
 2341                 stateidp->other[0] = lckstp->ls_stateid.other[0];
 2342                 stateidp->other[1] = lckstp->ls_stateid.other[1];
 2343                 stateidp->other[2] = lckstp->ls_stateid.other[2];
 2344         } else {
 2345                 /*
 2346                  * The new open_to_lock_owner case.
 2347                  * Link the new nfsstate into the lists.
 2348                  */
 2349                 new_stp->ls_seq = new_stp->ls_opentolockseq;
 2350                 nfsrvd_refcache(new_stp->ls_op);
 2351                 stateidp->seqid = new_stp->ls_stateid.seqid = 1;
 2352                 stateidp->other[0] = new_stp->ls_stateid.other[0] =
 2353                     clp->lc_clientid.lval[0];
 2354                 stateidp->other[1] = new_stp->ls_stateid.other[1] =
 2355                     clp->lc_clientid.lval[1];
 2356                 stateidp->other[2] = new_stp->ls_stateid.other[2] =
 2357                     nfsrv_nextstateindex(clp);
 2358                 new_stp->ls_clp = clp;
 2359                 LIST_INIT(&new_stp->ls_lock);
 2360                 new_stp->ls_openstp = stp;
 2361                 new_stp->ls_lfp = lfp;
 2362                 nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
 2363                     lfp);
 2364                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
 2365                     new_stp, ls_hash);
 2366                 LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
 2367                 *new_lopp = NULL;
 2368                 *new_stpp = NULL;
 2369                 nfsstatsv1.srvlockowners++;
 2370                 nfsrv_openpluslock++;
 2371         }
 2372         if (filestruct_locked != 0) {
 2373                 NFSUNLOCKSTATE();
 2374                 nfsrv_locallock_commit(lfp, lock_flags, first, end);
 2375                 NFSLOCKSTATE();
 2376                 nfsrv_unlocklf(lfp);
 2377         }
 2378         NFSUNLOCKSTATE();
 2379 
 2380 out:
 2381         if (haslock) {
 2382                 NFSLOCKV4ROOTMUTEX();
 2383                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2384                 NFSUNLOCKV4ROOTMUTEX();
 2385         }
 2386         if (vnode_unlocked != 0) {
 2387                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2388                 if (error == 0 && VN_IS_DOOMED(vp))
 2389                         error = NFSERR_SERVERFAULT;
 2390         }
 2391         if (other_lop)
 2392                 free(other_lop, M_NFSDLOCK);
 2393         NFSEXITCODE2(error, nd);
 2394         return (error);
 2395 }
 2396 
 2397 /*
 2398  * Check for state errors for Open.
 2399  * repstat is passed back out as an error if more critical errors
 2400  * are not detected.
       *
       * The checks are made in this order:
       * - nfsrv_checkrestart() and the nfsrv_openpluslock high water mark,
       *   both before the state lock is taken
       * - with the state lock held: client lookup, open owner seqid and
       *   grace/stable storage checks; if all of these pass and repstat is
       *   non-zero, repstat becomes the return value
       * - only when vp is non-NULL: the lock file lookup, the matching
       *   delegation for NFSLCK_DELEGCUR, share access/deny conflicts with
       *   other opens and, finally, conflicting delegations
       *
       * clientid - looked up via nfsrv_getclient() with CLOPS_RENEW
       * stateidp - only examined for NFSLCK_DELEGCUR, where it is compared
       *            against the delegation stateids on the file
       * new_stp  - supplies ls_flags, ls_seq, ls_op and ls_stateid for the
       *            checks
       * vp       - vnode being opened, or NULL if the file does not exist yet
       * nd, p    - handed through to the helper functions; nd->nd_flag is
       *            also tested for ND_NFSV41
       * repstat  - error already found by the caller (see above)
       *
       * Returns 0 if no problem was found, otherwise an error value
       * (an NFSERR_xxx set here or whatever a helper function returned).
       *
       * haslock is set through the pointer handed to nfsrv_clientconflict()
       * and nfsrv_delegconflict(); when it is non-zero the exit paths below
       * release nfsv4rootfs_lock.
 2401  */
 2402 int
 2403 nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
 2404     struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
 2405     NFSPROC_T *p, int repstat)
 2406 {
 2407         struct nfsstate *stp, *nstp;
 2408         struct nfsclient *clp;
 2409         struct nfsstate *ownerstp;
 2410         struct nfslockfile *lfp, *new_lfp;
 2411         int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
 2412 
              /*
               * readonly selects the weaker delegation conflict rule used at
               * the end of this function.
               * NOTE(review): presumably NFSLCK_SHAREBITS covers the access and
               * deny bits, so this means "read access, deny none" - confirm.
               */
 2413         if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
 2414                 readonly = 1;
 2415         /*
 2416          * Check for restart conditions (client and server).
 2417          */
 2418         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 2419                 &new_stp->ls_stateid, 0);
 2420         if (error)
 2421                 goto out;
 2422 
 2423         /*
 2424          * Check for state resource limit exceeded.
 2425          * Technically this should be SMP protected, but the worst
 2426          * case error is "out by one or two" on the count when it
 2427          * returns NFSERR_RESOURCE and the limit is just a rather
 2428          * arbitrary high water mark, so no harm is done.
 2429          */
 2430         if (nfsrv_openpluslock > nfsrv_v4statelimit) {
 2431                 error = NFSERR_RESOURCE;
 2432                 goto out;
 2433         }
 2434 
              /*
               * Retry point: reached again when a conflict helper below asks
               * for another pass. The M_WAITOK allocation and the
               * nfsrv_getlockfh() call are done before the state lock is
               * acquired.
               */
 2435 tryagain:
 2436         new_lfp = malloc(sizeof (struct nfslockfile),
 2437             M_NFSDLOCKFILE, M_WAITOK);
 2438         if (vp)
 2439                 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
 2440                     NULL, p);
 2441         NFSLOCKSTATE();
 2442         /*
 2443          * Get the nfsclient structure.
 2444          */
 2445         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 2446             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 2447 
 2448         /*
 2449          * Look up the open owner. See if it needs confirmation and
 2450          * check the seq#, as required.
 2451          */
 2452         if (!error)
 2453                 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
 2454 
              /* ownerstp is only read when nfsrv_getowner() was called above. */
 2455         if (!error && ownerstp) {
 2456                 error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
 2457                     new_stp->ls_op);
 2458                 /*
 2459                  * If the OpenOwner hasn't been confirmed, assume the
 2460                  * old one was a replay and this one is ok.
 2461                  * See: RFC3530 Sec. 14.2.18.
 2462                  */
 2463                 if (error == NFSERR_BADSEQID &&
 2464                     (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
 2465                         error = 0;
 2466         }
 2467 
 2468         /*
 2469          * Check for grace.
 2470          */
 2471         if (!error)
 2472                 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
              /* A reclaim is refused when nfsrv_checkstable() returns non-zero. */
 2473         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
 2474                 nfsrv_checkstable(clp))
 2475                 error = NFSERR_NOGRACE;
 2476 
 2477         /*
 2478          * If none of the above errors occurred, let repstat be
 2479          * returned.
 2480          */
 2481         if (repstat && !error)
 2482                 error = repstat;
 2483         if (error) {
 2484                 NFSUNLOCKSTATE();
 2485                 if (haslock) {
 2486                         NFSLOCKV4ROOTMUTEX();
 2487                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2488                         NFSUNLOCKV4ROOTMUTEX();
 2489                 }
 2490                 free(new_lfp, M_NFSDLOCKFILE);
 2491                 goto out;
 2492         }
 2493 
 2494         /*
 2495          * If vp == NULL, the file doesn't exist yet, so return ok.
 2496          * (This always happens on the first pass, so haslock must be 0.)
 2497          */
 2498         if (vp == NULL) {
 2499                 NFSUNLOCKSTATE();
 2500                 free(new_lfp, M_NFSDLOCKFILE);
 2501                 goto out;
 2502         }
 2503 
 2504         /*
 2505          * Get the structure for the underlying file.
 2506          */
 2507         if (getfhret)
 2508                 error = getfhret;
 2509         else
 2510                 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
 2511                     NULL, 0);
              /*
               * NOTE(review): new_lfp is passed by reference above; presumably
               * nfsrv_getlockfile() sets it to NULL when it keeps the
               * structure, so only an unused one is freed here - confirm.
               */
 2512         if (new_lfp)
 2513                 free(new_lfp, M_NFSDLOCKFILE);
 2514         if (error) {
 2515                 NFSUNLOCKSTATE();
 2516                 if (haslock) {
 2517                         NFSLOCKV4ROOTMUTEX();
 2518                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2519                         NFSUNLOCKV4ROOTMUTEX();
 2520                 }
 2521                 goto out;
 2522         }
 2523 
 2524         /*
 2525          * Search for a conflicting open/share.
 2526          */
 2527         if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
 2528             /*
 2529              * For Delegate_Cur, search for the matching Delegation,
 2530              * which indicates no conflict.
 2531              * An old delegation should have been recovered by the
 2532              * client doing a Claim_DELEGATE_Prev, so I won't let
 2533              * it match and return NFSERR_EXPIRED. Should I let it
 2534              * match?
 2535              */
 2536             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 2537                 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
 2538                     (((nd->nd_flag & ND_NFSV41) != 0 &&
 2539                     stateidp->seqid == 0) ||
 2540                     stateidp->seqid == stp->ls_stateid.seqid) &&
 2541                     !NFSBCMP(stateidp->other, stp->ls_stateid.other,
 2542                           NFSX_STATEIDOTHER))
 2543                         break;
 2544             }
                  /*
                   * stp equals LIST_END() here when the loop ran off the end
                   * without a match. An open asking for write access against a
                   * read delegation is rejected the same way.
                   */
 2545             if (stp == LIST_END(&lfp->lf_deleg) ||
 2546                 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
 2547                  (stp->ls_flags & NFSLCK_DELEGREAD))) {
 2548                 NFSUNLOCKSTATE();
 2549                 if (haslock) {
 2550                         NFSLOCKV4ROOTMUTEX();
 2551                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2552                         NFSUNLOCKV4ROOTMUTEX();
 2553                 }
 2554                 error = NFSERR_EXPIRED;
 2555                 goto out;
 2556             }
 2557         }
 2558 
 2559         /*
 2560          * Check for access/deny bit conflicts. I check for the same
 2561          * owner as well, in case the client didn't bother.
 2562          */
 2563         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
                      /*
                       * Skipped entirely for NFSLCK_DELEGCUR. Otherwise a conflict
                       * exists when the new access bits hit the existing open's
                       * deny bits (shifted down by NFSLCK_SHIFT) or the existing
                       * access bits hit the new deny bits.
                       */
 2564                 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
 2565                     (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
 2566                       ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
 2567                      ((stp->ls_flags & NFSLCK_ACCESSBITS) &
 2568                       ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
 2569                         ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
 2570                         if (ret == 1) {
 2571                                 /*
 2572                                  * nfsrv_clientconflict() unlocks
 2573                                  * state when it returns non-zero.
 2574                                  */
 2575                                 goto tryagain;
 2576                         }
                              /*
                               * ret == 2 is reported as NFSERR_PERM. Only for
                               * ret == 0 is the state lock still held, so it
                               * is released here in that case alone.
                               */
 2577                         if (ret == 2)
 2578                                 error = NFSERR_PERM;
 2579                         else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2580                                 error = NFSERR_RECLAIMCONFLICT;
 2581                         else
 2582                                 error = NFSERR_SHAREDENIED;
 2583                         if (ret == 0)
 2584                                 NFSUNLOCKSTATE();
 2585                         if (haslock) {
 2586                                 NFSLOCKV4ROOTMUTEX();
 2587                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2588                                 NFSUNLOCKV4ROOTMUTEX();
 2589                         }
 2590                         goto out;
 2591                 }
 2592         }
 2593 
 2594         /*
 2595          * Check for a conflicting delegation. If one is found, call
 2596          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2597          * been set yet, it will get the lock. Otherwise, it will recall
 2598          * the delegation. Then, we try try again...
 2599          * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
 2600          *  isn't a conflict.)
 2601          * I currently believe the conflict algorithm to be:
 2602          * For Open with Read Access and Deny None
 2603          * - there is a conflict iff a different client has a write delegation
 2604          * For Open with other Write Access or any Deny except None
 2605          * - there is a conflict if a different client has any delegation
 2606          * - there is a conflict if the same client has a read delegation
 2607          *   (The current consensus is that this last case should be
 2608          *    considered a conflict since the client with a read delegation
 2609          *    could have done an Open with ReadAccess and WriteDeny
 2610          *    locally and then not have checked for the WriteDeny.)
 2611          * Don't check for a Reclaim, since that will be dealt with
 2612          * by nfsrv_openctrl().
 2613          */
 2614         if (!(new_stp->ls_flags &
 2615                 (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
 2616             stp = LIST_FIRST(&lfp->lf_deleg);
 2617             while (stp != LIST_END(&lfp->lf_deleg)) {
                      /* Fetch the successor before nfsrv_delegconflict() is called. */
 2618                 nstp = LIST_NEXT(stp, ls_file);
 2619                 if ((readonly && stp->ls_clp != clp &&
 2620                        (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2621                     (!readonly && (stp->ls_clp != clp ||
 2622                          (stp->ls_flags & NFSLCK_DELEGREAD)))) {
 2623                         ret = nfsrv_delegconflict(stp, &haslock, p, vp);
 2624                         if (ret) {
 2625                             /*
 2626                              * nfsrv_delegconflict() unlocks state
 2627                              * when it returns non-zero.
 2628                              */
 2629                             if (ret == -1)
 2630                                 goto tryagain;
                                  /*
                                   * NOTE(review): unlike the other error exits,
                                   * nfsv4rootfs_lock is not released here when
                                   * haslock is set; presumably
                                   * nfsrv_delegconflict() drops it before
                                   * returning an error - confirm.
                                   */
 2631                             error = ret;
 2632                             goto out;
 2633                         }
 2634                 }
 2635                 stp = nstp;
 2636             }
 2637         }
              /* No conflict was found: drop the state lock and any v4root lock. */
 2638         NFSUNLOCKSTATE();
 2639         if (haslock) {
 2640                 NFSLOCKV4ROOTMUTEX();
 2641                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2642                 NFSUNLOCKV4ROOTMUTEX();
 2643         }
 2644 
 2645 out:
 2646         NFSEXITCODE2(error, nd);
 2647         return (error);
 2648 }
 2649 
 2650 /*
 2651  * Open control function to create/update open state for an open.
 2652  */
 2653 int
 2654 nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
 2655     struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
 2656     nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
 2657     NFSPROC_T *p, u_quad_t filerev)
 2658 {
 2659         struct nfsstate *new_stp = *new_stpp;
 2660         struct nfsstate *stp, *nstp;
 2661         struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
 2662         struct nfslockfile *lfp, *new_lfp;
 2663         struct nfsclient *clp;
 2664         int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
 2665         int readonly = 0, cbret = 1, getfhret = 0;
 2666         int gotstate = 0, len = 0;
 2667         u_char *clidp = NULL;
 2668 
 2669         if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
 2670                 readonly = 1;
 2671         /*
 2672          * Check for restart conditions (client and server).
 2673          * (Paranoia, should have been detected by nfsrv_opencheck().)
 2674          * If an error does show up, return NFSERR_EXPIRED, since the
 2675          * seqid# has already been incremented.
 2676          */
 2677         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 2678             &new_stp->ls_stateid, 0);
 2679         if (error) {
 2680                 printf("Nfsd: openctrl unexpected restart err=%d\n",
 2681                     error);
 2682                 error = NFSERR_EXPIRED;
 2683                 goto out;
 2684         }
 2685 
 2686         clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
 2687 tryagain:
 2688         new_lfp = malloc(sizeof (struct nfslockfile),
 2689             M_NFSDLOCKFILE, M_WAITOK);
 2690         new_open = malloc(sizeof (struct nfsstate),
 2691             M_NFSDSTATE, M_WAITOK);
 2692         new_deleg = malloc(sizeof (struct nfsstate),
 2693             M_NFSDSTATE, M_WAITOK);
 2694         getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
 2695             NULL, p);
 2696         NFSLOCKSTATE();
 2697         /*
 2698          * Get the client structure. Since the linked lists could be changed
 2699          * by other nfsd processes if this process does a tsleep(), one of
 2700          * two things must be done.
 2701          * 1 - don't tsleep()
 2702          * or
 2703          * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
 2704          *     before using the lists, since this lock stops the other
 2705          *     nfsd. This should only be used for rare cases, since it
 2706          *     essentially single threads the nfsd.
 2707          *     At this time, it is only done for cases where the stable
 2708          *     storage file must be written prior to completion of state
 2709          *     expiration.
 2710          */
 2711         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 2712             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 2713         if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
 2714             clp->lc_program) {
 2715                 /*
 2716                  * This happens on the first open for a client
 2717                  * that supports callbacks.
 2718                  */
 2719                 NFSUNLOCKSTATE();
 2720                 /*
 2721                  * Although nfsrv_docallback() will sleep, clp won't
 2722                  * go away, since they are only removed when the
 2723                  * nfsv4_lock() has blocked the nfsd threads. The
 2724                  * fields in clp can change, but having multiple
 2725                  * threads do this Null callback RPC should be
 2726                  * harmless.
 2727                  */
 2728                 cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
 2729                     NULL, 0, NULL, NULL, NULL, 0, p);
 2730                 NFSLOCKSTATE();
 2731                 clp->lc_flags &= ~LCL_NEEDSCBNULL;
 2732                 if (!cbret)
 2733                         clp->lc_flags |= LCL_CALLBACKSON;
 2734         }
 2735 
 2736         /*
 2737          * Look up the open owner. See if it needs confirmation and
 2738          * check the seq#, as required.
 2739          */
 2740         if (!error)
 2741                 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
 2742 
 2743         if (error) {
 2744                 NFSUNLOCKSTATE();
 2745                 printf("Nfsd: openctrl unexpected state err=%d\n",
 2746                         error);
 2747                 free(new_lfp, M_NFSDLOCKFILE);
 2748                 free(new_open, M_NFSDSTATE);
 2749                 free(new_deleg, M_NFSDSTATE);
 2750                 if (haslock) {
 2751                         NFSLOCKV4ROOTMUTEX();
 2752                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2753                         NFSUNLOCKV4ROOTMUTEX();
 2754                 }
 2755                 error = NFSERR_EXPIRED;
 2756                 goto out;
 2757         }
 2758 
 2759         if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2760                 nfsrv_markstable(clp);
 2761 
 2762         /*
 2763          * Get the structure for the underlying file.
 2764          */
 2765         if (getfhret)
 2766                 error = getfhret;
 2767         else
 2768                 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
 2769                     NULL, 0);
 2770         if (new_lfp)
 2771                 free(new_lfp, M_NFSDLOCKFILE);
 2772         if (error) {
 2773                 NFSUNLOCKSTATE();
 2774                 printf("Nfsd openctrl unexpected getlockfile err=%d\n",
 2775                     error);
 2776                 free(new_open, M_NFSDSTATE);
 2777                 free(new_deleg, M_NFSDSTATE);
 2778                 if (haslock) {
 2779                         NFSLOCKV4ROOTMUTEX();
 2780                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2781                         NFSUNLOCKV4ROOTMUTEX();
 2782                 }
 2783                 goto out;
 2784         }
 2785 
 2786         /*
 2787          * Search for a conflicting open/share.
 2788          */
 2789         if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
 2790             /*
 2791              * For Delegate_Cur, search for the matching Delegation,
 2792              * which indicates no conflict.
 2793              * An old delegation should have been recovered by the
 2794              * client doing a Claim_DELEGATE_Prev, so I won't let
 2795              * it match and return NFSERR_EXPIRED. Should I let it
 2796              * match?
 2797              */
 2798             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 2799                 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
 2800                     (((nd->nd_flag & ND_NFSV41) != 0 &&
 2801                     stateidp->seqid == 0) ||
 2802                     stateidp->seqid == stp->ls_stateid.seqid) &&
 2803                     !NFSBCMP(stateidp->other, stp->ls_stateid.other,
 2804                         NFSX_STATEIDOTHER))
 2805                         break;
 2806             }
 2807             if (stp == LIST_END(&lfp->lf_deleg) ||
 2808                 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
 2809                  (stp->ls_flags & NFSLCK_DELEGREAD))) {
 2810                 NFSUNLOCKSTATE();
 2811                 printf("Nfsd openctrl unexpected expiry\n");
 2812                 free(new_open, M_NFSDSTATE);
 2813                 free(new_deleg, M_NFSDSTATE);
 2814                 if (haslock) {
 2815                         NFSLOCKV4ROOTMUTEX();
 2816                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2817                         NFSUNLOCKV4ROOTMUTEX();
 2818                 }
 2819                 error = NFSERR_EXPIRED;
 2820                 goto out;
 2821             }
 2822 
 2823             /*
 2824              * Don't issue a Delegation, since one already exists and
 2825              * delay delegation timeout, as required.
 2826              */
 2827             delegate = 0;
 2828             nfsrv_delaydelegtimeout(stp);
 2829         }
 2830 
 2831         /*
 2832          * Check for access/deny bit conflicts. I also check for the
 2833          * same owner, since the client might not have bothered to check.
 2834          * Also, note an open for the same file and owner, if found,
 2835          * which is all we do here for Delegate_Cur, since conflict
 2836          * checking is already done.
 2837          */
 2838         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 2839                 if (ownerstp && stp->ls_openowner == ownerstp)
 2840                         openstp = stp;
 2841                 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
 2842                     /*
 2843                      * If another client has the file open, the only
 2844                      * delegation that can be issued is a Read delegation
 2845                      * and only if it is a Read open with Deny none.
 2846                      */
 2847                     if (clp != stp->ls_clp) {
 2848                         if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
 2849                             NFSLCK_READACCESS)
 2850                             writedeleg = 0;
 2851                         else
 2852                             delegate = 0;
 2853                     }
 2854                     if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
 2855                         ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
 2856                        ((stp->ls_flags & NFSLCK_ACCESSBITS) &
 2857                         ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
 2858                         ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
 2859                         if (ret == 1) {
 2860                                 /*
 2861                                  * nfsrv_clientconflict() unlocks state
 2862                                  * when it returns non-zero.
 2863                                  */
 2864                                 free(new_open, M_NFSDSTATE);
 2865                                 free(new_deleg, M_NFSDSTATE);
 2866                                 openstp = NULL;
 2867                                 goto tryagain;
 2868                         }
 2869                         if (ret == 2)
 2870                                 error = NFSERR_PERM;
 2871                         else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2872                                 error = NFSERR_RECLAIMCONFLICT;
 2873                         else
 2874                                 error = NFSERR_SHAREDENIED;
 2875                         if (ret == 0)
 2876                                 NFSUNLOCKSTATE();
 2877                         if (haslock) {
 2878                                 NFSLOCKV4ROOTMUTEX();
 2879                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2880                                 NFSUNLOCKV4ROOTMUTEX();
 2881                         }
 2882                         free(new_open, M_NFSDSTATE);
 2883                         free(new_deleg, M_NFSDSTATE);
 2884                         printf("nfsd openctrl unexpected client cnfl\n");
 2885                         goto out;
 2886                     }
 2887                 }
 2888         }
 2889 
 2890         /*
 2891          * Check for a conflicting delegation. If one is found, call
 2892          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2893          * been set yet, it will get the lock. Otherwise, it will recall
 2894          * the delegation. Then, we try again...
 2895          * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
 2896          *  isn't a conflict.)
 2897          * I currently believe the conflict algorithm to be:
 2898          * For Open with Read Access and Deny None
 2899          * - there is a conflict iff a different client has a write delegation
 2900          * For Open with other Write Access or any Deny except None
 2901          * - there is a conflict if a different client has any delegation
 2902          * - there is a conflict if the same client has a read delegation
 2903          *   (The current consensus is that this last case should be
 2904          *    considered a conflict since the client with a read delegation
 2905          *    could have done an Open with ReadAccess and WriteDeny
 2906          *    locally and then not have checked for the WriteDeny.)
 2907          */
 2908         if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
 2909             stp = LIST_FIRST(&lfp->lf_deleg);
 2910             while (stp != LIST_END(&lfp->lf_deleg)) {
 2911                 nstp = LIST_NEXT(stp, ls_file);
 2912                 if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
 2913                         writedeleg = 0;
 2914                 else
 2915                         delegate = 0;
 2916                 if ((readonly && stp->ls_clp != clp &&
 2917                        (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2918                     (!readonly && (stp->ls_clp != clp ||
 2919                          (stp->ls_flags & NFSLCK_DELEGREAD)))) {
 2920                     if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 2921                         delegate = 2;
 2922                     } else {
 2923                         ret = nfsrv_delegconflict(stp, &haslock, p, vp);
 2924                         if (ret) {
 2925                             /*
 2926                              * nfsrv_delegconflict() unlocks state
 2927                              * when it returns non-zero.
 2928                              */
 2929                             printf("Nfsd openctrl unexpected deleg cnfl\n");
 2930                             free(new_open, M_NFSDSTATE);
 2931                             free(new_deleg, M_NFSDSTATE);
 2932                             if (ret == -1) {
 2933                                 openstp = NULL;
 2934                                 goto tryagain;
 2935                             }
 2936                             error = ret;
 2937                             goto out;
 2938                         }
 2939                     }
 2940                 }
 2941                 stp = nstp;
 2942             }
 2943         }
 2944 
 2945         /*
 2946          * We only get here if there was no open that conflicted.
 2947          * If an open for the owner exists, bitwise-OR in the access/deny bits.
 2948          * Otherwise it is a new open. If the open_owner hasn't been
 2949          * confirmed, replace the open with the new one needing confirmation,
 2950          * otherwise add the open.
 2951          */
 2952         if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
 2953             /*
 2954              * Handle NFSLCK_DELEGPREV by searching the old delegations for
 2955              * a match. If found, just move the old delegation to the current
 2956              * delegation list and issue open. If not found, return
 2957              * NFSERR_EXPIRED.
 2958              */
 2959             LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
 2960                 if (stp->ls_lfp == lfp) {
 2961                     /* Found it */
 2962                     if (stp->ls_clp != clp)
 2963                         panic("olddeleg clp");
 2964                     LIST_REMOVE(stp, ls_list);
 2965                     LIST_REMOVE(stp, ls_hash);
 2966                     stp->ls_flags &= ~NFSLCK_OLDDELEG;
 2967                     stp->ls_stateid.seqid = delegstateidp->seqid = 1;
 2968                     stp->ls_stateid.other[0] = delegstateidp->other[0] =
 2969                         clp->lc_clientid.lval[0];
 2970                     stp->ls_stateid.other[1] = delegstateidp->other[1] =
 2971                         clp->lc_clientid.lval[1];
 2972                     stp->ls_stateid.other[2] = delegstateidp->other[2] =
 2973                         nfsrv_nextstateindex(clp);
 2974                     stp->ls_compref = nd->nd_compref;
 2975                     LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
 2976                     LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 2977                         stp->ls_stateid), stp, ls_hash);
 2978                     if (stp->ls_flags & NFSLCK_DELEGWRITE)
 2979                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 2980                     else
 2981                         *rflagsp |= NFSV4OPEN_READDELEGATE;
 2982                     clp->lc_delegtime = NFSD_MONOSEC +
 2983                         nfsrv_lease + NFSRV_LEASEDELTA;
 2984 
 2985                     /*
 2986                      * Now, do the associated open.
 2987                      */
 2988                     new_open->ls_stateid.seqid = 1;
 2989                     new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 2990                     new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 2991                     new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 2992                     new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
 2993                         NFSLCK_OPEN;
 2994                     if (stp->ls_flags & NFSLCK_DELEGWRITE)
 2995                         new_open->ls_flags |= (NFSLCK_READACCESS |
 2996                             NFSLCK_WRITEACCESS);
 2997                     else
 2998                         new_open->ls_flags |= NFSLCK_READACCESS;
 2999                     new_open->ls_uid = new_stp->ls_uid;
 3000                     new_open->ls_lfp = lfp;
 3001                     new_open->ls_clp = clp;
 3002                     LIST_INIT(&new_open->ls_open);
 3003                     LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3004                     LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3005                         new_open, ls_hash);
 3006                     /*
 3007                      * and handle the open owner
 3008                      */
 3009                     if (ownerstp) {
 3010                         new_open->ls_openowner = ownerstp;
 3011                         LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
 3012                     } else {
 3013                         new_open->ls_openowner = new_stp;
 3014                         new_stp->ls_flags = 0;
 3015                         nfsrvd_refcache(new_stp->ls_op);
 3016                         new_stp->ls_noopens = 0;
 3017                         LIST_INIT(&new_stp->ls_open);
 3018                         LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3019                         LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3020                         *new_stpp = NULL;
 3021                         nfsstatsv1.srvopenowners++;
 3022                         nfsrv_openpluslock++;
 3023                     }
 3024                     openstp = new_open;
 3025                     new_open = NULL;
 3026                     nfsstatsv1.srvopens++;
 3027                     nfsrv_openpluslock++;
 3028                     break;
 3029                 }
 3030             }
 3031             if (stp == LIST_END(&clp->lc_olddeleg))
 3032                 error = NFSERR_EXPIRED;
 3033         } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
 3034             /*
 3035              * Scan to see that a delegation for this client and file
 3036              * doesn't already exist.
 3037              * There also shouldn't yet be an Open for this file and
 3038              * openowner.
 3039              */
 3040             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 3041                 if (stp->ls_clp == clp)
 3042                     break;
 3043             }
 3044             if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
 3045                 /*
 3046                  * This is the Claim_Previous case with a delegation
 3047                  * type != Delegate_None.
 3048                  */
 3049                 /*
 3050                  * First, add the delegation. (Although we must issue the
 3051                  * delegation, we can also ask for an immediate return.)
 3052                  */
 3053                 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3054                 new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
 3055                     clp->lc_clientid.lval[0];
 3056                 new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
 3057                     clp->lc_clientid.lval[1];
 3058                 new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
 3059                     nfsrv_nextstateindex(clp);
 3060                 if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
 3061                     new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3062                         NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3063                     *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3064                     nfsrv_writedelegcnt++;
 3065                 } else {
 3066                     new_deleg->ls_flags = (NFSLCK_DELEGREAD |
 3067                         NFSLCK_READACCESS);
 3068                     *rflagsp |= NFSV4OPEN_READDELEGATE;
 3069                 }
 3070                 new_deleg->ls_uid = new_stp->ls_uid;
 3071                 new_deleg->ls_lfp = lfp;
 3072                 new_deleg->ls_clp = clp;
 3073                 new_deleg->ls_filerev = filerev;
 3074                 new_deleg->ls_compref = nd->nd_compref;
 3075                 new_deleg->ls_lastrecall = 0;
 3076                 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3077                 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3078                     new_deleg->ls_stateid), new_deleg, ls_hash);
 3079                 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3080                 new_deleg = NULL;
 3081                 if (delegate == 2 || nfsrv_issuedelegs == 0 ||
 3082                     (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3083                      LCL_CALLBACKSON ||
 3084                     NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
 3085                     !NFSVNO_DELEGOK(vp))
 3086                     *rflagsp |= NFSV4OPEN_RECALL;
 3087                 nfsstatsv1.srvdelegates++;
 3088                 nfsrv_openpluslock++;
 3089                 nfsrv_delegatecnt++;
 3090 
 3091                 /*
 3092                  * Now, do the associated open.
 3093                  */
 3094                 new_open->ls_stateid.seqid = 1;
 3095                 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3096                 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3097                 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3098                 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
 3099                     NFSLCK_OPEN;
 3100                 if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
 3101                         new_open->ls_flags |= (NFSLCK_READACCESS |
 3102                             NFSLCK_WRITEACCESS);
 3103                 else
 3104                         new_open->ls_flags |= NFSLCK_READACCESS;
 3105                 new_open->ls_uid = new_stp->ls_uid;
 3106                 new_open->ls_lfp = lfp;
 3107                 new_open->ls_clp = clp;
 3108                 LIST_INIT(&new_open->ls_open);
 3109                 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3110                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3111                    new_open, ls_hash);
 3112                 /*
 3113                  * and handle the open owner
 3114                  */
 3115                 if (ownerstp) {
 3116                     new_open->ls_openowner = ownerstp;
 3117                     LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
 3118                 } else {
 3119                     new_open->ls_openowner = new_stp;
 3120                     new_stp->ls_flags = 0;
 3121                     nfsrvd_refcache(new_stp->ls_op);
 3122                     new_stp->ls_noopens = 0;
 3123                     LIST_INIT(&new_stp->ls_open);
 3124                     LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3125                     LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3126                     *new_stpp = NULL;
 3127                     nfsstatsv1.srvopenowners++;
 3128                     nfsrv_openpluslock++;
 3129                 }
 3130                 openstp = new_open;
 3131                 new_open = NULL;
 3132                 nfsstatsv1.srvopens++;
 3133                 nfsrv_openpluslock++;
 3134             } else {
 3135                 error = NFSERR_RECLAIMCONFLICT;
 3136             }
 3137         } else if (ownerstp) {
 3138                 if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
 3139                     /* Replace the open */
 3140                     if (ownerstp->ls_op)
 3141                         nfsrvd_derefcache(ownerstp->ls_op);
 3142                     ownerstp->ls_op = new_stp->ls_op;
 3143                     nfsrvd_refcache(ownerstp->ls_op);
 3144                     ownerstp->ls_seq = new_stp->ls_seq;
 3145                     *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
 3146                     stp = LIST_FIRST(&ownerstp->ls_open);
 3147                     stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
 3148                         NFSLCK_OPEN;
 3149                     stp->ls_stateid.seqid = 1;
 3150                     stp->ls_uid = new_stp->ls_uid;
 3151                     if (lfp != stp->ls_lfp) {
 3152                         LIST_REMOVE(stp, ls_file);
 3153                         LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
 3154                         stp->ls_lfp = lfp;
 3155                     }
 3156                     openstp = stp;
 3157                 } else if (openstp) {
 3158                     openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
 3159                     openstp->ls_stateid.seqid++;
 3160                     if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3161                         openstp->ls_stateid.seqid == 0)
 3162                         openstp->ls_stateid.seqid = 1;
 3163 
 3164                     /*
 3165                      * This is where we can choose to issue a delegation.
 3166                      */
 3167                     if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
 3168                         *rflagsp |= NFSV4OPEN_WDNOTWANTED;
 3169                     else if (nfsrv_issuedelegs == 0)
 3170                         *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
 3171                     else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
 3172                         *rflagsp |= NFSV4OPEN_WDRESOURCE;
 3173                     else if (delegate == 0 || writedeleg == 0 ||
 3174                         NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
 3175                         nfsrv_writedelegifpos == 0) ||
 3176                         !NFSVNO_DELEGOK(vp) ||
 3177                         (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
 3178                         (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3179                          LCL_CALLBACKSON)
 3180                         *rflagsp |= NFSV4OPEN_WDCONTENTION;
 3181                     else {
 3182                         new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3183                         new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
 3184                             = clp->lc_clientid.lval[0];
 3185                         new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
 3186                             = clp->lc_clientid.lval[1];
 3187                         new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
 3188                             = nfsrv_nextstateindex(clp);
 3189                         new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3190                             NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3191                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3192                         new_deleg->ls_uid = new_stp->ls_uid;
 3193                         new_deleg->ls_lfp = lfp;
 3194                         new_deleg->ls_clp = clp;
 3195                         new_deleg->ls_filerev = filerev;
 3196                         new_deleg->ls_compref = nd->nd_compref;
 3197                         new_deleg->ls_lastrecall = 0;
 3198                         nfsrv_writedelegcnt++;
 3199                         LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3200                         LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3201                             new_deleg->ls_stateid), new_deleg, ls_hash);
 3202                         LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3203                         new_deleg = NULL;
 3204                         nfsstatsv1.srvdelegates++;
 3205                         nfsrv_openpluslock++;
 3206                         nfsrv_delegatecnt++;
 3207                     }
 3208                 } else {
 3209                     new_open->ls_stateid.seqid = 1;
 3210                     new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3211                     new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3212                     new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3213                     new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
 3214                         NFSLCK_OPEN;
 3215                     new_open->ls_uid = new_stp->ls_uid;
 3216                     new_open->ls_openowner = ownerstp;
 3217                     new_open->ls_lfp = lfp;
 3218                     new_open->ls_clp = clp;
 3219                     LIST_INIT(&new_open->ls_open);
 3220                     LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3221                     LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
 3222                     LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3223                         new_open, ls_hash);
 3224                     openstp = new_open;
 3225                     new_open = NULL;
 3226                     nfsstatsv1.srvopens++;
 3227                     nfsrv_openpluslock++;
 3228 
 3229                     /*
 3230                      * This is where we can choose to issue a delegation.
 3231                      */
 3232                     if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
 3233                         *rflagsp |= NFSV4OPEN_WDNOTWANTED;
 3234                     else if (nfsrv_issuedelegs == 0)
 3235                         *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
 3236                     else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
 3237                         *rflagsp |= NFSV4OPEN_WDRESOURCE;
 3238                     else if (delegate == 0 || (writedeleg == 0 &&
 3239                         readonly == 0) || !NFSVNO_DELEGOK(vp) ||
 3240                         (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3241                          LCL_CALLBACKSON)
 3242                         *rflagsp |= NFSV4OPEN_WDCONTENTION;
 3243                     else {
 3244                         new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3245                         new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
 3246                             = clp->lc_clientid.lval[0];
 3247                         new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
 3248                             = clp->lc_clientid.lval[1];
 3249                         new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
 3250                             = nfsrv_nextstateindex(clp);
 3251                         if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
 3252                             (nfsrv_writedelegifpos || !readonly) &&
 3253                             (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
 3254                             new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3255                                 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3256                             *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3257                             nfsrv_writedelegcnt++;
 3258                         } else {
 3259                             new_deleg->ls_flags = (NFSLCK_DELEGREAD |
 3260                                 NFSLCK_READACCESS);
 3261                             *rflagsp |= NFSV4OPEN_READDELEGATE;
 3262                         }
 3263                         new_deleg->ls_uid = new_stp->ls_uid;
 3264                         new_deleg->ls_lfp = lfp;
 3265                         new_deleg->ls_clp = clp;
 3266                         new_deleg->ls_filerev = filerev;
 3267                         new_deleg->ls_compref = nd->nd_compref;
 3268                         new_deleg->ls_lastrecall = 0;
 3269                         LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3270                         LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3271                             new_deleg->ls_stateid), new_deleg, ls_hash);
 3272                         LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3273                         new_deleg = NULL;
 3274                         nfsstatsv1.srvdelegates++;
 3275                         nfsrv_openpluslock++;
 3276                         nfsrv_delegatecnt++;
 3277                     }
 3278                 }
 3279         } else {
 3280                 /*
 3281                  * New owner case. Start the open_owner sequence with a
 3282                  * Needs confirmation (unless a reclaim) and hang the
 3283                  * new open off it.
 3284                  */
 3285                 new_open->ls_stateid.seqid = 1;
 3286                 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3287                 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3288                 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3289                 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
 3290                     NFSLCK_OPEN;
 3291                 new_open->ls_uid = new_stp->ls_uid;
 3292                 LIST_INIT(&new_open->ls_open);
 3293                 new_open->ls_openowner = new_stp;
 3294                 new_open->ls_lfp = lfp;
 3295                 new_open->ls_clp = clp;
 3296                 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3297                 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 3298                         new_stp->ls_flags = 0;
 3299                 } else if ((nd->nd_flag & ND_NFSV41) != 0) {
 3300                         /* NFSv4.1 never needs confirmation. */
 3301                         new_stp->ls_flags = 0;
 3302 
 3303                         /*
 3304                          * This is where we can choose to issue a delegation.
 3305                          */
 3306                         if (delegate && nfsrv_issuedelegs &&
 3307                             (writedeleg || readonly) &&
 3308                             (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
 3309                              LCL_CALLBACKSON &&
 3310                             !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
 3311                             NFSVNO_DELEGOK(vp) &&
 3312                             ((nd->nd_flag & ND_NFSV41) == 0 ||
 3313                              (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
 3314                                 new_deleg->ls_stateid.seqid =
 3315                                     delegstateidp->seqid = 1;
 3316                                 new_deleg->ls_stateid.other[0] =
 3317                                     delegstateidp->other[0]
 3318                                     = clp->lc_clientid.lval[0];
 3319                                 new_deleg->ls_stateid.other[1] =
 3320                                     delegstateidp->other[1]
 3321                                     = clp->lc_clientid.lval[1];
 3322                                 new_deleg->ls_stateid.other[2] =
 3323                                     delegstateidp->other[2]
 3324                                     = nfsrv_nextstateindex(clp);
 3325                                 if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
 3326                                     (nfsrv_writedelegifpos || !readonly) &&
 3327                                     ((nd->nd_flag & ND_NFSV41) == 0 ||
 3328                                      (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
 3329                                      0)) {
 3330                                         new_deleg->ls_flags =
 3331                                             (NFSLCK_DELEGWRITE |
 3332                                              NFSLCK_READACCESS |
 3333                                              NFSLCK_WRITEACCESS);
 3334                                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3335                                         nfsrv_writedelegcnt++;
 3336                                 } else {
 3337                                         new_deleg->ls_flags =
 3338                                             (NFSLCK_DELEGREAD |
 3339                                              NFSLCK_READACCESS);
 3340                                         *rflagsp |= NFSV4OPEN_READDELEGATE;
 3341                                 }
 3342                                 new_deleg->ls_uid = new_stp->ls_uid;
 3343                                 new_deleg->ls_lfp = lfp;
 3344                                 new_deleg->ls_clp = clp;
 3345                                 new_deleg->ls_filerev = filerev;
 3346                                 new_deleg->ls_compref = nd->nd_compref;
 3347                                 new_deleg->ls_lastrecall = 0;
 3348                                 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
 3349                                     ls_file);
 3350                                 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3351                                     new_deleg->ls_stateid), new_deleg, ls_hash);
 3352                                 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
 3353                                     ls_list);
 3354                                 new_deleg = NULL;
 3355                                 nfsstatsv1.srvdelegates++;
 3356                                 nfsrv_openpluslock++;
 3357                                 nfsrv_delegatecnt++;
 3358                         }
 3359                         /*
 3360                          * Since NFSv4.1 never does an OpenConfirm, the first
 3361                          * open state will be acquired here.
 3362                          */
 3363                         if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
 3364                                 clp->lc_flags |= LCL_STAMPEDSTABLE;
 3365                                 len = clp->lc_idlen;
 3366                                 NFSBCOPY(clp->lc_id, clidp, len);
 3367                                 gotstate = 1;
 3368                         }
 3369                 } else {
 3370                         *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
 3371                         new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
 3372                 }
 3373                 nfsrvd_refcache(new_stp->ls_op);
 3374                 new_stp->ls_noopens = 0;
 3375                 LIST_INIT(&new_stp->ls_open);
 3376                 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3377                 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3378                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3379                     new_open, ls_hash);
 3380                 openstp = new_open;
 3381                 new_open = NULL;
 3382                 *new_stpp = NULL;
 3383                 nfsstatsv1.srvopens++;
 3384                 nfsrv_openpluslock++;
 3385                 nfsstatsv1.srvopenowners++;
 3386                 nfsrv_openpluslock++;
 3387         }
 3388         if (!error) {
 3389                 stateidp->seqid = openstp->ls_stateid.seqid;
 3390                 stateidp->other[0] = openstp->ls_stateid.other[0];
 3391                 stateidp->other[1] = openstp->ls_stateid.other[1];
 3392                 stateidp->other[2] = openstp->ls_stateid.other[2];
 3393         }
 3394         NFSUNLOCKSTATE();
 3395         if (haslock) {
 3396                 NFSLOCKV4ROOTMUTEX();
 3397                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 3398                 NFSUNLOCKV4ROOTMUTEX();
 3399         }
 3400         if (new_open)
 3401                 free(new_open, M_NFSDSTATE);
 3402         if (new_deleg)
 3403                 free(new_deleg, M_NFSDSTATE);
 3404 
 3405         /*
 3406          * If the NFSv4.1 client just acquired its first open, write a timestamp
 3407          * to the stable storage file.
 3408          */
 3409         if (gotstate != 0) {
 3410                 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
 3411                 nfsrv_backupstable();
 3412         }
 3413 
 3414 out:
 3415         free(clidp, M_TEMP);
 3416         NFSEXITCODE2(error, nd);
 3417         return (error);
 3418 }
 3419 
 3420 /*
 3421  * Open update. Does the confirm, downgrade and close.
       *
       * new_stp->ls_flags selects the operation: NFSLCK_CONFIRM confirms the
       * open, NFSLCK_CLOSE frees it, and anything else is a downgrade that
       * replaces the open's share bits with the requested ones.
       * vp must be a regular file (NFSERR_ISDIR for a directory, NFSERR_INVAL
       * for any other type).
       * Once the checks pass, *stateidp is set to the open's stateid with the
       * seqid incremented (for NFSv4.1 a seqid that wraps to 0 becomes 1).
       * For a close, *retwriteaccessp (if non-NULL) is set to 1 when the open
       * had NFSLCK_WRITEACCESS and to 0 otherwise.
       * The state lock is acquired here and released on every return path.
       * Returns 0 or an NFSERR_xxx error.
 3422  */
 3423 int
 3424 nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
 3425     nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p,
 3426     int *retwriteaccessp)
 3427 {
 3428         struct nfsstate *stp;
 3429         struct nfsclient *clp;
 3430         struct nfslockfile *lfp;
 3431         u_int32_t bits;
 3432         int error = 0, gotstate = 0, len = 0;
 3433         u_char *clidp = NULL;
 3434 
 3435         /*
 3436          * Check for restart conditions (client and server).
 3437          */
 3438         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 3439             &new_stp->ls_stateid, 0);
 3440         if (error)
 3441                 goto out;
 3442 
              /* Scratch copy of the client id, handed to nfsrv_writestable(). */
 3443         clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
 3444         NFSLOCKSTATE();
 3445         /*
 3446          * Get the open structure via clientid and stateid.
 3447          */
 3448         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3449             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 3450         if (!error)
 3451                 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
 3452                     new_stp->ls_flags, &stp);
 3453 
 3454         /*
 3455          * Sanity check the open.
              * The state must be an open, and a confirm is only valid while
              * the openowner has NFSLCK_NEEDSCONFIRM set (and vice versa).
 3456          */
 3457         if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
 3458                 (!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3459                  (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
 3460                 ((new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3461                  (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
 3462                 error = NFSERR_BADSTATEID;
 3463 
 3464         if (!error)
 3465                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 3466                     stp->ls_openowner, new_stp->ls_op);
              /*
               * A stateid seqid mismatch is an error for a non-confirm NFSv4.0
               * request, and for NFSv4.1 unless the client passed seqid 0.
               */
 3467         if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
 3468             (((nd->nd_flag & ND_NFSV41) == 0 &&
 3469               !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
 3470              ((nd->nd_flag & ND_NFSV41) != 0 &&
 3471               new_stp->ls_stateid.seqid != 0)))
 3472                 error = NFSERR_OLDSTATEID;
 3473         if (!error && vnode_vtype(vp) != VREG) {
 3474                 if (vnode_vtype(vp) == VDIR)
 3475                         error = NFSERR_ISDIR;
 3476                 else
 3477                         error = NFSERR_INVAL;
 3478         }
 3479 
 3480         if (error) {
 3481                 /*
 3482                  * If a client tries to confirm an Open with a bad
 3483                  * seqid# and there are no byte range locks or other Opens
 3484                  * on the openowner, just throw it away, so the next use of the
 3485                  * openowner will start a fresh seq#.
                      * NOTE(review): stp is used here only for NFSERR_BADSEQID,
                      * which presumably can only come from nfsrv_checkseqid()
                      * above, i.e. after stp was set -- confirm.
 3486                  */
 3487                 if (error == NFSERR_BADSEQID &&
 3488                     (new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3489                     nfsrv_nootherstate(stp))
 3490                         nfsrv_freeopenowner(stp->ls_openowner, 0, p);
 3491                 NFSUNLOCKSTATE();
 3492                 goto out;
 3493         }
 3494 
 3495         /*
 3496          * Set the return stateid.
 3497          */
 3498         stateidp->seqid = stp->ls_stateid.seqid + 1;
 3499         if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 3500                 stateidp->seqid = 1;
 3501         stateidp->other[0] = stp->ls_stateid.other[0];
 3502         stateidp->other[1] = stp->ls_stateid.other[1];
 3503         stateidp->other[2] = stp->ls_stateid.other[2];
 3504         /*
 3505          * Now, handle the three cases.
 3506          */
 3507         if (new_stp->ls_flags & NFSLCK_CONFIRM) {
 3508                 /*
 3509                  * If the open doesn't need confirmation, it seems to me that
 3510                  * there is a client error, but I'll just log it and keep going?
 3511                  */
 3512                 if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
 3513                         printf("Nfsv4d: stray open confirm\n");
 3514                 stp->ls_openowner->ls_flags = 0;
 3515                 stp->ls_stateid.seqid++;
 3516                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3517                     stp->ls_stateid.seqid == 0)
 3518                         stp->ls_stateid.seqid = 1;
                      /*
                       * First time for this client: copy its id while the state
                       * lock is held; it is written out after the lock is dropped.
                       */
 3519                 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
 3520                         clp->lc_flags |= LCL_STAMPEDSTABLE;
 3521                         len = clp->lc_idlen;
 3522                         NFSBCOPY(clp->lc_id, clidp, len);
 3523                         gotstate = 1;
 3524                 }
 3525                 NFSUNLOCKSTATE();
 3526         } else if (new_stp->ls_flags & NFSLCK_CLOSE) {
 3527                 lfp = stp->ls_lfp;
 3528                 if (retwriteaccessp != NULL) {
 3529                         if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
 3530                                 *retwriteaccessp = 1;
 3531                         else
 3532                                 *retwriteaccessp = 0;
 3533                 }
                      /*
                       * With local locking enabled and a non-empty ls_open list
                       * on this open, the state lock and the vnode lock are both
                       * dropped around nfsrv_freeopen() and the vnode lock is
                       * retaken exclusively afterwards.
                       */
 3534                 if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
 3535                         /* Get the lf lock */
 3536                         nfsrv_locklf(lfp);
 3537                         NFSUNLOCKSTATE();
 3538                         ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
 3539                         NFSVOPUNLOCK(vp);
                              /*
                               * NOTE(review): presumably a non-zero return means
                               * lfp was freed, so it is only unlocked on 0 --
                               * confirm against nfsrv_freeopen().
                               */
 3540                         if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
 3541                                 NFSLOCKSTATE();
 3542                                 nfsrv_unlocklf(lfp);
 3543                                 NFSUNLOCKSTATE();
 3544                         }
 3545                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 3546                 } else {
 3547                         (void) nfsrv_freeopen(stp, NULL, 0, p);
 3548                         NFSUNLOCKSTATE();
 3549                 }
 3550         } else {
 3551                 /*
 3552                  * Update the share bits, making sure that the new set are a
 3553                  * subset of the old ones.
 3554                  */
 3555                 bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
                      /* Any requested bit that is not currently held is an error. */
 3556                 if (~(stp->ls_flags) & bits) {
 3557                         NFSUNLOCKSTATE();
 3558                         error = NFSERR_INVAL;
 3559                         goto out;
 3560                 }
 3561                 stp->ls_flags = (bits | NFSLCK_OPEN);
 3562                 stp->ls_stateid.seqid++;
 3563                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3564                     stp->ls_stateid.seqid == 0)
 3565                         stp->ls_stateid.seqid = 1;
 3566                 NFSUNLOCKSTATE();
 3567         }
 3568 
 3569         /*
 3570          * If the client just confirmed its first open, write a timestamp
 3571          * to the stable storage file.
 3572          */
 3573         if (gotstate != 0) {
 3574                 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
 3575                 nfsrv_backupstable();
 3576         }
 3577 
 3578 out:
              /* clidp is still NULL if nfsrv_checkrestart() failed above. */
 3579         free(clidp, M_TEMP);
 3580         NFSEXITCODE2(error, nd);
 3581         return (error);
 3582 }
 3583 
 3584 /*
 3585  * Delegation update. Does the purge and return.
       *
       * op selects the operation: NFSV4OP_DELEGRETURN frees the delegation
       * named by stateidp after checking that its file handle matches that of
       * vp; any other op (DelegPurge) frees the client's lc_olddeleg list.
       * For DelegReturn, *retwriteaccessp (if non-NULL) is set to 1 when the
       * delegation has NFSLCK_DELEGWRITE set and to 0 otherwise.
       * cred is not referenced by this function.
       * Returns 0 or an NFSERR_xxx error.
 3586  */
 3587 int
 3588 nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
 3589     nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
 3590     NFSPROC_T *p, int *retwriteaccessp)
 3591 {
 3592         struct nfsstate *stp;
 3593         struct nfsclient *clp;
 3594         int error = 0;
 3595         fhandle_t fh;           /* filled in only when vp is non-NULL */
 3596 
 3597         /*
 3598          * Do a sanity check against the file handle for DelegReturn.
 3599          */
 3600         if (vp) {
 3601                 error = nfsvno_getfh(vp, &fh, p);
 3602                 if (error)
 3603                         goto out;
 3604         }
 3605         /*
 3606          * Check for restart conditions (client and server).
 3607          */
 3608         if (op == NFSV4OP_DELEGRETURN)
 3609                 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
 3610                         stateidp, 0);
 3611         else
 3612                 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
 3613                         stateidp, 0);
 3614 
 3615         NFSLOCKSTATE();
 3616         /*
 3617          * Get the client and, for DelegReturn, the delegation state via
 3618          * the stateid.
 3619          */
 3620         if (!error)
 3621             error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3622                 (nfsquad_t)((u_quad_t)0), 0, nd, p);
 3623         if (error) {
                      /* A down callback path does not fail the request. */
 3624                 if (error == NFSERR_CBPATHDOWN)
 3625                         error = 0;
                      /* DelegReturn reports a stale clientid as a stale stateid. */
 3626                 if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
 3627                         error = NFSERR_STALESTATEID;
 3628         }
 3629         if (!error && op == NFSV4OP_DELEGRETURN) {
 3630             error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
                  /* For NFSv4.1 a stateid seqid of 0 skips the seqid comparison. */
 3631             if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
 3632                 ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
 3633                 error = NFSERR_OLDSTATEID;
 3634         }
 3635         /*
 3636          * NFSERR_EXPIRED means that the state has gone away,
 3637          * so Delegations have been purged. Just return ok.
 3638          */
 3639         if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
 3640                 NFSUNLOCKSTATE();
 3641                 error = 0;
 3642                 goto out;
 3643         }
 3644         if (error) {
 3645                 NFSUNLOCKSTATE();
 3646                 goto out;
 3647         }
 3648 
 3649         if (op == NFSV4OP_DELEGRETURN) {
                      /*
                       * The delegation must be for the file handle of vp.
                       * NOTE(review): fh is uninitialized if vp was NULL; this
                       * assumes DelegReturn callers always pass vp -- confirm.
                       */
 3650                 if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
 3651                     sizeof (fhandle_t))) {
 3652                         NFSUNLOCKSTATE();
 3653                         error = NFSERR_BADSTATEID;
 3654                         goto out;
 3655                 }
 3656                 if (retwriteaccessp != NULL) {
 3657                         if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
 3658                                 *retwriteaccessp = 1;
 3659                         else
 3660                                 *retwriteaccessp = 0;
 3661                 }
 3662                 nfsrv_freedeleg(stp);
 3663         } else {
 3664                 nfsrv_freedeleglist(&clp->lc_olddeleg);
 3665         }
 3666         NFSUNLOCKSTATE();
 3667         error = 0;
 3668 
 3669 out:
 3670         NFSEXITCODE(error);
 3671         return (error);
 3672 }
 3672 
 3673 /*
 3674  * Release lock owner.
       *
       * Walks every entry on the client's lc_open list, every open on that
       * entry's ls_open list and every lock owner on that open's ls_open list.
       * A lock owner whose owner string matches new_stp's (same length and
       * bytes) is freed if its ls_lock list is empty; the first matching one
       * that still holds locks ends the walk with NFSERR_LOCKSHELD. Owners
       * freed before that point stay freed.
       * Returns 0, NFSERR_LOCKSHELD, or the error from nfsrv_checkrestart()
       * or nfsrv_getclient().
 3675  */
 3676 int
 3677 nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
 3678     NFSPROC_T *p)
 3679 {
 3680         struct nfsstate *stp, *nstp, *openstp, *ownstp;
 3681         struct nfsclient *clp;
 3682         int error = 0;
 3683 
 3684         /*
 3685          * Check for restart conditions (client and server).
 3686          */
 3687         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 3688             &new_stp->ls_stateid, 0);
 3689         if (error)
 3690                 goto out;
 3691 
 3692         NFSLOCKSTATE();
 3693         /*
 3694          * Get the lock owner by name.
 3695          */
 3696         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3697             (nfsquad_t)((u_quad_t)0), 0, NULL, p);
 3698         if (error) {
 3699                 NFSUNLOCKSTATE();
 3700                 goto out;
 3701         }
 3702         LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
 3703             LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
 3704                 stp = LIST_FIRST(&openstp->ls_open);
 3705                 while (stp != LIST_END(&openstp->ls_open)) {
                          /*
                           * Fetch the successor first, since stp may be handed
                           * to nfsrv_freelockowner() below.
                           */
 3706                     nstp = LIST_NEXT(stp, ls_list);
 3707                     /*
 3708                      * If the owner matches, check for locks and
 3709                      * then free or return an error.
 3710                      */
 3711                     if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
 3712                         !NFSBCMP(stp->ls_owner, new_stp->ls_owner,
 3713                          stp->ls_ownerlen)){
 3714                         if (LIST_EMPTY(&stp->ls_lock)) {
 3715                             nfsrv_freelockowner(stp, NULL, 0, p);
 3716                         } else {
 3717                             NFSUNLOCKSTATE();
 3718                             error = NFSERR_LOCKSHELD;
 3719                             goto out;
 3720                         }
 3721                     }
 3722                     stp = nstp;
 3723                 }
 3724             }
 3725         }
 3726         NFSUNLOCKSTATE();
 3727 
 3728 out:
 3729         NFSEXITCODE(error);
 3730         return (error);
 3731 }
 3732 
 3733 /*
 3734  * Fetch the file handle for vp into the buffer used by the lock code.
 3735  * For an open (NFSLCK_OPEN) that is new_lfp->lf_fh; otherwise it is
 3736  * *nfhp, and a NULL nfhp panics. Returns the nfsvno_getfh() result.
 3737  */
 3738 static int
 3739 nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
 3740     fhandle_t *nfhp, NFSPROC_T *p)
 3741 {
 3742         fhandle_t *dstfhp = NULL;
 3743         int error;
 3744 
 3745         /* Select where the file handle gets stored. */
 3746         if ((flags & NFSLCK_OPEN) != 0) {
 3747                 KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
 3748                 dstfhp = &new_lfp->lf_fh;
 3749         } else {
 3750                 if (nfhp == NULL)
 3751                         panic("nfsrv_getlockfh");
 3752                 dstfhp = nfhp;
 3753         }
 3754 
 3755         error = nfsvno_getfh(vp, dstfhp, p);
 3756         NFSEXITCODE(error);
 3757         return (error);
 3758 }
 3759 
 3760 /*
 3761  * Look up the nfslockfile for a file handle in its NFSLOCKHASH() chain.
 3762  * For an open (NFSLCK_OPEN) the key is (*new_lfpp)->lf_fh and a miss links
 3763  * *new_lfpp into the chain, returns it via *lfpp and sets *new_lfpp to NULL.
 3764  * Otherwise the key is *nfhp (a NULL nfhp panics) and a miss returns -1.
 3765  * A hit is locked with nfsrv_locklf() when lockit is non-zero.
 3766  * Returns 0 with *lfpp set, or -1 if nothing matched for a non-open.
 3767  */
 3768 static int
 3769 nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
 3770     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
 3771 {
 3772         struct nfslockfile *curlfp, *newlfp = NULL;
 3773         struct nfslockhashhead *chain;
 3774         fhandle_t *keyfhp = NULL, *curfhp;
 3775         int isopen = ((flags & NFSLCK_OPEN) != 0);
 3776 
 3777         /* Pick the file handle that serves as the lookup key. */
 3778         if (isopen) {
 3779                 newlfp = *new_lfpp;
 3780                 keyfhp = &newlfp->lf_fh;
 3781         } else {
 3782                 if (nfhp == NULL)
 3783                         panic("nfsrv_getlockfile");
 3784                 keyfhp = nfhp;
 3785         }
 3786 
 3787         /* Walk the hash chain looking for the same file handle. */
 3788         chain = NFSLOCKHASH(keyfhp);
 3789         LIST_FOREACH(curlfp, chain, lf_hash) {
 3790                 curfhp = &curlfp->lf_fh;
 3791                 if (NFSVNO_CMPFH(keyfhp, curfhp)) {
 3792                         if (lockit)
 3793                                 nfsrv_locklf(curlfp);
 3794                         *lfpp = curlfp;
 3795                         return (0);
 3796                 }
 3797         }
 3798         if (!isopen)
 3799                 return (-1);
 3800 
 3801         /* Miss on an open: initialize the caller's entry and chain it in. */
 3802         LIST_INIT(&newlfp->lf_open);
 3803         LIST_INIT(&newlfp->lf_lock);
 3804         LIST_INIT(&newlfp->lf_deleg);
 3805         LIST_INIT(&newlfp->lf_locallock);
 3806         LIST_INIT(&newlfp->lf_rollback);
 3807         newlfp->lf_usecount = 0;
 3808         newlfp->lf_locallock_lck.nfslock_usecnt = 0;
 3809         newlfp->lf_locallock_lck.nfslock_lock = 0;
 3810         LIST_INSERT_HEAD(chain, newlfp, lf_hash);
 3811 
 3812         *lfpp = newlfp;
 3813         *new_lfpp = NULL;
 3814         return (0);
 3815 }
 3816 
 3817 /*
 3818  * This function adds a nfslock lock structure to the list for the associated
 3819  * nfsstate and nfslockfile structures. It will be inserted after the
 3820  * entry pointed at by insert_lop.
       *
       * With a non-NULL stp, new_lop is also linked into lfp->lf_lock, which is
       * kept in increasing lo_first order (ahead of entries with an equal
       * lo_first), and the lock counters are incremented. With a NULL stp that
       * list is not used.
       * insert_lop may instead be stp or lfp cast to a struct nfslock *, which
       * means "insert at the head" of stp->ls_lock or lfp->lf_locallock.
 3821  */
 3822 static void
 3823 nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
 3824     struct nfsstate *stp, struct nfslockfile *lfp)
 3825 {
 3826         struct nfslock *lop, *nlop;
 3827 
 3828         new_lop->lo_stp = stp;
 3829         new_lop->lo_lfp = lfp;
 3830 
 3831         if (stp != NULL) {
 3832                 /* Insert in increasing lo_first order */
 3833                 lop = LIST_FIRST(&lfp->lf_lock);
 3834                 if (lop == LIST_END(&lfp->lf_lock) ||
 3835                     new_lop->lo_first <= lop->lo_first) {
 3836                         LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
 3837                 } else {
                              /*
                               * Advance lop to the last entry that starts before
                               * new_lop, then link new_lop in right after it.
                               */
 3838                         nlop = LIST_NEXT(lop, lo_lckfile);
 3839                         while (nlop != LIST_END(&lfp->lf_lock) &&
 3840                                nlop->lo_first < new_lop->lo_first) {
 3841                                 lop = nlop;
 3842                                 nlop = LIST_NEXT(lop, lo_lckfile);
 3843                         }
 3844                         LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
 3845                 }
 3846         } else {
 3847                 new_lop->lo_lckfile.le_prev = NULL;     /* list not used */
 3848         }
 3849 
 3850         /*
 3851          * Insert after insert_lop, which is overloaded as stp or lfp for
 3852          * an empty list.
 3853          */
 3854         if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
 3855                 LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
 3856         else if ((struct nfsstate *)insert_lop == stp)
 3857                 LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
 3858         else
 3859                 LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
              /* Only locks inserted with a non-NULL stp are counted. */
 3860         if (stp != NULL) {
 3861                 nfsstatsv1.srvlocks++;
 3862                 nfsrv_openpluslock++;
 3863         }
 3864 }
 3865 
 3866 /*
 3867  * This function updates the locking for a lock owner and given file. It
 3868  * maintains a list of lock ranges ordered on increasing file offset that
 3869  * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
 3870  * It always adds new_lop to the list and sometimes uses the one pointed
 3871  * at by other_lopp.
       *
       * stp selects the list that is walked: stp->ls_lock when non-NULL,
       * otherwise lfp->lf_locallock. Entries for other files (lo_lfp != lfp)
       * are skipped.
       * If new_lop has NFSLCK_UNLOCK set, it is only used to trim, split or
       * free existing entries and is not inserted itself, except when a split
       * needs a second structure and *other_lopp is NULL.
       * *new_lopp and *other_lopp are set to NULL when the structure they
       * point at has been linked into the list; otherwise they are left
       * unchanged.
 3872  */
 3873 static void
 3874 nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
 3875     struct nfslock **other_lopp, struct nfslockfile *lfp)
 3876 {
 3877         struct nfslock *new_lop = *new_lopp;
 3878         struct nfslock *lop, *tlop, *ilop;
 3879         struct nfslock *other_lop = *other_lopp;
 3880         int unlock = 0, myfile = 0;
 3881         u_int64_t tmp;
 3882 
 3883         /*
 3884          * Work down the list until the lock is merged.
 3885          */
 3886         if (new_lop->lo_flags & NFSLCK_UNLOCK)
 3887                 unlock = 1;
              /*
               * ilop is the entry new_lop will be inserted after; it starts as
               * the list owner (stp or lfp) cast to a lock pointer, which
               * nfsrv_insertlock() treats as "insert at the head".
               */
 3888         if (stp != NULL) {
 3889                 ilop = (struct nfslock *)stp;
 3890                 lop = LIST_FIRST(&stp->ls_lock);
 3891         } else {
 3892                 ilop = (struct nfslock *)lfp;
 3893                 lop = LIST_FIRST(&lfp->lf_locallock);
 3894         }
 3895         while (lop != NULL) {
 3896             /*
 3897              * Only check locks for this file that aren't before the start of
 3898              * new lock's range.
 3899              */
 3900             if (lop->lo_lfp == lfp) {
 3901               myfile = 1;
 3902               if (lop->lo_end >= new_lop->lo_first) {
 3903                 if (new_lop->lo_end < lop->lo_first) {
 3904                         /*
 3905                          * If the new lock ends before the start of the
 3906                          * current lock's range, no merge, just insert
 3907                          * the new lock.
 3908                          */
 3909                         break;
 3910                 }
 3911                 if (new_lop->lo_flags == lop->lo_flags ||
 3912                     (new_lop->lo_first <= lop->lo_first &&
 3913                      new_lop->lo_end >= lop->lo_end)) {
 3914                         /*
 3915                          * This lock can be absorbed by the new lock/unlock.
 3916                          * This happens when it covers the entire range
 3917                          * of the old lock or is contiguous
 3918                          * with the old lock and is of the same type or an
 3919                          * unlock.
                              * new_lop is widened to the union of both ranges
                              * and the old entry is freed.
 3920                          */
 3921                         if (lop->lo_first < new_lop->lo_first)
 3922                                 new_lop->lo_first = lop->lo_first;
 3923                         if (lop->lo_end > new_lop->lo_end)
 3924                                 new_lop->lo_end = lop->lo_end;
 3925                         tlop = lop;
 3926                         lop = LIST_NEXT(lop, lo_lckowner);
 3927                         nfsrv_freenfslock(tlop);
 3928                         continue;
 3929                 }
 3930 
 3931                 /*
 3932                  * All these cases are for contiguous locks that are not the
 3933                  * same type, so they can't be merged.
 3934                  */
 3935                 if (new_lop->lo_first <= lop->lo_first) {
 3936                         /*
 3937                          * This case is where the new lock overlaps with the
 3938                          * first part of the old lock. Move the start of the
 3939                          * old lock to just past the end of the new lock. The
 3940                          * new lock will be inserted in front of the old, since
 3941                          * ilop hasn't been updated. (We are done now.)
                              * NOTE(review): this implies lo_end is exclusive --
                              * confirm.
 3942                          */
 3943                         lop->lo_first = new_lop->lo_end;
 3944                         break;
 3945                 }
 3946                 if (new_lop->lo_end >= lop->lo_end) {
 3947                         /*
 3948                          * This case is where the new lock overlaps with the
 3949                          * end of the old lock's range. Move the old lock's
 3950                          * end to just before the new lock's first and insert
 3951                          * the new lock after the old lock.
 3952                          * Might not be done yet, since the new lock could
 3953                          * overlap further locks with higher ranges.
 3954                          */
 3955                         lop->lo_end = new_lop->lo_first;
 3956                         ilop = lop;
 3957                         lop = LIST_NEXT(lop, lo_lckowner);
 3958                         continue;
 3959                 }
 3960                 /*
 3961                  * The final case is where the new lock's range is in the
 3962                  * middle of the current lock's and splits the current lock
 3963                  * up. Use *other_lopp to handle the second part of the
 3964                  * split old lock range. (We are done now.)
 3965                  * For unlock, we use new_lop as other_lop and tmp, since
 3966                  * other_lop and new_lop are the same for this case.
 3967                  * We noted the unlock case above, so we don't need
 3968                  * new_lop->lo_flags any longer.
 3969                  */
 3970                 tmp = new_lop->lo_first;
 3971                 if (other_lop == NULL) {
 3972                         if (!unlock)
 3973                                 panic("nfsd srv update unlock");
 3974                         other_lop = new_lop;
 3975                         *new_lopp = NULL;
 3976                 }
 3977                 other_lop->lo_first = new_lop->lo_end;
 3978                 other_lop->lo_end = lop->lo_end;
 3979                 other_lop->lo_flags = lop->lo_flags;
 3980                 other_lop->lo_stp = stp;
 3981                 other_lop->lo_lfp = lfp;
 3982                 lop->lo_end = tmp;
 3983                 nfsrv_insertlock(other_lop, lop, stp, lfp);
 3984                 *other_lopp = NULL;
 3985                 ilop = lop;
 3986                 break;
 3987               }
 3988             }
 3989             ilop = lop;
 3990             lop = LIST_NEXT(lop, lo_lckowner);
                  /*
                   * Stop once the run of entries for this file has ended
                   * (presumably entries for one file are adjacent on the list).
                   */
 3991             if (myfile && (lop == NULL || lop->lo_lfp != lfp))
 3992                 break;
 3993         }
 3994 
 3995         /*
 3996          * Insert the new lock in the list at the appropriate place.
 3997          */
 3998         if (!unlock) {
 3999                 nfsrv_insertlock(new_lop, ilop, stp, lfp);
 4000                 *new_lopp = NULL;
 4001         }
 4002 }
 4003 
 4004 /*
 4005  * Check an NFSv4.0 open/lock seqid against its owner stp and cache entry op.
 4006  * Returns 0 to proceed, NFSERR_DONTREPLY or NFSERR_REPLYFROMCACHE for a
 4007  * request that repeats the owner's cached one, else NFSERR_BADSEQID.
 4008  */
 4009 static int
 4010 nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
 4011     struct nfsstate *stp, struct nfsrvcache *op)
 4012 {
 4013         struct nfsrvcache *prevop;
 4014         int error = 0;
 4015 
 4016         /* NFSv4.1 ignores the open_seqid and lock_seqid. */
 4017         if ((nd->nd_flag & ND_NFSV41) != 0)
 4018                 goto out;
 4019         if (nd->nd_rp != op)
 4020                 panic("nfsrvstate checkseqid");
 4021         if ((op->rc_flag & RC_INPROG) == 0)
 4022                 panic("nfsrvstate not inprog");
 4023         prevop = stp->ls_op;
 4024         if (prevop != NULL && prevop->rc_refcnt <= 0) {
 4025                 printf("refcnt=%d\n", prevop->rc_refcnt);
 4026                 panic("nfsrvstate op refcnt");
 4027         }
 4028         /* If ND_ERELOOKUP is set, the seqid has already been handled. */
 4029         if ((nd->nd_flag & ND_ERELOOKUP) != 0)
 4030                 goto out;
 4031 
 4032         if (seqid == stp->ls_seq + 1) {
 4033                 /* Next in sequence: op replaces the cached request. */
 4034                 if (prevop != NULL)
 4035                         nfsrvd_derefcache(prevop);
 4036                 stp->ls_op = op;
 4037                 nfsrvd_refcache(op);
 4038                 stp->ls_seq = seqid;
 4039         } else if (seqid == stp->ls_seq && prevop != NULL &&
 4040             op->rc_xid == prevop->rc_xid && op->rc_refcnt == 0 &&
 4041             op->rc_reqlen == prevop->rc_reqlen &&
 4042             op->rc_cksum == prevop->rc_cksum) {
 4043                 /* Same seqid, xid, length and checksum as the cached one. */
 4044                 if ((prevop->rc_flag & RC_INPROG) != 0) {
 4045                         error = NFSERR_DONTREPLY;
 4046                 } else {
 4047                         nd->nd_rp = prevop;
 4048                         prevop->rc_flag |= RC_INPROG;
 4049                         nfsrvd_delcache(op);
 4050                         error = NFSERR_REPLYFROMCACHE;
 4051                 }
 4052         } else {
 4053                 error = NFSERR_BADSEQID;
 4054         }
 4055 out:
 4056         NFSEXITCODE2(error, nd);
 4057         return (error);
 4058 }
 4058 
 4059 /*
 4060  * Get the client ip address for callbacks. If the strings can't be parsed,
 4061  * just set lc_program to 0 to indicate no callbacks are possible.
 4062  * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
 4063  *  the address to the client's transport address. This won't be used
 4064  *  for callbacks, but can be printed out by nfsstats for info.)
 4065  * Return error if the xdr can't be parsed, 0 otherwise.
 4066  */
 4067 int
 4068 nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
 4069 {
 4070         u_int32_t *tl;
 4071         u_char *cp, *cp2;
 4072         int i, j, maxalen = 0, minalen = 0;
 4073         sa_family_t af;
 4074 #ifdef INET
 4075         struct sockaddr_in *rin = NULL, *sin;
 4076 #endif
 4077 #ifdef INET6
 4078         struct sockaddr_in6 *rin6 = NULL, *sin6;
 4079 #endif
 4080         u_char *addr;
 4081         int error = 0, cantparse = 0;
 4082         union {
 4083                 in_addr_t ival;
 4084                 u_char cval[4];
 4085         } ip;
 4086         union {
 4087                 in_port_t sval;
 4088                 u_char cval[2];
 4089         } port;
 4090 
 4091         /* 8 is the maximum length of the port# string. */
 4092         addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK);
 4093         clp->lc_req.nr_client = NULL;
 4094         clp->lc_req.nr_lock = 0;
 4095         af = AF_UNSPEC;
 4096         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 4097         i = fxdr_unsigned(int, *tl);
 4098         if (i >= 3 && i <= 4) {
 4099                 error = nfsrv_mtostr(nd, addr, i);
 4100                 if (error)
 4101                         goto nfsmout;
 4102 #ifdef INET
 4103                 if (!strcmp(addr, "tcp")) {
 4104                         clp->lc_flags |= LCL_TCPCALLBACK;
 4105                         clp->lc_req.nr_sotype = SOCK_STREAM;
 4106                         clp->lc_req.nr_soproto = IPPROTO_TCP;
 4107                         af = AF_INET;
 4108                 } else if (!strcmp(addr, "udp")) {
 4109                         clp->lc_req.nr_sotype = SOCK_DGRAM;
 4110                         clp->lc_req.nr_soproto = IPPROTO_UDP;
 4111                         af = AF_INET;
 4112                 }
 4113 #endif
 4114 #ifdef INET6
 4115                 if (af == AF_UNSPEC) {
 4116                         if (!strcmp(addr, "tcp6")) {
 4117                                 clp->lc_flags |= LCL_TCPCALLBACK;
 4118                                 clp->lc_req.nr_sotype = SOCK_STREAM;
 4119                                 clp->lc_req.nr_soproto = IPPROTO_TCP;
 4120                                 af = AF_INET6;
 4121                         } else if (!strcmp(addr, "udp6")) {
 4122                                 clp->lc_req.nr_sotype = SOCK_DGRAM;
 4123                                 clp->lc_req.nr_soproto = IPPROTO_UDP;
 4124                                 af = AF_INET6;
 4125                         }
 4126                 }
 4127 #endif
 4128                 if (af == AF_UNSPEC) {
 4129                         cantparse = 1;
 4130                 }
 4131         } else {
 4132                 cantparse = 1;
 4133                 if (i > 0) {
 4134                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 4135                         if (error)
 4136                                 goto nfsmout;
 4137                 }
 4138         }
 4139         /*
 4140          * The caller has allocated clp->lc_req.nr_nam to be large enough
 4141          * for either AF_INET or AF_INET6 and zeroed out the contents.
 4142          * maxalen is set to the maximum length of the host IP address string
 4143          * plus 8 for the maximum length of the port#.
 4144          * minalen is set to the minimum length of the host IP address string
 4145          * plus 4 for the minimum length of the port#.
 4146          * These lengths do not include NULL termination,
 4147          * so INET[6]_ADDRSTRLEN - 1 is used in the calculations.
 4148          */
 4149         switch (af) {
 4150 #ifdef INET
 4151         case AF_INET:
 4152                 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
 4153                 rin->sin_family = AF_INET;
 4154                 rin->sin_len = sizeof(struct sockaddr_in);
 4155                 maxalen = INET_ADDRSTRLEN - 1 + 8;
 4156                 minalen = 7 + 4;
 4157                 break;
 4158 #endif
 4159 #ifdef INET6
 4160         case AF_INET6:
 4161                 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 4162                 rin6->sin6_family = AF_INET6;
 4163                 rin6->sin6_len = sizeof(struct sockaddr_in6);
 4164                 maxalen = INET6_ADDRSTRLEN - 1 + 8;
 4165                 minalen = 3 + 4;
 4166                 break;
 4167 #endif
 4168         }
 4169         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 4170         i = fxdr_unsigned(int, *tl);
 4171         if (i < 0) {
 4172                 error = NFSERR_BADXDR;
 4173                 goto nfsmout;
 4174         } else if (i == 0) {
 4175                 cantparse = 1;
 4176         } else if (!cantparse && i <= maxalen && i >= minalen) {
 4177                 error = nfsrv_mtostr(nd, addr, i);
 4178                 if (error)
 4179                         goto nfsmout;
 4180 
 4181                 /*
 4182                  * Parse out the address fields. We expect 6 decimal numbers
 4183                  * separated by '.'s for AF_INET and two decimal numbers
 4184                  * preceeded by '.'s for AF_INET6.
 4185                  */
 4186                 cp = NULL;
 4187                 switch (af) {
 4188 #ifdef INET6
 4189                 /*
 4190                  * For AF_INET6, first parse the host address.
 4191                  */
 4192                 case AF_INET6:
 4193                         cp = strchr(addr, '.');
 4194                         if (cp != NULL) {
 4195                                 *cp++ = '\0';
 4196                                 if (inet_pton(af, addr, &rin6->sin6_addr) == 1)
 4197                                         i = 4;
 4198                                 else {
 4199                                         cp = NULL;
 4200                                         cantparse = 1;
 4201                                 }
 4202                         }
 4203                         break;
 4204 #endif
 4205 #ifdef INET
 4206                 case AF_INET:
 4207                         cp = addr;
 4208                         i = 0;
 4209                         break;
 4210 #endif
 4211                 }
 4212                 while (cp != NULL && *cp && i < 6) {
 4213                         cp2 = cp;
 4214                         while (*cp2 && *cp2 != '.')
 4215                                 cp2++;
 4216                         if (*cp2)
 4217                                 *cp2++ = '\0';
 4218                         else if (i != 5) {
 4219                                 cantparse = 1;
 4220                                 break;
 4221                         }
 4222                         j = nfsrv_getipnumber(cp);
 4223                         if (j >= 0) {
 4224                                 if (i < 4)
 4225                                         ip.cval[3 - i] = j;
 4226                                 else
 4227                                         port.cval[5 - i] = j;
 4228                         } else {
 4229                                 cantparse = 1;
 4230                                 break;
 4231                         }
 4232                         cp = cp2;
 4233                         i++;
 4234                 }
 4235                 if (!cantparse) {
 4236                         /*
 4237                          * The host address INADDR_ANY is (mis)used to indicate
 4238                          * "there is no valid callback address".
 4239                          */
 4240                         switch (af) {
 4241 #ifdef INET6
 4242                         case AF_INET6:
 4243                                 if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr,
 4244                                     &in6addr_any))
 4245                                         rin6->sin6_port = htons(port.sval);
 4246                                 else
 4247                                         cantparse = 1;
 4248                                 break;
 4249 #endif
 4250 #ifdef INET
 4251                         case AF_INET:
 4252                                 if (ip.ival != INADDR_ANY) {
 4253                                         rin->sin_addr.s_addr = htonl(ip.ival);
 4254                                         rin->sin_port = htons(port.sval);
 4255                                 } else {
 4256                                         cantparse = 1;
 4257                                 }
 4258                                 break;
 4259 #endif
 4260                         }
 4261                 }
 4262         } else {
 4263                 cantparse = 1;
 4264                 if (i > 0) {
 4265                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 4266                         if (error)
 4267                                 goto nfsmout;
 4268                 }
 4269         }
 4270         if (cantparse) {
 4271                 switch (nd->nd_nam->sa_family) {
 4272 #ifdef INET
 4273                 case AF_INET:
 4274                         sin = (struct sockaddr_in *)nd->nd_nam;
 4275                         rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
 4276                         rin->sin_family = AF_INET;
 4277                         rin->sin_len = sizeof(struct sockaddr_in);
 4278                         rin->sin_addr.s_addr = sin->sin_addr.s_addr;
 4279                         rin->sin_port = 0x0;
 4280                         break;
 4281 #endif
 4282 #ifdef INET6
 4283                 case AF_INET6:
 4284                         sin6 = (struct sockaddr_in6 *)nd->nd_nam;
 4285                         rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 4286                         rin6->sin6_family = AF_INET6;
 4287                         rin6->sin6_len = sizeof(struct sockaddr_in6);
 4288                         rin6->sin6_addr = sin6->sin6_addr;
 4289                         rin6->sin6_port = 0x0;
 4290                         break;
 4291 #endif
 4292                 }
 4293                 clp->lc_program = 0;
 4294         }
 4295 nfsmout:
 4296         free(addr, M_TEMP);
 4297         NFSEXITCODE2(error, nd);
 4298         return (error);
 4299 }
 4300 
 4301 /*
 4302  * Turn a string of up to three decimal digits into a number. Return -1 upon
 4303  * error.
 4304  */
 4305 static int
 4306 nfsrv_getipnumber(u_char *cp)
 4307 {
 4308         int i = 0, j = 0;
 4309 
 4310         while (*cp) {
 4311                 if (j > 2 || *cp < '' || *cp > '9')
 4312                         return (-1);
 4313                 i *= 10;
 4314                 i += (*cp - '');
 4315                 cp++;
 4316                 j++;
 4317         }
 4318         if (i < 256)
 4319                 return (i);
 4320         return (-1);
 4321 }
 4322 
 4323 /*
 4324  * This function checks for restart conditions.
 4325  */
 4326 static int
 4327 nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
 4328     nfsv4stateid_t *stateidp, int specialid)
 4329 {
 4330         int ret = 0;
 4331 
 4332         /*
 4333          * First check for a server restart. Open, LockT, ReleaseLockOwner
 4334          * and DelegPurge have a clientid, the rest a stateid.
 4335          */
 4336         if (flags &
 4337             (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
 4338                 if (clientid.lval[0] != nfsrvboottime) {
 4339                         ret = NFSERR_STALECLIENTID;
 4340                         goto out;
 4341                 }
 4342         } else if (stateidp->other[0] != nfsrvboottime &&
 4343                 specialid == 0) {
 4344                 ret = NFSERR_STALESTATEID;
 4345                 goto out;
 4346         }
 4347 
 4348         /*
 4349          * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
 4350          * not use a lock/open owner seqid#, so the check can be done now.
 4351          * (The others will be checked, as required, later.)
 4352          */
 4353         if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
 4354                 goto out;
 4355 
 4356         NFSLOCKSTATE();
 4357         ret = nfsrv_checkgrace(NULL, NULL, flags);
 4358         NFSUNLOCKSTATE();
 4359 
 4360 out:
 4361         NFSEXITCODE(ret);
 4362         return (ret);
 4363 }
 4364 
 4365 /*
 4366  * Check for grace.
 4367  */
 4368 static int
 4369 nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
 4370     u_int32_t flags)
 4371 {
 4372         int error = 0, notreclaimed;
 4373         struct nfsrv_stable *sp;
 4374 
 4375         if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_UPDATEDONE |
 4376              NFSNSF_GRACEOVER)) == 0) {
 4377                 /*
 4378                  * First, check to see if all of the clients have done a
 4379                  * ReclaimComplete.  If so, grace can end now.
 4380                  */
 4381                 notreclaimed = 0;
 4382                 LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 4383                         if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
 4384                                 notreclaimed = 1;
 4385                                 break;
 4386                         }
 4387                 }
 4388                 if (notreclaimed == 0)
 4389                         nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER |
 4390                             NFSNSF_NEEDLOCK);
 4391         }
 4392 
 4393         if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
 4394                 if (flags & NFSLCK_RECLAIM) {
 4395                         error = NFSERR_NOGRACE;
 4396                         goto out;
 4397                 }
 4398         } else {
 4399                 if (!(flags & NFSLCK_RECLAIM)) {
 4400                         error = NFSERR_GRACE;
 4401                         goto out;
 4402                 }
 4403                 if (nd != NULL && clp != NULL &&
 4404                     (nd->nd_flag & ND_NFSV41) != 0 &&
 4405                     (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
 4406                         error = NFSERR_NOGRACE;
 4407                         goto out;
 4408                 }
 4409 
 4410                 /*
 4411                  * If grace is almost over and we are still getting Reclaims,
 4412                  * extend grace a bit.
 4413                  */
 4414                 if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
 4415                     nfsrv_stablefirst.nsf_eograce)
 4416                         nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
 4417                                 NFSRV_LEASEDELTA;
 4418         }
 4419 
 4420 out:
 4421         NFSEXITCODE(error);
 4422         return (error);
 4423 }
 4424 
 4425 /*
 4426  * Do a server callback.
 4427  * The "trunc" argument is slightly overloaded and refers to different
 4428  * boolean arguments for CBRECALL and CBLAYOUTRECALL.
 4429  */
 4430 static int
 4431 nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
 4432     int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp,
 4433     int laytype, NFSPROC_T *p)
 4434 {
 4435         struct mbuf *m;
 4436         u_int32_t *tl;
 4437         struct nfsrv_descript *nd;
 4438         struct ucred *cred;
 4439         int error = 0, slotpos;
 4440         u_int32_t callback;
 4441         struct nfsdsession *sep = NULL;
 4442         uint64_t tval;
 4443         bool dotls;
 4444 
 4445         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 4446         cred = newnfs_getcred();
 4447         NFSLOCKSTATE(); /* mostly for lc_cbref++ */
 4448         if (clp->lc_flags & LCL_NEEDSCONFIRM) {
 4449                 NFSUNLOCKSTATE();
 4450                 panic("docallb");
 4451         }
 4452         clp->lc_cbref++;
 4453 
 4454         /*
 4455          * Fill the callback program# and version into the request
 4456          * structure for newnfs_connect() to use.
 4457          */
 4458         clp->lc_req.nr_prog = clp->lc_program;
 4459 #ifdef notnow
 4460         if ((clp->lc_flags & LCL_NFSV41) != 0)
 4461                 clp->lc_req.nr_vers = NFSV41_CBVERS;
 4462         else
 4463 #endif
 4464                 clp->lc_req.nr_vers = NFSV4_CBVERS;
 4465 
 4466         /*
 4467          * First, fill in some of the fields of nd and cr.
 4468          */
 4469         nd->nd_flag = ND_NFSV4;
 4470         if (clp->lc_flags & LCL_GSS)
 4471                 nd->nd_flag |= ND_KERBV;
 4472         if ((clp->lc_flags & LCL_NFSV41) != 0)
 4473                 nd->nd_flag |= ND_NFSV41;
 4474         if ((clp->lc_flags & LCL_NFSV42) != 0)
 4475                 nd->nd_flag |= ND_NFSV42;
 4476         nd->nd_repstat = 0;
 4477         cred->cr_uid = clp->lc_uid;
 4478         cred->cr_gid = clp->lc_gid;
 4479         callback = clp->lc_callback;
 4480         NFSUNLOCKSTATE();
 4481         cred->cr_ngroups = 1;
 4482 
 4483         /*
 4484          * Get the first mbuf for the request.
 4485          */
 4486         MGET(m, M_WAITOK, MT_DATA);
 4487         m->m_len = 0;
 4488         nd->nd_mreq = nd->nd_mb = m;
 4489         nd->nd_bpos = mtod(m, caddr_t);
 4490 
 4491         /*
 4492          * and build the callback request.
 4493          */
 4494         if (procnum == NFSV4OP_CBGETATTR) {
 4495                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4496                 error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
 4497                     "CB Getattr", &sep, &slotpos);
 4498                 if (error != 0) {
 4499                         m_freem(nd->nd_mreq);
 4500                         goto errout;
 4501                 }
 4502                 (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
 4503                 (void)nfsrv_putattrbit(nd, attrbitp);
 4504         } else if (procnum == NFSV4OP_CBRECALL) {
 4505                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4506                 error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
 4507                     "CB Recall", &sep, &slotpos);
 4508                 if (error != 0) {
 4509                         m_freem(nd->nd_mreq);
 4510                         goto errout;
 4511                 }
 4512                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 4513                 *tl++ = txdr_unsigned(stateidp->seqid);
 4514                 NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
 4515                     NFSX_STATEIDOTHER);
 4516                 tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 4517                 if (trunc)
 4518                         *tl = newnfs_true;
 4519                 else
 4520                         *tl = newnfs_false;
 4521                 (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
 4522         } else if (procnum == NFSV4OP_CBLAYOUTRECALL) {
 4523                 NFSD_DEBUG(4, "docallback layout recall\n");
 4524                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4525                 error = nfsrv_cbcallargs(nd, clp, callback,
 4526                     NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep, &slotpos);
 4527                 NFSD_DEBUG(4, "aft cbcallargs=%d\n", error);
 4528                 if (error != 0) {
 4529                         m_freem(nd->nd_mreq);
 4530                         goto errout;
 4531                 }
 4532                 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 4533                 *tl++ = txdr_unsigned(laytype);
 4534                 *tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY);
 4535                 if (trunc)
 4536                         *tl++ = newnfs_true;
 4537                 else
 4538                         *tl++ = newnfs_false;
 4539                 *tl = txdr_unsigned(NFSV4LAYOUTRET_FILE);
 4540                 nfsm_fhtom(nd, (uint8_t *)fhp, NFSX_MYFH, 0);
 4541                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID);
 4542                 tval = 0;
 4543                 txdr_hyper(tval, tl); tl += 2;
 4544                 tval = UINT64_MAX;
 4545                 txdr_hyper(tval, tl); tl += 2;
 4546                 *tl++ = txdr_unsigned(stateidp->seqid);
 4547                 NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER);
 4548                 tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 4549                 NFSD_DEBUG(4, "aft args\n");
 4550         } else if (procnum == NFSV4PROC_CBNULL) {
 4551                 nd->nd_procnum = NFSV4PROC_CBNULL;
 4552                 if ((clp->lc_flags & LCL_NFSV41) != 0) {
 4553                         error = nfsv4_getcbsession(clp, &sep);
 4554                         if (error != 0) {
 4555                                 m_freem(nd->nd_mreq);
 4556                                 goto errout;
 4557                         }
 4558                 }
 4559         } else {
 4560                 error = NFSERR_SERVERFAULT;
 4561                 m_freem(nd->nd_mreq);
 4562                 goto errout;
 4563         }
 4564 
 4565         /*
 4566          * Call newnfs_connect(), as required, and then newnfs_request().
 4567          */
 4568         dotls = false;
 4569         if ((clp->lc_flags & LCL_TLSCB) != 0)
 4570                 dotls = true;
 4571         (void) newnfs_sndlock(&clp->lc_req.nr_lock);
 4572         if (clp->lc_req.nr_client == NULL) {
 4573                 if ((clp->lc_flags & LCL_NFSV41) != 0) {
 4574                         error = ECONNREFUSED;
 4575                         if (procnum != NFSV4PROC_CBNULL)
 4576                                 nfsv4_freeslot(&sep->sess_cbsess, slotpos,
 4577                                     true);
 4578                         nfsrv_freesession(sep, NULL);
 4579                 } else if (nd->nd_procnum == NFSV4PROC_CBNULL)
 4580                         error = newnfs_connect(NULL, &clp->lc_req, cred,
 4581                             NULL, 1, dotls, &clp->lc_req.nr_client);
 4582                 else
 4583                         error = newnfs_connect(NULL, &clp->lc_req, cred,
 4584                             NULL, 3, dotls, &clp->lc_req.nr_client);
 4585         }
 4586         newnfs_sndunlock(&clp->lc_req.nr_lock);
 4587         NFSD_DEBUG(4, "aft sndunlock=%d\n", error);
 4588         if (!error) {
 4589                 if ((nd->nd_flag & ND_NFSV41) != 0) {
 4590                         KASSERT(sep != NULL, ("sep NULL"));
 4591                         if (sep->sess_cbsess.nfsess_xprt != NULL)
 4592                                 error = newnfs_request(nd, NULL, clp,
 4593                                     &clp->lc_req, NULL, NULL, cred,
 4594                                     clp->lc_program, clp->lc_req.nr_vers, NULL,
 4595                                     1, NULL, &sep->sess_cbsess);
 4596                         else {
 4597                                 /*
 4598                                  * This should probably never occur, but if a
 4599                                  * client somehow does an RPC without a
 4600                                  * SequenceID Op that causes a callback just
 4601                                  * after the nfsd threads have been terminated
 4602                                  * and restared we could conceivably get here
 4603                                  * without a backchannel xprt.
 4604                                  */
 4605                                 printf("nfsrv_docallback: no xprt\n");
 4606                                 error = ECONNREFUSED;
 4607                         }
 4608                         NFSD_DEBUG(4, "aft newnfs_request=%d\n", error);
 4609                         if (error != 0 && procnum != NFSV4PROC_CBNULL) {
 4610                                 /*
 4611                                  * It is likely that the callback was never
 4612                                  * processed by the client and, as such,
 4613                                  * the sequence# for the session slot needs
 4614                                  * to be backed up by one to avoid a
 4615                                  * NFSERR_SEQMISORDERED error reply.
 4616                                  * For the unlikely case where the callback
 4617                                  * was processed by the client, this will
 4618                                  * make the next callback on the slot
 4619                                  * appear to be a retry.
 4620                                  * Since callbacks never specify that the
 4621                                  * reply be cached, this "apparent retry"
 4622                                  * should not be a problem.
 4623                                  */
 4624                                 nfsv4_freeslot(&sep->sess_cbsess, slotpos,
 4625                                     true);
 4626                         }
 4627                         nfsrv_freesession(sep, NULL);
 4628                 } else
 4629                         error = newnfs_request(nd, NULL, clp, &clp->lc_req,
 4630                             NULL, NULL, cred, clp->lc_program,
 4631                             clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
 4632         }
 4633 errout:
 4634         NFSFREECRED(cred);
 4635 
 4636         /*
 4637          * If error is set here, the Callback path isn't working
 4638          * properly, so twiddle the appropriate LCL_ flags.
 4639          * (nd_repstat != 0 indicates the Callback path is working,
 4640          *  but the callback failed on the client.)
 4641          */
 4642         if (error) {
 4643                 /*
 4644                  * Mark the callback pathway down, which disabled issuing
 4645                  * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
 4646                  */
 4647                 NFSLOCKSTATE();
 4648                 clp->lc_flags |= LCL_CBDOWN;
 4649                 NFSUNLOCKSTATE();
 4650         } else {
 4651                 /*
 4652                  * Callback worked. If the callback path was down, disable
 4653                  * callbacks, so no more delegations will be issued. (This
 4654                  * is done on the assumption that the callback pathway is
 4655                  * flakey.)
 4656                  */
 4657                 NFSLOCKSTATE();
 4658                 if (clp->lc_flags & LCL_CBDOWN)
 4659                         clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
 4660                 NFSUNLOCKSTATE();
 4661                 if (nd->nd_repstat) {
 4662                         error = nd->nd_repstat;
 4663                         NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n",
 4664                             procnum, error);
 4665                 } else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
 4666                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 4667                             NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
 4668                             p, NULL);
 4669                 m_freem(nd->nd_mrep);
 4670         }
 4671         NFSLOCKSTATE();
 4672         clp->lc_cbref--;
 4673         if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
 4674                 clp->lc_flags &= ~LCL_WAKEUPWANTED;
 4675                 wakeup(clp);
 4676         }
 4677         NFSUNLOCKSTATE();
 4678 
 4679         free(nd, M_TEMP);
 4680         NFSEXITCODE(error);
 4681         return (error);
 4682 }
 4683 
 4684 /*
 4685  * Set up the compound RPC for the callback.
 4686  */
 4687 static int
 4688 nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
 4689     uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
 4690     int *slotposp)
 4691 {
 4692         uint32_t *tl;
 4693         int error, len;
 4694 
 4695         len = strlen(optag);
 4696         (void)nfsm_strtom(nd, optag, len);
 4697         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
 4698         if ((nd->nd_flag & ND_NFSV41) != 0) {
 4699                 if ((nd->nd_flag & ND_NFSV42) != 0)
 4700                         *tl++ = txdr_unsigned(NFSV42_MINORVERSION);
 4701                 else
 4702                         *tl++ = txdr_unsigned(NFSV41_MINORVERSION);
 4703                 *tl++ = txdr_unsigned(callback);
 4704                 *tl++ = txdr_unsigned(2);
 4705                 *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
 4706                 error = nfsv4_setcbsequence(nd, clp, 1, sepp, slotposp);
 4707                 if (error != 0)
 4708                         return (error);
 4709                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 4710                 *tl = txdr_unsigned(op);
 4711         } else {
 4712                 *tl++ = txdr_unsigned(NFSV4_MINORVERSION);
 4713                 *tl++ = txdr_unsigned(callback);
 4714                 *tl++ = txdr_unsigned(1);
 4715                 *tl = txdr_unsigned(op);
 4716         }
 4717         return (0);
 4718 }
 4719 
 4720 /*
 4721  * Return the next index# for a clientid. Mostly just increment and return
 4722  * the next one, but... if the 32bit unsigned does actually wrap around,
 4723  * it should be rebooted.
 4724  * At an average rate of one new client per second, it will wrap around in
 4725  * approximately 136 years. (I think the server will have been shut
 4726  * down or rebooted before then.)
 4727  */
 4728 static u_int32_t
 4729 nfsrv_nextclientindex(void)
 4730 {
 4731         static u_int32_t client_index = 0;
 4732 
 4733         client_index++;
 4734         if (client_index != 0)
 4735                 return (client_index);
 4736 
 4737         printf("%s: out of clientids\n", __func__);
 4738         return (client_index);
 4739 }
 4740 
 4741 /*
 4742  * Return the next index# for a stateid. Mostly just increment and return
 4743  * the next one, but... if the 32bit unsigned does actually wrap around
 4744  * (will a BSD server stay up that long?), find
 4745  * new start and end values.
 4746  */
 4747 static u_int32_t
 4748 nfsrv_nextstateindex(struct nfsclient *clp)
 4749 {
 4750         struct nfsstate *stp;
 4751         int i;
 4752         u_int32_t canuse, min_index, max_index;
 4753 
 4754         if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
 4755                 clp->lc_stateindex++;
 4756                 if (clp->lc_stateindex != clp->lc_statemaxindex)
 4757                         return (clp->lc_stateindex);
 4758         }
 4759 
 4760         /*
 4761          * Yuck, we've hit the end.
 4762          * Look for a new min and max.
 4763          */
 4764         min_index = 0;
 4765         max_index = 0xffffffff;
 4766         for (i = 0; i < nfsrv_statehashsize; i++) {
 4767             LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 4768                 if (stp->ls_stateid.other[2] > 0x80000000) {
 4769                     if (stp->ls_stateid.other[2] < max_index)
 4770                         max_index = stp->ls_stateid.other[2];
 4771                 } else {
 4772                     if (stp->ls_stateid.other[2] > min_index)
 4773                         min_index = stp->ls_stateid.other[2];
 4774                 }
 4775             }
 4776         }
 4777 
 4778         /*
 4779          * Yikes, highly unlikely, but I'll handle it anyhow.
 4780          */
 4781         if (min_index == 0x80000000 && max_index == 0x80000001) {
 4782             canuse = 0;
 4783             /*
 4784              * Loop around until we find an unused entry. Return that
 4785              * and set LCL_INDEXNOTOK, so the search will continue next time.
 4786              * (This is one of those rare cases where a goto is the
 4787              *  cleanest way to code the loop.)
 4788              */
 4789 tryagain:
 4790             for (i = 0; i < nfsrv_statehashsize; i++) {
 4791                 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 4792                     if (stp->ls_stateid.other[2] == canuse) {
 4793                         canuse++;
 4794                         goto tryagain;
 4795                     }
 4796                 }
 4797             }
 4798             clp->lc_flags |= LCL_INDEXNOTOK;
 4799             return (canuse);
 4800         }
 4801 
 4802         /*
 4803          * Ok to start again from min + 1.
 4804          */
 4805         clp->lc_stateindex = min_index + 1;
 4806         clp->lc_statemaxindex = max_index;
 4807         clp->lc_flags &= ~LCL_INDEXNOTOK;
 4808         return (clp->lc_stateindex);
 4809 }
 4810 
 4811 /*
 4812  * The following functions handle the stable storage file that deals with
 4813  * the edge conditions described in RFC3530 Sec. 8.6.3.
 4814  * The file is as follows:
 4815  * - a single record at the beginning that has the lease time of the
 4816  *   previous server instance (before the last reboot) and the nfsrvboottime
 4817  *   values for the previous server boots.
 4818  *   These previous boot times are used to ensure that the current
 4819  *   nfsrvboottime does not, somehow, get set to a previous one.
 4820  *   (This is important so that Stale ClientIDs and StateIDs can
 4821  *    be recognized.)
 4822  *   The number of previous nfsrvboottime values precedes the list.
 4823  * - followed by some number of appended records with:
 4824  *   - client id string
 4825  *   - flag that indicates it is a record revoking state via lease
 4826  *     expiration or similar
 4827  *     OR has successfully acquired state.
 4828  * These structures vary in length, with the client string at the end, up
 4829  * to NFSV4_OPAQUELIMIT in size.
 4830  *
 4831  * At the end of the grace period, the file is truncated, the first
 4832  * record is rewritten with updated information and any acquired state
 4833  * records for successful reclaims of state are written.
 4834  *
 4835  * Subsequent records are appended when the first state is issued to
 4836  * a client and when state is revoked for a client.
 4837  *
 4838  * When reading the file in, state issued records that come later in
 4839  * the file override older ones, since the append log is in chronological order.
 4840  * If, for some reason, the file can't be read, the grace period is
 4841  * immediately terminated and all reclaims get NFSERR_NOGRACE.
 4842  */
 4843 
 4844 /*
 4845  * Read in the stable storage file. Called by nfssvc() before the nfsd
 4846  * processes start servicing requests.
 4847  */
 4848 void
 4849 nfsrv_setupstable(NFSPROC_T *p)
 4850 {
 4851         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 4852         struct nfsrv_stable *sp, *nsp;
 4853         struct nfst_rec *tsp;
 4854         int error, i, tryagain;
 4855         off_t off = 0;
 4856         ssize_t aresid, len;
 4857 
 4858         /*
 4859          * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
 4860          * a reboot, so state has not been lost.
 4861          */
 4862         if (sf->nsf_flags & NFSNSF_UPDATEDONE)
 4863                 return;
 4864         /*
 4865          * Set Grace over just until the file reads successfully.
 4866          */
 4867         nfsrvboottime = time_second;
 4868         LIST_INIT(&sf->nsf_head);
 4869         sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
 4870         sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
 4871         if (sf->nsf_fp == NULL)
 4872                 return;
 4873         error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4874             (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
 4875             0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4876         if (error || aresid || sf->nsf_numboots == 0 ||
 4877                 sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
 4878                 return;
 4879 
 4880         /*
 4881          * Now, read in the boottimes.
 4882          */
 4883         sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
 4884                 sizeof (time_t), M_TEMP, M_WAITOK);
 4885         off = sizeof (struct nfsf_rec);
 4886         error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4887             (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
 4888             UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4889         if (error || aresid) {
 4890                 free(sf->nsf_bootvals, M_TEMP);
 4891                 sf->nsf_bootvals = NULL;
 4892                 return;
 4893         }
 4894 
 4895         /*
 4896          * Make sure this nfsrvboottime is different from all recorded
 4897          * previous ones.
 4898          */
 4899         do {
 4900                 tryagain = 0;
 4901                 for (i = 0; i < sf->nsf_numboots; i++) {
 4902                         if (nfsrvboottime == sf->nsf_bootvals[i]) {
 4903                                 nfsrvboottime++;
 4904                                 tryagain = 1;
 4905                                 break;
 4906                         }
 4907                 }
 4908         } while (tryagain);
 4909 
 4910         sf->nsf_flags |= NFSNSF_OK;
 4911         off += (sf->nsf_numboots * sizeof (time_t));
 4912 
 4913         /*
 4914          * Read through the file, building a list of records for grace
 4915          * checking.
 4916          * Each record is between sizeof (struct nfst_rec) and
 4917          * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
 4918          * and is actually sizeof (struct nfst_rec) + nst_len - 1.
 4919          */
 4920         tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
 4921                 NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
 4922         do {
 4923             error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4924                 (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
 4925                 off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4926             len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
 4927             if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
 4928                 len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
 4929                 /*
 4930                  * Yuck, the file has been corrupted, so just return
 4931                  * after clearing out any restart state, so the grace period
 4932                  * is over.
 4933                  */
 4934                 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
 4935                         LIST_REMOVE(sp, nst_list);
 4936                         free(sp, M_TEMP);
 4937                 }
 4938                 free(tsp, M_TEMP);
 4939                 sf->nsf_flags &= ~NFSNSF_OK;
 4940                 free(sf->nsf_bootvals, M_TEMP);
 4941                 sf->nsf_bootvals = NULL;
 4942                 return;
 4943             }
 4944             if (len > 0) {
 4945                 off += sizeof (struct nfst_rec) + tsp->len - 1;
 4946                 /*
 4947                  * Search the list for a matching client.
 4948                  */
 4949                 LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
 4950                         if (tsp->len == sp->nst_len &&
 4951                             !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
 4952                                 break;
 4953                 }
 4954                 if (sp == LIST_END(&sf->nsf_head)) {
 4955                         sp = (struct nfsrv_stable *)malloc(tsp->len +
 4956                                 sizeof (struct nfsrv_stable) - 1, M_TEMP,
 4957                                 M_WAITOK);
 4958                         NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
 4959                                 sizeof (struct nfst_rec) + tsp->len - 1);
 4960                         LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
 4961                 } else {
 4962                         if (tsp->flag == NFSNST_REVOKE)
 4963                                 sp->nst_flag |= NFSNST_REVOKE;
 4964                         else
 4965                                 /*
 4966                                  * A subsequent timestamp indicates the client
 4967                                  * did a setclientid/confirm and any previous
 4968                                  * revoke is no longer relevant.
 4969                                  */
 4970                                 sp->nst_flag &= ~NFSNST_REVOKE;
 4971                 }
 4972             }
 4973         } while (len > 0);
 4974         free(tsp, M_TEMP);
 4975         sf->nsf_flags = NFSNSF_OK;
 4976         sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
 4977                 NFSRV_LEASEDELTA;
 4978 }
 4979 
 4980 /*
 4981  * Update the stable storage file, now that the grace period is over.
 4982  */
 4983 void
 4984 nfsrv_updatestable(NFSPROC_T *p)
 4985 {
 4986         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 4987         struct nfsrv_stable *sp, *nsp;
 4988         int i;
 4989         struct nfsvattr nva;
 4990         vnode_t vp;
 4991 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
 4992         mount_t mp = NULL;
 4993 #endif
 4994         int error;
 4995 
 4996         if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
 4997                 return;
 4998         sf->nsf_flags |= NFSNSF_UPDATEDONE;
 4999         /*
 5000          * Ok, we need to rewrite the stable storage file.
 5001          * - truncate to 0 length
 5002          * - write the new first structure
 5003          * - loop through the data structures, writing out any that
 5004          *   have timestamps older than the old boot
 5005          */
 5006         if (sf->nsf_bootvals) {
 5007                 sf->nsf_numboots++;
 5008                 for (i = sf->nsf_numboots - 2; i >= 0; i--)
 5009                         sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
 5010         } else {
 5011                 sf->nsf_numboots = 1;
 5012                 sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
 5013                         M_TEMP, M_WAITOK);
 5014         }
 5015         sf->nsf_bootvals[0] = nfsrvboottime;
 5016         sf->nsf_lease = nfsrv_lease;
 5017         NFSVNO_ATTRINIT(&nva);
 5018         NFSVNO_SETATTRVAL(&nva, size, 0);
 5019         vp = NFSFPVNODE(sf->nsf_fp);
 5020         vn_start_write(vp, &mp, V_WAIT);
 5021         if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
 5022                 error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
 5023                     NULL);
 5024                 NFSVOPUNLOCK(vp);
 5025         } else
 5026                 error = EPERM;
 5027         vn_finished_write(mp);
 5028         if (!error)
 5029             error = NFSD_RDWR(UIO_WRITE, vp,
 5030                 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
 5031                 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
 5032         if (!error)
 5033             error = NFSD_RDWR(UIO_WRITE, vp,
 5034                 (caddr_t)sf->nsf_bootvals,
 5035                 sf->nsf_numboots * sizeof (time_t),
 5036                 (off_t)(sizeof (struct nfsf_rec)),
 5037                 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
 5038         free(sf->nsf_bootvals, M_TEMP);
 5039         sf->nsf_bootvals = NULL;
 5040         if (error) {
 5041                 sf->nsf_flags &= ~NFSNSF_OK;
 5042                 printf("EEK! Can't write NfsV4 stable storage file\n");
 5043                 return;
 5044         }
 5045         sf->nsf_flags |= NFSNSF_OK;
 5046 
 5047         /*
 5048          * Loop through the list and write out timestamp records for
 5049          * any clients that successfully reclaimed state.
 5050          */
 5051         LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
 5052                 if (sp->nst_flag & NFSNST_GOTSTATE) {
 5053                         nfsrv_writestable(sp->nst_client, sp->nst_len,
 5054                                 NFSNST_NEWSTATE, p);
 5055                         sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
 5056                 }
 5057                 LIST_REMOVE(sp, nst_list);
 5058                 free(sp, M_TEMP);
 5059         }
 5060         nfsrv_backupstable();
 5061 }
 5062 
 5063 /*
 5064  * Append a record to the stable storage file.
 5065  */
 5066 void
 5067 nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
 5068 {
 5069         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 5070         struct nfst_rec *sp;
 5071         int error;
 5072 
 5073         if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
 5074                 return;
 5075         sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
 5076                 len - 1, M_TEMP, M_WAITOK);
 5077         sp->len = len;
 5078         NFSBCOPY(client, sp->client, len);
 5079         sp->flag = flag;
 5080         error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
 5081             (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
 5082             UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
 5083         free(sp, M_TEMP);
 5084         if (error) {
 5085                 sf->nsf_flags &= ~NFSNSF_OK;
 5086                 printf("EEK! Can't write NfsV4 stable storage file\n");
 5087         }
 5088 }
 5089 
 5090 /*
 5091  * This function is called during the grace period to mark a client
 5092  * that successfully reclaimed state.
 5093  */
 5094 static void
 5095 nfsrv_markstable(struct nfsclient *clp)
 5096 {
 5097         struct nfsrv_stable *sp;
 5098 
 5099         /*
 5100          * First find the client structure.
 5101          */
 5102         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 5103                 if (sp->nst_len == clp->lc_idlen &&
 5104                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 5105                         break;
 5106         }
 5107         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
 5108                 return;
 5109 
 5110         /*
 5111          * Now, just mark it and set the nfsclient back pointer.
 5112          */
 5113         sp->nst_flag |= NFSNST_GOTSTATE;
 5114         sp->nst_clp = clp;
 5115 }
 5116 
 5117 /*
 5118  * This function is called when a NFSv4.1 client does a ReclaimComplete.
 5119  * Very similar to nfsrv_markstable(), except for the flag being set.
 5120  */
 5121 static void
 5122 nfsrv_markreclaim(struct nfsclient *clp)
 5123 {
 5124         struct nfsrv_stable *sp;
 5125 
 5126         /*
 5127          * First find the client structure.
 5128          */
 5129         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 5130                 if (sp->nst_len == clp->lc_idlen &&
 5131                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 5132                         break;
 5133         }
 5134         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
 5135                 return;
 5136 
 5137         /*
 5138          * Now, just set the flag.
 5139          */
 5140         sp->nst_flag |= NFSNST_RECLAIMED;
 5141 }
 5142 
 5143 /*
 5144  * This function is called for a reclaim, to see if it gets grace.
 5145  * It returns 0 if a reclaim is allowed, 1 otherwise.
 5146  */
 5147 static int
 5148 nfsrv_checkstable(struct nfsclient *clp)
 5149 {
 5150         struct nfsrv_stable *sp;
 5151 
 5152         /*
 5153          * First, find the entry for the client.
 5154          */
 5155         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 5156                 if (sp->nst_len == clp->lc_idlen &&
 5157                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 5158                         break;
 5159         }
 5160 
 5161         /*
 5162          * If not in the list, state was revoked or no state was issued
 5163          * since the previous reboot, a reclaim is denied.
 5164          */
 5165         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
 5166             (sp->nst_flag & NFSNST_REVOKE) ||
 5167             !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
 5168                 return (1);
 5169         return (0);
 5170 }
 5171 
 5172 /*
 5173  * Test for and try to clear out a conflicting client. This is called by
 5174  * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
 5175  * a found.
 5176  * The trick here is that it can't revoke a conflicting client with an
 5177  * expired lease unless it holds the v4root lock, so...
 5178  * If no v4root lock, get the lock and return 1 to indicate "try again".
 5179  * Return 0 to indicate the conflict can't be revoked and 1 to indicate
 5180  * the revocation worked and the conflicting client is "bye, bye", so it
 5181  * can be tried again.
 5182  * Return 2 to indicate that the vnode is VIRF_DOOMED after NFSVOPLOCK().
 5183  * Unlocks State before a non-zero value is returned.
 5184  */
 5185 static int
 5186 nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
 5187     NFSPROC_T *p)
 5188 {
 5189         int gotlock, lktype = 0;
 5190 
 5191         /*
 5192          * If lease hasn't expired, we can't fix it.
 5193          */
 5194         if (clp->lc_expiry >= NFSD_MONOSEC ||
 5195             !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
 5196                 return (0);
 5197         if (*haslockp == 0) {
 5198                 NFSUNLOCKSTATE();
 5199                 if (vp != NULL) {
 5200                         lktype = NFSVOPISLOCKED(vp);
 5201                         NFSVOPUNLOCK(vp);
 5202                 }
 5203                 NFSLOCKV4ROOTMUTEX();
 5204                 nfsv4_relref(&nfsv4rootfs_lock);
 5205                 do {
 5206                         gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 5207                             NFSV4ROOTLOCKMUTEXPTR, NULL);
 5208                 } while (!gotlock);
 5209                 NFSUNLOCKV4ROOTMUTEX();
 5210                 *haslockp = 1;
 5211                 if (vp != NULL) {
 5212                         NFSVOPLOCK(vp, lktype | LK_RETRY);
 5213                         if (VN_IS_DOOMED(vp))
 5214                                 return (2);
 5215                 }
 5216                 return (1);
 5217         }
 5218         NFSUNLOCKSTATE();
 5219 
 5220         /*
 5221          * Ok, we can expire the conflicting client.
 5222          */
 5223         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
 5224         nfsrv_backupstable();
 5225         nfsrv_cleanclient(clp, p);
 5226         nfsrv_freedeleglist(&clp->lc_deleg);
 5227         nfsrv_freedeleglist(&clp->lc_olddeleg);
 5228         LIST_REMOVE(clp, lc_hash);
 5229         nfsrv_zapclient(clp, p);
 5230         return (1);
 5231 }
 5232 
 5233 /*
 5234  * Resolve a delegation conflict.
 5235  * Returns 0 to indicate the conflict was resolved without sleeping.
 5236  * Return -1 to indicate that the caller should check for conflicts again.
 5237  * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
 5238  *
 5239  * Also, manipulate the nfsv4root_lock, as required. It isn't changed
 5240  * for a return of 0, since there was no sleep and it could be required
 5241  * later. It is released for a return of NFSERR_DELAY, since the caller
 5242  * will return that error. It is released when a sleep was done waiting
 5243  * for the delegation to be returned or expire (so that other nfsds can
 5244  * handle ops). Then, it must be acquired for the write to stable storage.
 5245  * (This function is somewhat similar to nfsrv_clientconflict(), but
 5246  *  the semantics differ in a couple of subtle ways. The return of 0
 5247  *  indicates the conflict was resolved without sleeping here, not
 5248  *  that the conflict can't be resolved and the handling of nfsv4root_lock
 5249  *  differs, as noted above.)
 5250  * Unlocks State before returning a non-zero value.
 5251  */
 5252 static int
 5253 nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
 5254     vnode_t vp)
 5255 {
 5256         struct nfsclient *clp = stp->ls_clp;
 5257         int gotlock, error, lktype = 0, retrycnt, zapped_clp;
 5258         nfsv4stateid_t tstateid;
 5259         fhandle_t tfh;
 5260 
 5261         /*
 5262          * If the conflict is with an old delegation...
 5263          */
 5264         if (stp->ls_flags & NFSLCK_OLDDELEG) {
 5265                 /*
 5266                  * You can delete it, if it has expired.
 5267                  */
 5268                 if (clp->lc_delegtime < NFSD_MONOSEC) {
 5269                         nfsrv_freedeleg(stp);
 5270                         NFSUNLOCKSTATE();
 5271                         error = -1;
 5272                         goto out;
 5273                 }
 5274                 NFSUNLOCKSTATE();
 5275                 /*
 5276                  * During this delay, the old delegation could expire or it
 5277                  * could be recovered by the client via an Open with
 5278                  * CLAIM_DELEGATE_PREV.
 5279                  * Release the nfsv4root_lock, if held.
 5280                  */
 5281                 if (*haslockp) {
 5282                         *haslockp = 0;
 5283                         NFSLOCKV4ROOTMUTEX();
 5284                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5285                         NFSUNLOCKV4ROOTMUTEX();
 5286                 }
 5287                 error = NFSERR_DELAY;
 5288                 goto out;
 5289         }
 5290 
 5291         /*
 5292          * It's a current delegation, so:
 5293          * - check to see if the delegation has expired
 5294          *   - if so, get the v4root lock and then expire it
 5295          */
 5296         if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0 || (stp->ls_lastrecall <
 5297             NFSD_MONOSEC && clp->lc_expiry >= NFSD_MONOSEC &&
 5298             stp->ls_delegtime >= NFSD_MONOSEC)) {
 5299                 /*
 5300                  * - do a recall callback, since not yet done
 5301                  * For now, never allow truncate to be set. To use
 5302                  * truncate safely, it must be guaranteed that the
 5303                  * Remove, Rename or Setattr with size of 0 will
 5304                  * succeed and that would require major changes to
 5305                  * the VFS/Vnode OPs.
 5306                  * Set the expiry time large enough so that it won't expire
 5307                  * until after the callback, then set it correctly, once
 5308                  * the callback is done. (The delegation will now time
 5309                  * out whether or not the Recall worked ok. The timeout
 5310                  * will be extended when ops are done on the delegation
 5311                  * stateid, up to the timelimit.)
 5312                  */
 5313                 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) {
 5314                         stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
 5315                             NFSRV_LEASEDELTA;
 5316                         stp->ls_delegtimelimit = NFSD_MONOSEC + (6 *
 5317                             nfsrv_lease) + NFSRV_LEASEDELTA;
 5318                         stp->ls_flags |= NFSLCK_DELEGRECALL;
 5319                 }
 5320                 stp->ls_lastrecall = time_uptime + 1;
 5321 
 5322                 /*
 5323                  * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
 5324                  * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
 5325                  * in order to try and avoid a race that could happen
 5326                  * when a CBRecall request passed the Open reply with
 5327                  * the delegation in it when transitting the network.
 5328                  * Since nfsrv_docallback will sleep, don't use stp after
 5329                  * the call.
 5330                  */
 5331                 NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
 5332                     sizeof (tstateid));
 5333                 NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
 5334                     sizeof (tfh));
 5335                 NFSUNLOCKSTATE();
 5336                 if (*haslockp) {
 5337                         *haslockp = 0;
 5338                         NFSLOCKV4ROOTMUTEX();
 5339                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5340                         NFSUNLOCKV4ROOTMUTEX();
 5341                 }
 5342                 retrycnt = 0;
 5343                 do {
 5344                     error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
 5345                         &tstateid, 0, &tfh, NULL, NULL, 0, p);
 5346                     retrycnt++;
 5347                 } while ((error == NFSERR_BADSTATEID ||
 5348                     error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
 5349                 error = NFSERR_DELAY;
 5350                 goto out;
 5351         }
 5352 
 5353         if (clp->lc_expiry >= NFSD_MONOSEC &&
 5354             stp->ls_delegtime >= NFSD_MONOSEC) {
 5355                 NFSUNLOCKSTATE();
 5356                 /*
 5357                  * A recall has been done, but it has not yet expired.
 5358                  * So, RETURN_DELAY.
 5359                  */
 5360                 if (*haslockp) {
 5361                         *haslockp = 0;
 5362                         NFSLOCKV4ROOTMUTEX();
 5363                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5364                         NFSUNLOCKV4ROOTMUTEX();
 5365                 }
 5366                 error = NFSERR_DELAY;
 5367                 goto out;
 5368         }
 5369 
 5370         /*
 5371          * If we don't yet have the lock, just get it and then return,
 5372          * since we need that before deleting expired state, such as
 5373          * this delegation.
 5374          * When getting the lock, unlock the vnode, so other nfsds that
 5375          * are in progress, won't get stuck waiting for the vnode lock.
 5376          */
 5377         if (*haslockp == 0) {
 5378                 NFSUNLOCKSTATE();
 5379                 if (vp != NULL) {
 5380                         lktype = NFSVOPISLOCKED(vp);
 5381                         NFSVOPUNLOCK(vp);
 5382                 }
 5383                 NFSLOCKV4ROOTMUTEX();
 5384                 nfsv4_relref(&nfsv4rootfs_lock);
 5385                 do {
 5386                         gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 5387                             NFSV4ROOTLOCKMUTEXPTR, NULL);
 5388                 } while (!gotlock);
 5389                 NFSUNLOCKV4ROOTMUTEX();
 5390                 *haslockp = 1;
 5391                 if (vp != NULL) {
 5392                         NFSVOPLOCK(vp, lktype | LK_RETRY);
 5393                         if (VN_IS_DOOMED(vp)) {
 5394                                 *haslockp = 0;
 5395                                 NFSLOCKV4ROOTMUTEX();
 5396                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5397                                 NFSUNLOCKV4ROOTMUTEX();
 5398                                 error = NFSERR_PERM;
 5399                                 goto out;
 5400                         }
 5401                 }
 5402                 error = -1;
 5403                 goto out;
 5404         }
 5405 
 5406         NFSUNLOCKSTATE();
 5407         /*
 5408          * Ok, we can delete the expired delegation.
 5409          * First, write the Revoke record to stable storage and then
 5410          * clear out the conflict.
 5411          * Since all other nfsd threads are now blocked, we can safely
 5412          * sleep without the state changing.
 5413          */
 5414         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
 5415         nfsrv_backupstable();
 5416         if (clp->lc_expiry < NFSD_MONOSEC) {
 5417                 nfsrv_cleanclient(clp, p);
 5418                 nfsrv_freedeleglist(&clp->lc_deleg);
 5419                 nfsrv_freedeleglist(&clp->lc_olddeleg);
 5420                 LIST_REMOVE(clp, lc_hash);
 5421                 zapped_clp = 1;
 5422         } else {
 5423                 nfsrv_freedeleg(stp);
 5424                 zapped_clp = 0;
 5425         }
 5426         if (zapped_clp)
 5427                 nfsrv_zapclient(clp, p);
 5428         error = -1;
 5429 
 5430 out:
 5431         NFSEXITCODE(error);
 5432         return (error);
 5433 }
 5434 
 5435 /*
 5436  * Check for a remove allowed, if remove is set to 1 and get rid of
 5437  * delegations.
 5438  */
 5439 int
 5440 nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd,
 5441     nfsquad_t clientid, NFSPROC_T *p)
 5442 {
 5443         struct nfsclient *clp;
 5444         struct nfsstate *stp;
 5445         struct nfslockfile *lfp;
 5446         int error, haslock = 0;
 5447         fhandle_t nfh;
 5448 
 5449         clp = NULL;
 5450         /*
 5451          * First, get the lock file structure.
 5452          * (A return of -1 means no associated state, so remove ok.)
 5453          */
 5454         error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
 5455 tryagain:
 5456         NFSLOCKSTATE();
 5457         if (error == 0 && clientid.qval != 0)
 5458                 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 5459                     (nfsquad_t)((u_quad_t)0), 0, nd, p);
 5460         if (!error)
 5461                 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
 5462         if (error) {
 5463                 NFSUNLOCKSTATE();
 5464                 if (haslock) {
 5465                         NFSLOCKV4ROOTMUTEX();
 5466                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5467                         NFSUNLOCKV4ROOTMUTEX();
 5468                 }
 5469                 if (error == -1)
 5470                         error = 0;
 5471                 goto out;
 5472         }
 5473 
 5474         /*
 5475          * Now, we must Recall any delegations.
 5476          */
 5477         error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
 5478         if (error) {
 5479                 /*
 5480                  * nfsrv_cleandeleg() unlocks state for non-zero
 5481                  * return.
 5482                  */
 5483                 if (error == -1)
 5484                         goto tryagain;
 5485                 if (haslock) {
 5486                         NFSLOCKV4ROOTMUTEX();
 5487                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5488                         NFSUNLOCKV4ROOTMUTEX();
 5489                 }
 5490                 goto out;
 5491         }
 5492 
 5493         /*
 5494          * Now, look for a conflicting open share.
 5495          */
 5496         if (remove) {
 5497                 /*
 5498                  * If the entry in the directory was the last reference to the
 5499                  * corresponding filesystem object, the object can be destroyed
 5500                  * */
 5501                 if(lfp->lf_usecount>1)
 5502                         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 5503                                 if (stp->ls_flags & NFSLCK_WRITEDENY) {
 5504                                         error = NFSERR_FILEOPEN;
 5505                                         break;
 5506                                 }
 5507                         }
 5508         }
 5509 
 5510         NFSUNLOCKSTATE();
 5511         if (haslock) {
 5512                 NFSLOCKV4ROOTMUTEX();
 5513                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5514                 NFSUNLOCKV4ROOTMUTEX();
 5515         }
 5516 
 5517 out:
 5518         NFSEXITCODE(error);
 5519         return (error);
 5520 }
 5521 
 5522 /*
 5523  * Clear out all delegations for the file referred to by lfp.
 5524  * May return NFSERR_DELAY, if there will be a delay waiting for
 5525  * delegations to expire.
 5526  * Returns -1 to indicate it slept while recalling a delegation.
 5527  * This function has the side effect of deleting the nfslockfile structure,
 5528  * if it no longer has associated state and didn't have to sleep.
 5529  * Unlocks State before a non-zero value is returned.
 5530  */
 5531 static int
 5532 nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
 5533     struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
 5534 {
 5535         struct nfsstate *stp, *nstp;
 5536         int ret = 0;
 5537 
 5538         stp = LIST_FIRST(&lfp->lf_deleg);
 5539         while (stp != LIST_END(&lfp->lf_deleg)) {
 5540                 nstp = LIST_NEXT(stp, ls_file);
 5541                 if (stp->ls_clp != clp) {
 5542                         ret = nfsrv_delegconflict(stp, haslockp, p, vp);
 5543                         if (ret) {
 5544                                 /*
 5545                                  * nfsrv_delegconflict() unlocks state
 5546                                  * when it returns non-zero.
 5547                                  */
 5548                                 goto out;
 5549                         }
 5550                 }
 5551                 stp = nstp;
 5552         }
 5553 out:
 5554         NFSEXITCODE(ret);
 5555         return (ret);
 5556 }
 5557 
 5558 /*
 5559  * There are certain operations that, when being done outside of NFSv4,
 5560  * require that any NFSv4 delegation for the file be recalled.
 5561  * This function is to be called for those cases:
 5562  * VOP_RENAME() - When a delegation is being recalled for any reason,
 5563  *      the client may have to do Opens against the server, using the file's
 5564  *      final component name. If the file has been renamed on the server,
 5565  *      that component name will be incorrect and the Open will fail.
 5566  * VOP_REMOVE() - Theoretically, a client could Open a file after it has
 5567  *      been removed on the server, if there is a delegation issued to
 5568  *      that client for the file. I say "theoretically" since clients
 5569  *      normally do an Access Op before the Open and that Access Op will
 5570  *      fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
 5571  *      they will detect the file's removal in the same manner. (There is
 5572  *      one case where RFC3530 allows a client to do an Open without first
 5573  *      doing an Access Op, which is passage of a check against the ACE
 5574  *      returned with a Write delegation, but current practice is to ignore
 5575  *      the ACE and always do an Access Op.)
 5576  *      Since the functions can only be called with an unlocked vnode, this
 5577  *      can't be done at this time.
 5578  * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
 5579  *      locks locally in the client, which are not visible to the server. To
 5580  *      deal with this, issuing of delegations for a vnode must be disabled
 5581  *      and all delegations for the vnode recalled. This is done via the
 5582  *      second function, using the VV_DISABLEDELEG vflag on the vnode.
 5583  */
 5584 void
 5585 nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
 5586 {
 5587         time_t starttime;
 5588         int error;
 5589 
 5590         /*
 5591          * First, check to see if the server is currently running and it has
 5592          * been called for a regular file when issuing delegations.
 5593          */
 5594         if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
 5595             nfsrv_issuedelegs == 0)
 5596                 return;
 5597 
 5598         KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
 5599         /*
 5600          * First, get a reference on the nfsv4rootfs_lock so that an
 5601          * exclusive lock cannot be acquired by another thread.
 5602          */
 5603         NFSLOCKV4ROOTMUTEX();
 5604         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 5605         NFSUNLOCKV4ROOTMUTEX();
 5606 
 5607         /*
 5608          * Now, call nfsrv_checkremove() in a loop while it returns
 5609          * NFSERR_DELAY. Return upon any other error or when timed out.
 5610          */
 5611         starttime = NFSD_MONOSEC;
 5612         do {
 5613                 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
 5614                         error = nfsrv_checkremove(vp, 0, NULL,
 5615                             (nfsquad_t)((u_quad_t)0), p);
 5616                         NFSVOPUNLOCK(vp);
 5617                 } else
 5618                         error = EPERM;
 5619                 if (error == NFSERR_DELAY) {
 5620                         if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
 5621                                 break;
 5622                         /* Sleep for a short period of time */
 5623                         (void) nfs_catnap(PZERO, 0, "nfsremove");
 5624                 }
 5625         } while (error == NFSERR_DELAY);
 5626         NFSLOCKV4ROOTMUTEX();
 5627         nfsv4_relref(&nfsv4rootfs_lock);
 5628         NFSUNLOCKV4ROOTMUTEX();
 5629 }
 5630 
 5631 void
 5632 nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
 5633 {
 5634 
 5635 #ifdef VV_DISABLEDELEG
 5636         /*
 5637          * First, flag issuance of delegations disabled.
 5638          */
 5639         atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
 5640 #endif
 5641 
 5642         /*
 5643          * Then call nfsd_recalldelegation() to get rid of all extant
 5644          * delegations.
 5645          */
 5646         nfsd_recalldelegation(vp, p);
 5647 }
 5648 
 5649 /*
 5650  * Check for conflicting locks, etc. and then get rid of delegations.
 5651  * (At one point I thought that I should get rid of delegations for any
 5652  *  Setattr, since it could potentially disallow the I/O op (read or write)
 5653  *  allowed by the delegation. However, Setattr Ops that aren't changing
 5654  *  the size get a stateid of all 0s, so you can't tell if it is a delegation
 5655  *  for the same client or a different one, so I decided to only get rid
 5656  *  of delegations for other clients when the size is being changed.)
 5657  * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
 5658  * as Write backs, even if there is no delegation, so it really isn't any
 5659  * different?)
 5660  */
 5661 int
 5662 nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
 5663     nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
 5664     struct nfsexstuff *exp, NFSPROC_T *p)
 5665 {
 5666         struct nfsstate st, *stp = &st;
 5667         struct nfslock lo, *lop = &lo;
 5668         int error = 0;
 5669         nfsquad_t clientid;
 5670 
 5671         if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
 5672                 stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
 5673                 lop->lo_first = nvap->na_size;
 5674         } else {
 5675                 stp->ls_flags = 0;
 5676                 lop->lo_first = 0;
 5677         }
 5678         if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
 5679             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
 5680             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
 5681             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
 5682                 stp->ls_flags |= NFSLCK_SETATTR;
 5683         if (stp->ls_flags == 0)
 5684                 goto out;
 5685         lop->lo_end = NFS64BITSSET;
 5686         lop->lo_flags = NFSLCK_WRITE;
 5687         stp->ls_ownerlen = 0;
 5688         stp->ls_op = NULL;
 5689         stp->ls_uid = nd->nd_cred->cr_uid;
 5690         stp->ls_stateid.seqid = stateidp->seqid;
 5691         clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
 5692         clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
 5693         stp->ls_stateid.other[2] = stateidp->other[2];
 5694         error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 5695             stateidp, exp, nd, p);
 5696 
 5697 out:
 5698         NFSEXITCODE2(error, nd);
 5699         return (error);
 5700 }
 5701 
 5702 /*
 5703  * Check for a write delegation and do a CBGETATTR if there is one, updating
 5704  * the attributes, as required.
 5705  * Should I return an error if I can't get the attributes? (For now, I'll
 5706  * just return ok.
 5707  */
 5708 int
 5709 nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
 5710     struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
 5711 {
 5712         struct nfsstate *stp;
 5713         struct nfslockfile *lfp;
 5714         struct nfsclient *clp;
 5715         struct nfsvattr nva;
 5716         fhandle_t nfh;
 5717         int error = 0;
 5718         nfsattrbit_t cbbits;
 5719         u_quad_t delegfilerev;
 5720 
 5721         NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
 5722         if (!NFSNONZERO_ATTRBIT(&cbbits))
 5723                 goto out;
 5724         if (nfsrv_writedelegcnt == 0)
 5725                 goto out;
 5726 
 5727         /*
 5728          * Get the lock file structure.
 5729          * (A return of -1 means no associated state, so return ok.)
 5730          */
 5731         error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
 5732         NFSLOCKSTATE();
 5733         if (!error)
 5734                 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
 5735         if (error) {
 5736                 NFSUNLOCKSTATE();
 5737                 if (error == -1)
 5738                         error = 0;
 5739                 goto out;
 5740         }
 5741 
 5742         /*
 5743          * Now, look for a write delegation.
 5744          */
 5745         LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 5746                 if (stp->ls_flags & NFSLCK_DELEGWRITE)
 5747                         break;
 5748         }
 5749         if (stp == LIST_END(&lfp->lf_deleg)) {
 5750                 NFSUNLOCKSTATE();
 5751                 goto out;
 5752         }
 5753         clp = stp->ls_clp;
 5754 
 5755         /* If the clientid is not confirmed, ignore the delegation. */
 5756         if (clp->lc_flags & LCL_NEEDSCONFIRM) {
 5757                 NFSUNLOCKSTATE();
 5758                 goto out;
 5759         }
 5760 
 5761         delegfilerev = stp->ls_filerev;
 5762         /*
 5763          * If the Write delegation was issued as a part of this Compound RPC
 5764          * or if we have an Implied Clientid (used in a previous Op in this
 5765          * compound) and it is the client the delegation was issued to,
 5766          * just return ok.
 5767          * I also assume that it is from the same client iff the network
 5768          * host IP address is the same as the callback address. (Not
 5769          * exactly correct by the RFC, but avoids a lot of Getattr
 5770          * callbacks.)
 5771          */
 5772         if (nd->nd_compref == stp->ls_compref ||
 5773             ((nd->nd_flag & ND_IMPLIEDCLID) &&
 5774              clp->lc_clientid.qval == nd->nd_clientid.qval) ||
 5775              nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
 5776                 NFSUNLOCKSTATE();
 5777                 goto out;
 5778         }
 5779 
 5780         /*
 5781          * We are now done with the delegation state structure,
 5782          * so the statelock can be released and we can now tsleep().
 5783          */
 5784 
 5785         /*
 5786          * Now, we must do the CB Getattr callback, to see if Change or Size
 5787          * has changed.
 5788          */
 5789         if (clp->lc_expiry >= NFSD_MONOSEC) {
 5790                 NFSUNLOCKSTATE();
 5791                 NFSVNO_ATTRINIT(&nva);
 5792                 nva.na_filerev = NFS64BITSSET;
 5793                 error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
 5794                     0, &nfh, &nva, &cbbits, 0, p);
 5795                 if (!error) {
 5796                         if ((nva.na_filerev != NFS64BITSSET &&
 5797                             nva.na_filerev > delegfilerev) ||
 5798                             (NFSVNO_ISSETSIZE(&nva) &&
 5799                              nva.na_size != nvap->na_size)) {
 5800                                 error = nfsvno_updfilerev(vp, nvap, nd, p);
 5801                                 if (NFSVNO_ISSETSIZE(&nva))
 5802                                         nvap->na_size = nva.na_size;
 5803                         }
 5804                 } else
 5805                         error = 0;      /* Ignore callback errors for now. */
 5806         } else {
 5807                 NFSUNLOCKSTATE();
 5808         }
 5809 
 5810 out:
 5811         NFSEXITCODE2(error, nd);
 5812         return (error);
 5813 }
 5814 
 5815 /*
 5816  * This function looks for openowners that haven't had any opens for
 5817  * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
 5818  * is set.
 5819  */
 5820 void
 5821 nfsrv_throwawayopens(NFSPROC_T *p)
 5822 {
 5823         struct nfsclient *clp, *nclp;
 5824         struct nfsstate *stp, *nstp;
 5825         int i;
 5826 
 5827         NFSLOCKSTATE();
 5828         nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
 5829         /*
 5830          * For each client...
 5831          */
 5832         for (i = 0; i < nfsrv_clienthashsize; i++) {
 5833             LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
 5834                 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
 5835                         if (LIST_EMPTY(&stp->ls_open) &&
 5836                             (stp->ls_noopens > NFSNOOPEN ||
 5837                              (nfsrv_openpluslock * 2) >
 5838                              nfsrv_v4statelimit))
 5839                                 nfsrv_freeopenowner(stp, 0, p);
 5840                 }
 5841             }
 5842         }
 5843         NFSUNLOCKSTATE();
 5844 }
 5845 
 5846 /*
 5847  * This function checks to see if the credentials are the same.
 5848  * Returns 1 for not same, 0 otherwise.
 5849  */
 5850 static int
 5851 nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
 5852 {
 5853 
 5854         if (nd->nd_flag & ND_GSS) {
 5855                 if (!(clp->lc_flags & LCL_GSS))
 5856                         return (1);
 5857                 if (clp->lc_flags & LCL_NAME) {
 5858                         if (nd->nd_princlen != clp->lc_namelen ||
 5859                             NFSBCMP(nd->nd_principal, clp->lc_name,
 5860                                 clp->lc_namelen))
 5861                                 return (1);
 5862                         else
 5863                                 return (0);
 5864                 }
 5865                 if (nd->nd_cred->cr_uid == clp->lc_uid)
 5866                         return (0);
 5867                 else
 5868                         return (1);
 5869         } else if (clp->lc_flags & LCL_GSS)
 5870                 return (1);
 5871         /*
 5872          * For AUTH_SYS, allow the same uid or root. (This is underspecified
 5873          * in RFC3530, which talks about principals, but doesn't say anything
 5874          * about uids for AUTH_SYS.)
 5875          */
 5876         if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
 5877                 return (0);
 5878         else
 5879                 return (1);
 5880 }
 5881 
 5882 /*
 5883  * Calculate the lease expiry time.
 5884  */
 5885 static time_t
 5886 nfsrv_leaseexpiry(void)
 5887 {
 5888 
 5889         if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
 5890                 return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
 5891         return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
 5892 }
 5893 
 5894 /*
 5895  * Delay the delegation timeout as far as ls_delegtimelimit, as required.
 5896  */
 5897 static void
 5898 nfsrv_delaydelegtimeout(struct nfsstate *stp)
 5899 {
 5900 
 5901         if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
 5902                 return;
 5903 
 5904         if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
 5905             stp->ls_delegtime < stp->ls_delegtimelimit) {
 5906                 stp->ls_delegtime += nfsrv_lease;
 5907                 if (stp->ls_delegtime > stp->ls_delegtimelimit)
 5908                         stp->ls_delegtime = stp->ls_delegtimelimit;
 5909         }
 5910 }
 5911 
 5912 /*
 5913  * This function checks to see if there is any other state associated
 5914  * with the openowner for this Open.
 5915  * It returns 1 if there is no other state, 0 otherwise.
 5916  */
 5917 static int
 5918 nfsrv_nootherstate(struct nfsstate *stp)
 5919 {
 5920         struct nfsstate *tstp;
 5921 
 5922         LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
 5923                 if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
 5924                         return (0);
 5925         }
 5926         return (1);
 5927 }
 5928 
 5929 /*
 5930  * Create a list of lock deltas (changes to local byte range locking
 5931  * that can be rolled back using the list) and apply the changes via
 5932  * nfsvno_advlock(). Optionally, lock the list. It is expected that either
 5933  * the rollback or update function will be called after this.
 5934  * It returns an error (and rolls back, as required), if any nfsvno_advlock()
 5935  * call fails. If it returns an error, it will unlock the list.
 5936  */
 5937 static int
 5938 nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
 5939     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
 5940 {
 5941         struct nfslock *lop, *nlop;
 5942         int error = 0;
 5943 
 5944         /* Loop through the list of locks. */
 5945         lop = LIST_FIRST(&lfp->lf_locallock);
 5946         while (first < end && lop != NULL) {
 5947                 nlop = LIST_NEXT(lop, lo_lckowner);
 5948                 if (first >= lop->lo_end) {
 5949                         /* not there yet */
 5950                         lop = nlop;
 5951                 } else if (first < lop->lo_first) {
 5952                         /* new one starts before entry in list */
 5953                         if (end <= lop->lo_first) {
 5954                                 /* no overlap between old and new */
 5955                                 error = nfsrv_dolocal(vp, lfp, flags,
 5956                                     NFSLCK_UNLOCK, first, end, cfp, p);
 5957                                 if (error != 0)
 5958                                         break;
 5959                                 first = end;
 5960                         } else {
 5961                                 /* handle fragment overlapped with new one */
 5962                                 error = nfsrv_dolocal(vp, lfp, flags,
 5963                                     NFSLCK_UNLOCK, first, lop->lo_first, cfp,
 5964                                     p);
 5965                                 if (error != 0)
 5966                                         break;
 5967                                 first = lop->lo_first;
 5968                         }
 5969                 } else {
 5970                         /* new one overlaps this entry in list */
 5971                         if (end <= lop->lo_end) {
 5972                                 /* overlaps all of new one */
 5973                                 error = nfsrv_dolocal(vp, lfp, flags,
 5974                                     lop->lo_flags, first, end, cfp, p);
 5975                                 if (error != 0)
 5976                                         break;
 5977                                 first = end;
 5978                         } else {
 5979                                 /* handle fragment overlapped with new one */
 5980                                 error = nfsrv_dolocal(vp, lfp, flags,
 5981                                     lop->lo_flags, first, lop->lo_end, cfp, p);
 5982                                 if (error != 0)
 5983                                         break;
 5984                                 first = lop->lo_end;
 5985                                 lop = nlop;
 5986                         }
 5987                 }
 5988         }
 5989         if (first < end && error == 0)
 5990                 /* handle fragment past end of list */
 5991                 error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
 5992                     end, cfp, p);
 5993 
 5994         NFSEXITCODE(error);
 5995         return (error);
 5996 }
 5997 
 5998 /*
 5999  * Local lock unlock. Unlock all byte ranges that are no longer locked
 6000  * by NFSv4. To do this, unlock any subranges of first-->end that
 6001  * do not overlap with the byte ranges of any lock in the lfp->lf_lock
 6002  * list. This list has all locks for the file held by other
 6003  * <clientid, lockowner> tuples. The list is ordered by increasing
 6004  * lo_first value, but may have entries that overlap each other, for
 6005  * the case of read locks.
 6006  */
 6007 static void
 6008 nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
 6009     uint64_t init_end, NFSPROC_T *p)
 6010 {
 6011         struct nfslock *lop;
 6012         uint64_t first, end, prevfirst __unused;
 6013 
 6014         first = init_first;
 6015         end = init_end;
 6016         while (first < init_end) {
 6017                 /* Loop through all nfs locks, adjusting first and end */
 6018                 prevfirst = 0;
 6019                 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
 6020                         KASSERT(prevfirst <= lop->lo_first,
 6021                             ("nfsv4 locks out of order"));
 6022                         KASSERT(lop->lo_first < lop->lo_end,
 6023                             ("nfsv4 bogus lock"));
 6024                         prevfirst = lop->lo_first;
 6025                         if (first >= lop->lo_first &&
 6026                             first < lop->lo_end)
 6027                                 /*
 6028                                  * Overlaps with initial part, so trim
 6029                                  * off that initial part by moving first past
 6030                                  * it.
 6031                                  */
 6032                                 first = lop->lo_end;
 6033                         else if (end > lop->lo_first &&
 6034                             lop->lo_first > first) {
 6035                                 /*
 6036                                  * This lock defines the end of the
 6037                                  * segment to unlock, so set end to the
 6038                                  * start of it and break out of the loop.
 6039                                  */
 6040                                 end = lop->lo_first;
 6041                                 break;
 6042                         }
 6043                         if (first >= end)
 6044                                 /*
 6045                                  * There is no segment left to do, so
 6046                                  * break out of this loop and then exit
 6047                                  * the outer while() since first will be set
 6048                                  * to end, which must equal init_end here.
 6049                                  */
 6050                                 break;
 6051                 }
 6052                 if (first < end) {
 6053                         /* Unlock this segment */
 6054                         (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
 6055                             NFSLCK_READ, first, end, NULL, p);
 6056                         nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
 6057                             first, end);
 6058                 }
 6059                 /*
 6060                  * Now move past this segment and look for any further
 6061                  * segment in the range, if there is one.
 6062                  */
 6063                 first = end;
 6064                 end = init_end;
 6065         }
 6066 }
 6067 
 6068 /*
 6069  * Do the local lock operation and update the rollback list, as required.
 6070  * Perform the rollback and return the error if nfsvno_advlock() fails.
 6071  */
 6072 static int
 6073 nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
 6074     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
 6075 {
 6076         struct nfsrollback *rlp;
 6077         int error = 0, ltype, oldltype;
 6078 
 6079         if (flags & NFSLCK_WRITE)
 6080                 ltype = F_WRLCK;
 6081         else if (flags & NFSLCK_READ)
 6082                 ltype = F_RDLCK;
 6083         else
 6084                 ltype = F_UNLCK;
 6085         if (oldflags & NFSLCK_WRITE)
 6086                 oldltype = F_WRLCK;
 6087         else if (oldflags & NFSLCK_READ)
 6088                 oldltype = F_RDLCK;
 6089         else
 6090                 oldltype = F_UNLCK;
 6091         if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
 6092                 /* nothing to do */
 6093                 goto out;
 6094         error = nfsvno_advlock(vp, ltype, first, end, p);
 6095         if (error != 0) {
 6096                 if (cfp != NULL) {
 6097                         cfp->cl_clientid.lval[0] = 0;
 6098                         cfp->cl_clientid.lval[1] = 0;
 6099                         cfp->cl_first = 0;
 6100                         cfp->cl_end = NFS64BITSSET;
 6101                         cfp->cl_flags = NFSLCK_WRITE;
 6102                         cfp->cl_ownerlen = 5;
 6103                         NFSBCOPY("LOCAL", cfp->cl_owner, 5);
 6104                 }
 6105                 nfsrv_locallock_rollback(vp, lfp, p);
 6106         } else if (ltype != F_UNLCK) {
 6107                 rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
 6108                     M_WAITOK);
 6109                 rlp->rlck_first = first;
 6110                 rlp->rlck_end = end;
 6111                 rlp->rlck_type = oldltype;
 6112                 LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
 6113         }
 6114 
 6115 out:
 6116         NFSEXITCODE(error);
 6117         return (error);
 6118 }
 6119 
 6120 /*
 6121  * Roll back local lock changes and free up the rollback list.
 6122  */
 6123 static void
 6124 nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
 6125 {
 6126         struct nfsrollback *rlp, *nrlp;
 6127 
 6128         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
 6129                 (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
 6130                     rlp->rlck_end, p);
 6131                 free(rlp, M_NFSDROLLBACK);
 6132         }
 6133         LIST_INIT(&lfp->lf_rollback);
 6134 }
 6135 
 6136 /*
 6137  * Update local lock list and delete rollback list (ie now committed to the
 6138  * local locks). Most of the work is done by the internal function.
 6139  */
 6140 static void
 6141 nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
 6142     uint64_t end)
 6143 {
 6144         struct nfsrollback *rlp, *nrlp;
 6145         struct nfslock *new_lop, *other_lop;
 6146 
 6147         new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
 6148         if (flags & (NFSLCK_READ | NFSLCK_WRITE))
 6149                 other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
 6150                     M_WAITOK);
 6151         else
 6152                 other_lop = NULL;
 6153         new_lop->lo_flags = flags;
 6154         new_lop->lo_first = first;
 6155         new_lop->lo_end = end;
 6156         nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
 6157         if (new_lop != NULL)
 6158                 free(new_lop, M_NFSDLOCK);
 6159         if (other_lop != NULL)
 6160                 free(other_lop, M_NFSDLOCK);
 6161 
 6162         /* and get rid of the rollback list */
 6163         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
 6164                 free(rlp, M_NFSDROLLBACK);
 6165         LIST_INIT(&lfp->lf_rollback);
 6166 }
 6167 
 6168 /*
 6169  * Lock the struct nfslockfile for local lock updating.
 6170  */
 6171 static void
 6172 nfsrv_locklf(struct nfslockfile *lfp)
 6173 {
 6174         int gotlock;
 6175 
 6176         /* lf_usecount ensures *lfp won't be free'd */
 6177         lfp->lf_usecount++;
 6178         do {
 6179                 gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
 6180                     NFSSTATEMUTEXPTR, NULL);
 6181         } while (gotlock == 0);
 6182         lfp->lf_usecount--;
 6183 }
 6184 
 6185 /*
 6186  * Unlock the struct nfslockfile after local lock updating.
 6187  */
 6188 static void
 6189 nfsrv_unlocklf(struct nfslockfile *lfp)
 6190 {
 6191 
 6192         nfsv4_unlock(&lfp->lf_locallock_lck, 0);
 6193 }
 6194 
 6195 /*
 6196  * Clear out all state for the NFSv4 server.
 6197  * Must be called by a thread that can sleep when no nfsds are running.
 6198  */
 6199 void
 6200 nfsrv_throwawayallstate(NFSPROC_T *p)
 6201 {
 6202         struct nfsclient *clp, *nclp;
 6203         struct nfslockfile *lfp, *nlfp;
 6204         int i;
 6205 
 6206         /*
 6207          * For each client, clean out the state and then free the structure.
 6208          */
 6209         for (i = 0; i < nfsrv_clienthashsize; i++) {
 6210                 LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
 6211                         nfsrv_cleanclient(clp, p);
 6212                         nfsrv_freedeleglist(&clp->lc_deleg);
 6213                         nfsrv_freedeleglist(&clp->lc_olddeleg);
 6214                         free(clp->lc_stateid, M_NFSDCLIENT);
 6215                         free(clp, M_NFSDCLIENT);
 6216                 }
 6217         }
 6218 
 6219         /*
 6220          * Also, free up any remaining lock file structures.
 6221          */
 6222         for (i = 0; i < nfsrv_lockhashsize; i++) {
 6223                 LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
 6224                         printf("nfsd unload: fnd a lock file struct\n");
 6225                         nfsrv_freenfslockfile(lfp);
 6226                 }
 6227         }
 6228 
 6229         /* And get rid of the deviceid structures and layouts. */
 6230         nfsrv_freealllayoutsanddevids();
 6231 }
 6232 
 6233 /*
 6234  * Check the sequence# for the session and slot provided as an argument.
 6235  * Also, renew the lease if the session will return NFS_OK.
 6236  */
 6237 int
 6238 nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
 6239     uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
 6240     uint32_t *sflagsp, NFSPROC_T *p)
 6241 {
 6242         struct nfsdsession *sep;
 6243         struct nfssessionhash *shp;
 6244         int error;
 6245 
 6246         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6247         NFSLOCKSESSION(shp);
 6248         sep = nfsrv_findsession(nd->nd_sessionid);
 6249         if (sep == NULL) {
 6250                 NFSUNLOCKSESSION(shp);
 6251                 return (NFSERR_BADSESSION);
 6252         }
 6253         error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
 6254             sep->sess_slots, NULL, NFSV4_SLOTS - 1);
 6255         if (error != 0) {
 6256                 NFSUNLOCKSESSION(shp);
 6257                 return (error);
 6258         }
 6259         if (cache_this != 0)
 6260                 nd->nd_flag |= ND_SAVEREPLY;
 6261         /* Renew the lease. */
 6262         sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
 6263         nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
 6264         nd->nd_flag |= ND_IMPLIEDCLID;
 6265 
 6266         /* Save maximum request and reply sizes. */
 6267         nd->nd_maxreq = sep->sess_maxreq;
 6268         nd->nd_maxresp = sep->sess_maxresp;
 6269 
 6270         *sflagsp = 0;
 6271         if (sep->sess_clp->lc_req.nr_client == NULL ||
 6272             (sep->sess_clp->lc_flags & LCL_CBDOWN) != 0)
 6273                 *sflagsp |= NFSV4SEQ_CBPATHDOWN;
 6274         NFSUNLOCKSESSION(shp);
 6275         if (error == NFSERR_EXPIRED) {
 6276                 *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
 6277                 error = 0;
 6278         } else if (error == NFSERR_ADMINREVOKED) {
 6279                 *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
 6280                 error = 0;
 6281         }
 6282         *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
 6283         return (0);
 6284 }
 6285 
 6286 /*
 6287  * Check/set reclaim complete for this session/clientid.
 6288  */
 6289 int
 6290 nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs)
 6291 {
 6292         struct nfsdsession *sep;
 6293         struct nfssessionhash *shp;
 6294         int error = 0;
 6295 
 6296         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6297         NFSLOCKSTATE();
 6298         NFSLOCKSESSION(shp);
 6299         sep = nfsrv_findsession(nd->nd_sessionid);
 6300         if (sep == NULL) {
 6301                 NFSUNLOCKSESSION(shp);
 6302                 NFSUNLOCKSTATE();
 6303                 return (NFSERR_BADSESSION);
 6304         }
 6305 
 6306         if (onefs != 0)
 6307                 sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS;
 6308                 /* Check to see if reclaim complete has already happened. */
 6309         else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
 6310                 error = NFSERR_COMPLETEALREADY;
 6311         else {
 6312                 sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
 6313                 nfsrv_markreclaim(sep->sess_clp);
 6314         }
 6315         NFSUNLOCKSESSION(shp);
 6316         NFSUNLOCKSTATE();
 6317         return (error);
 6318 }
 6319 
 6320 /*
 6321  * Cache the reply in a session slot.
 6322  */
 6323 void
 6324 nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m)
 6325 {
 6326         struct nfsdsession *sep;
 6327         struct nfssessionhash *shp;
 6328         char *buf, *cp;
 6329 #ifdef INET
 6330         struct sockaddr_in *sin;
 6331 #endif
 6332 #ifdef INET6
 6333         struct sockaddr_in6 *sin6;
 6334 #endif
 6335 
 6336         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6337         NFSLOCKSESSION(shp);
 6338         sep = nfsrv_findsession(nd->nd_sessionid);
 6339         if (sep == NULL) {
 6340                 NFSUNLOCKSESSION(shp);
 6341                 if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
 6342                         buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK);
 6343                         switch (nd->nd_nam->sa_family) {
 6344 #ifdef INET
 6345                         case AF_INET:
 6346                                 sin = (struct sockaddr_in *)nd->nd_nam;
 6347                                 cp = inet_ntop(sin->sin_family,
 6348                                     &sin->sin_addr.s_addr, buf,
 6349                                     INET6_ADDRSTRLEN);
 6350                                 break;
 6351 #endif
 6352 #ifdef INET6
 6353                         case AF_INET6:
 6354                                 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
 6355                                 cp = inet_ntop(sin6->sin6_family,
 6356                                     &sin6->sin6_addr, buf, INET6_ADDRSTRLEN);
 6357                                 break;
 6358 #endif
 6359                         default:
 6360                                 cp = NULL;
 6361                         }
 6362                         if (cp != NULL)
 6363                                 printf("nfsrv_cache_session: no session "
 6364                                     "IPaddr=%s\n", cp);
 6365                         else
 6366                                 printf("nfsrv_cache_session: no session\n");
 6367                         free(buf, M_TEMP);
 6368                 }
 6369                 m_freem(*m);
 6370                 return;
 6371         }
 6372         nfsv4_seqsess_cacherep(nd->nd_slotid, sep->sess_slots, nd->nd_repstat,
 6373             m);
 6374         NFSUNLOCKSESSION(shp);
 6375 }
 6376 
 6377 /*
 6378  * Search for a session that matches the sessionid.
 6379  */
 6380 static struct nfsdsession *
 6381 nfsrv_findsession(uint8_t *sessionid)
 6382 {
 6383         struct nfsdsession *sep;
 6384         struct nfssessionhash *shp;
 6385 
 6386         shp = NFSSESSIONHASH(sessionid);
 6387         LIST_FOREACH(sep, &shp->list, sess_hash) {
 6388                 if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
 6389                         break;
 6390         }
 6391         return (sep);
 6392 }
 6393 
 6394 /*
 6395  * Destroy a session.
 6396  */
 6397 int
 6398 nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
 6399 {
 6400         int error, igotlock, samesess;
 6401 
 6402         samesess = 0;
 6403         if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) &&
 6404             (nd->nd_flag & ND_HASSEQUENCE) != 0) {
 6405                 samesess = 1;
 6406                 if ((nd->nd_flag & ND_LASTOP) == 0)
 6407                         return (NFSERR_BADSESSION);
 6408         }
 6409 
 6410         /* Lock out other nfsd threads */
 6411         NFSLOCKV4ROOTMUTEX();
 6412         nfsv4_relref(&nfsv4rootfs_lock);
 6413         do {
 6414                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 6415                     NFSV4ROOTLOCKMUTEXPTR, NULL);
 6416         } while (igotlock == 0);
 6417         NFSUNLOCKV4ROOTMUTEX();
 6418 
 6419         error = nfsrv_freesession(NULL, sessionid);
 6420         if (error == 0 && samesess != 0)
 6421                 nd->nd_flag &= ~ND_HASSEQUENCE;
 6422 
 6423         NFSLOCKV4ROOTMUTEX();
 6424         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 6425         NFSUNLOCKV4ROOTMUTEX();
 6426         return (error);
 6427 }
 6428 
 6429 /*
 6430  * Bind a connection to a session.
 6431  * For now, only certain variants are supported, since the current session
 6432  * structure can only handle a single backchannel entry, which will be
 6433  * applied to all connections if it is set.
 6434  */
 6435 int
 6436 nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp)
 6437 {
 6438         struct nfssessionhash *shp;
 6439         struct nfsdsession *sep;
 6440         struct nfsclient *clp;
 6441         SVCXPRT *savxprt;
 6442         int error;
 6443 
 6444         error = 0;
 6445         savxprt = NULL;
 6446         shp = NFSSESSIONHASH(sessionid);
 6447         NFSLOCKSTATE();
 6448         NFSLOCKSESSION(shp);
 6449         sep = nfsrv_findsession(sessionid);
 6450         if (sep != NULL) {
 6451                 clp = sep->sess_clp;
 6452                 if (*foreaftp == NFSCDFC4_BACK ||
 6453                     *foreaftp == NFSCDFC4_BACK_OR_BOTH ||
 6454                     *foreaftp == NFSCDFC4_FORE_OR_BOTH) {
 6455                         /* Try to set up a backchannel. */
 6456                         if (clp->lc_req.nr_client == NULL) {
 6457                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire "
 6458                                     "backchannel\n");
 6459                                 clp->lc_req.nr_client = (struct __rpc_client *)
 6460                                     clnt_bck_create(nd->nd_xprt->xp_socket,
 6461                                     sep->sess_cbprogram, NFSV4_CBVERS);
 6462                         }
 6463                         if (clp->lc_req.nr_client != NULL) {
 6464                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: set up "
 6465                                     "backchannel\n");
 6466                                 savxprt = sep->sess_cbsess.nfsess_xprt;
 6467                                 SVC_ACQUIRE(nd->nd_xprt);
 6468                                 CLNT_ACQUIRE(clp->lc_req.nr_client);
 6469                                 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
 6470                                 /* Disable idle timeout. */
 6471                                 nd->nd_xprt->xp_idletimeout = 0;
 6472                                 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
 6473                                 sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN;
 6474                                 clp->lc_flags |= LCL_DONEBINDCONN |
 6475                                     LCL_NEEDSCBNULL;
 6476                                 clp->lc_flags &= ~LCL_CBDOWN;
 6477                                 if (*foreaftp == NFSCDFS4_BACK)
 6478                                         *foreaftp = NFSCDFS4_BACK;
 6479                                 else
 6480                                         *foreaftp = NFSCDFS4_BOTH;
 6481                         } else if (*foreaftp != NFSCDFC4_BACK) {
 6482                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set "
 6483                                     "up backchannel\n");
 6484                                 sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
 6485                                 clp->lc_flags |= LCL_DONEBINDCONN;
 6486                                 *foreaftp = NFSCDFS4_FORE;
 6487                         } else {
 6488                                 error = NFSERR_NOTSUPP;
 6489                                 printf("nfsrv_bindconnsess: Can't add "
 6490                                     "backchannel\n");
 6491                         }
 6492                 } else {
 6493                         NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n");
 6494                         clp->lc_flags |= LCL_DONEBINDCONN;
 6495                         *foreaftp = NFSCDFS4_FORE;
 6496                 }
 6497         } else
 6498                 error = NFSERR_BADSESSION;
 6499         NFSUNLOCKSESSION(shp);
 6500         NFSUNLOCKSTATE();
 6501         if (savxprt != NULL)
 6502                 SVC_RELEASE(savxprt);
 6503         return (error);
 6504 }
 6505 
 6506 /*
 6507  * Free up a session structure.
 6508  */
 6509 static int
 6510 nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
 6511 {
 6512         struct nfssessionhash *shp;
 6513         int i;
 6514 
 6515         NFSLOCKSTATE();
 6516         if (sep == NULL) {
 6517                 shp = NFSSESSIONHASH(sessionid);
 6518                 NFSLOCKSESSION(shp);
 6519                 sep = nfsrv_findsession(sessionid);
 6520         } else {
 6521                 shp = NFSSESSIONHASH(sep->sess_sessionid);
 6522                 NFSLOCKSESSION(shp);
 6523         }
 6524         if (sep != NULL) {
 6525                 sep->sess_refcnt--;
 6526                 if (sep->sess_refcnt > 0) {
 6527                         NFSUNLOCKSESSION(shp);
 6528                         NFSUNLOCKSTATE();
 6529                         return (NFSERR_BACKCHANBUSY);
 6530                 }
 6531                 LIST_REMOVE(sep, sess_hash);
 6532                 LIST_REMOVE(sep, sess_list);
 6533         }
 6534         NFSUNLOCKSESSION(shp);
 6535         NFSUNLOCKSTATE();
 6536         if (sep == NULL)
 6537                 return (NFSERR_BADSESSION);
 6538         for (i = 0; i < NFSV4_SLOTS; i++)
 6539                 if (sep->sess_slots[i].nfssl_reply != NULL)
 6540                         m_freem(sep->sess_slots[i].nfssl_reply);
 6541         if (sep->sess_cbsess.nfsess_xprt != NULL)
 6542                 SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
 6543         free(sep, M_NFSDSESSION);
 6544         return (0);
 6545 }
 6546 
 6547 /*
 6548  * Free a stateid.
 6549  * RFC5661 says that it should fail when there are associated opens, locks
 6550  * or delegations. Since stateids represent opens, I don't see how you can
 6551  * free an open stateid (it will be free'd when closed), so this function
 6552  * only works for lock stateids (freeing the lock_owner) or delegations.
 6553  */
 6554 int
 6555 nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
 6556     NFSPROC_T *p)
 6557 {
 6558         struct nfsclient *clp;
 6559         struct nfsstate *stp;
 6560         int error;
 6561 
 6562         NFSLOCKSTATE();
 6563         /*
 6564          * Look up the stateid
 6565          */
 6566         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 6567             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 6568         if (error == 0) {
 6569                 /* First, check for a delegation. */
 6570                 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
 6571                         if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
 6572                             NFSX_STATEIDOTHER))
 6573                                 break;
 6574                 }
 6575                 if (stp != NULL) {
 6576                         nfsrv_freedeleg(stp);
 6577                         NFSUNLOCKSTATE();
 6578                         return (error);
 6579                 }
 6580         }
 6581         /* Not a delegation, try for a lock_owner. */
 6582         if (error == 0)
 6583                 error = nfsrv_getstate(clp, stateidp, 0, &stp);
 6584         if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
 6585             NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
 6586                 /* Not a lock_owner stateid. */
 6587                 error = NFSERR_LOCKSHELD;
 6588         if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
 6589                 error = NFSERR_LOCKSHELD;
 6590         if (error == 0)
 6591                 nfsrv_freelockowner(stp, NULL, 0, p);
 6592         NFSUNLOCKSTATE();
 6593         return (error);
 6594 }
 6595 
 6596 /*
 6597  * Test a stateid.
 6598  */
 6599 int
 6600 nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
 6601     NFSPROC_T *p)
 6602 {
 6603         struct nfsclient *clp;
 6604         struct nfsstate *stp;
 6605         int error;
 6606 
 6607         NFSLOCKSTATE();
 6608         /*
 6609          * Look up the stateid
 6610          */
 6611         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 6612             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 6613         if (error == 0)
 6614                 error = nfsrv_getstate(clp, stateidp, 0, &stp);
 6615         if (error == 0 && stateidp->seqid != 0 &&
 6616             SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid))
 6617                 error = NFSERR_OLDSTATEID;
 6618         NFSUNLOCKSTATE();
 6619         return (error);
 6620 }
 6621 
 6622 /*
 6623  * Generate the xdr for an NFSv4.1 CBSequence Operation.
 6624  */
 6625 static int
 6626 nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
 6627     int dont_replycache, struct nfsdsession **sepp, int *slotposp)
 6628 {
 6629         struct nfsdsession *sep;
 6630         uint32_t *tl, slotseq = 0;
 6631         int maxslot;
 6632         uint8_t sessionid[NFSX_V4SESSIONID];
 6633         int error;
 6634 
 6635         error = nfsv4_getcbsession(clp, sepp);
 6636         if (error != 0)
 6637                 return (error);
 6638         sep = *sepp;
 6639         (void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, slotposp, &maxslot,
 6640             &slotseq, sessionid);
 6641         KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
 6642 
 6643         /* Build the Sequence arguments. */
 6644         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
 6645         bcopy(sessionid, tl, NFSX_V4SESSIONID);
 6646         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
 6647         nd->nd_slotseq = tl;
 6648         *tl++ = txdr_unsigned(slotseq);
 6649         *tl++ = txdr_unsigned(*slotposp);
 6650         *tl++ = txdr_unsigned(maxslot);
 6651         if (dont_replycache == 0)
 6652                 *tl++ = newnfs_true;
 6653         else
 6654                 *tl++ = newnfs_false;
 6655         *tl = 0;                        /* No referring call list, for now. */
 6656         nd->nd_flag |= ND_HASSEQUENCE;
 6657         return (0);
 6658 }
 6659 
 6660 /*
 6661  * Get a session for the callback.
 6662  */
 6663 static int
 6664 nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
 6665 {
 6666         struct nfsdsession *sep;
 6667 
 6668         NFSLOCKSTATE();
 6669         LIST_FOREACH(sep, &clp->lc_session, sess_list) {
 6670                 if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
 6671                         break;
 6672         }
 6673         if (sep == NULL) {
 6674                 NFSUNLOCKSTATE();
 6675                 return (NFSERR_BADSESSION);
 6676         }
 6677         sep->sess_refcnt++;
 6678         *sepp = sep;
 6679         NFSUNLOCKSTATE();
 6680         return (0);
 6681 }
 6682 
 6683 /*
 6684  * Free up all backchannel xprts.  This needs to be done when the nfsd threads
 6685  * exit, since those transports will all be going away.
 6686  * This is only called after all the nfsd threads are done performing RPCs,
 6687  * so locking shouldn't be an issue.
 6688  */
 6689 void
 6690 nfsrv_freeallbackchannel_xprts(void)
 6691 {
 6692         struct nfsdsession *sep;
 6693         struct nfsclient *clp;
 6694         SVCXPRT *xprt;
 6695         int i;
 6696 
 6697         for (i = 0; i < nfsrv_clienthashsize; i++) {
 6698                 LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
 6699                         LIST_FOREACH(sep, &clp->lc_session, sess_list) {
 6700                                 xprt = sep->sess_cbsess.nfsess_xprt;
 6701                                 sep->sess_cbsess.nfsess_xprt = NULL;
 6702                                 if (xprt != NULL)
 6703                                         SVC_RELEASE(xprt);
 6704                         }
 6705                 }
 6706         }
 6707 }
 6708 
 6709 /*
 6710  * Do a layout commit.  Actually just call nfsrv_updatemdsattr().
 6711  * I have no idea if the rest of these arguments will ever be useful?
 6712  */
 6713 int
 6714 nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype,
 6715     int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len,
 6716     int hasnewmtime, struct timespec *newmtimep, int reclaim,
 6717     nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep,
 6718     uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p)
 6719 {
 6720         struct nfsvattr na;
 6721         int error;
 6722 
 6723         error = nfsrv_updatemdsattr(vp, &na, p);
 6724         if (error == 0) {
 6725                 *hasnewsizep = 1;
 6726                 *newsizep = na.na_size;
 6727         }
 6728         return (error);
 6729 }
 6730 
 6731 /*
 6732  * Try and get a layout.
 6733  */
 6734 int
 6735 nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp,
 6736     int layouttype, int *iomode, uint64_t *offset, uint64_t *len,
 6737     uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose,
 6738     int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p)
 6739 {
 6740         struct nfslayouthash *lhyp;
 6741         struct nfslayout *lyp;
 6742         char *devid;
 6743         fhandle_t fh, *dsfhp;
 6744         int error, mirrorcnt;
 6745 
 6746         if (nfsrv_devidcnt == 0)
 6747                 return (NFSERR_UNKNLAYOUTTYPE);
 6748 
 6749         if (*offset != 0)
 6750                 printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset,
 6751                     (uintmax_t)*len);
 6752         error = nfsvno_getfh(vp, &fh, p);
 6753         NFSD_DEBUG(4, "layoutget getfh=%d\n", error);
 6754         if (error != 0)
 6755                 return (error);
 6756 
 6757         /*
 6758          * For now, all layouts are for entire files.
 6759          * Only issue Read/Write layouts if requested for a non-readonly fs.
 6760          */
 6761         if (NFSVNO_EXRDONLY(exp)) {
 6762                 if (*iomode == NFSLAYOUTIOMODE_RW)
 6763                         return (NFSERR_LAYOUTTRYLATER);
 6764                 *iomode = NFSLAYOUTIOMODE_READ;
 6765         }
 6766         if (*iomode != NFSLAYOUTIOMODE_RW)
 6767                 *iomode = NFSLAYOUTIOMODE_READ;
 6768 
 6769         /*
 6770          * Check to see if a write layout can be issued for this file.
 6771          * This is used during mirror recovery to avoid RW layouts being
 6772          * issued for a file while it is being copied to the recovered
 6773          * mirror.
 6774          */
 6775         if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0)
 6776                 return (NFSERR_LAYOUTTRYLATER);
 6777 
 6778         *retonclose = 0;
 6779         *offset = 0;
 6780         *len = UINT64_MAX;
 6781 
 6782         /* First, see if a layout already exists and return if found. */
 6783         lhyp = NFSLAYOUTHASH(&fh);
 6784         NFSLOCKLAYOUT(lhyp);
 6785         error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp);
 6786         NFSD_DEBUG(4, "layoutget findlay=%d\n", error);
 6787         /*
 6788          * Not sure if the seqid must be the same, so I won't check it.
 6789          */
 6790         if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] ||
 6791             stateidp->other[1] != lyp->lay_stateid.other[1] ||
 6792             stateidp->other[2] != lyp->lay_stateid.other[2])) {
 6793                 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
 6794                         NFSUNLOCKLAYOUT(lhyp);
 6795                         NFSD_DEBUG(1, "ret bad stateid\n");
 6796                         return (NFSERR_BADSTATEID);
 6797                 }
 6798                 /*
 6799                  * I believe we get here because there is a race between
 6800                  * the client processing the CBLAYOUTRECALL and the layout
 6801                  * being deleted here on the server.
 6802                  * The client has now done a LayoutGet with a non-layout
 6803                  * stateid, as it would when there is no layout.
 6804                  * As such, free this layout and set error == NFSERR_BADSTATEID
 6805                  * so the code below will create a new layout structure as
 6806                  * would happen if no layout was found.
 6807                  * "lyp" will be set before being used below, but set it NULL
 6808                  * as a safety belt.
 6809                  */
 6810                 nfsrv_freelayout(&lhyp->list, lyp);
 6811                 lyp = NULL;
 6812                 error = NFSERR_BADSTATEID;
 6813         }
 6814         if (error == 0) {
 6815                 if (lyp->lay_layoutlen > maxcnt) {
 6816                         NFSUNLOCKLAYOUT(lhyp);
 6817                         NFSD_DEBUG(1, "ret layout too small\n");
 6818                         return (NFSERR_TOOSMALL);
 6819                 }
 6820                 if (*iomode == NFSLAYOUTIOMODE_RW) {
 6821                         if ((lyp->lay_flags & NFSLAY_NOSPC) != 0) {
 6822                                 NFSUNLOCKLAYOUT(lhyp);
 6823                                 NFSD_DEBUG(1, "ret layout nospace\n");
 6824                                 return (NFSERR_NOSPC);
 6825                         }
 6826                         lyp->lay_flags |= NFSLAY_RW;
 6827                 } else
 6828                         lyp->lay_flags |= NFSLAY_READ;
 6829                 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
 6830                 *layoutlenp = lyp->lay_layoutlen;
 6831                 if (++lyp->lay_stateid.seqid == 0)
 6832                         lyp->lay_stateid.seqid = 1;
 6833                 stateidp->seqid = lyp->lay_stateid.seqid;
 6834                 NFSUNLOCKLAYOUT(lhyp);
 6835                 NFSD_DEBUG(4, "ret fnd layout\n");
 6836                 return (0);
 6837         }
 6838         NFSUNLOCKLAYOUT(lhyp);
 6839 
 6840         /* Find the device id and file handle. */
 6841         dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
 6842         devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
 6843         error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid);
 6844         NFSD_DEBUG(4, "layoutget devandfh=%d\n", error);
 6845         if (error == 0) {
 6846                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
 6847                         if (NFSX_V4FILELAYOUT > maxcnt)
 6848                                 error = NFSERR_TOOSMALL;
 6849                         else
 6850                                 lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp,
 6851                                     devid, vp->v_mount->mnt_stat.f_fsid);
 6852                 } else {
 6853                         if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt)
 6854                                 error = NFSERR_TOOSMALL;
 6855                         else
 6856                                 lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt,
 6857                                     &fh, dsfhp, devid,
 6858                                     vp->v_mount->mnt_stat.f_fsid);
 6859                 }
 6860         }
 6861         free(dsfhp, M_TEMP);
 6862         free(devid, M_TEMP);
 6863         if (error != 0)
 6864                 return (error);
 6865 
 6866         /*
 6867          * Now, add this layout to the list.
 6868          */
 6869         error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p);
 6870         NFSD_DEBUG(4, "layoutget addl=%d\n", error);
 6871         /*
 6872          * The lyp will be set to NULL by nfsrv_addlayout() if it
 6873          * linked the new structure into the lists.
 6874          */
 6875         free(lyp, M_NFSDSTATE);
 6876         return (error);
 6877 }
 6878 
 6879 /*
 6880  * Generate a File Layout.
 6881  */
 6882 static struct nfslayout *
 6883 nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
 6884     fhandle_t *dsfhp, char *devid, fsid_t fs)
 6885 {
 6886         uint32_t *tl;
 6887         struct nfslayout *lyp;
 6888         uint64_t pattern_offset;
 6889 
 6890         lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE,
 6891             M_WAITOK | M_ZERO);
 6892         lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES;
 6893         if (iomode == NFSLAYOUTIOMODE_RW)
 6894                 lyp->lay_flags = NFSLAY_RW;
 6895         else
 6896                 lyp->lay_flags = NFSLAY_READ;
 6897         NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
 6898         lyp->lay_clientid.qval = nd->nd_clientid.qval;
 6899         lyp->lay_fsid = fs;
 6900         NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
 6901 
 6902         /* Fill in the xdr for the files layout. */
 6903         tl = (uint32_t *)lyp->lay_xdr;
 6904         NFSBCOPY(devid, tl, NFSX_V4DEVICEID);           /* Device ID. */
 6905         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 6906 
 6907         /* Set the stripe size to the maximum I/O size. */
 6908         *tl++ = txdr_unsigned(nfs_srvmaxio & NFSFLAYUTIL_STRIPE_MASK);
 6909         *tl++ = 0;                                      /* 1st stripe index. */
 6910         pattern_offset = 0;
 6911         txdr_hyper(pattern_offset, tl); tl += 2;        /* Pattern offset. */
 6912         *tl++ = txdr_unsigned(1);                       /* 1 file handle. */
 6913         *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
 6914         NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
 6915         lyp->lay_layoutlen = NFSX_V4FILELAYOUT;
 6916         return (lyp);
 6917 }
 6918 
 6919 #define FLEX_OWNERID    "999"
 6920 #define FLEX_UID0       ""
 6921 /*
 6922  * Generate a Flex File Layout.
 6923  * The FLEX_OWNERID can be any string of 3 decimal digits. Although this
 6924  * string goes on the wire, it isn't supposed to be used by the client,
 6925  * since this server uses tight coupling.
 6926  * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use
 6927  * a string of "". This works around the Linux Flex File Layout driver bug
 6928  * which uses the synthetic uid/gid strings for the "tightly coupled" case.
 6929  */
 6930 static struct nfslayout *
 6931 nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt,
 6932     fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs)
 6933 {
 6934         uint32_t *tl;
 6935         struct nfslayout *lyp;
 6936         uint64_t lenval;
 6937         int i;
 6938 
 6939         lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt),
 6940             M_NFSDSTATE, M_WAITOK | M_ZERO);
 6941         lyp->lay_type = NFSLAYOUT_FLEXFILE;
 6942         if (iomode == NFSLAYOUTIOMODE_RW)
 6943                 lyp->lay_flags = NFSLAY_RW;
 6944         else
 6945                 lyp->lay_flags = NFSLAY_READ;
 6946         NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
 6947         lyp->lay_clientid.qval = nd->nd_clientid.qval;
 6948         lyp->lay_fsid = fs;
 6949         lyp->lay_mirrorcnt = mirrorcnt;
 6950         NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
 6951 
 6952         /* Fill in the xdr for the files layout. */
 6953         tl = (uint32_t *)lyp->lay_xdr;
 6954         lenval = 0;
 6955         txdr_hyper(lenval, tl); tl += 2;                /* Stripe unit. */
 6956         *tl++ = txdr_unsigned(mirrorcnt);               /* # of mirrors. */
 6957         for (i = 0; i < mirrorcnt; i++) {
 6958                 *tl++ = txdr_unsigned(1);               /* One stripe. */
 6959                 NFSBCOPY(devid, tl, NFSX_V4DEVICEID);   /* Device ID. */
 6960                 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 6961                 devid += NFSX_V4DEVICEID;
 6962                 *tl++ = txdr_unsigned(1);               /* Efficiency. */
 6963                 *tl++ = 0;                              /* Proxy Stateid. */
 6964                 *tl++ = 0x55555555;
 6965                 *tl++ = 0x55555555;
 6966                 *tl++ = 0x55555555;
 6967                 *tl++ = txdr_unsigned(1);               /* 1 file handle. */
 6968                 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
 6969                 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
 6970                 tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED);
 6971                 dsfhp++;
 6972                 if (nfsrv_flexlinuxhack != 0) {
 6973                         *tl++ = txdr_unsigned(strlen(FLEX_UID0));
 6974                         *tl = 0;                /* 0 pad string. */
 6975                         NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
 6976                         *tl++ = txdr_unsigned(strlen(FLEX_UID0));
 6977                         *tl = 0;                /* 0 pad string. */
 6978                         NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
 6979                 } else {
 6980                         *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
 6981                         NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
 6982                         *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
 6983                         NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
 6984                 }
 6985         }
 6986         *tl++ = txdr_unsigned(0);               /* ff_flags. */
 6987         *tl = txdr_unsigned(60);                /* Status interval hint. */
 6988         lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt);
 6989         return (lyp);
 6990 }
 6991 
 6992 /*
 6993  * Parse and process Flex File errors returned via LayoutReturn.
 6994  */
 6995 static void
 6996 nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt,
 6997     NFSPROC_T *p)
 6998 {
 6999         uint32_t *tl;
 7000         int cnt, errcnt, i, j, opnum, stat;
 7001         char devid[NFSX_V4DEVICEID];
 7002 
 7003         tl = layp;
 7004         cnt = fxdr_unsigned(int, *tl++);
 7005         NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt);
 7006         for (i = 0; i < cnt; i++) {
 7007                 /* Skip offset, length and stateid for now. */
 7008                 tl += (4 + NFSX_STATEID / NFSX_UNSIGNED);
 7009                 errcnt = fxdr_unsigned(int, *tl++);
 7010                 NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt);
 7011                 for (j = 0; j < errcnt; j++) {
 7012                         NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
 7013                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 7014                         stat = fxdr_unsigned(int, *tl++);
 7015                         opnum = fxdr_unsigned(int, *tl++);
 7016                         NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum,
 7017                             stat);
 7018                         /*
 7019                          * Except for NFSERR_ACCES, NFSERR_STALE and
 7020                          * NFSERR_NOSPC errors, disable the mirror.
 7021                          */
 7022                         if (stat != NFSERR_ACCES && stat != NFSERR_STALE &&
 7023                             stat != NFSERR_NOSPC)
 7024                                 nfsrv_delds(devid, p);
 7025 
 7026                         /* For NFSERR_NOSPC, mark all devids and layouts. */
 7027                         if (stat == NFSERR_NOSPC)
 7028                                 nfsrv_marknospc(devid, true);
 7029                 }
 7030         }
 7031 }
 7032 
 7033 /*
 7034  * This function removes all flex file layouts which has a mirror with
 7035  * a device id that matches the argument.
 7036  * Called when the DS represented by the device id has failed.
 7037  */
 7038 void
 7039 nfsrv_flexmirrordel(char *devid, NFSPROC_T *p)
 7040 {
 7041         uint32_t *tl;
 7042         struct nfslayout *lyp, *nlyp;
 7043         struct nfslayouthash *lhyp;
 7044         struct nfslayouthead loclyp;
 7045         int i, j;
 7046 
 7047         NFSD_DEBUG(4, "flexmirrordel\n");
 7048         /* Move all layouts found onto a local list. */
 7049         TAILQ_INIT(&loclyp);
 7050         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7051                 lhyp = &nfslayouthash[i];
 7052                 NFSLOCKLAYOUT(lhyp);
 7053                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7054                         if (lyp->lay_type == NFSLAYOUT_FLEXFILE &&
 7055                             lyp->lay_mirrorcnt > 1) {
 7056                                 NFSD_DEBUG(4, "possible match\n");
 7057                                 tl = lyp->lay_xdr;
 7058                                 tl += 3;
 7059                                 for (j = 0; j < lyp->lay_mirrorcnt; j++) {
 7060                                         tl++;
 7061                                         if (NFSBCMP(devid, tl, NFSX_V4DEVICEID)
 7062                                             == 0) {
 7063                                                 /* Found one. */
 7064                                                 NFSD_DEBUG(4, "fnd one\n");
 7065                                                 TAILQ_REMOVE(&lhyp->list, lyp,
 7066                                                     lay_list);
 7067                                                 TAILQ_INSERT_HEAD(&loclyp, lyp,
 7068                                                     lay_list);
 7069                                                 break;
 7070                                         }
 7071                                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED +
 7072                                             NFSM_RNDUP(NFSX_V4PNFSFH) /
 7073                                             NFSX_UNSIGNED + 11 * NFSX_UNSIGNED);
 7074                                 }
 7075                         }
 7076                 }
 7077                 NFSUNLOCKLAYOUT(lhyp);
 7078         }
 7079 
 7080         /* Now, try to do a Layout recall for each one found. */
 7081         TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) {
 7082                 NFSD_DEBUG(4, "do layout recall\n");
 7083                 /*
 7084                  * The layout stateid.seqid needs to be incremented
 7085                  * before doing a LAYOUT_RECALL callback.
 7086                  */
 7087                 if (++lyp->lay_stateid.seqid == 0)
 7088                         lyp->lay_stateid.seqid = 1;
 7089                 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
 7090                     &lyp->lay_fh, lyp, 1, lyp->lay_type, p);
 7091                 nfsrv_freelayout(&loclyp, lyp);
 7092         }
 7093 }
 7094 
 7095 /*
 7096  * Do a recall callback to the client for this layout.
 7097  */
 7098 static int
 7099 nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp,
 7100     struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p)
 7101 {
 7102         struct nfsclient *clp;
 7103         int error;
 7104 
 7105         NFSD_DEBUG(4, "nfsrv_recalllayout\n");
 7106         error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0),
 7107             0, NULL, p);
 7108         NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error);
 7109         if (error != 0) {
 7110                 printf("nfsrv_recalllayout: getclient err=%d\n", error);
 7111                 return (error);
 7112         }
 7113         if ((clp->lc_flags & LCL_NFSV41) != 0) {
 7114                 error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL,
 7115                     stateidp, changed, fhp, NULL, NULL, laytype, p);
 7116                 /* If lyp != NULL, handle an error return here. */
 7117                 if (error != 0 && lyp != NULL) {
 7118                         NFSDRECALLLOCK();
 7119                         /*
 7120                          * Mark it returned, since no layout recall
 7121                          * has been done.
 7122                          * All errors seem to be non-recoverable, although
 7123                          * NFSERR_NOMATCHLAYOUT is a normal event.
 7124                          */
 7125                         if ((lyp->lay_flags & NFSLAY_RECALL) != 0) {
 7126                                 lyp->lay_flags |= NFSLAY_RETURNED;
 7127                                 wakeup(lyp);
 7128                         }
 7129                         NFSDRECALLUNLOCK();
 7130                         if (error != NFSERR_NOMATCHLAYOUT)
 7131                                 printf("nfsrv_recalllayout: err=%d\n", error);
 7132                 }
 7133         } else
 7134                 printf("nfsrv_recalllayout: clp not NFSv4.1\n");
 7135         return (error);
 7136 }
 7137 
 7138 /*
 7139  * Find a layout to recall when we exceed our high water mark.
 7140  */
 7141 void
 7142 nfsrv_recalloldlayout(NFSPROC_T *p)
 7143 {
 7144         struct nfslayouthash *lhyp;
 7145         struct nfslayout *lyp;
 7146         nfsquad_t clientid;
 7147         nfsv4stateid_t stateid;
 7148         fhandle_t fh;
 7149         int error, laytype = 0, ret;
 7150 
 7151         lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize];
 7152         NFSLOCKLAYOUT(lhyp);
 7153         TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) {
 7154                 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
 7155                         lyp->lay_flags |= NFSLAY_CALLB;
 7156                         /*
 7157                          * The layout stateid.seqid needs to be incremented
 7158                          * before doing a LAYOUT_RECALL callback.
 7159                          */
 7160                         if (++lyp->lay_stateid.seqid == 0)
 7161                                 lyp->lay_stateid.seqid = 1;
 7162                         clientid = lyp->lay_clientid;
 7163                         stateid = lyp->lay_stateid;
 7164                         NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh));
 7165                         laytype = lyp->lay_type;
 7166                         break;
 7167                 }
 7168         }
 7169         NFSUNLOCKLAYOUT(lhyp);
 7170         if (lyp != NULL) {
 7171                 error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0,
 7172                     laytype, p);
 7173                 if (error != 0 && error != NFSERR_NOMATCHLAYOUT)
 7174                         NFSD_DEBUG(4, "recallold=%d\n", error);
 7175                 if (error != 0) {
 7176                         NFSLOCKLAYOUT(lhyp);
 7177                         /*
 7178                          * Since the hash list was unlocked, we need to
 7179                          * find it again.
 7180                          */
 7181                         ret = nfsrv_findlayout(&clientid, &fh, laytype, p,
 7182                             &lyp);
 7183                         if (ret == 0 &&
 7184                             (lyp->lay_flags & NFSLAY_CALLB) != 0 &&
 7185                             lyp->lay_stateid.other[0] == stateid.other[0] &&
 7186                             lyp->lay_stateid.other[1] == stateid.other[1] &&
 7187                             lyp->lay_stateid.other[2] == stateid.other[2]) {
 7188                                 /*
 7189                                  * The client no longer knows this layout, so
 7190                                  * it can be free'd now.
 7191                                  */
 7192                                 if (error == NFSERR_NOMATCHLAYOUT)
 7193                                         nfsrv_freelayout(&lhyp->list, lyp);
 7194                                 else {
 7195                                         /*
 7196                                          * Leave it to be tried later by
 7197                                          * clearing NFSLAY_CALLB and moving
 7198                                          * it to the head of the list, so it
 7199                                          * won't be tried again for a while.
 7200                                          */
 7201                                         lyp->lay_flags &= ~NFSLAY_CALLB;
 7202                                         TAILQ_REMOVE(&lhyp->list, lyp,
 7203                                             lay_list);
 7204                                         TAILQ_INSERT_HEAD(&lhyp->list, lyp,
 7205                                             lay_list);
 7206                                 }
 7207                         }
 7208                         NFSUNLOCKLAYOUT(lhyp);
 7209                 }
 7210         }
 7211 }
 7212 
 7213 /*
 7214  * Try and return layout(s).
 7215  */
 7216 int
 7217 nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp,
 7218     int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim,
 7219     int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp,
 7220     struct ucred *cred, NFSPROC_T *p)
 7221 {
 7222         struct nfsvattr na;
 7223         struct nfslayouthash *lhyp;
 7224         struct nfslayout *lyp;
 7225         fhandle_t fh;
 7226         int error = 0;
 7227 
 7228         *fndp = 0;
 7229         if (kind == NFSV4LAYOUTRET_FILE) {
 7230                 error = nfsvno_getfh(vp, &fh, p);
 7231                 if (error == 0) {
 7232                         error = nfsrv_updatemdsattr(vp, &na, p);
 7233                         if (error != 0)
 7234                                 printf("nfsrv_layoutreturn: updatemdsattr"
 7235                                     " failed=%d\n", error);
 7236                 }
 7237                 if (error == 0) {
 7238                         if (reclaim == newnfs_true) {
 7239                                 error = nfsrv_checkgrace(NULL, NULL,
 7240                                     NFSLCK_RECLAIM);
 7241                                 if (error != NFSERR_NOGRACE)
 7242                                         error = 0;
 7243                                 return (error);
 7244                         }
 7245                         lhyp = NFSLAYOUTHASH(&fh);
 7246                         NFSDRECALLLOCK();
 7247                         NFSLOCKLAYOUT(lhyp);
 7248                         error = nfsrv_findlayout(&nd->nd_clientid, &fh,
 7249                             layouttype, p, &lyp);
 7250                         NFSD_DEBUG(4, "layoutret findlay=%d\n", error);
 7251                         if (error == 0 &&
 7252                             stateidp->other[0] == lyp->lay_stateid.other[0] &&
 7253                             stateidp->other[1] == lyp->lay_stateid.other[1] &&
 7254                             stateidp->other[2] == lyp->lay_stateid.other[2]) {
 7255                                 NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d"
 7256                                     " %x %x %x laystateid %d %x %x %x"
 7257                                     " off=%ju len=%ju flgs=0x%x\n",
 7258                                     stateidp->seqid, stateidp->other[0],
 7259                                     stateidp->other[1], stateidp->other[2],
 7260                                     lyp->lay_stateid.seqid,
 7261                                     lyp->lay_stateid.other[0],
 7262                                     lyp->lay_stateid.other[1],
 7263                                     lyp->lay_stateid.other[2],
 7264                                     (uintmax_t)offset, (uintmax_t)len,
 7265                                     lyp->lay_flags);
 7266                                 if (++lyp->lay_stateid.seqid == 0)
 7267                                         lyp->lay_stateid.seqid = 1;
 7268                                 stateidp->seqid = lyp->lay_stateid.seqid;
 7269                                 if (offset == 0 && len == UINT64_MAX) {
 7270                                         if ((iomode & NFSLAYOUTIOMODE_READ) !=
 7271                                             0)
 7272                                                 lyp->lay_flags &= ~NFSLAY_READ;
 7273                                         if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
 7274                                                 lyp->lay_flags &= ~NFSLAY_RW;
 7275                                         if ((lyp->lay_flags & (NFSLAY_READ |
 7276                                             NFSLAY_RW)) == 0)
 7277                                                 nfsrv_freelayout(&lhyp->list,
 7278                                                     lyp);
 7279                                         else
 7280                                                 *fndp = 1;
 7281                                 } else
 7282                                         *fndp = 1;
 7283                         }
 7284                         NFSUNLOCKLAYOUT(lhyp);
 7285                         /* Search the nfsrv_recalllist for a match. */
 7286                         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 7287                                 if (NFSBCMP(&lyp->lay_fh, &fh,
 7288                                     sizeof(fh)) == 0 &&
 7289                                     lyp->lay_clientid.qval ==
 7290                                     nd->nd_clientid.qval &&
 7291                                     stateidp->other[0] ==
 7292                                     lyp->lay_stateid.other[0] &&
 7293                                     stateidp->other[1] ==
 7294                                     lyp->lay_stateid.other[1] &&
 7295                                     stateidp->other[2] ==
 7296                                     lyp->lay_stateid.other[2]) {
 7297                                         lyp->lay_flags |= NFSLAY_RETURNED;
 7298                                         wakeup(lyp);
 7299                                         error = 0;
 7300                                 }
 7301                         }
 7302                         NFSDRECALLUNLOCK();
 7303                 }
 7304                 if (layouttype == NFSLAYOUT_FLEXFILE && layp != NULL)
 7305                         nfsrv_flexlayouterr(nd, layp, maxcnt, p);
 7306         } else if (kind == NFSV4LAYOUTRET_FSID)
 7307                 nfsrv_freelayouts(&nd->nd_clientid,
 7308                     &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode);
 7309         else if (kind == NFSV4LAYOUTRET_ALL)
 7310                 nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode);
 7311         else
 7312                 error = NFSERR_INVAL;
 7313         if (error == -1)
 7314                 error = 0;
 7315         return (error);
 7316 }
 7317 
 7318 /*
 7319  * Look for an existing layout.
 7320  */
 7321 static int
 7322 nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
 7323     NFSPROC_T *p, struct nfslayout **lypp)
 7324 {
 7325         struct nfslayouthash *lhyp;
 7326         struct nfslayout *lyp;
 7327         int ret;
 7328 
 7329         *lypp = NULL;
 7330         ret = 0;
 7331         lhyp = NFSLAYOUTHASH(fhp);
 7332         TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
 7333                 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
 7334                     lyp->lay_clientid.qval == clientidp->qval &&
 7335                     lyp->lay_type == laytype)
 7336                         break;
 7337         }
 7338         if (lyp != NULL)
 7339                 *lypp = lyp;
 7340         else
 7341                 ret = -1;
 7342         return (ret);
 7343 }
 7344 
 7345 /*
 7346  * Add the new layout, as required.
 7347  */
 7348 static int
 7349 nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
 7350     nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p)
 7351 {
 7352         struct nfsclient *clp;
 7353         struct nfslayouthash *lhyp;
 7354         struct nfslayout *lyp, *nlyp;
 7355         fhandle_t *fhp;
 7356         int error;
 7357 
 7358         KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0,
 7359             ("nfsrv_layoutget: no nd_clientid\n"));
 7360         lyp = *lypp;
 7361         fhp = &lyp->lay_fh;
 7362         NFSLOCKSTATE();
 7363         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 7364             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 7365         if (error != 0) {
 7366                 NFSUNLOCKSTATE();
 7367                 return (error);
 7368         }
 7369         lyp->lay_stateid.seqid = stateidp->seqid = 1;
 7370         lyp->lay_stateid.other[0] = stateidp->other[0] =
 7371             clp->lc_clientid.lval[0];
 7372         lyp->lay_stateid.other[1] = stateidp->other[1] =
 7373             clp->lc_clientid.lval[1];
 7374         lyp->lay_stateid.other[2] = stateidp->other[2] =
 7375             nfsrv_nextstateindex(clp);
 7376         NFSUNLOCKSTATE();
 7377 
 7378         lhyp = NFSLAYOUTHASH(fhp);
 7379         NFSLOCKLAYOUT(lhyp);
 7380         TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) {
 7381                 if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
 7382                     nlyp->lay_clientid.qval == nd->nd_clientid.qval)
 7383                         break;
 7384         }
 7385         if (nlyp != NULL) {
 7386                 /* A layout already exists, so use it. */
 7387                 nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW));
 7388                 NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen);
 7389                 *layoutlenp = nlyp->lay_layoutlen;
 7390                 if (++nlyp->lay_stateid.seqid == 0)
 7391                         nlyp->lay_stateid.seqid = 1;
 7392                 stateidp->seqid = nlyp->lay_stateid.seqid;
 7393                 stateidp->other[0] = nlyp->lay_stateid.other[0];
 7394                 stateidp->other[1] = nlyp->lay_stateid.other[1];
 7395                 stateidp->other[2] = nlyp->lay_stateid.other[2];
 7396                 NFSUNLOCKLAYOUT(lhyp);
 7397                 return (0);
 7398         }
 7399 
 7400         /* Insert the new layout in the lists. */
 7401         *lypp = NULL;
 7402         atomic_add_int(&nfsrv_layoutcnt, 1);
 7403         nfsstatsv1.srvlayouts++;
 7404         NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
 7405         *layoutlenp = lyp->lay_layoutlen;
 7406         TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list);
 7407         NFSUNLOCKLAYOUT(lhyp);
 7408         return (0);
 7409 }
 7410 
 7411 /*
 7412  * Get the devinfo for a deviceid.
 7413  */
 7414 int
 7415 nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt,
 7416     uint32_t *notify, int *devaddrlen, char **devaddr)
 7417 {
 7418         struct nfsdevice *ds;
 7419 
 7420         if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype !=
 7421              NFSLAYOUT_FLEXFILE) ||
 7422             (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES))
 7423                 return (NFSERR_UNKNLAYOUTTYPE);
 7424 
 7425         /*
 7426          * Now, search for the device id.  Note that the structures won't go
 7427          * away, but the order changes in the list.  As such, the lock only
 7428          * needs to be held during the search through the list.
 7429          */
 7430         NFSDDSLOCK();
 7431         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7432                 if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 &&
 7433                     ds->nfsdev_nmp != NULL)
 7434                         break;
 7435         }
 7436         NFSDDSUNLOCK();
 7437         if (ds == NULL)
 7438                 return (NFSERR_NOENT);
 7439 
 7440         /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */
 7441         *devaddrlen = 0;
 7442         if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
 7443                 *devaddrlen = ds->nfsdev_fileaddrlen;
 7444                 *devaddr = ds->nfsdev_fileaddr;
 7445         } else if (layouttype == NFSLAYOUT_FLEXFILE) {
 7446                 *devaddrlen = ds->nfsdev_flexaddrlen;
 7447                 *devaddr = ds->nfsdev_flexaddr;
 7448         }
 7449         if (*devaddrlen == 0)
 7450                 return (NFSERR_UNKNLAYOUTTYPE);
 7451 
 7452         /*
 7453          * The XDR overhead is 3 unsigned values: layout_type,
 7454          * length_of_address and notify bitmap.
 7455          * If the notify array is changed to not all zeros, the
 7456          * count of unsigned values must be increased.
 7457          */
 7458         if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) +
 7459             3 * NFSX_UNSIGNED) {
 7460                 *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED;
 7461                 return (NFSERR_TOOSMALL);
 7462         }
 7463         return (0);
 7464 }
 7465 
 7466 /*
 7467  * Free a list of layout state structures.
 7468  */
 7469 static void
 7470 nfsrv_freelayoutlist(nfsquad_t clientid)
 7471 {
 7472         struct nfslayouthash *lhyp;
 7473         struct nfslayout *lyp, *nlyp;
 7474         int i;
 7475 
 7476         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7477                 lhyp = &nfslayouthash[i];
 7478                 NFSLOCKLAYOUT(lhyp);
 7479                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7480                         if (lyp->lay_clientid.qval == clientid.qval)
 7481                                 nfsrv_freelayout(&lhyp->list, lyp);
 7482                 }
 7483                 NFSUNLOCKLAYOUT(lhyp);
 7484         }
 7485 }
 7486 
 7487 /*
 7488  * Free up a layout.
 7489  */
 7490 static void
 7491 nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp)
 7492 {
 7493 
 7494         NFSD_DEBUG(4, "Freelayout=%p\n", lyp);
 7495         atomic_add_int(&nfsrv_layoutcnt, -1);
 7496         nfsstatsv1.srvlayouts--;
 7497         TAILQ_REMOVE(lhp, lyp, lay_list);
 7498         free(lyp, M_NFSDSTATE);
 7499 }
 7500 
 7501 /*
 7502  * Free up a device id.
 7503  */
 7504 void
 7505 nfsrv_freeonedevid(struct nfsdevice *ds)
 7506 {
 7507         int i;
 7508 
 7509         atomic_add_int(&nfsrv_devidcnt, -1);
 7510         vrele(ds->nfsdev_dvp);
 7511         for (i = 0; i < nfsrv_dsdirsize; i++)
 7512                 if (ds->nfsdev_dsdir[i] != NULL)
 7513                         vrele(ds->nfsdev_dsdir[i]);
 7514         free(ds->nfsdev_fileaddr, M_NFSDSTATE);
 7515         free(ds->nfsdev_flexaddr, M_NFSDSTATE);
 7516         free(ds->nfsdev_host, M_NFSDSTATE);
 7517         free(ds, M_NFSDSTATE);
 7518 }
 7519 
 7520 /*
 7521  * Free up a device id and its mirrors.
 7522  */
 7523 static void
 7524 nfsrv_freedevid(struct nfsdevice *ds)
 7525 {
 7526 
 7527         TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
 7528         nfsrv_freeonedevid(ds);
 7529 }
 7530 
 7531 /*
 7532  * Free all layouts and device ids.
 7533  * Done when the nfsd threads are shut down since there may be a new
 7534  * modified device id list created when the nfsd is restarted.
 7535  */
 7536 void
 7537 nfsrv_freealllayoutsanddevids(void)
 7538 {
 7539         struct nfsdontlist *mrp, *nmrp;
 7540         struct nfslayout *lyp, *nlyp;
 7541 
 7542         /* Get rid of the deviceid structures. */
 7543         nfsrv_freealldevids();
 7544         TAILQ_INIT(&nfsrv_devidhead);
 7545         nfsrv_devidcnt = 0;
 7546 
 7547         /* Get rid of all layouts. */
 7548         nfsrv_freealllayouts();
 7549 
 7550         /* Get rid of any nfsdontlist entries. */
 7551         LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp)
 7552                 free(mrp, M_NFSDSTATE);
 7553         LIST_INIT(&nfsrv_dontlisthead);
 7554         nfsrv_dontlistlen = 0;
 7555 
 7556         /* Free layouts in the recall list. */
 7557         TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp)
 7558                 nfsrv_freelayout(&nfsrv_recalllisthead, lyp);
 7559         TAILQ_INIT(&nfsrv_recalllisthead);
 7560 }
 7561 
 7562 /*
 7563  * Free layouts that match the arguments.
 7564  */
 7565 static void
 7566 nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode)
 7567 {
 7568         struct nfslayouthash *lhyp;
 7569         struct nfslayout *lyp, *nlyp;
 7570         int i;
 7571 
 7572         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7573                 lhyp = &nfslayouthash[i];
 7574                 NFSLOCKLAYOUT(lhyp);
 7575                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7576                         if (clid->qval != lyp->lay_clientid.qval)
 7577                                 continue;
 7578                         if (fs != NULL && fsidcmp(fs, &lyp->lay_fsid) != 0)
 7579                                 continue;
 7580                         if (laytype != lyp->lay_type)
 7581                                 continue;
 7582                         if ((iomode & NFSLAYOUTIOMODE_READ) != 0)
 7583                                 lyp->lay_flags &= ~NFSLAY_READ;
 7584                         if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
 7585                                 lyp->lay_flags &= ~NFSLAY_RW;
 7586                         if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0)
 7587                                 nfsrv_freelayout(&lhyp->list, lyp);
 7588                 }
 7589                 NFSUNLOCKLAYOUT(lhyp);
 7590         }
 7591 }
 7592 
 7593 /*
 7594  * Free all layouts for the argument file.
 7595  */
 7596 void
 7597 nfsrv_freefilelayouts(fhandle_t *fhp)
 7598 {
 7599         struct nfslayouthash *lhyp;
 7600         struct nfslayout *lyp, *nlyp;
 7601 
 7602         lhyp = NFSLAYOUTHASH(fhp);
 7603         NFSLOCKLAYOUT(lhyp);
 7604         TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7605                 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0)
 7606                         nfsrv_freelayout(&lhyp->list, lyp);
 7607         }
 7608         NFSUNLOCKLAYOUT(lhyp);
 7609 }
 7610 
 7611 /*
 7612  * Free all layouts.
 7613  */
 7614 static void
 7615 nfsrv_freealllayouts(void)
 7616 {
 7617         struct nfslayouthash *lhyp;
 7618         struct nfslayout *lyp, *nlyp;
 7619         int i;
 7620 
 7621         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7622                 lhyp = &nfslayouthash[i];
 7623                 NFSLOCKLAYOUT(lhyp);
 7624                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp)
 7625                         nfsrv_freelayout(&lhyp->list, lyp);
 7626                 NFSUNLOCKLAYOUT(lhyp);
 7627         }
 7628 }
 7629 
 7630 /*
 7631  * Look up the mount path for the DS server.
 7632  */
 7633 static int
 7634 nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
 7635     struct nfsdevice **dsp)
 7636 {
 7637         struct nameidata nd;
 7638         struct nfsdevice *ds;
 7639         struct mount *mp;
 7640         int error, i;
 7641         char *dsdirpath;
 7642         size_t dsdirsize;
 7643 
 7644         NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp);
 7645         *dsp = NULL;
 7646         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
 7647             dspathp, p);
 7648         error = namei(&nd);
 7649         NFSD_DEBUG(4, "lookup=%d\n", error);
 7650         if (error != 0)
 7651                 return (error);
 7652         if (nd.ni_vp->v_type != VDIR) {
 7653                 vput(nd.ni_vp);
 7654                 NFSD_DEBUG(4, "dspath not dir\n");
 7655                 return (ENOTDIR);
 7656         }
 7657         if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 7658                 vput(nd.ni_vp);
 7659                 NFSD_DEBUG(4, "dspath not an NFS mount\n");
 7660                 return (ENXIO);
 7661         }
 7662 
 7663         /*
 7664          * Allocate a DS server structure with the NFS mounted directory
 7665          * vnode reference counted, so that a non-forced dismount will
 7666          * fail with EBUSY.
 7667          * This structure is always linked into the list, even if an error
 7668          * is being returned.  The caller will free the entire list upon
 7669          * an error return.
 7670          */
 7671         *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t),
 7672             M_NFSDSTATE, M_WAITOK | M_ZERO);
 7673         ds->nfsdev_dvp = nd.ni_vp;
 7674         ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount);
 7675         NFSVOPUNLOCK(nd.ni_vp);
 7676 
 7677         dsdirsize = strlen(dspathp) + 16;
 7678         dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK);
 7679         /* Now, create the DS directory structures. */
 7680         for (i = 0; i < nfsrv_dsdirsize; i++) {
 7681                 snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i);
 7682                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 7683                     UIO_SYSSPACE, dsdirpath, p);
 7684                 error = namei(&nd);
 7685                 NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error);
 7686                 if (error != 0)
 7687                         break;
 7688                 if (nd.ni_vp->v_type != VDIR) {
 7689                         vput(nd.ni_vp);
 7690                         error = ENOTDIR;
 7691                         NFSD_DEBUG(4, "dsdirpath not a VDIR\n");
 7692                         break;
 7693                 }
 7694                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 7695                         vput(nd.ni_vp);
 7696                         error = ENXIO;
 7697                         NFSD_DEBUG(4, "dsdirpath not an NFS mount\n");
 7698                         break;
 7699                 }
 7700                 ds->nfsdev_dsdir[i] = nd.ni_vp;
 7701                 NFSVOPUNLOCK(nd.ni_vp);
 7702         }
 7703         free(dsdirpath, M_TEMP);
 7704 
 7705         if (strlen(mdspathp) > 0) {
 7706                 /*
 7707                  * This DS stores file for a specific MDS exported file
 7708                  * system.
 7709                  */
 7710                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 7711                     UIO_SYSSPACE, mdspathp, p);
 7712                 error = namei(&nd);
 7713                 NFSD_DEBUG(4, "mds lookup=%d\n", error);
 7714                 if (error != 0)
 7715                         goto out;
 7716                 if (nd.ni_vp->v_type != VDIR) {
 7717                         vput(nd.ni_vp);
 7718                         error = ENOTDIR;
 7719                         NFSD_DEBUG(4, "mdspath not dir\n");
 7720                         goto out;
 7721                 }
 7722                 mp = nd.ni_vp->v_mount;
 7723                 if ((mp->mnt_flag & MNT_EXPORTED) == 0) {
 7724                         vput(nd.ni_vp);
 7725                         error = ENXIO;
 7726                         NFSD_DEBUG(4, "mdspath not an exported fs\n");
 7727                         goto out;
 7728                 }
 7729                 ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
 7730                 ds->nfsdev_mdsisset = 1;
 7731                 vput(nd.ni_vp);
 7732         }
 7733 
 7734 out:
 7735         TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
 7736         atomic_add_int(&nfsrv_devidcnt, 1);
 7737         return (error);
 7738 }
 7739 
 7740 /*
 7741  * Look up the mount path for the DS server and delete it.
 7742  */
 7743 int
 7744 nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p)
 7745 {
 7746         struct mount *mp;
 7747         struct nfsmount *nmp;
 7748         struct nfsdevice *ds;
 7749         int error;
 7750 
 7751         NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp);
 7752         /*
 7753          * Search for the path in the mount list.  Avoid looking the path
 7754          * up, since this mount point may be hung, with associated locked
 7755          * vnodes, etc.
 7756          * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked
 7757          * until this completes.
 7758          * As noted in the man page, this should be done before any forced
 7759          * dismount on the mount point, but at least the handshake on
 7760          * NFSMNTP_CANCELRPCS should make it safe.
 7761          */
 7762         error = 0;
 7763         ds = NULL;
 7764         nmp = NULL;
 7765         mtx_lock(&mountlist_mtx);
 7766         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 7767                 if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 &&
 7768                     strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 &&
 7769                     mp->mnt_data != NULL) {
 7770                         nmp = VFSTONFS(mp);
 7771                         NFSLOCKMNT(nmp);
 7772                         if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 7773                              NFSMNTP_CANCELRPCS)) == 0) {
 7774                                 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 7775                                 NFSUNLOCKMNT(nmp);
 7776                         } else {
 7777                                 NFSUNLOCKMNT(nmp);
 7778                                 nmp = NULL;
 7779                         }
 7780                         break;
 7781                 }
 7782         }
 7783         mtx_unlock(&mountlist_mtx);
 7784 
 7785         if (nmp != NULL) {
 7786                 ds = nfsrv_deldsnmp(op, nmp, p);
 7787                 NFSD_DEBUG(4, "deldsnmp=%p\n", ds);
 7788                 if (ds != NULL) {
 7789                         nfsrv_killrpcs(nmp);
 7790                         NFSD_DEBUG(4, "aft killrpcs\n");
 7791                 } else
 7792                         error = ENXIO;
 7793                 NFSLOCKMNT(nmp);
 7794                 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 7795                 wakeup(nmp);
 7796                 NFSUNLOCKMNT(nmp);
 7797         } else
 7798                 error = EINVAL;
 7799         return (error);
 7800 }
 7801 
 7802 /*
 7803  * Search for and remove a DS entry which matches the "nmp" argument.
 7804  * The nfsdevice structure pointer is returned so that the caller can
 7805  * free it via nfsrv_freeonedevid().
 7806  * For the forced case, do not try to do LayoutRecalls, since the server
 7807  * must be shut down now anyhow.
 7808  */
 7809 struct nfsdevice *
 7810 nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p)
 7811 {
 7812         struct nfsdevice *fndds;
 7813 
 7814         NFSD_DEBUG(4, "deldsdvp\n");
 7815         NFSDDSLOCK();
 7816         if (op == PNFSDOP_FORCEDELDS)
 7817                 fndds = nfsv4_findmirror(nmp);
 7818         else
 7819                 fndds = nfsrv_findmirroredds(nmp);
 7820         if (fndds != NULL)
 7821                 nfsrv_deleteds(fndds);
 7822         NFSDDSUNLOCK();
 7823         if (fndds != NULL) {
 7824                 if (op != PNFSDOP_FORCEDELDS)
 7825                         nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
 7826                 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
 7827         }
 7828         return (fndds);
 7829 }
 7830 
 7831 /*
 7832  * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid.
 7833  * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount
 7834  * point.
 7835  * Also, returns an error instead of the nfsdevice found.
 7836  */
 7837 int
 7838 nfsrv_delds(char *devid, NFSPROC_T *p)
 7839 {
 7840         struct nfsdevice *ds, *fndds;
 7841         struct nfsmount *nmp;
 7842         int fndmirror;
 7843 
 7844         NFSD_DEBUG(4, "delds\n");
 7845         /*
 7846          * Search the DS server list for a match with devid.
 7847          * Remove the DS entry if found and there is a mirror.
 7848          */
 7849         fndds = NULL;
 7850         nmp = NULL;
 7851         fndmirror = 0;
 7852         NFSDDSLOCK();
 7853         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7854                 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 &&
 7855                     ds->nfsdev_nmp != NULL) {
 7856                         NFSD_DEBUG(4, "fnd main ds\n");
 7857                         fndds = ds;
 7858                         break;
 7859                 }
 7860         }
 7861         if (fndds == NULL) {
 7862                 NFSDDSUNLOCK();
 7863                 return (ENXIO);
 7864         }
 7865         if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
 7866                 fndmirror = 1;
 7867         else if (fndds->nfsdev_mdsisset != 0) {
 7868                 /* For the fsid is set case, search for a mirror. */
 7869                 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7870                         if (ds != fndds && ds->nfsdev_nmp != NULL &&
 7871                             ds->nfsdev_mdsisset != 0 &&
 7872                             fsidcmp(&ds->nfsdev_mdsfsid,
 7873                             &fndds->nfsdev_mdsfsid) == 0) {
 7874                                 fndmirror = 1;
 7875                                 break;
 7876                         }
 7877                 }
 7878         }
 7879         if (fndmirror != 0) {
 7880                 nmp = fndds->nfsdev_nmp;
 7881                 NFSLOCKMNT(nmp);
 7882                 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 7883                      NFSMNTP_CANCELRPCS)) == 0) {
 7884                         nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 7885                         NFSUNLOCKMNT(nmp);
 7886                         nfsrv_deleteds(fndds);
 7887                 } else {
 7888                         NFSUNLOCKMNT(nmp);
 7889                         nmp = NULL;
 7890                 }
 7891         }
 7892         NFSDDSUNLOCK();
 7893         if (nmp != NULL) {
 7894                 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
 7895                 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
 7896                 nfsrv_killrpcs(nmp);
 7897                 NFSLOCKMNT(nmp);
 7898                 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 7899                 wakeup(nmp);
 7900                 NFSUNLOCKMNT(nmp);
 7901                 return (0);
 7902         }
 7903         return (ENXIO);
 7904 }
 7905 
 7906 /*
 7907  * Mark a DS as disabled by setting nfsdev_nmp = NULL.
 7908  */
 7909 static void
 7910 nfsrv_deleteds(struct nfsdevice *fndds)
 7911 {
 7912 
 7913         NFSD_DEBUG(4, "deleteds: deleting a mirror\n");
 7914         fndds->nfsdev_nmp = NULL;
 7915         if (fndds->nfsdev_mdsisset == 0)
 7916                 nfsrv_faildscnt--;
 7917 }
 7918 
 7919 /*
 7920  * Fill in the addr structures for the File and Flex File layouts.
 7921  */
 7922 static void
 7923 nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
 7924 {
 7925         uint32_t *tl;
 7926         char *netprot;
 7927         int addrlen;
 7928         static uint64_t new_devid = 0;
 7929 
 7930         if (strchr(addr, ':') != NULL)
 7931                 netprot = "tcp6";
 7932         else
 7933                 netprot = "tcp";
 7934 
 7935         /* Fill in the device id. */
 7936         NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time));
 7937         new_devid++;
 7938         NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)],
 7939             sizeof(new_devid));
 7940 
 7941         /*
 7942          * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4
 7943          * as defined in RFC5661) in XDR.
 7944          */
 7945         addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
 7946             6 * NFSX_UNSIGNED;
 7947         NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot);
 7948         ds->nfsdev_fileaddrlen = addrlen;
 7949         tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
 7950         ds->nfsdev_fileaddr = (char *)tl;
 7951         *tl++ = txdr_unsigned(1);               /* One stripe with index 0. */
 7952         *tl++ = 0;
 7953         *tl++ = txdr_unsigned(1);               /* One multipath list */
 7954         *tl++ = txdr_unsigned(1);               /* with one entry in it. */
 7955         /* The netaddr for this one entry. */
 7956         *tl++ = txdr_unsigned(strlen(netprot));
 7957         NFSBCOPY(netprot, tl, strlen(netprot));
 7958         tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
 7959         *tl++ = txdr_unsigned(strlen(addr));
 7960         NFSBCOPY(addr, tl, strlen(addr));
 7961 
 7962         /*
 7963          * Fill in the flex file addr (actually the ff_device_addr4
 7964          * as defined for Flexible File Layout) in XDR.
 7965          */
 7966         addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
 7967             14 * NFSX_UNSIGNED;
 7968         ds->nfsdev_flexaddrlen = addrlen;
 7969         tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
 7970         ds->nfsdev_flexaddr = (char *)tl;
 7971         *tl++ = txdr_unsigned(1);               /* One multipath entry. */
 7972         /* The netaddr for this one entry. */
 7973         *tl++ = txdr_unsigned(strlen(netprot));
 7974         NFSBCOPY(netprot, tl, strlen(netprot));
 7975         tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
 7976         *tl++ = txdr_unsigned(strlen(addr));
 7977         NFSBCOPY(addr, tl, strlen(addr));
 7978         tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED);
 7979         *tl++ = txdr_unsigned(2);               /* Two NFS Versions. */
 7980         *tl++ = txdr_unsigned(NFS_VER4);        /* NFSv4. */
 7981         *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */
 7982         *tl++ = txdr_unsigned(nfs_srvmaxio);    /* DS max rsize. */
 7983         *tl++ = txdr_unsigned(nfs_srvmaxio);    /* DS max wsize. */
 7984         *tl++ = newnfs_true;                    /* Tightly coupled. */
 7985         *tl++ = txdr_unsigned(NFS_VER4);        /* NFSv4. */
 7986         *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
 7987         *tl++ = txdr_unsigned(nfs_srvmaxio);    /* DS max rsize. */
 7988         *tl++ = txdr_unsigned(nfs_srvmaxio);    /* DS max wsize. */
 7989         *tl = newnfs_true;                      /* Tightly coupled. */
 7990 
 7991         ds->nfsdev_hostnamelen = strlen(dnshost);
 7992         ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE,
 7993             M_WAITOK);
 7994         NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1);
 7995 }
 7996 
 7997 /*
 7998  * Create the device id list.
 7999  * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument
 8000  * is misconfigured.
 8001  */
 8002 int
 8003 nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p)
 8004 {
 8005         struct nfsdevice *ds;
 8006         char *addrp, *dnshostp, *dspathp, *mdspathp;
 8007         int error, i;
 8008 
 8009         addrp = args->addr;
 8010         dnshostp = args->dnshost;
 8011         dspathp = args->dspath;
 8012         mdspathp = args->mdspath;
 8013         nfsrv_maxpnfsmirror = args->mirrorcnt;
 8014         if (addrp == NULL || dnshostp == NULL || dspathp == NULL ||
 8015             mdspathp == NULL)
 8016                 return (0);
 8017 
 8018         /*
 8019          * Loop around for each nul-terminated string in args->addr,
 8020          * args->dnshost, args->dnspath and args->mdspath.
 8021          */
 8022         while (addrp < (args->addr + args->addrlen) &&
 8023             dnshostp < (args->dnshost + args->dnshostlen) &&
 8024             dspathp < (args->dspath + args->dspathlen) &&
 8025             mdspathp < (args->mdspath + args->mdspathlen)) {
 8026                 error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds);
 8027                 if (error != 0) {
 8028                         /* Free all DS servers. */
 8029                         nfsrv_freealldevids();
 8030                         nfsrv_devidcnt = 0;
 8031                         return (ENXIO);
 8032                 }
 8033                 nfsrv_allocdevid(ds, addrp, dnshostp);
 8034                 addrp += (strlen(addrp) + 1);
 8035                 dnshostp += (strlen(dnshostp) + 1);
 8036                 dspathp += (strlen(dspathp) + 1);
 8037                 mdspathp += (strlen(mdspathp) + 1);
 8038         }
 8039         if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) {
 8040                 /* Free all DS servers. */
 8041                 nfsrv_freealldevids();
 8042                 nfsrv_devidcnt = 0;
 8043                 nfsrv_maxpnfsmirror = 1;
 8044                 return (ENXIO);
 8045         }
 8046         /* We can fail at most one less DS than the mirror level. */
 8047         nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1;
 8048 
 8049         /*
 8050          * Allocate the nfslayout hash table now, since this is a pNFS server.
 8051          * Make it 1% of the high water mark and at least 100.
 8052          */
 8053         if (nfslayouthash == NULL) {
 8054                 nfsrv_layouthashsize = nfsrv_layouthighwater / 100;
 8055                 if (nfsrv_layouthashsize < 100)
 8056                         nfsrv_layouthashsize = 100;
 8057                 nfslayouthash = mallocarray(nfsrv_layouthashsize,
 8058                     sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK |
 8059                     M_ZERO);
 8060                 for (i = 0; i < nfsrv_layouthashsize; i++) {
 8061                         mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF);
 8062                         TAILQ_INIT(&nfslayouthash[i].list);
 8063                 }
 8064         }
 8065         return (0);
 8066 }
 8067 
 8068 /*
 8069  * Free all device ids.
 8070  */
 8071 static void
 8072 nfsrv_freealldevids(void)
 8073 {
 8074         struct nfsdevice *ds, *nds;
 8075 
 8076         TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds)
 8077                 nfsrv_freedevid(ds);
 8078 }
 8079 
 8080 /*
 8081  * Check to see if there is a Read/Write Layout plus either:
 8082  * - A Write Delegation
 8083  * or
 8084  * - An Open with Write_access.
 8085  * Return 1 if this is the case and 0 otherwise.
 8086  * This function is used by nfsrv_proxyds() to decide if doing a Proxy
 8087  * Getattr RPC to the Data Server (DS) is necessary.
 8088  */
 8089 #define NFSCLIDVECSIZE  6
 8090 int
 8091 nfsrv_checkdsattr(vnode_t vp, NFSPROC_T *p)
 8092 {
 8093         fhandle_t fh, *tfhp;
 8094         struct nfsstate *stp;
 8095         struct nfslayout *lyp;
 8096         struct nfslayouthash *lhyp;
 8097         struct nfslockhashhead *hp;
 8098         struct nfslockfile *lfp;
 8099         nfsquad_t clid[NFSCLIDVECSIZE];
 8100         int clidcnt, ret;
 8101 
 8102         ret = nfsvno_getfh(vp, &fh, p);
 8103         if (ret != 0)
 8104                 return (0);
 8105 
 8106         /* First check for a Read/Write Layout. */
 8107         clidcnt = 0;
 8108         lhyp = NFSLAYOUTHASH(&fh);
 8109         NFSLOCKLAYOUT(lhyp);
 8110         TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
 8111                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8112                     ((lyp->lay_flags & NFSLAY_RW) != 0 ||
 8113                      ((lyp->lay_flags & NFSLAY_READ) != 0 &&
 8114                       nfsrv_pnfsatime != 0))) {
 8115                         if (clidcnt < NFSCLIDVECSIZE)
 8116                                 clid[clidcnt].qval = lyp->lay_clientid.qval;
 8117                         clidcnt++;
 8118                 }
 8119         }
 8120         NFSUNLOCKLAYOUT(lhyp);
 8121         if (clidcnt == 0) {
 8122                 /* None found, so return 0. */
 8123                 return (0);
 8124         }
 8125 
 8126         /* Get the nfslockfile for this fh. */
 8127         NFSLOCKSTATE();
 8128         hp = NFSLOCKHASH(&fh);
 8129         LIST_FOREACH(lfp, hp, lf_hash) {
 8130                 tfhp = &lfp->lf_fh;
 8131                 if (NFSVNO_CMPFH(&fh, tfhp))
 8132                         break;
 8133         }
 8134         if (lfp == NULL) {
 8135                 /* None found, so return 0. */
 8136                 NFSUNLOCKSTATE();
 8137                 return (0);
 8138         }
 8139 
 8140         /* Now, look for a Write delegation for this clientid. */
 8141         LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 8142                 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
 8143                     nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
 8144                         break;
 8145         }
 8146         if (stp != NULL) {
 8147                 /* Found one, so return 1. */
 8148                 NFSUNLOCKSTATE();
 8149                 return (1);
 8150         }
 8151 
 8152         /* No Write delegation, so look for an Open with Write_access. */
 8153         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 8154                 KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0,
 8155                     ("nfsrv_checkdsattr: Non-open in Open list\n"));
 8156                 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 &&
 8157                     nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
 8158                         break;
 8159         }
 8160         NFSUNLOCKSTATE();
 8161         if (stp != NULL)
 8162                 return (1);
 8163         return (0);
 8164 }
 8165 
 8166 /*
 8167  * Look for a matching clientid in the vector. Return 1 if one might match.
 8168  */
 8169 static int
 8170 nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt)
 8171 {
 8172         int i;
 8173 
 8174         /* If too many for the vector, return 1 since there might be a match. */
 8175         if (clidcnt > NFSCLIDVECSIZE)
 8176                 return (1);
 8177 
 8178         for (i = 0; i < clidcnt; i++)
 8179                 if (clidvec[i].qval == clid.qval)
 8180                         return (1);
 8181         return (0);
 8182 }
 8183 
 8184 /*
 8185  * Check the don't list for "vp" and see if issuing an rw layout is allowed.
 8186  * Return 1 if issuing an rw layout isn't allowed, 0 otherwise.
 8187  */
 8188 static int
 8189 nfsrv_dontlayout(fhandle_t *fhp)
 8190 {
 8191         struct nfsdontlist *mrp;
 8192         int ret;
 8193 
 8194         if (nfsrv_dontlistlen == 0)
 8195                 return (0);
 8196         ret = 0;
 8197         NFSDDONTLISTLOCK();
 8198         LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
 8199                 if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 &&
 8200                     (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) {
 8201                         ret = 1;
 8202                         break;
 8203                 }
 8204         }
 8205         NFSDDONTLISTUNLOCK();
 8206         return (ret);
 8207 }
 8208 
 8209 #define PNFSDS_COPYSIZ  65536
 8210 /*
 8211  * Create a new file on a DS and copy the contents of an extant DS file to it.
 8212  * This can be used for recovery of a DS file onto a recovered DS.
 8213  * The steps are:
 8214  * - When called, the MDS file's vnode is locked, blocking LayoutGet operations.
 8215  * - Disable issuing of read/write layouts for the file via the nfsdontlist,
 8216  *   so that they will be disabled after the MDS file's vnode is unlocked.
 8217  * - Set up the nfsrv_recalllist so that recall of read/write layouts can
 8218  *   be done.
 8219  * - Unlock the MDS file's vnode, so that the client(s) can perform proxied
 8220  *   writes, LayoutCommits and LayoutReturns for the file when completing the
 8221  *   LayoutReturn requested by the LayoutRecall callback.
 8222  * - Issue a LayoutRecall callback for all read/write layouts and wait for
 8223  *   them to be returned. (If the LayoutRecall callback replies
 8224  *   NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.)
 8225  * - Exclusively lock the MDS file's vnode.  This ensures that no proxied
 8226  *   writes are in progress or can occur during the DS file copy.
 8227  *   It also blocks Setattr operations.
 8228  * - Create the file on the recovered mirror.
 8229  * - Copy the file from the operational DS.
 8230  * - Copy any ACL from the MDS file to the new DS file.
 8231  * - Set the modify time of the new DS file to that of the MDS file.
 8232  * - Update the extended attribute for the MDS file.
 8233  * - Enable issuing of rw layouts by deleting the nfsdontlist entry.
 8234  * - The caller will unlock the MDS file's vnode allowing operations
 8235  *   to continue normally, since it is now on the mirror again.
 8236  */
 8237 int
 8238 nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds,
 8239     struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt,
 8240     struct ucred *cred, NFSPROC_T *p)
 8241 {
 8242         struct nfsdontlist *mrp, *nmrp;
 8243         struct nfslayouthash *lhyp;
 8244         struct nfslayout *lyp, *nlyp;
 8245         struct nfslayouthead thl;
 8246         struct mount *mp, *tvmp;
 8247         struct acl *aclp;
 8248         struct vattr va;
 8249         struct timespec mtime;
 8250         fhandle_t fh;
 8251         vnode_t tvp;
 8252         off_t rdpos, wrpos;
 8253         ssize_t aresid;
 8254         char *dat;
 8255         int didprintf, ret, retacl, xfer;
 8256 
 8257         ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp");
 8258         ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp");
 8259         /*
 8260          * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag
 8261          * so that no more RW layouts will get issued.
 8262          */
 8263         ret = nfsvno_getfh(vp, &fh, p);
 8264         if (ret != 0) {
 8265                 NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret);
 8266                 return (ret);
 8267         }
 8268         nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK);
 8269         nmrp->nfsmr_flags = NFSMR_DONTLAYOUT;
 8270         NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh));
 8271         NFSDDONTLISTLOCK();
 8272         LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
 8273                 if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0)
 8274                         break;
 8275         }
 8276         if (mrp == NULL) {
 8277                 LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list);
 8278                 mrp = nmrp;
 8279                 nmrp = NULL;
 8280                 nfsrv_dontlistlen++;
 8281                 NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n");
 8282         } else {
 8283                 NFSDDONTLISTUNLOCK();
 8284                 free(nmrp, M_NFSDSTATE);
 8285                 NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n");
 8286                 return (ENXIO);
 8287         }
 8288         NFSDDONTLISTUNLOCK();
 8289 
 8290         /*
 8291          * Search for all RW layouts for this file.  Move them to the
 8292          * recall list, so they can be recalled and their return noted.
 8293          */
 8294         lhyp = NFSLAYOUTHASH(&fh);
 8295         NFSDRECALLLOCK();
 8296         NFSLOCKLAYOUT(lhyp);
 8297         TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 8298                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8299                     (lyp->lay_flags & NFSLAY_RW) != 0) {
 8300                         TAILQ_REMOVE(&lhyp->list, lyp, lay_list);
 8301                         TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list);
 8302                         lyp->lay_trycnt = 0;
 8303                 }
 8304         }
 8305         NFSUNLOCKLAYOUT(lhyp);
 8306         NFSDRECALLUNLOCK();
 8307 
 8308         ret = 0;
 8309         mp = tvmp = NULL;
 8310         didprintf = 0;
 8311         TAILQ_INIT(&thl);
 8312         /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */
 8313         NFSVOPUNLOCK(vp);
 8314         /* Now, do a recall for all layouts not yet recalled. */
 8315 tryagain:
 8316         NFSDRECALLLOCK();
 8317         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 8318                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8319                     (lyp->lay_flags & NFSLAY_RECALL) == 0) {
 8320                         lyp->lay_flags |= NFSLAY_RECALL;
 8321                         /*
 8322                          * The layout stateid.seqid needs to be incremented
 8323                          * before doing a LAYOUT_RECALL callback.
 8324                          */
 8325                         if (++lyp->lay_stateid.seqid == 0)
 8326                                 lyp->lay_stateid.seqid = 1;
 8327                         NFSDRECALLUNLOCK();
 8328                         nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
 8329                             &lyp->lay_fh, lyp, 0, lyp->lay_type, p);
 8330                         NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n");
 8331                         goto tryagain;
 8332                 }
 8333         }
 8334 
 8335         /* Now wait for them to be returned. */
 8336 tryagain2:
 8337         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 8338                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) {
 8339                         if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) {
 8340                                 TAILQ_REMOVE(&nfsrv_recalllisthead, lyp,
 8341                                     lay_list);
 8342                                 TAILQ_INSERT_HEAD(&thl, lyp, lay_list);
 8343                                 NFSD_DEBUG(4,
 8344                                     "nfsrv_copymr: layout returned\n");
 8345                         } else {
 8346                                 lyp->lay_trycnt++;
 8347                                 ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR,
 8348                                     PVFS | PCATCH, "nfsmrl", hz);
 8349                                 NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n",
 8350                                     ret);
 8351                                 if (ret == EINTR || ret == ERESTART)
 8352                                         break;
 8353                                 if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) {
 8354                                         /*
 8355                                          * Give up after 60sec and return
 8356                                          * ENXIO, failing the copymr.
 8357                                          * This layout will remain on the
 8358                                          * recalllist.  It can only be cleared
 8359                                          * by restarting the nfsd.
 8360                                          * This seems the safe way to handle
 8361                                          * it, since it cannot be safely copied
 8362                                          * with an outstanding RW layout.
 8363                                          */
 8364                                         if (lyp->lay_trycnt >= 60) {
 8365                                                 ret = ENXIO;
 8366                                                 break;
 8367                                         }
 8368                                         if (didprintf == 0) {
 8369                                                 printf("nfsrv_copymr: layout "
 8370                                                     "not returned\n");
 8371                                                 didprintf = 1;
 8372                                         }
 8373                                 }
 8374                         }
 8375                         goto tryagain2;
 8376                 }
 8377         }
 8378         NFSDRECALLUNLOCK();
 8379         /* We can now get rid of the layouts that have been returned. */
 8380         TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp)
 8381                 nfsrv_freelayout(&thl, lyp);
 8382 
 8383         /*
 8384          * Do the vn_start_write() calls here, before the MDS vnode is
 8385          * locked and the tvp is created (locked) in the NFS file system
 8386          * that dvp is in.
 8387          * For tvmp, this probably isn't necessary, since it will be an
 8388          * NFS mount and they are not suspendable at this time.
 8389          */
 8390         if (ret == 0)
 8391                 ret = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 8392         if (ret == 0) {
 8393                 tvmp = dvp->v_mount;
 8394                 ret = vn_start_write(NULL, &tvmp, V_WAIT | PCATCH);
 8395         }
 8396 
 8397         /*
 8398          * LK_EXCLUSIVE lock the MDS vnode, so that any
 8399          * proxied writes through the MDS will be blocked until we have
 8400          * completed the copy and update of the extended attributes.
 8401          * This will also ensure that any attributes and ACL will not be
 8402          * changed until the copy is complete.
 8403          */
 8404         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 8405         if (ret == 0 && VN_IS_DOOMED(vp)) {
 8406                 NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n");
 8407                 ret = ESTALE;
 8408         }
 8409 
 8410         /* Create the data file on the recovered DS. */
 8411         if (ret == 0)
 8412                 ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp);
 8413 
 8414         /* Copy the DS file, if created successfully. */
 8415         if (ret == 0) {
 8416                 /*
 8417                  * Get any NFSv4 ACL on the MDS file, so that it can be set
 8418                  * on the new DS file.
 8419                  */
 8420                 aclp = acl_alloc(M_WAITOK | M_ZERO);
 8421                 retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
 8422                 if (retacl != 0 && retacl != ENOATTR)
 8423                         NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl);
 8424                 dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK);
 8425                 /* Malloc a block of 0s used to check for holes. */
 8426                 if (nfsrv_zeropnfsdat == NULL)
 8427                         nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP,
 8428                             M_WAITOK | M_ZERO);
 8429                 rdpos = wrpos = 0;
 8430                 ret = VOP_GETATTR(fvp, &va, cred);
 8431                 aresid = 0;
 8432                 while (ret == 0 && aresid == 0) {
 8433                         ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ,
 8434                             rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
 8435                             &aresid, p);
 8436                         xfer = PNFSDS_COPYSIZ - aresid;
 8437                         if (ret == 0 && xfer > 0) {
 8438                                 rdpos += xfer;
 8439                                 /*
 8440                                  * Skip the write for holes, except for the
 8441                                  * last block.
 8442                                  */
 8443                                 if (xfer < PNFSDS_COPYSIZ || rdpos ==
 8444                                     va.va_size || NFSBCMP(dat,
 8445                                     nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0)
 8446                                         ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer,
 8447                                             wrpos, UIO_SYSSPACE, IO_NODELOCKED,
 8448                                             cred, NULL, NULL, p);
 8449                                 if (ret == 0)
 8450                                         wrpos += xfer;
 8451                         }
 8452                 }
 8453 
 8454                 /* If there is an ACL and the copy succeeded, set the ACL. */
 8455                 if (ret == 0 && retacl == 0) {
 8456                         ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p);
 8457                         /*
 8458                          * Don't consider these as errors, since VOP_GETACL()
 8459                          * can return an ACL when they are not actually
 8460                          * supported.  For example, for UFS, VOP_GETACL()
 8461                          * will return a trivial ACL based on the uid/gid/mode
 8462                          * when there is no ACL on the file.
 8463                          * This case should be recognized as a trivial ACL
 8464                          * by UFS's VOP_SETACL() and succeed, but...
 8465                          */
 8466                         if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM)
 8467                                 ret = 0;
 8468                 }
 8469 
 8470                 if (ret == 0)
 8471                         ret = VOP_FSYNC(tvp, MNT_WAIT, p);
 8472 
 8473                 /* Set the DS data file's modify time that of the MDS file. */
 8474                 if (ret == 0)
 8475                         ret = VOP_GETATTR(vp, &va, cred);
 8476                 if (ret == 0) {
 8477                         mtime = va.va_mtime;
 8478                         VATTR_NULL(&va);
 8479                         va.va_mtime = mtime;
 8480                         ret = VOP_SETATTR(tvp, &va, cred);
 8481                 }
 8482 
 8483                 vput(tvp);
 8484                 acl_free(aclp);
 8485                 free(dat, M_TEMP);
 8486         }
 8487         if (tvmp != NULL)
 8488                 vn_finished_write(tvmp);
 8489 
 8490         /* Update the extended attributes for the newly created DS file. */
 8491         if (ret == 0)
 8492                 ret = vn_extattr_set(vp, IO_NODELOCKED,
 8493                     EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
 8494                     sizeof(*wpf) * mirrorcnt, (char *)wpf, p);
 8495         if (mp != NULL)
 8496                 vn_finished_write(mp);
 8497 
 8498         /* Get rid of the dontlist entry, so that Layouts can be issued. */
 8499         NFSDDONTLISTLOCK();
 8500         LIST_REMOVE(mrp, nfsmr_list);
 8501         NFSDDONTLISTUNLOCK();
 8502         free(mrp, M_NFSDSTATE);
 8503         return (ret);
 8504 }
 8505 
 8506 /*
 8507  * Create a data storage file on the recovered DS.
 8508  */
 8509 static int
 8510 nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
 8511     vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
 8512     vnode_t *tvpp)
 8513 {
 8514         struct vattr va, nva;
 8515         int error;
 8516 
 8517         /* Make data file name based on FH. */
 8518         error = VOP_GETATTR(vp, &va, cred);
 8519         if (error == 0) {
 8520                 /* Set the attributes for "vp" to Setattr the DS vp. */
 8521                 VATTR_NULL(&nva);
 8522                 nva.va_uid = va.va_uid;
 8523                 nva.va_gid = va.va_gid;
 8524                 nva.va_mode = va.va_mode;
 8525                 nva.va_size = 0;
 8526                 VATTR_NULL(&va);
 8527                 va.va_type = VREG;
 8528                 va.va_mode = nva.va_mode;
 8529                 NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf);
 8530                 error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL,
 8531                     pf->dsf_filename, cred, p, tvpp);
 8532         }
 8533         return (error);
 8534 }
 8535 
 8536 /*
 8537  * Look up the MDS file shared locked, and then get the extended attribute
 8538  * to find the extant DS file to be copied to the new mirror.
 8539  * If successful, *vpp is set to the MDS file's vp and *nvpp is
 8540  * set to a DS data file for the MDS file, both exclusively locked.
 8541  * The "buf" argument has the pnfsdsfile structure from the MDS file
 8542  * in it and buflen is set to its length.
 8543  */
 8544 int
 8545 nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf,
 8546     int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp,
 8547     struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp,
 8548     struct nfsdevice **fdsp)
 8549 {
 8550         struct nameidata nd;
 8551         struct vnode *vp, *curvp;
 8552         struct pnfsdsfile *pf;
 8553         struct nfsmount *nmp, *curnmp;
 8554         int dsdir, error, mirrorcnt, ippos;
 8555 
 8556         vp = NULL;
 8557         curvp = NULL;
 8558         curnmp = NULL;
 8559         *dsp = NULL;
 8560         *fdsp = NULL;
 8561         if (dspathp == NULL && curdspathp != NULL)
 8562                 return (EPERM);
 8563 
 8564         /*
 8565          * Look up the MDS file shared locked.  The lock will be upgraded
 8566          * to an exclusive lock after any rw layouts have been returned.
 8567          */
 8568         NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp);
 8569         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
 8570             mdspathp, p);
 8571         error = namei(&nd);
 8572         NFSD_DEBUG(4, "lookup=%d\n", error);
 8573         if (error != 0)
 8574                 return (error);
 8575         if (nd.ni_vp->v_type != VREG) {
 8576                 vput(nd.ni_vp);
 8577                 NFSD_DEBUG(4, "mdspath not reg\n");
 8578                 return (EISDIR);
 8579         }
 8580         vp = nd.ni_vp;
 8581 
 8582         if (curdspathp != NULL) {
 8583                 /*
 8584                  * Look up the current DS path and find the nfsdev structure for
 8585                  * it.
 8586                  */
 8587                 NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp);
 8588                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 8589                     UIO_SYSSPACE, curdspathp, p);
 8590                 error = namei(&nd);
 8591                 NFSD_DEBUG(4, "ds lookup=%d\n", error);
 8592                 if (error != 0) {
 8593                         vput(vp);
 8594                         return (error);
 8595                 }
 8596                 if (nd.ni_vp->v_type != VDIR) {
 8597                         vput(nd.ni_vp);
 8598                         vput(vp);
 8599                         NFSD_DEBUG(4, "curdspath not dir\n");
 8600                         return (ENOTDIR);
 8601                 }
 8602                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 8603                         vput(nd.ni_vp);
 8604                         vput(vp);
 8605                         NFSD_DEBUG(4, "curdspath not an NFS mount\n");
 8606                         return (ENXIO);
 8607                 }
 8608                 curnmp = VFSTONFS(nd.ni_vp->v_mount);
 8609 
 8610                 /* Search the nfsdev list for a match. */
 8611                 NFSDDSLOCK();
 8612                 *fdsp = nfsv4_findmirror(curnmp);
 8613                 NFSDDSUNLOCK();
 8614                 if (*fdsp == NULL)
 8615                         curnmp = NULL;
 8616                 if (curnmp == NULL) {
 8617                         vput(nd.ni_vp);
 8618                         vput(vp);
 8619                         NFSD_DEBUG(4, "mdscopymr: no current ds\n");
 8620                         return (ENXIO);
 8621                 }
 8622                 curvp = nd.ni_vp;
 8623         }
 8624 
 8625         if (dspathp != NULL) {
 8626                 /* Look up the nfsdev path and find the nfsdev structure. */
 8627                 NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp);
 8628                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 8629                     UIO_SYSSPACE, dspathp, p);
 8630                 error = namei(&nd);
 8631                 NFSD_DEBUG(4, "ds lookup=%d\n", error);
 8632                 if (error != 0) {
 8633                         vput(vp);
 8634                         if (curvp != NULL)
 8635                                 vput(curvp);
 8636                         return (error);
 8637                 }
 8638                 if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) {
 8639                         vput(nd.ni_vp);
 8640                         vput(vp);
 8641                         if (curvp != NULL)
 8642                                 vput(curvp);
 8643                         NFSD_DEBUG(4, "dspath not dir\n");
 8644                         if (nd.ni_vp == curvp)
 8645                                 return (EPERM);
 8646                         return (ENOTDIR);
 8647                 }
 8648                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 8649                         vput(nd.ni_vp);
 8650                         vput(vp);
 8651                         if (curvp != NULL)
 8652                                 vput(curvp);
 8653                         NFSD_DEBUG(4, "dspath not an NFS mount\n");
 8654                         return (ENXIO);
 8655                 }
 8656                 nmp = VFSTONFS(nd.ni_vp->v_mount);
 8657 
 8658                 /*
 8659                  * Search the nfsdevice list for a match.  If curnmp == NULL,
 8660                  * this is a recovery and there must be a mirror.
 8661                  */
 8662                 NFSDDSLOCK();
 8663                 if (curnmp == NULL)
 8664                         *dsp = nfsrv_findmirroredds(nmp);
 8665                 else
 8666                         *dsp = nfsv4_findmirror(nmp);
 8667                 NFSDDSUNLOCK();
 8668                 if (*dsp == NULL) {
 8669                         vput(nd.ni_vp);
 8670                         vput(vp);
 8671                         if (curvp != NULL)
 8672                                 vput(curvp);
 8673                         NFSD_DEBUG(4, "mdscopymr: no ds\n");
 8674                         return (ENXIO);
 8675                 }
 8676         } else {
 8677                 nd.ni_vp = NULL;
 8678                 nmp = NULL;
 8679         }
 8680 
 8681         /*
 8682          * Get a vp for an available DS data file using the extended
 8683          * attribute on the MDS file.
 8684          * If there is a valid entry for the new DS in the extended attribute
 8685          * on the MDS file (as checked via the nmp argument),
 8686          * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur.
 8687          */
 8688         error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p,
 8689             NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir);
 8690         if (curvp != NULL)
 8691                 vput(curvp);
 8692         if (nd.ni_vp == NULL) {
 8693                 if (error == 0 && nmp != NULL) {
 8694                         /* Search the nfsdev list for a match. */
 8695                         NFSDDSLOCK();
 8696                         *dsp = nfsrv_findmirroredds(nmp);
 8697                         NFSDDSUNLOCK();
 8698                 }
 8699                 if (error == 0 && (nmp == NULL || *dsp == NULL)) {
 8700                         if (nvpp != NULL && *nvpp != NULL) {
 8701                                 vput(*nvpp);
 8702                                 *nvpp = NULL;
 8703                         }
 8704                         error = ENXIO;
 8705                 }
 8706         } else
 8707                 vput(nd.ni_vp);
 8708 
 8709         /*
 8710          * When dspathp != NULL and curdspathp == NULL, this is a recovery
 8711          * and is only allowed if there is a 0.0.0.0 IP address entry.
 8712          * When curdspathp != NULL, the ippos will be set to that entry.
 8713          */
 8714         if (error == 0 && dspathp != NULL && ippos == -1) {
 8715                 if (nvpp != NULL && *nvpp != NULL) {
 8716                         vput(*nvpp);
 8717                         *nvpp = NULL;
 8718                 }
 8719                 error = ENXIO;
 8720         }
 8721         if (error == 0) {
 8722                 *vpp = vp;
 8723 
 8724                 pf = (struct pnfsdsfile *)buf;
 8725                 if (ippos == -1) {
 8726                         /* If no zeroip pnfsdsfile, add one. */
 8727                         ippos = *buflenp / sizeof(*pf);
 8728                         *buflenp += sizeof(*pf);
 8729                         pf += ippos;
 8730                         pf->dsf_dir = dsdir;
 8731                         strlcpy(pf->dsf_filename, fname,
 8732                             sizeof(pf->dsf_filename));
 8733                 } else
 8734                         pf += ippos;
 8735                 *pfp = pf;
 8736         } else
 8737                 vput(vp);
 8738         return (error);
 8739 }
 8740 
 8741 /*
 8742  * Search for a matching pnfsd mirror device structure, base on the nmp arg.
 8743  * Return one if found, NULL otherwise.
 8744  */
 8745 static struct nfsdevice *
 8746 nfsrv_findmirroredds(struct nfsmount *nmp)
 8747 {
 8748         struct nfsdevice *ds, *fndds;
 8749         int fndmirror;
 8750 
 8751         mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
 8752         /*
 8753          * Search the DS server list for a match with nmp.
 8754          * Remove the DS entry if found and there is a mirror.
 8755          */
 8756         fndds = NULL;
 8757         fndmirror = 0;
 8758         if (nfsrv_devidcnt == 0)
 8759                 return (fndds);
 8760         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 8761                 if (ds->nfsdev_nmp == nmp) {
 8762                         NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n");
 8763                         fndds = ds;
 8764                         break;
 8765                 }
 8766         }
 8767         if (fndds == NULL)
 8768                 return (fndds);
 8769         if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
 8770                 fndmirror = 1;
 8771         else if (fndds->nfsdev_mdsisset != 0) {
 8772                 /* For the fsid is set case, search for a mirror. */
 8773                 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 8774                         if (ds != fndds && ds->nfsdev_nmp != NULL &&
 8775                             ds->nfsdev_mdsisset != 0 &&
 8776                             fsidcmp(&ds->nfsdev_mdsfsid,
 8777                             &fndds->nfsdev_mdsfsid) == 0) {
 8778                                 fndmirror = 1;
 8779                                 break;
 8780                         }
 8781                 }
 8782         }
 8783         if (fndmirror == 0) {
 8784                 NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n");
 8785                 return (NULL);
 8786         }
 8787         return (fndds);
 8788 }
 8789 
 8790 /*
 8791  * Mark the appropriate devid and all associated layout as "out of space".
 8792  */
 8793 void
 8794 nfsrv_marknospc(char *devid, bool setit)
 8795 {
 8796         struct nfsdevice *ds;
 8797         struct nfslayout *lyp;
 8798         struct nfslayouthash *lhyp;
 8799         int i;
 8800 
 8801         NFSDDSLOCK();
 8802         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 8803                 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0) {
 8804                         NFSD_DEBUG(1, "nfsrv_marknospc: devid %d\n", setit);
 8805                         ds->nfsdev_nospc = setit;
 8806                 }
 8807         }
 8808         NFSDDSUNLOCK();
 8809 
 8810         for (i = 0; i < nfsrv_layouthashsize; i++) {
 8811                 lhyp = &nfslayouthash[i];
 8812                 NFSLOCKLAYOUT(lhyp);
 8813                 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
 8814                         if (NFSBCMP(lyp->lay_deviceid, devid,
 8815                             NFSX_V4DEVICEID) == 0) {
 8816                                 NFSD_DEBUG(1, "nfsrv_marknospc: layout %d\n",
 8817                                     setit);
 8818                                 if (setit)
 8819                                         lyp->lay_flags |= NFSLAY_NOSPC;
 8820                                 else
 8821                                         lyp->lay_flags &= ~NFSLAY_NOSPC;
 8822                         }
 8823                 }
 8824                 NFSUNLOCKLAYOUT(lhyp);
 8825         }
 8826 }

Cache object: b93740f7f33236cab97266034847f5ef


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.