The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/nfsserver/nfs_nfsdstate.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2009 Rick Macklem, University of Guelph
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  *
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD: releng/12.0/sys/fs/nfsserver/nfs_nfsdstate.c 338019 2018-08-18 19:14:06Z rmacklem $");
   32 
   33 #ifndef APPLEKEXT
   34 #include <sys/extattr.h>
   35 #include <fs/nfs/nfsport.h>
   36 
   37 struct nfsrv_stablefirst nfsrv_stablefirst;
   38 int nfsrv_issuedelegs = 0;      /* 0: nfsrv_setclient() zeroes lc_program (no callbacks) */
   39 int nfsrv_dolocallocks = 0;
   40 struct nfsv4lock nfsv4rootfs_lock;      /* taken to lock out other nfsd threads */
   41 time_t nfsdev_time = 0;
   42 int nfsrv_layouthashsize;
   43 volatile int nfsrv_layoutcnt = 0;
   44 
   45 extern int newnfs_numnfsd;
   46 extern struct nfsstatsv1 nfsstatsv1;
   47 extern int nfsrv_lease;
   48 extern struct timeval nfsboottime;
   49 extern u_int32_t newnfs_true, newnfs_false;
   50 extern struct mtx nfsrv_dslock_mtx;
   51 extern struct mtx nfsrv_recalllock_mtx;
   52 extern struct mtx nfsrv_dontlistlock_mtx;
   53 extern int nfsd_debuglevel;
   54 extern u_int nfsrv_dsdirsize;
   55 extern struct nfsdevicehead nfsrv_devidhead;
   56 extern int nfsrv_doflexfile;
   57 extern int nfsrv_maxpnfsmirror;
   58 NFSV4ROOTLOCKMUTEX;
   59 NFSSTATESPINLOCK;
   60 extern struct nfsdontlisthead nfsrv_dontlisthead;
   61 extern volatile int nfsrv_devidcnt;
   62 extern struct nfslayouthead nfsrv_recalllisthead;
   63 extern char *nfsrv_zeropnfsdat;
   64 
   65 SYSCTL_DECL(_vfs_nfsd);         /* knobs below are nodes under _vfs_nfsd */
   66 int     nfsrv_statehashsize = NFSSTATEHASHSIZE; /* number of lc_stateid[] lists per client */
   67 SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
   68     &nfsrv_statehashsize, 0,
   69     "Size of state hash table set via loader.conf");
   70 
   71 int     nfsrv_clienthashsize = NFSCLIENTHASHSIZE;       /* number of nfsclienthash[] chains */
   72 SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
   73     &nfsrv_clienthashsize, 0,
   74     "Size of client hash table set via loader.conf");
   75 
   76 int     nfsrv_lockhashsize = NFSLOCKHASHSIZE;   /* NB: sysctl node is named "fhhashsize" */
   77 SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
   78     &nfsrv_lockhashsize, 0,
   79     "Size of file handle hash table set via loader.conf");
   80 
   81 int     nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
   82 SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
   83     &nfsrv_sessionhashsize, 0,
   84     "Size of session hash table set via loader.conf");
   85 
   86 int     nfsrv_layouthighwater = NFSLAYOUTHIGHWATER;
   87 SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN,
   88     &nfsrv_layouthighwater, 0,
   89     "High water mark for number of layouts set via loader.conf");
   90 
   91 static int      nfsrv_v4statelimit = NFSRV_V4STATELIMIT;        /* nfsrv_setclient() fails above this */
   92 SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
   93     &nfsrv_v4statelimit, 0,
   94     "High water limit for NFSv4 opens+locks+delegations");
   95 
   96 static int      nfsrv_writedelegifpos = 0;
   97 SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
   98     &nfsrv_writedelegifpos, 0,
   99     "Issue a write delegation for read opens if possible");
  100 
  101 static int      nfsrv_allowreadforwriteopen = 1;
  102 SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
  103     &nfsrv_allowreadforwriteopen, 0,
  104     "Allow Reads to be done with Write Access StateIDs");
  105 
  106 int     nfsrv_pnfsatime = 0;    /* NB: sysctl node is named "pnfsstrictatime" */
  107 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW,
  108     &nfsrv_pnfsatime, 0,
  109     "For pNFS service, do Getattr ops to keep atime up-to-date");
  110 
  111 int     nfsrv_flexlinuxhack = 0;
  112 SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
  113     &nfsrv_flexlinuxhack, 0,
  114     "For Linux clients, hack around Flex File Layout bug");
  115 
  116 /*
  117  * Hash lists for nfs V4.
  118  */
  119 struct nfsclienthashhead        *nfsclienthash;
  120 struct nfslockhashhead          *nfslockhash;
  121 struct nfssessionhash           *nfssessionhash;
  122 struct nfslayouthash            *nfslayouthash;
  123 volatile int nfsrv_dontlistlen = 0;
  124 #endif  /* !APPLEKEXT */
  125 
  126 static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0; /* openpluslock is checked against nfsrv_v4statelimit */
  127 static time_t nfsrvboottime;    /* issued as lval[0] of every clientid; mismatch => stale clientid */
  128 static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;       /* nfsrv_clients: bumped when a client is linked in */
  129 static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
  130 static int nfsrv_nogsscallback = 0;     /* non-zero: no callback program for AUTH_GSS clients */
  131 static volatile int nfsrv_writedelegcnt = 0;
  132 static int nfsrv_faildscnt;
  133 
  134 /* local functions */
  135 static void nfsrv_dumpaclient(struct nfsclient *clp,
  136     struct nfsd_dumpclients *dumpp);
  137 static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
  138     NFSPROC_T *p);
  139 static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
  140     NFSPROC_T *p);
  141 static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
  142     NFSPROC_T *p);
  143 static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
  144     int cansleep, NFSPROC_T *p);
  145 static void nfsrv_freenfslock(struct nfslock *lop);
  146 static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
  147 static void nfsrv_freedeleg(struct nfsstate *);
  148 static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, 
  149     u_int32_t flags, struct nfsstate **stpp);
  150 static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
  151     struct nfsstate **stpp);
  152 static int nfsrv_getlockfh(vnode_t vp, u_short flags,
  153     struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
  154 static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
  155     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
  156 static void nfsrv_insertlock(struct nfslock *new_lop,
  157     struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
  158 static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
  159     struct nfslock **other_lopp, struct nfslockfile *lfp);
  160 static int nfsrv_getipnumber(u_char *cp);
  161 static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
  162     nfsv4stateid_t *stateidp, int specialid);
  163 static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
  164     u_int32_t flags);
  165 static int nfsrv_docallback(struct nfsclient *clp, int procnum,
  166     nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
  167     struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p);
  168 static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
  169     uint32_t callback, int op, const char *optag, struct nfsdsession **sepp);
  170 static u_int32_t nfsrv_nextclientindex(void);
  171 static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
  172 static void nfsrv_markstable(struct nfsclient *clp);
  173 static void nfsrv_markreclaim(struct nfsclient *clp);
  174 static int nfsrv_checkstable(struct nfsclient *clp);
  175 static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct 
  176     vnode *vp, NFSPROC_T *p);
  177 static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
  178     NFSPROC_T *p, vnode_t vp);
  179 static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
  180     struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
  181 static int nfsrv_notsamecredname(struct nfsrv_descript *nd,
  182     struct nfsclient *clp);
  183 static time_t nfsrv_leaseexpiry(void);
  184 static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
  185 static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
  186     struct nfsstate *stp, struct nfsrvcache *op);
  187 static int nfsrv_nootherstate(struct nfsstate *stp);
  188 static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
  189     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
  190 static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
  191     uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
  192 static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
  193     int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
  194     NFSPROC_T *p);
  195 static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
  196     NFSPROC_T *p);
  197 static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
  198     uint64_t first, uint64_t end);
  199 static void nfsrv_locklf(struct nfslockfile *lfp);
  200 static void nfsrv_unlocklf(struct nfslockfile *lfp);
  201 static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
  202 static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
  203 static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
  204     int dont_replycache, struct nfsdsession **sepp);
  205 static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
  206 static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
  207     nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p);
  208 static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp);
  209 static void nfsrv_freelayoutlist(nfsquad_t clientid);
  210 static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype,
  211     int iomode);
  212 static void nfsrv_freealllayouts(void);
  213 static void nfsrv_freedevid(struct nfsdevice *ds);
  214 static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
  215     struct nfsdevice **dsp);
  216 static int nfsrv_delds(char *devid, NFSPROC_T *p);
  217 static void nfsrv_deleteds(struct nfsdevice *fndds);
  218 static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost);
  219 static void nfsrv_freealldevids(void);
  220 static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp,
  221     int maxcnt, NFSPROC_T *p);
  222 static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp,
  223     fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype,
  224     NFSPROC_T *p);
  225 static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
  226     NFSPROC_T *, struct nfslayout **lypp);
  227 static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt);
  228 static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode,
  229     fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
  230 static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode,
  231     int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
  232 static int nfsrv_dontlayout(fhandle_t *fhp);
  233 static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
  234     vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
  235     vnode_t *tvpp);
  236 static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
  237 
  238 /*
  239  * Scan the client list for an entry whose id matches *new_clpp and either
  240  * leave the current one in place (confirmed NFSv4.1 match), link the new
  241  * entry into the client list (zapping any old one) or return an error.
  242  * If *new_clpp is still non-NULL on return, the caller must free it.
  243  */
  244 APPLESTATIC int
  245 nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
  246     nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
  247 {
  248         struct nfsclient *clp = NULL, *new_clp = *new_clpp;
  249         int i, error = 0, ret;
  250         struct nfsstate *stp, *tstp;
  251         struct sockaddr_in *sad, *rad;
  252         struct nfsdsession *sep, *nsep;
  253         int zapit = 0, gotit, hasstate = 0, igotlock;
  254         static u_int64_t confirm_index = 0;     /* pre-incremented for each confirm value */
  255 
  256         /*
  257          * Check for state resource limit exceeded (done before any lock is taken).
  258          */
  259         if (nfsrv_openpluslock > nfsrv_v4statelimit) {
  260                 error = NFSERR_RESOURCE;
  261                 goto out;
  262         }
  263 
  264         if (nfsrv_issuedelegs == 0 ||
  265             ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
  266                 /*
  267                  * Don't do callbacks when delegations are disabled or
  268                  * for AUTH_GSS when nfsrv_nogsscallback is non-zero.
  269                  * If establishing a callback connection is attempted
  270                  * when a firewall is blocking the callback path, the
  271                  * server may wait too long for the connect attempt to
  272                  * succeed during the Open. Some clients, such as Linux,
  273                  * may timeout and give up on the Open before the server
  274                  * replies. Also, since AUTH_GSS callbacks are not
  275                  * yet interoperability tested, they might cause the
  276                  * server to fail, if they get past the Init call to
  277                  * the client.
  278                  */
  279                 new_clp->lc_program = 0;
  280 
  281         /* Lock out other nfsd threads; every exit path below unlocks. */
  282         NFSLOCKV4ROOTMUTEX();
  283         nfsv4_relref(&nfsv4rootfs_lock);
  284         do {
  285                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  286                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  287         } while (!igotlock);
  288         NFSUNLOCKV4ROOTMUTEX();
  289 
  290         /*
  291          * Search all hash chains for an id match; on a hit clp is the entry, i its chain.
  292          */
  293         gotit = i = 0;
  294         while (i < nfsrv_clienthashsize && !gotit) {
  295             LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
  296                 if (new_clp->lc_idlen == clp->lc_idlen &&
  297                     !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
  298                         gotit = 1;
  299                         break;
  300                 }
  301             }
  302             if (gotit == 0)
  303                 i++;
  304         }
  305         if (!gotit ||
  306             (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
  307                 if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
  308                         /*
  309                          * For NFSv4.1, if confirmp->lval[1] is non-zero, the
  310                          * client is trying to update a confirmed clientid.
  311                          */
  312                         NFSLOCKV4ROOTMUTEX();
  313                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  314                         NFSUNLOCKV4ROOTMUTEX();
  315                         confirmp->lval[1] = 0;
  316                         error = NFSERR_NOENT;
  317                         goto out;
  318                 }
  319                 /*
  320                  * Get rid of the old one (i only stops short of the table size on a match).
  321                  */
  322                 if (i != nfsrv_clienthashsize) {
  323                         LIST_REMOVE(clp, lc_hash);
  324                         nfsrv_cleanclient(clp, p);
  325                         nfsrv_freedeleglist(&clp->lc_deleg);
  326                         nfsrv_freedeleglist(&clp->lc_olddeleg);
  327                         zapit = 1;
  328                 }
  329                 /*
  330                  * Add it after assigning a client id to it.
  331                  */
  332                 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
  333                 if ((nd->nd_flag & ND_NFSV41) != 0)
  334                         new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
  335                             ++confirm_index;
  336                 else
  337                         confirmp->qval = new_clp->lc_confirm.qval =
  338                             ++confirm_index;
  339                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  340                     (u_int32_t)nfsrvboottime;
  341                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  342                     nfsrv_nextclientindex();
  343                 new_clp->lc_stateindex = 0;
  344                 new_clp->lc_statemaxindex = 0;
  345                 new_clp->lc_cbref = 0;
  346                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  347                 LIST_INIT(&new_clp->lc_open);
  348                 LIST_INIT(&new_clp->lc_deleg);
  349                 LIST_INIT(&new_clp->lc_olddeleg);
  350                 LIST_INIT(&new_clp->lc_session);
  351                 for (i = 0; i < nfsrv_statehashsize; i++)
  352                         LIST_INIT(&new_clp->lc_stateid[i]);
  353                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  354                     lc_hash);
  355                 nfsstatsv1.srvclients++;
  356                 nfsrv_openpluslock++;
  357                 nfsrv_clients++;
  358                 NFSLOCKV4ROOTMUTEX();
  359                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  360                 NFSUNLOCKV4ROOTMUTEX();
  361                 if (zapit)
  362                         nfsrv_zapclient(clp, p);
  363                 *new_clpp = NULL;       /* new_clp is now on the client list */
  364                 goto out;
  365         }
  366 
  367         /*
  368          * Now, handle the cases where the id is already issued.
  369          */
  370         if (nfsrv_notsamecredname(nd, clp)) {
  371             /*
  372              * Check to see if there is expired state that should go away.
  373              */
  374             if (clp->lc_expiry < NFSD_MONOSEC &&
  375                 (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
  376                 nfsrv_cleanclient(clp, p);
  377                 nfsrv_freedeleglist(&clp->lc_deleg);
  378             }
  379 
  380             /*
  381              * If there is outstanding state, then reply NFSERR_CLIDINUSE per
  382              * RFC3530 Sec. 8.1.2 last para.
  383              */
  384             if (!LIST_EMPTY(&clp->lc_deleg)) {
  385                 hasstate = 1;
  386             } else if (LIST_EMPTY(&clp->lc_open)) {
  387                 hasstate = 0;
  388             } else {
  389                 hasstate = 0;
  390                 /* Look for an Open on the OpenOwner */
  391                 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
  392                     if (!LIST_EMPTY(&stp->ls_open)) {
  393                         hasstate = 1;
  394                         break;
  395                     }
  396                 }
  397             }
  398             if (hasstate) {
  399                 /*
  400                  * If the uid doesn't match, return NFSERR_CLIDINUSE after
  401                  * copying the existing client's ipaddr and portnum into new_clp.
  402                  */
  403                 sad = NFSSOCKADDR(new_clp->lc_req.nr_nam, struct sockaddr_in *);
  404                 rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
  405                 sad->sin_addr.s_addr = rad->sin_addr.s_addr;
  406                 sad->sin_port = rad->sin_port;
  407                 NFSLOCKV4ROOTMUTEX();
  408                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  409                 NFSUNLOCKV4ROOTMUTEX();
  410                 error = NFSERR_CLIDINUSE;
  411                 goto out;
  412             }
  413         }
  414 
  415         if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
  416                 /*
  417                  * If the verifier has changed, the client has rebooted
  418                  * and a new client id is issued. The old state info
  419                  * can be thrown away once the SETCLIENTID_CONFIRM occurs.
  420                  */
  421                 LIST_REMOVE(clp, lc_hash);
  422 
  423                 /* Get rid of all sessions on this clientid. */
  424                 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) {
  425                         ret = nfsrv_freesession(sep, NULL);
  426                         if (ret != 0)
  427                                 printf("nfsrv_setclient: verifier changed free"
  428                                     " session failed=%d\n", ret);
  429                 }
  430 
  431                 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
  432                 if ((nd->nd_flag & ND_NFSV41) != 0)
  433                         new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
  434                             ++confirm_index;
  435                 else
  436                         confirmp->qval = new_clp->lc_confirm.qval =
  437                             ++confirm_index;
  438                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  439                     nfsrvboottime;
  440                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  441                     nfsrv_nextclientindex();
  442                 new_clp->lc_stateindex = 0;
  443                 new_clp->lc_statemaxindex = 0;
  444                 new_clp->lc_cbref = 0;
  445                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  446 
  447                 /*
  448                  * Save the state until confirmed, pointing each entry's ls_clp at new_clp.
  449                  */
  450                 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
  451                 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
  452                         tstp->ls_clp = new_clp;
  453                 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
  454                 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
  455                         tstp->ls_clp = new_clp;
  456                 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
  457                     ls_list);
  458                 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
  459                         tstp->ls_clp = new_clp;
  460                 for (i = 0; i < nfsrv_statehashsize; i++) {
  461                         LIST_NEWHEAD(&new_clp->lc_stateid[i],
  462                             &clp->lc_stateid[i], ls_hash);
  463                         LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
  464                                 tstp->ls_clp = new_clp;
  465                 }
  466                 LIST_INIT(&new_clp->lc_session);
  467                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  468                     lc_hash);
  469                 nfsstatsv1.srvclients++;
  470                 nfsrv_openpluslock++;
  471                 nfsrv_clients++;
  472                 NFSLOCKV4ROOTMUTEX();
  473                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  474                 NFSUNLOCKV4ROOTMUTEX();
  475 
  476                 /*
  477                  * Must wait until any outstanding callback on the old clp
  478                  * completes.
  479                  */
  480                 NFSLOCKSTATE();
  481                 while (clp->lc_cbref) {
  482                         clp->lc_flags |= LCL_WAKEUPWANTED;
  483                         (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
  484                             "nfsd clp", 10 * hz);
  485                 }
  486                 NFSUNLOCKSTATE();
  487                 nfsrv_zapclient(clp, p);
  488                 *new_clpp = NULL;       /* new_clp is now on the client list */
  489                 goto out;
  490         }
  491 
  492         /* For NFSv4.1, mark that we found a confirmed clientid. */
  493         if ((nd->nd_flag & ND_NFSV41) != 0) {
  494                 clientidp->lval[0] = clp->lc_clientid.lval[0];
  495                 clientidp->lval[1] = clp->lc_clientid.lval[1];
  496                 confirmp->lval[0] = 0;  /* Ignored by client */
  497                 confirmp->lval[1] = 1;
  498         } else {
  499                 /*
  500                  * id and verifier match, so update the net address info
  501                  * and get rid of any existing callback authentication
  502                  * handle, so a new one will be acquired.
  503                  */
  504                 LIST_REMOVE(clp, lc_hash);
  505                 new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
  506                 new_clp->lc_expiry = nfsrv_leaseexpiry();
  507                 confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
  508                 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
  509                     clp->lc_clientid.lval[0];
  510                 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
  511                     clp->lc_clientid.lval[1];
  512                 new_clp->lc_delegtime = clp->lc_delegtime;
  513                 new_clp->lc_stateindex = clp->lc_stateindex;
  514                 new_clp->lc_statemaxindex = clp->lc_statemaxindex;
  515                 new_clp->lc_cbref = 0;
  516                 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
  517                 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
  518                         tstp->ls_clp = new_clp;
  519                 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
  520                 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
  521                         tstp->ls_clp = new_clp;
  522                 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
  523                 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
  524                         tstp->ls_clp = new_clp;
  525                 for (i = 0; i < nfsrv_statehashsize; i++) {
  526                         LIST_NEWHEAD(&new_clp->lc_stateid[i],
  527                             &clp->lc_stateid[i], ls_hash);
  528                         LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
  529                                 tstp->ls_clp = new_clp;
  530                 }
  531                 LIST_INIT(&new_clp->lc_session);
  532                 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
  533                     lc_hash);
  534                 nfsstatsv1.srvclients++;
  535                 nfsrv_openpluslock++;
  536                 nfsrv_clients++;
  537         }
  538         NFSLOCKV4ROOTMUTEX();
  539         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  540         NFSUNLOCKV4ROOTMUTEX();
  541 
  542         if ((nd->nd_flag & ND_NFSV41) == 0) {
  543                 /*
  544                  * Must wait until any outstanding callback on the old clp
  545                  * completes. (For NFSv4.1 the old clp was left on the list above.)
  546                  */
  547                 NFSLOCKSTATE();
  548                 while (clp->lc_cbref) {
  549                         clp->lc_flags |= LCL_WAKEUPWANTED;
  550                         (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
  551                             "nfsdclp", 10 * hz);
  552                 }
  553                 NFSUNLOCKSTATE();
  554                 nfsrv_zapclient(clp, p);
  555                 *new_clpp = NULL;       /* new_clp is now on the client list */
  556         }
  557 
  558 out:
  559         NFSEXITCODE2(error, nd);
  560         return (error);
  561 }
  562 
  563 /*
  564  * Check to see if the client id exists and optionally confirm it.
  565  */
  566 APPLESTATIC int
  567 nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
  568     struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
  569     struct nfsrv_descript *nd, NFSPROC_T *p)
  570 {
  571         struct nfsclient *clp;
  572         struct nfsstate *stp;
  573         int i;
  574         struct nfsclienthashhead *hp;
  575         int error = 0, igotlock, doneok;
  576         struct nfssessionhash *shp;
  577         struct nfsdsession *sep;
  578         uint64_t sessid[2];
  579         static uint64_t next_sess = 0;
  580 
  581         if (clpp)
  582                 *clpp = NULL;
  583         if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
  584             opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
  585                 error = NFSERR_STALECLIENTID;
  586                 goto out;
  587         }
  588 
  589         /*
  590          * If called with opflags == CLOPS_RENEW, the State Lock is
  591          * already held. Otherwise, we need to get either that or,
  592          * for the case of Confirm, lock out the nfsd threads.
  593          */
  594         if (opflags & CLOPS_CONFIRM) {
  595                 NFSLOCKV4ROOTMUTEX();
  596                 nfsv4_relref(&nfsv4rootfs_lock);
  597                 do {
  598                         igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  599                             NFSV4ROOTLOCKMUTEXPTR, NULL);
  600                 } while (!igotlock);
  601                 /*
  602                  * Create a new sessionid here, since we need to do it where
  603                  * there is a mutex held to serialize update of next_sess.
  604                  */
  605                 if ((nd->nd_flag & ND_NFSV41) != 0) {
  606                         sessid[0] = ++next_sess;
  607                         sessid[1] = clientid.qval;
  608                 }
  609                 NFSUNLOCKV4ROOTMUTEX();
  610         } else if (opflags != CLOPS_RENEW) {
  611                 NFSLOCKSTATE();
  612         }
  613 
  614         /* For NFSv4.1, the clp is acquired from the associated session. */
  615         if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
  616             opflags == CLOPS_RENEW) {
  617                 clp = NULL;
  618                 if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
  619                         shp = NFSSESSIONHASH(nd->nd_sessionid);
  620                         NFSLOCKSESSION(shp);
  621                         sep = nfsrv_findsession(nd->nd_sessionid);
  622                         if (sep != NULL)
  623                                 clp = sep->sess_clp;
  624                         NFSUNLOCKSESSION(shp);
  625                 }
  626         } else {
  627                 hp = NFSCLIENTHASH(clientid);
  628                 LIST_FOREACH(clp, hp, lc_hash) {
  629                         if (clp->lc_clientid.lval[1] == clientid.lval[1])
  630                                 break;
  631                 }
  632         }
  633         if (clp == NULL) {
  634                 if (opflags & CLOPS_CONFIRM)
  635                         error = NFSERR_STALECLIENTID;
  636                 else
  637                         error = NFSERR_EXPIRED;
  638         } else if (clp->lc_flags & LCL_ADMINREVOKED) {
  639                 /*
  640                  * If marked admin revoked, just return the error.
  641                  */
  642                 error = NFSERR_ADMINREVOKED;
  643         }
  644         if (error) {
  645                 if (opflags & CLOPS_CONFIRM) {
  646                         NFSLOCKV4ROOTMUTEX();
  647                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  648                         NFSUNLOCKV4ROOTMUTEX();
  649                 } else if (opflags != CLOPS_RENEW) {
  650                         NFSUNLOCKSTATE();
  651                 }
  652                 goto out;
  653         }
  654 
  655         /*
  656          * Perform any operations specified by the opflags.
  657          */
  658         if (opflags & CLOPS_CONFIRM) {
  659                 if (((nd->nd_flag & ND_NFSV41) != 0 &&
  660                      clp->lc_confirm.lval[0] != confirm.lval[0]) ||
  661                     ((nd->nd_flag & ND_NFSV41) == 0 &&
  662                      clp->lc_confirm.qval != confirm.qval))
  663                         error = NFSERR_STALECLIENTID;
  664                 else if (nfsrv_notsamecredname(nd, clp))
  665                         error = NFSERR_CLIDINUSE;
  666 
  667                 if (!error) {
  668                     if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
  669                         LCL_NEEDSCONFIRM) {
  670                         /*
  671                          * Hang onto the delegations (as old delegations)
  672                          * for an Open with CLAIM_DELEGATE_PREV unless in
  673                          * grace, but get rid of the rest of the state.
  674                          */
  675                         nfsrv_cleanclient(clp, p);
  676                         nfsrv_freedeleglist(&clp->lc_olddeleg);
  677                         if (nfsrv_checkgrace(nd, clp, 0)) {
  678                             /* In grace, so just delete delegations */
  679                             nfsrv_freedeleglist(&clp->lc_deleg);
  680                         } else {
  681                             LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
  682                                 stp->ls_flags |= NFSLCK_OLDDELEG;
  683                             clp->lc_delegtime = NFSD_MONOSEC +
  684                                 nfsrv_lease + NFSRV_LEASEDELTA;
  685                             LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
  686                                 ls_list);
  687                         }
  688                         if ((nd->nd_flag & ND_NFSV41) != 0)
  689                             clp->lc_program = cbprogram;
  690                     }
  691                     clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
  692                     if (clp->lc_program)
  693                         clp->lc_flags |= LCL_NEEDSCBNULL;
  694                     /* For NFSv4.1, link the session onto the client. */
  695                     if (nsep != NULL) {
  696                         /* Hold a reference on the xprt for a backchannel. */
  697                         if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
  698                             != 0) {
  699                             if (clp->lc_req.nr_client == NULL)
  700                                 clp->lc_req.nr_client = (struct __rpc_client *)
  701                                     clnt_bck_create(nd->nd_xprt->xp_socket,
  702                                     cbprogram, NFSV4_CBVERS);
  703                             if (clp->lc_req.nr_client != NULL) {
  704                                 SVC_ACQUIRE(nd->nd_xprt);
  705                                 nd->nd_xprt->xp_p2 =
  706                                     clp->lc_req.nr_client->cl_private;
  707                                 /* Disable idle timeout. */
  708                                 nd->nd_xprt->xp_idletimeout = 0;
  709                                 nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
  710                             } else
  711                                 nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
  712                         }
  713                         NFSBCOPY(sessid, nsep->sess_sessionid,
  714                             NFSX_V4SESSIONID);
  715                         NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
  716                             NFSX_V4SESSIONID);
  717                         shp = NFSSESSIONHASH(nsep->sess_sessionid);
  718                         NFSLOCKSTATE();
  719                         NFSLOCKSESSION(shp);
  720                         LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
  721                         LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
  722                         nsep->sess_clp = clp;
  723                         NFSUNLOCKSESSION(shp);
  724                         NFSUNLOCKSTATE();
  725                     }
  726                 }
  727         } else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
  728                 error = NFSERR_EXPIRED;
  729         }
  730 
  731         /*
  732          * If called by the Renew Op, we must check the principal.
  733          */
  734         if (!error && (opflags & CLOPS_RENEWOP)) {
  735             if (nfsrv_notsamecredname(nd, clp)) {
  736                 doneok = 0;
  737                 for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
  738                     LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
  739                         if ((stp->ls_flags & NFSLCK_OPEN) &&
  740                             stp->ls_uid == nd->nd_cred->cr_uid) {
  741                                 doneok = 1;
  742                                 break;
  743                         }
  744                     }
  745                 }
  746                 if (!doneok)
  747                         error = NFSERR_ACCES;
  748             }
  749             if (!error && (clp->lc_flags & LCL_CBDOWN))
  750                 error = NFSERR_CBPATHDOWN;
  751         }
  752         if ((!error || error == NFSERR_CBPATHDOWN) &&
  753              (opflags & CLOPS_RENEW)) {
  754                 clp->lc_expiry = nfsrv_leaseexpiry();
  755         }
  756         if (opflags & CLOPS_CONFIRM) {
  757                 NFSLOCKV4ROOTMUTEX();
  758                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  759                 NFSUNLOCKV4ROOTMUTEX();
  760         } else if (opflags != CLOPS_RENEW) {
  761                 NFSUNLOCKSTATE();
  762         }
  763         if (clpp)
  764                 *clpp = clp;
  765 
  766 out:
  767         NFSEXITCODE2(error, nd);
  768         return (error);
  769 }
  770 
  771 /*
  772  * Perform the NFSv4.1 destroy clientid.
  773  */
  774 int
  775 nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
  776 {
  777         struct nfsclient *clp;
  778         struct nfsclienthashhead *hp;
  779         int error = 0, i, igotlock;
  780 
  781         if (nfsrvboottime != clientid.lval[0]) {
  782                 error = NFSERR_STALECLIENTID;
  783                 goto out;
  784         }
  785 
  786         /* Lock out other nfsd threads */
  787         NFSLOCKV4ROOTMUTEX();
  788         nfsv4_relref(&nfsv4rootfs_lock);
  789         do {
  790                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  791                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  792         } while (igotlock == 0);
  793         NFSUNLOCKV4ROOTMUTEX();
  794 
  795         hp = NFSCLIENTHASH(clientid);
  796         LIST_FOREACH(clp, hp, lc_hash) {
  797                 if (clp->lc_clientid.lval[1] == clientid.lval[1])
  798                         break;
  799         }
  800         if (clp == NULL) {
  801                 NFSLOCKV4ROOTMUTEX();
  802                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  803                 NFSUNLOCKV4ROOTMUTEX();
  804                 /* Just return ok, since it is gone. */
  805                 goto out;
  806         }
  807 
  808         /*
  809          * Free up all layouts on the clientid.  Should the client return the
  810          * layouts?
  811          */
  812         nfsrv_freelayoutlist(clientid);
  813 
  814         /* Scan for state on the clientid. */
  815         for (i = 0; i < nfsrv_statehashsize; i++)
  816                 if (!LIST_EMPTY(&clp->lc_stateid[i])) {
  817                         NFSLOCKV4ROOTMUTEX();
  818                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  819                         NFSUNLOCKV4ROOTMUTEX();
  820                         error = NFSERR_CLIENTIDBUSY;
  821                         goto out;
  822                 }
  823         if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
  824                 NFSLOCKV4ROOTMUTEX();
  825                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
  826                 NFSUNLOCKV4ROOTMUTEX();
  827                 error = NFSERR_CLIENTIDBUSY;
  828                 goto out;
  829         }
  830 
  831         /* Destroy the clientid and return ok. */
  832         nfsrv_cleanclient(clp, p);
  833         nfsrv_freedeleglist(&clp->lc_deleg);
  834         nfsrv_freedeleglist(&clp->lc_olddeleg);
  835         LIST_REMOVE(clp, lc_hash);
  836         NFSLOCKV4ROOTMUTEX();
  837         nfsv4_unlock(&nfsv4rootfs_lock, 1);
  838         NFSUNLOCKV4ROOTMUTEX();
  839         nfsrv_zapclient(clp, p);
  840 out:
  841         NFSEXITCODE2(error, nd);
  842         return (error);
  843 }
  844 
  845 /*
  846  * Called from the new nfssvc syscall to admin revoke a clientid.
  847  * Returns 0 for success, error otherwise.
  848  */
  849 APPLESTATIC int
  850 nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
  851 {
  852         struct nfsclient *clp = NULL;
  853         int i, error = 0;
  854         int gotit, igotlock;
  855 
  856         /*
  857          * First, lock out the nfsd so that state won't change while the
  858          * revocation record is being written to the stable storage restart
  859          * file.
  860          */
  861         NFSLOCKV4ROOTMUTEX();
  862         do {
  863                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
  864                     NFSV4ROOTLOCKMUTEXPTR, NULL);
  865         } while (!igotlock);
  866         NFSUNLOCKV4ROOTMUTEX();
  867 
  868         /*
  869          * Search for a match in the client list.
  870          */
  871         gotit = i = 0;
  872         while (i < nfsrv_clienthashsize && !gotit) {
  873             LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
  874                 if (revokep->nclid_idlen == clp->lc_idlen &&
  875                     !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
  876                         gotit = 1;
  877                         break;
  878                 }
  879             }
  880             i++;
  881         }
  882         if (!gotit) {
  883                 NFSLOCKV4ROOTMUTEX();
  884                 nfsv4_unlock(&nfsv4rootfs_lock, 0);
  885                 NFSUNLOCKV4ROOTMUTEX();
  886                 error = EPERM;
  887                 goto out;
  888         }
  889 
  890         /*
  891          * Now, write out the revocation record
  892          */
  893         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
  894         nfsrv_backupstable();
  895 
  896         /*
  897          * and clear out the state, marking the clientid revoked.
  898          */
  899         clp->lc_flags &= ~LCL_CALLBACKSON;
  900         clp->lc_flags |= LCL_ADMINREVOKED;
  901         nfsrv_cleanclient(clp, p);
  902         nfsrv_freedeleglist(&clp->lc_deleg);
  903         nfsrv_freedeleglist(&clp->lc_olddeleg);
  904         NFSLOCKV4ROOTMUTEX();
  905         nfsv4_unlock(&nfsv4rootfs_lock, 0);
  906         NFSUNLOCKV4ROOTMUTEX();
  907 
  908 out:
  909         NFSEXITCODE(error);
  910         return (error);
  911 }
  912 
  913 /*
  914  * Dump out stats for all clients. Called from nfssvc(2), that is used
  915  * nfsstatsv1.
  916  */
  917 APPLESTATIC void
  918 nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
  919 {
  920         struct nfsclient *clp;
  921         int i = 0, cnt = 0;
  922 
  923         /*
  924          * First, get a reference on the nfsv4rootfs_lock so that an
  925          * exclusive lock cannot be acquired while dumping the clients.
  926          */
  927         NFSLOCKV4ROOTMUTEX();
  928         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
  929         NFSUNLOCKV4ROOTMUTEX();
  930         NFSLOCKSTATE();
  931         /*
  932          * Rattle through the client lists until done.
  933          */
  934         while (i < nfsrv_clienthashsize && cnt < maxcnt) {
  935             clp = LIST_FIRST(&nfsclienthash[i]);
  936             while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
  937                 nfsrv_dumpaclient(clp, &dumpp[cnt]);
  938                 cnt++;
  939                 clp = LIST_NEXT(clp, lc_hash);
  940             }
  941             i++;
  942         }
  943         if (cnt < maxcnt)
  944             dumpp[cnt].ndcl_clid.nclid_idlen = 0;
  945         NFSUNLOCKSTATE();
  946         NFSLOCKV4ROOTMUTEX();
  947         nfsv4_relref(&nfsv4rootfs_lock);
  948         NFSUNLOCKV4ROOTMUTEX();
  949 }
  950 
  951 /*
  952  * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
  953  */
  954 static void
  955 nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
  956 {
  957         struct nfsstate *stp, *openstp, *lckownstp;
  958         struct nfslock *lop;
  959         struct sockaddr *sad;
  960         struct sockaddr_in *rad;
  961         struct sockaddr_in6 *rad6;
  962 
  963         dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
  964         dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
  965         dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
  966         dumpp->ndcl_flags = clp->lc_flags;
  967         dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
  968         NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
  969         sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *);
  970         dumpp->ndcl_addrfam = sad->sa_family;
  971         if (sad->sa_family == AF_INET) {
  972                 rad = (struct sockaddr_in *)sad;
  973                 dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr;
  974         } else {
  975                 rad6 = (struct sockaddr_in6 *)sad;
  976                 dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr;
  977         }
  978 
  979         /*
  980          * Now, scan the state lists and total up the opens and locks.
  981          */
  982         LIST_FOREACH(stp, &clp->lc_open, ls_list) {
  983             dumpp->ndcl_nopenowners++;
  984             LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
  985                 dumpp->ndcl_nopens++;
  986                 LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
  987                     dumpp->ndcl_nlockowners++;
  988                     LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
  989                         dumpp->ndcl_nlocks++;
  990                     }
  991                 }
  992             }
  993         }
  994 
  995         /*
  996          * and the delegation lists.
  997          */
  998         LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
  999             dumpp->ndcl_ndelegs++;
 1000         }
 1001         LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
 1002             dumpp->ndcl_nolddelegs++;
 1003         }
 1004 }
 1005 
 1006 /*
 1007  * Dump out lock stats for a file.
 1008  */
 1009 APPLESTATIC void
 1010 nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
 1011     NFSPROC_T *p)
 1012 {
 1013         struct nfsstate *stp;
 1014         struct nfslock *lop;
 1015         int cnt = 0;
 1016         struct nfslockfile *lfp;
 1017         struct sockaddr *sad;
 1018         struct sockaddr_in *rad;
 1019         struct sockaddr_in6 *rad6;
 1020         int ret;
 1021         fhandle_t nfh;
 1022 
 1023         ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
 1024         /*
 1025          * First, get a reference on the nfsv4rootfs_lock so that an
 1026          * exclusive lock on it cannot be acquired while dumping the locks.
 1027          */
 1028         NFSLOCKV4ROOTMUTEX();
 1029         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 1030         NFSUNLOCKV4ROOTMUTEX();
 1031         NFSLOCKSTATE();
 1032         if (!ret)
 1033                 ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
 1034         if (ret) {
 1035                 ldumpp[0].ndlck_clid.nclid_idlen = 0;
 1036                 NFSUNLOCKSTATE();
 1037                 NFSLOCKV4ROOTMUTEX();
 1038                 nfsv4_relref(&nfsv4rootfs_lock);
 1039                 NFSUNLOCKV4ROOTMUTEX();
 1040                 return;
 1041         }
 1042 
 1043         /*
 1044          * For each open share on file, dump it out.
 1045          */
 1046         stp = LIST_FIRST(&lfp->lf_open);
 1047         while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
 1048                 ldumpp[cnt].ndlck_flags = stp->ls_flags;
 1049                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1050                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1051                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1052                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1053                 ldumpp[cnt].ndlck_owner.nclid_idlen =
 1054                     stp->ls_openowner->ls_ownerlen;
 1055                 NFSBCOPY(stp->ls_openowner->ls_owner,
 1056                     ldumpp[cnt].ndlck_owner.nclid_id,
 1057                     stp->ls_openowner->ls_ownerlen);
 1058                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1059                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1060                     stp->ls_clp->lc_idlen);
 1061                 sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
 1062                 ldumpp[cnt].ndlck_addrfam = sad->sa_family;
 1063                 if (sad->sa_family == AF_INET) {
 1064                         rad = (struct sockaddr_in *)sad;
 1065                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
 1066                 } else {
 1067                         rad6 = (struct sockaddr_in6 *)sad;
 1068                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
 1069                 }
 1070                 stp = LIST_NEXT(stp, ls_file);
 1071                 cnt++;
 1072         }
 1073 
 1074         /*
 1075          * and all locks.
 1076          */
 1077         lop = LIST_FIRST(&lfp->lf_lock);
 1078         while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
 1079                 stp = lop->lo_stp;
 1080                 ldumpp[cnt].ndlck_flags = lop->lo_flags;
 1081                 ldumpp[cnt].ndlck_first = lop->lo_first;
 1082                 ldumpp[cnt].ndlck_end = lop->lo_end;
 1083                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1084                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1085                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1086                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1087                 ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
 1088                 NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
 1089                     stp->ls_ownerlen);
 1090                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1091                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1092                     stp->ls_clp->lc_idlen);
 1093                 sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
 1094                 ldumpp[cnt].ndlck_addrfam = sad->sa_family;
 1095                 if (sad->sa_family == AF_INET) {
 1096                         rad = (struct sockaddr_in *)sad;
 1097                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
 1098                 } else {
 1099                         rad6 = (struct sockaddr_in6 *)sad;
 1100                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
 1101                 }
 1102                 lop = LIST_NEXT(lop, lo_lckfile);
 1103                 cnt++;
 1104         }
 1105 
 1106         /*
 1107          * and the delegations.
 1108          */
 1109         stp = LIST_FIRST(&lfp->lf_deleg);
 1110         while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
 1111                 ldumpp[cnt].ndlck_flags = stp->ls_flags;
 1112                 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
 1113                 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
 1114                 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
 1115                 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
 1116                 ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
 1117                 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
 1118                 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
 1119                     stp->ls_clp->lc_idlen);
 1120                 sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
 1121                 ldumpp[cnt].ndlck_addrfam = sad->sa_family;
 1122                 if (sad->sa_family == AF_INET) {
 1123                         rad = (struct sockaddr_in *)sad;
 1124                         ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
 1125                 } else {
 1126                         rad6 = (struct sockaddr_in6 *)sad;
 1127                         ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
 1128                 }
 1129                 stp = LIST_NEXT(stp, ls_file);
 1130                 cnt++;
 1131         }
 1132 
 1133         /*
 1134          * If list isn't full, mark end of list by setting the client name
 1135          * to zero length.
 1136          */
 1137         if (cnt < maxcnt)
 1138                 ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
 1139         NFSUNLOCKSTATE();
 1140         NFSLOCKV4ROOTMUTEX();
 1141         nfsv4_relref(&nfsv4rootfs_lock);
 1142         NFSUNLOCKV4ROOTMUTEX();
 1143 }
 1144 
 1145 /*
 1146  * Server timer routine. It can scan any linked list, so long
 1147  * as it holds the spin/mutex lock and there is no exclusive lock on
 1148  * nfsv4rootfs_lock.
 1149  * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
 1150  *  to do this from a callout, since the spin locks work. For
 1151  *  Darwin, I'm not sure what will work correctly yet.)
 1152  * Should be called once per second.
 1153  */
 1154 APPLESTATIC void
 1155 nfsrv_servertimer(void)
 1156 {
 1157         struct nfsclient *clp, *nclp;
 1158         struct nfsstate *stp, *nstp;
 1159         int got_ref, i;
 1160 
 1161         /*
 1162          * Make sure nfsboottime is set. This is used by V3 as well
 1163          * as V4. Note that nfsboottime is not nfsrvboottime, which is
 1164          * only used by the V4 server for leases.
 1165          */
 1166         if (nfsboottime.tv_sec == 0)
 1167                 NFSSETBOOTTIME(nfsboottime);
 1168 
 1169         /*
 1170          * If server hasn't started yet, just return.
 1171          */
 1172         NFSLOCKSTATE();
 1173         if (nfsrv_stablefirst.nsf_eograce == 0) {
 1174                 NFSUNLOCKSTATE();
 1175                 return;
 1176         }
 1177         if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
 1178                 if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
 1179                     NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
 1180                         nfsrv_stablefirst.nsf_flags |=
 1181                             (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
 1182                 NFSUNLOCKSTATE();
 1183                 return;
 1184         }
 1185 
 1186         /*
 1187          * Try and get a reference count on the nfsv4rootfs_lock so that
 1188          * no nfsd thread can acquire an exclusive lock on it before this
 1189          * call is done. If it is already exclusively locked, just return.
 1190          */
 1191         NFSLOCKV4ROOTMUTEX();
 1192         got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
 1193         NFSUNLOCKV4ROOTMUTEX();
 1194         if (got_ref == 0) {
 1195                 NFSUNLOCKSTATE();
 1196                 return;
 1197         }
 1198 
 1199         /*
 1200          * For each client...
 1201          */
 1202         for (i = 0; i < nfsrv_clienthashsize; i++) {
 1203             clp = LIST_FIRST(&nfsclienthash[i]);
 1204             while (clp != LIST_END(&nfsclienthash[i])) {
 1205                 nclp = LIST_NEXT(clp, lc_hash);
 1206                 if (!(clp->lc_flags & LCL_EXPIREIT)) {
 1207                     if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
 1208                          && ((LIST_EMPTY(&clp->lc_deleg)
 1209                               && LIST_EMPTY(&clp->lc_open)) ||
 1210                              nfsrv_clients > nfsrv_clienthighwater)) ||
 1211                         (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
 1212                         (clp->lc_expiry < NFSD_MONOSEC &&
 1213                          (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
 1214                         /*
 1215                          * Lease has expired several nfsrv_lease times ago:
 1216                          * PLUS
 1217                          *    - no state is associated with it
 1218                          *    OR
 1219                          *    - above high water mark for number of clients
 1220                          *      (nfsrv_clienthighwater should be large enough
 1221                          *       that this only occurs when clients fail to
 1222                          *       use the same nfs_client_id4.id. Maybe somewhat
 1223                          *       higher that the maximum number of clients that
 1224                          *       will mount this server?)
 1225                          * OR
 1226                          * Lease has expired a very long time ago
 1227                          * OR
 1228                          * Lease has expired PLUS the number of opens + locks
 1229                          * has exceeded 90% of capacity
 1230                          *
 1231                          * --> Mark for expiry. The actual expiry will be done
 1232                          *     by an nfsd sometime soon.
 1233                          */
 1234                         clp->lc_flags |= LCL_EXPIREIT;
 1235                         nfsrv_stablefirst.nsf_flags |=
 1236                             (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
 1237                     } else {
 1238                         /*
 1239                          * If there are no opens, increment no open tick cnt
 1240                          * If time exceeds NFSNOOPEN, mark it to be thrown away
 1241                          * otherwise, if there is an open, reset no open time
 1242                          * Hopefully, this will avoid excessive re-creation
 1243                          * of open owners and subsequent open confirms.
 1244                          */
 1245                         stp = LIST_FIRST(&clp->lc_open);
 1246                         while (stp != LIST_END(&clp->lc_open)) {
 1247                                 nstp = LIST_NEXT(stp, ls_list);
 1248                                 if (LIST_EMPTY(&stp->ls_open)) {
 1249                                         stp->ls_noopens++;
 1250                                         if (stp->ls_noopens > NFSNOOPEN ||
 1251                                             (nfsrv_openpluslock * 2) >
 1252                                             nfsrv_v4statelimit)
 1253                                                 nfsrv_stablefirst.nsf_flags |=
 1254                                                         NFSNSF_NOOPENS;
 1255                                 } else {
 1256                                         stp->ls_noopens = 0;
 1257                                 }
 1258                                 stp = nstp;
 1259                         }
 1260                     }
 1261                 }
 1262                 clp = nclp;
 1263             }
 1264         }
 1265         NFSUNLOCKSTATE();
 1266         NFSLOCKV4ROOTMUTEX();
 1267         nfsv4_relref(&nfsv4rootfs_lock);
 1268         NFSUNLOCKV4ROOTMUTEX();
 1269 }
 1270 
 1271 /*
 1272  * The following set of functions free up the various data structures.
 1273  */
 1274 /*
 1275  * Clear out all open/lock state related to this nfsclient.
 1276  * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
 1277  * there are no other active nfsd threads.
 1278  */
 1279 APPLESTATIC void
 1280 nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
 1281 {
 1282         struct nfsstate *stp, *nstp;
 1283         struct nfsdsession *sep, *nsep;
 1284 
 1285         LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
 1286                 nfsrv_freeopenowner(stp, 1, p);
 1287         if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
 1288                 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
 1289                         (void)nfsrv_freesession(sep, NULL);
 1290 }
 1291 
 1292 /*
 1293  * Free a client that has been cleaned. It should also already have been
 1294  * removed from the lists.
 1295  * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
 1296  *  softclock interrupts are enabled.)
 1297  */
 1298 APPLESTATIC void
 1299 nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
 1300 {
 1301 
 1302 #ifdef notyet
 1303         if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
 1304              (LCL_GSS | LCL_CALLBACKSON) &&
 1305             (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
 1306             clp->lc_handlelen > 0) {
 1307                 clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
 1308                 clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
 1309                 (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
 1310                         NULL, 0, NULL, NULL, NULL, 0, p);
 1311         }
 1312 #endif
 1313         newnfs_disconnect(&clp->lc_req);
 1314         free(clp->lc_req.nr_nam, M_SONAME);
 1315         NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 1316         free(clp->lc_stateid, M_NFSDCLIENT);
 1317         free(clp, M_NFSDCLIENT);
 1318         NFSLOCKSTATE();
 1319         nfsstatsv1.srvclients--;
 1320         nfsrv_openpluslock--;
 1321         nfsrv_clients--;
 1322         NFSUNLOCKSTATE();
 1323 }
 1324 
 1325 /*
 1326  * Free each delegation on the list headed by sthp, then reset the head.
 1327  * (nfsrv_freedeleg() also frees nfslockfile structures left without state.)
 1328  */
 1329 APPLESTATIC void
 1330 nfsrv_freedeleglist(struct nfsstatehead *sthp)
 1331 {
 1332         struct nfsstate *deleg, *next;
 1333 
 1334         for (deleg = LIST_FIRST(sthp); deleg != NULL; deleg = next) {
 1335                 next = LIST_NEXT(deleg, ls_list);
 1336                 nfsrv_freedeleg(deleg);
 1337         }
 1338         LIST_INIT(sthp);
 1339 }
 1340 
 1341 /*
 1342  * Unlink a delegation from its lists and free it (and its idle nfslockfile).
 1343  */
 1344 static void
 1345 nfsrv_freedeleg(struct nfsstate *stp)
 1346 {
 1347         struct nfslockfile *lf = stp->ls_lfp;
 1348 
 1349         LIST_REMOVE(stp, ls_hash);
 1350         LIST_REMOVE(stp, ls_list);
 1351         LIST_REMOVE(stp, ls_file);
 1352         if (stp->ls_flags & NFSLCK_DELEGWRITE)
 1353                 nfsrv_writedelegcnt--;
 1354         /* Free the nfslockfile once no state, user or lock refers to it. */
 1355         if (lf->lf_usecount == 0 &&
 1356             LIST_EMPTY(&lf->lf_open) && LIST_EMPTY(&lf->lf_lock) &&
 1357             LIST_EMPTY(&lf->lf_deleg) && LIST_EMPTY(&lf->lf_locallock) &&
 1358             LIST_EMPTY(&lf->lf_rollback) &&
 1359             nfsv4_testlock(&lf->lf_locallock_lck) == 0)
 1360                 nfsrv_freenfslockfile(lf);
 1361         free(stp, M_NFSDSTATE);
 1362         nfsstatsv1.srvdelegates--;
 1363         nfsrv_openpluslock--;
 1364         nfsrv_delegatecnt--;
 1365 }
 1366 
 1367 /*
 1368  * Free an open owner together with every open hanging off it.
 1369  * The owner is unlinked from its ls_list list, each entry of its ls_open
 1370  * list is handed to nfsrv_freeopen() and, if ls_op is set, it is passed
 1371  * to nfsrvd_derefcache() before the structure itself is freed.
 1372  * cansleep and p are passed through to nfsrv_freeopen().
 1373  */
 1374 static void
 1375 nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
 1376 {
 1377         struct nfsstate *openstp, *nopenstp;
 1378 
 1379         LIST_REMOVE(stp, ls_list);
 1380 
 1381         /* nfsrv_freeopen() frees each open, hence the _SAFE iteration. */
 1382         LIST_FOREACH_SAFE(openstp, &stp->ls_open, ls_list, nopenstp)
 1383                 (void)nfsrv_freeopen(openstp, NULL, cansleep, p);
 1384 
 1385         if (stp->ls_op != NULL)
 1386                 nfsrvd_derefcache(stp->ls_op);
 1387         free(stp, M_NFSDSTATE);
 1388         nfsstatsv1.srvopenowners--;
 1389         nfsrv_openpluslock--;
 1390 }
 1391 
 1392 /*
 1393  * Free an open (nfsstate open structure) along with all of its lockowners
 1394  * and their locks. The nfslockfile structure is freed too when it is
 1395  * non-NULL, all five of its state lists are empty and lf_usecount is 0;
 1396  * for cansleep == 0, lf_locallock_lck must additionally test as unlocked.
 1397  * vp, cansleep and p are handed on to nfsrv_freelockowner().
 1398  * Returns 1 if it free'd the nfslockfile, 0 otherwise.
 1399  */
 1400 static int
 1401 nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
 1402 {
 1403         struct nfslockfile *lf = stp->ls_lfp;
 1404         struct nfsstate *lckowner, *nlckowner;
 1405         int freedlf = 0;
 1406 
 1407         LIST_REMOVE(stp, ls_hash);
 1408         LIST_REMOVE(stp, ls_list);
 1409         LIST_REMOVE(stp, ls_file);
 1410 
 1411         /*
 1412          * Release every lockowner that hangs off this open.
 1413          */
 1414         LIST_FOREACH_SAFE(lckowner, &stp->ls_open, ls_list, nlckowner)
 1415                 nfsrv_freelockowner(lckowner, vp, cansleep, p);
 1416 
 1417         /*
 1418          * The nfslockfile is freed here if there are no locks
 1419          * associated with the open; if there are, the nfslockfile
 1420          * structure can be freed via nfsrv_freelockowner().
 1421          * When sleeping is allowed, acquire the state mutex to avoid
 1422          * races with calls to nfsrv_getlockfile().
 1423          */
 1424         if (cansleep != 0)
 1425                 NFSLOCKSTATE();
 1426         if (lf != NULL && lf->lf_usecount == 0 &&
 1427             LIST_EMPTY(&lf->lf_open) && LIST_EMPTY(&lf->lf_deleg) &&
 1428             LIST_EMPTY(&lf->lf_lock) && LIST_EMPTY(&lf->lf_locallock) &&
 1429             LIST_EMPTY(&lf->lf_rollback) &&
 1430             (cansleep != 0 || nfsv4_testlock(&lf->lf_locallock_lck) == 0)) {
 1431                 nfsrv_freenfslockfile(lf);
 1432                 freedlf = 1;
 1433         }
 1434         if (cansleep != 0)
 1435                 NFSUNLOCKSTATE();
 1436 
 1437         free(stp, M_NFSDSTATE);
 1438         nfsstatsv1.srvopens--;
 1439         nfsrv_openpluslock--;
 1440         return (freedlf);
 1441 }
 1442 
 1443 /*
 1444  * Free a lockowner: unlink it, free all its locks, then free the structure.
 1445  */
 1446 static void
 1447 nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
 1448     NFSPROC_T *p)
 1449 {
 1450         /* vp, cansleep and p are only handed to nfsrv_freeallnfslocks(). */
 1451         LIST_REMOVE(stp, ls_hash);
 1452         LIST_REMOVE(stp, ls_list);
 1453         nfsrv_freeallnfslocks(stp, vp, cansleep, p);
 1454         if (stp->ls_op)         /* ls_op, when set, goes to derefcache */
 1455                 nfsrvd_derefcache(stp->ls_op);
 1456         free(stp, M_NFSDSTATE);
 1457         nfsstatsv1.srvlockowners--;     /* one lockowner less */
 1458         nfsrv_openpluslock--;
 1459 }
 1460 
 1461 /*
 1462  * Free all the nfs locks on a lockowner; when a vnode is available, also
 1463  * call nfsrv_localunlock() for each lock's lo_first/lo_end range.
 1464  */
 1465 static void
 1466 nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
 1467     NFSPROC_T *p)
 1468 {
 1469         struct nfslock *lop, *nlop;
 1470         struct nfsrollback *rlp, *nrlp;
 1471         struct nfslockfile *lfp = NULL;
 1472         int gottvp = 0;
 1473         vnode_t tvp = NULL;
 1474         uint64_t first, end;
 1475 
 1476         if (vp != NULL)
 1477                 ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
 1478         lop = LIST_FIRST(&stp->ls_lock);
 1479         while (lop != LIST_END(&stp->ls_lock)) {
 1480                 nlop = LIST_NEXT(lop, lo_lckowner);
 1481                 /* All locks should share one file; lfp should not change. */
 1482                 if (lfp == NULL)
 1483                         lfp = lop->lo_lfp;
 1484                 else if (lfp != lop->lo_lfp)
 1485                         panic("allnfslocks");
 1486                 /*
 1487                  * If vp is NULL and cansleep != 0 (only when called from
 1488                  * nfsrv_cleanclient()), get a vnode from the file handle.
 1489                  * NOTE(review): nfsvno_getvp() is assumed to return NULL on
 1490                  * failure, so the vnode is unlocked only when one was found.
 1491                  */
 1492                 if (gottvp == 0) {
 1493                         if (nfsrv_dolocallocks == 0)
 1494                                 tvp = NULL;
 1495                         else if (vp == NULL && cansleep != 0) {
 1496                                 tvp = nfsvno_getvp(&lfp->lf_fh);
 1497                                 if (tvp != NULL)
 1498                                         NFSVOPUNLOCK(tvp, 0);
 1499                         } else
 1500                                 tvp = vp;
 1501                         gottvp = 1;     /* the choice is made only once */
 1502                 }
 1503 
 1504                 if (tvp != NULL) {
 1505                         if (cansleep == 0)
 1506                                 panic("allnfs2");
 1507                         first = lop->lo_first;  /* saved: lop is freed next */
 1508                         end = lop->lo_end;
 1509                         nfsrv_freenfslock(lop);
 1510                         nfsrv_localunlock(tvp, lfp, first, end, p);
 1511                         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
 1512                             nrlp)
 1513                                 free(rlp, M_NFSDROLLBACK);
 1514                         LIST_INIT(&lfp->lf_rollback);
 1515                 } else
 1516                         nfsrv_freenfslock(lop);
 1517                 lop = nlop;
 1518         }
 1519         if (vp == NULL && tvp != NULL)  /* tvp was acquired here */
 1520                 vrele(tvp);
 1521 }
 1522 
 1523 /*
 1524  * Unlink an nfslock from its owner (and, if linked, file) list and free it.
 1525  */
 1526 static void
 1527 nfsrv_freenfslock(struct nfslock *lop)
 1528 {
 1529 
 1530         LIST_REMOVE(lop, lo_lckowner);
 1531         if (lop->lo_lckfile.le_prev != NULL) {  /* on a lo_lckfile list */
 1532                 nfsrv_openpluslock--;
 1533                 nfsstatsv1.srvlocks--;
 1534                 LIST_REMOVE(lop, lo_lckfile);
 1535         }
 1536         free(lop, M_NFSDLOCK);
 1537 }
 1538 
 1539 /*
 1540  * Unlink an nfslockfile structure from its lf_hash list and free it.
 1541  */
 1542 static void
 1543 nfsrv_freenfslockfile(struct nfslockfile *lfp)
 1544 {
 1545         /* No checks here; lfp must already be unused when this is called. */
 1546         LIST_REMOVE(lfp, lf_hash);
 1547         free(lfp, M_NFSDLOCKFILE);
 1548 }
 1549 
 1550 /*
 1551  * Look up an nfsstate structure via stateid.
 1552  * The NFSSTATEHASH() list selected by clp and *stateidp is searched for an
 1553  * entry whose ls_stateid.other matches stateidp->other (the seqid field is
 1554  * not examined here). flags is unused.
 1555  * Returns 0 with *stpp set to the entry found, or NFSERR_BADSTATEID with
 1556  * *stpp set to NULL when no entry matches.
 1557  */
 1558 static int
 1559 nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
 1560     struct nfsstate **stpp)
 1561 {
 1562         struct nfsstatehead *bucket;
 1563         struct nfsstate *cand;
 1564         int error;
 1565 
 1566         *stpp = NULL;
 1567         /* Assume failure until a matching entry turns up. */
 1568         error = NFSERR_BADSTATEID;
 1569         bucket = NFSSTATEHASH(clp, *stateidp);
 1570         LIST_FOREACH(cand, bucket, ls_hash) {
 1571                 if (NFSBCMP(cand->ls_stateid.other, stateidp->other,
 1572                     NFSX_STATEIDOTHER) == 0) {
 1573                         *stpp = cand;
 1574                         error = 0;
 1575                         break;
 1576                 }
 1577         }
 1578 
 1579         NFSEXITCODE(error);
 1580         return (error);
 1581 }
 1582 
 1583 /*
 1584  * Find the state on list hp whose owner string equals new_stp's, if any.
 1585  */
 1586 static void
 1587 nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
 1588     struct nfsstate **stpp)
 1589 {
 1590         struct nfsstate *cur;
 1591 
 1592         *stpp = NULL;
 1593         LIST_FOREACH(cur, hp, ls_list) {
 1594                 if (cur->ls_ownerlen == new_stp->ls_ownerlen &&
 1595                     NFSBCMP(new_stp->ls_owner, cur->ls_owner,
 1596                     cur->ls_ownerlen) == 0)
 1597                         break;
 1598         }
 1599         *stpp = cur;            /* NULL when the loop ran off the end */
 1600 }
 1601 
 1602 /*
 1603  * Lock control function called to update lock status.
 1604  * Returns 0 upon success, -1 if there is no lock and the flags indicate
 1605  * that one isn't to be created and an NFSERR_xxx for other errors.
 1606  * The structures new_stp and new_lop are passed in as pointers that should
 1607  * be set to NULL if the structure is used and shouldn't be free'd.
 1608  * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
 1609  * never used and can safely be allocated on the stack. For all other
 1610  * cases, *new_stpp and *new_lopp should be malloc'd before the call,
 1611  * in case they are used.
 1612  */
 1613 APPLESTATIC int
 1614 nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
 1615     struct nfslock **new_lopp, struct nfslockconflict *cfp,
 1616     nfsquad_t clientid, nfsv4stateid_t *stateidp,
 1617     __unused struct nfsexstuff *exp,
 1618     struct nfsrv_descript *nd, NFSPROC_T *p)
 1619 {
 1620         struct nfslock *lop;
 1621         struct nfsstate *new_stp = *new_stpp;
 1622         struct nfslock *new_lop = *new_lopp;
 1623         struct nfsstate *tstp, *mystp, *nstp;
 1624         int specialid = 0;
 1625         struct nfslockfile *lfp;
 1626         struct nfslock *other_lop = NULL;
 1627         struct nfsstate *stp, *lckstp = NULL;
 1628         struct nfsclient *clp = NULL;
 1629         u_int32_t bits;
 1630         int error = 0, haslock = 0, ret, reterr;
 1631         int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
 1632         fhandle_t nfh;
 1633         uint64_t first, end;
 1634         uint32_t lock_flags;
 1635 
 1636         if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
 1637                 /*
 1638                  * Note the special cases of "all 1s" or "all 0s" stateids and
 1639                  * let reads with all 1s go ahead.
 1640                  */
 1641                 if (new_stp->ls_stateid.seqid == 0x0 &&
 1642                     new_stp->ls_stateid.other[0] == 0x0 &&
 1643                     new_stp->ls_stateid.other[1] == 0x0 &&
 1644                     new_stp->ls_stateid.other[2] == 0x0)
 1645                         specialid = 1;
 1646                 else if (new_stp->ls_stateid.seqid == 0xffffffff &&
 1647                     new_stp->ls_stateid.other[0] == 0xffffffff &&
 1648                     new_stp->ls_stateid.other[1] == 0xffffffff &&
 1649                     new_stp->ls_stateid.other[2] == 0xffffffff)
 1650                         specialid = 2;
 1651         }
 1652 
 1653         /*
 1654          * Check for restart conditions (client and server).
 1655          */
 1656         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 1657             &new_stp->ls_stateid, specialid);
 1658         if (error)
 1659                 goto out;
 1660 
 1661         /*
 1662          * Check for state resource limit exceeded.
 1663          */
 1664         if ((new_stp->ls_flags & NFSLCK_LOCK) &&
 1665             nfsrv_openpluslock > nfsrv_v4statelimit) {
 1666                 error = NFSERR_RESOURCE;
 1667                 goto out;
 1668         }
 1669 
 1670         /*
 1671          * For the lock case, get another nfslock structure,
 1672          * just in case we need it.
 1673          * Malloc now, before we start sifting through the linked lists,
 1674          * in case we have to wait for memory.
 1675          */
 1676 tryagain:
 1677         if (new_stp->ls_flags & NFSLCK_LOCK)
 1678                 other_lop = malloc(sizeof (struct nfslock),
 1679                     M_NFSDLOCK, M_WAITOK);
 1680         filestruct_locked = 0;
 1681         reterr = 0;
 1682         lfp = NULL;
 1683 
 1684         /*
 1685          * Get the lockfile structure for CFH now, so we can do a sanity
 1686          * check against the stateid, before incrementing the seqid#, since
 1687          * we want to return NFSERR_BADSTATEID on failure and the seqid#
 1688          * shouldn't be incremented for this case.
 1689          * If nfsrv_getlockfile() returns -1, it means "not found", which
 1690          * will be handled later.
 1691          * If we are doing Lock/LockU and local locking is enabled, sleep
 1692          * lock the nfslockfile structure.
 1693          */
 1694         getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
 1695         NFSLOCKSTATE();
 1696         if (getlckret == 0) {
 1697                 if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
 1698                     nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
 1699                         getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
 1700                             &lfp, &nfh, 1);
 1701                         if (getlckret == 0)
 1702                                 filestruct_locked = 1;
 1703                 } else
 1704                         getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
 1705                             &lfp, &nfh, 0);
 1706         }
 1707         if (getlckret != 0 && getlckret != -1)
 1708                 reterr = getlckret;
 1709 
 1710         if (filestruct_locked != 0) {
 1711                 LIST_INIT(&lfp->lf_rollback);
 1712                 if ((new_stp->ls_flags & NFSLCK_LOCK)) {
 1713                         /*
 1714                          * For local locking, do the advisory locking now, so
 1715                          * that any conflict can be detected. A failure later
 1716                          * can be rolled back locally. If an error is returned,
 1717                          * struct nfslockfile has been unlocked and any local
 1718                          * locking rolled back.
 1719                          */
 1720                         NFSUNLOCKSTATE();
 1721                         if (vnode_unlocked == 0) {
 1722                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
 1723                                 vnode_unlocked = 1;
 1724                                 NFSVOPUNLOCK(vp, 0);
 1725                         }
 1726                         reterr = nfsrv_locallock(vp, lfp,
 1727                             (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
 1728                             new_lop->lo_first, new_lop->lo_end, cfp, p);
 1729                         NFSLOCKSTATE();
 1730                 }
 1731         }
 1732 
 1733         if (specialid == 0) {
 1734             if (new_stp->ls_flags & NFSLCK_TEST) {
 1735                 /*
 1736                  * RFC 3530 does not list LockT as an op that renews a
 1737                  * lease, but the consensus seems to be that it is ok
 1738                  * for a server to do so.
 1739                  */
 1740                 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 1741                     (nfsquad_t)((u_quad_t)0), 0, nd, p);
 1742 
 1743                 /*
 1744                  * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
 1745                  * error returns for LockT, just go ahead and test for a lock,
 1746                  * since there are no locks for this client, but other locks
 1747                  * can conflict. (ie. same client will always be false)
 1748                  */
 1749                 if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
 1750                     error = 0;
 1751                 lckstp = new_stp;
 1752             } else {
 1753               error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 1754                 (nfsquad_t)((u_quad_t)0), 0, nd, p);
 1755               if (error == 0)
 1756                 /*
 1757                  * Look up the stateid
 1758                  */
 1759                 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
 1760                   new_stp->ls_flags, &stp);
 1761               /*
 1762                * do some sanity checks for an unconfirmed open or a
 1763                * stateid that refers to the wrong file, for an open stateid
 1764                */
 1765               if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
 1766                   ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
 1767                    (getlckret == 0 && stp->ls_lfp != lfp))){
 1768                       /*
 1769                        * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID
 1770                        * The only exception is using SETATTR with SIZE.
 1771                        * */
 1772                     if ((new_stp->ls_flags &
 1773                          (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
 1774                              error = NFSERR_BADSTATEID;
 1775               }
 1776               
 1777                 if (error == 0 &&
 1778                   (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
 1779                   getlckret == 0 && stp->ls_lfp != lfp)
 1780                         error = NFSERR_BADSTATEID;
 1781 
 1782               /*
 1783                * If the lockowner stateid doesn't refer to the same file,
 1784                * I believe that is considered ok, since some clients will
 1785                * only create a single lockowner and use that for all locks
 1786                * on all files.
 1787                * For now, log it as a diagnostic, instead of considering it
 1788                * a BadStateid.
 1789                */
 1790               if (error == 0 && (stp->ls_flags &
 1791                   (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
 1792                   getlckret == 0 && stp->ls_lfp != lfp) {
 1793 #ifdef DIAGNOSTIC
 1794                   printf("Got a lock statid for different file open\n");
 1795 #endif
 1796                   /*
 1797                   error = NFSERR_BADSTATEID;
 1798                   */
 1799               }
 1800 
 1801               if (error == 0) {
 1802                     if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
 1803                         /*
 1804                          * If haslock set, we've already checked the seqid.
 1805                          */
 1806                         if (!haslock) {
 1807                             if (stp->ls_flags & NFSLCK_OPEN)
 1808                                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 1809                                     stp->ls_openowner, new_stp->ls_op);
 1810                             else
 1811                                 error = NFSERR_BADSTATEID;
 1812                         }
 1813                         if (!error)
 1814                             nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
 1815                         if (lckstp)
 1816                             /*
 1817                              * I believe this should be an error, but it
 1818                              * isn't obvious what NFSERR_xxx would be
 1819                              * appropriate, so I'll use NFSERR_INVAL for now.
 1820                              */
 1821                             error = NFSERR_INVAL;
 1822                         else
 1823                             lckstp = new_stp;
 1824                     } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
 1825                         /*
 1826                          * If haslock set, ditto above.
 1827                          */
 1828                         if (!haslock) {
 1829                             if (stp->ls_flags & NFSLCK_OPEN)
 1830                                 error = NFSERR_BADSTATEID;
 1831                             else
 1832                                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 1833                                     stp, new_stp->ls_op);
 1834                         }
 1835                         lckstp = stp;
 1836                     } else {
 1837                         lckstp = stp;
 1838                     }
 1839               }
 1840               /*
 1841                * If the seqid part of the stateid isn't the same, return
 1842                * NFSERR_OLDSTATEID for cases other than I/O Ops.
 1843                * For I/O Ops, only return NFSERR_OLDSTATEID if
 1844                * nfsrv_returnoldstateid is set. (The consensus on the email
 1845                * list was that most clients would prefer to not receive
 1846                * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
 1847                * is what will happen, so I use the nfsrv_returnoldstateid to
 1848                * allow for either server configuration.)
 1849                */
 1850               if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
 1851                   (((nd->nd_flag & ND_NFSV41) == 0 &&
 1852                    (!(new_stp->ls_flags & NFSLCK_CHECK) ||
 1853                     nfsrv_returnoldstateid)) ||
 1854                    ((nd->nd_flag & ND_NFSV41) != 0 &&
 1855                     new_stp->ls_stateid.seqid != 0)))
 1856                     error = NFSERR_OLDSTATEID;
 1857             }
 1858         }
 1859 
 1860         /*
 1861          * Now we can check for grace.
 1862          */
 1863         if (!error)
 1864                 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
 1865         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
 1866                 nfsrv_checkstable(clp))
 1867                 error = NFSERR_NOGRACE;
 1868         /*
 1869          * If we successfully Reclaimed state, note that.
 1870          */
 1871         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
 1872                 nfsrv_markstable(clp);
 1873 
 1874         /*
 1875          * At this point, either error == NFSERR_BADSTATEID or the
 1876          * seqid# has been updated, so we can return any error.
 1877          * If error == 0, there may be an error in:
 1878          *    nd_repstat - Set by the calling function.
 1879          *    reterr - Set above, if getting the nfslockfile structure
 1880          *       or acquiring the local lock failed.
 1881          *    (If both of these are set, nd_repstat should probably be
 1882          *     returned, since that error was detected before this
 1883          *     function call.)
 1884          */
 1885         if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
 1886                 if (error == 0) {
 1887                         if (nd->nd_repstat != 0)
 1888                                 error = nd->nd_repstat;
 1889                         else
 1890                                 error = reterr;
 1891                 }
 1892                 if (filestruct_locked != 0) {
 1893                         /* Roll back local locks. */
 1894                         NFSUNLOCKSTATE();
 1895                         if (vnode_unlocked == 0) {
 1896                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
 1897                                 vnode_unlocked = 1;
 1898                                 NFSVOPUNLOCK(vp, 0);
 1899                         }
 1900                         nfsrv_locallock_rollback(vp, lfp, p);
 1901                         NFSLOCKSTATE();
 1902                         nfsrv_unlocklf(lfp);
 1903                 }
 1904                 NFSUNLOCKSTATE();
 1905                 goto out;
 1906         }
 1907 
 1908         /*
 1909          * Check the nfsrv_getlockfile return.
 1910          * Returned -1 if no structure found.
 1911          */
 1912         if (getlckret == -1) {
 1913                 error = NFSERR_EXPIRED;
 1914                 /*
 1915                  * Called from lockt, so no lock is OK.
 1916                  */
 1917                 if (new_stp->ls_flags & NFSLCK_TEST) {
 1918                         error = 0;
 1919                 } else if (new_stp->ls_flags &
 1920                     (NFSLCK_CHECK | NFSLCK_SETATTR)) {
 1921                         /*
 1922                          * Called to check for a lock, OK if the stateid is all
 1923                          * 1s or all 0s, but there should be an nfsstate
 1924                          * otherwise.
 1925                          * (ie. If there is no open, I'll assume no share
 1926                          *  deny bits.)
 1927                          */
 1928                         if (specialid)
 1929                                 error = 0;
 1930                         else
 1931                                 error = NFSERR_BADSTATEID;
 1932                 }
 1933                 NFSUNLOCKSTATE();
 1934                 goto out;
 1935         }
 1936 
 1937         /*
 1938          * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
 1939          * For NFSLCK_CHECK, allow a read if write access is granted,
 1940          * but check for a deny. For NFSLCK_LOCK, require correct access,
 1941          * which implies a conflicting deny can't exist.
 1942          */
 1943         if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
 1944             /*
 1945              * Four kinds of state id:
 1946              * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
 1947              * - stateid for an open
 1948              * - stateid for a delegation
 1949              * - stateid for a lock owner
 1950              */
 1951             if (!specialid) {
 1952                 if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
 1953                     delegation = 1;
 1954                     mystp = stp;
 1955                     nfsrv_delaydelegtimeout(stp);
 1956                 } else if (stp->ls_flags & NFSLCK_OPEN) {
 1957                     mystp = stp;
 1958                 } else {
 1959                     mystp = stp->ls_openstp;
 1960                 }
 1961                 /*
 1962                  * If locking or checking, require correct access
 1963                  * bit set.
 1964                  */
 1965                 if (((new_stp->ls_flags & NFSLCK_LOCK) &&
 1966                      !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
 1967                        mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
 1968                     ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
 1969                       (NFSLCK_CHECK | NFSLCK_READACCESS) &&
 1970                      !(mystp->ls_flags & NFSLCK_READACCESS) &&
 1971                      nfsrv_allowreadforwriteopen == 0) ||
 1972                     ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
 1973                       (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
 1974                      !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
 1975                         if (filestruct_locked != 0) {
 1976                                 /* Roll back local locks. */
 1977                                 NFSUNLOCKSTATE();
 1978                                 if (vnode_unlocked == 0) {
 1979                                         ASSERT_VOP_ELOCKED(vp,
 1980                                             "nfsrv_lockctrl3");
 1981                                         vnode_unlocked = 1;
 1982                                         NFSVOPUNLOCK(vp, 0);
 1983                                 }
 1984                                 nfsrv_locallock_rollback(vp, lfp, p);
 1985                                 NFSLOCKSTATE();
 1986                                 nfsrv_unlocklf(lfp);
 1987                         }
 1988                         NFSUNLOCKSTATE();
 1989                         error = NFSERR_OPENMODE;
 1990                         goto out;
 1991                 }
 1992             } else
 1993                 mystp = NULL;
 1994             if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
 1995                 /*
 1996                  * Check for a conflicting deny bit.
 1997                  */
 1998                 LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
 1999                     if (tstp != mystp) {
 2000                         bits = tstp->ls_flags;
 2001                         bits >>= NFSLCK_SHIFT;
 2002                         if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
 2003                             KASSERT(vnode_unlocked == 0,
 2004                                 ("nfsrv_lockctrl: vnode unlocked1"));
 2005                             ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
 2006                                 vp, p);
 2007                             if (ret == 1) {
 2008                                 /*
 2009                                 * nfsrv_clientconflict unlocks state
 2010                                  * when it returns non-zero.
 2011                                  */
 2012                                 lckstp = NULL;
 2013                                 goto tryagain;
 2014                             }
 2015                             if (ret == 0)
 2016                                 NFSUNLOCKSTATE();
 2017                             if (ret == 2)
 2018                                 error = NFSERR_PERM;
 2019                             else
 2020                                 error = NFSERR_OPENMODE;
 2021                             goto out;
 2022                         }
 2023                     }
 2024                 }
 2025 
 2026                 /* We're outta here */
 2027                 NFSUNLOCKSTATE();
 2028                 goto out;
 2029             }
 2030         }
 2031 
 2032         /*
 2033          * For setattr, just get rid of all the Delegations for other clients.
 2034          */
 2035         if (new_stp->ls_flags & NFSLCK_SETATTR) {
 2036                 KASSERT(vnode_unlocked == 0,
 2037                     ("nfsrv_lockctrl: vnode unlocked2"));
 2038                 ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
 2039                 if (ret) {
 2040                         /*
 2041                          * nfsrv_cleandeleg() unlocks state when it
 2042                          * returns non-zero.
 2043                          */
 2044                         if (ret == -1) {
 2045                                 lckstp = NULL;
 2046                                 goto tryagain;
 2047                         }
 2048                         error = ret;
 2049                         goto out;
 2050                 }
 2051                 if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
 2052                     (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
 2053                      LIST_EMPTY(&lfp->lf_deleg))) {
 2054                         NFSUNLOCKSTATE();
 2055                         goto out;
 2056                 }
 2057         }
 2058 
 2059         /*
 2060          * Check for a conflicting delegation. If one is found, call
 2061          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2062          * been set yet, it will get the lock. Otherwise, it will recall
 2063          * the delegation. Then, we try try again...
 2064          * I currently believe the conflict algorithm to be:
 2065          * For Lock Ops (Lock/LockT/LockU)
 2066          * - there is a conflict iff a different client has a write delegation
 2067          * For Reading (Read Op)
 2068          * - there is a conflict iff a different client has a write delegation
 2069          *   (the specialids are always a different client)
 2070          * For Writing (Write/Setattr of size)
 2071          * - there is a conflict if a different client has any delegation
 2072          * - there is a conflict if the same client has a read delegation
 2073          *   (I don't understand why this isn't allowed, but that seems to be
 2074          *    the current consensus?)
 2075          */
 2076         tstp = LIST_FIRST(&lfp->lf_deleg);
 2077         while (tstp != LIST_END(&lfp->lf_deleg)) {
 2078             nstp = LIST_NEXT(tstp, ls_file);
 2079             if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
 2080                  ((new_stp->ls_flags & NFSLCK_CHECK) &&
 2081                   (new_lop->lo_flags & NFSLCK_READ))) &&
 2082                   clp != tstp->ls_clp &&
 2083                  (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2084                  ((new_stp->ls_flags & NFSLCK_CHECK) &&
 2085                    (new_lop->lo_flags & NFSLCK_WRITE) &&
 2086                   (clp != tstp->ls_clp ||
 2087                    (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
 2088                 ret = 0;
 2089                 if (filestruct_locked != 0) {
 2090                         /* Roll back local locks. */
 2091                         NFSUNLOCKSTATE();
 2092                         if (vnode_unlocked == 0) {
 2093                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
 2094                                 NFSVOPUNLOCK(vp, 0);
 2095                         }
 2096                         nfsrv_locallock_rollback(vp, lfp, p);
 2097                         NFSLOCKSTATE();
 2098                         nfsrv_unlocklf(lfp);
 2099                         NFSUNLOCKSTATE();
 2100                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2101                         vnode_unlocked = 0;
 2102                         if ((vp->v_iflag & VI_DOOMED) != 0)
 2103                                 ret = NFSERR_SERVERFAULT;
 2104                         NFSLOCKSTATE();
 2105                 }
 2106                 if (ret == 0)
 2107                         ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
 2108                 if (ret) {
 2109                     /*
 2110                      * nfsrv_delegconflict unlocks state when it
 2111                      * returns non-zero, which it always does.
 2112                      */
 2113                     if (other_lop) {
 2114                         free(other_lop, M_NFSDLOCK);
 2115                         other_lop = NULL;
 2116                     }
 2117                     if (ret == -1) {
 2118                         lckstp = NULL;
 2119                         goto tryagain;
 2120                     }
 2121                     error = ret;
 2122                     goto out;
 2123                 }
 2124                 /* Never gets here. */
 2125             }
 2126             tstp = nstp;
 2127         }
 2128 
 2129         /*
 2130          * Handle the unlock case by calling nfsrv_updatelock().
 2131          * (Should I have done some access checking above for unlock? For now,
 2132          *  just let it happen.)
 2133          */
 2134         if (new_stp->ls_flags & NFSLCK_UNLOCK) {
 2135                 first = new_lop->lo_first;
 2136                 end = new_lop->lo_end;
 2137                 nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
 2138                 stateidp->seqid = ++(stp->ls_stateid.seqid);
 2139                 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 2140                         stateidp->seqid = stp->ls_stateid.seqid = 1;
 2141                 stateidp->other[0] = stp->ls_stateid.other[0];
 2142                 stateidp->other[1] = stp->ls_stateid.other[1];
 2143                 stateidp->other[2] = stp->ls_stateid.other[2];
 2144                 if (filestruct_locked != 0) {
 2145                         NFSUNLOCKSTATE();
 2146                         if (vnode_unlocked == 0) {
 2147                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
 2148                                 vnode_unlocked = 1;
 2149                                 NFSVOPUNLOCK(vp, 0);
 2150                         }
 2151                         /* Update the local locks. */
 2152                         nfsrv_localunlock(vp, lfp, first, end, p);
 2153                         NFSLOCKSTATE();
 2154                         nfsrv_unlocklf(lfp);
 2155                 }
 2156                 NFSUNLOCKSTATE();
 2157                 goto out;
 2158         }
 2159 
 2160         /*
 2161          * Search for a conflicting lock. A lock conflicts if:
 2162          * - the lock range overlaps and
 2163          * - at least one lock is a write lock and
 2164          * - it is not owned by the same lock owner
 2165          */
 2166         if (!delegation) {
 2167           LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
 2168             if (new_lop->lo_end > lop->lo_first &&
 2169                 new_lop->lo_first < lop->lo_end &&
 2170                 (new_lop->lo_flags == NFSLCK_WRITE ||
 2171                  lop->lo_flags == NFSLCK_WRITE) &&
 2172                 lckstp != lop->lo_stp &&
 2173                 (clp != lop->lo_stp->ls_clp ||
 2174                  lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
 2175                  NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
 2176                     lckstp->ls_ownerlen))) {
 2177                 if (other_lop) {
 2178                     free(other_lop, M_NFSDLOCK);
 2179                     other_lop = NULL;
 2180                 }
 2181                 if (vnode_unlocked != 0)
 2182                     ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
 2183                         NULL, p);
 2184                 else
 2185                     ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
 2186                         vp, p);
 2187                 if (ret == 1) {
 2188                     if (filestruct_locked != 0) {
 2189                         if (vnode_unlocked == 0) {
 2190                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
 2191                                 NFSVOPUNLOCK(vp, 0);
 2192                         }
 2193                         /* Roll back local locks. */
 2194                         nfsrv_locallock_rollback(vp, lfp, p);
 2195                         NFSLOCKSTATE();
 2196                         nfsrv_unlocklf(lfp);
 2197                         NFSUNLOCKSTATE();
 2198                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2199                         vnode_unlocked = 0;
 2200                         if ((vp->v_iflag & VI_DOOMED) != 0) {
 2201                                 error = NFSERR_SERVERFAULT;
 2202                                 goto out;
 2203                         }
 2204                     }
 2205                     /*
 2206                      * nfsrv_clientconflict() unlocks state when it
 2207                      * returns non-zero.
 2208                      */
 2209                     lckstp = NULL;
 2210                     goto tryagain;
 2211                 }
 2212                 /*
 2213                  * Found a conflicting lock, so record the conflict and
 2214                  * return the error.
 2215                  */
 2216                 if (cfp != NULL && ret == 0) {
 2217                     cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
 2218                     cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
 2219                     cfp->cl_first = lop->lo_first;
 2220                     cfp->cl_end = lop->lo_end;
 2221                     cfp->cl_flags = lop->lo_flags;
 2222                     cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
 2223                     NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
 2224                         cfp->cl_ownerlen);
 2225                 }
 2226                 if (ret == 2)
 2227                     error = NFSERR_PERM;
 2228                 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2229                     error = NFSERR_RECLAIMCONFLICT;
 2230                 else if (new_stp->ls_flags & NFSLCK_CHECK)
 2231                     error = NFSERR_LOCKED;
 2232                 else
 2233                     error = NFSERR_DENIED;
 2234                 if (filestruct_locked != 0 && ret == 0) {
 2235                         /* Roll back local locks. */
 2236                         NFSUNLOCKSTATE();
 2237                         if (vnode_unlocked == 0) {
 2238                                 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
 2239                                 vnode_unlocked = 1;
 2240                                 NFSVOPUNLOCK(vp, 0);
 2241                         }
 2242                         nfsrv_locallock_rollback(vp, lfp, p);
 2243                         NFSLOCKSTATE();
 2244                         nfsrv_unlocklf(lfp);
 2245                 }
 2246                 if (ret == 0)
 2247                         NFSUNLOCKSTATE();
 2248                 goto out;
 2249             }
 2250           }
 2251         }
 2252 
 2253         /*
 2254          * We only get here if there was no lock that conflicted.
 2255          */
 2256         if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
 2257                 NFSUNLOCKSTATE();
 2258                 goto out;
 2259         }
 2260 
 2261         /*
 2262          * We only get here when we are creating or modifying a lock.
 2263          * There are two variants:
 2264          * - exist_lock_owner where lock_owner exists
 2265          * - open_to_lock_owner with new lock_owner
 2266          */
 2267         first = new_lop->lo_first;
 2268         end = new_lop->lo_end;
 2269         lock_flags = new_lop->lo_flags;
 2270         if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
 2271                 nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
 2272                 stateidp->seqid = ++(lckstp->ls_stateid.seqid);
 2273                 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 2274                         stateidp->seqid = lckstp->ls_stateid.seqid = 1;
 2275                 stateidp->other[0] = lckstp->ls_stateid.other[0];
 2276                 stateidp->other[1] = lckstp->ls_stateid.other[1];
 2277                 stateidp->other[2] = lckstp->ls_stateid.other[2];
 2278         } else {
 2279                 /*
 2280                  * The new open_to_lock_owner case.
 2281                  * Link the new nfsstate into the lists.
 2282                  */
 2283                 new_stp->ls_seq = new_stp->ls_opentolockseq;
 2284                 nfsrvd_refcache(new_stp->ls_op);
 2285                 stateidp->seqid = new_stp->ls_stateid.seqid = 1;
 2286                 stateidp->other[0] = new_stp->ls_stateid.other[0] =
 2287                     clp->lc_clientid.lval[0];
 2288                 stateidp->other[1] = new_stp->ls_stateid.other[1] =
 2289                     clp->lc_clientid.lval[1];
 2290                 stateidp->other[2] = new_stp->ls_stateid.other[2] =
 2291                     nfsrv_nextstateindex(clp);
 2292                 new_stp->ls_clp = clp;
 2293                 LIST_INIT(&new_stp->ls_lock);
 2294                 new_stp->ls_openstp = stp;
 2295                 new_stp->ls_lfp = lfp;
 2296                 nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
 2297                     lfp);
 2298                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
 2299                     new_stp, ls_hash);
 2300                 LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
 2301                 *new_lopp = NULL;
 2302                 *new_stpp = NULL;
 2303                 nfsstatsv1.srvlockowners++;
 2304                 nfsrv_openpluslock++;
 2305         }
 2306         if (filestruct_locked != 0) {
 2307                 NFSUNLOCKSTATE();
 2308                 nfsrv_locallock_commit(lfp, lock_flags, first, end);
 2309                 NFSLOCKSTATE();
 2310                 nfsrv_unlocklf(lfp);
 2311         }
 2312         NFSUNLOCKSTATE();
 2313 
 2314 out:
 2315         if (haslock) {
 2316                 NFSLOCKV4ROOTMUTEX();
 2317                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2318                 NFSUNLOCKV4ROOTMUTEX();
 2319         }
 2320         if (vnode_unlocked != 0) {
 2321                 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 2322                 if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
 2323                         error = NFSERR_SERVERFAULT;
 2324         }
 2325         if (other_lop)
 2326                 free(other_lop, M_NFSDLOCK);
 2327         NFSEXITCODE2(error, nd);
 2328         return (error);
 2329 }
 2330 
 2331 /*
 2332  * Check for state errors for Open.
 2333  * repstat is passed back out as an error if more critical errors
 2334  * are not detected.
       *
       * The restart check and the state-count limit check are done first,
       * before the state lock is taken.  Then, with NFSLOCKSTATE() held, the
       * client and open owner are looked up, the open owner's seqid and the
       * grace period are checked and, when vp is non-NULL, the file's opens
       * and delegations are scanned for conflicts with new_stp->ls_flags.
       * A conflict helper can ask for a retry, in which case everything from
       * the "tryagain" label is redone.
       *
       * Arguments, as used in this function:
       *   clientid - handed to nfsrv_checkrestart() and nfsrv_getclient().
       *   stateidp - for NFSLCK_DELEGCUR, matched against the stateids of
       *              the file's delegations.
       *   new_stp  - the open being checked; ls_flags, ls_stateid, ls_seq
       *              and ls_op are read.
       *   vp       - vnode of the file, or NULL when the file does not exist
       *              yet (the per-file checks are then skipped).
       *   nd, p    - passed through to the helper functions; nd->nd_flag is
       *              also tested for ND_NFSV41.
       *   repstat  - status to return when nothing more critical is found.
       *
       * Returns 0 when nothing was detected, otherwise the error from the
       * failing check (several NFSERR_xxx values are assigned directly) or
       * repstat.  Every exit path either calls NFSUNLOCKSTATE() itself or
       * relies on a conflict helper that is documented below as having
       * unlocked state.
 2335  */
 2336 APPLESTATIC int
 2337 nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
 2338     struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
 2339     NFSPROC_T *p, int repstat)
 2340 {
 2341         struct nfsstate *stp, *nstp;
 2342         struct nfsclient *clp;
 2343         struct nfsstate *ownerstp;
 2344         struct nfslockfile *lfp, *new_lfp;
 2345         int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
 2346 
              /*
               * readonly is set when the share bits of the new open are exactly
               * NFSLCK_READACCESS.  It selects the narrower delegation conflict
               * rule near the end of the function.
               */
 2347         if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
 2348                 readonly = 1;
 2349         /*
 2350          * Check for restart conditions (client and server).
 2351          */
 2352         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 2353                 &new_stp->ls_stateid, 0);
 2354         if (error)
 2355                 goto out;
 2356 
 2357         /*
 2358          * Check for state resource limit exceeded.
 2359          * Technically this should be SMP protected, but the worst
 2360          * case error is "out by one or two" on the count when it
 2361          * returns NFSERR_RESOURCE and the limit is just a rather
 2362          * arbitrary high water mark, so no harm is done.
 2363          */
 2364         if (nfsrv_openpluslock > nfsrv_v4statelimit) {
 2365                 error = NFSERR_RESOURCE;
 2366                 goto out;
 2367         }
 2368 
 2369 tryagain:
              /*
               * The candidate nfslockfile is allocated and, if there is a vnode,
               * handed to nfsrv_getlockfh() before the state lock is taken.
               * Both "goto tryagain" sites below come after the
               * "if (new_lfp) free()" step, so each pass allocates a fresh one.
               */
 2370         new_lfp = malloc(sizeof (struct nfslockfile),
 2371             M_NFSDLOCKFILE, M_WAITOK);
 2372         if (vp)
 2373                 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
 2374                     NULL, p);
 2375         NFSLOCKSTATE();
 2376         /*
 2377          * Get the nfsclient structure.
 2378          */
 2379         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 2380             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 2381 
 2382         /*
 2383          * Look up the open owner. See if it needs confirmation and
 2384          * check the seq#, as required.
 2385          */
 2386         if (!error)
 2387                 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
 2388 
 2389         if (!error && ownerstp) {
 2390                 error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
 2391                     new_stp->ls_op);
 2392                 /*
 2393                  * If the OpenOwner hasn't been confirmed, assume the
 2394                  * old one was a replay and this one is ok.
 2395                  * See: RFC3530 Sec. 14.2.18.
 2396                  */
 2397                 if (error == NFSERR_BADSEQID &&
 2398                     (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
 2399                         error = 0;
 2400         }
 2401 
 2402         /*
 2403          * Check for grace.
 2404          */
 2405         if (!error)
 2406                 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
 2407         if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
 2408                 nfsrv_checkstable(clp))
 2409                 error = NFSERR_NOGRACE;
 2410 
 2411         /*
 2412          * If none of the above errors occurred, let repstat be
 2413          * returned.
 2414          */
 2415         if (repstat && !error)
 2416                 error = repstat;
              /*
               * Common failure exit for the checks above.  haslock is tested
               * because this code is also reached on a retry pass; presumably a
               * conflict helper may have acquired the v4root lock by then.
               */
 2417         if (error) {
 2418                 NFSUNLOCKSTATE();
 2419                 if (haslock) {
 2420                         NFSLOCKV4ROOTMUTEX();
 2421                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2422                         NFSUNLOCKV4ROOTMUTEX();
 2423                 }
 2424                 free(new_lfp, M_NFSDLOCKFILE);
 2425                 goto out;
 2426         }
 2427 
 2428         /*
 2429          * If vp == NULL, the file doesn't exist yet, so return ok.
 2430          * (This always happens on the first pass, so haslock must be 0.)
 2431          */
 2432         if (vp == NULL) {
 2433                 NFSUNLOCKSTATE();
 2434                 free(new_lfp, M_NFSDLOCKFILE);
 2435                 goto out;
 2436         }
 2437 
 2438         /*
 2439          * Get the structure for the underlying file.
 2440          */
 2441         if (getfhret)
 2442                 error = getfhret;
 2443         else
 2444                 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
 2445                     NULL, 0);
              /*
               * new_lfp is passed by reference to nfsrv_getlockfile().  Whatever
               * is still non-NULL afterwards (always the case when the call was
               * skipped because of getfhret) is freed here.  Presumably
               * nfsrv_getlockfile() clears the pointer when it keeps the
               * structure -- confirm against its definition.
               */
 2446         if (new_lfp)
 2447                 free(new_lfp, M_NFSDLOCKFILE);
 2448         if (error) {
 2449                 NFSUNLOCKSTATE();
 2450                 if (haslock) {
 2451                         NFSLOCKV4ROOTMUTEX();
 2452                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2453                         NFSUNLOCKV4ROOTMUTEX();
 2454                 }
 2455                 goto out;
 2456         }
 2457 
 2458         /*
 2459          * Search for a conflicting open/share.
 2460          */
 2461         if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
 2462             /*
 2463              * For Delegate_Cur, search for the matching Delegation,
 2464              * which indicates no conflict.
 2465              * An old delegation should have been recovered by the
 2466              * client doing a Claim_DELEGATE_Prev, so I won't let
 2467              * it match and return NFSERR_EXPIRED. Should I let it
 2468              * match?
 2469              */
                  /*
                   * A delegation matches when it is not flagged NFSLCK_OLDDELEG,
                   * the "other" part of the stateid is identical, and the seqid
                   * is equal (for ND_NFSV41 requests a seqid of 0 is accepted
                   * as well).
                   */
 2470             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 2471                 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
 2472                     (((nd->nd_flag & ND_NFSV41) != 0 &&
 2473                     stateidp->seqid == 0) ||
 2474                     stateidp->seqid == stp->ls_stateid.seqid) &&
 2475                     !NFSBCMP(stateidp->other, stp->ls_stateid.other,
 2476                           NFSX_STATEIDOTHER))
 2477                         break;
 2478             }
                  /*
                   * stp compares equal to LIST_END() when the loop above ran to
                   * completion without a match.  A matching read delegation is
                   * also rejected when the open asks for write access.
                   */
 2479             if (stp == LIST_END(&lfp->lf_deleg) ||
 2480                 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
 2481                  (stp->ls_flags & NFSLCK_DELEGREAD))) {
 2482                 NFSUNLOCKSTATE();
 2483                 if (haslock) {
 2484                         NFSLOCKV4ROOTMUTEX();
 2485                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2486                         NFSUNLOCKV4ROOTMUTEX();
 2487                 }
 2488                 error = NFSERR_EXPIRED;
 2489                 goto out;
 2490             }
 2491         }
 2492 
 2493         /*
 2494          * Check for access/deny bit conflicts. I check for the same
 2495          * owner as well, in case the client didn't bother.
 2496          */
 2497         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
                      /*
                       * The first test masks the new open's access bits with the
                       * existing open's flags shifted right by NFSLCK_SHIFT
                       * (presumably its deny bits -- confirm against the flag
                       * definitions).  The second test is the same with the
                       * roles swapped.  Nothing is checked for NFSLCK_DELEGCUR.
                       */
 2498                 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
 2499                     (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
 2500                       ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
 2501                      ((stp->ls_flags & NFSLCK_ACCESSBITS) &
 2502                       ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
 2503                         ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
 2504                         if (ret == 1) {
 2505                                 /*
 2506                                  * nfsrv_clientconflict() unlocks
 2507                                  * state when it returns non-zero.
 2508                                  */
 2509                                 goto tryagain;
 2510                         }
                              /*
                               * ret == 2 is turned into NFSERR_PERM; any other
                               * value reports the share conflict itself.  Only
                               * for ret == 0 is the state lock still held, so it
                               * is dropped here in that case alone.
                               */
 2511                         if (ret == 2)
 2512                                 error = NFSERR_PERM;
 2513                         else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2514                                 error = NFSERR_RECLAIMCONFLICT;
 2515                         else
 2516                                 error = NFSERR_SHAREDENIED;
 2517                         if (ret == 0)
 2518                                 NFSUNLOCKSTATE();
 2519                         if (haslock) {
 2520                                 NFSLOCKV4ROOTMUTEX();
 2521                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2522                                 NFSUNLOCKV4ROOTMUTEX();
 2523                         }
 2524                         goto out;
 2525                 }
 2526         }
 2527 
 2528         /*
 2529          * Check for a conflicting delegation. If one is found, call
 2530          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2531          * been set yet, it will get the lock. Otherwise, it will recall
 2532          * the delegation. Then, we try try again...
 2533          * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
 2534          *  isn't a conflict.)
 2535          * I currently believe the conflict algorithm to be:
 2536          * For Open with Read Access and Deny None
 2537          * - there is a conflict iff a different client has a write delegation
 2538          * For Open with other Write Access or any Deny except None
 2539          * - there is a conflict if a different client has any delegation
 2540          * - there is a conflict if the same client has a read delegation
 2541          *   (The current consensus is that this last case should be
 2542          *    considered a conflict since the client with a read delegation
 2543          *    could have done an Open with ReadAccess and WriteDeny
 2544          *    locally and then not have checked for the WriteDeny.)
 2545          * Don't check for a Reclaim, since that will be dealt with
 2546          * by nfsrv_openctrl().
 2547          */
 2548         if (!(new_stp->ls_flags &
 2549                 (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
 2550             stp = LIST_FIRST(&lfp->lf_deleg);
 2551             while (stp != LIST_END(&lfp->lf_deleg)) {
                      /*
                       * The successor is fetched before nfsrv_delegconflict()
                       * is called -- presumably so that the walk does not
                       * depend on stp afterwards; confirm.
                       */
 2552                 nstp = LIST_NEXT(stp, ls_file);
 2553                 if ((readonly && stp->ls_clp != clp &&
 2554                        (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2555                     (!readonly && (stp->ls_clp != clp ||
 2556                          (stp->ls_flags & NFSLCK_DELEGREAD)))) {
 2557                         ret = nfsrv_delegconflict(stp, &haslock, p, vp);
 2558                         if (ret) {
 2559                             /*
 2560                              * nfsrv_delegconflict() unlocks state
 2561                              * when it returns non-zero.
 2562                              */
 2563                             if (ret == -1)
 2564                                 goto tryagain;
                                  /*
                                   * NOTE(review): this exit does not test
                                   * haslock, unlike the other error exits.
                                   * Presumably nfsrv_delegconflict() has already
                                   * released the v4root lock on these returns
                                   * -- confirm against its definition.
                                   */
 2565                             error = ret;
 2566                             goto out;
 2567                         }
 2568                 }
 2569                 stp = nstp;
 2570             }
 2571         }
              /* No conflict found: drop the state lock and, if held, the v4root lock. */
 2572         NFSUNLOCKSTATE();
 2573         if (haslock) {
 2574                 NFSLOCKV4ROOTMUTEX();
 2575                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2576                 NFSUNLOCKV4ROOTMUTEX();
 2577         }
 2578 
 2579 out:
 2580         NFSEXITCODE2(error, nd);
 2581         return (error);
 2582 }
 2583 
 2584 /*
 2585  * Open control function to create/update open state for an open.
 2586  */
 2587 APPLESTATIC int
 2588 nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
 2589     struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
 2590     nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
 2591     NFSPROC_T *p, u_quad_t filerev)
 2592 {
 2593         struct nfsstate *new_stp = *new_stpp;
 2594         struct nfsstate *stp, *nstp;
 2595         struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
 2596         struct nfslockfile *lfp, *new_lfp;
 2597         struct nfsclient *clp;
 2598         int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
 2599         int readonly = 0, cbret = 1, getfhret = 0;
 2600         int gotstate = 0, len = 0;
 2601         u_char *clidp = NULL;
 2602 
 2603         if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
 2604                 readonly = 1;
 2605         /*
 2606          * Check for restart conditions (client and server).
 2607          * (Paranoia, should have been detected by nfsrv_opencheck().)
 2608          * If an error does show up, return NFSERR_EXPIRED, since the
 2609          * seqid# has already been incremented.
 2610          */
 2611         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 2612             &new_stp->ls_stateid, 0);
 2613         if (error) {
 2614                 printf("Nfsd: openctrl unexpected restart err=%d\n",
 2615                     error);
 2616                 error = NFSERR_EXPIRED;
 2617                 goto out;
 2618         }
 2619 
 2620         clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
 2621 tryagain:
 2622         new_lfp = malloc(sizeof (struct nfslockfile),
 2623             M_NFSDLOCKFILE, M_WAITOK);
 2624         new_open = malloc(sizeof (struct nfsstate),
 2625             M_NFSDSTATE, M_WAITOK);
 2626         new_deleg = malloc(sizeof (struct nfsstate),
 2627             M_NFSDSTATE, M_WAITOK);
 2628         getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
 2629             NULL, p);
 2630         NFSLOCKSTATE();
 2631         /*
 2632          * Get the client structure. Since the linked lists could be changed
 2633          * by other nfsd processes if this process does a tsleep(), one of
 2634          * two things must be done.
 2635          * 1 - don't tsleep()
 2636          * or
 2637          * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
 2638          *     before using the lists, since this lock stops the other
 2639          *     nfsd. This should only be used for rare cases, since it
 2640          *     essentially single threads the nfsd.
 2641          *     At this time, it is only done for cases where the stable
 2642          *     storage file must be written prior to completion of state
 2643          *     expiration.
 2644          */
 2645         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 2646             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 2647         if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
 2648             clp->lc_program) {
 2649                 /*
 2650                  * This happens on the first open for a client
 2651                  * that supports callbacks.
 2652                  */
 2653                 NFSUNLOCKSTATE();
 2654                 /*
 2655                  * Although nfsrv_docallback() will sleep, clp won't
 2656                  * go away, since they are only removed when the
 2657                  * nfsv4_lock() has blocked the nfsd threads. The
 2658                  * fields in clp can change, but having multiple
 2659                  * threads do this Null callback RPC should be
 2660                  * harmless.
 2661                  */
 2662                 cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
 2663                     NULL, 0, NULL, NULL, NULL, 0, p);
 2664                 NFSLOCKSTATE();
 2665                 clp->lc_flags &= ~LCL_NEEDSCBNULL;
 2666                 if (!cbret)
 2667                         clp->lc_flags |= LCL_CALLBACKSON;
 2668         }
 2669 
 2670         /*
 2671          * Look up the open owner. See if it needs confirmation and
 2672          * check the seq#, as required.
 2673          */
 2674         if (!error)
 2675                 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
 2676 
 2677         if (error) {
 2678                 NFSUNLOCKSTATE();
 2679                 printf("Nfsd: openctrl unexpected state err=%d\n",
 2680                         error);
 2681                 free(new_lfp, M_NFSDLOCKFILE);
 2682                 free(new_open, M_NFSDSTATE);
 2683                 free(new_deleg, M_NFSDSTATE);
 2684                 if (haslock) {
 2685                         NFSLOCKV4ROOTMUTEX();
 2686                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2687                         NFSUNLOCKV4ROOTMUTEX();
 2688                 }
 2689                 error = NFSERR_EXPIRED;
 2690                 goto out;
 2691         }
 2692 
 2693         if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2694                 nfsrv_markstable(clp);
 2695 
 2696         /*
 2697          * Get the structure for the underlying file.
 2698          */
 2699         if (getfhret)
 2700                 error = getfhret;
 2701         else
 2702                 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
 2703                     NULL, 0);
 2704         if (new_lfp)
 2705                 free(new_lfp, M_NFSDLOCKFILE);
 2706         if (error) {
 2707                 NFSUNLOCKSTATE();
 2708                 printf("Nfsd openctrl unexpected getlockfile err=%d\n",
 2709                     error);
 2710                 free(new_open, M_NFSDSTATE);
 2711                 free(new_deleg, M_NFSDSTATE);
 2712                 if (haslock) {
 2713                         NFSLOCKV4ROOTMUTEX();
 2714                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2715                         NFSUNLOCKV4ROOTMUTEX();
 2716                 }
 2717                 goto out;
 2718         }
 2719 
 2720         /*
 2721          * Search for a conflicting open/share.
 2722          */
 2723         if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
 2724             /*
 2725              * For Delegate_Cur, search for the matching Delegation,
 2726              * which indicates no conflict.
 2727              * An old delegation should have been recovered by the
 2728              * client doing a Claim_DELEGATE_Prev, so I won't let
 2729              * it match and return NFSERR_EXPIRED. Should I let it
 2730              * match?
 2731              */
 2732             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 2733                 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
 2734                     (((nd->nd_flag & ND_NFSV41) != 0 &&
 2735                     stateidp->seqid == 0) ||
 2736                     stateidp->seqid == stp->ls_stateid.seqid) &&
 2737                     !NFSBCMP(stateidp->other, stp->ls_stateid.other,
 2738                         NFSX_STATEIDOTHER))
 2739                         break;
 2740             }
 2741             if (stp == LIST_END(&lfp->lf_deleg) ||
 2742                 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
 2743                  (stp->ls_flags & NFSLCK_DELEGREAD))) {
 2744                 NFSUNLOCKSTATE();
 2745                 printf("Nfsd openctrl unexpected expiry\n");
 2746                 free(new_open, M_NFSDSTATE);
 2747                 free(new_deleg, M_NFSDSTATE);
 2748                 if (haslock) {
 2749                         NFSLOCKV4ROOTMUTEX();
 2750                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2751                         NFSUNLOCKV4ROOTMUTEX();
 2752                 }
 2753                 error = NFSERR_EXPIRED;
 2754                 goto out;
 2755             }
 2756 
 2757             /*
 2758              * Don't issue a Delegation, since one already exists and
 2759              * delay delegation timeout, as required.
 2760              */
 2761             delegate = 0;
 2762             nfsrv_delaydelegtimeout(stp);
 2763         }
 2764 
 2765         /*
 2766          * Check for access/deny bit conflicts. I also check for the
 2767          * same owner, since the client might not have bothered to check.
 2768          * Also, note an open for the same file and owner, if found,
 2769          * which is all we do here for Delegate_Cur, since conflict
 2770          * checking is already done.
 2771          */
 2772         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 2773                 if (ownerstp && stp->ls_openowner == ownerstp)
 2774                         openstp = stp;
 2775                 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
 2776                     /*
 2777                      * If another client has the file open, the only
 2778                      * delegation that can be issued is a Read delegation
 2779                      * and only if it is a Read open with Deny none.
 2780                      */
 2781                     if (clp != stp->ls_clp) {
 2782                         if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
 2783                             NFSLCK_READACCESS)
 2784                             writedeleg = 0;
 2785                         else
 2786                             delegate = 0;
 2787                     }
 2788                     if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
 2789                         ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
 2790                        ((stp->ls_flags & NFSLCK_ACCESSBITS) &
 2791                         ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
 2792                         ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
 2793                         if (ret == 1) {
 2794                                 /*
 2795                                  * nfsrv_clientconflict() unlocks state
 2796                                  * when it returns non-zero.
 2797                                  */
 2798                                 free(new_open, M_NFSDSTATE);
 2799                                 free(new_deleg, M_NFSDSTATE);
 2800                                 openstp = NULL;
 2801                                 goto tryagain;
 2802                         }
 2803                         if (ret == 2)
 2804                                 error = NFSERR_PERM;
 2805                         else if (new_stp->ls_flags & NFSLCK_RECLAIM)
 2806                                 error = NFSERR_RECLAIMCONFLICT;
 2807                         else
 2808                                 error = NFSERR_SHAREDENIED;
 2809                         if (ret == 0)
 2810                                 NFSUNLOCKSTATE();
 2811                         if (haslock) {
 2812                                 NFSLOCKV4ROOTMUTEX();
 2813                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 2814                                 NFSUNLOCKV4ROOTMUTEX();
 2815                         }
 2816                         free(new_open, M_NFSDSTATE);
 2817                         free(new_deleg, M_NFSDSTATE);
 2818                         printf("nfsd openctrl unexpected client cnfl\n");
 2819                         goto out;
 2820                     }
 2821                 }
 2822         }
 2823 
 2824         /*
 2825          * Check for a conflicting delegation. If one is found, call
 2826          * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
 2827          * been set yet, it will get the lock. Otherwise, it will recall
 2828          * the delegation. Then, we try again...
 2829          * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
 2830          *  isn't a conflict.)
 2831          * I currently believe the conflict algorithm to be:
 2832          * For Open with Read Access and Deny None
 2833          * - there is a conflict iff a different client has a write delegation
 2834          * For Open with other Write Access or any Deny except None
 2835          * - there is a conflict if a different client has any delegation
 2836          * - there is a conflict if the same client has a read delegation
 2837          *   (The current consensus is that this last case should be
 2838          *    considered a conflict since the client with a read delegation
 2839          *    could have done an Open with ReadAccess and WriteDeny
 2840          *    locally and then not have checked for the WriteDeny.)
 2841          */
 2842         if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
 2843             stp = LIST_FIRST(&lfp->lf_deleg);
 2844             while (stp != LIST_END(&lfp->lf_deleg)) {
 2845                 nstp = LIST_NEXT(stp, ls_file);
 2846                 if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
 2847                         writedeleg = 0;
 2848                 else
 2849                         delegate = 0;
 2850                 if ((readonly && stp->ls_clp != clp &&
 2851                        (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
 2852                     (!readonly && (stp->ls_clp != clp ||
 2853                          (stp->ls_flags & NFSLCK_DELEGREAD)))) {
 2854                     if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 2855                         delegate = 2;
 2856                     } else {
 2857                         ret = nfsrv_delegconflict(stp, &haslock, p, vp);
 2858                         if (ret) {
 2859                             /*
 2860                              * nfsrv_delegconflict() unlocks state
 2861                              * when it returns non-zero.
 2862                              */
 2863                             printf("Nfsd openctrl unexpected deleg cnfl\n");
 2864                             free(new_open, M_NFSDSTATE);
 2865                             free(new_deleg, M_NFSDSTATE);
 2866                             if (ret == -1) {
 2867                                 openstp = NULL;
 2868                                 goto tryagain;
 2869                             }
 2870                             error = ret;
 2871                             goto out;
 2872                         }
 2873                     }
 2874                 }
 2875                 stp = nstp;
 2876             }
 2877         }
 2878 
 2879         /*
 2880          * We only get here if there was no open that conflicted.
 2881          * If an open for the owner exists, OR in the access/deny bits.
 2882          * Otherwise it is a new open. If the open_owner hasn't been
 2883          * confirmed, replace the open with the new one needing confirmation,
 2884          * otherwise add the open.
 2885          */
 2886         if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
 2887             /*
 2888              * Handle NFSLCK_DELEGPREV by searching the old delegations for
 2889              * a match. If found, just move the old delegation to the current
 2890              * delegation list and issue open. If not found, return
 2891              * NFSERR_EXPIRED.
 2892              */
 2893             LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
 2894                 if (stp->ls_lfp == lfp) {
 2895                     /* Found it */
 2896                     if (stp->ls_clp != clp)
 2897                         panic("olddeleg clp");
 2898                     LIST_REMOVE(stp, ls_list);
 2899                     LIST_REMOVE(stp, ls_hash);
 2900                     stp->ls_flags &= ~NFSLCK_OLDDELEG;
 2901                     stp->ls_stateid.seqid = delegstateidp->seqid = 1;
 2902                     stp->ls_stateid.other[0] = delegstateidp->other[0] =
 2903                         clp->lc_clientid.lval[0];
 2904                     stp->ls_stateid.other[1] = delegstateidp->other[1] =
 2905                         clp->lc_clientid.lval[1];
 2906                     stp->ls_stateid.other[2] = delegstateidp->other[2] =
 2907                         nfsrv_nextstateindex(clp);
 2908                     stp->ls_compref = nd->nd_compref;
 2909                     LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
 2910                     LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 2911                         stp->ls_stateid), stp, ls_hash);
 2912                     if (stp->ls_flags & NFSLCK_DELEGWRITE)
 2913                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 2914                     else
 2915                         *rflagsp |= NFSV4OPEN_READDELEGATE;
 2916                     clp->lc_delegtime = NFSD_MONOSEC +
 2917                         nfsrv_lease + NFSRV_LEASEDELTA;
 2918 
 2919                     /*
 2920                      * Now, do the associated open.
 2921                      */
 2922                     new_open->ls_stateid.seqid = 1;
 2923                     new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 2924                     new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 2925                     new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 2926                     new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
 2927                         NFSLCK_OPEN;
 2928                     if (stp->ls_flags & NFSLCK_DELEGWRITE)
 2929                         new_open->ls_flags |= (NFSLCK_READACCESS |
 2930                             NFSLCK_WRITEACCESS);
 2931                     else
 2932                         new_open->ls_flags |= NFSLCK_READACCESS;
 2933                     new_open->ls_uid = new_stp->ls_uid;
 2934                     new_open->ls_lfp = lfp;
 2935                     new_open->ls_clp = clp;
 2936                     LIST_INIT(&new_open->ls_open);
 2937                     LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 2938                     LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 2939                         new_open, ls_hash);
 2940                     /*
 2941                      * and handle the open owner
 2942                      */
 2943                     if (ownerstp) {
 2944                         new_open->ls_openowner = ownerstp;
 2945                         LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
 2946                     } else {
 2947                         new_open->ls_openowner = new_stp;
 2948                         new_stp->ls_flags = 0;
 2949                         nfsrvd_refcache(new_stp->ls_op);
 2950                         new_stp->ls_noopens = 0;
 2951                         LIST_INIT(&new_stp->ls_open);
 2952                         LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 2953                         LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 2954                         *new_stpp = NULL;
 2955                         nfsstatsv1.srvopenowners++;
 2956                         nfsrv_openpluslock++;
 2957                     }
 2958                     openstp = new_open;
 2959                     new_open = NULL;
 2960                     nfsstatsv1.srvopens++;
 2961                     nfsrv_openpluslock++;
 2962                     break;
 2963                 }
 2964             }
 2965             if (stp == LIST_END(&clp->lc_olddeleg))
 2966                 error = NFSERR_EXPIRED;
 2967         } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
 2968             /*
 2969              * Scan to see that a delegation for this client and file
 2970              * doesn't already exist.
 2971              * There also shouldn't yet be an Open for this file and
 2972              * openowner.
 2973              */
 2974             LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 2975                 if (stp->ls_clp == clp)
 2976                     break;
 2977             }
 2978             if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
 2979                 /*
 2980                  * This is the Claim_Previous case with a delegation
 2981                  * type != Delegate_None.
 2982                  */
 2983                 /*
 2984                  * First, add the delegation. (Although we must issue the
 2985                  * delegation, we can also ask for an immediate return.)
 2986                  */
 2987                 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 2988                 new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
 2989                     clp->lc_clientid.lval[0];
 2990                 new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
 2991                     clp->lc_clientid.lval[1];
 2992                 new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
 2993                     nfsrv_nextstateindex(clp);
 2994                 if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
 2995                     new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 2996                         NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 2997                     *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 2998                     nfsrv_writedelegcnt++;
 2999                 } else {
 3000                     new_deleg->ls_flags = (NFSLCK_DELEGREAD |
 3001                         NFSLCK_READACCESS);
 3002                     *rflagsp |= NFSV4OPEN_READDELEGATE;
 3003                 }
 3004                 new_deleg->ls_uid = new_stp->ls_uid;
 3005                 new_deleg->ls_lfp = lfp;
 3006                 new_deleg->ls_clp = clp;
 3007                 new_deleg->ls_filerev = filerev;
 3008                 new_deleg->ls_compref = nd->nd_compref;
 3009                 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3010                 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3011                     new_deleg->ls_stateid), new_deleg, ls_hash);
 3012                 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3013                 new_deleg = NULL;
 3014                 if (delegate == 2 || nfsrv_issuedelegs == 0 ||
 3015                     (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3016                      LCL_CALLBACKSON ||
 3017                     NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
 3018                     !NFSVNO_DELEGOK(vp))
 3019                     *rflagsp |= NFSV4OPEN_RECALL;
 3020                 nfsstatsv1.srvdelegates++;
 3021                 nfsrv_openpluslock++;
 3022                 nfsrv_delegatecnt++;
 3023 
 3024                 /*
 3025                  * Now, do the associated open.
 3026                  */
 3027                 new_open->ls_stateid.seqid = 1;
 3028                 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3029                 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3030                 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3031                 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
 3032                     NFSLCK_OPEN;
 3033                 if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
 3034                         new_open->ls_flags |= (NFSLCK_READACCESS |
 3035                             NFSLCK_WRITEACCESS);
 3036                 else
 3037                         new_open->ls_flags |= NFSLCK_READACCESS;
 3038                 new_open->ls_uid = new_stp->ls_uid;
 3039                 new_open->ls_lfp = lfp;
 3040                 new_open->ls_clp = clp;
 3041                 LIST_INIT(&new_open->ls_open);
 3042                 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3043                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3044                    new_open, ls_hash);
 3045                 /*
 3046                  * and handle the open owner
 3047                  */
 3048                 if (ownerstp) {
 3049                     new_open->ls_openowner = ownerstp;
 3050                     LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
 3051                 } else {
 3052                     new_open->ls_openowner = new_stp;
 3053                     new_stp->ls_flags = 0;
 3054                     nfsrvd_refcache(new_stp->ls_op);
 3055                     new_stp->ls_noopens = 0;
 3056                     LIST_INIT(&new_stp->ls_open);
 3057                     LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3058                     LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3059                     *new_stpp = NULL;
 3060                     nfsstatsv1.srvopenowners++;
 3061                     nfsrv_openpluslock++;
 3062                 }
 3063                 openstp = new_open;
 3064                 new_open = NULL;
 3065                 nfsstatsv1.srvopens++;
 3066                 nfsrv_openpluslock++;
 3067             } else {
 3068                 error = NFSERR_RECLAIMCONFLICT;
 3069             }
 3070         } else if (ownerstp) {
 3071                 if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
 3072                     /* Replace the open */
 3073                     if (ownerstp->ls_op)
 3074                         nfsrvd_derefcache(ownerstp->ls_op);
 3075                     ownerstp->ls_op = new_stp->ls_op;
 3076                     nfsrvd_refcache(ownerstp->ls_op);
 3077                     ownerstp->ls_seq = new_stp->ls_seq;
 3078                     *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
 3079                     stp = LIST_FIRST(&ownerstp->ls_open);
 3080                     stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
 3081                         NFSLCK_OPEN;
 3082                     stp->ls_stateid.seqid = 1;
 3083                     stp->ls_uid = new_stp->ls_uid;
 3084                     if (lfp != stp->ls_lfp) {
 3085                         LIST_REMOVE(stp, ls_file);
 3086                         LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
 3087                         stp->ls_lfp = lfp;
 3088                     }
 3089                     openstp = stp;
 3090                 } else if (openstp) {
 3091                     openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
 3092                     openstp->ls_stateid.seqid++;
 3093                     if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3094                         openstp->ls_stateid.seqid == 0)
 3095                         openstp->ls_stateid.seqid = 1;
 3096 
 3097                     /*
 3098                      * This is where we can choose to issue a delegation.
 3099                      */
 3100                     if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
 3101                         *rflagsp |= NFSV4OPEN_WDNOTWANTED;
 3102                     else if (nfsrv_issuedelegs == 0)
 3103                         *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
 3104                     else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
 3105                         *rflagsp |= NFSV4OPEN_WDRESOURCE;
 3106                     else if (delegate == 0 || writedeleg == 0 ||
 3107                         NFSVNO_EXRDONLY(exp) || (readonly != 0 &&
 3108                         nfsrv_writedelegifpos == 0) ||
 3109                         !NFSVNO_DELEGOK(vp) ||
 3110                         (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 ||
 3111                         (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3112                          LCL_CALLBACKSON)
 3113                         *rflagsp |= NFSV4OPEN_WDCONTENTION;
 3114                     else {
 3115                         new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3116                         new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
 3117                             = clp->lc_clientid.lval[0];
 3118                         new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
 3119                             = clp->lc_clientid.lval[1];
 3120                         new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
 3121                             = nfsrv_nextstateindex(clp);
 3122                         new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3123                             NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3124                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3125                         new_deleg->ls_uid = new_stp->ls_uid;
 3126                         new_deleg->ls_lfp = lfp;
 3127                         new_deleg->ls_clp = clp;
 3128                         new_deleg->ls_filerev = filerev;
 3129                         new_deleg->ls_compref = nd->nd_compref;
 3130                         nfsrv_writedelegcnt++;
 3131                         LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3132                         LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3133                             new_deleg->ls_stateid), new_deleg, ls_hash);
 3134                         LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3135                         new_deleg = NULL;
 3136                         nfsstatsv1.srvdelegates++;
 3137                         nfsrv_openpluslock++;
 3138                         nfsrv_delegatecnt++;
 3139                     }
 3140                 } else {
 3141                     new_open->ls_stateid.seqid = 1;
 3142                     new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3143                     new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3144                     new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3145                     new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
 3146                         NFSLCK_OPEN;
 3147                     new_open->ls_uid = new_stp->ls_uid;
 3148                     new_open->ls_openowner = ownerstp;
 3149                     new_open->ls_lfp = lfp;
 3150                     new_open->ls_clp = clp;
 3151                     LIST_INIT(&new_open->ls_open);
 3152                     LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3153                     LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
 3154                     LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3155                         new_open, ls_hash);
 3156                     openstp = new_open;
 3157                     new_open = NULL;
 3158                     nfsstatsv1.srvopens++;
 3159                     nfsrv_openpluslock++;
 3160 
 3161                     /*
 3162                      * This is where we can choose to issue a delegation.
 3163                      */
 3164                     if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
 3165                         *rflagsp |= NFSV4OPEN_WDNOTWANTED;
 3166                     else if (nfsrv_issuedelegs == 0)
 3167                         *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
 3168                     else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
 3169                         *rflagsp |= NFSV4OPEN_WDRESOURCE;
 3170                     else if (delegate == 0 || (writedeleg == 0 &&
 3171                         readonly == 0) || !NFSVNO_DELEGOK(vp) ||
 3172                         (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
 3173                          LCL_CALLBACKSON)
 3174                         *rflagsp |= NFSV4OPEN_WDCONTENTION;
 3175                     else {
 3176                         new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
 3177                         new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
 3178                             = clp->lc_clientid.lval[0];
 3179                         new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
 3180                             = clp->lc_clientid.lval[1];
 3181                         new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
 3182                             = nfsrv_nextstateindex(clp);
 3183                         if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
 3184                             (nfsrv_writedelegifpos || !readonly) &&
 3185                             (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) {
 3186                             new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
 3187                                 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 3188                             *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3189                             nfsrv_writedelegcnt++;
 3190                         } else {
 3191                             new_deleg->ls_flags = (NFSLCK_DELEGREAD |
 3192                                 NFSLCK_READACCESS);
 3193                             *rflagsp |= NFSV4OPEN_READDELEGATE;
 3194                         }
 3195                         new_deleg->ls_uid = new_stp->ls_uid;
 3196                         new_deleg->ls_lfp = lfp;
 3197                         new_deleg->ls_clp = clp;
 3198                         new_deleg->ls_filerev = filerev;
 3199                         new_deleg->ls_compref = nd->nd_compref;
 3200                         LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
 3201                         LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3202                             new_deleg->ls_stateid), new_deleg, ls_hash);
 3203                         LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
 3204                         new_deleg = NULL;
 3205                         nfsstatsv1.srvdelegates++;
 3206                         nfsrv_openpluslock++;
 3207                         nfsrv_delegatecnt++;
 3208                     }
 3209                 }
 3210         } else {
 3211                 /*
 3212                  * New owner case. Start the open_owner sequence with a
 3213                  * Needs confirmation (unless a reclaim) and hang the
 3214                  * new open off it.
 3215                  */
 3216                 new_open->ls_stateid.seqid = 1;
 3217                 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
 3218                 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
 3219                 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
 3220                 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
 3221                     NFSLCK_OPEN;
 3222                 new_open->ls_uid = new_stp->ls_uid;
 3223                 LIST_INIT(&new_open->ls_open);
 3224                 new_open->ls_openowner = new_stp;
 3225                 new_open->ls_lfp = lfp;
 3226                 new_open->ls_clp = clp;
 3227                 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
 3228                 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
 3229                         new_stp->ls_flags = 0;
 3230                 } else if ((nd->nd_flag & ND_NFSV41) != 0) {
 3231                         /* NFSv4.1 never needs confirmation. */
 3232                         new_stp->ls_flags = 0;
 3233 
 3234                         /*
 3235                          * This is where we can choose to issue a delegation.
 3236                          */
 3237                         if (delegate && nfsrv_issuedelegs &&
 3238                             (writedeleg || readonly) &&
 3239                             (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) ==
 3240                              LCL_CALLBACKSON &&
 3241                             !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) &&
 3242                             NFSVNO_DELEGOK(vp) &&
 3243                             ((nd->nd_flag & ND_NFSV41) == 0 ||
 3244                              (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) {
 3245                                 new_deleg->ls_stateid.seqid =
 3246                                     delegstateidp->seqid = 1;
 3247                                 new_deleg->ls_stateid.other[0] =
 3248                                     delegstateidp->other[0]
 3249                                     = clp->lc_clientid.lval[0];
 3250                                 new_deleg->ls_stateid.other[1] =
 3251                                     delegstateidp->other[1]
 3252                                     = clp->lc_clientid.lval[1];
 3253                                 new_deleg->ls_stateid.other[2] =
 3254                                     delegstateidp->other[2]
 3255                                     = nfsrv_nextstateindex(clp);
 3256                                 if (writedeleg && !NFSVNO_EXRDONLY(exp) &&
 3257                                     (nfsrv_writedelegifpos || !readonly) &&
 3258                                     ((nd->nd_flag & ND_NFSV41) == 0 ||
 3259                                      (new_stp->ls_flags & NFSLCK_WANTRDELEG) ==
 3260                                      0)) {
 3261                                         new_deleg->ls_flags =
 3262                                             (NFSLCK_DELEGWRITE |
 3263                                              NFSLCK_READACCESS |
 3264                                              NFSLCK_WRITEACCESS);
 3265                                         *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
 3266                                         nfsrv_writedelegcnt++;
 3267                                 } else {
 3268                                         new_deleg->ls_flags =
 3269                                             (NFSLCK_DELEGREAD |
 3270                                              NFSLCK_READACCESS);
 3271                                         *rflagsp |= NFSV4OPEN_READDELEGATE;
 3272                                 }
 3273                                 new_deleg->ls_uid = new_stp->ls_uid;
 3274                                 new_deleg->ls_lfp = lfp;
 3275                                 new_deleg->ls_clp = clp;
 3276                                 new_deleg->ls_filerev = filerev;
 3277                                 new_deleg->ls_compref = nd->nd_compref;
 3278                                 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg,
 3279                                     ls_file);
 3280                                 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
 3281                                     new_deleg->ls_stateid), new_deleg, ls_hash);
 3282                                 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg,
 3283                                     ls_list);
 3284                                 new_deleg = NULL;
 3285                                 nfsstatsv1.srvdelegates++;
 3286                                 nfsrv_openpluslock++;
 3287                                 nfsrv_delegatecnt++;
 3288                         }
 3289                         /*
 3290                          * Since NFSv4.1 never does an OpenConfirm, the first
 3291                          * open state will be acquired here.
 3292                          */
 3293                         if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
 3294                                 clp->lc_flags |= LCL_STAMPEDSTABLE;
 3295                                 len = clp->lc_idlen;
 3296                                 NFSBCOPY(clp->lc_id, clidp, len);
 3297                                 gotstate = 1;
 3298                         }
 3299                 } else {
 3300                         *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
 3301                         new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
 3302                 }
 3303                 nfsrvd_refcache(new_stp->ls_op);
 3304                 new_stp->ls_noopens = 0;
 3305                 LIST_INIT(&new_stp->ls_open);
 3306                 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
 3307                 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
 3308                 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
 3309                     new_open, ls_hash);
 3310                 openstp = new_open;
 3311                 new_open = NULL;
 3312                 *new_stpp = NULL;
 3313                 nfsstatsv1.srvopens++;
 3314                 nfsrv_openpluslock++;
 3315                 nfsstatsv1.srvopenowners++;
 3316                 nfsrv_openpluslock++;
 3317         }
 3318         if (!error) {
 3319                 stateidp->seqid = openstp->ls_stateid.seqid;
 3320                 stateidp->other[0] = openstp->ls_stateid.other[0];
 3321                 stateidp->other[1] = openstp->ls_stateid.other[1];
 3322                 stateidp->other[2] = openstp->ls_stateid.other[2];
 3323         }
 3324         NFSUNLOCKSTATE();
 3325         if (haslock) {
 3326                 NFSLOCKV4ROOTMUTEX();
 3327                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 3328                 NFSUNLOCKV4ROOTMUTEX();
 3329         }
 3330         if (new_open)
 3331                 free(new_open, M_NFSDSTATE);
 3332         if (new_deleg)
 3333                 free(new_deleg, M_NFSDSTATE);
 3334 
 3335         /*
 3336          * If the NFSv4.1 client just acquired its first open, write a timestamp
 3337          * to the stable storage file.
 3338          */
 3339         if (gotstate != 0) {
 3340                 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
 3341                 nfsrv_backupstable();
 3342         }
 3343 
 3344 out:
 3345         free(clidp, M_TEMP);
 3346         NFSEXITCODE2(error, nd);
 3347         return (error);
 3348 }
 3349 
 3350 /*
 3351  * Open update. Does the confirm, downgrade and close.
 3352  */
 3353 APPLESTATIC int
 3354 nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
 3355     nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p,
 3356     int *retwriteaccessp)
 3357 {
 3358         struct nfsstate *stp;
 3359         struct nfsclient *clp;
 3360         struct nfslockfile *lfp;
 3361         u_int32_t bits;
 3362         int error = 0, gotstate = 0, len = 0;
 3363         u_char *clidp = NULL;
 3364 
 3365         /*
 3366          * Check for restart conditions (client and server).
 3367          */
 3368         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 3369             &new_stp->ls_stateid, 0);
 3370         if (error)
 3371                 goto out;
 3372 
 3373         clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
 3374         NFSLOCKSTATE();
 3375         /*
 3376          * Get the open structure via clientid and stateid.
 3377          */
 3378         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3379             (nfsquad_t)((u_quad_t)0), 0, nd, p);
 3380         if (!error)
 3381                 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
 3382                     new_stp->ls_flags, &stp);
 3383 
 3384         /*
 3385          * Sanity check the open.
 3386          */
 3387         if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
 3388                 (!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3389                  (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
 3390                 ((new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3391                  (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
 3392                 error = NFSERR_BADSTATEID;
 3393 
 3394         if (!error)
 3395                 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
 3396                     stp->ls_openowner, new_stp->ls_op);
 3397         if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
 3398             (((nd->nd_flag & ND_NFSV41) == 0 &&
 3399               !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
 3400              ((nd->nd_flag & ND_NFSV41) != 0 &&
 3401               new_stp->ls_stateid.seqid != 0)))
 3402                 error = NFSERR_OLDSTATEID;
 3403         if (!error && vnode_vtype(vp) != VREG) {
 3404                 if (vnode_vtype(vp) == VDIR)
 3405                         error = NFSERR_ISDIR;
 3406                 else
 3407                         error = NFSERR_INVAL;
 3408         }
 3409 
 3410         if (error) {
 3411                 /*
 3412                  * If a client tries to confirm an Open with a bad
 3413                  * seqid# and there are no byte range locks or other Opens
 3414                  * on the openowner, just throw it away, so the next use of the
 3415                  * openowner will start a fresh seq#.
 3416                  */
 3417                 if (error == NFSERR_BADSEQID &&
 3418                     (new_stp->ls_flags & NFSLCK_CONFIRM) &&
 3419                     nfsrv_nootherstate(stp))
 3420                         nfsrv_freeopenowner(stp->ls_openowner, 0, p);
 3421                 NFSUNLOCKSTATE();
 3422                 goto out;
 3423         }
 3424 
 3425         /*
 3426          * Set the return stateid.
 3427          */
 3428         stateidp->seqid = stp->ls_stateid.seqid + 1;
 3429         if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
 3430                 stateidp->seqid = 1;
 3431         stateidp->other[0] = stp->ls_stateid.other[0];
 3432         stateidp->other[1] = stp->ls_stateid.other[1];
 3433         stateidp->other[2] = stp->ls_stateid.other[2];
 3434         /*
 3435          * Now, handle the three cases.
 3436          */
 3437         if (new_stp->ls_flags & NFSLCK_CONFIRM) {
 3438                 /*
 3439                  * If the open doesn't need confirmation, it seems to me that
 3440                  * there is a client error, but I'll just log it and keep going?
 3441                  */
 3442                 if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
 3443                         printf("Nfsv4d: stray open confirm\n");
 3444                 stp->ls_openowner->ls_flags = 0;
 3445                 stp->ls_stateid.seqid++;
 3446                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3447                     stp->ls_stateid.seqid == 0)
 3448                         stp->ls_stateid.seqid = 1;
 3449                 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
 3450                         clp->lc_flags |= LCL_STAMPEDSTABLE;
 3451                         len = clp->lc_idlen;
 3452                         NFSBCOPY(clp->lc_id, clidp, len);
 3453                         gotstate = 1;
 3454                 }
 3455                 NFSUNLOCKSTATE();
 3456         } else if (new_stp->ls_flags & NFSLCK_CLOSE) {
 3457                 lfp = stp->ls_lfp;
 3458                 if (retwriteaccessp != NULL) {
 3459                         if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
 3460                                 *retwriteaccessp = 1;
 3461                         else
 3462                                 *retwriteaccessp = 0;
 3463                 }
 3464                 if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
 3465                         /* Get the lf lock */
 3466                         nfsrv_locklf(lfp);
 3467                         NFSUNLOCKSTATE();
 3468                         ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
 3469                         NFSVOPUNLOCK(vp, 0);
 3470                         if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
 3471                                 NFSLOCKSTATE();
 3472                                 nfsrv_unlocklf(lfp);
 3473                                 NFSUNLOCKSTATE();
 3474                         }
 3475                         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 3476                 } else {
 3477                         (void) nfsrv_freeopen(stp, NULL, 0, p);
 3478                         NFSUNLOCKSTATE();
 3479                 }
 3480         } else {
 3481                 /*
 3482                  * Update the share bits, making sure that the new set are a
 3483                  * subset of the old ones.
 3484                  */
 3485                 bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
 3486                 if (~(stp->ls_flags) & bits) {
 3487                         NFSUNLOCKSTATE();
 3488                         error = NFSERR_INVAL;
 3489                         goto out;
 3490                 }
 3491                 stp->ls_flags = (bits | NFSLCK_OPEN);
 3492                 stp->ls_stateid.seqid++;
 3493                 if ((nd->nd_flag & ND_NFSV41) != 0 &&
 3494                     stp->ls_stateid.seqid == 0)
 3495                         stp->ls_stateid.seqid = 1;
 3496                 NFSUNLOCKSTATE();
 3497         }
 3498 
 3499         /*
 3500          * If the client just confirmed its first open, write a timestamp
 3501          * to the stable storage file.
 3502          */
 3503         if (gotstate != 0) {
 3504                 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
 3505                 nfsrv_backupstable();
 3506         }
 3507 
 3508 out:
 3509         free(clidp, M_TEMP);
 3510         NFSEXITCODE2(error, nd);
 3511         return (error);
 3512 }
 3513 
 3514 /*
 3515  * Delegation update. Does the purge and return.
 3516  */
 3517 APPLESTATIC int
 3518 nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
 3519     nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
 3520     NFSPROC_T *p, int *retwriteaccessp)
 3521 {
 3522         struct nfsstate *stp;
 3523         struct nfsclient *clp;
 3524         int error = 0;
 3525         fhandle_t fh;
 3526 
 3527         /*
 3528          * Do a sanity check against the file handle for DelegReturn.
 3529          */
 3530         if (vp) {
 3531                 error = nfsvno_getfh(vp, &fh, p);
 3532                 if (error)
 3533                         goto out;
 3534         }
 3535         /*
 3536          * Check for restart conditions (client and server).
 3537          */
 3538         if (op == NFSV4OP_DELEGRETURN)
 3539                 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
 3540                         stateidp, 0);
 3541         else
 3542                 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
 3543                         stateidp, 0);
 3544 
 3545         NFSLOCKSTATE();
 3546         /*
 3547          * Get the open structure via clientid and stateid.
 3548          */
 3549         if (!error)
 3550             error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3551                 (nfsquad_t)((u_quad_t)0), 0, nd, p);
 3552         if (error) {
 3553                 if (error == NFSERR_CBPATHDOWN)
 3554                         error = 0;
 3555                 if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
 3556                         error = NFSERR_STALESTATEID;
 3557         }
 3558         if (!error && op == NFSV4OP_DELEGRETURN) {
 3559             error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
 3560             if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
 3561                 ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
 3562                 error = NFSERR_OLDSTATEID;
 3563         }
 3564         /*
 3565          * NFSERR_EXPIRED means that the state has gone away,
 3566          * so Delegations have been purged. Just return ok.
 3567          */
 3568         if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
 3569                 NFSUNLOCKSTATE();
 3570                 error = 0;
 3571                 goto out;
 3572         }
 3573         if (error) {
 3574                 NFSUNLOCKSTATE();
 3575                 goto out;
 3576         }
 3577 
 3578         if (op == NFSV4OP_DELEGRETURN) {
 3579                 if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
 3580                     sizeof (fhandle_t))) {
 3581                         NFSUNLOCKSTATE();
 3582                         error = NFSERR_BADSTATEID;
 3583                         goto out;
 3584                 }
 3585                 if (retwriteaccessp != NULL) {
 3586                         if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
 3587                                 *retwriteaccessp = 1;
 3588                         else
 3589                                 *retwriteaccessp = 0;
 3590                 }
 3591                 nfsrv_freedeleg(stp);
 3592         } else {
 3593                 nfsrv_freedeleglist(&clp->lc_olddeleg);
 3594         }
 3595         NFSUNLOCKSTATE();
 3596         error = 0;
 3597 
 3598 out:
 3599         NFSEXITCODE(error);
 3600         return (error);
 3601 }
 3602 
 3603 /*
 3604  * Release lock owner.
 3605  */
 3606 APPLESTATIC int
 3607 nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
 3608     NFSPROC_T *p)
 3609 {
 3610         struct nfsstate *stp, *nstp, *openstp, *ownstp;
 3611         struct nfsclient *clp;
 3612         int error = 0;
 3613 
 3614         /*
 3615          * Check for restart conditions (client and server).
 3616          */
 3617         error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
 3618             &new_stp->ls_stateid, 0);
 3619         if (error)
 3620                 goto out;
 3621 
 3622         NFSLOCKSTATE();
 3623         /*
 3624          * Get the lock owner by name.
 3625          */
 3626         error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
 3627             (nfsquad_t)((u_quad_t)0), 0, NULL, p);
 3628         if (error) {
 3629                 NFSUNLOCKSTATE();
 3630                 goto out;
 3631         }
 3632         LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
 3633             LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
 3634                 stp = LIST_FIRST(&openstp->ls_open);
 3635                 while (stp != LIST_END(&openstp->ls_open)) {
 3636                     nstp = LIST_NEXT(stp, ls_list);
 3637                     /*
 3638                      * If the owner matches, check for locks and
 3639                      * then free or return an error.
 3640                      */
 3641                     if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
 3642                         !NFSBCMP(stp->ls_owner, new_stp->ls_owner,
 3643                          stp->ls_ownerlen)){
 3644                         if (LIST_EMPTY(&stp->ls_lock)) {
 3645                             nfsrv_freelockowner(stp, NULL, 0, p);
 3646                         } else {
 3647                             NFSUNLOCKSTATE();
 3648                             error = NFSERR_LOCKSHELD;
 3649                             goto out;
 3650                         }
 3651                     }
 3652                     stp = nstp;
 3653                 }
 3654             }
 3655         }
 3656         NFSUNLOCKSTATE();
 3657 
 3658 out:
 3659         NFSEXITCODE(error);
 3660         return (error);
 3661 }
 3662 
 3663 /*
 3664  * Get the file handle for a lock structure.
 3665  */
 3666 static int
 3667 nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
 3668     fhandle_t *nfhp, NFSPROC_T *p)
 3669 {
 3670         fhandle_t *fhp = NULL;
 3671         int error;
 3672 
 3673         /*
 3674          * For lock, use the new nfslock structure, otherwise just
 3675          * a fhandle_t on the stack.
 3676          */
 3677         if (flags & NFSLCK_OPEN) {
 3678                 KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
 3679                 fhp = &new_lfp->lf_fh;
 3680         } else if (nfhp) {
 3681                 fhp = nfhp;
 3682         } else {
 3683                 panic("nfsrv_getlockfh");
 3684         }
 3685         error = nfsvno_getfh(vp, fhp, p);
 3686         NFSEXITCODE(error);
 3687         return (error);
 3688 }
 3689 
 3690 /*
 3691  * Get an nfs lock structure. Allocate one, as required, and return a
 3692  * pointer to it.
 3693  * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
 3694  */
 3695 static int
 3696 nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
 3697     struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
 3698 {
 3699         struct nfslockfile *lfp;
 3700         fhandle_t *fhp = NULL, *tfhp;
 3701         struct nfslockhashhead *hp;
 3702         struct nfslockfile *new_lfp = NULL;
 3703 
 3704         /*
 3705          * For lock, use the new nfslock structure, otherwise just
 3706          * a fhandle_t on the stack.
 3707          */
 3708         if (flags & NFSLCK_OPEN) {
 3709                 new_lfp = *new_lfpp;
 3710                 fhp = &new_lfp->lf_fh;
 3711         } else if (nfhp) {
 3712                 fhp = nfhp;
 3713         } else {
 3714                 panic("nfsrv_getlockfile");
 3715         }
 3716 
 3717         hp = NFSLOCKHASH(fhp);
 3718         LIST_FOREACH(lfp, hp, lf_hash) {
 3719                 tfhp = &lfp->lf_fh;
 3720                 if (NFSVNO_CMPFH(fhp, tfhp)) {
 3721                         if (lockit)
 3722                                 nfsrv_locklf(lfp);
 3723                         *lfpp = lfp;
 3724                         return (0);
 3725                 }
 3726         }
 3727         if (!(flags & NFSLCK_OPEN))
 3728                 return (-1);
 3729 
 3730         /*
 3731          * No match, so chain the new one into the list.
 3732          */
 3733         LIST_INIT(&new_lfp->lf_open);
 3734         LIST_INIT(&new_lfp->lf_lock);
 3735         LIST_INIT(&new_lfp->lf_deleg);
 3736         LIST_INIT(&new_lfp->lf_locallock);
 3737         LIST_INIT(&new_lfp->lf_rollback);
 3738         new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
 3739         new_lfp->lf_locallock_lck.nfslock_lock = 0;
 3740         new_lfp->lf_usecount = 0;
 3741         LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
 3742         *lfpp = new_lfp;
 3743         *new_lfpp = NULL;
 3744         return (0);
 3745 }
 3746 
 3747 /*
 3748  * This function adds a nfslock lock structure to the list for the associated
 3749  * nfsstate and nfslockfile structures. It will be inserted after the
 3750  * entry pointed at by insert_lop.
 3751  */
 3752 static void
 3753 nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
 3754     struct nfsstate *stp, struct nfslockfile *lfp)
 3755 {
 3756         struct nfslock *lop, *nlop;
 3757 
 3758         new_lop->lo_stp = stp;
 3759         new_lop->lo_lfp = lfp;
 3760 
 3761         if (stp != NULL) {
 3762                 /* Insert in increasing lo_first order */
 3763                 lop = LIST_FIRST(&lfp->lf_lock);
 3764                 if (lop == LIST_END(&lfp->lf_lock) ||
 3765                     new_lop->lo_first <= lop->lo_first) {
 3766                         LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
 3767                 } else {
 3768                         nlop = LIST_NEXT(lop, lo_lckfile);
 3769                         while (nlop != LIST_END(&lfp->lf_lock) &&
 3770                                nlop->lo_first < new_lop->lo_first) {
 3771                                 lop = nlop;
 3772                                 nlop = LIST_NEXT(lop, lo_lckfile);
 3773                         }
 3774                         LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
 3775                 }
 3776         } else {
 3777                 new_lop->lo_lckfile.le_prev = NULL;     /* list not used */
 3778         }
 3779 
 3780         /*
 3781          * Insert after insert_lop, which is overloaded as stp or lfp for
 3782          * an empty list.
 3783          */
 3784         if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
 3785                 LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
 3786         else if ((struct nfsstate *)insert_lop == stp)
 3787                 LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
 3788         else
 3789                 LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
 3790         if (stp != NULL) {
 3791                 nfsstatsv1.srvlocks++;
 3792                 nfsrv_openpluslock++;
 3793         }
 3794 }
 3795 
 3796 /*
 3797  * This function updates the locking for a lock owner and given file. It
 3798  * maintains a list of lock ranges ordered on increasing file offset that
 3799  * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
 3800  * It always adds new_lop to the list and sometimes uses the one pointed
 3801  * at by other_lopp.
 3802  */
 3803 static void
 3804 nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
 3805     struct nfslock **other_lopp, struct nfslockfile *lfp)
 3806 {
 3807         struct nfslock *new_lop = *new_lopp;
 3808         struct nfslock *lop, *tlop, *ilop;
 3809         struct nfslock *other_lop = *other_lopp;
 3810         int unlock = 0, myfile = 0;
 3811         u_int64_t tmp;
 3812 
 3813         /*
 3814          * Work down the list until the lock is merged.
 3815          */
 3816         if (new_lop->lo_flags & NFSLCK_UNLOCK)
 3817                 unlock = 1;
 3818         if (stp != NULL) {
 3819                 ilop = (struct nfslock *)stp;
 3820                 lop = LIST_FIRST(&stp->ls_lock);
 3821         } else {
 3822                 ilop = (struct nfslock *)lfp;
 3823                 lop = LIST_FIRST(&lfp->lf_locallock);
 3824         }
 3825         while (lop != NULL) {
 3826             /*
 3827              * Only check locks for this file that aren't before the start of
 3828              * new lock's range.
 3829              */
 3830             if (lop->lo_lfp == lfp) {
 3831               myfile = 1;
 3832               if (lop->lo_end >= new_lop->lo_first) {
 3833                 if (new_lop->lo_end < lop->lo_first) {
 3834                         /*
 3835                          * If the new lock ends before the start of the
 3836                          * current lock's range, no merge, just insert
 3837                          * the new lock.
 3838                          */
 3839                         break;
 3840                 }
 3841                 if (new_lop->lo_flags == lop->lo_flags ||
 3842                     (new_lop->lo_first <= lop->lo_first &&
 3843                      new_lop->lo_end >= lop->lo_end)) {
 3844                         /*
 3845                          * This lock can be absorbed by the new lock/unlock.
 3846                          * This happens when it covers the entire range
 3847                          * of the old lock or is contiguous
 3848                          * with the old lock and is of the same type or an
 3849                          * unlock.
 3850                          */
 3851                         if (lop->lo_first < new_lop->lo_first)
 3852                                 new_lop->lo_first = lop->lo_first;
 3853                         if (lop->lo_end > new_lop->lo_end)
 3854                                 new_lop->lo_end = lop->lo_end;
 3855                         tlop = lop;
 3856                         lop = LIST_NEXT(lop, lo_lckowner);
 3857                         nfsrv_freenfslock(tlop);
 3858                         continue;
 3859                 }
 3860 
 3861                 /*
 3862                  * All these cases are for contiguous locks that are not the
 3863                  * same type, so they can't be merged.
 3864                  */
 3865                 if (new_lop->lo_first <= lop->lo_first) {
 3866                         /*
 3867                          * This case is where the new lock overlaps with the
 3868                          * first part of the old lock. Move the start of the
 3869                          * old lock to just past the end of the new lock. The
 3870                          * new lock will be inserted in front of the old, since
 3871                          * ilop hasn't been updated. (We are done now.)
 3872                          */
 3873                         lop->lo_first = new_lop->lo_end;
 3874                         break;
 3875                 }
 3876                 if (new_lop->lo_end >= lop->lo_end) {
 3877                         /*
 3878                          * This case is where the new lock overlaps with the
 3879                          * end of the old lock's range. Move the old lock's
 3880                          * end to just before the new lock's first and insert
 3881                          * the new lock after the old lock.
 3882                          * Might not be done yet, since the new lock could
 3883                          * overlap further locks with higher ranges.
 3884                          */
 3885                         lop->lo_end = new_lop->lo_first;
 3886                         ilop = lop;
 3887                         lop = LIST_NEXT(lop, lo_lckowner);
 3888                         continue;
 3889                 }
 3890                 /*
 3891                  * The final case is where the new lock's range is in the
 3892                  * middle of the current lock's and splits the current lock
 3893                  * up. Use *other_lopp to handle the second part of the
 3894                  * split old lock range. (We are done now.)
 3895                  * For unlock, we use new_lop as other_lop and tmp, since
 3896                  * other_lop and new_lop are the same for this case.
 3897                  * We noted the unlock case above, so we don't need
 3898                  * new_lop->lo_flags any longer.
 3899                  */
 3900                 tmp = new_lop->lo_first;
 3901                 if (other_lop == NULL) {
 3902                         if (!unlock)
 3903                                 panic("nfsd srv update unlock");
 3904                         other_lop = new_lop;
 3905                         *new_lopp = NULL;
 3906                 }
 3907                 other_lop->lo_first = new_lop->lo_end;
 3908                 other_lop->lo_end = lop->lo_end;
 3909                 other_lop->lo_flags = lop->lo_flags;
 3910                 other_lop->lo_stp = stp;
 3911                 other_lop->lo_lfp = lfp;
 3912                 lop->lo_end = tmp;
 3913                 nfsrv_insertlock(other_lop, lop, stp, lfp);
 3914                 *other_lopp = NULL;
 3915                 ilop = lop;
 3916                 break;
 3917               }
 3918             }
 3919             ilop = lop;
 3920             lop = LIST_NEXT(lop, lo_lckowner);
 3921             if (myfile && (lop == NULL || lop->lo_lfp != lfp))
 3922                 break;
 3923         }
 3924 
 3925         /*
 3926          * Insert the new lock in the list at the appropriate place.
 3927          */
 3928         if (!unlock) {
 3929                 nfsrv_insertlock(new_lop, ilop, stp, lfp);
 3930                 *new_lopp = NULL;
 3931         }
 3932 }
 3933 
 3934 /*
 3935  * This function handles sequencing of locks, etc.
 3936  * It returns an error that indicates what the caller should do.
 3937  */
 3938 static int
 3939 nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
 3940     struct nfsstate *stp, struct nfsrvcache *op)
 3941 {
 3942         int error = 0;
 3943 
 3944         if ((nd->nd_flag & ND_NFSV41) != 0)
 3945                 /* NFSv4.1 ignores the open_seqid and lock_seqid. */
 3946                 goto out;
 3947         if (op != nd->nd_rp)
 3948                 panic("nfsrvstate checkseqid");
 3949         if (!(op->rc_flag & RC_INPROG))
 3950                 panic("nfsrvstate not inprog");
 3951         if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
 3952                 printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
 3953                 panic("nfsrvstate op refcnt");
 3954         }
 3955         if ((stp->ls_seq + 1) == seqid) {
 3956                 if (stp->ls_op)
 3957                         nfsrvd_derefcache(stp->ls_op);
 3958                 stp->ls_op = op;
 3959                 nfsrvd_refcache(op);
 3960                 stp->ls_seq = seqid;
 3961                 goto out;
 3962         } else if (stp->ls_seq == seqid && stp->ls_op &&
 3963                 op->rc_xid == stp->ls_op->rc_xid &&
 3964                 op->rc_refcnt == 0 &&
 3965                 op->rc_reqlen == stp->ls_op->rc_reqlen &&
 3966                 op->rc_cksum == stp->ls_op->rc_cksum) {
 3967                 if (stp->ls_op->rc_flag & RC_INPROG) {
 3968                         error = NFSERR_DONTREPLY;
 3969                         goto out;
 3970                 }
 3971                 nd->nd_rp = stp->ls_op;
 3972                 nd->nd_rp->rc_flag |= RC_INPROG;
 3973                 nfsrvd_delcache(op);
 3974                 error = NFSERR_REPLYFROMCACHE;
 3975                 goto out;
 3976         }
 3977         error = NFSERR_BADSEQID;
 3978 
 3979 out:
 3980         NFSEXITCODE2(error, nd);
 3981         return (error);
 3982 }
 3983 
 3984 /*
 3985  * Get the client ip address for callbacks. If the strings can't be parsed,
 3986  * just set lc_program to 0 to indicate no callbacks are possible.
 3987  * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
 3988  *  the address to the client's transport address. This won't be used
 3989  *  for callbacks, but can be printed out by nfsstats for info.)
 3990  * Return error if the xdr can't be parsed, 0 otherwise.
 3991  */
 3992 APPLESTATIC int
 3993 nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
 3994 {
 3995         u_int32_t *tl;
 3996         u_char *cp, *cp2;
 3997         int i, j;
 3998         struct sockaddr_in *rad, *sad;
 3999         u_char protocol[5], addr[24];
 4000         int error = 0, cantparse = 0;
 4001         union {
 4002                 in_addr_t ival;
 4003                 u_char cval[4];
 4004         } ip;
 4005         union {
 4006                 in_port_t sval;
 4007                 u_char cval[2];
 4008         } port;
 4009 
 4010         rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
 4011         rad->sin_family = AF_INET;
 4012         rad->sin_len = sizeof (struct sockaddr_in);
 4013         rad->sin_addr.s_addr = 0;
 4014         rad->sin_port = 0;
 4015         clp->lc_req.nr_client = NULL;
 4016         clp->lc_req.nr_lock = 0;
 4017         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 4018         i = fxdr_unsigned(int, *tl);
 4019         if (i >= 3 && i <= 4) {
 4020                 error = nfsrv_mtostr(nd, protocol, i);
 4021                 if (error)
 4022                         goto nfsmout;
 4023                 if (!strcmp(protocol, "tcp")) {
 4024                         clp->lc_flags |= LCL_TCPCALLBACK;
 4025                         clp->lc_req.nr_sotype = SOCK_STREAM;
 4026                         clp->lc_req.nr_soproto = IPPROTO_TCP;
 4027                 } else if (!strcmp(protocol, "udp")) {
 4028                         clp->lc_req.nr_sotype = SOCK_DGRAM;
 4029                         clp->lc_req.nr_soproto = IPPROTO_UDP;
 4030                 } else {
 4031                         cantparse = 1;
 4032                 }
 4033         } else {
 4034                 cantparse = 1;
 4035                 if (i > 0) {
 4036                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 4037                         if (error)
 4038                                 goto nfsmout;
 4039                 }
 4040         }
 4041         NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 4042         i = fxdr_unsigned(int, *tl);
 4043         if (i < 0) {
 4044                 error = NFSERR_BADXDR;
 4045                 goto nfsmout;
 4046         } else if (i == 0) {
 4047                 cantparse = 1;
 4048         } else if (!cantparse && i <= 23 && i >= 11) {
 4049                 error = nfsrv_mtostr(nd, addr, i);
 4050                 if (error)
 4051                         goto nfsmout;
 4052 
 4053                 /*
 4054                  * Parse out the address fields. We expect 6 decimal numbers
 4055                  * separated by '.'s.
 4056                  */
 4057                 cp = addr;
 4058                 i = 0;
 4059                 while (*cp && i < 6) {
 4060                         cp2 = cp;
 4061                         while (*cp2 && *cp2 != '.')
 4062                                 cp2++;
 4063                         if (*cp2)
 4064                                 *cp2++ = '\0';
 4065                         else if (i != 5) {
 4066                                 cantparse = 1;
 4067                                 break;
 4068                         }
 4069                         j = nfsrv_getipnumber(cp);
 4070                         if (j >= 0) {
 4071                                 if (i < 4)
 4072                                         ip.cval[3 - i] = j;
 4073                                 else
 4074                                         port.cval[5 - i] = j;
 4075                         } else {
 4076                                 cantparse = 1;
 4077                                 break;
 4078                         }
 4079                         cp = cp2;
 4080                         i++;
 4081                 }
 4082                 if (!cantparse) {
 4083                         if (ip.ival != 0x0) {
 4084                                 rad->sin_addr.s_addr = htonl(ip.ival);
 4085                                 rad->sin_port = htons(port.sval);
 4086                         } else {
 4087                                 cantparse = 1;
 4088                         }
 4089                 }
 4090         } else {
 4091                 cantparse = 1;
 4092                 if (i > 0) {
 4093                         error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 4094                         if (error)
 4095                                 goto nfsmout;
 4096                 }
 4097         }
 4098         if (cantparse) {
 4099                 sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
 4100                 if (sad->sin_family == AF_INET) {
 4101                         rad->sin_addr.s_addr = sad->sin_addr.s_addr;
 4102                         rad->sin_port = 0x0;
 4103                 }
 4104                 clp->lc_program = 0;
 4105         }
 4106 nfsmout:
 4107         NFSEXITCODE2(error, nd);
 4108         return (error);
 4109 }
 4110 
 4111 /*
 4112  * Turn a string of up to three decimal digits into a number. Return -1 upon
 4113  * error.
 4114  */
 4115 static int
 4116 nfsrv_getipnumber(u_char *cp)
 4117 {
 4118         int i = 0, j = 0;
 4119 
 4120         while (*cp) {
 4121                 if (j > 2 || *cp < '' || *cp > '9')
 4122                         return (-1);
 4123                 i *= 10;
 4124                 i += (*cp - '');
 4125                 cp++;
 4126                 j++;
 4127         }
 4128         if (i < 256)
 4129                 return (i);
 4130         return (-1);
 4131 }
 4132 
 4133 /*
 4134  * This function checks for restart conditions.
 4135  */
 4136 static int
 4137 nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
 4138     nfsv4stateid_t *stateidp, int specialid)
 4139 {
 4140         int ret = 0;
 4141 
 4142         /*
 4143          * First check for a server restart. Open, LockT, ReleaseLockOwner
 4144          * and DelegPurge have a clientid, the rest a stateid.
 4145          */
 4146         if (flags &
 4147             (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
 4148                 if (clientid.lval[0] != nfsrvboottime) {
 4149                         ret = NFSERR_STALECLIENTID;
 4150                         goto out;
 4151                 }
 4152         } else if (stateidp->other[0] != nfsrvboottime &&
 4153                 specialid == 0) {
 4154                 ret = NFSERR_STALESTATEID;
 4155                 goto out;
 4156         }
 4157 
 4158         /*
 4159          * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
 4160          * not use a lock/open owner seqid#, so the check can be done now.
 4161          * (The others will be checked, as required, later.)
 4162          */
 4163         if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
 4164                 goto out;
 4165 
 4166         NFSLOCKSTATE();
 4167         ret = nfsrv_checkgrace(NULL, NULL, flags);
 4168         NFSUNLOCKSTATE();
 4169 
 4170 out:
 4171         NFSEXITCODE(ret);
 4172         return (ret);
 4173 }
 4174 
 4175 /*
 4176  * Check for grace.
 4177  */
 4178 static int
 4179 nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
 4180     u_int32_t flags)
 4181 {
 4182         int error = 0, notreclaimed;
 4183         struct nfsrv_stable *sp;
 4184 
 4185         if ((nfsrv_stablefirst.nsf_flags & (NFSNSF_UPDATEDONE |
 4186              NFSNSF_GRACEOVER)) == 0) {
 4187                 /*
 4188                  * First, check to see if all of the clients have done a
 4189                  * ReclaimComplete.  If so, grace can end now.
 4190                  */
 4191                 notreclaimed = 0;
 4192                 LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 4193                         if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
 4194                                 notreclaimed = 1;
 4195                                 break;
 4196                         }
 4197                 }
 4198                 if (notreclaimed == 0)
 4199                         nfsrv_stablefirst.nsf_flags |= (NFSNSF_GRACEOVER |
 4200                             NFSNSF_NEEDLOCK);
 4201         }
 4202 
 4203         if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) {
 4204                 if (flags & NFSLCK_RECLAIM) {
 4205                         error = NFSERR_NOGRACE;
 4206                         goto out;
 4207                 }
 4208         } else {
 4209                 if (!(flags & NFSLCK_RECLAIM)) {
 4210                         error = NFSERR_GRACE;
 4211                         goto out;
 4212                 }
 4213                 if (nd != NULL && clp != NULL &&
 4214                     (nd->nd_flag & ND_NFSV41) != 0 &&
 4215                     (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
 4216                         error = NFSERR_NOGRACE;
 4217                         goto out;
 4218                 }
 4219 
 4220                 /*
 4221                  * If grace is almost over and we are still getting Reclaims,
 4222                  * extend grace a bit.
 4223                  */
 4224                 if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
 4225                     nfsrv_stablefirst.nsf_eograce)
 4226                         nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC +
 4227                                 NFSRV_LEASEDELTA;
 4228         }
 4229 
 4230 out:
 4231         NFSEXITCODE(error);
 4232         return (error);
 4233 }
 4234 
 4235 /*
 4236  * Do a server callback.
 4237  * The "trunc" argument is slightly overloaded and refers to different
 4238  * boolean arguments for CBRECALL and CBLAYOUTRECALL.
       *
       * clp      - client to call back; its lc_cbref is incremented for the
       *            duration of the call.
       * procnum  - NFSV4OP_CBGETATTR, NFSV4OP_CBRECALL, NFSV4OP_CBLAYOUTRECALL
       *            or NFSV4PROC_CBNULL; any other value fails with
       *            NFSERR_SERVERFAULT.
       * stateidp - stateid encoded in the CBRECALL and CBLAYOUTRECALL arguments.
       * trunc    - boolean encoded in the CBRECALL and CBLAYOUTRECALL arguments.
       * fhp      - file handle encoded in every request except CBNULL.
       * nap      - handed to nfsv4_loadattr() when a CBGETATTR reply is decoded.
       * attrbitp - attribute bitmap encoded in the CBGETATTR arguments.
       * laytype  - layout type encoded in the CBLAYOUTRECALL arguments.
       * p        - passed through to nfsv4_loadattr().
       *
       * Returns 0 on success, otherwise an error from building or sending the
       * request, the non-zero nd_repstat left by the reply, or the result of
       * nfsv4_loadattr() for CBGETATTR.
 4239  */
 4240 static int
 4241 nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
 4242     int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp,
 4243     int laytype, NFSPROC_T *p)
 4244 {
 4245         mbuf_t m;
 4246         u_int32_t *tl;
 4247         struct nfsrv_descript *nd;
 4248         struct ucred *cred;
 4249         int error = 0;
 4250         u_int32_t callback;
 4251         struct nfsdsession *sep = NULL;
 4252         uint64_t tval;
 4253 
 4254         nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 4255         cred = newnfs_getcred();
 4256         NFSLOCKSTATE(); /* mostly for lc_cbref++ */
              /* Doing a callback for a client that still needs confirmation is fatal. */
 4257         if (clp->lc_flags & LCL_NEEDSCONFIRM) {
 4258                 NFSUNLOCKSTATE();
 4259                 panic("docallb");
 4260         }
              /* Dropped again, with a wakeup if one is wanted, just before returning. */
 4261         clp->lc_cbref++;
 4262 
 4263         /*
 4264          * Fill the callback program# and version into the request
 4265          * structure for newnfs_connect() to use.
 4266          */
 4267         clp->lc_req.nr_prog = clp->lc_program;
 4268 #ifdef notnow
 4269         if ((clp->lc_flags & LCL_NFSV41) != 0)
 4270                 clp->lc_req.nr_vers = NFSV41_CBVERS;
 4271         else
 4272 #endif
 4273                 clp->lc_req.nr_vers = NFSV4_CBVERS;
 4274 
 4275         /*
 4276          * First, fill in some of the fields of nd and cred.
 4277          */
 4278         nd->nd_flag = ND_NFSV4;
 4279         if (clp->lc_flags & LCL_GSS)
 4280                 nd->nd_flag |= ND_KERBV;
 4281         if ((clp->lc_flags & LCL_NFSV41) != 0)
 4282                 nd->nd_flag |= ND_NFSV41;
 4283         nd->nd_repstat = 0;
 4284         cred->cr_uid = clp->lc_uid;
 4285         cred->cr_gid = clp->lc_gid;
              /* Copy lc_callback while the state lock is still held. */
 4286         callback = clp->lc_callback;
 4287         NFSUNLOCKSTATE();
 4288         cred->cr_ngroups = 1;
 4289 
 4290         /*
 4291          * Get the first mbuf for the request.
 4292          */
 4293         MGET(m, M_WAITOK, MT_DATA);
 4294         mbuf_setlen(m, 0);
 4295         nd->nd_mreq = nd->nd_mb = m;
 4296         nd->nd_bpos = NFSMTOD(m, caddr_t);
 4297         
 4298         /*
 4299          * and build the callback request.
              * Every failure in this section frees the request mbuf chain
              * itself before jumping to errout.
 4300          */
 4301         if (procnum == NFSV4OP_CBGETATTR) {
 4302                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4303                 error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
 4304                     "CB Getattr", &sep);
 4305                 if (error != 0) {
 4306                         mbuf_freem(nd->nd_mreq);
 4307                         goto errout;
 4308                 }
 4309                 (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
 4310                 (void)nfsrv_putattrbit(nd, attrbitp);
 4311         } else if (procnum == NFSV4OP_CBRECALL) {
 4312                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4313                 error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
 4314                     "CB Recall", &sep);
 4315                 if (error != 0) {
 4316                         mbuf_freem(nd->nd_mreq);
 4317                         goto errout;
 4318                 }
                      /* Arguments: stateid (seqid + other), truncate flag, file handle. */
 4319                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 4320                 *tl++ = txdr_unsigned(stateidp->seqid);
 4321                 NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
 4322                     NFSX_STATEIDOTHER);
 4323                 tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 4324                 if (trunc)
 4325                         *tl = newnfs_true;
 4326                 else
 4327                         *tl = newnfs_false;
 4328                 (void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
 4329         } else if (procnum == NFSV4OP_CBLAYOUTRECALL) {
 4330                 NFSD_DEBUG(4, "docallback layout recall\n");
 4331                 nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
 4332                 error = nfsrv_cbcallargs(nd, clp, callback,
 4333                     NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep);
 4334                 NFSD_DEBUG(4, "aft cbcallargs=%d\n", error);
 4335                 if (error != 0) {
 4336                         mbuf_freem(nd->nd_mreq);
 4337                         goto errout;
 4338                 }
                      /*
                       * Arguments: layout type, iomode, the "trunc" boolean, the
                       * recall type, the file handle, two hypers (0 and
                       * UINT64_MAX) and the stateid.
                       */
 4339                 NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 4340                 *tl++ = txdr_unsigned(laytype);
 4341                 *tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY);
 4342                 if (trunc)
 4343                         *tl++ = newnfs_true;
 4344                 else
 4345                         *tl++ = newnfs_false;
 4346                 *tl = txdr_unsigned(NFSV4LAYOUTRET_FILE);
 4347                 nfsm_fhtom(nd, (uint8_t *)fhp, NFSX_MYFH, 0);
 4348                 NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID);
 4349                 tval = 0;
 4350                 txdr_hyper(tval, tl); tl += 2;
 4351                 tval = UINT64_MAX;
 4352                 txdr_hyper(tval, tl); tl += 2;
 4353                 *tl++ = txdr_unsigned(stateidp->seqid);
 4354                 NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER);
 4355                 tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 4356                 NFSD_DEBUG(4, "aft args\n");
 4357         } else if (procnum == NFSV4PROC_CBNULL) {
                      /* The null procedure has no arguments; NFSv4.1 still needs a session. */
 4358                 nd->nd_procnum = NFSV4PROC_CBNULL;
 4359                 if ((clp->lc_flags & LCL_NFSV41) != 0) {
 4360                         error = nfsv4_getcbsession(clp, &sep);
 4361                         if (error != 0) {
 4362                                 mbuf_freem(nd->nd_mreq);
 4363                                 goto errout;
 4364                         }
 4365                 }
 4366         } else {
 4367                 error = NFSERR_SERVERFAULT;
 4368                 mbuf_freem(nd->nd_mreq);
 4369                 goto errout;
 4370         }
 4371 
 4372         /*
 4373          * Call newnfs_connect(), as required, and then newnfs_request().
 4374          */
 4375         (void) newnfs_sndlock(&clp->lc_req.nr_lock);
 4376         if (clp->lc_req.nr_client == NULL) {
                      /*
                       * No connection is made here for NFSv4.1: the callback is
                       * refused and sep is handed to nfsrv_freesession().
                       */
 4377                 if ((clp->lc_flags & LCL_NFSV41) != 0) {
 4378                         error = ECONNREFUSED;
 4379                         nfsrv_freesession(sep, NULL);
 4380                 } else if (nd->nd_procnum == NFSV4PROC_CBNULL)
 4381                         error = newnfs_connect(NULL, &clp->lc_req, cred,
 4382                             NULL, 1);
 4383                 else
 4384                         error = newnfs_connect(NULL, &clp->lc_req, cred,
 4385                             NULL, 3);
 4386         }
 4387         newnfs_sndunlock(&clp->lc_req.nr_lock);
 4388         NFSD_DEBUG(4, "aft sndunlock=%d\n", error);
 4389         if (!error) {
 4390                 if ((nd->nd_flag & ND_NFSV41) != 0) {
 4391                         KASSERT(sep != NULL, ("sep NULL"));
 4392                         if (sep->sess_cbsess.nfsess_xprt != NULL)
 4393                                 error = newnfs_request(nd, NULL, clp,
 4394                                     &clp->lc_req, NULL, NULL, cred,
 4395                                     clp->lc_program, clp->lc_req.nr_vers, NULL,
 4396                                     1, NULL, &sep->sess_cbsess);
 4397                         else {
 4398                                 /*
 4399                                  * This should probably never occur, but if a
 4400                                  * client somehow does an RPC without a
 4401                                  * SequenceID Op that causes a callback just
 4402                                  * after the nfsd threads have been terminated
 4403                                  * and restarted we could conceivably get here
 4404                                  * without a backchannel xprt.
 4405                                  */
 4406                                 printf("nfsrv_docallback: no xprt\n");
 4407                                 error = ECONNREFUSED;
 4408                         }
 4409                         NFSD_DEBUG(4, "aft newnfs_request=%d\n", error);
 4410                         nfsrv_freesession(sep, NULL);
 4411                 } else
 4412                         error = newnfs_request(nd, NULL, clp, &clp->lc_req,
 4413                             NULL, NULL, cred, clp->lc_program,
 4414                             clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
 4415         }
 4416 errout:
 4417         NFSFREECRED(cred);
 4418 
 4419         /*
 4420          * If error is set here, the Callback path isn't working
 4421          * properly, so twiddle the appropriate LCL_ flags.
 4422          * (nd_repstat != 0 indicates the Callback path is working,
 4423          *  but the callback failed on the client.)
 4424          */
 4425         if (error) {
 4426                 /*
 4427                  * Mark the callback pathway down, which disables issuing
 4428                  * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
 4429                  */
 4430                 NFSLOCKSTATE();
 4431                 clp->lc_flags |= LCL_CBDOWN;
 4432                 NFSUNLOCKSTATE();
 4433         } else {
 4434                 /*
 4435                  * Callback worked. If the callback path was down, disable
 4436                  * callbacks, so no more delegations will be issued. (This
 4437                  * is done on the assumption that the callback pathway is
 4438                  * flakey.)
 4439                  */
 4440                 NFSLOCKSTATE();
 4441                 if (clp->lc_flags & LCL_CBDOWN)
 4442                         clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
 4443                 NFSUNLOCKSTATE();
                      /* A non-zero reply status becomes the return value. */
 4444                 if (nd->nd_repstat) {
 4445                         error = nd->nd_repstat;
 4446                         NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n",
 4447                             procnum, error);
 4448                 } else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
 4449                         error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 4450                             NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
 4451                             p, NULL);
                      /* The reply chain is only freed on this branch. */
 4452                 mbuf_freem(nd->nd_mrep);
 4453         }
              /* Drop the reference taken at the top and wake up a waiter, if any. */
 4454         NFSLOCKSTATE();
 4455         clp->lc_cbref--;
 4456         if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
 4457                 clp->lc_flags &= ~LCL_WAKEUPWANTED;
 4458                 wakeup(clp);
 4459         }
 4460         NFSUNLOCKSTATE();
 4461 
 4462         free(nd, M_TEMP);
 4463         NFSEXITCODE(error);
 4464         return (error);
 4465 }
 4466 
 4467 /*
 4468  * Set up the compound RPC for the callback.
 4469  */
 4470 static int
 4471 nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
 4472     uint32_t callback, int op, const char *optag, struct nfsdsession **sepp)
 4473 {
 4474         uint32_t *tl;
 4475         int error, len;
 4476 
 4477         len = strlen(optag);
 4478         (void)nfsm_strtom(nd, optag, len);
 4479         NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
 4480         if ((nd->nd_flag & ND_NFSV41) != 0) {
 4481                 *tl++ = txdr_unsigned(NFSV41_MINORVERSION);
 4482                 *tl++ = txdr_unsigned(callback);
 4483                 *tl++ = txdr_unsigned(2);
 4484                 *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
 4485                 error = nfsv4_setcbsequence(nd, clp, 1, sepp);
 4486                 if (error != 0)
 4487                         return (error);
 4488                 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 4489                 *tl = txdr_unsigned(op);
 4490         } else {
 4491                 *tl++ = txdr_unsigned(NFSV4_MINORVERSION);
 4492                 *tl++ = txdr_unsigned(callback);
 4493                 *tl++ = txdr_unsigned(1);
 4494                 *tl = txdr_unsigned(op);
 4495         }
 4496         return (0);
 4497 }
 4498 
 4499 /*
 4500  * Return the next index# for a clientid. Mostly just increment and return
 4501  * the next one, but... if the 32bit unsigned does actually wrap around,
 4502  * it should be rebooted.
 4503  * At an average rate of one new client per second, it will wrap around in
 4504  * approximately 136 years. (I think the server will have been shut
 4505  * down or rebooted before then.)
 4506  */
 4507 static u_int32_t
 4508 nfsrv_nextclientindex(void)
 4509 {
 4510         static u_int32_t client_index = 0;
 4511 
 4512         client_index++;
 4513         if (client_index != 0)
 4514                 return (client_index);
 4515 
 4516         printf("%s: out of clientids\n", __func__);
 4517         return (client_index);
 4518 }
 4519 
 4520 /*
 4521  * Return the next index# for a stateid. Mostly just increment and return
 4522  * the next one, but... if the 32bit unsigned does actually wrap around
 4523  * (will a BSD server stay up that long?), find
 4524  * new start and end values.
 4525  */
 4526 static u_int32_t
 4527 nfsrv_nextstateindex(struct nfsclient *clp)
 4528 {
 4529         struct nfsstate *stp;
 4530         int i;
 4531         u_int32_t canuse, min_index, max_index;
 4532 
 4533         if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
 4534                 clp->lc_stateindex++;
 4535                 if (clp->lc_stateindex != clp->lc_statemaxindex)
 4536                         return (clp->lc_stateindex);
 4537         }
 4538 
 4539         /*
 4540          * Yuck, we've hit the end.
 4541          * Look for a new min and max.
 4542          */
 4543         min_index = 0;
 4544         max_index = 0xffffffff;
 4545         for (i = 0; i < nfsrv_statehashsize; i++) {
 4546             LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 4547                 if (stp->ls_stateid.other[2] > 0x80000000) {
 4548                     if (stp->ls_stateid.other[2] < max_index)
 4549                         max_index = stp->ls_stateid.other[2];
 4550                 } else {
 4551                     if (stp->ls_stateid.other[2] > min_index)
 4552                         min_index = stp->ls_stateid.other[2];
 4553                 }
 4554             }
 4555         }
 4556 
 4557         /*
 4558          * Yikes, highly unlikely, but I'll handle it anyhow.
 4559          */
 4560         if (min_index == 0x80000000 && max_index == 0x80000001) {
 4561             canuse = 0;
 4562             /*
 4563              * Loop around until we find an unused entry. Return that
 4564              * and set LCL_INDEXNOTOK, so the search will continue next time.
 4565              * (This is one of those rare cases where a goto is the
 4566              *  cleanest way to code the loop.)
 4567              */
 4568 tryagain:
 4569             for (i = 0; i < nfsrv_statehashsize; i++) {
 4570                 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
 4571                     if (stp->ls_stateid.other[2] == canuse) {
 4572                         canuse++;
 4573                         goto tryagain;
 4574                     }
 4575                 }
 4576             }
 4577             clp->lc_flags |= LCL_INDEXNOTOK;
 4578             return (canuse);
 4579         }
 4580 
 4581         /*
 4582          * Ok to start again from min + 1.
 4583          */
 4584         clp->lc_stateindex = min_index + 1;
 4585         clp->lc_statemaxindex = max_index;
 4586         clp->lc_flags &= ~LCL_INDEXNOTOK;
 4587         return (clp->lc_stateindex);
 4588 }
 4589 
 4590 /*
 4591  * The following functions handle the stable storage file that deals with
 4592  * the edge conditions described in RFC3530 Sec. 8.6.3.
 4593  * The file is as follows:
 4594  * - a single record at the beginning that has the lease time of the
 4595  *   previous server instance (before the last reboot) and the nfsrvboottime
 4596  *   values for the previous server boots.
 4597  *   These previous boot times are used to ensure that the current
 4598  *   nfsrvboottime does not, somehow, get set to a previous one.
 4599  *   (This is important so that Stale ClientIDs and StateIDs can
 4600  *    be recognized.)
 4601  *   The number of previous nfsrvboottime values precedes the list.
 4602  * - followed by some number of appended records with:
 4603  *   - client id string
 4604  *   - flag that indicates it is a record revoking state via lease
 4605  *     expiration or similar
 4606  *     OR has successfully acquired state.
 4607  * These structures vary in length, with the client string at the end, up
 4608  * to NFSV4_OPAQUELIMIT in size.
 4609  *
 4610  * At the end of the grace period, the file is truncated, the first
 4611  * record is rewritten with updated information and any acquired state
 4612  * records for successful reclaims of state are written.
 4613  *
 4614  * Subsequent records are appended when the first state is issued to
 4615  * a client and when state is revoked for a client.
 4616  *
 4617  * When reading the file in, state issued records that come later in
 4618  * the file override older ones, since the append log is in chronological order.
 4619  * If, for some reason, the file can't be read, the grace period is
 4620  * immediately terminated and all reclaims get NFSERR_NOGRACE.
 4621  */
 4622 
 4623 /*
 4624  * Read in the stable storage file. Called by nfssvc() before the nfsd
 4625  * processes start servicing requests.
       *
       * p - passed through to NFSD_RDWR() for each read.
       *
       * Unless NFSNSF_UPDATEDONE is already set, nfsrvboottime is reset and
       * NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK are set up front. They are only
       * replaced by NFSNSF_OK once the whole file has been read, so every
       * early return after that point leaves the grace period marked as over.
 4626  */
 4627 APPLESTATIC void
 4628 nfsrv_setupstable(NFSPROC_T *p)
 4629 {
 4630         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 4631         struct nfsrv_stable *sp, *nsp;
 4632         struct nfst_rec *tsp;
 4633         int error, i, tryagain;
 4634         off_t off = 0;
 4635         ssize_t aresid, len;
 4636 
 4637         /*
 4638          * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
 4639          * a reboot, so state has not been lost.
 4640          */
 4641         if (sf->nsf_flags & NFSNSF_UPDATEDONE)
 4642                 return;
 4643         /*
 4644          * Set Grace over just until the file reads successfully.
 4645          */
 4646         nfsrvboottime = time_second;
 4647         LIST_INIT(&sf->nsf_head);
 4648         sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
 4649         sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
              /* Without a stable storage file there is nothing to read. */
 4650         if (sf->nsf_fp == NULL)
 4651                 return;
              /* Read the fixed size first record from the start of the file. */
 4652         error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4653             (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
 4654             0, NFSFPCRED(sf->nsf_fp), &aresid, p);
              /* Give up on a read error, a short read or a boot count out of range. */
 4655         if (error || aresid || sf->nsf_numboots == 0 ||
 4656                 sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
 4657                 return;
 4658 
 4659         /*
 4660          * Now, read in the boottimes.
              * The array is allocated with one slot more than is read here.
 4661          */
 4662         sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
 4663                 sizeof (time_t), M_TEMP, M_WAITOK);
 4664         off = sizeof (struct nfsf_rec);
 4665         error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4666             (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
 4667             UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4668         if (error || aresid) {
 4669                 free(sf->nsf_bootvals, M_TEMP);
 4670                 sf->nsf_bootvals = NULL;
 4671                 return;
 4672         }
 4673 
 4674         /*
 4675          * Make sure this nfsrvboottime is different from all recorded
 4676          * previous ones.
              * After each bump the whole list is scanned again from the start.
 4677          */
 4678         do {
 4679                 tryagain = 0;
 4680                 for (i = 0; i < sf->nsf_numboots; i++) {
 4681                         if (nfsrvboottime == sf->nsf_bootvals[i]) {
 4682                                 nfsrvboottime++;
 4683                                 tryagain = 1;
 4684                                 break;
 4685                         }
 4686                 }
 4687         } while (tryagain);
 4688 
 4689         sf->nsf_flags |= NFSNSF_OK;
              /* Step over the boot times to the first appended record. */
 4690         off += (sf->nsf_numboots * sizeof (time_t));
 4691 
 4692         /*
 4693          * Read through the file, building a list of records for grace
 4694          * checking.
 4695          * Each record is between sizeof (struct nfst_rec) and
 4696          * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
 4697          * and is actually sizeof (struct nfst_rec) + nst_len - 1.
 4698          */
 4699         tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
 4700                 NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
 4701         do {
                  /*
                   * Always ask for a maximum sized record; len is the number
                   * of bytes actually read and 0 ends the loop.
                   */
 4702             error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
 4703                 (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
 4704                 off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
 4705             len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
 4706             if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
 4707                 len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
 4708                 /*
 4709                  * Yuck, the file has been corrupted, so just return
 4710                  * after clearing out any restart state, so the grace period
 4711                  * is over.
 4712                  */
 4713                 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
 4714                         LIST_REMOVE(sp, nst_list);
 4715                         free(sp, M_TEMP);
 4716                 }
 4717                 free(tsp, M_TEMP);
 4718                 sf->nsf_flags &= ~NFSNSF_OK;
 4719                 free(sf->nsf_bootvals, M_TEMP);
 4720                 sf->nsf_bootvals = NULL;
 4721                 return;
 4722             }
 4723             if (len > 0) {
                      /* Advance by the real size of this record, not by len. */
 4724                 off += sizeof (struct nfst_rec) + tsp->len - 1;
 4725                 /*
 4726                  * Search the list for a matching client.
 4727                  */
 4728                 LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
 4729                         if (tsp->len == sp->nst_len &&
 4730                             !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
 4731                                 break;
 4732                 }
 4733                 if (sp == LIST_END(&sf->nsf_head)) {
                              /* First record for this client: copy it onto the list. */
 4734                         sp = (struct nfsrv_stable *)malloc(tsp->len +
 4735                                 sizeof (struct nfsrv_stable) - 1, M_TEMP,
 4736                                 M_WAITOK);
 4737                         NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
 4738                                 sizeof (struct nfst_rec) + tsp->len - 1);
 4739                         LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
 4740                 } else {
                              /* Client already listed: only its revoke flag is updated. */
 4741                         if (tsp->flag == NFSNST_REVOKE)
 4742                                 sp->nst_flag |= NFSNST_REVOKE;
 4743                         else
 4744                                 /*
 4745                                  * A subsequent timestamp indicates the client
 4746                                  * did a setclientid/confirm and any previous
 4747                                  * revoke is no longer relevant.
 4748                                  */
 4749                                 sp->nst_flag &= ~NFSNST_REVOKE;
 4750                 }
 4751             }
 4752         } while (len > 0);
 4753         free(tsp, M_TEMP);
              /*
               * Plain assignment: this also clears the NFSNSF_GRACEOVER and
               * NFSNSF_NEEDLOCK bits set at the top, so grace is now in effect
               * until nsf_eograce.
               */
 4754         sf->nsf_flags = NFSNSF_OK;
 4755         sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
 4756                 NFSRV_LEASEDELTA;
 4757 }
 4758 
 4759 /*
 4760  * Update the stable storage file, now that the grace period is over.
 4761  */
 4762 APPLESTATIC void
 4763 nfsrv_updatestable(NFSPROC_T *p)
 4764 {
 4765         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 4766         struct nfsrv_stable *sp, *nsp;
 4767         int i;
 4768         struct nfsvattr nva;
 4769         vnode_t vp;
 4770 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
 4771         mount_t mp = NULL;
 4772 #endif
 4773         int error;
 4774 
 4775         if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
 4776                 return;
 4777         sf->nsf_flags |= NFSNSF_UPDATEDONE;
 4778         /*
 4779          * Ok, we need to rewrite the stable storage file.
 4780          * - truncate to 0 length
 4781          * - write the new first structure
 4782          * - loop through the data structures, writing out any that
 4783          *   have timestamps older than the old boot
 4784          */
 4785         if (sf->nsf_bootvals) {
 4786                 sf->nsf_numboots++;
 4787                 for (i = sf->nsf_numboots - 2; i >= 0; i--)
 4788                         sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
 4789         } else {
 4790                 sf->nsf_numboots = 1;
 4791                 sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
 4792                         M_TEMP, M_WAITOK);
 4793         }
 4794         sf->nsf_bootvals[0] = nfsrvboottime;
 4795         sf->nsf_lease = nfsrv_lease;
 4796         NFSVNO_ATTRINIT(&nva);
 4797         NFSVNO_SETATTRVAL(&nva, size, 0);
 4798         vp = NFSFPVNODE(sf->nsf_fp);
 4799         vn_start_write(vp, &mp, V_WAIT);
 4800         if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
 4801                 error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
 4802                     NULL);
 4803                 NFSVOPUNLOCK(vp, 0);
 4804         } else
 4805                 error = EPERM;
 4806         vn_finished_write(mp);
 4807         if (!error)
 4808             error = NFSD_RDWR(UIO_WRITE, vp,
 4809                 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
 4810                 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
 4811         if (!error)
 4812             error = NFSD_RDWR(UIO_WRITE, vp,
 4813                 (caddr_t)sf->nsf_bootvals,
 4814                 sf->nsf_numboots * sizeof (time_t),
 4815                 (off_t)(sizeof (struct nfsf_rec)),
 4816                 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
 4817         free(sf->nsf_bootvals, M_TEMP);
 4818         sf->nsf_bootvals = NULL;
 4819         if (error) {
 4820                 sf->nsf_flags &= ~NFSNSF_OK;
 4821                 printf("EEK! Can't write NfsV4 stable storage file\n");
 4822                 return;
 4823         }
 4824         sf->nsf_flags |= NFSNSF_OK;
 4825 
 4826         /*
 4827          * Loop through the list and write out timestamp records for
 4828          * any clients that successfully reclaimed state.
 4829          */
 4830         LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
 4831                 if (sp->nst_flag & NFSNST_GOTSTATE) {
 4832                         nfsrv_writestable(sp->nst_client, sp->nst_len,
 4833                                 NFSNST_NEWSTATE, p);
 4834                         sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
 4835                 }
 4836                 LIST_REMOVE(sp, nst_list);
 4837                 free(sp, M_TEMP);
 4838         }
 4839         nfsrv_backupstable();
 4840 }
 4841 
 4842 /*
 4843  * Append a record to the stable storage file.
 4844  */
 4845 APPLESTATIC void
 4846 nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
 4847 {
 4848         struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
 4849         struct nfst_rec *sp;
 4850         int error;
 4851 
 4852         if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
 4853                 return;
 4854         sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
 4855                 len - 1, M_TEMP, M_WAITOK);
 4856         sp->len = len;
 4857         NFSBCOPY(client, sp->client, len);
 4858         sp->flag = flag;
 4859         error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
 4860             (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
 4861             UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
 4862         free(sp, M_TEMP);
 4863         if (error) {
 4864                 sf->nsf_flags &= ~NFSNSF_OK;
 4865                 printf("EEK! Can't write NfsV4 stable storage file\n");
 4866         }
 4867 }
 4868 
 4869 /*
 4870  * This function is called during the grace period to mark a client
 4871  * that successfully reclaimed state.
 4872  */
 4873 static void
 4874 nfsrv_markstable(struct nfsclient *clp)
 4875 {
 4876         struct nfsrv_stable *sp;
 4877 
 4878         /*
 4879          * First find the client structure.
 4880          */
 4881         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 4882                 if (sp->nst_len == clp->lc_idlen &&
 4883                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 4884                         break;
 4885         }
 4886         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
 4887                 return;
 4888 
 4889         /*
 4890          * Now, just mark it and set the nfsclient back pointer.
 4891          */
 4892         sp->nst_flag |= NFSNST_GOTSTATE;
 4893         sp->nst_clp = clp;
 4894 }
 4895 
 4896 /*
 4897  * This function is called when a NFSv4.1 client does a ReclaimComplete.
 4898  * Very similar to nfsrv_markstable(), except for the flag being set.
 4899  */
 4900 static void
 4901 nfsrv_markreclaim(struct nfsclient *clp)
 4902 {
 4903         struct nfsrv_stable *sp;
 4904 
 4905         /*
 4906          * First find the client structure.
 4907          */
 4908         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 4909                 if (sp->nst_len == clp->lc_idlen &&
 4910                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 4911                         break;
 4912         }
 4913         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
 4914                 return;
 4915 
 4916         /*
 4917          * Now, just set the flag.
 4918          */
 4919         sp->nst_flag |= NFSNST_RECLAIMED;
 4920 }
 4921 
 4922 /*
 4923  * This function is called for a reclaim, to see if it gets grace.
 4924  * It returns 0 if a reclaim is allowed, 1 otherwise.
 4925  */
 4926 static int
 4927 nfsrv_checkstable(struct nfsclient *clp)
 4928 {
 4929         struct nfsrv_stable *sp;
 4930 
 4931         /*
 4932          * First, find the entry for the client.
 4933          */
 4934         LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
 4935                 if (sp->nst_len == clp->lc_idlen &&
 4936                     !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
 4937                         break;
 4938         }
 4939 
 4940         /*
 4941          * If not in the list, state was revoked or no state was issued
 4942          * since the previous reboot, a reclaim is denied.
 4943          */
 4944         if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
 4945             (sp->nst_flag & NFSNST_REVOKE) ||
 4946             !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
 4947                 return (1);
 4948         return (0);
 4949 }
 4950 
 4951 /*
 4952  * Test for and try to clear out a conflicting client. This is called by
 4953  * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
 4954  * a found.
 4955  * The trick here is that it can't revoke a conflicting client with an
 4956  * expired lease unless it holds the v4root lock, so...
 4957  * If no v4root lock, get the lock and return 1 to indicate "try again".
 4958  * Return 0 to indicate the conflict can't be revoked and 1 to indicate
 4959  * the revocation worked and the conflicting client is "bye, bye", so it
 4960  * can be tried again.
 4961  * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
 4962  * Unlocks State before a non-zero value is returned.
 4963  */
 4964 static int
 4965 nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
 4966     NFSPROC_T *p)
 4967 {
 4968         int gotlock, lktype = 0;
 4969 
 4970         /*
 4971          * If lease hasn't expired, we can't fix it.
 4972          */
 4973         if (clp->lc_expiry >= NFSD_MONOSEC ||
 4974             !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
 4975                 return (0);
 4976         if (*haslockp == 0) {
 4977                 NFSUNLOCKSTATE();
 4978                 if (vp != NULL) {
 4979                         lktype = NFSVOPISLOCKED(vp);
 4980                         NFSVOPUNLOCK(vp, 0);
 4981                 }
 4982                 NFSLOCKV4ROOTMUTEX();
 4983                 nfsv4_relref(&nfsv4rootfs_lock);
 4984                 do {
 4985                         gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 4986                             NFSV4ROOTLOCKMUTEXPTR, NULL);
 4987                 } while (!gotlock);
 4988                 NFSUNLOCKV4ROOTMUTEX();
 4989                 *haslockp = 1;
 4990                 if (vp != NULL) {
 4991                         NFSVOPLOCK(vp, lktype | LK_RETRY);
 4992                         if ((vp->v_iflag & VI_DOOMED) != 0)
 4993                                 return (2);
 4994                 }
 4995                 return (1);
 4996         }
 4997         NFSUNLOCKSTATE();
 4998 
 4999         /*
 5000          * Ok, we can expire the conflicting client.
 5001          */
 5002         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
 5003         nfsrv_backupstable();
 5004         nfsrv_cleanclient(clp, p);
 5005         nfsrv_freedeleglist(&clp->lc_deleg);
 5006         nfsrv_freedeleglist(&clp->lc_olddeleg);
 5007         LIST_REMOVE(clp, lc_hash);
 5008         nfsrv_zapclient(clp, p);
 5009         return (1);
 5010 }
 5011 
 5012 /*
 5013  * Resolve a delegation conflict.
 5014  * Returns 0 to indicate the conflict was resolved without sleeping.
 5015  * Return -1 to indicate that the caller should check for conflicts again.
 5016  * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
 5017  *
 5018  * Also, manipulate the nfsv4root_lock, as required. It isn't changed
 5019  * for a return of 0, since there was no sleep and it could be required
 5020  * later. It is released for a return of NFSERR_DELAY, since the caller
 5021  * will return that error. It is released when a sleep was done waiting
 5022  * for the delegation to be returned or expire (so that other nfsds can
 5023  * handle ops). Then, it must be acquired for the write to stable storage.
 5024  * (This function is somewhat similar to nfsrv_clientconflict(), but
 5025  *  the semantics differ in a couple of subtle ways. The return of 0
 5026  *  indicates the conflict was resolved without sleeping here, not
 5027  *  that the conflict can't be resolved and the handling of nfsv4root_lock
 5028  *  differs, as noted above.)
 5029  * Unlocks State before returning a non-zero value.
 5030  */
 5031 static int
 5032 nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
 5033     vnode_t vp)
 5034 {
 5035         struct nfsclient *clp = stp->ls_clp;
 5036         int gotlock, error, lktype = 0, retrycnt, zapped_clp;
 5037         nfsv4stateid_t tstateid;
 5038         fhandle_t tfh;
 5039 
 5040         /*
 5041          * If the conflict is with an old delegation...
 5042          */
 5043         if (stp->ls_flags & NFSLCK_OLDDELEG) {
 5044                 /*
 5045                  * You can delete it, if it has expired.
 5046                  */
 5047                 if (clp->lc_delegtime < NFSD_MONOSEC) {
 5048                         nfsrv_freedeleg(stp);
 5049                         NFSUNLOCKSTATE();
 5050                         error = -1;
 5051                         goto out;
 5052                 }
 5053                 NFSUNLOCKSTATE();
 5054                 /*
 5055                  * During this delay, the old delegation could expire or it
 5056                  * could be recovered by the client via an Open with
 5057                  * CLAIM_DELEGATE_PREV.
 5058                  * Release the nfsv4root_lock, if held.
 5059                  */
 5060                 if (*haslockp) {
 5061                         *haslockp = 0;
 5062                         NFSLOCKV4ROOTMUTEX();
 5063                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5064                         NFSUNLOCKV4ROOTMUTEX();
 5065                 }
 5066                 error = NFSERR_DELAY;
 5067                 goto out;
 5068         }
 5069 
 5070         /*
 5071          * It's a current delegation, so:
 5072          * - check to see if the delegation has expired
 5073          *   - if so, get the v4root lock and then expire it
 5074          */
 5075         if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
 5076                 /*
 5077                  * - do a recall callback, since not yet done
 5078                  * For now, never allow truncate to be set. To use
 5079                  * truncate safely, it must be guaranteed that the
 5080                  * Remove, Rename or Setattr with size of 0 will
 5081                  * succeed and that would require major changes to
 5082                  * the VFS/Vnode OPs.
 5083                  * Set the expiry time large enough so that it won't expire
 5084                  * until after the callback, then set it correctly, once
 5085                  * the callback is done. (The delegation will now time
 5086                  * out whether or not the Recall worked ok. The timeout
 5087                  * will be extended when ops are done on the delegation
 5088                  * stateid, up to the timelimit.)
 5089                  */
 5090                 stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
 5091                     NFSRV_LEASEDELTA;
 5092                 stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
 5093                     NFSRV_LEASEDELTA;
 5094                 stp->ls_flags |= NFSLCK_DELEGRECALL;
 5095 
 5096                 /*
 5097                  * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
 5098                  * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
 5099                  * in order to try and avoid a race that could happen
 5100                  * when a CBRecall request passed the Open reply with
 5101                  * the delegation in it when transitting the network.
 5102                  * Since nfsrv_docallback will sleep, don't use stp after
 5103                  * the call.
 5104                  */
 5105                 NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
 5106                     sizeof (tstateid));
 5107                 NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
 5108                     sizeof (tfh));
 5109                 NFSUNLOCKSTATE();
 5110                 if (*haslockp) {
 5111                         *haslockp = 0;
 5112                         NFSLOCKV4ROOTMUTEX();
 5113                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5114                         NFSUNLOCKV4ROOTMUTEX();
 5115                 }
 5116                 retrycnt = 0;
 5117                 do {
 5118                     error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
 5119                         &tstateid, 0, &tfh, NULL, NULL, 0, p);
 5120                     retrycnt++;
 5121                 } while ((error == NFSERR_BADSTATEID ||
 5122                     error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
 5123                 error = NFSERR_DELAY;
 5124                 goto out;
 5125         }
 5126 
 5127         if (clp->lc_expiry >= NFSD_MONOSEC &&
 5128             stp->ls_delegtime >= NFSD_MONOSEC) {
 5129                 NFSUNLOCKSTATE();
 5130                 /*
 5131                  * A recall has been done, but it has not yet expired.
 5132                  * So, RETURN_DELAY.
 5133                  */
 5134                 if (*haslockp) {
 5135                         *haslockp = 0;
 5136                         NFSLOCKV4ROOTMUTEX();
 5137                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5138                         NFSUNLOCKV4ROOTMUTEX();
 5139                 }
 5140                 error = NFSERR_DELAY;
 5141                 goto out;
 5142         }
 5143 
 5144         /*
 5145          * If we don't yet have the lock, just get it and then return,
 5146          * since we need that before deleting expired state, such as
 5147          * this delegation.
 5148          * When getting the lock, unlock the vnode, so other nfsds that
 5149          * are in progress, won't get stuck waiting for the vnode lock.
 5150          */
 5151         if (*haslockp == 0) {
 5152                 NFSUNLOCKSTATE();
 5153                 if (vp != NULL) {
 5154                         lktype = NFSVOPISLOCKED(vp);
 5155                         NFSVOPUNLOCK(vp, 0);
 5156                 }
 5157                 NFSLOCKV4ROOTMUTEX();
 5158                 nfsv4_relref(&nfsv4rootfs_lock);
 5159                 do {
 5160                         gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 5161                             NFSV4ROOTLOCKMUTEXPTR, NULL);
 5162                 } while (!gotlock);
 5163                 NFSUNLOCKV4ROOTMUTEX();
 5164                 *haslockp = 1;
 5165                 if (vp != NULL) {
 5166                         NFSVOPLOCK(vp, lktype | LK_RETRY);
 5167                         if ((vp->v_iflag & VI_DOOMED) != 0) {
 5168                                 *haslockp = 0;
 5169                                 NFSLOCKV4ROOTMUTEX();
 5170                                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5171                                 NFSUNLOCKV4ROOTMUTEX();
 5172                                 error = NFSERR_PERM;
 5173                                 goto out;
 5174                         }
 5175                 }
 5176                 error = -1;
 5177                 goto out;
 5178         }
 5179 
 5180         NFSUNLOCKSTATE();
 5181         /*
 5182          * Ok, we can delete the expired delegation.
 5183          * First, write the Revoke record to stable storage and then
 5184          * clear out the conflict.
 5185          * Since all other nfsd threads are now blocked, we can safely
 5186          * sleep without the state changing.
 5187          */
 5188         nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
 5189         nfsrv_backupstable();
 5190         if (clp->lc_expiry < NFSD_MONOSEC) {
 5191                 nfsrv_cleanclient(clp, p);
 5192                 nfsrv_freedeleglist(&clp->lc_deleg);
 5193                 nfsrv_freedeleglist(&clp->lc_olddeleg);
 5194                 LIST_REMOVE(clp, lc_hash);
 5195                 zapped_clp = 1;
 5196         } else {
 5197                 nfsrv_freedeleg(stp);
 5198                 zapped_clp = 0;
 5199         }
 5200         if (zapped_clp)
 5201                 nfsrv_zapclient(clp, p);
 5202         error = -1;
 5203 
 5204 out:
 5205         NFSEXITCODE(error);
 5206         return (error);
 5207 }
 5208 
 5209 /*
 5210  * Check for a remove allowed, if remove is set to 1 and get rid of
 5211  * delegations.
 5212  */
 5213 APPLESTATIC int
 5214 nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
 5215 {
 5216         struct nfsstate *stp;
 5217         struct nfslockfile *lfp;
 5218         int error, haslock = 0;
 5219         fhandle_t nfh;
 5220 
 5221         /*
 5222          * First, get the lock file structure.
 5223          * (A return of -1 means no associated state, so remove ok.)
 5224          */
 5225         error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
 5226 tryagain:
 5227         NFSLOCKSTATE();
 5228         if (!error)
 5229                 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
 5230         if (error) {
 5231                 NFSUNLOCKSTATE();
 5232                 if (haslock) {
 5233                         NFSLOCKV4ROOTMUTEX();
 5234                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5235                         NFSUNLOCKV4ROOTMUTEX();
 5236                 }
 5237                 if (error == -1)
 5238                         error = 0;
 5239                 goto out;
 5240         }
 5241 
 5242         /*
 5243          * Now, we must Recall any delegations.
 5244          */
 5245         error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
 5246         if (error) {
 5247                 /*
 5248                  * nfsrv_cleandeleg() unlocks state for non-zero
 5249                  * return.
 5250                  */
 5251                 if (error == -1)
 5252                         goto tryagain;
 5253                 if (haslock) {
 5254                         NFSLOCKV4ROOTMUTEX();
 5255                         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5256                         NFSUNLOCKV4ROOTMUTEX();
 5257                 }
 5258                 goto out;
 5259         }
 5260 
 5261         /*
 5262          * Now, look for a conflicting open share.
 5263          */
 5264         if (remove) {
 5265                 /*
 5266                  * If the entry in the directory was the last reference to the
 5267                  * corresponding filesystem object, the object can be destroyed
 5268                  * */
 5269                 if(lfp->lf_usecount>1)
 5270                         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 5271                                 if (stp->ls_flags & NFSLCK_WRITEDENY) {
 5272                                         error = NFSERR_FILEOPEN;
 5273                                         break;
 5274                                 }
 5275                         }
 5276         }
 5277 
 5278         NFSUNLOCKSTATE();
 5279         if (haslock) {
 5280                 NFSLOCKV4ROOTMUTEX();
 5281                 nfsv4_unlock(&nfsv4rootfs_lock, 1);
 5282                 NFSUNLOCKV4ROOTMUTEX();
 5283         }
 5284 
 5285 out:
 5286         NFSEXITCODE(error);
 5287         return (error);
 5288 }
 5289 
 5290 /*
 5291  * Clear out all delegations for the file referred to by lfp.
 5292  * May return NFSERR_DELAY, if there will be a delay waiting for
 5293  * delegations to expire.
 5294  * Returns -1 to indicate it slept while recalling a delegation.
 5295  * This function has the side effect of deleting the nfslockfile structure,
 5296  * if it no longer has associated state and didn't have to sleep.
 5297  * Unlocks State before a non-zero value is returned.
 5298  */
 5299 static int
 5300 nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
 5301     struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
 5302 {
 5303         struct nfsstate *stp, *nstp;
 5304         int ret = 0;
 5305 
 5306         stp = LIST_FIRST(&lfp->lf_deleg);
 5307         while (stp != LIST_END(&lfp->lf_deleg)) {
 5308                 nstp = LIST_NEXT(stp, ls_file);
 5309                 if (stp->ls_clp != clp) {
 5310                         ret = nfsrv_delegconflict(stp, haslockp, p, vp);
 5311                         if (ret) {
 5312                                 /*
 5313                                  * nfsrv_delegconflict() unlocks state
 5314                                  * when it returns non-zero.
 5315                                  */
 5316                                 goto out;
 5317                         }
 5318                 }
 5319                 stp = nstp;
 5320         }
 5321 out:
 5322         NFSEXITCODE(ret);
 5323         return (ret);
 5324 }
 5325 
 5326 /*
 5327  * There are certain operations that, when being done outside of NFSv4,
 5328  * require that any NFSv4 delegation for the file be recalled.
 5329  * This function is to be called for those cases:
 5330  * VOP_RENAME() - When a delegation is being recalled for any reason,
 5331  *      the client may have to do Opens against the server, using the file's
 5332  *      final component name. If the file has been renamed on the server,
 5333  *      that component name will be incorrect and the Open will fail.
 5334  * VOP_REMOVE() - Theoretically, a client could Open a file after it has
 5335  *      been removed on the server, if there is a delegation issued to
 5336  *      that client for the file. I say "theoretically" since clients
 5337  *      normally do an Access Op before the Open and that Access Op will
 5338  *      fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
 5339  *      they will detect the file's removal in the same manner. (There is
 5340  *      one case where RFC3530 allows a client to do an Open without first
 5341  *      doing an Access Op, which is passage of a check against the ACE
 5342  *      returned with a Write delegation, but current practice is to ignore
 5343  *      the ACE and always do an Access Op.)
 5344  *      Since the functions can only be called with an unlocked vnode, this
 5345  *      can't be done at this time.
 5346  * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
 5347  *      locks locally in the client, which are not visible to the server. To
 5348  *      deal with this, issuing of delegations for a vnode must be disabled
 5349  *      and all delegations for the vnode recalled. This is done via the
 5350  *      second function, using the VV_DISABLEDELEG vflag on the vnode.
 5351  */
 5352 APPLESTATIC void
 5353 nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
 5354 {
 5355         time_t starttime;
 5356         int error;
 5357 
 5358         /*
 5359          * First, check to see if the server is currently running and it has
 5360          * been called for a regular file when issuing delegations.
 5361          */
 5362         if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
 5363             nfsrv_issuedelegs == 0)
 5364                 return;
 5365 
 5366         KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
 5367         /*
 5368          * First, get a reference on the nfsv4rootfs_lock so that an
 5369          * exclusive lock cannot be acquired by another thread.
 5370          */
 5371         NFSLOCKV4ROOTMUTEX();
 5372         nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 5373         NFSUNLOCKV4ROOTMUTEX();
 5374 
 5375         /*
 5376          * Now, call nfsrv_checkremove() in a loop while it returns
 5377          * NFSERR_DELAY. Return upon any other error or when timed out.
 5378          */
 5379         starttime = NFSD_MONOSEC;
 5380         do {
 5381                 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
 5382                         error = nfsrv_checkremove(vp, 0, p);
 5383                         NFSVOPUNLOCK(vp, 0);
 5384                 } else
 5385                         error = EPERM;
 5386                 if (error == NFSERR_DELAY) {
 5387                         if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
 5388                                 break;
 5389                         /* Sleep for a short period of time */
 5390                         (void) nfs_catnap(PZERO, 0, "nfsremove");
 5391                 }
 5392         } while (error == NFSERR_DELAY);
 5393         NFSLOCKV4ROOTMUTEX();
 5394         nfsv4_relref(&nfsv4rootfs_lock);
 5395         NFSUNLOCKV4ROOTMUTEX();
 5396 }
 5397 
 5398 APPLESTATIC void
 5399 nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
 5400 {
 5401 
 5402 #ifdef VV_DISABLEDELEG
 5403         /*
 5404          * First, flag issuance of delegations disabled.
 5405          */
 5406         atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
 5407 #endif
 5408 
 5409         /*
 5410          * Then call nfsd_recalldelegation() to get rid of all extant
 5411          * delegations.
 5412          */
 5413         nfsd_recalldelegation(vp, p);
 5414 }
 5415 
 5416 /*
 5417  * Check for conflicting locks, etc. and then get rid of delegations.
 5418  * (At one point I thought that I should get rid of delegations for any
 5419  *  Setattr, since it could potentially disallow the I/O op (read or write)
 5420  *  allowed by the delegation. However, Setattr Ops that aren't changing
 5421  *  the size get a stateid of all 0s, so you can't tell if it is a delegation
 5422  *  for the same client or a different one, so I decided to only get rid
 5423  *  of delegations for other clients when the size is being changed.)
 5424  * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
 5425  * as Write backs, even if there is no delegation, so it really isn't any
 5426  * different?)
 5427  */
 5428 APPLESTATIC int
 5429 nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
 5430     nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
 5431     struct nfsexstuff *exp, NFSPROC_T *p)
 5432 {
 5433         struct nfsstate st, *stp = &st;
 5434         struct nfslock lo, *lop = &lo;
 5435         int error = 0;
 5436         nfsquad_t clientid;
 5437 
 5438         if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
 5439                 stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
 5440                 lop->lo_first = nvap->na_size;
 5441         } else {
 5442                 stp->ls_flags = 0;
 5443                 lop->lo_first = 0;
 5444         }
 5445         if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
 5446             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
 5447             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
 5448             NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
 5449                 stp->ls_flags |= NFSLCK_SETATTR;
 5450         if (stp->ls_flags == 0)
 5451                 goto out;
 5452         lop->lo_end = NFS64BITSSET;
 5453         lop->lo_flags = NFSLCK_WRITE;
 5454         stp->ls_ownerlen = 0;
 5455         stp->ls_op = NULL;
 5456         stp->ls_uid = nd->nd_cred->cr_uid;
 5457         stp->ls_stateid.seqid = stateidp->seqid;
 5458         clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
 5459         clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
 5460         stp->ls_stateid.other[2] = stateidp->other[2];
 5461         error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 5462             stateidp, exp, nd, p);
 5463 
 5464 out:
 5465         NFSEXITCODE2(error, nd);
 5466         return (error);
 5467 }
 5468 
 /*
  * Check for a write delegation and do a CBGETATTR if there is one, updating
  * the attributes, as required.
  * Should I return an error if I can't get the attributes? (For now, I'll
  * just return ok.)
  */
 5475 APPLESTATIC int
 5476 nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
 5477     struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
 5478 {
 5479         struct nfsstate *stp;
 5480         struct nfslockfile *lfp;
 5481         struct nfsclient *clp;
 5482         struct nfsvattr nva;
 5483         fhandle_t nfh;
 5484         int error = 0;
 5485         nfsattrbit_t cbbits;
 5486         u_quad_t delegfilerev;
 5487 
 5488         NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
 5489         if (!NFSNONZERO_ATTRBIT(&cbbits))
 5490                 goto out;
 5491         if (nfsrv_writedelegcnt == 0)
 5492                 goto out;
 5493 
 5494         /*
 5495          * Get the lock file structure.
 5496          * (A return of -1 means no associated state, so return ok.)
 5497          */
 5498         error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
 5499         NFSLOCKSTATE();
 5500         if (!error)
 5501                 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
 5502         if (error) {
 5503                 NFSUNLOCKSTATE();
 5504                 if (error == -1)
 5505                         error = 0;
 5506                 goto out;
 5507         }
 5508 
 5509         /*
 5510          * Now, look for a write delegation.
 5511          */
 5512         LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 5513                 if (stp->ls_flags & NFSLCK_DELEGWRITE)
 5514                         break;
 5515         }
 5516         if (stp == LIST_END(&lfp->lf_deleg)) {
 5517                 NFSUNLOCKSTATE();
 5518                 goto out;
 5519         }
 5520         clp = stp->ls_clp;
 5521         delegfilerev = stp->ls_filerev;
 5522 
 5523         /*
 5524          * If the Write delegation was issued as a part of this Compound RPC
 5525          * or if we have an Implied Clientid (used in a previous Op in this
 5526          * compound) and it is the client the delegation was issued to,
 5527          * just return ok.
 5528          * I also assume that it is from the same client iff the network
 5529          * host IP address is the same as the callback address. (Not
 5530          * exactly correct by the RFC, but avoids a lot of Getattr
 5531          * callbacks.)
 5532          */
 5533         if (nd->nd_compref == stp->ls_compref ||
 5534             ((nd->nd_flag & ND_IMPLIEDCLID) &&
 5535              clp->lc_clientid.qval == nd->nd_clientid.qval) ||
 5536              nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
 5537                 NFSUNLOCKSTATE();
 5538                 goto out;
 5539         }
 5540 
 5541         /*
 5542          * We are now done with the delegation state structure,
 5543          * so the statelock can be released and we can now tsleep().
 5544          */
 5545 
 5546         /*
 5547          * Now, we must do the CB Getattr callback, to see if Change or Size
 5548          * has changed.
 5549          */
 5550         if (clp->lc_expiry >= NFSD_MONOSEC) {
 5551                 NFSUNLOCKSTATE();
 5552                 NFSVNO_ATTRINIT(&nva);
 5553                 nva.na_filerev = NFS64BITSSET;
 5554                 error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
 5555                     0, &nfh, &nva, &cbbits, 0, p);
 5556                 if (!error) {
 5557                         if ((nva.na_filerev != NFS64BITSSET &&
 5558                             nva.na_filerev > delegfilerev) ||
 5559                             (NFSVNO_ISSETSIZE(&nva) &&
 5560                              nva.na_size != nvap->na_size)) {
 5561                                 error = nfsvno_updfilerev(vp, nvap, nd, p);
 5562                                 if (NFSVNO_ISSETSIZE(&nva))
 5563                                         nvap->na_size = nva.na_size;
 5564                         }
 5565                 } else
 5566                         error = 0;      /* Ignore callback errors for now. */
 5567         } else {
 5568                 NFSUNLOCKSTATE();
 5569         }
 5570 
 5571 out:
 5572         NFSEXITCODE2(error, nd);
 5573         return (error);
 5574 }
 5575 
 5576 /*
 5577  * This function looks for openowners that haven't had any opens for
 5578  * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
 5579  * is set.
 5580  */
 5581 APPLESTATIC void
 5582 nfsrv_throwawayopens(NFSPROC_T *p)
 5583 {
 5584         struct nfsclient *clp, *nclp;
 5585         struct nfsstate *stp, *nstp;
 5586         int i;
 5587 
 5588         NFSLOCKSTATE();
 5589         nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS;
 5590         /*
 5591          * For each client...
 5592          */
 5593         for (i = 0; i < nfsrv_clienthashsize; i++) {
 5594             LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
 5595                 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
 5596                         if (LIST_EMPTY(&stp->ls_open) &&
 5597                             (stp->ls_noopens > NFSNOOPEN ||
 5598                              (nfsrv_openpluslock * 2) >
 5599                              nfsrv_v4statelimit))
 5600                                 nfsrv_freeopenowner(stp, 0, p);
 5601                 }
 5602             }
 5603         }
 5604         NFSUNLOCKSTATE();
 5605 }
 5606 
 5607 /*
 5608  * This function checks to see if the credentials are the same.
 5609  * Returns 1 for not same, 0 otherwise.
 5610  */
 5611 static int
 5612 nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp)
 5613 {
 5614 
 5615         if (nd->nd_flag & ND_GSS) {
 5616                 if (!(clp->lc_flags & LCL_GSS))
 5617                         return (1);
 5618                 if (clp->lc_flags & LCL_NAME) {
 5619                         if (nd->nd_princlen != clp->lc_namelen ||
 5620                             NFSBCMP(nd->nd_principal, clp->lc_name,
 5621                                 clp->lc_namelen))
 5622                                 return (1);
 5623                         else
 5624                                 return (0);
 5625                 }
 5626                 if (nd->nd_cred->cr_uid == clp->lc_uid)
 5627                         return (0);
 5628                 else
 5629                         return (1);
 5630         } else if (clp->lc_flags & LCL_GSS)
 5631                 return (1);
 5632         /*
 5633          * For AUTH_SYS, allow the same uid or root. (This is underspecified
 5634          * in RFC3530, which talks about principals, but doesn't say anything
 5635          * about uids for AUTH_SYS.)
 5636          */
 5637         if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
 5638                 return (0);
 5639         else
 5640                 return (1);
 5641 }
 5642 
 5643 /*
 5644  * Calculate the lease expiry time.
 5645  */
 5646 static time_t
 5647 nfsrv_leaseexpiry(void)
 5648 {
 5649 
 5650         if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
 5651                 return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
 5652         return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
 5653 }
 5654 
 5655 /*
 5656  * Delay the delegation timeout as far as ls_delegtimelimit, as required.
 5657  */
 5658 static void
 5659 nfsrv_delaydelegtimeout(struct nfsstate *stp)
 5660 {
 5661 
 5662         if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
 5663                 return;
 5664 
 5665         if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
 5666             stp->ls_delegtime < stp->ls_delegtimelimit) {
 5667                 stp->ls_delegtime += nfsrv_lease;
 5668                 if (stp->ls_delegtime > stp->ls_delegtimelimit)
 5669                         stp->ls_delegtime = stp->ls_delegtimelimit;
 5670         }
 5671 }
 5672 
 5673 /*
 5674  * This function checks to see if there is any other state associated
 5675  * with the openowner for this Open.
 5676  * It returns 1 if there is no other state, 0 otherwise.
 5677  */
 5678 static int
 5679 nfsrv_nootherstate(struct nfsstate *stp)
 5680 {
 5681         struct nfsstate *tstp;
 5682 
 5683         LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
 5684                 if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
 5685                         return (0);
 5686         }
 5687         return (1);
 5688 }
 5689 
 5690 /*
 5691  * Create a list of lock deltas (changes to local byte range locking
 5692  * that can be rolled back using the list) and apply the changes via
 5693  * nfsvno_advlock(). Optionally, lock the list. It is expected that either
 5694  * the rollback or update function will be called after this.
 5695  * It returns an error (and rolls back, as required), if any nfsvno_advlock()
 5696  * call fails. If it returns an error, it will unlock the list.
 5697  */
 5698 static int
 5699 nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
 5700     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
 5701 {
 5702         struct nfslock *lop, *nlop;
 5703         int error = 0;
 5704 
 5705         /* Loop through the list of locks. */
 5706         lop = LIST_FIRST(&lfp->lf_locallock);
 5707         while (first < end && lop != NULL) {
 5708                 nlop = LIST_NEXT(lop, lo_lckowner);
 5709                 if (first >= lop->lo_end) {
 5710                         /* not there yet */
 5711                         lop = nlop;
 5712                 } else if (first < lop->lo_first) {
 5713                         /* new one starts before entry in list */
 5714                         if (end <= lop->lo_first) {
 5715                                 /* no overlap between old and new */
 5716                                 error = nfsrv_dolocal(vp, lfp, flags,
 5717                                     NFSLCK_UNLOCK, first, end, cfp, p);
 5718                                 if (error != 0)
 5719                                         break;
 5720                                 first = end;
 5721                         } else {
 5722                                 /* handle fragment overlapped with new one */
 5723                                 error = nfsrv_dolocal(vp, lfp, flags,
 5724                                     NFSLCK_UNLOCK, first, lop->lo_first, cfp,
 5725                                     p);
 5726                                 if (error != 0)
 5727                                         break;
 5728                                 first = lop->lo_first;
 5729                         }
 5730                 } else {
 5731                         /* new one overlaps this entry in list */
 5732                         if (end <= lop->lo_end) {
 5733                                 /* overlaps all of new one */
 5734                                 error = nfsrv_dolocal(vp, lfp, flags,
 5735                                     lop->lo_flags, first, end, cfp, p);
 5736                                 if (error != 0)
 5737                                         break;
 5738                                 first = end;
 5739                         } else {
 5740                                 /* handle fragment overlapped with new one */
 5741                                 error = nfsrv_dolocal(vp, lfp, flags,
 5742                                     lop->lo_flags, first, lop->lo_end, cfp, p);
 5743                                 if (error != 0)
 5744                                         break;
 5745                                 first = lop->lo_end;
 5746                                 lop = nlop;
 5747                         }
 5748                 }
 5749         }
 5750         if (first < end && error == 0)
 5751                 /* handle fragment past end of list */
 5752                 error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
 5753                     end, cfp, p);
 5754 
 5755         NFSEXITCODE(error);
 5756         return (error);
 5757 }
 5758 
 5759 /*
 5760  * Local lock unlock. Unlock all byte ranges that are no longer locked
 5761  * by NFSv4. To do this, unlock any subranges of first-->end that
 5762  * do not overlap with the byte ranges of any lock in the lfp->lf_lock
 5763  * list. This list has all locks for the file held by other
 5764  * <clientid, lockowner> tuples. The list is ordered by increasing
 5765  * lo_first value, but may have entries that overlap each other, for
 5766  * the case of read locks.
 5767  */
 5768 static void
 5769 nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
 5770     uint64_t init_end, NFSPROC_T *p)
 5771 {
 5772         struct nfslock *lop;
 5773         uint64_t first, end, prevfirst __unused;
 5774 
 5775         first = init_first;
 5776         end = init_end;
 5777         while (first < init_end) {
 5778                 /* Loop through all nfs locks, adjusting first and end */
 5779                 prevfirst = 0;
 5780                 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
 5781                         KASSERT(prevfirst <= lop->lo_first,
 5782                             ("nfsv4 locks out of order"));
 5783                         KASSERT(lop->lo_first < lop->lo_end,
 5784                             ("nfsv4 bogus lock"));
 5785                         prevfirst = lop->lo_first;
 5786                         if (first >= lop->lo_first &&
 5787                             first < lop->lo_end)
 5788                                 /*
 5789                                  * Overlaps with initial part, so trim
 5790                                  * off that initial part by moving first past
 5791                                  * it.
 5792                                  */
 5793                                 first = lop->lo_end;
 5794                         else if (end > lop->lo_first &&
 5795                             lop->lo_first > first) {
 5796                                 /*
 5797                                  * This lock defines the end of the
 5798                                  * segment to unlock, so set end to the
 5799                                  * start of it and break out of the loop.
 5800                                  */
 5801                                 end = lop->lo_first;
 5802                                 break;
 5803                         }
 5804                         if (first >= end)
 5805                                 /*
 5806                                  * There is no segment left to do, so
 5807                                  * break out of this loop and then exit
 5808                                  * the outer while() since first will be set
 5809                                  * to end, which must equal init_end here.
 5810                                  */
 5811                                 break;
 5812                 }
 5813                 if (first < end) {
 5814                         /* Unlock this segment */
 5815                         (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
 5816                             NFSLCK_READ, first, end, NULL, p);
 5817                         nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
 5818                             first, end);
 5819                 }
 5820                 /*
 5821                  * Now move past this segment and look for any further
 5822                  * segment in the range, if there is one.
 5823                  */
 5824                 first = end;
 5825                 end = init_end;
 5826         }
 5827 }
 5828 
 5829 /*
 5830  * Do the local lock operation and update the rollback list, as required.
 5831  * Perform the rollback and return the error if nfsvno_advlock() fails.
 5832  */
 5833 static int
 5834 nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
 5835     uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
 5836 {
 5837         struct nfsrollback *rlp;
 5838         int error = 0, ltype, oldltype;
 5839 
 5840         if (flags & NFSLCK_WRITE)
 5841                 ltype = F_WRLCK;
 5842         else if (flags & NFSLCK_READ)
 5843                 ltype = F_RDLCK;
 5844         else
 5845                 ltype = F_UNLCK;
 5846         if (oldflags & NFSLCK_WRITE)
 5847                 oldltype = F_WRLCK;
 5848         else if (oldflags & NFSLCK_READ)
 5849                 oldltype = F_RDLCK;
 5850         else
 5851                 oldltype = F_UNLCK;
 5852         if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
 5853                 /* nothing to do */
 5854                 goto out;
 5855         error = nfsvno_advlock(vp, ltype, first, end, p);
 5856         if (error != 0) {
 5857                 if (cfp != NULL) {
 5858                         cfp->cl_clientid.lval[0] = 0;
 5859                         cfp->cl_clientid.lval[1] = 0;
 5860                         cfp->cl_first = 0;
 5861                         cfp->cl_end = NFS64BITSSET;
 5862                         cfp->cl_flags = NFSLCK_WRITE;
 5863                         cfp->cl_ownerlen = 5;
 5864                         NFSBCOPY("LOCAL", cfp->cl_owner, 5);
 5865                 }
 5866                 nfsrv_locallock_rollback(vp, lfp, p);
 5867         } else if (ltype != F_UNLCK) {
 5868                 rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
 5869                     M_WAITOK);
 5870                 rlp->rlck_first = first;
 5871                 rlp->rlck_end = end;
 5872                 rlp->rlck_type = oldltype;
 5873                 LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
 5874         }
 5875 
 5876 out:
 5877         NFSEXITCODE(error);
 5878         return (error);
 5879 }
 5880 
 5881 /*
 5882  * Roll back local lock changes and free up the rollback list.
 5883  */
 5884 static void
 5885 nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
 5886 {
 5887         struct nfsrollback *rlp, *nrlp;
 5888 
 5889         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
 5890                 (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
 5891                     rlp->rlck_end, p);
 5892                 free(rlp, M_NFSDROLLBACK);
 5893         }
 5894         LIST_INIT(&lfp->lf_rollback);
 5895 }
 5896 
 5897 /*
 5898  * Update local lock list and delete rollback list (ie now committed to the
 5899  * local locks). Most of the work is done by the internal function.
 5900  */
 5901 static void
 5902 nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
 5903     uint64_t end)
 5904 {
 5905         struct nfsrollback *rlp, *nrlp;
 5906         struct nfslock *new_lop, *other_lop;
 5907 
 5908         new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
 5909         if (flags & (NFSLCK_READ | NFSLCK_WRITE))
 5910                 other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
 5911                     M_WAITOK);
 5912         else
 5913                 other_lop = NULL;
 5914         new_lop->lo_flags = flags;
 5915         new_lop->lo_first = first;
 5916         new_lop->lo_end = end;
 5917         nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
 5918         if (new_lop != NULL)
 5919                 free(new_lop, M_NFSDLOCK);
 5920         if (other_lop != NULL)
 5921                 free(other_lop, M_NFSDLOCK);
 5922 
 5923         /* and get rid of the rollback list */
 5924         LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
 5925                 free(rlp, M_NFSDROLLBACK);
 5926         LIST_INIT(&lfp->lf_rollback);
 5927 }
 5928 
 5929 /*
 5930  * Lock the struct nfslockfile for local lock updating.
 5931  */
 5932 static void
 5933 nfsrv_locklf(struct nfslockfile *lfp)
 5934 {
 5935         int gotlock;
 5936 
 5937         /* lf_usecount ensures *lfp won't be free'd */
 5938         lfp->lf_usecount++;
 5939         do {
 5940                 gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
 5941                     NFSSTATEMUTEXPTR, NULL);
 5942         } while (gotlock == 0);
 5943         lfp->lf_usecount--;
 5944 }
 5945 
 5946 /*
 5947  * Unlock the struct nfslockfile after local lock updating.
 5948  */
 5949 static void
 5950 nfsrv_unlocklf(struct nfslockfile *lfp)
 5951 {
 5952 
 5953         nfsv4_unlock(&lfp->lf_locallock_lck, 0);
 5954 }
 5955 
 5956 /*
 5957  * Clear out all state for the NFSv4 server.
 5958  * Must be called by a thread that can sleep when no nfsds are running.
 5959  */
 5960 void
 5961 nfsrv_throwawayallstate(NFSPROC_T *p)
 5962 {
 5963         struct nfsclient *clp, *nclp;
 5964         struct nfslockfile *lfp, *nlfp;
 5965         int i;
 5966 
 5967         /*
 5968          * For each client, clean out the state and then free the structure.
 5969          */
 5970         for (i = 0; i < nfsrv_clienthashsize; i++) {
 5971                 LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
 5972                         nfsrv_cleanclient(clp, p);
 5973                         nfsrv_freedeleglist(&clp->lc_deleg);
 5974                         nfsrv_freedeleglist(&clp->lc_olddeleg);
 5975                         free(clp->lc_stateid, M_NFSDCLIENT);
 5976                         free(clp, M_NFSDCLIENT);
 5977                 }
 5978         }
 5979 
 5980         /*
 5981          * Also, free up any remaining lock file structures.
 5982          */
 5983         for (i = 0; i < nfsrv_lockhashsize; i++) {
 5984                 LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) {
 5985                         printf("nfsd unload: fnd a lock file struct\n");
 5986                         nfsrv_freenfslockfile(lfp);
 5987                 }
 5988         }
 5989 
 5990         /* And get rid of the deviceid structures and layouts. */
 5991         nfsrv_freealllayoutsanddevids();
 5992 }
 5993 
 5994 /*
 5995  * Check the sequence# for the session and slot provided as an argument.
 5996  * Also, renew the lease if the session will return NFS_OK.
 5997  */
 5998 int
 5999 nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
 6000     uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
 6001     uint32_t *sflagsp, NFSPROC_T *p)
 6002 {
 6003         struct nfsdsession *sep;
 6004         struct nfssessionhash *shp;
 6005         int error;
 6006         SVCXPRT *savxprt;
 6007 
 6008         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6009         NFSLOCKSESSION(shp);
 6010         sep = nfsrv_findsession(nd->nd_sessionid);
 6011         if (sep == NULL) {
 6012                 NFSUNLOCKSESSION(shp);
 6013                 return (NFSERR_BADSESSION);
 6014         }
 6015         error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
 6016             sep->sess_slots, NULL, NFSV4_SLOTS - 1);
 6017         if (error != 0) {
 6018                 NFSUNLOCKSESSION(shp);
 6019                 return (error);
 6020         }
 6021         if (cache_this != 0)
 6022                 nd->nd_flag |= ND_SAVEREPLY;
 6023         /* Renew the lease. */
 6024         sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
 6025         nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
 6026         nd->nd_flag |= ND_IMPLIEDCLID;
 6027 
 6028         /*
 6029          * If this session handles the backchannel, save the nd_xprt for this
 6030          * RPC, since this is the one being used.
 6031          * RFC-5661 specifies that the fore channel will be implicitly
 6032          * bound by a Sequence operation.  However, since some NFSv4.1 clients
 6033          * erroneously assumed that the back channel would be implicitly
 6034          * bound as well, do the implicit binding unless a
 6035          * BindConnectiontoSession has already been done on the session.
 6036          */
 6037         if (sep->sess_clp->lc_req.nr_client != NULL &&
 6038             sep->sess_cbsess.nfsess_xprt != nd->nd_xprt &&
 6039             (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0 &&
 6040             (sep->sess_clp->lc_flags & LCL_DONEBINDCONN) == 0) {
 6041                 NFSD_DEBUG(2,
 6042                     "nfsrv_checksequence: implicit back channel bind\n");
 6043                 savxprt = sep->sess_cbsess.nfsess_xprt;
 6044                 SVC_ACQUIRE(nd->nd_xprt);
 6045                 nd->nd_xprt->xp_p2 =
 6046                     sep->sess_clp->lc_req.nr_client->cl_private;
 6047                 nd->nd_xprt->xp_idletimeout = 0;        /* Disable timeout. */
 6048                 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
 6049                 if (savxprt != NULL)
 6050                         SVC_RELEASE(savxprt);
 6051         }
 6052 
 6053         *sflagsp = 0;
 6054         if (sep->sess_clp->lc_req.nr_client == NULL)
 6055                 *sflagsp |= NFSV4SEQ_CBPATHDOWN;
 6056         NFSUNLOCKSESSION(shp);
 6057         if (error == NFSERR_EXPIRED) {
 6058                 *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
 6059                 error = 0;
 6060         } else if (error == NFSERR_ADMINREVOKED) {
 6061                 *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
 6062                 error = 0;
 6063         }
 6064         *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
 6065         return (0);
 6066 }
 6067 
 6068 /*
 6069  * Check/set reclaim complete for this session/clientid.
 6070  */
 6071 int
 6072 nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs)
 6073 {
 6074         struct nfsdsession *sep;
 6075         struct nfssessionhash *shp;
 6076         int error = 0;
 6077 
 6078         shp = NFSSESSIONHASH(nd->nd_sessionid);
 6079         NFSLOCKSTATE();
 6080         NFSLOCKSESSION(shp);
 6081         sep = nfsrv_findsession(nd->nd_sessionid);
 6082         if (sep == NULL) {
 6083                 NFSUNLOCKSESSION(shp);
 6084                 NFSUNLOCKSTATE();
 6085                 return (NFSERR_BADSESSION);
 6086         }
 6087 
 6088         if (onefs != 0)
 6089                 sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS;
 6090                 /* Check to see if reclaim complete has already happened. */
 6091         else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
 6092                 error = NFSERR_COMPLETEALREADY;
 6093         else {
 6094                 sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
 6095                 nfsrv_markreclaim(sep->sess_clp);
 6096         }
 6097         NFSUNLOCKSESSION(shp);
 6098         NFSUNLOCKSTATE();
 6099         return (error);
 6100 }
 6101 
 6102 /*
 6103  * Cache the reply in a session slot.
 6104  */
 6105 void
 6106 nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat,
 6107    struct mbuf **m)
 6108 {
 6109         struct nfsdsession *sep;
 6110         struct nfssessionhash *shp;
 6111 
 6112         shp = NFSSESSIONHASH(sessionid);
 6113         NFSLOCKSESSION(shp);
 6114         sep = nfsrv_findsession(sessionid);
 6115         if (sep == NULL) {
 6116                 NFSUNLOCKSESSION(shp);
 6117                 printf("nfsrv_cache_session: no session\n");
 6118                 m_freem(*m);
 6119                 return;
 6120         }
 6121         nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m);
 6122         NFSUNLOCKSESSION(shp);
 6123 }
 6124 
 6125 /*
 6126  * Search for a session that matches the sessionid.
 6127  */
 6128 static struct nfsdsession *
 6129 nfsrv_findsession(uint8_t *sessionid)
 6130 {
 6131         struct nfsdsession *sep;
 6132         struct nfssessionhash *shp;
 6133 
 6134         shp = NFSSESSIONHASH(sessionid);
 6135         LIST_FOREACH(sep, &shp->list, sess_hash) {
 6136                 if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
 6137                         break;
 6138         }
 6139         return (sep);
 6140 }
 6141 
 6142 /*
 6143  * Destroy a session.
 6144  */
 6145 int
 6146 nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
 6147 {
 6148         int error, igotlock, samesess;
 6149 
 6150         samesess = 0;
 6151         if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) &&
 6152             (nd->nd_flag & ND_HASSEQUENCE) != 0) {
 6153                 samesess = 1;
 6154                 if ((nd->nd_flag & ND_LASTOP) == 0)
 6155                         return (NFSERR_BADSESSION);
 6156         }
 6157 
 6158         /* Lock out other nfsd threads */
 6159         NFSLOCKV4ROOTMUTEX();
 6160         nfsv4_relref(&nfsv4rootfs_lock);
 6161         do {
 6162                 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
 6163                     NFSV4ROOTLOCKMUTEXPTR, NULL);
 6164         } while (igotlock == 0);
 6165         NFSUNLOCKV4ROOTMUTEX();
 6166 
 6167         error = nfsrv_freesession(NULL, sessionid);
 6168         if (error == 0 && samesess != 0)
 6169                 nd->nd_flag &= ~ND_HASSEQUENCE;
 6170 
 6171         NFSLOCKV4ROOTMUTEX();
 6172         nfsv4_unlock(&nfsv4rootfs_lock, 1);
 6173         NFSUNLOCKV4ROOTMUTEX();
 6174         return (error);
 6175 }
 6176 
 6177 /*
 6178  * Bind a connection to a session.
 6179  * For now, only certain variants are supported, since the current session
 6180  * structure can only handle a single backchannel entry, which will be
 6181  * applied to all connections if it is set.
 6182  */
 6183 int
 6184 nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp)
 6185 {
 6186         struct nfssessionhash *shp;
 6187         struct nfsdsession *sep;
 6188         struct nfsclient *clp;
 6189         SVCXPRT *savxprt;
 6190         int error;
 6191 
 6192         error = 0;
 6193         shp = NFSSESSIONHASH(sessionid);
 6194         NFSLOCKSTATE();
 6195         NFSLOCKSESSION(shp);
 6196         sep = nfsrv_findsession(sessionid);
 6197         if (sep != NULL) {
 6198                 clp = sep->sess_clp;
 6199                 if (*foreaftp == NFSCDFC4_BACK ||
 6200                     *foreaftp == NFSCDFC4_BACK_OR_BOTH ||
 6201                     *foreaftp == NFSCDFC4_FORE_OR_BOTH) {
 6202                         /* Try to set up a backchannel. */
 6203                         if (clp->lc_req.nr_client == NULL) {
 6204                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire "
 6205                                     "backchannel\n");
 6206                                 clp->lc_req.nr_client = (struct __rpc_client *)
 6207                                     clnt_bck_create(nd->nd_xprt->xp_socket,
 6208                                     sep->sess_cbprogram, NFSV4_CBVERS);
 6209                         }
 6210                         if (clp->lc_req.nr_client != NULL) {
 6211                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: set up "
 6212                                     "backchannel\n");
 6213                                 savxprt = sep->sess_cbsess.nfsess_xprt;
 6214                                 SVC_ACQUIRE(nd->nd_xprt);
 6215                                 nd->nd_xprt->xp_p2 =
 6216                                     clp->lc_req.nr_client->cl_private;
 6217                                 /* Disable idle timeout. */
 6218                                 nd->nd_xprt->xp_idletimeout = 0;
 6219                                 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
 6220                                 if (savxprt != NULL)
 6221                                         SVC_RELEASE(savxprt);
 6222                                 sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN;
 6223                                 clp->lc_flags |= LCL_DONEBINDCONN;
 6224                                 if (*foreaftp == NFSCDFS4_BACK)
 6225                                         *foreaftp = NFSCDFS4_BACK;
 6226                                 else
 6227                                         *foreaftp = NFSCDFS4_BOTH;
 6228                         } else if (*foreaftp != NFSCDFC4_BACK) {
 6229                                 NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set "
 6230                                     "up backchannel\n");
 6231                                 sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
 6232                                 clp->lc_flags |= LCL_DONEBINDCONN;
 6233                                 *foreaftp = NFSCDFS4_FORE;
 6234                         } else {
 6235                                 error = NFSERR_NOTSUPP;
 6236                                 printf("nfsrv_bindconnsess: Can't add "
 6237                                     "backchannel\n");
 6238                         }
 6239                 } else {
 6240                         NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n");
 6241                         clp->lc_flags |= LCL_DONEBINDCONN;
 6242                         *foreaftp = NFSCDFS4_FORE;
 6243                 }
 6244         } else
 6245                 error = NFSERR_BADSESSION;
 6246         NFSUNLOCKSESSION(shp);
 6247         NFSUNLOCKSTATE();
 6248         return (error);
 6249 }
 6250 
 6251 /*
 6252  * Free up a session structure.
 6253  */
 6254 static int
 6255 nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid)
 6256 {
 6257         struct nfssessionhash *shp;
 6258         int i;
 6259 
 6260         NFSLOCKSTATE();
 6261         if (sep == NULL) {
 6262                 shp = NFSSESSIONHASH(sessionid);
 6263                 NFSLOCKSESSION(shp);
 6264                 sep = nfsrv_findsession(sessionid);
 6265         } else {
 6266                 shp = NFSSESSIONHASH(sep->sess_sessionid);
 6267                 NFSLOCKSESSION(shp);
 6268         }
 6269         if (sep != NULL) {
 6270                 sep->sess_refcnt--;
 6271                 if (sep->sess_refcnt > 0) {
 6272                         NFSUNLOCKSESSION(shp);
 6273                         NFSUNLOCKSTATE();
 6274                         return (NFSERR_BACKCHANBUSY);
 6275                 }
 6276                 LIST_REMOVE(sep, sess_hash);
 6277                 LIST_REMOVE(sep, sess_list);
 6278         }
 6279         NFSUNLOCKSESSION(shp);
 6280         NFSUNLOCKSTATE();
 6281         if (sep == NULL)
 6282                 return (NFSERR_BADSESSION);
 6283         for (i = 0; i < NFSV4_SLOTS; i++)
 6284                 if (sep->sess_slots[i].nfssl_reply != NULL)
 6285                         m_freem(sep->sess_slots[i].nfssl_reply);
 6286         if (sep->sess_cbsess.nfsess_xprt != NULL)
 6287                 SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
 6288         free(sep, M_NFSDSESSION);
 6289         return (0);
 6290 }
 6291 
 6292 /*
 6293  * Free a stateid.
 6294  * RFC5661 says that it should fail when there are associated opens, locks
 6295  * or delegations. Since stateids represent opens, I don't see how you can
 6296  * free an open stateid (it will be free'd when closed), so this function
 6297  * only works for lock stateids (freeing the lock_owner) or delegations.
 6298  */
 6299 int
 6300 nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
 6301     NFSPROC_T *p)
 6302 {
 6303         struct nfsclient *clp;
 6304         struct nfsstate *stp;
 6305         int error;
 6306 
 6307         NFSLOCKSTATE();
 6308         /*
 6309          * Look up the stateid
 6310          */
 6311         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 6312             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 6313         if (error == 0) {
 6314                 /* First, check for a delegation. */
 6315                 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
 6316                         if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
 6317                             NFSX_STATEIDOTHER))
 6318                                 break;
 6319                 }
 6320                 if (stp != NULL) {
 6321                         nfsrv_freedeleg(stp);
 6322                         NFSUNLOCKSTATE();
 6323                         return (error);
 6324                 }
 6325         }
 6326         /* Not a delegation, try for a lock_owner. */
 6327         if (error == 0)
 6328                 error = nfsrv_getstate(clp, stateidp, 0, &stp);
 6329         if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
 6330             NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
 6331                 /* Not a lock_owner stateid. */
 6332                 error = NFSERR_LOCKSHELD;
 6333         if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
 6334                 error = NFSERR_LOCKSHELD;
 6335         if (error == 0)
 6336                 nfsrv_freelockowner(stp, NULL, 0, p);
 6337         NFSUNLOCKSTATE();
 6338         return (error);
 6339 }
 6340 
 6341 /*
 6342  * Test a stateid.
 6343  */
 6344 int
 6345 nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
 6346     NFSPROC_T *p)
 6347 {
 6348         struct nfsclient *clp;
 6349         struct nfsstate *stp;
 6350         int error;
 6351 
 6352         NFSLOCKSTATE();
 6353         /*
 6354          * Look up the stateid
 6355          */
 6356         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 6357             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 6358         if (error == 0)
 6359                 error = nfsrv_getstate(clp, stateidp, 0, &stp);
 6360         if (error == 0 && stateidp->seqid != 0 &&
 6361             SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid))
 6362                 error = NFSERR_OLDSTATEID;
 6363         NFSUNLOCKSTATE();
 6364         return (error);
 6365 }
 6366 
 6367 /*
 6368  * Generate the xdr for an NFSv4.1 CBSequence Operation.
 6369  */
 6370 static int
 6371 nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
 6372     int dont_replycache, struct nfsdsession **sepp)
 6373 {
 6374         struct nfsdsession *sep;
 6375         uint32_t *tl, slotseq = 0;
 6376         int maxslot, slotpos;
 6377         uint8_t sessionid[NFSX_V4SESSIONID];
 6378         int error;
 6379 
 6380         error = nfsv4_getcbsession(clp, sepp);
 6381         if (error != 0)
 6382                 return (error);
 6383         sep = *sepp;
 6384         (void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot,
 6385             &slotseq, sessionid);
 6386         KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
 6387 
 6388         /* Build the Sequence arguments. */
 6389         NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
 6390         bcopy(sessionid, tl, NFSX_V4SESSIONID);
 6391         tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
 6392         nd->nd_slotseq = tl;
 6393         *tl++ = txdr_unsigned(slotseq);
 6394         *tl++ = txdr_unsigned(slotpos);
 6395         *tl++ = txdr_unsigned(maxslot);
 6396         if (dont_replycache == 0)
 6397                 *tl++ = newnfs_true;
 6398         else
 6399                 *tl++ = newnfs_false;
 6400         *tl = 0;                        /* No referring call list, for now. */
 6401         nd->nd_flag |= ND_HASSEQUENCE;
 6402         return (0);
 6403 }
 6404 
 6405 /*
 6406  * Get a session for the callback.
 6407  */
 6408 static int
 6409 nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
 6410 {
 6411         struct nfsdsession *sep;
 6412 
 6413         NFSLOCKSTATE();
 6414         LIST_FOREACH(sep, &clp->lc_session, sess_list) {
 6415                 if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
 6416                         break;
 6417         }
 6418         if (sep == NULL) {
 6419                 NFSUNLOCKSTATE();
 6420                 return (NFSERR_BADSESSION);
 6421         }
 6422         sep->sess_refcnt++;
 6423         *sepp = sep;
 6424         NFSUNLOCKSTATE();
 6425         return (0);
 6426 }
 6427 
 6428 /*
 6429  * Free up all backchannel xprts.  This needs to be done when the nfsd threads
 6430  * exit, since those transports will all be going away.
 6431  * This is only called after all the nfsd threads are done performing RPCs,
 6432  * so locking shouldn't be an issue.
 6433  */
 6434 APPLESTATIC void
 6435 nfsrv_freeallbackchannel_xprts(void)
 6436 {
 6437         struct nfsdsession *sep;
 6438         struct nfsclient *clp;
 6439         SVCXPRT *xprt;
 6440         int i;
 6441 
 6442         for (i = 0; i < nfsrv_clienthashsize; i++) {
 6443                 LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
 6444                         LIST_FOREACH(sep, &clp->lc_session, sess_list) {
 6445                                 xprt = sep->sess_cbsess.nfsess_xprt;
 6446                                 sep->sess_cbsess.nfsess_xprt = NULL;
 6447                                 if (xprt != NULL)
 6448                                         SVC_RELEASE(xprt);
 6449                         }
 6450                 }
 6451         }
 6452 }
 6453 
 6454 /*
 6455  * Do a layout commit.  Actually just call nfsrv_updatemdsattr().
 6456  * I have no idea if the rest of these arguments will ever be useful?
 6457  */
 6458 int
 6459 nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype,
 6460     int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len,
 6461     int hasnewmtime, struct timespec *newmtimep, int reclaim,
 6462     nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep,
 6463     uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p)
 6464 {
 6465         struct nfsvattr na;
 6466         int error;
 6467 
 6468         error = nfsrv_updatemdsattr(vp, &na, p);
 6469         if (error == 0) {
 6470                 *hasnewsizep = 1;
 6471                 *newsizep = na.na_size;
 6472         }
 6473         return (error);
 6474 }
 6475 
 6476 /*
 6477  * Try and get a layout.
 6478  */
 6479 int
 6480 nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp,
 6481     int layouttype, int *iomode, uint64_t *offset, uint64_t *len,
 6482     uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose,
 6483     int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p)
 6484 {
 6485         struct nfslayouthash *lhyp;
 6486         struct nfslayout *lyp;
 6487         char *devid;
 6488         fhandle_t fh, *dsfhp;
 6489         int error, mirrorcnt;
 6490 
 6491         if (nfsrv_devidcnt == 0)
 6492                 return (NFSERR_UNKNLAYOUTTYPE);
 6493 
 6494         if (*offset != 0)
 6495                 printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset,
 6496                     (uintmax_t)*len);
 6497         error = nfsvno_getfh(vp, &fh, p);
 6498         NFSD_DEBUG(4, "layoutget getfh=%d\n", error);
 6499         if (error != 0)
 6500                 return (error);
 6501 
 6502         /*
 6503          * For now, all layouts are for entire files.
 6504          * Only issue Read/Write layouts if requested for a non-readonly fs.
 6505          */
 6506         if (NFSVNO_EXRDONLY(exp)) {
 6507                 if (*iomode == NFSLAYOUTIOMODE_RW)
 6508                         return (NFSERR_LAYOUTTRYLATER);
 6509                 *iomode = NFSLAYOUTIOMODE_READ;
 6510         }
 6511         if (*iomode != NFSLAYOUTIOMODE_RW)
 6512                 *iomode = NFSLAYOUTIOMODE_READ;
 6513 
 6514         /*
 6515          * Check to see if a write layout can be issued for this file.
 6516          * This is used during mirror recovery to avoid RW layouts being
 6517          * issued for a file while it is being copied to the recovered
 6518          * mirror.
 6519          */
 6520         if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0)
 6521                 return (NFSERR_LAYOUTTRYLATER);
 6522 
 6523         *retonclose = 0;
 6524         *offset = 0;
 6525         *len = UINT64_MAX;
 6526 
 6527         /* First, see if a layout already exists and return if found. */
 6528         lhyp = NFSLAYOUTHASH(&fh);
 6529         NFSLOCKLAYOUT(lhyp);
 6530         error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp);
 6531         NFSD_DEBUG(4, "layoutget findlay=%d\n", error);
 6532         /*
 6533          * Not sure if the seqid must be the same, so I won't check it.
 6534          */
 6535         if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] ||
 6536             stateidp->other[1] != lyp->lay_stateid.other[1] ||
 6537             stateidp->other[2] != lyp->lay_stateid.other[2])) {
 6538                 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
 6539                         NFSUNLOCKLAYOUT(lhyp);
 6540                         NFSD_DEBUG(1, "ret bad stateid\n");
 6541                         return (NFSERR_BADSTATEID);
 6542                 }
 6543                 /*
 6544                  * I believe we get here because there is a race between
 6545                  * the client processing the CBLAYOUTRECALL and the layout
 6546                  * being deleted here on the server.
 6547                  * The client has now done a LayoutGet with a non-layout
 6548                  * stateid, as it would when there is no layout.
 6549                  * As such, free this layout and set error == NFSERR_BADSTATEID
 6550                  * so the code below will create a new layout structure as
 6551                  * would happen if no layout was found.
 6552                  * "lyp" will be set before being used below, but set it NULL
 6553                  * as a safety belt.
 6554                  */
 6555                 nfsrv_freelayout(&lhyp->list, lyp);
 6556                 lyp = NULL;
 6557                 error = NFSERR_BADSTATEID;
 6558         }
 6559         if (error == 0) {
 6560                 if (lyp->lay_layoutlen > maxcnt) {
 6561                         NFSUNLOCKLAYOUT(lhyp);
 6562                         NFSD_DEBUG(1, "ret layout too small\n");
 6563                         return (NFSERR_TOOSMALL);
 6564                 }
 6565                 if (*iomode == NFSLAYOUTIOMODE_RW)
 6566                         lyp->lay_flags |= NFSLAY_RW;
 6567                 else
 6568                         lyp->lay_flags |= NFSLAY_READ;
 6569                 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
 6570                 *layoutlenp = lyp->lay_layoutlen;
 6571                 if (++lyp->lay_stateid.seqid == 0)
 6572                         lyp->lay_stateid.seqid = 1;
 6573                 stateidp->seqid = lyp->lay_stateid.seqid;
 6574                 NFSUNLOCKLAYOUT(lhyp);
 6575                 NFSD_DEBUG(4, "ret fnd layout\n");
 6576                 return (0);
 6577         }
 6578         NFSUNLOCKLAYOUT(lhyp);
 6579 
 6580         /* Find the device id and file handle. */
 6581         dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
 6582         devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
 6583         error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid);
 6584         NFSD_DEBUG(4, "layoutget devandfh=%d\n", error);
 6585         if (error == 0) {
 6586                 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
 6587                         if (NFSX_V4FILELAYOUT > maxcnt)
 6588                                 error = NFSERR_TOOSMALL;
 6589                         else
 6590                                 lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp,
 6591                                     devid, vp->v_mount->mnt_stat.f_fsid);
 6592                 } else {
 6593                         if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt)
 6594                                 error = NFSERR_TOOSMALL;
 6595                         else
 6596                                 lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt,
 6597                                     &fh, dsfhp, devid,
 6598                                     vp->v_mount->mnt_stat.f_fsid);
 6599                 }
 6600         }
 6601         free(dsfhp, M_TEMP);
 6602         free(devid, M_TEMP);
 6603         if (error != 0)
 6604                 return (error);
 6605 
 6606         /*
 6607          * Now, add this layout to the list.
 6608          */
 6609         error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p);
 6610         NFSD_DEBUG(4, "layoutget addl=%d\n", error);
 6611         /*
 6612          * The lyp will be set to NULL by nfsrv_addlayout() if it
 6613          * linked the new structure into the lists.
 6614          */
 6615         free(lyp, M_NFSDSTATE);
 6616         return (error);
 6617 }
 6618 
 6619 /*
 6620  * Generate a File Layout.
 6621  */
 6622 static struct nfslayout *
 6623 nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
 6624     fhandle_t *dsfhp, char *devid, fsid_t fs)
 6625 {
 6626         uint32_t *tl;
 6627         struct nfslayout *lyp;
 6628         uint64_t pattern_offset;
 6629 
 6630         lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE,
 6631             M_WAITOK | M_ZERO);
 6632         lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES;
 6633         if (iomode == NFSLAYOUTIOMODE_RW)
 6634                 lyp->lay_flags = NFSLAY_RW;
 6635         else
 6636                 lyp->lay_flags = NFSLAY_READ;
 6637         NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
 6638         lyp->lay_clientid.qval = nd->nd_clientid.qval;
 6639         lyp->lay_fsid = fs;
 6640 
 6641         /* Fill in the xdr for the files layout. */
 6642         tl = (uint32_t *)lyp->lay_xdr;
 6643         NFSBCOPY(devid, tl, NFSX_V4DEVICEID);           /* Device ID. */
 6644         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 6645 
 6646         /*
 6647          * Make the stripe size as many 64K blocks as will fit in the stripe
 6648          * mask. Since there is only one stripe, the stripe size doesn't really
 6649          * matter, except that the Linux client will only handle an exact
 6650          * multiple of their PAGE_SIZE (usually 4K).  I chose 64K as a value
 6651          * that should cover most/all arches w.r.t. PAGE_SIZE.
 6652          */
 6653         *tl++ = txdr_unsigned(NFSFLAYUTIL_STRIPE_MASK & ~0xffff);
 6654         *tl++ = 0;                                      /* 1st stripe index. */
 6655         pattern_offset = 0;
 6656         txdr_hyper(pattern_offset, tl); tl += 2;        /* Pattern offset. */
 6657         *tl++ = txdr_unsigned(1);                       /* 1 file handle. */
 6658         *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
 6659         NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
 6660         lyp->lay_layoutlen = NFSX_V4FILELAYOUT;
 6661         return (lyp);
 6662 }
 6663 
 6664 #define FLEX_OWNERID    "999"
 6665 #define FLEX_UID0       ""
 6666 /*
 6667  * Generate a Flex File Layout.
 6668  * The FLEX_OWNERID can be any string of 3 decimal digits. Although this
 6669  * string goes on the wire, it isn't supposed to be used by the client,
 6670  * since this server uses tight coupling.
 6671  * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use
 6672  * a string of "". This works around the Linux Flex File Layout driver bug
 6673  * which uses the synthetic uid/gid strings for the "tightly coupled" case.
 6674  */
 6675 static struct nfslayout *
 6676 nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt,
 6677     fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs)
 6678 {
 6679         uint32_t *tl;
 6680         struct nfslayout *lyp;
 6681         uint64_t lenval;
 6682         int i;
 6683 
 6684         lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt),
 6685             M_NFSDSTATE, M_WAITOK | M_ZERO);
 6686         lyp->lay_type = NFSLAYOUT_FLEXFILE;
 6687         if (iomode == NFSLAYOUTIOMODE_RW)
 6688                 lyp->lay_flags = NFSLAY_RW;
 6689         else
 6690                 lyp->lay_flags = NFSLAY_READ;
 6691         NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
 6692         lyp->lay_clientid.qval = nd->nd_clientid.qval;
 6693         lyp->lay_fsid = fs;
 6694         lyp->lay_mirrorcnt = mirrorcnt;
 6695 
 6696         /* Fill in the xdr for the files layout. */
 6697         tl = (uint32_t *)lyp->lay_xdr;
 6698         lenval = 0;
 6699         txdr_hyper(lenval, tl); tl += 2;                /* Stripe unit. */
 6700         *tl++ = txdr_unsigned(mirrorcnt);               /* # of mirrors. */
 6701         for (i = 0; i < mirrorcnt; i++) {
 6702                 *tl++ = txdr_unsigned(1);               /* One stripe. */
 6703                 NFSBCOPY(devid, tl, NFSX_V4DEVICEID);   /* Device ID. */
 6704                 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 6705                 devid += NFSX_V4DEVICEID;
 6706                 *tl++ = txdr_unsigned(1);               /* Efficiency. */
 6707                 *tl++ = 0;                              /* Proxy Stateid. */
 6708                 *tl++ = 0x55555555;
 6709                 *tl++ = 0x55555555;
 6710                 *tl++ = 0x55555555;
 6711                 *tl++ = txdr_unsigned(1);               /* 1 file handle. */
 6712                 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
 6713                 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
 6714                 tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED);
 6715                 dsfhp++;
 6716                 if (nfsrv_flexlinuxhack != 0) {
 6717                         *tl++ = txdr_unsigned(strlen(FLEX_UID0));
 6718                         *tl = 0;                /* 0 pad string. */
 6719                         NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
 6720                         *tl++ = txdr_unsigned(strlen(FLEX_UID0));
 6721                         *tl = 0;                /* 0 pad string. */
 6722                         NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
 6723                 } else {
 6724                         *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
 6725                         NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
 6726                         *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
 6727                         NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
 6728                 }
 6729         }
 6730         *tl++ = txdr_unsigned(0);               /* ff_flags. */
 6731         *tl = txdr_unsigned(60);                /* Status interval hint. */
 6732         lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt);
 6733         return (lyp);
 6734 }
 6735 
 6736 /*
 6737  * Parse and process Flex File errors returned via LayoutReturn.
 6738  */
 6739 static void
 6740 nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt,
 6741     NFSPROC_T *p)
 6742 {
 6743         uint32_t *tl;
 6744         int cnt, errcnt, i, j, opnum, stat;
 6745         char devid[NFSX_V4DEVICEID];
 6746 
 6747         tl = layp;
 6748         cnt = fxdr_unsigned(int, *tl++);
 6749         NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt);
 6750         for (i = 0; i < cnt; i++) {
 6751                 /* Skip offset, length and stateid for now. */
 6752                 tl += (4 + NFSX_STATEID / NFSX_UNSIGNED);
 6753                 errcnt = fxdr_unsigned(int, *tl++);
 6754                 NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt);
 6755                 for (j = 0; j < errcnt; j++) {
 6756                         NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
 6757                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 6758                         stat = fxdr_unsigned(int, *tl++);
 6759                         opnum = fxdr_unsigned(int, *tl++);
 6760                         NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum,
 6761                             stat);
 6762                         /*
 6763                          * Except for NFSERR_ACCES and NFSERR_STALE errors,
 6764                          * disable the mirror.
 6765                          */
 6766                         if (stat != NFSERR_ACCES && stat != NFSERR_STALE)
 6767                                 nfsrv_delds(devid, p);
 6768                 }
 6769         }
 6770 }
 6771 
 6772 /*
 6773  * This function removes all flex file layouts which has a mirror with
 6774  * a device id that matches the argument.
 6775  * Called when the DS represented by the device id has failed.
 6776  */
 6777 void
 6778 nfsrv_flexmirrordel(char *devid, NFSPROC_T *p)
 6779 {
 6780         uint32_t *tl;
 6781         struct nfslayout *lyp, *nlyp;
 6782         struct nfslayouthash *lhyp;
 6783         struct nfslayouthead loclyp;
 6784         int i, j;
 6785 
 6786         NFSD_DEBUG(4, "flexmirrordel\n");
 6787         /* Move all layouts found onto a local list. */
 6788         TAILQ_INIT(&loclyp);
 6789         for (i = 0; i < nfsrv_layouthashsize; i++) {
 6790                 lhyp = &nfslayouthash[i];
 6791                 NFSLOCKLAYOUT(lhyp);
 6792                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 6793                         if (lyp->lay_type == NFSLAYOUT_FLEXFILE &&
 6794                             lyp->lay_mirrorcnt > 1) {
 6795                                 NFSD_DEBUG(4, "possible match\n");
 6796                                 tl = lyp->lay_xdr;
 6797                                 tl += 3;
 6798                                 for (j = 0; j < lyp->lay_mirrorcnt; j++) {
 6799                                         tl++;
 6800                                         if (NFSBCMP(devid, tl, NFSX_V4DEVICEID)
 6801                                             == 0) {
 6802                                                 /* Found one. */
 6803                                                 NFSD_DEBUG(4, "fnd one\n");
 6804                                                 TAILQ_REMOVE(&lhyp->list, lyp,
 6805                                                     lay_list);
 6806                                                 TAILQ_INSERT_HEAD(&loclyp, lyp,
 6807                                                     lay_list);
 6808                                                 break;
 6809                                         }
 6810                                         tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED +
 6811                                             NFSM_RNDUP(NFSX_V4PNFSFH) /
 6812                                             NFSX_UNSIGNED + 11 * NFSX_UNSIGNED);
 6813                                 }
 6814                         }
 6815                 }
 6816                 NFSUNLOCKLAYOUT(lhyp);
 6817         }
 6818 
 6819         /* Now, try to do a Layout recall for each one found. */
 6820         TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) {
 6821                 NFSD_DEBUG(4, "do layout recall\n");
 6822                 /*
 6823                  * The layout stateid.seqid needs to be incremented
 6824                  * before doing a LAYOUT_RECALL callback.
 6825                  */
 6826                 if (++lyp->lay_stateid.seqid == 0)
 6827                         lyp->lay_stateid.seqid = 1;
 6828                 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
 6829                     &lyp->lay_fh, lyp, 1, lyp->lay_type, p);
 6830                 nfsrv_freelayout(&loclyp, lyp);
 6831         }
 6832 }
 6833 
 6834 /*
 6835  * Do a recall callback to the client for this layout.
 6836  */
 6837 static int
 6838 nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp,
 6839     struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p)
 6840 {
 6841         struct nfsclient *clp;
 6842         int error;
 6843 
 6844         NFSD_DEBUG(4, "nfsrv_recalllayout\n");
 6845         error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0),
 6846             0, NULL, p);
 6847         NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error);
 6848         if (error != 0) {
 6849                 printf("nfsrv_recalllayout: getclient err=%d\n", error);
 6850                 return (error);
 6851         }
 6852         if ((clp->lc_flags & LCL_NFSV41) != 0) {
 6853                 error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL,
 6854                     stateidp, changed, fhp, NULL, NULL, laytype, p);
 6855                 /* If lyp != NULL, handle an error return here. */
 6856                 if (error != 0 && lyp != NULL) {
 6857                         NFSDRECALLLOCK();
 6858                         /*
 6859                          * Mark it returned, since no layout recall
 6860                          * has been done.
 6861                          * All errors seem to be non-recoverable, although
 6862                          * NFSERR_NOMATCHLAYOUT is a normal event.
 6863                          */
 6864                         if ((lyp->lay_flags & NFSLAY_RECALL) != 0) {
 6865                                 lyp->lay_flags |= NFSLAY_RETURNED;
 6866                                 wakeup(lyp);
 6867                         }
 6868                         NFSDRECALLUNLOCK();
 6869                         if (error != NFSERR_NOMATCHLAYOUT)
 6870                                 printf("nfsrv_recalllayout: err=%d\n", error);
 6871                 }
 6872         } else
 6873                 printf("nfsrv_recalllayout: clp not NFSv4.1\n");
 6874         return (error);
 6875 }
 6876 
 6877 /*
 6878  * Find a layout to recall when we exceed our high water mark.
 6879  */
 6880 void
 6881 nfsrv_recalloldlayout(NFSPROC_T *p)
 6882 {
 6883         struct nfslayouthash *lhyp;
 6884         struct nfslayout *lyp;
 6885         nfsquad_t clientid;
 6886         nfsv4stateid_t stateid;
 6887         fhandle_t fh;
 6888         int error, laytype, ret;
 6889 
 6890         lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize];
 6891         NFSLOCKLAYOUT(lhyp);
 6892         TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) {
 6893                 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
 6894                         lyp->lay_flags |= NFSLAY_CALLB;
 6895                         /*
 6896                          * The layout stateid.seqid needs to be incremented
 6897                          * before doing a LAYOUT_RECALL callback.
 6898                          */
 6899                         if (++lyp->lay_stateid.seqid == 0)
 6900                                 lyp->lay_stateid.seqid = 1;
 6901                         clientid = lyp->lay_clientid;
 6902                         stateid = lyp->lay_stateid;
 6903                         NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh));
 6904                         laytype = lyp->lay_type;
 6905                         break;
 6906                 }
 6907         }
 6908         NFSUNLOCKLAYOUT(lhyp);
 6909         if (lyp != NULL) {
 6910                 error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0,
 6911                     laytype, p);
 6912                 if (error != 0 && error != NFSERR_NOMATCHLAYOUT)
 6913                         NFSD_DEBUG(4, "recallold=%d\n", error);
 6914                 if (error != 0) {
 6915                         NFSLOCKLAYOUT(lhyp);
 6916                         /*
 6917                          * Since the hash list was unlocked, we need to
 6918                          * find it again.
 6919                          */
 6920                         ret = nfsrv_findlayout(&clientid, &fh, laytype, p,
 6921                             &lyp);
 6922                         if (ret == 0 &&
 6923                             (lyp->lay_flags & NFSLAY_CALLB) != 0 &&
 6924                             lyp->lay_stateid.other[0] == stateid.other[0] &&
 6925                             lyp->lay_stateid.other[1] == stateid.other[1] &&
 6926                             lyp->lay_stateid.other[2] == stateid.other[2]) {
 6927                                 /*
 6928                                  * The client no longer knows this layout, so
 6929                                  * it can be free'd now.
 6930                                  */
 6931                                 if (error == NFSERR_NOMATCHLAYOUT)
 6932                                         nfsrv_freelayout(&lhyp->list, lyp);
 6933                                 else {
 6934                                         /*
 6935                                          * Leave it to be tried later by
 6936                                          * clearing NFSLAY_CALLB and moving
 6937                                          * it to the head of the list, so it
 6938                                          * won't be tried again for a while.
 6939                                          */
 6940                                         lyp->lay_flags &= ~NFSLAY_CALLB;
 6941                                         TAILQ_REMOVE(&lhyp->list, lyp,
 6942                                             lay_list);
 6943                                         TAILQ_INSERT_HEAD(&lhyp->list, lyp,
 6944                                             lay_list);
 6945                                 }
 6946                         }
 6947                         NFSUNLOCKLAYOUT(lhyp);
 6948                 }
 6949         }
 6950 }
 6951 
 6952 /*
 6953  * Try and return layout(s).
 6954  */
 6955 int
 6956 nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp,
 6957     int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim,
 6958     int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp,
 6959     struct ucred *cred, NFSPROC_T *p)
 6960 {
 6961         struct nfsvattr na;
 6962         struct nfslayouthash *lhyp;
 6963         struct nfslayout *lyp;
 6964         fhandle_t fh;
 6965         int error = 0;
 6966 
 6967         *fndp = 0;
 6968         if (kind == NFSV4LAYOUTRET_FILE) {
 6969                 error = nfsvno_getfh(vp, &fh, p);
 6970                 if (error == 0) {
 6971                         error = nfsrv_updatemdsattr(vp, &na, p);
 6972                         if (error != 0)
 6973                                 printf("nfsrv_layoutreturn: updatemdsattr"
 6974                                     " failed=%d\n", error);
 6975                 }
 6976                 if (error == 0) {
 6977                         if (reclaim == newnfs_true) {
 6978                                 error = nfsrv_checkgrace(NULL, NULL,
 6979                                     NFSLCK_RECLAIM);
 6980                                 if (error != NFSERR_NOGRACE)
 6981                                         error = 0;
 6982                                 return (error);
 6983                         }
 6984                         lhyp = NFSLAYOUTHASH(&fh);
 6985                         NFSDRECALLLOCK();
 6986                         NFSLOCKLAYOUT(lhyp);
 6987                         error = nfsrv_findlayout(&nd->nd_clientid, &fh,
 6988                             layouttype, p, &lyp);
 6989                         NFSD_DEBUG(4, "layoutret findlay=%d\n", error);
 6990                         if (error == 0 &&
 6991                             stateidp->other[0] == lyp->lay_stateid.other[0] &&
 6992                             stateidp->other[1] == lyp->lay_stateid.other[1] &&
 6993                             stateidp->other[2] == lyp->lay_stateid.other[2]) {
 6994                                 NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d"
 6995                                     " %x %x %x laystateid %d %x %x %x"
 6996                                     " off=%ju len=%ju flgs=0x%x\n",
 6997                                     stateidp->seqid, stateidp->other[0],
 6998                                     stateidp->other[1], stateidp->other[2],
 6999                                     lyp->lay_stateid.seqid,
 7000                                     lyp->lay_stateid.other[0],
 7001                                     lyp->lay_stateid.other[1],
 7002                                     lyp->lay_stateid.other[2],
 7003                                     (uintmax_t)offset, (uintmax_t)len,
 7004                                     lyp->lay_flags);
 7005                                 if (++lyp->lay_stateid.seqid == 0)
 7006                                         lyp->lay_stateid.seqid = 1;
 7007                                 stateidp->seqid = lyp->lay_stateid.seqid;
 7008                                 if (offset == 0 && len == UINT64_MAX) {
 7009                                         if ((iomode & NFSLAYOUTIOMODE_READ) !=
 7010                                             0)
 7011                                                 lyp->lay_flags &= ~NFSLAY_READ;
 7012                                         if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
 7013                                                 lyp->lay_flags &= ~NFSLAY_RW;
 7014                                         if ((lyp->lay_flags & (NFSLAY_READ |
 7015                                             NFSLAY_RW)) == 0)
 7016                                                 nfsrv_freelayout(&lhyp->list,
 7017                                                     lyp);
 7018                                         else
 7019                                                 *fndp = 1;
 7020                                 } else
 7021                                         *fndp = 1;
 7022                         }
 7023                         NFSUNLOCKLAYOUT(lhyp);
 7024                         /* Search the nfsrv_recalllist for a match. */
 7025                         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 7026                                 if (NFSBCMP(&lyp->lay_fh, &fh,
 7027                                     sizeof(fh)) == 0 &&
 7028                                     lyp->lay_clientid.qval ==
 7029                                     nd->nd_clientid.qval &&
 7030                                     stateidp->other[0] ==
 7031                                     lyp->lay_stateid.other[0] &&
 7032                                     stateidp->other[1] ==
 7033                                     lyp->lay_stateid.other[1] &&
 7034                                     stateidp->other[2] ==
 7035                                     lyp->lay_stateid.other[2]) {
 7036                                         lyp->lay_flags |= NFSLAY_RETURNED;
 7037                                         wakeup(lyp);
 7038                                         error = 0;
 7039                                 }
 7040                         }
 7041                         NFSDRECALLUNLOCK();
 7042                 }
 7043                 if (layouttype == NFSLAYOUT_FLEXFILE)
 7044                         nfsrv_flexlayouterr(nd, layp, maxcnt, p);
 7045         } else if (kind == NFSV4LAYOUTRET_FSID)
 7046                 nfsrv_freelayouts(&nd->nd_clientid,
 7047                     &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode);
 7048         else if (kind == NFSV4LAYOUTRET_ALL)
 7049                 nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode);
 7050         else
 7051                 error = NFSERR_INVAL;
 7052         if (error == -1)
 7053                 error = 0;
 7054         return (error);
 7055 }
 7056 
 7057 /*
 7058  * Look for an existing layout.
 7059  */
 7060 static int
 7061 nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
 7062     NFSPROC_T *p, struct nfslayout **lypp)
 7063 {
 7064         struct nfslayouthash *lhyp;
 7065         struct nfslayout *lyp;
 7066         int ret;
 7067 
 7068         *lypp = NULL;
 7069         ret = 0;
 7070         lhyp = NFSLAYOUTHASH(fhp);
 7071         TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
 7072                 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
 7073                     lyp->lay_clientid.qval == clientidp->qval &&
 7074                     lyp->lay_type == laytype)
 7075                         break;
 7076         }
 7077         if (lyp != NULL)
 7078                 *lypp = lyp;
 7079         else
 7080                 ret = -1;
 7081         return (ret);
 7082 }
 7083 
 7084 /*
 7085  * Add the new layout, as required.
 7086  */
 7087 static int
 7088 nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
 7089     nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p)
 7090 {
 7091         struct nfsclient *clp;
 7092         struct nfslayouthash *lhyp;
 7093         struct nfslayout *lyp, *nlyp;
 7094         fhandle_t *fhp;
 7095         int error;
 7096 
 7097         KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0,
 7098             ("nfsrv_layoutget: no nd_clientid\n"));
 7099         lyp = *lypp;
 7100         fhp = &lyp->lay_fh;
 7101         NFSLOCKSTATE();
 7102         error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
 7103             NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 7104         if (error != 0) {
 7105                 NFSUNLOCKSTATE();
 7106                 return (error);
 7107         }
 7108         lyp->lay_stateid.seqid = stateidp->seqid = 1;
 7109         lyp->lay_stateid.other[0] = stateidp->other[0] =
 7110             clp->lc_clientid.lval[0];
 7111         lyp->lay_stateid.other[1] = stateidp->other[1] =
 7112             clp->lc_clientid.lval[1];
 7113         lyp->lay_stateid.other[2] = stateidp->other[2] =
 7114             nfsrv_nextstateindex(clp);
 7115         NFSUNLOCKSTATE();
 7116 
 7117         lhyp = NFSLAYOUTHASH(fhp);
 7118         NFSLOCKLAYOUT(lhyp);
 7119         TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) {
 7120                 if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
 7121                     nlyp->lay_clientid.qval == nd->nd_clientid.qval)
 7122                         break;
 7123         }
 7124         if (nlyp != NULL) {
 7125                 /* A layout already exists, so use it. */
 7126                 nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW));
 7127                 NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen);
 7128                 *layoutlenp = nlyp->lay_layoutlen;
 7129                 if (++nlyp->lay_stateid.seqid == 0)
 7130                         nlyp->lay_stateid.seqid = 1;
 7131                 stateidp->seqid = nlyp->lay_stateid.seqid;
 7132                 stateidp->other[0] = nlyp->lay_stateid.other[0];
 7133                 stateidp->other[1] = nlyp->lay_stateid.other[1];
 7134                 stateidp->other[2] = nlyp->lay_stateid.other[2];
 7135                 NFSUNLOCKLAYOUT(lhyp);
 7136                 return (0);
 7137         }
 7138 
 7139         /* Insert the new layout in the lists. */
 7140         *lypp = NULL;
 7141         atomic_add_int(&nfsrv_layoutcnt, 1);
 7142         NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
 7143         *layoutlenp = lyp->lay_layoutlen;
 7144         TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list);
 7145         NFSUNLOCKLAYOUT(lhyp);
 7146         return (0);
 7147 }
 7148 
 7149 /*
 7150  * Get the devinfo for a deviceid.
 7151  */
 7152 int
 7153 nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt,
 7154     uint32_t *notify, int *devaddrlen, char **devaddr)
 7155 {
 7156         struct nfsdevice *ds;
 7157 
 7158         if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype !=
 7159              NFSLAYOUT_FLEXFILE) ||
 7160             (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES))
 7161                 return (NFSERR_UNKNLAYOUTTYPE);
 7162 
 7163         /*
 7164          * Now, search for the device id.  Note that the structures won't go
 7165          * away, but the order changes in the list.  As such, the lock only
 7166          * needs to be held during the search through the list.
 7167          */
 7168         NFSDDSLOCK();
 7169         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7170                 if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 &&
 7171                     ds->nfsdev_nmp != NULL)
 7172                         break;
 7173         }
 7174         NFSDDSUNLOCK();
 7175         if (ds == NULL)
 7176                 return (NFSERR_NOENT);
 7177 
 7178         /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */
 7179         *devaddrlen = 0;
 7180         if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
 7181                 *devaddrlen = ds->nfsdev_fileaddrlen;
 7182                 *devaddr = ds->nfsdev_fileaddr;
 7183         } else if (layouttype == NFSLAYOUT_FLEXFILE) {
 7184                 *devaddrlen = ds->nfsdev_flexaddrlen;
 7185                 *devaddr = ds->nfsdev_flexaddr;
 7186         }
 7187         if (*devaddrlen == 0)
 7188                 return (NFSERR_UNKNLAYOUTTYPE);
 7189 
 7190         /*
 7191          * The XDR overhead is 3 unsigned values: layout_type,
 7192          * length_of_address and notify bitmap.
 7193          * If the notify array is changed to not all zeros, the
 7194          * count of unsigned values must be increased.
 7195          */
 7196         if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) +
 7197             3 * NFSX_UNSIGNED) {
 7198                 *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED;
 7199                 return (NFSERR_TOOSMALL);
 7200         }
 7201         return (0);
 7202 }
 7203 
 7204 /*
 7205  * Free a list of layout state structures.
 7206  */
 7207 static void
 7208 nfsrv_freelayoutlist(nfsquad_t clientid)
 7209 {
 7210         struct nfslayouthash *lhyp;
 7211         struct nfslayout *lyp, *nlyp;
 7212         int i;
 7213 
 7214         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7215                 lhyp = &nfslayouthash[i];
 7216                 NFSLOCKLAYOUT(lhyp);
 7217                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7218                         if (lyp->lay_clientid.qval == clientid.qval)
 7219                                 nfsrv_freelayout(&lhyp->list, lyp);
 7220                 }
 7221                 NFSUNLOCKLAYOUT(lhyp);
 7222         }
 7223 }
 7224 
 7225 /*
 7226  * Free up a layout.
 7227  */
 7228 static void
 7229 nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp)
 7230 {
 7231 
 7232         NFSD_DEBUG(4, "Freelayout=%p\n", lyp);
 7233         atomic_add_int(&nfsrv_layoutcnt, -1);
 7234         TAILQ_REMOVE(lhp, lyp, lay_list);
 7235         free(lyp, M_NFSDSTATE);
 7236 }
 7237 
 7238 /*
 7239  * Free up a device id.
 7240  */
 7241 void
 7242 nfsrv_freeonedevid(struct nfsdevice *ds)
 7243 {
 7244         int i;
 7245 
 7246         atomic_add_int(&nfsrv_devidcnt, -1);
 7247         vrele(ds->nfsdev_dvp);
 7248         for (i = 0; i < nfsrv_dsdirsize; i++)
 7249                 if (ds->nfsdev_dsdir[i] != NULL)
 7250                         vrele(ds->nfsdev_dsdir[i]);
 7251         free(ds->nfsdev_fileaddr, M_NFSDSTATE);
 7252         free(ds->nfsdev_flexaddr, M_NFSDSTATE);
 7253         free(ds->nfsdev_host, M_NFSDSTATE);
 7254         free(ds, M_NFSDSTATE);
 7255 }
 7256 
 7257 /*
 7258  * Free up a device id and its mirrors.
 7259  */
 7260 static void
 7261 nfsrv_freedevid(struct nfsdevice *ds)
 7262 {
 7263 
 7264         TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
 7265         nfsrv_freeonedevid(ds);
 7266 }
 7267 
 7268 /*
 7269  * Free all layouts and device ids.
 7270  * Done when the nfsd threads are shut down since there may be a new
 7271  * modified device id list created when the nfsd is restarted.
 7272  */
 7273 void
 7274 nfsrv_freealllayoutsanddevids(void)
 7275 {
 7276         struct nfsdontlist *mrp, *nmrp;
 7277         struct nfslayout *lyp, *nlyp;
 7278 
 7279         /* Get rid of the deviceid structures. */
 7280         nfsrv_freealldevids();
 7281         TAILQ_INIT(&nfsrv_devidhead);
 7282         nfsrv_devidcnt = 0;
 7283 
 7284         /* Get rid of all layouts. */
 7285         nfsrv_freealllayouts();
 7286 
 7287         /* Get rid of any nfsdontlist entries. */
 7288         LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp)
 7289                 free(mrp, M_NFSDSTATE);
 7290         LIST_INIT(&nfsrv_dontlisthead);
 7291         nfsrv_dontlistlen = 0;
 7292 
 7293         /* Free layouts in the recall list. */
 7294         TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp)
 7295                 nfsrv_freelayout(&nfsrv_recalllisthead, lyp);
 7296         TAILQ_INIT(&nfsrv_recalllisthead);
 7297 }
 7298 
 7299 /*
 7300  * Free layouts that match the arguments.
 7301  */
 7302 static void
 7303 nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode)
 7304 {
 7305         struct nfslayouthash *lhyp;
 7306         struct nfslayout *lyp, *nlyp;
 7307         int i;
 7308 
 7309         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7310                 lhyp = &nfslayouthash[i];
 7311                 NFSLOCKLAYOUT(lhyp);
 7312                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7313                         if (clid->qval != lyp->lay_clientid.qval)
 7314                                 continue;
 7315                         if (fs != NULL && (fs->val[0] != lyp->lay_fsid.val[0] ||
 7316                             fs->val[1] != lyp->lay_fsid.val[1]))
 7317                                 continue;
 7318                         if (laytype != lyp->lay_type)
 7319                                 continue;
 7320                         if ((iomode & NFSLAYOUTIOMODE_READ) != 0)
 7321                                 lyp->lay_flags &= ~NFSLAY_READ;
 7322                         if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
 7323                                 lyp->lay_flags &= ~NFSLAY_RW;
 7324                         if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0)
 7325                                 nfsrv_freelayout(&lhyp->list, lyp);
 7326                 }
 7327                 NFSUNLOCKLAYOUT(lhyp);
 7328         }
 7329 }
 7330 
 7331 /*
 7332  * Free all layouts for the argument file.
 7333  */
 7334 void
 7335 nfsrv_freefilelayouts(fhandle_t *fhp)
 7336 {
 7337         struct nfslayouthash *lhyp;
 7338         struct nfslayout *lyp, *nlyp;
 7339 
 7340         lhyp = NFSLAYOUTHASH(fhp);
 7341         NFSLOCKLAYOUT(lhyp);
 7342         TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 7343                 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0)
 7344                         nfsrv_freelayout(&lhyp->list, lyp);
 7345         }
 7346         NFSUNLOCKLAYOUT(lhyp);
 7347 }
 7348 
 7349 /*
 7350  * Free all layouts.
 7351  */
 7352 static void
 7353 nfsrv_freealllayouts(void)
 7354 {
 7355         struct nfslayouthash *lhyp;
 7356         struct nfslayout *lyp, *nlyp;
 7357         int i;
 7358 
 7359         for (i = 0; i < nfsrv_layouthashsize; i++) {
 7360                 lhyp = &nfslayouthash[i];
 7361                 NFSLOCKLAYOUT(lhyp);
 7362                 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp)
 7363                         nfsrv_freelayout(&lhyp->list, lyp);
 7364                 NFSUNLOCKLAYOUT(lhyp);
 7365         }
 7366 }
 7367 
 7368 /*
 7369  * Look up the mount path for the DS server.
 7370  */
 7371 static int
 7372 nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
 7373     struct nfsdevice **dsp)
 7374 {
 7375         struct nameidata nd;
 7376         struct nfsdevice *ds;
 7377         struct mount *mp;
 7378         int error, i;
 7379         char *dsdirpath;
 7380         size_t dsdirsize;
 7381 
 7382         NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp);
 7383         *dsp = NULL;
 7384         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
 7385             dspathp, p);
 7386         error = namei(&nd);
 7387         NFSD_DEBUG(4, "lookup=%d\n", error);
 7388         if (error != 0)
 7389                 return (error);
 7390         if (nd.ni_vp->v_type != VDIR) {
 7391                 vput(nd.ni_vp);
 7392                 NFSD_DEBUG(4, "dspath not dir\n");
 7393                 return (ENOTDIR);
 7394         }
 7395         if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 7396                 vput(nd.ni_vp);
 7397                 NFSD_DEBUG(4, "dspath not an NFS mount\n");
 7398                 return (ENXIO);
 7399         }
 7400 
 7401         /*
 7402          * Allocate a DS server structure with the NFS mounted directory
 7403          * vnode reference counted, so that a non-forced dismount will
 7404          * fail with EBUSY.
 7405          * This structure is always linked into the list, even if an error
 7406          * is being returned.  The caller will free the entire list upon
 7407          * an error return.
 7408          */
 7409         *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t),
 7410             M_NFSDSTATE, M_WAITOK | M_ZERO);
 7411         ds->nfsdev_dvp = nd.ni_vp;
 7412         ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount);
 7413         NFSVOPUNLOCK(nd.ni_vp, 0);
 7414 
 7415         dsdirsize = strlen(dspathp) + 16;
 7416         dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK);
 7417         /* Now, create the DS directory structures. */
 7418         for (i = 0; i < nfsrv_dsdirsize; i++) {
 7419                 snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i);
 7420                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 7421                     UIO_SYSSPACE, dsdirpath, p);
 7422                 error = namei(&nd);
 7423                 NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error);
 7424                 if (error != 0)
 7425                         break;
 7426                 if (nd.ni_vp->v_type != VDIR) {
 7427                         vput(nd.ni_vp);
 7428                         error = ENOTDIR;
 7429                         NFSD_DEBUG(4, "dsdirpath not a VDIR\n");
 7430                         break;
 7431                 }
 7432                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 7433                         vput(nd.ni_vp);
 7434                         error = ENXIO;
 7435                         NFSD_DEBUG(4, "dsdirpath not an NFS mount\n");
 7436                         break;
 7437                 }
 7438                 ds->nfsdev_dsdir[i] = nd.ni_vp;
 7439                 NFSVOPUNLOCK(nd.ni_vp, 0);
 7440         }
 7441         free(dsdirpath, M_TEMP);
 7442 
 7443         if (strlen(mdspathp) > 0) {
 7444                 /*
 7445                  * This DS stores file for a specific MDS exported file
 7446                  * system.
 7447                  */
 7448                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 7449                     UIO_SYSSPACE, mdspathp, p);
 7450                 error = namei(&nd);
 7451                 NFSD_DEBUG(4, "mds lookup=%d\n", error);
 7452                 if (error != 0)
 7453                         goto out;
 7454                 if (nd.ni_vp->v_type != VDIR) {
 7455                         vput(nd.ni_vp);
 7456                         error = ENOTDIR;
 7457                         NFSD_DEBUG(4, "mdspath not dir\n");
 7458                         goto out;
 7459                 }
 7460                 mp = nd.ni_vp->v_mount;
 7461                 if ((mp->mnt_flag & MNT_EXPORTED) == 0) {
 7462                         vput(nd.ni_vp);
 7463                         error = ENXIO;
 7464                         NFSD_DEBUG(4, "mdspath not an exported fs\n");
 7465                         goto out;
 7466                 }
 7467                 ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
 7468                 ds->nfsdev_mdsisset = 1;
 7469                 vput(nd.ni_vp);
 7470         }
 7471 
 7472 out:
 7473         TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
 7474         atomic_add_int(&nfsrv_devidcnt, 1);
 7475         return (error);
 7476 }
 7477 
 7478 /*
 7479  * Look up the mount path for the DS server and delete it.
 7480  */
 7481 int
 7482 nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p)
 7483 {
 7484         struct mount *mp;
 7485         struct nfsmount *nmp;
 7486         struct nfsdevice *ds;
 7487         int error;
 7488 
 7489         NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp);
 7490         /*
 7491          * Search for the path in the mount list.  Avoid looking the path
 7492          * up, since this mount point may be hung, with associated locked
 7493          * vnodes, etc.
 7494          * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked
 7495          * until this completes.
 7496          * As noted in the man page, this should be done before any forced
 7497          * dismount on the mount point, but at least the handshake on
 7498          * NFSMNTP_CANCELRPCS should make it safe.
 7499          */
 7500         error = 0;
 7501         ds = NULL;
 7502         nmp = NULL;
 7503         mtx_lock(&mountlist_mtx);
 7504         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 7505                 if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 &&
 7506                     strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 &&
 7507                     mp->mnt_data != NULL) {
 7508                         nmp = VFSTONFS(mp);
 7509                         NFSLOCKMNT(nmp);
 7510                         if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 7511                              NFSMNTP_CANCELRPCS)) == 0) {
 7512                                 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 7513                                 NFSUNLOCKMNT(nmp);
 7514                         } else {
 7515                                 NFSUNLOCKMNT(nmp);
 7516                                 nmp = NULL;
 7517                         }
 7518                         break;
 7519                 }
 7520         }
 7521         mtx_unlock(&mountlist_mtx);
 7522 
 7523         if (nmp != NULL) {
 7524                 ds = nfsrv_deldsnmp(op, nmp, p);
 7525                 NFSD_DEBUG(4, "deldsnmp=%p\n", ds);
 7526                 if (ds != NULL) {
 7527                         nfsrv_killrpcs(nmp);
 7528                         NFSD_DEBUG(4, "aft killrpcs\n");
 7529                 } else
 7530                         error = ENXIO;
 7531                 NFSLOCKMNT(nmp);
 7532                 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 7533                 wakeup(nmp);
 7534                 NFSUNLOCKMNT(nmp);
 7535         } else
 7536                 error = EINVAL;
 7537         return (error);
 7538 }
 7539 
 7540 /*
 7541  * Search for and remove a DS entry which matches the "nmp" argument.
 7542  * The nfsdevice structure pointer is returned so that the caller can
 7543  * free it via nfsrv_freeonedevid().
 7544  * For the forced case, do not try to do LayoutRecalls, since the server
 7545  * must be shut down now anyhow.
 7546  */
 7547 struct nfsdevice *
 7548 nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p)
 7549 {
 7550         struct nfsdevice *fndds;
 7551 
 7552         NFSD_DEBUG(4, "deldsdvp\n");
 7553         NFSDDSLOCK();
 7554         if (op == PNFSDOP_FORCEDELDS)
 7555                 fndds = nfsv4_findmirror(nmp);
 7556         else
 7557                 fndds = nfsrv_findmirroredds(nmp);
 7558         if (fndds != NULL)
 7559                 nfsrv_deleteds(fndds);
 7560         NFSDDSUNLOCK();
 7561         if (fndds != NULL) {
 7562                 if (op != PNFSDOP_FORCEDELDS)
 7563                         nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
 7564                 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
 7565         }
 7566         return (fndds);
 7567 }
 7568 
 7569 /*
 7570  * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid.
 7571  * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount
 7572  * point.
 7573  * Also, returns an error instead of the nfsdevice found.
 7574  */
 7575 static int
 7576 nfsrv_delds(char *devid, NFSPROC_T *p)
 7577 {
 7578         struct nfsdevice *ds, *fndds;
 7579         struct nfsmount *nmp;
 7580         int fndmirror;
 7581 
 7582         NFSD_DEBUG(4, "delds\n");
 7583         /*
 7584          * Search the DS server list for a match with devid.
 7585          * Remove the DS entry if found and there is a mirror.
 7586          */
 7587         fndds = NULL;
 7588         nmp = NULL;
 7589         fndmirror = 0;
 7590         NFSDDSLOCK();
 7591         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7592                 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 &&
 7593                     ds->nfsdev_nmp != NULL) {
 7594                         NFSD_DEBUG(4, "fnd main ds\n");
 7595                         fndds = ds;
 7596                         break;
 7597                 }
 7598         }
 7599         if (fndds == NULL) {
 7600                 NFSDDSUNLOCK();
 7601                 return (ENXIO);
 7602         }
 7603         if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
 7604                 fndmirror = 1;
 7605         else if (fndds->nfsdev_mdsisset != 0) {
 7606                 /* For the fsid is set case, search for a mirror. */
 7607                 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 7608                         if (ds != fndds && ds->nfsdev_nmp != NULL &&
 7609                             ds->nfsdev_mdsisset != 0 &&
 7610                             ds->nfsdev_mdsfsid.val[0] ==
 7611                             fndds->nfsdev_mdsfsid.val[0] &&
 7612                             ds->nfsdev_mdsfsid.val[1] ==
 7613                             fndds->nfsdev_mdsfsid.val[1]) {
 7614                                 fndmirror = 1;
 7615                                 break;
 7616                         }
 7617                 }
 7618         }
 7619         if (fndmirror != 0) {
 7620                 nmp = fndds->nfsdev_nmp;
 7621                 NFSLOCKMNT(nmp);
 7622                 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 7623                      NFSMNTP_CANCELRPCS)) == 0) {
 7624                         nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 7625                         NFSUNLOCKMNT(nmp);
 7626                         nfsrv_deleteds(fndds);
 7627                 } else {
 7628                         NFSUNLOCKMNT(nmp);
 7629                         nmp = NULL;
 7630                 }
 7631         }
 7632         NFSDDSUNLOCK();
 7633         if (nmp != NULL) {
 7634                 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
 7635                 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
 7636                 nfsrv_killrpcs(nmp);
 7637                 NFSLOCKMNT(nmp);
 7638                 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 7639                 wakeup(nmp);
 7640                 NFSUNLOCKMNT(nmp);
 7641                 return (0);
 7642         }
 7643         return (ENXIO);
 7644 }
 7645 
 7646 /*
 7647  * Mark a DS as disabled by setting nfsdev_nmp = NULL.
 7648  */
 7649 static void
 7650 nfsrv_deleteds(struct nfsdevice *fndds)
 7651 {
 7652 
 7653         NFSD_DEBUG(4, "deleteds: deleting a mirror\n");
 7654         fndds->nfsdev_nmp = NULL;
 7655         if (fndds->nfsdev_mdsisset == 0)
 7656                 nfsrv_faildscnt--;
 7657 }
 7658 
 7659 /*
 7660  * Fill in the addr structures for the File and Flex File layouts.
 7661  */
 7662 static void
 7663 nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
 7664 {
 7665         uint32_t *tl;
 7666         char *netprot;
 7667         int addrlen;
 7668         static uint64_t new_devid = 0;
 7669 
 7670         if (strchr(addr, ':') != NULL)
 7671                 netprot = "tcp6";
 7672         else
 7673                 netprot = "tcp";
 7674 
 7675         /* Fill in the device id. */
 7676         NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time));
 7677         new_devid++;
 7678         NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)],
 7679             sizeof(new_devid));
 7680 
 7681         /*
 7682          * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4
 7683          * as defined in RFC5661) in XDR.
 7684          */
 7685         addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
 7686             6 * NFSX_UNSIGNED;
 7687         NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot);
 7688         ds->nfsdev_fileaddrlen = addrlen;
 7689         tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
 7690         ds->nfsdev_fileaddr = (char *)tl;
 7691         *tl++ = txdr_unsigned(1);               /* One stripe with index 0. */
 7692         *tl++ = 0;
 7693         *tl++ = txdr_unsigned(1);               /* One multipath list */
 7694         *tl++ = txdr_unsigned(1);               /* with one entry in it. */
 7695         /* The netaddr for this one entry. */
 7696         *tl++ = txdr_unsigned(strlen(netprot));
 7697         NFSBCOPY(netprot, tl, strlen(netprot));
 7698         tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
 7699         *tl++ = txdr_unsigned(strlen(addr));
 7700         NFSBCOPY(addr, tl, strlen(addr));
 7701 
 7702         /*
 7703          * Fill in the flex file addr (actually the ff_device_addr4
 7704          * as defined for Flexible File Layout) in XDR.
 7705          */
 7706         addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
 7707             9 * NFSX_UNSIGNED;
 7708         ds->nfsdev_flexaddrlen = addrlen;
 7709         tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
 7710         ds->nfsdev_flexaddr = (char *)tl;
 7711         *tl++ = txdr_unsigned(1);               /* One multipath entry. */
 7712         /* The netaddr for this one entry. */
 7713         *tl++ = txdr_unsigned(strlen(netprot));
 7714         NFSBCOPY(netprot, tl, strlen(netprot));
 7715         tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
 7716         *tl++ = txdr_unsigned(strlen(addr));
 7717         NFSBCOPY(addr, tl, strlen(addr));
 7718         tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED);
 7719         *tl++ = txdr_unsigned(1);               /* One NFS Version. */
 7720         *tl++ = txdr_unsigned(NFS_VER4);        /* NFSv4. */
 7721         *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
 7722         *tl++ = txdr_unsigned(NFS_SRVMAXIO);    /* DS max rsize. */
 7723         *tl++ = txdr_unsigned(NFS_SRVMAXIO);    /* DS max wsize. */
 7724         *tl = newnfs_true;                      /* Tightly coupled. */
 7725 
 7726         ds->nfsdev_hostnamelen = strlen(dnshost);
 7727         ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE,
 7728             M_WAITOK);
 7729         NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1);
 7730 }
 7731 
 7732 
 7733 /*
 7734  * Create the device id list.
 7735  * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument
 7736  * is misconfigured.
 7737  */
 7738 int
 7739 nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p)
 7740 {
 7741         struct nfsdevice *ds;
 7742         char *addrp, *dnshostp, *dspathp, *mdspathp;
 7743         int error, i;
 7744 
 7745         addrp = args->addr;
 7746         dnshostp = args->dnshost;
 7747         dspathp = args->dspath;
 7748         mdspathp = args->mdspath;
 7749         nfsrv_maxpnfsmirror = args->mirrorcnt;
 7750         if (addrp == NULL || dnshostp == NULL || dspathp == NULL ||
 7751             mdspathp == NULL)
 7752                 return (0);
 7753 
 7754         /*
 7755          * Loop around for each nul-terminated string in args->addr,
 7756          * args->dnshost, args->dnspath and args->mdspath.
 7757          */
 7758         while (addrp < (args->addr + args->addrlen) &&
 7759             dnshostp < (args->dnshost + args->dnshostlen) &&
 7760             dspathp < (args->dspath + args->dspathlen) &&
 7761             mdspathp < (args->mdspath + args->mdspathlen)) {
 7762                 error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds);
 7763                 if (error != 0) {
 7764                         /* Free all DS servers. */
 7765                         nfsrv_freealldevids();
 7766                         nfsrv_devidcnt = 0;
 7767                         return (ENXIO);
 7768                 }
 7769                 nfsrv_allocdevid(ds, addrp, dnshostp);
 7770                 addrp += (strlen(addrp) + 1);
 7771                 dnshostp += (strlen(dnshostp) + 1);
 7772                 dspathp += (strlen(dspathp) + 1);
 7773                 mdspathp += (strlen(mdspathp) + 1);
 7774         }
 7775         if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) {
 7776                 /* Free all DS servers. */
 7777                 nfsrv_freealldevids();
 7778                 nfsrv_devidcnt = 0;
 7779                 nfsrv_maxpnfsmirror = 1;
 7780                 return (ENXIO);
 7781         }
 7782         /* We can fail at most one less DS than the mirror level. */
 7783         nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1;
 7784 
 7785         /*
 7786          * Allocate the nfslayout hash table now, since this is a pNFS server.
 7787          * Make it 1% of the high water mark and at least 100.
 7788          */
 7789         if (nfslayouthash == NULL) {
 7790                 nfsrv_layouthashsize = nfsrv_layouthighwater / 100;
 7791                 if (nfsrv_layouthashsize < 100)
 7792                         nfsrv_layouthashsize = 100;
 7793                 nfslayouthash = mallocarray(nfsrv_layouthashsize,
 7794                     sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK |
 7795                     M_ZERO);
 7796                 for (i = 0; i < nfsrv_layouthashsize; i++) {
 7797                         mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF);
 7798                         TAILQ_INIT(&nfslayouthash[i].list);
 7799                 }
 7800         }
 7801         return (0);
 7802 }
 7803 
 7804 /*
 7805  * Free all device ids.
 7806  */
 7807 static void
 7808 nfsrv_freealldevids(void)
 7809 {
 7810         struct nfsdevice *ds, *nds;
 7811 
 7812         TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds)
 7813                 nfsrv_freedevid(ds);
 7814 }
 7815 
 7816 /*
 7817  * Check to see if there is a Read/Write Layout plus either:
 7818  * - A Write Delegation
 7819  * or
 7820  * - An Open with Write_access.
 7821  * Return 1 if this is the case and 0 otherwise.
 7822  * This function is used by nfsrv_proxyds() to decide if doing a Proxy
 7823  * Getattr RPC to the Data Server (DS) is necessary.
 7824  */
 7825 #define NFSCLIDVECSIZE  6
 7826 APPLESTATIC int
 7827 nfsrv_checkdsattr(struct nfsrv_descript *nd, vnode_t vp, NFSPROC_T *p)
 7828 {
 7829         fhandle_t fh, *tfhp;
 7830         struct nfsstate *stp;
 7831         struct nfslayout *lyp;
 7832         struct nfslayouthash *lhyp;
 7833         struct nfslockhashhead *hp;
 7834         struct nfslockfile *lfp;
 7835         nfsquad_t clid[NFSCLIDVECSIZE];
 7836         int clidcnt, ret;
 7837 
 7838         ret = nfsvno_getfh(vp, &fh, p);
 7839         if (ret != 0)
 7840                 return (0);
 7841 
 7842         /* First check for a Read/Write Layout. */
 7843         clidcnt = 0;
 7844         lhyp = NFSLAYOUTHASH(&fh);
 7845         NFSLOCKLAYOUT(lhyp);
 7846         TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
 7847                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 7848                     ((lyp->lay_flags & NFSLAY_RW) != 0 ||
 7849                      ((lyp->lay_flags & NFSLAY_READ) != 0 &&
 7850                       nfsrv_pnfsatime != 0))) {
 7851                         if (clidcnt < NFSCLIDVECSIZE)
 7852                                 clid[clidcnt].qval = lyp->lay_clientid.qval;
 7853                         clidcnt++;
 7854                 }
 7855         }
 7856         NFSUNLOCKLAYOUT(lhyp);
 7857         if (clidcnt == 0) {
 7858                 /* None found, so return 0. */
 7859                 return (0);
 7860         }
 7861 
 7862         /* Get the nfslockfile for this fh. */
 7863         NFSLOCKSTATE();
 7864         hp = NFSLOCKHASH(&fh);
 7865         LIST_FOREACH(lfp, hp, lf_hash) {
 7866                 tfhp = &lfp->lf_fh;
 7867                 if (NFSVNO_CMPFH(&fh, tfhp))
 7868                         break;
 7869         }
 7870         if (lfp == NULL) {
 7871                 /* None found, so return 0. */
 7872                 NFSUNLOCKSTATE();
 7873                 return (0);
 7874         }
 7875 
 7876         /* Now, look for a Write delegation for this clientid. */
 7877         LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
 7878                 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
 7879                     nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
 7880                         break;
 7881         }
 7882         if (stp != NULL) {
 7883                 /* Found one, so return 1. */
 7884                 NFSUNLOCKSTATE();
 7885                 return (1);
 7886         }
 7887 
 7888         /* No Write delegation, so look for an Open with Write_access. */
 7889         LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
 7890                 KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0,
 7891                     ("nfsrv_checkdsattr: Non-open in Open list\n"));
 7892                 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 &&
 7893                     nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
 7894                         break;
 7895         }
 7896         NFSUNLOCKSTATE();
 7897         if (stp != NULL)
 7898                 return (1);
 7899         return (0);
 7900 }
 7901 
 7902 /*
 7903  * Look for a matching clientid in the vector. Return 1 if one might match.
 7904  */
 7905 static int
 7906 nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt)
 7907 {
 7908         int i;
 7909 
 7910         /* If too many for the vector, return 1 since there might be a match. */
 7911         if (clidcnt > NFSCLIDVECSIZE)
 7912                 return (1);
 7913 
 7914         for (i = 0; i < clidcnt; i++)
 7915                 if (clidvec[i].qval == clid.qval)
 7916                         return (1);
 7917         return (0);
 7918 }
 7919 
 7920 /*
 7921  * Check the don't list for "vp" and see if issuing an rw layout is allowed.
 7922  * Return 1 if issuing an rw layout isn't allowed, 0 otherwise.
 7923  */
 7924 static int
 7925 nfsrv_dontlayout(fhandle_t *fhp)
 7926 {
 7927         struct nfsdontlist *mrp;
 7928         int ret;
 7929 
 7930         if (nfsrv_dontlistlen == 0)
 7931                 return (0);
 7932         ret = 0;
 7933         NFSDDONTLISTLOCK();
 7934         LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
 7935                 if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 &&
 7936                     (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) {
 7937                         ret = 1;
 7938                         break;
 7939                 }
 7940         }
 7941         NFSDDONTLISTUNLOCK();
 7942         return (ret);
 7943 }
 7944 
 7945 #define PNFSDS_COPYSIZ  65536
 7946 /*
 7947  * Create a new file on a DS and copy the contents of an extant DS file to it.
 7948  * This can be used for recovery of a DS file onto a recovered DS.
 7949  * The steps are:
 7950  * - When called, the MDS file's vnode is locked, blocking LayoutGet operations.
 7951  * - Disable issuing of read/write layouts for the file via the nfsdontlist,
 7952  *   so that they will be disabled after the MDS file's vnode is unlocked.
 7953  * - Set up the nfsrv_recalllist so that recall of read/write layouts can
 7954  *   be done.
 7955  * - Unlock the MDS file's vnode, so that the client(s) can perform proxied
 7956  *   writes, LayoutCommits and LayoutReturns for the file when completing the
 7957  *   LayoutReturn requested by the LayoutRecall callback.
 7958  * - Issue a LayoutRecall callback for all read/write layouts and wait for
 7959  *   them to be returned. (If the LayoutRecall callback replies
 7960  *   NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.)
 7961  * - Exclusively lock the MDS file's vnode.  This ensures that no proxied
 7962  *   writes are in progress or can occur during the DS file copy.
 7963  *   It also blocks Setattr operations.
 7964  * - Create the file on the recovered mirror.
 7965  * - Copy the file from the operational DS.
 7966  * - Copy any ACL from the MDS file to the new DS file.
 7967  * - Set the modify time of the new DS file to that of the MDS file.
 7968  * - Update the extended attribute for the MDS file.
 7969  * - Enable issuing of rw layouts by deleting the nfsdontlist entry.
 7970  * - The caller will unlock the MDS file's vnode allowing operations
 7971  *   to continue normally, since it is now on the mirror again.
 7972  */
 7973 int
 7974 nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds,
 7975     struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt,
 7976     struct ucred *cred, NFSPROC_T *p)
 7977 {
 7978         struct nfsdontlist *mrp, *nmrp;
 7979         struct nfslayouthash *lhyp;
 7980         struct nfslayout *lyp, *nlyp;
 7981         struct nfslayouthead thl;
 7982         struct mount *mp, *tvmp;
 7983         struct acl *aclp;
 7984         struct vattr va;
 7985         struct timespec mtime;
 7986         fhandle_t fh;
 7987         vnode_t tvp;
 7988         off_t rdpos, wrpos;
 7989         ssize_t aresid;
 7990         char *dat;
 7991         int didprintf, ret, retacl, xfer;
 7992 
 7993         ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp");
 7994         ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp");
 7995         /*
 7996          * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag
 7997          * so that no more RW layouts will get issued.
 7998          */
 7999         ret = nfsvno_getfh(vp, &fh, p);
 8000         if (ret != 0) {
 8001                 NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret);
 8002                 return (ret);
 8003         }
 8004         nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK);
 8005         nmrp->nfsmr_flags = NFSMR_DONTLAYOUT;
 8006         NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh));
 8007         NFSDDONTLISTLOCK();
 8008         LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
 8009                 if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0)
 8010                         break;
 8011         }
 8012         if (mrp == NULL) {
 8013                 LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list);
 8014                 mrp = nmrp;
 8015                 nmrp = NULL;
 8016                 nfsrv_dontlistlen++;
 8017                 NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n");
 8018         } else {
 8019                 NFSDDONTLISTUNLOCK();
 8020                 free(nmrp, M_NFSDSTATE);
 8021                 NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n");
 8022                 return (ENXIO);
 8023         }
 8024         NFSDDONTLISTUNLOCK();
 8025 
 8026         /*
 8027          * Search for all RW layouts for this file.  Move them to the
 8028          * recall list, so they can be recalled and their return noted.
 8029          */
 8030         lhyp = NFSLAYOUTHASH(&fh);
 8031         NFSDRECALLLOCK();
 8032         NFSLOCKLAYOUT(lhyp);
 8033         TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
 8034                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8035                     (lyp->lay_flags & NFSLAY_RW) != 0) {
 8036                         TAILQ_REMOVE(&lhyp->list, lyp, lay_list);
 8037                         TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list);
 8038                         lyp->lay_trycnt = 0;
 8039                 }
 8040         }
 8041         NFSUNLOCKLAYOUT(lhyp);
 8042         NFSDRECALLUNLOCK();
 8043 
 8044         ret = 0;
 8045         mp = tvmp = NULL;
 8046         didprintf = 0;
 8047         TAILQ_INIT(&thl);
 8048         /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */
 8049         NFSVOPUNLOCK(vp, 0);
 8050         /* Now, do a recall for all layouts not yet recalled. */
 8051 tryagain:
 8052         NFSDRECALLLOCK();
 8053         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 8054                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
 8055                     (lyp->lay_flags & NFSLAY_RECALL) == 0) {
 8056                         lyp->lay_flags |= NFSLAY_RECALL;
 8057                         /*
 8058                          * The layout stateid.seqid needs to be incremented
 8059                          * before doing a LAYOUT_RECALL callback.
 8060                          */
 8061                         if (++lyp->lay_stateid.seqid == 0)
 8062                                 lyp->lay_stateid.seqid = 1;
 8063                         NFSDRECALLUNLOCK();
 8064                         nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
 8065                             &lyp->lay_fh, lyp, 0, lyp->lay_type, p);
 8066                         NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n");
 8067                         goto tryagain;
 8068                 }
 8069         }
 8070 
 8071         /* Now wait for them to be returned. */
 8072 tryagain2:
 8073         TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
 8074                 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) {
 8075                         if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) {
 8076                                 TAILQ_REMOVE(&nfsrv_recalllisthead, lyp,
 8077                                     lay_list);
 8078                                 TAILQ_INSERT_HEAD(&thl, lyp, lay_list);
 8079                                 NFSD_DEBUG(4,
 8080                                     "nfsrv_copymr: layout returned\n");
 8081                         } else {
 8082                                 lyp->lay_trycnt++;
 8083                                 ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR,
 8084                                     PVFS | PCATCH, "nfsmrl", hz);
 8085                                 NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n",
 8086                                     ret);
 8087                                 if (ret == EINTR || ret == ERESTART)
 8088                                         break;
 8089                                 if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) {
 8090                                         /*
 8091                                          * Give up after 60sec and return
 8092                                          * ENXIO, failing the copymr.
 8093                                          * This layout will remain on the
 8094                                          * recalllist.  It can only be cleared
 8095                                          * by restarting the nfsd.
 8096                                          * This seems the safe way to handle
 8097                                          * it, since it cannot be safely copied
 8098                                          * with an outstanding RW layout.
 8099                                          */
 8100                                         if (lyp->lay_trycnt >= 60) {
 8101                                                 ret = ENXIO;
 8102                                                 break;
 8103                                         }
 8104                                         if (didprintf == 0) {
 8105                                                 printf("nfsrv_copymr: layout "
 8106                                                     "not returned\n");
 8107                                                 didprintf = 1;
 8108                                         }
 8109                                 }
 8110                         }
 8111                         goto tryagain2;
 8112                 }
 8113         }
 8114         NFSDRECALLUNLOCK();
 8115         /* We can now get rid of the layouts that have been returned. */
 8116         TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp)
 8117                 nfsrv_freelayout(&thl, lyp);
 8118 
 8119         /*
 8120          * Do the vn_start_write() calls here, before the MDS vnode is
 8121          * locked and the tvp is created (locked) in the NFS file system
 8122          * that dvp is in.
 8123          * For tvmp, this probably isn't necessary, since it will be an
 8124          * NFS mount and they are not suspendable at this time.
 8125          */
 8126         if (ret == 0)
 8127                 ret = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 8128         if (ret == 0) {
 8129                 tvmp = dvp->v_mount;
 8130                 ret = vn_start_write(NULL, &tvmp, V_WAIT | PCATCH);
 8131         }
 8132 
 8133         /*
 8134          * LK_EXCLUSIVE lock the MDS vnode, so that any
 8135          * proxied writes through the MDS will be blocked until we have
 8136          * completed the copy and update of the extended attributes.
 8137          * This will also ensure that any attributes and ACL will not be
 8138          * changed until the copy is complete.
 8139          */
 8140         NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 8141         if (ret == 0 && (vp->v_iflag & VI_DOOMED) != 0) {
 8142                 NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n");
 8143                 ret = ESTALE;
 8144         }
 8145 
 8146         /* Create the data file on the recovered DS. */
 8147         if (ret == 0)
 8148                 ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp);
 8149 
 8150         /* Copy the DS file, if created successfully. */
 8151         if (ret == 0) {
 8152                 /*
 8153                  * Get any NFSv4 ACL on the MDS file, so that it can be set
 8154                  * on the new DS file.
 8155                  */
 8156                 aclp = acl_alloc(M_WAITOK | M_ZERO);
 8157                 retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
 8158                 if (retacl != 0 && retacl != ENOATTR)
 8159                         NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl);
 8160                 dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK);
 8161                 /* Malloc a block of 0s used to check for holes. */
 8162                 if (nfsrv_zeropnfsdat == NULL)
 8163                         nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP,
 8164                             M_WAITOK | M_ZERO);
 8165                 rdpos = wrpos = 0;
 8166                 ret = VOP_GETATTR(fvp, &va, cred);
 8167                 aresid = 0;
 8168                 while (ret == 0 && aresid == 0) {
 8169                         ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ,
 8170                             rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
 8171                             &aresid, p);
 8172                         xfer = PNFSDS_COPYSIZ - aresid;
 8173                         if (ret == 0 && xfer > 0) {
 8174                                 rdpos += xfer;
 8175                                 /*
 8176                                  * Skip the write for holes, except for the
 8177                                  * last block.
 8178                                  */
 8179                                 if (xfer < PNFSDS_COPYSIZ || rdpos ==
 8180                                     va.va_size || NFSBCMP(dat,
 8181                                     nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0)
 8182                                         ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer,
 8183                                             wrpos, UIO_SYSSPACE, IO_NODELOCKED,
 8184                                             cred, NULL, NULL, p);
 8185                                 if (ret == 0)
 8186                                         wrpos += xfer;
 8187                         }
 8188                 }
 8189 
 8190                 /* If there is an ACL and the copy succeeded, set the ACL. */
 8191                 if (ret == 0 && retacl == 0) {
 8192                         ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p);
 8193                         /*
 8194                          * Don't consider these as errors, since VOP_GETACL()
 8195                          * can return an ACL when they are not actually
 8196                          * supported.  For example, for UFS, VOP_GETACL()
 8197                          * will return a trivial ACL based on the uid/gid/mode
 8198                          * when there is no ACL on the file.
 8199                          * This case should be recognized as a trivial ACL
 8200                          * by UFS's VOP_SETACL() and succeed, but...
 8201                          */
 8202                         if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM)
 8203                                 ret = 0;
 8204                 }
 8205 
 8206                 if (ret == 0)
 8207                         ret = VOP_FSYNC(tvp, MNT_WAIT, p);
 8208 
 8209                 /* Set the DS data file's modify time that of the MDS file. */
 8210                 if (ret == 0)
 8211                         ret = VOP_GETATTR(vp, &va, cred);
 8212                 if (ret == 0) {
 8213                         mtime = va.va_mtime;
 8214                         VATTR_NULL(&va);
 8215                         va.va_mtime = mtime;
 8216                         ret = VOP_SETATTR(tvp, &va, cred);
 8217                 }
 8218 
 8219                 vput(tvp);
 8220                 acl_free(aclp);
 8221                 free(dat, M_TEMP);
 8222         }
 8223         if (tvmp != NULL)
 8224                 vn_finished_write(tvmp);
 8225 
 8226         /* Update the extended attributes for the newly created DS file. */
 8227         if (ret == 0)
 8228                 ret = vn_extattr_set(vp, IO_NODELOCKED,
 8229                     EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
 8230                     sizeof(*wpf) * mirrorcnt, (char *)wpf, p);
 8231         if (mp != NULL)
 8232                 vn_finished_write(mp);
 8233 
 8234         /* Get rid of the dontlist entry, so that Layouts can be issued. */
 8235         NFSDDONTLISTLOCK();
 8236         LIST_REMOVE(mrp, nfsmr_list);
 8237         NFSDDONTLISTUNLOCK();
 8238         free(mrp, M_NFSDSTATE);
 8239         return (ret);
 8240 }
 8241 
 8242 /*
 8243  * Create a data storage file on the recovered DS.
 8244  */
 8245 static int
 8246 nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
 8247     vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
 8248     vnode_t *tvpp)
 8249 {
 8250         struct vattr va, nva;
 8251         int error;
 8252 
 8253         /* Make data file name based on FH. */
 8254         error = VOP_GETATTR(vp, &va, cred);
 8255         if (error == 0) {
 8256                 /* Set the attributes for "vp" to Setattr the DS vp. */
 8257                 VATTR_NULL(&nva);
 8258                 nva.va_uid = va.va_uid;
 8259                 nva.va_gid = va.va_gid;
 8260                 nva.va_mode = va.va_mode;
 8261                 nva.va_size = 0;
 8262                 VATTR_NULL(&va);
 8263                 va.va_type = VREG;
 8264                 va.va_mode = nva.va_mode;
 8265                 NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf);
 8266                 error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL,
 8267                     pf->dsf_filename, cred, p, tvpp);
 8268         }
 8269         return (error);
 8270 }
 8271 
 8272 /*
 8273  * Look up the MDS file shared locked, and then get the extended attribute
 8274  * to find the extant DS file to be copied to the new mirror.
 8275  * If successful, *vpp is set to the MDS file's vp and *nvpp is
 8276  * set to a DS data file for the MDS file, both exclusively locked.
 8277  * The "buf" argument has the pnfsdsfile structure from the MDS file
 8278  * in it and buflen is set to its length.
 8279  */
 8280 int
 8281 nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf,
 8282     int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp,
 8283     struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp,
 8284     struct nfsdevice **fdsp)
 8285 {
 8286         struct nameidata nd;
 8287         struct vnode *vp, *curvp;
 8288         struct pnfsdsfile *pf;
 8289         struct nfsmount *nmp, *curnmp;
 8290         int dsdir, error, mirrorcnt, ippos;
 8291 
 8292         vp = NULL;
 8293         curvp = NULL;
 8294         curnmp = NULL;
 8295         *dsp = NULL;
 8296         *fdsp = NULL;
 8297         if (dspathp == NULL && curdspathp != NULL)
 8298                 return (EPERM);
 8299 
 8300         /*
 8301          * Look up the MDS file shared locked.  The lock will be upgraded
 8302          * to an exclusive lock after any rw layouts have been returned.
 8303          */
 8304         NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp);
 8305         NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
 8306             mdspathp, p);
 8307         error = namei(&nd);
 8308         NFSD_DEBUG(4, "lookup=%d\n", error);
 8309         if (error != 0)
 8310                 return (error);
 8311         if (nd.ni_vp->v_type != VREG) {
 8312                 vput(nd.ni_vp);
 8313                 NFSD_DEBUG(4, "mdspath not reg\n");
 8314                 return (EISDIR);
 8315         }
 8316         vp = nd.ni_vp;
 8317 
 8318         if (curdspathp != NULL) {
 8319                 /*
 8320                  * Look up the current DS path and find the nfsdev structure for
 8321                  * it.
 8322                  */
 8323                 NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp);
 8324                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 8325                     UIO_SYSSPACE, curdspathp, p);
 8326                 error = namei(&nd);
 8327                 NFSD_DEBUG(4, "ds lookup=%d\n", error);
 8328                 if (error != 0) {
 8329                         vput(vp);
 8330                         return (error);
 8331                 }
 8332                 if (nd.ni_vp->v_type != VDIR) {
 8333                         vput(nd.ni_vp);
 8334                         vput(vp);
 8335                         NFSD_DEBUG(4, "curdspath not dir\n");
 8336                         return (ENOTDIR);
 8337                 }
 8338                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 8339                         vput(nd.ni_vp);
 8340                         vput(vp);
 8341                         NFSD_DEBUG(4, "curdspath not an NFS mount\n");
 8342                         return (ENXIO);
 8343                 }
 8344                 curnmp = VFSTONFS(nd.ni_vp->v_mount);
 8345         
 8346                 /* Search the nfsdev list for a match. */
 8347                 NFSDDSLOCK();
 8348                 *fdsp = nfsv4_findmirror(curnmp);
 8349                 NFSDDSUNLOCK();
 8350                 if (*fdsp == NULL)
 8351                         curnmp = NULL;
 8352                 if (curnmp == NULL) {
 8353                         vput(nd.ni_vp);
 8354                         vput(vp);
 8355                         NFSD_DEBUG(4, "mdscopymr: no current ds\n");
 8356                         return (ENXIO);
 8357                 }
 8358                 curvp = nd.ni_vp;
 8359         }
 8360 
 8361         if (dspathp != NULL) {
 8362                 /* Look up the nfsdev path and find the nfsdev structure. */
 8363                 NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp);
 8364                 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 8365                     UIO_SYSSPACE, dspathp, p);
 8366                 error = namei(&nd);
 8367                 NFSD_DEBUG(4, "ds lookup=%d\n", error);
 8368                 if (error != 0) {
 8369                         vput(vp);
 8370                         if (curvp != NULL)
 8371                                 vput(curvp);
 8372                         return (error);
 8373                 }
 8374                 if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) {
 8375                         vput(nd.ni_vp);
 8376                         vput(vp);
 8377                         if (curvp != NULL)
 8378                                 vput(curvp);
 8379                         NFSD_DEBUG(4, "dspath not dir\n");
 8380                         if (nd.ni_vp == curvp)
 8381                                 return (EPERM);
 8382                         return (ENOTDIR);
 8383                 }
 8384                 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
 8385                         vput(nd.ni_vp);
 8386                         vput(vp);
 8387                         if (curvp != NULL)
 8388                                 vput(curvp);
 8389                         NFSD_DEBUG(4, "dspath not an NFS mount\n");
 8390                         return (ENXIO);
 8391                 }
 8392                 nmp = VFSTONFS(nd.ni_vp->v_mount);
 8393         
 8394                 /*
 8395                  * Search the nfsdevice list for a match.  If curnmp == NULL,
 8396                  * this is a recovery and there must be a mirror.
 8397                  */
 8398                 NFSDDSLOCK();
 8399                 if (curnmp == NULL)
 8400                         *dsp = nfsrv_findmirroredds(nmp);
 8401                 else
 8402                         *dsp = nfsv4_findmirror(nmp);
 8403                 NFSDDSUNLOCK();
 8404                 if (*dsp == NULL) {
 8405                         vput(nd.ni_vp);
 8406                         vput(vp);
 8407                         if (curvp != NULL)
 8408                                 vput(curvp);
 8409                         NFSD_DEBUG(4, "mdscopymr: no ds\n");
 8410                         return (ENXIO);
 8411                 }
 8412         } else {
 8413                 nd.ni_vp = NULL;
 8414                 nmp = NULL;
 8415         }
 8416 
 8417         /*
 8418          * Get a vp for an available DS data file using the extended
 8419          * attribute on the MDS file.
 8420          * If there is a valid entry for the new DS in the extended attribute
 8421          * on the MDS file (as checked via the nmp argument),
 8422          * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur.
 8423          */
 8424         error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p,
 8425             NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir);
 8426         if (curvp != NULL)
 8427                 vput(curvp);
 8428         if (nd.ni_vp == NULL) {
 8429                 if (error == 0 && nmp != NULL) {
 8430                         /* Search the nfsdev list for a match. */
 8431                         NFSDDSLOCK();
 8432                         *dsp = nfsrv_findmirroredds(nmp);
 8433                         NFSDDSUNLOCK();
 8434                 }
 8435                 if (error == 0 && (nmp == NULL || *dsp == NULL)) {
 8436                         if (nvpp != NULL && *nvpp != NULL) {
 8437                                 vput(*nvpp);
 8438                                 *nvpp = NULL;
 8439                         }
 8440                         error = ENXIO;
 8441                 }
 8442         } else
 8443                 vput(nd.ni_vp);
 8444 
 8445         /*
 8446          * When dspathp != NULL and curdspathp == NULL, this is a recovery
 8447          * and is only allowed if there is a 0.0.0.0 IP address entry.
 8448          * When curdspathp != NULL, the ippos will be set to that entry.
 8449          */
 8450         if (error == 0 && dspathp != NULL && ippos == -1) {
 8451                 if (nvpp != NULL && *nvpp != NULL) {
 8452                         vput(*nvpp);
 8453                         *nvpp = NULL;
 8454                 }
 8455                 error = ENXIO;
 8456         }
 8457         if (error == 0) {
 8458                 *vpp = vp;
 8459 
 8460                 pf = (struct pnfsdsfile *)buf;
 8461                 if (ippos == -1) {
 8462                         /* If no zeroip pnfsdsfile, add one. */
 8463                         ippos = *buflenp / sizeof(*pf);
 8464                         *buflenp += sizeof(*pf);
 8465                         pf += ippos;
 8466                         pf->dsf_dir = dsdir;
 8467                         strlcpy(pf->dsf_filename, fname,
 8468                             sizeof(pf->dsf_filename));
 8469                 } else
 8470                         pf += ippos;
 8471                 *pfp = pf;
 8472         } else
 8473                 vput(vp);
 8474         return (error);
 8475 }
 8476 
 8477 /*
 8478  * Search for a matching pnfsd mirror device structure, base on the nmp arg.
 8479  * Return one if found, NULL otherwise.
 8480  */
 8481 static struct nfsdevice *
 8482 nfsrv_findmirroredds(struct nfsmount *nmp)
 8483 {
 8484         struct nfsdevice *ds, *fndds;
 8485         int fndmirror;
 8486 
 8487         mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
 8488         /*
 8489          * Search the DS server list for a match with nmp.
 8490          * Remove the DS entry if found and there is a mirror.
 8491          */
 8492         fndds = NULL;
 8493         fndmirror = 0;
 8494         if (nfsrv_devidcnt == 0)
 8495                 return (fndds);
 8496         TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 8497                 if (ds->nfsdev_nmp == nmp) {
 8498                         NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n");
 8499                         fndds = ds;
 8500                         break;
 8501                 }
 8502         }
 8503         if (fndds == NULL)
 8504                 return (fndds);
 8505         if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
 8506                 fndmirror = 1;
 8507         else if (fndds->nfsdev_mdsisset != 0) {
 8508                 /* For the fsid is set case, search for a mirror. */
 8509                 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 8510                         if (ds != fndds && ds->nfsdev_nmp != NULL &&
 8511                             ds->nfsdev_mdsisset != 0 &&
 8512                             ds->nfsdev_mdsfsid.val[0] ==
 8513                             fndds->nfsdev_mdsfsid.val[0] &&
 8514                             ds->nfsdev_mdsfsid.val[1] ==
 8515                             fndds->nfsdev_mdsfsid.val[1]) {
 8516                                 fndmirror = 1;
 8517                                 break;
 8518                         }
 8519                 }
 8520         }
 8521         if (fndmirror == 0) {
 8522                 NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n");
 8523                 return (NULL);
 8524         }
 8525         return (fndds);
 8526 }
 8527 

Cache object: 7e17d8014bff4d30acf3c117a8c746dc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.