The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/nfsserver/nfs_srvsock.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1989, 1991, 1993, 1995
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)nfs_socket.c        8.5 (Berkeley) 3/30/95
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD: releng/6.1/sys/nfsserver/nfs_srvsock.c 158179 2006-04-30 16:44:43Z cvs2svn $");
   37 
   38 /*
   39  * Socket operations for use by nfs
   40  */
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/kernel.h>
   45 #include <sys/lock.h>
   46 #include <sys/malloc.h>
   47 #include <sys/mbuf.h>
   48 #include <sys/mount.h>
   49 #include <sys/mutex.h>
   50 #include <sys/proc.h>
   51 #include <sys/protosw.h>
   52 #include <sys/signalvar.h>
   53 #include <sys/socket.h>
   54 #include <sys/socketvar.h>
   55 #include <sys/sysctl.h>
   56 #include <sys/syslog.h>
   57 #include <sys/vnode.h>
   58 
   59 #include <netinet/in.h>
   60 #include <netinet/tcp.h>
   61 
   62 #include <nfs/rpcv2.h>
   63 #include <nfs/nfsproto.h>
   64 #include <nfsserver/nfs.h>
   65 #include <nfs/xdr_subs.h>
   66 #include <nfsserver/nfsm_subs.h>
   67 
   68 #define TRUE    1
   69 #define FALSE   0
   70 
   71 static int nfs_realign_test;      /* incremented on every nfs_realign() call */
   72 static int nfs_realign_count;     /* incremented when nfs_realign() had to copy data */
   73 
   74 SYSCTL_DECL(_vfs_nfsrv);
   75 
      /* Both counters are exported read/write under the _vfs_nfsrv sysctl node. */
   76 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "");
   77 SYSCTL_INT(_vfs_nfsrv, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "");
   78 
   79 
   80 /*
   81  * There is a congestion window for outstanding rpcs maintained per mount
   82  * point. The cwnd size is adjusted in roughly the way that:
   83  * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
   84  * SIGCOMM '88". ACM, August 1988.
   85  * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
   86  * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
   87  * of rpcs is in progress.
   88  * (The sent count and cwnd are scaled for integer arith.)
   89  * Variants of "slow start" were tried and were found to be too much of a
   90  * performance hit (ave. rtt 3 times larger),
   91  * I suspect due to the large rtt that nfs rpcs have.
   92  */
      /* Neither cwnd macro is referenced by the code in this listing. */
   93 #define NFS_CWNDSCALE   256
   94 #define NFS_MAXCWND     (NFS_CWNDSCALE * 32)
   95 struct callout  nfsrv_callout;  /* re-armed by nfsrv_timer() on each run */
   96 
   97 static void     nfs_realign(struct mbuf **pm, int hsiz);        /* XXX SHARED */
   98 static int      nfsrv_getstream(struct nfssvc_sock *, int);
   99 
      /*
       * Table of NFS server procedure handlers, NFS_NPROCS entries long.
       * NOTE(review): presumably indexed by nd_procnum once nfs_getreq() has
       * mapped v2 procedure numbers to their v3 equivalents; the dispatch
       * site is not in this file, so confirm against the caller.
       */
  100 int32_t (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
  101                                 struct nfssvc_sock *slp,
  102                                 struct thread *td,
  103                                 struct mbuf **mreqp) = {
  104         nfsrv_null,
  105         nfsrv_getattr,
  106         nfsrv_setattr,
  107         nfsrv_lookup,
  108         nfsrv3_access,
  109         nfsrv_readlink,
  110         nfsrv_read,
  111         nfsrv_write,
  112         nfsrv_create,
  113         nfsrv_mkdir,
  114         nfsrv_symlink,
  115         nfsrv_mknod,
  116         nfsrv_remove,
  117         nfsrv_rmdir,
  118         nfsrv_rename,
  119         nfsrv_link,
  120         nfsrv_readdir,
  121         nfsrv_readdirplus,
  122         nfsrv_statfs,
  123         nfsrv_fsinfo,
  124         nfsrv_pathconf,
  125         nfsrv_commit,
  126         nfsrv_noop
  127 };
  128 
  129 
  130 /*
  131  * Generate the rpc reply header
  132  * siz arg. is used to decide if adding a cluster is worthwhile
       *
       * Records err in nd->nd_repstat, allocates the first reply mbuf and
       * writes the RPC reply header into it.  The caller must hold the NFSD
       * lock; it is dropped around the mbuf allocation and retaken before
       * the header words are written.
       *
       * On return *mbp holds the mbuf being built and *bposp the build
       * position within it; the head of the reply chain is returned.
       * nfsrvstats.srvrpc_errs is incremented for any err other than 0 and
       * NFSERR_RETVOID.
  133  */
  134 struct mbuf *
  135 nfs_rephead(int siz, struct nfsrv_descript *nd, int err,
  136     struct mbuf **mbp, caddr_t *bposp)
  137 {
  138         u_int32_t *tl;
  139         struct mbuf *mreq;
  140         caddr_t bpos;
  141         struct mbuf *mb;
  142 
  143         /* XXXRW: not 100% clear the lock is needed here. */
  144         NFSD_LOCK_ASSERT();
  145 
  146         nd->nd_repstat = err;
              /* An error reply to a non-v3 request carries no body, so ignore siz. */
  147         if (err && (nd->nd_flag & ND_NFSV3) == 0)       /* XXX recheck */
  148                 siz = 0;
  149         NFSD_UNLOCK();
  150         MGETHDR(mreq, M_TRYWAIT, MT_DATA);
  151         mb = mreq;
  152         /*
  153          * If this is a big reply, use a cluster else
  154          * try and leave leading space for the lower level headers.
  155          */
  156         mreq->m_len = 6 * NFSX_UNSIGNED;        /* largest fixed header written below */
  157         siz += RPC_REPLYSIZ;
  158         if ((max_hdr + siz) >= MINCLSIZE) {
  159                 MCLGET(mreq, M_TRYWAIT);
  160         } else
  161                 mreq->m_data += min(max_hdr, M_TRAILINGSPACE(mreq));
  162         NFSD_LOCK();
  163         tl = mtod(mreq, u_int32_t *);
  164         bpos = ((caddr_t)tl) + mreq->m_len;
  165         *tl++ = txdr_unsigned(nd->nd_retxid);
  166         *tl++ = nfsrv_rpc_reply;
              /* Denied reply: RPC version mismatch or an authentication error. */
  167         if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
  168                 *tl++ = nfsrv_rpc_msgdenied;
  169                 if (err & NFSERR_AUTHERR) {
  170                         *tl++ = nfsrv_rpc_autherr;
  171                         *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
                              /* Only five words were used; give the sixth back. */
  172                         mreq->m_len -= NFSX_UNSIGNED;
  173                         bpos -= NFSX_UNSIGNED;
  174                 } else {
  175                         *tl++ = nfsrv_rpc_mismatch;
                              /* Two version words, both RPC_VER2. */
  176                         *tl++ = txdr_unsigned(RPC_VER2);
  177                         *tl = txdr_unsigned(RPC_VER2);
  178                 }
  179         } else {
  180                 *tl++ = nfsrv_rpc_msgaccepted;
  181                 /*
  182                  * Send a RPCAUTH_NULL verifier - no Kerberos.
  183                  */
  184                 *tl++ = 0;
  185                 *tl++ = 0;
                      /* The sixth word is the accept status selected by err. */
  186                 switch (err) {
  187                 case EPROGUNAVAIL:
  188                         *tl = txdr_unsigned(RPC_PROGUNAVAIL);
  189                         break;
  190                 case EPROGMISMATCH:
  191                         *tl = txdr_unsigned(RPC_PROGMISMATCH);
                              /*
                               * Two extra words holding 2 and 3.
                               * NOTE(review): presumably the lowest and highest
                               * supported program versions, and nfsm_build
                               * presumably advances mb/bpos - confirm.
                               */
  192                         tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED);
  193                         *tl++ = txdr_unsigned(2);
  194                         *tl = txdr_unsigned(3);
  195                         break;
  196                 case EPROCUNAVAIL:
  197                         *tl = txdr_unsigned(RPC_PROCUNAVAIL);
  198                         break;
  199                 case EBADRPC:
  200                         *tl = txdr_unsigned(RPC_GARBAGE);
  201                         break;
  202                 default:
  203                         *tl = 0;
                              /*
                               * Unless the procedure returns void, append one
                               * status word: the mapped error, or 0 on success.
                               */
  204                         if (err != NFSERR_RETVOID) {
  205                                 tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED);
  206                                 if (err)
  207                                     *tl = txdr_unsigned(nfsrv_errmap(nd, err));
  208                                 else
  209                                     *tl = 0;
  210                         }
  211                         break;
  212                 }
  213         }
  214         *mbp = mb;
  215         *bposp = bpos;
  216         if (err != 0 && err != NFSERR_RETVOID)
  217                 nfsrvstats.srvrpc_errs++;
  218         return mreq;
  219 }
  220 
  221 
  222 /*
  223  *      nfs_realign:
  224  *
  225  *      Check for badly aligned mbuf data and realign by copying the unaligned
  226  *      portion of the data into a new mbuf chain and freeing the portions
  227  *      of the old chain that were replaced.
  228  *
  229  *      We cannot simply realign the data within the existing mbuf chain
  230  *      because the underlying buffers may contain other rpc commands and
  231  *      we cannot afford to overwrite them.
  232  *
  233  *      We would prefer to avoid this situation entirely.  The situation does
  234  *      not occur with NFS/UDP and is supposed to only occasionally occur
  235  *      with TCP.  Use vfs.nfsrv.realign_count and realign_test to check this.
  236  */
      /*
       * pm points at the head pointer of the chain; it is updated in place when
       * part of the chain is replaced.  hsiz is not used by the body.  The
       * NFSD lock must be held on entry and is dropped around the allocation.
       */
  237 static void
  238 nfs_realign(struct mbuf **pm, int hsiz) /* XXX COMMON */
  239 {
  240         struct mbuf *m;
  241         struct mbuf *n = NULL;
  242         int off = 0;
  243 
  244         /* XXXRW: may not need lock? */
  245         NFSD_LOCK_ASSERT();
  246 
  247         ++nfs_realign_test;
              /*
               * Walk the chain until an mbuf whose length or data address is not
               * a multiple of 4 is found; pm is left pointing at the link that
               * references that mbuf.
               */
  248         while ((m = *pm) != NULL) {
  249                 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
  250                         NFSD_UNLOCK();
  251                         MGET(n, M_TRYWAIT, MT_DATA);
  252                         if (m->m_len >= MINCLSIZE) {
  253                                 MCLGET(n, M_TRYWAIT);
  254                         }
  255                         NFSD_LOCK();
  256                         n->m_len = 0;
  257                         break;
  258                 }
  259                 pm = &m->m_next;
  260         }
  261 
  262         /*
  263          * If n is non-NULL, loop on m copying data, then replace the
  264          * portion of the chain that had to be realigned.
  265          */
  266         if (n != NULL) {
  267                 ++nfs_realign_count;
                      /* Copy everything from the first misaligned mbuf to the end. */
  268                 while (m) {
  269                         m_copyback(n, off, m->m_len, mtod(m, caddr_t));
  270                         off += m->m_len;
  271                         m = m->m_next;
  272                 }
                      /* Free the old tail and splice the copy in at the same link. */
  273                 m_freem(*pm);
  274                 *pm = n;
  275         }
  276 }
  277 
  278 
  279 /*
  280  * Parse an RPC request
  281  * - verify it
  282  * - fill in the cred struct.
       *
       * Reads from nd->nd_mrep / nd_md / nd_dpos.  When has_header is set the
       * xid and message direction are parsed first.  The nfsd argument is not
       * used by the body.
       *
       * Returns EBADRPC (after freeing the request chain) for a malformed
       * request.  Returns 0 otherwise; RPC-level rejections are reported by
       * setting nd->nd_repstat and forcing nd->nd_procnum to NFSPROC_NOOP.
       * nd->nd_md and nd->nd_dpos are written back only on the final
       * successful AUTH_UNIX path.
  283  */
  284 int
  285 nfs_getreq(struct nfsrv_descript *nd, struct nfsd *nfsd, int has_header)
  286 {
  287         int len, i;
  288         u_int32_t *tl;
  289         caddr_t dpos;
  290         u_int32_t nfsvers, auth_type;
  291         int error = 0;
  292         struct mbuf *mrep, *md;
  293 
  294         NFSD_LOCK_ASSERT();
  295 
  296         mrep = nd->nd_mrep;
  297         md = nd->nd_md;
  298         dpos = nd->nd_dpos;
              /*
               * NOTE(review): the nfsm_* macros presumably use the locals md, dpos,
               * mrep and error implicitly and jump to nfsmout on failure - confirm
               * against nfsm_subs.h.
               */
  299         if (has_header) {
  300                 tl = nfsm_dissect_nonblock(u_int32_t *, 10 * NFSX_UNSIGNED);
  301                 nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
  302                 if (*tl++ != nfsrv_rpc_call) {
  303                         m_freem(mrep);
  304                         return (EBADRPC);
  305                 }
  306         } else
  307                 tl = nfsm_dissect_nonblock(u_int32_t *, 8 * NFSX_UNSIGNED);
  308         nd->nd_repstat = 0;
  309         nd->nd_flag = 0;
  310         if (*tl++ != nfsrv_rpc_vers) {
  311                 nd->nd_repstat = ERPCMISMATCH;
  312                 nd->nd_procnum = NFSPROC_NOOP;
  313                 return (0);
  314         }
  315         if (*tl != nfsrv_nfs_prog) {
  316                 nd->nd_repstat = EPROGUNAVAIL;
  317                 nd->nd_procnum = NFSPROC_NOOP;
  318                 return (0);
  319         }
  320         tl++;
  321         nfsvers = fxdr_unsigned(u_int32_t, *tl++);
  322         if (nfsvers < NFS_VER2 || nfsvers > NFS_VER3) {
  323                 nd->nd_repstat = EPROGMISMATCH;
  324                 nd->nd_procnum = NFSPROC_NOOP;
  325                 return (0);
  326         }
  327         nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
              /* The NULL procedure returns before any credential parsing. */
  328         if (nd->nd_procnum == NFSPROC_NULL)
  329                 return (0);
  330         if (nfsvers == NFS_VER3) {
  331                 nd->nd_flag = ND_NFSV3;
  332                 if (nd->nd_procnum >= NFS_NPROCS) {
  333                         nd->nd_repstat = EPROCUNAVAIL;
  334                         nd->nd_procnum = NFSPROC_NOOP;
  335                         return (0);
  336                 }
  337         } else {
  338                 if (nd->nd_procnum > NFSV2PROC_STATFS) {
  339                         nd->nd_repstat = EPROCUNAVAIL;
  340                         nd->nd_procnum = NFSPROC_NOOP;
  341                         return (0);
  342                 }
  343                 /* Map the v2 procedure numbers into v3 ones */
  344                 nd->nd_procnum = nfsrv_nfsv3_procid[nd->nd_procnum];
  345         }
  346         auth_type = *tl++;      /* kept in wire byte order; compared as-is below */
  347         len = fxdr_unsigned(int, *tl++);
  348         if (len < 0 || len > RPCAUTH_MAXSIZ) {
  349                 m_freem(mrep);
  350                 return (EBADRPC);
  351         }
  352 
  353         /*
  354          * Handle auth_unix;
  355          */
  356         if (auth_type == nfsrv_rpc_auth_unix) {
                      /* Skip one word, then read a length that is bounds-checked and skipped. */
  357                 len = fxdr_unsigned(int, *++tl);
  358                 if (len < 0 || len > NFS_MAXNAMLEN) {
  359                         m_freem(mrep);
  360                         return (EBADRPC);
  361                 }
  362                 nfsm_adv(nfsm_rndup(len));
  363                 tl = nfsm_dissect_nonblock(u_int32_t *, 3 * NFSX_UNSIGNED);
  364                 /*
  365                  * XXX: This credential should be managed using crget(9)
  366                  * and related calls.  Right now, this tramples on any
  367                  * extensible data in the ucred, fails to initialize the
  368                  * mutex, and worse.  This must be fixed before FreeBSD
  369                  * 5.3-RELEASE.
  370                  */
  371                 bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
  372                 nd->nd_cr.cr_ref = 1;
  373                 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
  374                 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
  375                 len = fxdr_unsigned(int, *tl);  /* number of supplementary gids */
  376                 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
  377                         m_freem(mrep);
  378                         return (EBADRPC);
  379                 }
  380                 tl = nfsm_dissect_nonblock(u_int32_t *, (len + 2) * NFSX_UNSIGNED);
                      /*
                       * Supplementary gids are stored from index 1 upward; any that
                       * do not fit below NGROUPS are skipped on the wire.
                       */
  381                 for (i = 1; i <= len; i++)
  382                     if (i < NGROUPS)
  383                         nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
  384                     else
  385                         tl++;
  386                 nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
  387                 if (nd->nd_cr.cr_ngroups > 1)
  388                     nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
                      /* Skip one word, then read the length of what follows and skip it. */
  389                 len = fxdr_unsigned(int, *++tl);
  390                 if (len < 0 || len > RPCAUTH_MAXSIZ) {
  391                         m_freem(mrep);
  392                         return (EBADRPC);
  393                 }
  394                 if (len > 0)
  395                         nfsm_adv(nfsm_rndup(len));
  396         } else {
                      /* Any other authentication flavour is rejected. */
  397                 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
  398                 nd->nd_procnum = NFSPROC_NOOP;
  399                 return (0);
  400         }
  401 
              /* Publish the parse position reached after the credentials. */
  402         nd->nd_md = md;
  403         nd->nd_dpos = dpos;
  404         return (0);
  405 nfsmout:
  406         return (error);
  407 }
  408 
  409 /*
  410  * Socket upcall routine for the nfsd sockets.
  411  * The caddr_t arg is a pointer to the "struct nfssvc_sock".
  412  * Essentially do as much as possible non-blocking, else punt and it will
  413  * be called with M_TRYWAIT from an nfsd.
       *
       * Must be entered without the NFSD lock held; the lock is taken here,
       * dropped around each soreceive() and allocation, and released before
       * returning.  When waitflag is M_DONTWAIT no data is received: the
       * socket is only flagged SLP_NEEDQ and an nfsd is woken.
  414  */
  415 void
  416 nfsrv_rcv(struct socket *so, void *arg, int waitflag)
  417 {
  418         struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
  419         struct mbuf *m;
  420         struct mbuf *mp;
  421         struct sockaddr *nam;
  422         struct uio auio;
  423         int flags, error;
  424 
  425         /*
  426          * XXXRW: For now, assert Giant here since the NFS server upcall
  427          * will perform socket operations requiring Giant in a non-mpsafe
  428          * kernel.
  429          */
  430         NET_ASSERT_GIANT();
  431         NFSD_UNLOCK_ASSERT();
  432 
  433         /* XXXRW: Unlocked read. */
  434         if ((slp->ns_flag & SLP_VALID) == 0)
  435                 return;
  436 
  437         /*
  438          * We can't do this in the context of a socket callback
  439          * because we're called with locks held.
  440          * XXX: SMP
  441          */
  442         if (waitflag == M_DONTWAIT) {
  443                 NFSD_LOCK();
  444                 slp->ns_flag |= SLP_NEEDQ;
  445                 goto dorecs;
  446         }
  447 
  448 
  449         NFSD_LOCK();
  450         auio.uio_td = NULL;
  451         if (so->so_type == SOCK_STREAM) {
  452                 /*
  453                  * If there are already records on the queue, defer soreceive()
  454                  * to an nfsd so that there is feedback to the TCP layer that
  455                  * the nfs servers are heavily loaded.
                       *
                       * NOTE(review): the M_DONTWAIT case already jumped to dorecs
                       * above, so this condition cannot be true here.
  456                  */
  457                 if (STAILQ_FIRST(&slp->ns_rec) != NULL &&
  458                     waitflag == M_DONTWAIT) {
  459                         slp->ns_flag |= SLP_NEEDQ;
  460                         goto dorecs;
  461                 }
  462 
  463                 /*
  464                  * Do soreceive().
  465                  */
  466                 auio.uio_resid = 1000000000;    /* effectively "everything available" */
  467                 flags = MSG_DONTWAIT;
  468                 NFSD_UNLOCK();
  469                 error = so->so_proto->pr_usrreqs->pru_soreceive
  470                         (so, &nam, &auio, &mp, NULL, &flags);
  471                 NFSD_LOCK();
  472                 if (error || mp == NULL) {
                              /* Nothing usable: retry later on EWOULDBLOCK, else disconnect. */
  473                         if (error == EWOULDBLOCK)
  474                                 slp->ns_flag |= SLP_NEEDQ;
  475                         else
  476                                 slp->ns_flag |= SLP_DISCONN;
  477                         goto dorecs;
  478                 }
                      /*
                       * Append the new data to the raw stream chain; the byte count
                       * received is the requested resid minus what is left over.
                       */
  479                 m = mp;
  480                 if (slp->ns_rawend) {
  481                         slp->ns_rawend->m_next = m;
  482                         slp->ns_cc += 1000000000 - auio.uio_resid;
  483                 } else {
  484                         slp->ns_raw = m;
  485                         slp->ns_cc = 1000000000 - auio.uio_resid;
  486                 }
  487                 while (m->m_next)
  488                         m = m->m_next;
  489                 slp->ns_rawend = m;
  490 
  491                 /*
  492                  * Now try and parse record(s) out of the raw stream data.
  493                  */
  494                 error = nfsrv_getstream(slp, waitflag);
  495                 if (error) {
                              /* EPERM signals a bad record length; anything else is retried. */
  496                         if (error == EPERM)
  497                                 slp->ns_flag |= SLP_DISCONN;
  498                         else
  499                                 slp->ns_flag |= SLP_NEEDQ;
  500                 }
  501         } else {
                      /* Non-stream socket: each soreceive() that yields data is one record. */
  502                 do {
  503                         auio.uio_resid = 1000000000;
  504                         flags = MSG_DONTWAIT;
  505                         NFSD_UNLOCK();
  506                         error = so->so_proto->pr_usrreqs->pru_soreceive
  507                                 (so, &nam, &auio, &mp, NULL, &flags);
  508                         if (mp) {
  509                                 struct nfsrv_rec *rec;
  510                                 rec = malloc(sizeof(struct nfsrv_rec),
  511                                     M_NFSRVDESC, 
  512                                     waitflag == M_DONTWAIT ? M_NOWAIT : M_WAITOK);
  513                                 if (!rec) {
                                              /*
                                               * Drop this packet.  mp is freed but still
                                               * non-NULL, so the loop goes round again.
                                               */
  514                                         if (nam)
  515                                                 FREE(nam, M_SONAME);
  516                                         m_freem(mp);
  517                                         NFSD_LOCK();
  518                                         continue;
  519                                 }
  520                                 NFSD_LOCK();
  521                                 nfs_realign(&mp, 10 * NFSX_UNSIGNED);
  522                                 rec->nr_address = nam;
  523                                 rec->nr_packet = mp;
  524                                 STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
  525                         } else
  526                                 NFSD_LOCK();
  527                         if (error) {
  528                                 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
  529                                         && error != EWOULDBLOCK) {
  530                                         slp->ns_flag |= SLP_DISCONN;
  531                                         goto dorecs;
  532                                 }
  533                         }
  534                 } while (mp);
  535         }
  536 
  537         /*
  538          * Now try and process the request records, non-blocking.
               * The NFSD lock is held on every path that reaches this label.
  539          */
  540 dorecs:
  541         if (waitflag == M_DONTWAIT &&
  542                 (STAILQ_FIRST(&slp->ns_rec) != NULL ||
  543                  (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
  544                 nfsrv_wakenfsd(slp);
  545         NFSD_UNLOCK();
  546 }
  547 
  548 /*
  549  * Try and extract an RPC request from the mbuf data list received on a
  550  * stream socket. The "waitflag" argument indicates whether or not it
  551  * can sleep.
       *
       * Consumes slp->ns_raw / ns_cc, accumulates fragments on slp->ns_frag and
       * queues each completed record on slp->ns_rec.  SLP_GETSTREAM guards
       * against re-entry and is cleared on every return path.
       *
       * Returns 0 when more data is needed, EPERM for an unacceptable record
       * length and EWOULDBLOCK when m_copym() fails.
  552  */
  553 static int
  554 nfsrv_getstream(struct nfssvc_sock *slp, int waitflag)
  555 {
  556         struct mbuf *m, **mpp;
  557         char *cp1, *cp2;
  558         int len;
  559         struct mbuf *om, *m2, *recm;
  560         u_int32_t recmark;
  561 
  562         NFSD_LOCK_ASSERT();
  563 
  564         if (slp->ns_flag & SLP_GETSTREAM)
  565                 panic("nfs getstream");
  566         slp->ns_flag |= SLP_GETSTREAM;
  567         for (;;) {
                  /* ns_reclen == 0 means the next thing in the stream is a record mark. */
  568             if (slp->ns_reclen == 0) {
  569                 if (slp->ns_cc < NFSX_UNSIGNED) {
  570                         slp->ns_flag &= ~SLP_GETSTREAM;
  571                         return (0);
  572                 }
  573                 m = slp->ns_raw;
  574                 if (m->m_len >= NFSX_UNSIGNED) {
  575                         bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
  576                         m->m_data += NFSX_UNSIGNED;
  577                         m->m_len -= NFSX_UNSIGNED;
  578                 } else {
                              /* The mark straddles mbufs: gather it one byte at a time. */
  579                         cp1 = (caddr_t)&recmark;
  580                         cp2 = mtod(m, caddr_t);
  581                         while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
  582                                 while (m->m_len == 0) {
  583                                         m = m->m_next;
  584                                         cp2 = mtod(m, caddr_t);
  585                                 }
  586                                 *cp1++ = *cp2++;
  587                                 m->m_data++;
  588                                 m->m_len--;
  589                         }
  590                 }
  591                 slp->ns_cc -= NFSX_UNSIGNED;
  592                 recmark = ntohl(recmark);
                      /* Low 31 bits are the fragment length; the top bit marks the last fragment. */
  593                 slp->ns_reclen = recmark & ~0x80000000;
  594                 if (recmark & 0x80000000)
  595                         slp->ns_flag |= SLP_LASTFRAG;
  596                 else
  597                         slp->ns_flag &= ~SLP_LASTFRAG;
                      /*
                       * NOTE(review): this rejects ns_reclen == 0 with EPERM, yet the
                       * comment below says ns_reclen may be 0 - confirm which is
                       * intended.
                       */
  598                 if (slp->ns_reclen > NFS_MAXPACKET || slp->ns_reclen <= 0) {
  599                         slp->ns_flag &= ~SLP_GETSTREAM;
  600                         return (EPERM);
  601                 }
  602             }
  603 
  604             /*
  605              * Now get the record part.
  606              *
  607              * Note that slp->ns_reclen may be 0.  Linux sometimes
  608              * generates 0-length RPCs.
  609              */
  610             recm = NULL;
  611             if (slp->ns_cc == slp->ns_reclen) {
                      /* The raw chain holds exactly one fragment: take all of it. */
  612                 recm = slp->ns_raw;
  613                 slp->ns_raw = slp->ns_rawend = NULL;
  614                 slp->ns_cc = slp->ns_reclen = 0;
  615             } else if (slp->ns_cc > slp->ns_reclen) {
                      /*
                       * More data than one fragment: split the chain.  om tracks the
                       * last mbuf that lies wholly inside the fragment.
                       */
  616                 len = 0;
  617                 m = slp->ns_raw;
  618                 om = NULL;
  619 
  620                 while (len < slp->ns_reclen) {
  621                         if ((len + m->m_len) > slp->ns_reclen) {
                                      /*
                                       * The fragment ends inside m: copy its leading
                                       * part for the record and trim it from m.
                                       */
  622                                 NFSD_UNLOCK();
  623                                 m2 = m_copym(m, 0, slp->ns_reclen - len,
  624                                         waitflag);
  625                                 NFSD_LOCK();
  626                                 if (m2) {
  627                                         if (om) {
  628                                                 om->m_next = m2;
  629                                                 recm = slp->ns_raw;
  630                                         } else
  631                                                 recm = m2;
  632                                         m->m_data += slp->ns_reclen - len;
  633                                         m->m_len -= slp->ns_reclen - len;
  634                                         len = slp->ns_reclen;
  635                                 } else {
  636                                         slp->ns_flag &= ~SLP_GETSTREAM;
  637                                         return (EWOULDBLOCK);
  638                                 }
  639                         } else if ((len + m->m_len) == slp->ns_reclen) {
                                      /* The fragment ends exactly at the end of m: cut after it. */
  640                                 om = m;
  641                                 len += m->m_len;
  642                                 m = m->m_next;
  643                                 recm = slp->ns_raw;
  644                                 om->m_next = NULL;
  645                         } else {
  646                                 om = m;
  647                                 len += m->m_len;
  648                                 m = m->m_next;
  649                         }
  650                 }
  651                 slp->ns_raw = m;
  652                 slp->ns_cc -= len;
  653                 slp->ns_reclen = 0;
  654             } else {
                      /* Fragment not fully received yet. */
  655                 slp->ns_flag &= ~SLP_GETSTREAM;
  656                 return (0);
  657             }
  658 
  659             /*
  660              * Accumulate the fragments into a record.
  661              */
  662             mpp = &slp->ns_frag;
  663             while (*mpp)
  664                 mpp = &((*mpp)->m_next);
  665             *mpp = recm;
  666             if (slp->ns_flag & SLP_LASTFRAG) {
  667                 struct nfsrv_rec *rec;
  668                 NFSD_UNLOCK();
  669                 rec = malloc(sizeof(struct nfsrv_rec), M_NFSRVDESC,
  670                     waitflag == M_DONTWAIT ? M_NOWAIT : M_WAITOK);
  671                 NFSD_LOCK();
  672                 if (!rec) {
                          /* No record header available: the whole request is dropped. */
  673                     m_freem(slp->ns_frag);
  674                 } else {
  675                     nfs_realign(&slp->ns_frag, 10 * NFSX_UNSIGNED);
  676                     rec->nr_address = NULL;
  677                     rec->nr_packet = slp->ns_frag;
  678                     STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
  679                 }
  680                 slp->ns_frag = NULL;
  681             }
  682         }
  683 }
  684 
  685 /*
  686  * Parse an RPC header.
  687  */
  688 int
  689 nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
  690     struct nfsrv_descript **ndp)
  691 {
  692         struct nfsrv_rec *rec;
  693         struct mbuf *m;
  694         struct sockaddr *nam;
  695         struct nfsrv_descript *nd;
  696         int error;
  697 
  698         NFSD_LOCK_ASSERT();
  699 
  700         *ndp = NULL;
  701         if ((slp->ns_flag & SLP_VALID) == 0 ||
  702             STAILQ_FIRST(&slp->ns_rec) == NULL)
  703                 return (ENOBUFS);
  704         rec = STAILQ_FIRST(&slp->ns_rec);
  705         STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
  706         nam = rec->nr_address;
  707         m = rec->nr_packet;
  708         free(rec, M_NFSRVDESC);
  709         NFSD_UNLOCK();
  710         MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
  711                 M_NFSRVDESC, M_WAITOK);
  712         NFSD_LOCK();
  713         nd->nd_md = nd->nd_mrep = m;
  714         nd->nd_nam2 = nam;
  715         nd->nd_dpos = mtod(m, caddr_t);
  716         error = nfs_getreq(nd, nfsd, TRUE);
  717         if (error) {
  718                 if (nam) {
  719                         FREE(nam, M_SONAME);
  720                 }
  721                 free((caddr_t)nd, M_NFSRVDESC);
  722                 return (error);
  723         }
  724         *ndp = nd;
  725         nfsd->nfsd_nd = nd;
  726         return (0);
  727 }
  728 
  729 /*
  730  * Search for a sleeping nfsd and wake it up.
  731  * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
  732  * running nfsds will go look for the work in the nfssvc_sock list.
  733  */
  734 void
  735 nfsrv_wakenfsd(struct nfssvc_sock *slp)
  736 {
  737         struct nfsd *nd;
  738 
  739         NFSD_LOCK_ASSERT();
  740 
  741         if ((slp->ns_flag & SLP_VALID) == 0)
  742                 return;
  743         TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
  744                 if (nd->nfsd_flag & NFSD_WAITING) {
  745                         nd->nfsd_flag &= ~NFSD_WAITING;
  746                         if (nd->nfsd_slp)
  747                                 panic("nfsd wakeup");
  748                         slp->ns_sref++;
  749                         nd->nfsd_slp = slp;
  750                         wakeup(nd);
  751                         return;
  752                 }
  753         }
  754         slp->ns_flag |= SLP_DOREC;
  755         nfsd_head_flag |= NFSD_CHECKSLP;
  756 }
  757 
  758 /*
  759  * This is the nfs send routine.
  760  * For the server side:
  761  * - return EINTR or ERESTART if interrupted by a signal
  762  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
  763  * - do any cleanup required by recoverable socket errors (?)
  764  */
  765 int
  766 nfsrv_send(struct socket *so, struct sockaddr *nam, struct mbuf *top)
  767 {
  768         struct sockaddr *sendnam;
  769         int error, soflags, flags;
  770 
  771         NET_ASSERT_GIANT();
  772         NFSD_UNLOCK_ASSERT();
  773 
  774         soflags = so->so_proto->pr_flags;
  775         if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
  776                 sendnam = NULL;
  777         else
  778                 sendnam = nam;
  779         if (so->so_type == SOCK_SEQPACKET)
  780                 flags = MSG_EOR;
  781         else
  782                 flags = 0;
  783 
  784         error = so->so_proto->pr_usrreqs->pru_sosend(so, sendnam, 0, top, 0,
  785                                                      flags, curthread/*XXX*/);
  786         if (error == ENOBUFS && so->so_type == SOCK_DGRAM)
  787                 error = 0;
  788 
  789         if (error) {
  790                 log(LOG_INFO, "nfsd send error %d\n", error);
  791 
  792                 /*
  793                  * Handle any recoverable (soft) socket errors here. (?)
  794                  */
  795                 if (error != EINTR && error != ERESTART &&
  796                     error != EWOULDBLOCK && error != EPIPE)
  797                         error = 0;
  798         }
  799         return (error);
  800 }
  801 
  802 /*
  803  * NFS server timer routine.
  804  */
  805 void
  806 nfsrv_timer(void *arg)
  807 {
  808         struct nfssvc_sock *slp;
  809         u_quad_t cur_usec;
  810 
  811         NFSD_LOCK();
  812         /*
  813          * Scan the write gathering queues for writes that need to be
  814          * completed now.
  815          */
  816         cur_usec = nfs_curusec();
  817         TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
  818                 if (LIST_FIRST(&slp->ns_tq) &&
  819                     LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
  820                         nfsrv_wakenfsd(slp);
  821         }
  822         NFSD_UNLOCK();
  823         callout_reset(&nfsrv_callout, nfsrv_ticks, nfsrv_timer, NULL);
  824 }

Cache object: 3f4839390aee3db499006cc8c9894c23


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.