The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/nfs/nfs_socket.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $OpenBSD: nfs_socket.c,v 1.143 2022/08/13 21:01:46 mvs Exp $    */
    2 /*      $NetBSD: nfs_socket.c,v 1.27 1996/04/15 20:20:00 thorpej Exp $  */
    3 
    4 /*
    5  * Copyright (c) 1989, 1991, 1993, 1995
    6  *      The Regents of the University of California.  All rights reserved.
    7  *
    8  * This code is derived from software contributed to Berkeley by
    9  * Rick Macklem at The University of Guelph.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. Neither the name of the University nor the names of its contributors
   20  *    may be used to endorse or promote products derived from this software
   21  *    without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  *
   35  *      @(#)nfs_socket.c        8.5 (Berkeley) 3/30/95
   36  */
   37 
   38 /*
   39  * Socket operations for use by nfs
   40  */
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/proc.h>
   45 #include <sys/mount.h>
   46 #include <sys/kernel.h>
   47 #include <sys/mbuf.h>
   48 #include <sys/vnode.h>
   49 #include <sys/protosw.h>
   50 #include <sys/signalvar.h>
   51 #include <sys/socket.h>
   52 #include <sys/socketvar.h>
   53 #include <sys/syslog.h>
   54 #include <sys/tprintf.h>
   55 #include <sys/namei.h>
   56 #include <sys/pool.h>
   57 #include <sys/queue.h>
   58 
   59 #include <netinet/in.h>
   60 #include <netinet/tcp.h>
   61 
   62 #include <nfs/rpcv2.h>
   63 #include <nfs/nfsproto.h>
   64 #include <nfs/nfs.h>
   65 #include <nfs/xdr_subs.h>
   66 #include <nfs/nfsm_subs.h>
   67 #include <nfs/nfsmount.h>
   68 #include <nfs/nfs_var.h>
   69 
   70 /* External data, mostly RPC constants in XDR form. */
   71 extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
   72         rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr;
   73 extern u_int32_t nfs_prog;
      /* Global NFS statistics; this file bumps rpcretries and rpctimeouts. */
   74 extern struct nfsstats nfsstats;
   75 extern int nfsv3_procid[NFS_NPROCS];
   76 extern int nfs_ticks;
   77 
      /*
       * NOTE(review): pool defined elsewhere; no use of it is visible in
       * this part of the file.
       */
   78 extern struct pool nfsrv_descript_pl;
   79 
   80 /*
   81  * There is a congestion window for outstanding rpcs maintained per mount
   82  * point. The cwnd size is adjusted in roughly the way that:
   83  * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
   84  * SIGCOMM '88". ACM, August 1988.
   85  * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
   86  * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
   87  * of rpcs is in progress.
   88  * (The sent count and cwnd are scaled for integer arith.)
   89  * Variants of "slow start" were tried and were found to be too much of a
   90  * performance hit (ave. rtt 3 times larger),
   91  * I suspect due to the large rtt that nfs rpcs have.
   92  */
   /* Fixed-point scale factor used for the per-mount congestion window. */
   93 #define NFS_CWNDSCALE   256
      /* Largest scaled congestion window: 32 times the scale factor. */
   94 #define NFS_MAXCWND     (NFS_CWNDSCALE * 32)
      /*
       * Eight doubling multipliers, 2 through 256.
       * NOTE(review): the consumer is not visible in this part of the file;
       * presumably a retransmit backoff table -- confirm.
       */
   95 int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256 };
   96 
   97 /* RTT estimator */
      /*
       * Timer class for each NFS procedure number (one entry per procedure,
       * NFS_NPROCS in total).  nfs_update_rtt() and nfs_estimate_rto() use
       * (class - 1) as the index into nm_srtt[] and nm_sdrtt[].
       * nfs_estimate_rto() returns the mount's fixed nm_timeo for every class
       * other than the GETATTR, LOOKUP, READ and WRITE ones.
       */
   98 enum nfs_rto_timers nfs_ptimers[NFS_NPROCS] = {
   99         NFS_DEFAULT_TIMER,      /* NULL */
  100         NFS_GETATTR_TIMER,      /* GETATTR */
  101         NFS_DEFAULT_TIMER,      /* SETATTR */
  102         NFS_LOOKUP_TIMER,       /* LOOKUP */
  103         NFS_GETATTR_TIMER,      /* ACCESS */
  104         NFS_READ_TIMER,         /* READLINK */
  105         NFS_READ_TIMER,         /* READ */
  106         NFS_WRITE_TIMER,        /* WRITE */
  107         NFS_DEFAULT_TIMER,      /* CREATE */
  108         NFS_DEFAULT_TIMER,      /* MKDIR */
  109         NFS_DEFAULT_TIMER,      /* SYMLINK */
  110         NFS_DEFAULT_TIMER,      /* MKNOD */
  111         NFS_DEFAULT_TIMER,      /* REMOVE */
  112         NFS_DEFAULT_TIMER,      /* RMDIR */
  113         NFS_DEFAULT_TIMER,      /* RENAME */
  114         NFS_DEFAULT_TIMER,      /* LINK */
  115         NFS_READ_TIMER,         /* READDIR */
  116         NFS_READ_TIMER,         /* READDIRPLUS */
  117         NFS_DEFAULT_TIMER,      /* FSSTAT */
  118         NFS_DEFAULT_TIMER,      /* FSINFO */
  119         NFS_DEFAULT_TIMER,      /* PATHCONF */
  120         NFS_DEFAULT_TIMER,      /* COMMIT */
  121         NFS_DEFAULT_TIMER,      /* NOOP */
  122 };
  123 
  /* RTT estimator helpers; all three are defined below in this file. */
  124 void nfs_init_rtt(struct nfsmount *);
  125 void nfs_update_rtt(struct nfsreq *);
  126 int  nfs_estimate_rto(struct nfsmount *, u_int32_t procnum);
  127 
      /*
       * mbuf realignment helpers.  nfs_receive() calls nfs_realign() on the
       * chain it received; the definitions are not visible in this part of
       * the file.
       */
  128 void nfs_realign(struct mbuf **, int);
  129 void nfs_realign_fixup(struct mbuf *, struct mbuf *, unsigned int *);
  130 
      /* Client-side request/reply plumbing. */
  131 int nfs_rcvlock(struct nfsreq *);
  132 int nfs_receive(struct nfsreq *, struct mbuf **, struct mbuf **);
  133 int nfs_reconnect(struct nfsreq *);
  134 int nfs_reply(struct nfsreq *);
  135 void nfs_msg(struct nfsreq *, char *);
  136 void nfs_rcvunlock(int *);
  137 
  138 int nfsrv_getstream(struct nfssvc_sock *, int);
  139 
      /*
       * NOTE(review): presumably counters maintained by nfs_realign(); no
       * reader or writer is visible in this part of the file -- confirm.
       */
  140 unsigned int nfs_realign_test = 0;
  141 unsigned int nfs_realign_count = 0;
  142 
  143 /* Initialize the RTT estimator state for a new mount point. */
  144 void
  145 nfs_init_rtt(struct nfsmount *nmp)
  146 {
  147         int i;
  148 
  149         for (i = 0; i < NFS_MAX_TIMER; i++)
  150                 nmp->nm_srtt[i] = NFS_INITRTT;
  151         for (i = 0; i < NFS_MAX_TIMER; i++)
  152                 nmp->nm_sdrtt[i] = 0;
  153 }
  154 
  155 /*
  156  * Update a mount point's RTT estimator state using data from the
  157  * passed-in request.
  158  * 
  159  * Use a gain of 0.125 on the mean and a gain of 0.25 on the deviation.
  160  *
  161  * NB: Since the timer resolution of NFS_HZ is so course, it can often
  162  * result in r_rtt == 0. Since r_rtt == N means that the actual RTT is
  163  * between N + dt and N + 2 - dt ticks, add 1 before calculating the
  164  * update values.
  165  */
  166 void
  167 nfs_update_rtt(struct nfsreq *rep)
  168 {
  169         int t1 = rep->r_rtt + 1;
  170         int index = nfs_ptimers[rep->r_procnum] - 1;
  171         int *srtt = &rep->r_nmp->nm_srtt[index];
  172         int *sdrtt = &rep->r_nmp->nm_sdrtt[index];
  173 
  174         t1 -= *srtt >> 3;
  175         *srtt += t1;
  176         if (t1 < 0)
  177                 t1 = -t1;
  178         t1 -= *sdrtt >> 2;
  179         *sdrtt += t1;
  180 }
  181 
  182 /*
  183  * Estimate RTO for an NFS RPC sent via an unreliable datagram.
  184  *
  185  * Use the mean and mean deviation of RTT for the appropriate type
  186  * of RPC for the frequent RPCs and a default for the others.
  187  * The justification for doing "other" this way is that these RPCs
  188  * happen so infrequently that timer est. would probably be stale.
  189  * Also, since many of these RPCs are non-idempotent, a conservative
  190  * timeout is desired.
  191  *
  192  * getattr, lookup - A+2D
  193  * read, write     - A+4D
  194  * other           - nm_timeo
  195  */
  196 int
  197 nfs_estimate_rto(struct nfsmount *nmp, u_int32_t procnum)
  198 {
  199         enum nfs_rto_timers timer = nfs_ptimers[procnum];
  200         int index = timer - 1;
  201         int rto;
  202 
  203         switch (timer) {
  204         case NFS_GETATTR_TIMER:
  205         case NFS_LOOKUP_TIMER:
  206                 rto = ((nmp->nm_srtt[index] + 3) >> 2) +
  207                                 ((nmp->nm_sdrtt[index] + 1) >> 1);
  208                 break;
  209         case NFS_READ_TIMER:
  210         case NFS_WRITE_TIMER:
  211                 rto = ((nmp->nm_srtt[index] + 7) >> 3) +
  212                                 (nmp->nm_sdrtt[index] + 1);
  213                 break;
  214         default:
  215                 rto = nmp->nm_timeo;
  216                 return (rto);
  217         }
  218 
  219         if (rto < NFS_MINRTO)
  220                 rto = NFS_MINRTO;
  221         else if (rto > NFS_MAXRTO)
  222                 rto = NFS_MAXRTO;
  223 
  224         return (rto);
  225 }
  226 
  227 
  228 
  229 /*
  230  * Initialize sockets and congestion for a new NFS connection.
  231  * We do not free the sockaddr if error.
       *
       * nmp: mount point to connect.  nm_sotype must be SOCK_DGRAM or
       *      SOCK_STREAM, otherwise EINVAL is returned before anything is done.
       * rep: request on whose behalf we connect, or NULL.  When non-NULL,
       *      nfs_sigintr() is consulted after each sleep (2 second timeout)
       *      while the connection is still in progress, so the wait can be
       *      abandoned.
       *
       * Returns 0 with nmp->nm_so set up and the RTT/congestion state reset,
       * or an error after nfs_disconnect() has been called on the mount.
  232  */
  233 int
  234 nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
  235 {
  236         struct socket *so;
  237         int error, rcvreserve, sndreserve;
  238         struct sockaddr *saddr;
  239         struct sockaddr_in *sin;
  240         struct mbuf *nam = NULL, *mopt = NULL;
  241 
  242         if (!(nmp->nm_sotype == SOCK_DGRAM || nmp->nm_sotype == SOCK_STREAM))
  243                 return (EINVAL);
  244 
  245         nmp->nm_so = NULL;
  246         saddr = mtod(nmp->nm_nam, struct sockaddr *);
  247         error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype, 
  248             nmp->nm_soproto);
  249         if (error) {
  250                 nfs_disconnect(nmp);
  251                 return (error);
  252         }
  253 
  254         /* Allocate mbufs possibly waiting before grabbing the socket lock. */
              /*
               * mopt carries socket option values (used by the AF_INET port
               * range setup and by the SOCK_STREAM options below); nam carries
               * the local address for sobind().
               */
  255         if (nmp->nm_sotype == SOCK_STREAM || saddr->sa_family == AF_INET)
  256                 MGET(mopt, M_WAIT, MT_SOOPTS);
  257         if (saddr->sa_family == AF_INET)
  258                 MGET(nam, M_WAIT, MT_SONAME);
  259 
  260         so = nmp->nm_so;
  261         solock(so);
  262         nmp->nm_soflags = so->so_proto->pr_flags;
  263 
  264         /*
  265          * Some servers require that the client port be a reserved port number.
  266          * We always allocate a reserved port, as this prevents filehandle
  267          * disclosure through UDP port capture.
  268          */
  269         if (saddr->sa_family == AF_INET) {
  270                 int *ip;
  271 
  272                 mopt->m_len = sizeof(int);
  273                 ip = mtod(mopt, int *);
  274                 *ip = IP_PORTRANGE_LOW;
  275                 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
  276                 if (error)
  277                         goto bad;
  278 
                      /*
                       * Bind to INADDR_ANY, port 0, while the port range
                       * option is set to IP_PORTRANGE_LOW.
                       */
  279                 sin = mtod(nam, struct sockaddr_in *);
  280                 memset(sin, 0, sizeof(*sin));
  281                 sin->sin_len = nam->m_len = sizeof(struct sockaddr_in);
  282                 sin->sin_family = AF_INET;
  283                 sin->sin_addr.s_addr = INADDR_ANY;
  284                 sin->sin_port = htons(0);
  285                 error = sobind(so, nam, &proc0);
  286                 if (error)
  287                         goto bad;
  288 
                      /* Bound: put the port range option back to the default. */
  289                 mopt->m_len = sizeof(int);
  290                 ip = mtod(mopt, int *);
  291                 *ip = IP_PORTRANGE_DEFAULT;
  292                 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
  293                 if (error)
  294                         goto bad;
  295         }
  296 
  297         /*
  298          * Protocols that do not require connections may be optionally left
  299          * unconnected for servers that reply from a port other than NFS_PORT.
  300          */
  301         if (nmp->nm_flag & NFSMNT_NOCONN) {
  302                 if (nmp->nm_soflags & PR_CONNREQUIRED) {
  303                         error = ENOTCONN;
  304                         goto bad;
  305                 }
  306         } else {
  307                 error = soconnect(so, nmp->nm_nam);
  308                 if (error)
  309                         goto bad;
  310 
  311                 /*
  312                  * Wait for the connection to complete. Cribbed from the
  313                  * connect system call but with the wait timing out so
  314                  * that interruptible mounts don't hang here for a long time.
  315                  */
  316                 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  317                         sosleep_nsec(so, &so->so_timeo, PSOCK, "nfscon",
  318                             SEC_TO_NSEC(2));
                              /*
                               * Still connecting with no socket error: give a
                               * caller-supplied request the chance to abort.
                               */
  319                         if ((so->so_state & SS_ISCONNECTING) &&
  320                             so->so_error == 0 && rep &&
  321                             (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){
  322                                 so->so_state &= ~SS_ISCONNECTING;
  323                                 goto bad;
  324                         }
  325                 }
  326                 if (so->so_error) {
  327                         error = so->so_error;
  328                         so->so_error = 0;
  329                         goto bad;
  330                 }
  331         }
  332         /*
  333          * Always set receive timeout to detect server crash and reconnect.
  334          * Otherwise, we can get stuck in soreceive forever.
  335          */
  336         so->so_rcv.sb_timeo_nsecs = SEC_TO_NSEC(5);
              /* Soft or interruptible mounts also get a 5 second send timeout. */
  337         if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT))
  338                 so->so_snd.sb_timeo_nsecs = SEC_TO_NSEC(5);
  339         else
  340                 so->so_snd.sb_timeo_nsecs = INFSLP;
              /*
               * Size the socket buffers from the mount's transfer sizes.
               * Datagram: one write plus header to send, and twice the larger
               * of the read and readdir sizes plus header to receive.
               * Stream: twice (transfer size + header + one 32-bit word) in
               * each direction.
               */
  341         if (nmp->nm_sotype == SOCK_DGRAM) {
  342                 sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
  343                 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
  344                     NFS_MAXPKTHDR) * 2;
  345         } else if (nmp->nm_sotype == SOCK_STREAM) {
                      /*
                       * Best effort: the results of the two sosetopt() calls
                       * below are not checked.
                       */
  346                 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
  347                         *mtod(mopt, int32_t *) = 1;
  348                         mopt->m_len = sizeof(int32_t);
  349                         sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, mopt);
  350                 }
  351                 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
  352                         *mtod(mopt, int32_t *) = 1;
  353                         mopt->m_len = sizeof(int32_t);
  354                         sosetopt(so, IPPROTO_TCP, TCP_NODELAY, mopt);
  355                 }
  356                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
  357                     sizeof (u_int32_t)) * 2;
  358                 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
  359                     sizeof (u_int32_t)) * 2;
  360         } else {
                      /* nm_sotype was validated on entry, so this is a safety net. */
  361                 panic("%s: nm_sotype %d", __func__, nmp->nm_sotype);
  362         }
  363         error = soreserve(so, sndreserve, rcvreserve);
  364         if (error)
  365                 goto bad;
  366         so->so_rcv.sb_flags |= SB_NOINTR;
  367         so->so_snd.sb_flags |= SB_NOINTR;
  368         sounlock(so);
  369 
  370         m_freem(mopt);
  371         m_freem(nam);
  372 
  373         /* Initialize other non-zero congestion variables */
  374         nfs_init_rtt(nmp);
  375         nmp->nm_cwnd = NFS_MAXCWND / 2;     /* Initial send window */
  376         nmp->nm_sent = 0;
  377         nmp->nm_timeouts = 0;
  378         return (0);
  379 
              /*
               * Error exit for every failure after solock(): drop the socket
               * lock, free the scratch mbufs and tear the socket down.
               */
  380 bad:
  381         sounlock(so);
  382 
  383         m_freem(mopt);
  384         m_freem(nam);
  385 
  386         nfs_disconnect(nmp);
  387         return (error);
  388 }
  389 
  390 /*
  391  * Reconnect routine:
  392  * Called when a connection is broken on a reliable protocol.
  393  * - clean up the old socket
  394  * - nfs_connect() again
  395  * - set R_MUSTRESEND for all outstanding requests on mount point
  396  * If this fails the mount point is DEAD!
  397  * nb: Must be called with the nfs_sndlock() set on the mount point.
  398  */
  399 int
  400 nfs_reconnect(struct nfsreq *rep)
  401 {
  402         struct nfsreq *rp;
  403         struct nfsmount *nmp = rep->r_nmp;
  404         int error;
  405 
  406         nfs_disconnect(nmp);
  407         while ((error = nfs_connect(nmp, rep)) != 0) {
  408                 if (error == EINTR || error == ERESTART)
  409                         return (EINTR);
  410                 tsleep_nsec(&nowake, PSOCK, "nfsrecon", SEC_TO_NSEC(1));
  411         }
  412 
  413         /*
  414          * Loop through outstanding request list and fix up all requests
  415          * on old socket.
  416          */
  417         TAILQ_FOREACH(rp, &nmp->nm_reqsq, r_chain) {
  418                 rp->r_flags |= R_MUSTRESEND;
  419                 rp->r_rexmit = 0;
  420         }
  421         return (0);
  422 }
  423 
  424 /*
  425  * NFS disconnect. Clean up and unlink.
  426  */
  427 void
  428 nfs_disconnect(struct nfsmount *nmp)
  429 {
  430         struct socket *so;
  431 
  432         if (nmp->nm_so) {
  433                 so = nmp->nm_so;
  434                 nmp->nm_so = NULL;
  435                 soshutdown(so, SHUT_RDWR);
  436                 soclose(so, 0);
  437         }
  438 }
  439 
  440 /*
  441  * This is the nfs send routine. For connection based socket types, it
  442  * must be called with an nfs_sndlock() on the socket.
  443  * "rep == NULL" indicates that it has been called from a server.
  444  * For the client side:
  445  * - return EINTR if the RPC is terminated, 0 otherwise
  446  * - set R_MUSTRESEND if the send fails for any reason
  447  * - do any cleanup required by recoverable socket errors (???)
  448  * For the server side:
  449  * - return EINTR or ERESTART if interrupted by a signal
  450  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
  451  * - do any cleanup required by recoverable socket errors (???)
  452  */
  453 int
  454 nfs_send(struct socket *so, struct mbuf *nam, struct mbuf *top,
  455     struct nfsreq *rep)
  456 {
  457         struct mbuf *sendnam;
  458         int error, soflags, flags;
  459 
  460         if (rep) {
  461                 if (rep->r_flags & R_SOFTTERM) {
  462                         m_freem(top);
  463                         return (EINTR);
  464                 }
  465                 if ((so = rep->r_nmp->nm_so) == NULL) {
  466                         rep->r_flags |= R_MUSTRESEND;
  467                         m_freem(top);
  468                         return (0);
  469                 }
  470                 rep->r_flags &= ~R_MUSTRESEND;
  471                 soflags = rep->r_nmp->nm_soflags;
  472         } else
  473                 soflags = so->so_proto->pr_flags;
  474         if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
  475                 sendnam = NULL;
  476         else
  477                 sendnam = nam;
  478         flags = 0;
  479 
  480         error = sosend(so, sendnam, NULL, top, NULL, flags);
  481         if (error) {
  482                 if (rep) {
  483                         /*
  484                          * Deal with errors for the client side.
  485                          */
  486                         if (rep->r_flags & R_SOFTTERM)
  487                                 error = EINTR;
  488                         else
  489                                 rep->r_flags |= R_MUSTRESEND;
  490                 }
  491 
  492                 /*
  493                  * Handle any recoverable (soft) socket errors here. (???)
  494                  */
  495                 if (error != EINTR && error != ERESTART &&
  496                     error != EWOULDBLOCK && error != EPIPE)
  497                         error = 0;
  498         }
  499         return (error);
  500 }
  501 
  502 #ifdef NFSCLIENT
  503 /*
  504  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
  505  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
  506  * Mark and consolidate the data into a new mbuf list.
  507  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
  508  *     small mbufs.
  509  * For SOCK_STREAM we must be very careful to read an entire record once
  510  * we have read any of it, even if the system call has been interrupted.
       *
       * rep:   request being waited on; the socket type of its mount point
       *        selects the receive path.
       * aname: set to NULL on entry.  On the SOCK_DGRAM path it is handed to
       *        soreceive() to collect the sender's address, but only when the
       *        socket is not connected.
       * mp:    set to NULL on entry; receives the mbuf chain read from the
       *        socket.  The chain is freed again if an error is returned from
       *        the common exit at the bottom.
       *
       * Returns 0 on success, otherwise an errno value.  EINTR is returned
       * when the request has been soft-terminated (R_SOFTTERM) and, on the
       * non-datagram path, also when the request already has a reply attached
       * (r_mrep).
  511  */
  512 int
  513 nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp)
  514 {
  515         struct socket *so;
  516         struct uio auio;
  517         struct iovec aio;
  518         struct mbuf *m;
  519         struct mbuf *control;
  520         u_int32_t len;
  521         struct mbuf **getnam;
  522         int error, sotype, rcvflg;
  523         struct proc *p = curproc;       /* XXX */
  524 
  525         /*
  526          * Set up arguments for soreceive()
  527          */
  528         *mp = NULL;
  529         *aname = NULL;
  530         sotype = rep->r_nmp->nm_sotype;
  531 
  532         /*
  533          * For reliable protocols, lock against other senders/receivers
  534          * in case a reconnect is necessary.
  535          * For SOCK_STREAM, first get the Record Mark to find out how much
  536          * more there is to get.
  537          * We must lock the socket against other receivers
  538          * until we have an entire rpc request/reply.
  539          */
  540         if (sotype != SOCK_DGRAM) {
  541                 error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
  542                 if (error)
  543                         return (error);
                      /* Every jump back to tryagain is made with the send lock held. */
  544 tryagain:
  545                 /*
  546                  * Check for fatal errors and resending request.
  547                  */
  548                 /*
  549                  * Ugh: If a reconnect attempt just happened, nm_so
  550                  * would have changed. NULL indicates a failed
  551                  * attempt that has essentially shut down this
  552                  * mount point.
  553                  */
  554                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
  555                         nfs_sndunlock(&rep->r_nmp->nm_flag);
  556                         return (EINTR);
  557                 }
  558                 so = rep->r_nmp->nm_so;
  559                 if (!so) {
  560                         error = nfs_reconnect(rep); 
  561                         if (error) {
  562                                 nfs_sndunlock(&rep->r_nmp->nm_flag);
  563                                 return (error);
  564                         }
  565                         goto tryagain;
  566                 }
                      /*
                       * Resend a copy of the request for as long as it is
                       * flagged R_MUSTRESEND.  A send error that is not
                       * EINTR/ERESTART triggers a reconnect and a restart from
                       * tryagain.
                       */
  567                 while (rep->r_flags & R_MUSTRESEND) {
  568                         m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
  569                         nfsstats.rpcretries++;
  570                         rep->r_rtt = 0;
  571                         rep->r_flags &= ~R_TIMING;
  572                         error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
  573                         if (error) {
  574                                 if (error == EINTR || error == ERESTART ||
  575                                     (error = nfs_reconnect(rep)) != 0) {
  576                                         nfs_sndunlock(&rep->r_nmp->nm_flag);
  577                                         return (error);
  578                                 }
  579                                 goto tryagain;
  580                         }
  581                 }
                      /* The send lock is dropped before blocking in soreceive(). */
  582                 nfs_sndunlock(&rep->r_nmp->nm_flag);
  583                 if (sotype == SOCK_STREAM) {
                              /* First read exactly one 32-bit record mark into len. */
  584                         aio.iov_base = (caddr_t) &len;
  585                         aio.iov_len = sizeof(u_int32_t);
  586                         auio.uio_iov = &aio;
  587                         auio.uio_iovcnt = 1;
  588                         auio.uio_segflg = UIO_SYSSPACE;
  589                         auio.uio_rw = UIO_READ;
  590                         auio.uio_offset = 0;
  591                         auio.uio_resid = sizeof(u_int32_t);
  592                         auio.uio_procp = p;
  593                         do {
  594                                 rcvflg = MSG_WAITALL;
  595                                 error = soreceive(so, NULL, &auio, NULL, NULL,
  596                                     &rcvflg, 0);
                                      /*
                                       * NOTE(review): rep has already been
                                       * dereferenced above, so the "&& rep"
                                       * test is always true here.
                                       */
  597                                 if (error == EWOULDBLOCK && rep) {
  598                                         if (rep->r_flags & R_SOFTTERM)
  599                                                 return (EINTR);
  600                                         /*
  601                                          * looks like the server died after it
  602                                          * received the request, make sure
  603                                          * that we will retransmit and we
  604                                          * don't get stuck here forever.
  605                                          */
  606                                         if (rep->r_rexmit >=
  607                                             rep->r_nmp->nm_retry) {
  608                                                 nfsstats.rpctimeouts++;
  609                                                 error = EPIPE;
  610                                         }
  611                                 }
  612                         } while (error == EWOULDBLOCK);
  613                         if (!error && auio.uio_resid > 0) {
  614                             log(LOG_INFO,
  615                                  "short receive (%zu/%zu) from nfs server %s\n",
  616                                  sizeof(u_int32_t) - auio.uio_resid,
  617                                  sizeof(u_int32_t),
  618                                  rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  619                             error = EPIPE;
  620                         }
  621                         if (error)
  622                                 goto errout;
  623 
                              /*
                               * Convert the record mark to host order and clear
                               * its top bit; what remains is used as the record
                               * length.
                               */
  624                         len = ntohl(len) & ~0x80000000;
  625                         /*
  626                          * This is SERIOUS! We are out of sync with the sender
  627                          * and forcing a disconnect/reconnect is all I can do.
  628                          */
  629                         if (len > NFS_MAXPACKET) {
  630                             log(LOG_ERR, "%s (%u) from nfs server %s\n",
  631                                 "impossible packet length",
  632                                 len,
  633                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  634                             error = EFBIG;
  635                             goto errout;
  636                         }
                              /*
                               * Now read the record body into *mp.  Timeouts and
                               * interruptions are retried so that a record is
                               * never left half read.
                               */
  637                         auio.uio_resid = len;
  638                         do {
  639                             rcvflg = MSG_WAITALL;
  640                             error =  soreceive(so, NULL, &auio, mp, NULL,
  641                                 &rcvflg, 0);
  642                         } while (error == EWOULDBLOCK || error == EINTR ||
  643                             error == ERESTART);
  644                         if (!error && auio.uio_resid > 0) {
  645                                 log(LOG_INFO, "short receive (%zu/%u) from "
  646                                     "nfs server %s\n", len - auio.uio_resid,
  647                                     len, rep->r_nmp->nm_mountp->
  648                                     mnt_stat.f_mntfromname);
  649                                 error = EPIPE;
  650                         }
  651                 } else {
  652                         /*
  653                          * NB: Since uio_resid is big, MSG_WAITALL is ignored
  654                          * and soreceive() will return when it has either a
  655                          * control msg or a data msg.
  656                          * We have no use for control msg., but must grab them
  657                          * and then throw them away so we know what is going
  658                          * on.
  659                          */
  660                         auio.uio_resid = len = 100000000; /* Anything Big */
  661                         auio.uio_procp = p;
  662                         do {
  663                                 rcvflg = 0;
  664                                 error = soreceive(so, NULL, &auio, mp, &control,
  665                                     &rcvflg, 0);
                                      /*
                                       * NOTE(review): control is freed here and
                                       * afterwards only compared against NULL
                                       * in the loop condition, never
                                       * dereferenced.
                                       */
  666                                 m_freem(control);
  667                                 if (error == EWOULDBLOCK && rep) {
  668                                         if (rep->r_flags & R_SOFTTERM)
  669                                                 return (EINTR);
  670                                 }
  671                         } while (error == EWOULDBLOCK ||
  672                             (!error && *mp == NULL && control));
  673                         if ((rcvflg & MSG_EOR) == 0)
  674                                 printf("Egad!!\n");
  675                         if (!error && *mp == NULL)
  676                                 error = EPIPE;
  677                         len -= auio.uio_resid;
  678                 }
                      /*
                       * Any error other than EINTR/ERESTART: discard what was
                       * read, retake the send lock, reconnect and start over.
                       * If the lock or the reconnect fails, that error is
                       * returned through the common exit below.
                       */
  679 errout:
  680                 if (error && error != EINTR && error != ERESTART) {
  681                         m_freemp(mp);
  682                         if (error != EPIPE)
  683                                 log(LOG_INFO,
  684                                     "receive error %d from nfs server %s\n",
  685                                     error,
  686                                  rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  687                         error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
  688                         if (!error) {
  689                                 error = nfs_reconnect(rep);
  690                                 if (!error)
  691                                         goto tryagain;
  692                                 nfs_sndunlock(&rep->r_nmp->nm_flag);
  693                         }
  694                 }
  695         } else {
                      /*
                       * SOCK_DGRAM: no send lock and no reconnect handling.  A
                       * mount without a socket fails with EACCES.
                       */
  696                 if ((so = rep->r_nmp->nm_so) == NULL)
  697                         return (EACCES);
  698                 if (so->so_state & SS_ISCONNECTED)
  699                         getnam = NULL;
  700                 else
  701                         getnam = aname;
  702                 auio.uio_resid = len = 1000000;
  703                 auio.uio_procp = p;
                      /*
                       * Keep receiving across timeouts until a datagram or an
                       * error arrives, or the request is soft-terminated.
                       */
  704                 do {
  705                         rcvflg = 0;
  706                         error = soreceive(so, getnam, &auio, mp, NULL,
  707                             &rcvflg, 0);
  708                         if (error == EWOULDBLOCK &&
  709                             (rep->r_flags & R_SOFTTERM))
  710                                 return (EINTR);
  711                 } while (error == EWOULDBLOCK);
  712                 len -= auio.uio_resid;
  713         }
  714         if (error)
  715                 m_freemp(mp);
  716         /*
  717          * Search for any mbufs that are not a multiple of 4 bytes long
  718          * or with m_data not longword aligned.
  719          * These could cause pointer alignment problems, so copy them to
  720          * well aligned mbufs.
  721          */
  722         nfs_realign(mp, 5 * NFSX_UNSIGNED);
  723         return (error);
  724 }
  725 
  726 /*
  727  * Implement receipt of reply on a socket.
  728  * We must search through the list of received datagrams matching them
  729  * with outstanding requests using the xid, until ours is found.
  730  */
  731 int
  732 nfs_reply(struct nfsreq *myrep)
  733 {
  734         struct nfsreq *rep;
  735         struct nfsmount *nmp = myrep->r_nmp;
  736         struct nfsm_info        info;
  737         struct mbuf *nam;
  738         u_int32_t rxid, *tl, t1;
  739         caddr_t cp2;
  740         int error;
  741 
  742         /*
  743          * Loop around until we get our own reply
  744          */
  745         for (;;) {
  746                 /*
  747                  * Lock against other receivers so that I don't get stuck in
  748                  * sbwait() after someone else has received my reply for me.
  749                  * Also necessary for connection based protocols to avoid
  750                  * race conditions during a reconnect.
  751                  */
  752                 error = nfs_rcvlock(myrep);
  753                 if (error)
  754                         return (error == EALREADY ? 0 : error);
  755 
  756                 /*
  757                  * Get the next Rpc reply off the socket
  758                  */
  759                 error = nfs_receive(myrep, &nam, &info.nmi_mrep);
  760                 nfs_rcvunlock(&nmp->nm_flag);
  761                 if (error) {
  762 
  763                         /*
  764                          * Ignore routing errors on connectionless protocols??
  765                          */
  766                         if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
  767                                 if (nmp->nm_so)
  768                                         nmp->nm_so->so_error = 0;
  769                                 continue;
  770                         }
  771                         return (error);
  772                 }
  773                 m_freem(nam);
  774         
  775                 /*
  776                  * Get the xid and check that it is an rpc reply
  777                  */
  778                 info.nmi_md = info.nmi_mrep;
  779                 info.nmi_dpos = mtod(info.nmi_md, caddr_t);
  780                 nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
  781                 rxid = *tl++;
  782                 if (*tl != rpc_reply) {
  783                         nfsstats.rpcinvalid++;
  784                         m_freem(info.nmi_mrep);
  785 nfsmout:
  786                         continue;
  787                 }
  788 
  789                 /*
  790                  * Loop through the request list to match up the reply
  791                  * Iff no match, just drop the datagram
  792                  */
  793                 TAILQ_FOREACH(rep, &nmp->nm_reqsq, r_chain) {
  794                         if (rep->r_mrep == NULL && rxid == rep->r_xid) {
  795                                 /* Found it.. */
  796                                 rep->r_mrep = info.nmi_mrep;
  797                                 rep->r_md = info.nmi_md;
  798                                 rep->r_dpos = info.nmi_dpos;
  799 
  800                                 /*
  801                                  * Update congestion window.
  802                                  * Do the additive increase of
  803                                  * one rpc/rtt.
  804                                  */
  805                                 if (nmp->nm_cwnd <= nmp->nm_sent) {
  806                                         nmp->nm_cwnd +=
  807                                            (NFS_CWNDSCALE * NFS_CWNDSCALE +
  808                                            (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
  809                                         if (nmp->nm_cwnd > NFS_MAXCWND)
  810                                                 nmp->nm_cwnd = NFS_MAXCWND;
  811                                 }
  812                                 rep->r_flags &= ~R_SENT;
  813                                 nmp->nm_sent -= NFS_CWNDSCALE;
  814 
  815                                 if (rep->r_flags & R_TIMING)
  816                                         nfs_update_rtt(rep);
  817 
  818                                 nmp->nm_timeouts = 0;
  819                                 break;
  820                         }
  821                 }
  822                 /*
  823                  * If not matched to a request, drop it.
  824                  * If it's mine, get out.
  825                  */
  826                 if (rep == 0) {
  827                         nfsstats.rpcunexpected++;
  828                         m_freem(info.nmi_mrep);
  829                 } else if (rep == myrep) {
  830                         if (rep->r_mrep == NULL)
  831                                 panic("nfsreply nil");
  832                         return (0);
  833                 }
  834         }
  835 }
  836 
  837 /*
  838  * nfs_request - goes something like this
  839  *      - fill in request struct
  840  *      - links it into list
  841  *      - calls nfs_send() for first transmit
  842  *      - calls nfs_receive() to get reply
  843  *      - break down rpc header and return with nfs reply pointed to
  844  *        by mrep or error
  845  * nb: always frees up mreq mbuf list
  846  */
  847 int
  848 nfs_request(struct vnode *vp, int procnum, struct nfsm_info *infop)
  849 {
  850         struct mbuf *m;
  851         u_int32_t *tl;
  852         struct nfsmount *nmp;
  853         caddr_t cp2;
  854         int t1, i, error = 0;
  855         int trylater_delay;
  856         struct nfsreq *rep;
  857         struct nfsm_info info;
  858 
  859         rep = pool_get(&nfsreqpl, PR_WAITOK);
  860         rep->r_nmp = VFSTONFS(vp->v_mount);
  861         rep->r_vp = vp;
  862         rep->r_procp = infop->nmi_procp;
  863         rep->r_procnum = procnum;
  864 
  865         /* empty mbuf for AUTH_UNIX header */
  866         rep->r_mreq = m_gethdr(M_WAIT, MT_DATA);
  867         rep->r_mreq->m_next = infop->nmi_mreq;
  868         rep->r_mreq->m_len = 0;
  869         m_calchdrlen(rep->r_mreq);
  870 
  871         trylater_delay = NFS_MINTIMEO;
  872 
  873         nmp = rep->r_nmp;
  874 
  875         /* Get the RPC header with authorization. */
  876         nfsm_rpchead(rep, infop->nmi_cred, RPCAUTH_UNIX);
  877         m = rep->r_mreq;
  878 
  879         /*
  880          * For stream protocols, insert a Sun RPC Record Mark.
  881          */
  882         if (nmp->nm_sotype == SOCK_STREAM) {
  883                 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
  884                 *mtod(m, u_int32_t *) = htonl(0x80000000 |
  885                     (m->m_pkthdr.len - NFSX_UNSIGNED));
  886         }
  887 
  888 tryagain:
  889         rep->r_rtt = rep->r_rexmit = 0;
  890         if (nfs_ptimers[rep->r_procnum] != NFS_DEFAULT_TIMER)
  891                 rep->r_flags = R_TIMING;
  892         else
  893                 rep->r_flags = 0;
  894         rep->r_mrep = NULL;
  895 
  896         /*
  897          * Do the client side RPC.
  898          */
  899         nfsstats.rpcrequests++;
  900         /*
  901          * Chain request into list of outstanding requests. Be sure
  902          * to put it LAST so timer finds oldest requests first.
  903          */
  904         if (TAILQ_EMPTY(&nmp->nm_reqsq))
  905                 timeout_add(&nmp->nm_rtimeout, nfs_ticks);
  906         TAILQ_INSERT_TAIL(&nmp->nm_reqsq, rep, r_chain);
  907 
  908         /*
  909          * If backing off another request or avoiding congestion, don't
  910          * send this one now but let timer do it. If not timing a request,
  911          * do it now.
  912          */
  913         if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
  914                 (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
  915                 nmp->nm_sent < nmp->nm_cwnd)) {
  916                 if (nmp->nm_soflags & PR_CONNREQUIRED)
  917                         error = nfs_sndlock(&nmp->nm_flag, rep);
  918                 if (!error) {
  919                         error = nfs_send(nmp->nm_so, nmp->nm_nam,
  920                             m_copym(m, 0, M_COPYALL, M_WAIT), rep);
  921                         if (nmp->nm_soflags & PR_CONNREQUIRED)
  922                                 nfs_sndunlock(&nmp->nm_flag);
  923                 }
  924                 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
  925                         nmp->nm_sent += NFS_CWNDSCALE;
  926                         rep->r_flags |= R_SENT;
  927                 }
  928         } else {
  929                 rep->r_rtt = -1;
  930         }
  931 
  932         /*
  933          * Wait for the reply from our send or the timer's.
  934          */
  935         if (!error || error == EPIPE)
  936                 error = nfs_reply(rep);
  937 
  938         /*
  939          * RPC done, unlink the request.
  940          */
  941         TAILQ_REMOVE(&nmp->nm_reqsq, rep, r_chain);
  942         if (TAILQ_EMPTY(&nmp->nm_reqsq))
  943                 timeout_del(&nmp->nm_rtimeout);
  944 
  945         /*
  946          * Decrement the outstanding request count.
  947          */
  948         if (rep->r_flags & R_SENT) {
  949                 rep->r_flags &= ~R_SENT;        /* paranoia */
  950                 nmp->nm_sent -= NFS_CWNDSCALE;
  951         }
  952 
  953         /*
  954          * If there was a successful reply and a tprintf msg.
  955          * tprintf a response.
  956          */
  957         if (!error && (rep->r_flags & R_TPRINTFMSG))
  958                 nfs_msg(rep, "is alive again");
  959         info.nmi_mrep = rep->r_mrep;
  960         info.nmi_md = rep->r_md;
  961         info.nmi_dpos = rep->r_dpos;
  962         if (error) {
  963                 infop->nmi_mrep = NULL;
  964                 goto nfsmout1;
  965         }
  966 
  967         /*
  968          * break down the rpc header and check if ok
  969          */
  970         nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
  971         if (*tl++ == rpc_msgdenied) {
  972                 if (*tl == rpc_mismatch)
  973                         error = EOPNOTSUPP;
  974                 else
  975                         error = EACCES; /* Should be EAUTH. */
  976                 infop->nmi_mrep = NULL;
  977                 goto nfsmout1;
  978         }
  979 
  980         /*
  981          * Since we only support RPCAUTH_UNIX atm we step over the
  982          * reply verifer type, and in the (error) case that there really
  983          * is any data in it, we advance over it.
  984          */
  985         tl++;                   /* Step over verifer type */
  986         i = fxdr_unsigned(int32_t, *tl);
  987         if (i > 0)
  988                 nfsm_adv(nfsm_rndup(i));        /* Should not happen */
  989 
  990         nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
  991         /* 0 == ok */
  992         if (*tl == 0) {
  993                 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
  994                 if (*tl != 0) {
  995                         error = fxdr_unsigned(int, *tl);
  996                         if ((nmp->nm_flag & NFSMNT_NFSV3) &&
  997                             error == NFSERR_TRYLATER) {
  998                                 m_freem(info.nmi_mrep);
  999                                 error = 0;
 1000                                 tsleep_nsec(&nowake, PSOCK, "nfsretry",
 1001                                     SEC_TO_NSEC(trylater_delay));
 1002                                 trylater_delay *= NFS_TIMEOUTMUL;
 1003                                 if (trylater_delay > NFS_MAXTIMEO)
 1004                                         trylater_delay = NFS_MAXTIMEO;
 1005 
 1006                                 goto tryagain;
 1007                         }
 1008 
 1009                         /*
 1010                          * If the File Handle was stale, invalidate the
 1011                          * lookup cache, just in case.
 1012                          */
 1013                         if (error == ESTALE)
 1014                                 cache_purge(rep->r_vp);
 1015                 }
 1016                 goto nfsmout;
 1017         }
 1018 
 1019         error = EPROTONOSUPPORT;
 1020 
 1021 nfsmout:
 1022         infop->nmi_mrep = info.nmi_mrep;
 1023         infop->nmi_md = info.nmi_md;
 1024         infop->nmi_dpos = info.nmi_dpos;
 1025 nfsmout1:
 1026         m_freem(rep->r_mreq);
 1027         pool_put(&nfsreqpl, rep);
 1028         return (error);
 1029 }
 1030 #endif /* NFSCLIENT */
 1031 
 1032 /*
 1033  * Generate the rpc reply header
 1034  * siz arg. is used to decide if adding a cluster is worthwhile
 1035  */
 1036 int
 1037 nfs_rephead(int siz, struct nfsrv_descript *nd, struct nfssvc_sock *slp,
 1038     int err, struct mbuf **mrq, struct mbuf **mbp)
 1039 {
 1040         u_int32_t *tl;
 1041         struct mbuf *mreq;
 1042         struct mbuf *mb;
 1043 
 1044         MGETHDR(mreq, M_WAIT, MT_DATA);
 1045         mb = mreq;
 1046         /*
 1047          * If this is a big reply, use a cluster else
 1048          * try and leave leading space for the lower level headers.
 1049          */
 1050         siz += RPC_REPLYSIZ;
 1051         if (siz >= MHLEN - max_hdr) {
 1052                 MCLGET(mreq, M_WAIT);
 1053         } else
 1054                 mreq->m_data += max_hdr;
 1055         tl = mtod(mreq, u_int32_t *);
 1056         mreq->m_len = 6 * NFSX_UNSIGNED;
 1057         *tl++ = txdr_unsigned(nd->nd_retxid);
 1058         *tl++ = rpc_reply;
 1059         if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
 1060                 *tl++ = rpc_msgdenied;
 1061                 if (err & NFSERR_AUTHERR) {
 1062                         *tl++ = rpc_autherr;
 1063                         *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
 1064                         mreq->m_len -= NFSX_UNSIGNED;
 1065                 } else {
 1066                         *tl++ = rpc_mismatch;
 1067                         *tl++ = txdr_unsigned(RPC_VER2);
 1068                         *tl = txdr_unsigned(RPC_VER2);
 1069                 }
 1070         } else {
 1071                 *tl++ = rpc_msgaccepted;
 1072 
 1073                 /* AUTH_UNIX requires RPCAUTH_NULL. */
 1074                 *tl++ = 0;
 1075                 *tl++ = 0;
 1076 
 1077                 switch (err) {
 1078                 case EPROGUNAVAIL:
 1079                         *tl = txdr_unsigned(RPC_PROGUNAVAIL);
 1080                         break;
 1081                 case EPROGMISMATCH:
 1082                         *tl = txdr_unsigned(RPC_PROGMISMATCH);
 1083                         tl = nfsm_build(&mb, 2 * NFSX_UNSIGNED);
 1084                         *tl++ = txdr_unsigned(NFS_VER2);
 1085                         *tl = txdr_unsigned(NFS_VER3);
 1086                         break;
 1087                 case EPROCUNAVAIL:
 1088                         *tl = txdr_unsigned(RPC_PROCUNAVAIL);
 1089                         break;
 1090                 case EBADRPC:
 1091                         *tl = txdr_unsigned(RPC_GARBAGE);
 1092                         break;
 1093                 default:
 1094                         *tl = 0;
 1095                         if (err != NFSERR_RETVOID) {
 1096                                 tl = nfsm_build(&mb, NFSX_UNSIGNED);
 1097                                 if (err)
 1098                                     *tl = txdr_unsigned(nfsrv_errmap(nd, err));
 1099                                 else
 1100                                     *tl = 0;
 1101                         }
 1102                         break;
 1103                 };
 1104         }
 1105 
 1106         *mrq = mreq;
 1107         if (mbp != NULL)
 1108                 *mbp = mb;
 1109         if (err != 0 && err != NFSERR_RETVOID)
 1110                 nfsstats.srvrpc_errs++;
 1111         return (0);
 1112 }
 1113 
 1114 /*
 1115  * nfs timer routine
 1116  * Scan the nfsreq list and retransmit any requests that have timed out.
 1117  */
 1118 void
 1119 nfs_timer(void *arg)
 1120 {
 1121         struct nfsmount *nmp = arg;
 1122         struct nfsreq *rep;
 1123         struct mbuf *m;
 1124         struct socket *so;
 1125         int timeo, error;
 1126 
 1127         NET_LOCK();
 1128         TAILQ_FOREACH(rep, &nmp->nm_reqsq, r_chain) {
 1129                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
 1130                         continue;
 1131                 if (nfs_sigintr(nmp, rep, rep->r_procp)) {
 1132                         rep->r_flags |= R_SOFTTERM;
 1133                         continue;
 1134                 }
 1135                 if (rep->r_rtt >= 0) {
 1136                         rep->r_rtt++;
 1137                         if (nmp->nm_flag & NFSMNT_DUMBTIMR)
 1138                                 timeo = nmp->nm_timeo;
 1139                         else
 1140                                 timeo = nfs_estimate_rto(nmp, rep->r_procnum);
 1141                         if (nmp->nm_timeouts > 0)
 1142                                 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
 1143                         if (rep->r_rtt <= timeo)
 1144                                 continue;
 1145                         if (nmp->nm_timeouts < nitems(nfs_backoff))
 1146                                 nmp->nm_timeouts++;
 1147                 }
 1148 
 1149                 /* Check for server not responding. */
 1150                 if ((rep->r_flags & R_TPRINTFMSG) == 0 && rep->r_rexmit > 4) {
 1151                         nfs_msg(rep, "not responding");
 1152                         rep->r_flags |= R_TPRINTFMSG;
 1153                 }
 1154                 if (rep->r_rexmit >= nmp->nm_retry) {   /* too many */
 1155                         nfsstats.rpctimeouts++;
 1156                         rep->r_flags |= R_SOFTTERM;
 1157                         continue;
 1158                 }
 1159                 if (nmp->nm_sotype != SOCK_DGRAM) {
 1160                         if (++rep->r_rexmit > NFS_MAXREXMIT)
 1161                                 rep->r_rexmit = NFS_MAXREXMIT;
 1162                         continue;
 1163                 }
 1164 
 1165                 if ((so = nmp->nm_so) == NULL)
 1166                         continue;
 1167 
 1168                 /*
 1169                  * If there is enough space and the window allows..
 1170                  *      Resend it
 1171                  * Set r_rtt to -1 in case we fail to send it now.
 1172                  */
 1173                 rep->r_rtt = -1;
 1174                 if (sbspace(so, &so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
 1175                    ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
 1176                     (rep->r_flags & R_SENT) ||
 1177                     nmp->nm_sent < nmp->nm_cwnd) &&
 1178                    (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
 1179                         if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
 1180                                 error = pru_send(so, m, NULL, NULL);
 1181                         else
 1182                                 error = pru_send(so, m, nmp->nm_nam, NULL);
 1183                         if (error) {
 1184                                 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
 1185                                         so->so_error = 0;
 1186                         } else {
 1187                                 /*
 1188                                  * Iff first send, start timing
 1189                                  * else turn timing off, backoff timer
 1190                                  * and divide congestion window by 2.
 1191                                  */
 1192                                 if (rep->r_flags & R_SENT) {
 1193                                         rep->r_flags &= ~R_TIMING;
 1194                                         if (++rep->r_rexmit > NFS_MAXREXMIT)
 1195                                                 rep->r_rexmit = NFS_MAXREXMIT;
 1196                                         nmp->nm_cwnd >>= 1;
 1197                                         if (nmp->nm_cwnd < NFS_CWNDSCALE)
 1198                                                 nmp->nm_cwnd = NFS_CWNDSCALE;
 1199                                         nfsstats.rpcretries++;
 1200                                 } else {
 1201                                         rep->r_flags |= R_SENT;
 1202                                         nmp->nm_sent += NFS_CWNDSCALE;
 1203                                 }
 1204                                 rep->r_rtt = 0;
 1205                         }
 1206                 }
 1207         }
 1208         NET_UNLOCK();
 1209         timeout_add(&nmp->nm_rtimeout, nfs_ticks);
 1210 }
 1211 
 1212 /*
 1213  * Test for a termination condition pending on the process.
 1214  * This is used for NFSMNT_INT mounts.
 1215  */
 1216 int
 1217 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct proc *p)
 1218 {
 1219 
 1220         if (rep && (rep->r_flags & R_SOFTTERM))
 1221                 return (EINTR);
 1222         if (!(nmp->nm_flag & NFSMNT_INT))
 1223                 return (0);
 1224         if (p && (SIGPENDING(p) & ~p->p_p->ps_sigacts->ps_sigignore &
 1225             NFSINT_SIGMASK))
 1226                 return (EINTR);
 1227         return (0);
 1228 }
 1229 
 1230 /*
 1231  * Lock a socket against others.
 1232  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 1233  * and also to avoid race conditions between the processes with nfs requests
 1234  * in progress when a reconnect is necessary.
 1235  */
 1236 int
 1237 nfs_sndlock(int *flagp, struct nfsreq *rep)
 1238 {
 1239         uint64_t slptimeo = INFSLP;
 1240         struct proc *p;
 1241         int slpflag = 0;
 1242 
 1243         if (rep) {
 1244                 p = rep->r_procp;
 1245                 if (rep->r_nmp->nm_flag & NFSMNT_INT)
 1246                         slpflag = PCATCH;
 1247         } else
 1248                 p = NULL;
 1249         while (*flagp & NFSMNT_SNDLOCK) {
 1250                 if (rep && nfs_sigintr(rep->r_nmp, rep, p))
 1251                         return (EINTR);
 1252                 *flagp |= NFSMNT_WANTSND;
 1253                 tsleep_nsec(flagp, slpflag | (PZERO - 1), "nfsndlck", slptimeo);
 1254                 if (slpflag == PCATCH) {
 1255                         slpflag = 0;
 1256                         slptimeo = SEC_TO_NSEC(2);
 1257                 }
 1258         }
 1259         *flagp |= NFSMNT_SNDLOCK;
 1260         return (0);
 1261 }
 1262 
 1263 /*
 1264  * Unlock the stream socket for others.
 1265  */
 1266 void
 1267 nfs_sndunlock(int *flagp)
 1268 {
 1269 
 1270         if ((*flagp & NFSMNT_SNDLOCK) == 0)
 1271                 panic("nfs sndunlock");
 1272         *flagp &= ~NFSMNT_SNDLOCK;
 1273         if (*flagp & NFSMNT_WANTSND) {
 1274                 *flagp &= ~NFSMNT_WANTSND;
 1275                 wakeup((caddr_t)flagp);
 1276         }
 1277 }
 1278 
 1279 int
 1280 nfs_rcvlock(struct nfsreq *rep)
 1281 {
 1282         uint64_t slptimeo = INFSLP;
 1283         int *flagp = &rep->r_nmp->nm_flag;
 1284         int slpflag;
 1285 
 1286         if (*flagp & NFSMNT_INT)
 1287                 slpflag = PCATCH;
 1288         else
 1289                 slpflag = 0;
 1290 
 1291         while (*flagp & NFSMNT_RCVLOCK) {
 1292                 if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
 1293                         return (EINTR);
 1294                 *flagp |= NFSMNT_WANTRCV;
 1295                 tsleep_nsec(flagp, slpflag | (PZERO - 1), "nfsrcvlk", slptimeo);
 1296                 if (rep->r_mrep != NULL) {
 1297                         /*
 1298                          * Don't take the lock if our reply has been received
 1299                          * while we where sleeping.
 1300                          */
 1301                          return (EALREADY);
 1302                 }
 1303                 if (slpflag == PCATCH) {
 1304                         slpflag = 0;
 1305                         slptimeo = SEC_TO_NSEC(2);
 1306                 }
 1307         }
 1308         *flagp |= NFSMNT_RCVLOCK;
 1309         return (0);
 1310 }
 1311 
 1312 /*
 1313  * Unlock the stream socket for others.
 1314  */
 1315 void
 1316 nfs_rcvunlock(int *flagp)
 1317 {
 1318 
 1319         if ((*flagp & NFSMNT_RCVLOCK) == 0)
 1320                 panic("nfs rcvunlock");
 1321         *flagp &= ~NFSMNT_RCVLOCK;
 1322         if (*flagp & NFSMNT_WANTRCV) {
 1323                 *flagp &= ~NFSMNT_WANTRCV;
 1324                 wakeup(flagp);
 1325         }
 1326 }
 1327 
 1328 /*
 1329  * Auxiliary routine to align the length of mbuf copies made with m_copyback().
 1330  */
 1331 void
 1332 nfs_realign_fixup(struct mbuf *m, struct mbuf *n, unsigned int *off)
 1333 {
 1334         size_t padding;
 1335 
 1336         /*
 1337          * The maximum number of bytes that m_copyback() places in a mbuf is
 1338          * always an aligned quantity, so realign happens at the chain's tail.
 1339          */
 1340         while (n->m_next != NULL)
 1341                 n = n->m_next;
 1342 
 1343         /*
 1344          * Pad from the next elements in the source chain. Loop until the
 1345          * destination chain is aligned, or the end of the source is reached.
 1346          */
 1347         do {
 1348                 m = m->m_next;
 1349                 if (m == NULL)
 1350                         return;
 1351 
 1352                 padding = min(ALIGN(n->m_len) - n->m_len, m->m_len);
 1353                 if (padding > m_trailingspace(n))
 1354                         panic("nfs_realign_fixup: no memory to pad to");
 1355 
 1356                 bcopy(mtod(m, void *), mtod(n, char *) + n->m_len, padding);
 1357 
 1358                 n->m_len += padding;
 1359                 m_adj(m, padding);
 1360                 *off += padding;
 1361 
 1362         } while (!ALIGNED_POINTER(n->m_len, void *));
 1363 }
 1364 
 1365 /*
 1366  * The NFS RPC parsing code uses the data address and the length of mbuf
 1367  * structures to calculate on-memory addresses. This function makes sure these
 1368  * parameters are correctly aligned.
 1369  */
 1370 void
 1371 nfs_realign(struct mbuf **pm, int hsiz)
 1372 {
 1373         struct mbuf *m;
 1374         struct mbuf *n = NULL;
 1375         unsigned int off = 0;
 1376 
 1377         ++nfs_realign_test;
 1378         while ((m = *pm) != NULL) {
 1379                 if (!ALIGNED_POINTER(m->m_data, void *) ||
 1380                     !ALIGNED_POINTER(m->m_len,  void *)) {
 1381                         MGET(n, M_WAIT, MT_DATA);
 1382 #define ALIGN_POINTER(n) ((u_int)(((n) + sizeof(void *)) & ~sizeof(void *)))
 1383                         if (ALIGN_POINTER(m->m_len) >= MINCLSIZE) {
 1384                                 MCLGET(n, M_WAIT);
 1385                         }
 1386                         n->m_len = 0;
 1387                         break;
 1388                 }
 1389                 pm = &m->m_next;
 1390         }
 1391         /*
 1392          * If n is non-NULL, loop on m copying data, then replace the
 1393          * portion of the chain that had to be realigned.
 1394          */
 1395         if (n != NULL) {
 1396                 ++nfs_realign_count;
 1397                 while (m) {
 1398                         m_copyback(n, off, m->m_len, mtod(m, caddr_t), M_WAIT);
 1399 
 1400                         /*
 1401                          * If an unaligned amount of memory was copied, fix up
 1402                          * the last mbuf created by m_copyback().
 1403                          */
 1404                         if (!ALIGNED_POINTER(m->m_len, void *))
 1405                                 nfs_realign_fixup(m, n, &off);
 1406 
 1407                         off += m->m_len;
 1408                         m = m->m_next;
 1409                 }
 1410                 m_freemp(pm);
 1411                 *pm = n;
 1412         }
 1413 }
 1414 
 1415 
 1416 /*
 1417  * Parse an RPC request
 1418  * - verify it
 1419  * - fill in the cred struct.
 1420  */
 1421 int
 1422 nfs_getreq(struct nfsrv_descript *nd, struct nfsd *nfsd, int has_header)
 1423 {
 1424         int len, i;
 1425         u_int32_t *tl;
 1426         int32_t t1;
 1427         caddr_t cp2;
 1428         u_int32_t nfsvers, auth_type;
 1429         int error = 0;
 1430         struct nfsm_info info;
 1431 
 1432         info.nmi_mrep = nd->nd_mrep;
 1433         info.nmi_md = nd->nd_md;
 1434         info.nmi_dpos = nd->nd_dpos;
 1435         if (has_header) {
 1436                 nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
 1437                 nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
 1438                 if (*tl++ != rpc_call) {
 1439                         m_freem(info.nmi_mrep);
 1440                         return (EBADRPC);
 1441                 }
 1442         } else
 1443                 nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
 1444         nd->nd_repstat = 0;
 1445         nd->nd_flag = 0;
 1446         if (*tl++ != rpc_vers) {
 1447                 nd->nd_repstat = ERPCMISMATCH;
 1448                 nd->nd_procnum = NFSPROC_NOOP;
 1449                 return (0);
 1450         }
 1451         if (*tl != nfs_prog) {
 1452                 nd->nd_repstat = EPROGUNAVAIL;
 1453                 nd->nd_procnum = NFSPROC_NOOP;
 1454                 return (0);
 1455         }
 1456         tl++;
 1457         nfsvers = fxdr_unsigned(u_int32_t, *tl++);
 1458         if (nfsvers != NFS_VER2 && nfsvers != NFS_VER3) {
 1459                 nd->nd_repstat = EPROGMISMATCH;
 1460                 nd->nd_procnum = NFSPROC_NOOP;
 1461                 return (0);
 1462         }
 1463         if (nfsvers == NFS_VER3)
 1464                 nd->nd_flag = ND_NFSV3;
 1465         nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
 1466         if (nd->nd_procnum == NFSPROC_NULL)
 1467                 return (0);
 1468         if (nd->nd_procnum >= NFS_NPROCS ||
 1469                 (nd->nd_procnum > NFSPROC_COMMIT) ||
 1470                 (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
 1471                 nd->nd_repstat = EPROCUNAVAIL;
 1472                 nd->nd_procnum = NFSPROC_NOOP;
 1473                 return (0);
 1474         }
 1475         if ((nd->nd_flag & ND_NFSV3) == 0)
 1476                 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
 1477         auth_type = *tl++;
 1478         len = fxdr_unsigned(int, *tl++);
 1479         if (len < 0 || len > RPCAUTH_MAXSIZ) {
 1480                 m_freem(info.nmi_mrep);
 1481                 return (EBADRPC);
 1482         }
 1483 
 1484         /* Handle auth_unix */
 1485         if (auth_type == rpc_auth_unix) {
 1486                 len = fxdr_unsigned(int, *++tl);
 1487                 if (len < 0 || len > NFS_MAXNAMLEN) {
 1488                         m_freem(info.nmi_mrep);
 1489                         return (EBADRPC);
 1490                 }
 1491                 nfsm_adv(nfsm_rndup(len));
 1492                 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 1493                 memset(&nd->nd_cr, 0, sizeof (struct ucred));
 1494                 refcnt_init(&nd->nd_cr.cr_refcnt);
 1495                 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
 1496                 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
 1497                 len = fxdr_unsigned(int, *tl);
 1498                 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
 1499                         m_freem(info.nmi_mrep);
 1500                         return (EBADRPC);
 1501                 }
 1502                 nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
 1503                 for (i = 0; i < len; i++) {
 1504                         if (i < NGROUPS_MAX)
 1505                                 nd->nd_cr.cr_groups[i] =
 1506                                     fxdr_unsigned(gid_t, *tl++);
 1507                         else
 1508                                 tl++;
 1509                 }
 1510                 nd->nd_cr.cr_ngroups = (len > NGROUPS_MAX) ? NGROUPS_MAX : len;
 1511                 len = fxdr_unsigned(int, *++tl);
 1512                 if (len < 0 || len > RPCAUTH_MAXSIZ) {
 1513                         m_freem(info.nmi_mrep);
 1514                         return (EBADRPC);
 1515                 }
 1516                 if (len > 0)
 1517                         nfsm_adv(nfsm_rndup(len));
 1518         } else {
 1519                 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
 1520                 nd->nd_procnum = NFSPROC_NOOP;
 1521                 return (0);
 1522         }
 1523 
 1524         nd->nd_md = info.nmi_md;
 1525         nd->nd_dpos = info.nmi_dpos;
 1526         return (0);
 1527 nfsmout:
 1528         return (error);
 1529 }
 1530 
 1531 void
 1532 nfs_msg(struct nfsreq *rep, char *msg)
 1533 {
 1534         tpr_t tpr;
 1535 
 1536         if (rep->r_procp)
 1537                 tpr = tprintf_open(rep->r_procp);
 1538         else
 1539                 tpr = NULL;
 1540 
 1541         tprintf(tpr, "nfs server %s: %s\n",
 1542             rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname, msg);
 1543         tprintf_close(tpr);
 1544 }
 1545 
 1546 #ifdef NFSSERVER
 1547 /*
 1548  * Socket upcall routine for the nfsd sockets.
 1549  * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 1550  * Essentially do as much as possible non-blocking, else punt and it will
 1551  * be called with M_WAIT from an nfsd.
 1552  */
 1553 void
 1554 nfsrv_rcv(struct socket *so, caddr_t arg, int waitflag)
 1555 {
 1556         struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
 1557         struct mbuf *m;
 1558         struct mbuf *mp, *nam;
 1559         struct uio auio;
 1560         int flags, error;
 1561 
 1562         KERNEL_LOCK();
 1563 
 1564         if ((slp->ns_flag & SLP_VALID) == 0)
 1565                 goto out;
 1566 
 1567         /* Defer soreceive() to an nfsd. */
 1568         if (waitflag == M_DONTWAIT) {
 1569                 slp->ns_flag |= SLP_NEEDQ;
 1570                 goto dorecs;
 1571         }
 1572 
 1573         auio.uio_procp = NULL;
 1574         if (so->so_type == SOCK_STREAM) {
 1575                 /*
 1576                  * Do soreceive().
 1577                  */
 1578                 auio.uio_resid = 1000000000;
 1579                 flags = MSG_DONTWAIT;
 1580                 error = soreceive(so, &nam, &auio, &mp, NULL,
 1581                     &flags, 0);
 1582                 if (error || mp == NULL) {
 1583                         if (error == EWOULDBLOCK)
 1584                                 slp->ns_flag |= SLP_NEEDQ;
 1585                         else
 1586                                 slp->ns_flag |= SLP_DISCONN;
 1587                         goto dorecs;
 1588                 }
 1589                 m = mp;
 1590                 if (slp->ns_rawend) {
 1591                         slp->ns_rawend->m_next = m;
 1592                         slp->ns_cc += 1000000000 - auio.uio_resid;
 1593                 } else {
 1594                         slp->ns_raw = m;
 1595                         slp->ns_cc = 1000000000 - auio.uio_resid;
 1596                 }
 1597                 while (m->m_next)
 1598                         m = m->m_next;
 1599                 slp->ns_rawend = m;
 1600 
 1601                 /*
 1602                  * Now try and parse record(s) out of the raw stream data.
 1603                  */
 1604                 error = nfsrv_getstream(slp, waitflag);
 1605                 if (error) {
 1606                         if (error == EPERM)
 1607                                 slp->ns_flag |= SLP_DISCONN;
 1608                         else
 1609                                 slp->ns_flag |= SLP_NEEDQ;
 1610                 }
 1611         } else {
 1612                 do {
 1613                         auio.uio_resid = 1000000000;
 1614                         flags = MSG_DONTWAIT;
 1615                         error = soreceive(so, &nam, &auio, &mp,
 1616                             NULL, &flags, 0);
 1617                         if (mp) {
 1618                                 if (nam) {
 1619                                         m = nam;
 1620                                         m->m_next = mp;
 1621                                 } else
 1622                                         m = mp;
 1623                                 if (slp->ns_recend)
 1624                                         slp->ns_recend->m_nextpkt = m;
 1625                                 else
 1626                                         slp->ns_rec = m;
 1627                                 slp->ns_recend = m;
 1628                                 m->m_nextpkt = NULL;
 1629                         }
 1630                         if (error) {
 1631                                 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
 1632                                         && error != EWOULDBLOCK) {
 1633                                         slp->ns_flag |= SLP_DISCONN;
 1634                                         goto dorecs;
 1635                                 }
 1636                         }
 1637                 } while (mp);
 1638         }
 1639 
 1640         /*
 1641          * Now try and process the request records, non-blocking.
 1642          */
 1643 dorecs:
 1644         if (waitflag == M_DONTWAIT &&
 1645                 (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
 1646                 nfsrv_wakenfsd(slp);
 1647 
 1648 out:
 1649         KERNEL_UNLOCK();
 1650 }
 1651 
 1652 /*
 1653  * Try and extract an RPC request from the mbuf data list received on a
 1654  * stream socket. The "waitflag" argument indicates whether or not it
 1655  * can sleep.
 1656  */
 1657 int
 1658 nfsrv_getstream(struct nfssvc_sock *slp, int waitflag)
 1659 {
 1660         struct mbuf *m, **mpp;
 1661         char *cp1, *cp2;
 1662         int len;
 1663         struct mbuf *om, *m2, *recm;
 1664         u_int32_t recmark;
 1665 
 1666         if (slp->ns_flag & SLP_GETSTREAM)
 1667                 return (0);
 1668         slp->ns_flag |= SLP_GETSTREAM;
 1669         for (;;) {
 1670                 if (slp->ns_reclen == 0) {
 1671                         if (slp->ns_cc < NFSX_UNSIGNED) {
 1672                                 slp->ns_flag &= ~SLP_GETSTREAM;
 1673                                 return (0);
 1674                         }
 1675                         m = slp->ns_raw;
 1676                         if (m->m_len >= NFSX_UNSIGNED) {
 1677                                 bcopy(mtod(m, caddr_t), &recmark,
 1678                                     NFSX_UNSIGNED);
 1679                                 m->m_data += NFSX_UNSIGNED;
 1680                                 m->m_len -= NFSX_UNSIGNED;
 1681                         } else {
 1682                                 cp1 = (caddr_t)&recmark;
 1683                                 cp2 = mtod(m, caddr_t);
 1684                                 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
 1685                                         while (m->m_len == 0) {
 1686                                                 m = m->m_next;
 1687                                                 cp2 = mtod(m, caddr_t);
 1688                                         }
 1689                                         *cp1++ = *cp2++;
 1690                                         m->m_data++;
 1691                                         m->m_len--;
 1692                                 }
 1693                         }
 1694                         slp->ns_cc -= NFSX_UNSIGNED;
 1695                         recmark = ntohl(recmark);
 1696                         slp->ns_reclen = recmark & ~0x80000000;
 1697                         if (recmark & 0x80000000)
 1698                                 slp->ns_flag |= SLP_LASTFRAG;
 1699                         else
 1700                                 slp->ns_flag &= ~SLP_LASTFRAG;
 1701                         if (slp->ns_reclen > NFS_MAXPACKET) {
 1702                                 slp->ns_flag &= ~SLP_GETSTREAM;
 1703                                 return (EPERM);
 1704                         }
 1705                 }
 1706 
 1707                 /*
 1708                  * Now get the record part.
 1709                  */
 1710                 recm = NULL;
 1711                 if (slp->ns_cc == slp->ns_reclen) {
 1712                         recm = slp->ns_raw;
 1713                         slp->ns_raw = slp->ns_rawend = NULL;
 1714                         slp->ns_cc = slp->ns_reclen = 0;
 1715                 } else if (slp->ns_cc > slp->ns_reclen) {
 1716                         len = 0;
 1717                         m = slp->ns_raw;
 1718                         om = NULL;
 1719                         while (len < slp->ns_reclen) {
 1720                                 if ((len + m->m_len) > slp->ns_reclen) {
 1721                                         m2 = m_copym(m, 0, slp->ns_reclen - len,
 1722                                             waitflag);
 1723                                         if (m2) {
 1724                                                 if (om) {
 1725                                                         om->m_next = m2;
 1726                                                         recm = slp->ns_raw;
 1727                                                 } else
 1728                                                         recm = m2;
 1729                                                 m->m_data += slp->ns_reclen-len;
 1730                                                 m->m_len -= slp->ns_reclen-len;
 1731                                                 len = slp->ns_reclen;
 1732                                         } else {
 1733                                                 slp->ns_flag &= ~SLP_GETSTREAM;
 1734                                                 return (EWOULDBLOCK);
 1735                                         }
 1736                                 } else if ((len + m->m_len) == slp->ns_reclen) {
 1737                                         om = m;
 1738                                         len += m->m_len;
 1739                                         m = m->m_next;
 1740                                         recm = slp->ns_raw;
 1741                                         om->m_next = NULL;
 1742                                 } else {
 1743                                         om = m;
 1744                                         len += m->m_len;
 1745                                         m = m->m_next;
 1746                                 }
 1747                         }
 1748                         slp->ns_raw = m;
 1749                         slp->ns_cc -= len;
 1750                         slp->ns_reclen = 0;
 1751                 } else {
 1752                         slp->ns_flag &= ~SLP_GETSTREAM;
 1753                         return (0);
 1754                 }
 1755 
 1756                 /*
 1757                  * Accumulate the fragments into a record.
 1758                  */
 1759                 mpp = &slp->ns_frag;
 1760                 while (*mpp)
 1761                         mpp = &((*mpp)->m_next);
 1762                 *mpp = recm;
 1763                 if (slp->ns_flag & SLP_LASTFRAG) {
 1764                         if (slp->ns_recend)
 1765                             slp->ns_recend->m_nextpkt = slp->ns_frag;
 1766                         else
 1767                             slp->ns_rec = slp->ns_frag;
 1768                         slp->ns_recend = slp->ns_frag;
 1769                         slp->ns_frag = NULL;
 1770                 }
 1771         }
 1772 }
 1773 
 1774 /*
 1775  * Parse an RPC header.
 1776  */
 1777 int
 1778 nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
 1779     struct nfsrv_descript **ndp)
 1780 {
 1781         struct mbuf *m, *nam;
 1782         struct nfsrv_descript *nd;
 1783         int error;
 1784 
 1785         *ndp = NULL;
 1786         if ((slp->ns_flag & SLP_VALID) == 0 ||
 1787             (m = slp->ns_rec) == NULL)
 1788                 return (ENOBUFS);
 1789         slp->ns_rec = m->m_nextpkt;
 1790         if (slp->ns_rec)
 1791                 m->m_nextpkt = NULL;
 1792         else
 1793                 slp->ns_recend = NULL;
 1794         if (m->m_type == MT_SONAME) {
 1795                 nam = m;
 1796                 m = m->m_next;
 1797                 nam->m_next = NULL;
 1798         } else
 1799                 nam = NULL;
 1800         nd = pool_get(&nfsrv_descript_pl, PR_WAITOK);
 1801         nfs_realign(&m, 10 * NFSX_UNSIGNED);
 1802         nd->nd_md = nd->nd_mrep = m;
 1803         nd->nd_nam2 = nam;
 1804         nd->nd_dpos = mtod(m, caddr_t);
 1805         error = nfs_getreq(nd, nfsd, 1);
 1806         if (error) {
 1807                 m_freem(nam);
 1808                 pool_put(&nfsrv_descript_pl, nd);
 1809                 return (error);
 1810         }
 1811         *ndp = nd;
 1812         nfsd->nfsd_nd = nd;
 1813         return (0);
 1814 }
 1815 
 1816 
 1817 /*
 1818  * Search for a sleeping nfsd and wake it up.
 1819  * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
 1820  * running nfsds will go look for the work in the nfssvc_sock list.
 1821  */
 1822 void
 1823 nfsrv_wakenfsd(struct nfssvc_sock *slp)
 1824 {
 1825         struct nfsd     *nfsd;
 1826 
 1827         if ((slp->ns_flag & SLP_VALID) == 0)
 1828                 return;
 1829 
 1830         TAILQ_FOREACH(nfsd, &nfsd_head, nfsd_chain) {
 1831                 if (nfsd->nfsd_flag & NFSD_WAITING) {
 1832                         nfsd->nfsd_flag &= ~NFSD_WAITING;
 1833                         if (nfsd->nfsd_slp)
 1834                                 panic("nfsd wakeup");
 1835                         slp->ns_sref++;
 1836                         nfsd->nfsd_slp = slp;
 1837                         wakeup_one(nfsd);
 1838                         return;
 1839                 }
 1840         }
 1841 
 1842         slp->ns_flag |= SLP_DOREC;
 1843         nfsd_head_flag |= NFSD_CHECKSLP;
 1844 }
 1845 #endif /* NFSSERVER */

Cache object: 66b05cbeda6bfd54da9d3c4e9f22c5d6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.