The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/nfs/nfs_socket.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1989, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * Rick Macklem at The University of Guelph.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the University of
   19  *      California, Berkeley and its contributors.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      @(#)nfs_socket.c        8.3 (Berkeley) 1/12/94
   37  * $FreeBSD: src/sys/nfs/nfs_socket.c,v 1.18.2.3 1999/09/05 08:19:42 peter Exp $
   38  */
   39 
   40 /*
   41  * Socket operations for use by nfs
   42  */
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/proc.h>
   47 #include <sys/mount.h>
   48 #include <sys/kernel.h>
   49 #include <sys/mbuf.h>
   50 #include <sys/vnode.h>
   51 #include <sys/domain.h>
   52 #include <sys/protosw.h>
   53 #include <sys/socket.h>
   54 #include <sys/socketvar.h>
   55 #include <sys/syslog.h>
   56 #include <sys/tprintf.h>
   57 
   58 #include <netinet/in.h>
   59 #include <netinet/tcp.h>
   60 
   61 #include <nfs/rpcv2.h>
   62 #include <nfs/nfsproto.h>
   63 #include <nfs/nfs.h>
   64 #include <nfs/xdr_subs.h>
   65 #include <nfs/nfsm_subs.h>
   66 #include <nfs/nfsmount.h>
   67 #include <nfs/nfsnode.h>
   68 #include <nfs/nfsrtt.h>
   69 #include <nfs/nqnfs.h>
   70 
   71 #define TRUE    1
   72 #define FALSE   0
   73 
   74 /*
   75  * Estimate rto for an nfs rpc sent via an unreliable datagram.
   76  * Use the mean and mean deviation of rtt for the appropriate type of rpc
   77  * for the frequent rpcs and a default for the others.
   78  * The justification for doing "other" this way is that these rpcs
   79  * happen so infrequently that timer est. would probably be stale.
   80  * Also, since many of these rpcs are
   81  * non-idempotent, a conservative timeout is desired.
   82  * getattr, lookup - A+2D
   83  * read, write     - A+4D
   84  * other           - nm_timeo
       *
       * n is the mount point; its nm_timeo, nm_srtt[] and nm_sdrtt[] are read.
       * t is the timer number taken from proct[]: 0 selects nm_timeo, 1 and 2
       * use the first formula and any larger value uses the second.
       * Timer t is kept in slot t-1 of nm_srtt[] and nm_sdrtt[].
   85  */
   86 #define NFS_RTO(n, t) \
   87         ((t) == 0 ? (n)->nm_timeo : \
   88          ((t) < 3 ? \
   89           (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
   90           ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
      /*
       * Mean rtt and mean deviation slots for request r, selected through the
       * proct[] entry of its procedure.  The index is proct[] - 1, so for a
       * procedure whose proct[] entry is 0 these would index element -1.
       * NOTE(review): callers presumably check proct[] != 0 first -- confirm.
       */
   91 #define NFS_SRTT(r)     (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
   92 #define NFS_SDRTT(r)    (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
   93 /*
   94  * External data, mostly RPC constants in XDR form
       *
       * These are only declared here; their definitions are not in the part
       * of the file shown.  Of them, the code below compares a reply's message
       * type against rpc_reply and bumps the nfsstats counters rpcretries and
       * rpcinvalid.
   95  */
   96 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
   97         rpc_msgaccepted, rpc_call, rpc_autherr,
   98         rpc_auth_kerb;
   99 extern u_long nfs_prog, nqnfs_prog;
  100 extern time_t nqnfsstarttime;
  101 extern struct nfsstats nfsstats;
  102 extern int nfsv3_procid[NFS_NPROCS];
  103 extern int nfs_ticks;
  104 
  105 /*
  106  * Defines which timer to use for the procnum.
  107  * 0 - default
  108  * 1 - getattr
  109  * 2 - lookup
  110  * 3 - read
  111  * 4 - write
       *
       * Indexed by procedure number (see NFS_SRTT/NFS_SDRTT); the initializer
       * supplies 26 values.  Timers are shared between procedures: assuming
       * the procedure numbering follows the order of nfsrv3_procs[] below,
       * getattr and access use timer 1, lookup uses 2, readlink, read, readdir
       * and readdirplus use 3, and write uses 4.  Everything else is 0, which
       * NFS_RTO() maps to the mount's nm_timeo.
  112  */
  113 static int proct[NFS_NPROCS] = {
  114         0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
  115         0, 0, 0,
  116 };
  117 
  118 /*
  119  * There is a congestion window for outstanding rpcs maintained per mount
  120  * point. The cwnd size is adjusted in roughly the way that:
  121  * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
  122  * SIGCOMM '88". ACM, August 1988.
  123  * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
  124  * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
  125  * of rpcs is in progress.
  126  * (The sent count and cwnd are scaled for integer arith.)
  127  * Variants of "slow start" were tried and were found to be too much of a
  128  * performance hit (ave. rtt 3 times larger),
  129  * I suspect due to the large rtt that nfs rpcs have.
       *
       * NFS_CWNDSCALE is the integer scale factor and NFS_MAXCWND is 32 scaled
       * units.  nfs_connect() starts every new connection with
       * nm_cwnd = NFS_MAXCWND / 2 and nm_sent = 0.
  130  */
  131 #define NFS_CWNDSCALE   256
  132 #define NFS_MAXCWND     (NFS_CWNDSCALE * 32)
      /*
       * Powers of two from 2 to 256.
       * NOTE(review): presumably retransmit timeout multipliers; the code that
       * uses this table is not in the part of the file shown -- confirm.
       */
  133 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
      /*
       * NOTE(review): nfsrtton and nfsrtt are not referenced in the part of
       * the file shown; presumably a switch and buffer for rtt logging
       * (see nfs/nfsrtt.h) -- confirm.
       */
  134 int nfsrtton = 0;
  135 struct nfsrtt nfsrtt;
  136 
  137 static int      nfs_msg __P((struct proc *,char *,char *));
  138 static int      nfs_rcvlock __P((struct nfsreq *));
  139 static void     nfs_rcvunlock __P((int *flagp));
  140 static void     nfs_realign __P((struct mbuf *m, int hsiz));
  141 static int      nfs_receive __P((struct nfsreq *rep, struct mbuf **aname,
  142                                  struct mbuf **mp));
  143 static int      nfs_reconnect __P((struct nfsreq *rep));
      /* Everything up to the matching #endif is compiled out when NFS_NOSERVER is defined. */
  144 #ifndef NFS_NOSERVER 
  145 static int      nfsrv_getstream __P((struct nfssvc_sock *,int));
  146 
      /*
       * Server dispatch table: one handler per procedure, NFS_NPROCS slots,
       * 26 of which are initialized here.  The last two entries are
       * nfsrv_noop.  Every handler takes the request descriptor, the server
       * socket, the calling process and a pointer through which an mbuf chain
       * is passed (mreqp).
       */
  147 int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd,
  148                                     struct nfssvc_sock *slp,
  149                                     struct proc *procp,
  150                                     struct mbuf **mreqp)) = {
  151         nfsrv_null,
  152         nfsrv_getattr,
  153         nfsrv_setattr,
  154         nfsrv_lookup,
  155         nfsrv3_access,
  156         nfsrv_readlink,
  157         nfsrv_read,
  158         nfsrv_write,
  159         nfsrv_create,
  160         nfsrv_mkdir,
  161         nfsrv_symlink,
  162         nfsrv_mknod,
  163         nfsrv_remove,
  164         nfsrv_rmdir,
  165         nfsrv_rename,
  166         nfsrv_link,
  167         nfsrv_readdir,
  168         nfsrv_readdirplus,
  169         nfsrv_statfs,
  170         nfsrv_fsinfo,
  171         nfsrv_pathconf,
  172         nfsrv_commit,
  173         nqnfsrv_getlease,
  174         nqnfsrv_vacated,
  175         nfsrv_noop,
  176         nfsrv_noop
  177 };
  178 #endif /* NFS_NOSERVER */
  179 
  180 /*
  181  * Initialize sockets and congestion for a new NFS connection.
  182  * We do not free the sockaddr if error.
       *
       * nmp - mount point.  The socket is created from nm_nam (server address),
       *       nm_sotype and nm_soproto and stored in nm_so; nm_soflags is set
       *       to the protocol's pr_flags.
       * rep - request on whose behalf we connect, or NULL.  When non-NULL,
       *       nfs_sigintr() is polled while waiting for the connect to finish.
       *
       * Returns 0 on success.  On any failure the socket is torn down with
       * nfs_disconnect(), which leaves nm_so NULL, and the error is returned.
  183  */
  184 int
  185 nfs_connect(nmp, rep)
  186         register struct nfsmount *nmp;
  187         struct nfsreq *rep;
  188 {
  189         register struct socket *so;
  190         int s, error, rcvreserve, sndreserve;
  191         struct sockaddr *saddr;
  192         struct sockaddr_in *sin;
  193         struct mbuf *m;
  194         u_short tport;
  195         struct proc *p = &proc0; /* only used for socreate */
  196 
              /* Clear nm_so first so the "bad" path is safe if socreate fails. */
  197         nmp->nm_so = (struct socket *)0;
  198         saddr = mtod(nmp->nm_nam, struct sockaddr *);
  199         error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
  200                 nmp->nm_soproto, p);
  201         if (error)
  202                 goto bad;
  203         so = nmp->nm_so;
  204         so->so_state &= ~SS_PRIV; /* don't need it */
  205         nmp->nm_soflags = so->so_proto->pr_flags;
  206 
  207         /*
  208          * Some servers require that the client port be a reserved port number.
               * Try IPPORT_RESERVED - 1 first and step downwards while the bind
               * fails with EADDRINUSE, giving up once tport reaches
               * IPPORT_RESERVED / 2.  The name mbuf is freed whether or not the
               * bind succeeded.
  209          */
  210         if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
  211                 MGET(m, M_WAIT, MT_SONAME);
  212                 sin = mtod(m, struct sockaddr_in *);
  213                 sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
  214                 sin->sin_family = AF_INET;
  215                 sin->sin_addr.s_addr = INADDR_ANY;
  216                 tport = IPPORT_RESERVED - 1;
  217                 sin->sin_port = htons(tport);
  218                 while ((error = sobind(so, m)) == EADDRINUSE &&
  219                        --tport > IPPORT_RESERVED / 2)
  220                         sin->sin_port = htons(tport);
  221                 m_freem(m);
  222                 if (error)
  223                         goto bad;
  224         }
  225 
  226         /*
  227          * Protocols that do not require connections may be optionally left
  228          * unconnected for servers that reply from a port other than NFS_PORT.
  229          */
  230         if (nmp->nm_flag & NFSMNT_NOCONN) {
  231                 if (nmp->nm_soflags & PR_CONNREQUIRED) {
  232                         error = ENOTCONN;
  233                         goto bad;
  234                 }
  235         } else {
  236                 error = soconnect(so, nmp->nm_nam);
  237                 if (error)
  238                         goto bad;
  239 
  240                 /*
  241                  * Wait for the connection to complete. Cribbed from the
  242                  * connect system call but with the wait timing out so
  243                  * that interruptible mounts don't hang here for a long time.
                       * Each sleep is bounded by 2 * hz; after it, if the connect
                       * is still pending and we have a request, nfs_sigintr() decides
                       * whether to abandon the attempt.
  244                  */
  245                 s = splnet();
  246                 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  247                         (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
  248                                 "nfscon", 2 * hz);
  249                         if ((so->so_state & SS_ISCONNECTING) &&
  250                             so->so_error == 0 && rep &&
  251                             (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
  252                                 so->so_state &= ~SS_ISCONNECTING;
  253                                 splx(s);
  254                                 goto bad;
  255                         }
  256                 }
  257                 if (so->so_error) {
                              /* Report the pending socket error and clear it. */
  258                         error = so->so_error;
  259                         so->so_error = 0;
  260                         splx(s);
  261                         goto bad;
  262                 }
  263                 splx(s);
  264         }
              /* Soft or interruptible mounts get 5 * hz socket buffer timeouts. */
  265         if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
  266                 so->so_rcv.sb_timeo = (5 * hz);
  267                 so->so_snd.sb_timeo = (5 * hz);
  268         } else {
  269                 so->so_rcv.sb_timeo = 0;
  270                 so->so_snd.sb_timeo = 0;
  271         }
              /*
               * Size the socket buffers at twice (transfer size + NFS_MAXPKTHDR).
               * The datagram and seqpacket cases are identical; streams add
               * sizeof (u_long) per message (nfs_receive() reads a u_long record
               * mark ahead of each stream record) and also turn on SO_KEEPALIVE
               * and, for TCP, TCP_NODELAY.
               */
  272         if (nmp->nm_sotype == SOCK_DGRAM) {
  273                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
  274                 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
  275         } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
  276                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
  277                 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
  278         } else {
  279                 if (nmp->nm_sotype != SOCK_STREAM)
  280                         panic("nfscon sotype");
  281                 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
  282                         MGET(m, M_WAIT, MT_SOOPTS);
  283                         *mtod(m, int *) = 1;
  284                         m->m_len = sizeof(int);
  285                         sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
  286                 }
  287                 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
  288                         MGET(m, M_WAIT, MT_SOOPTS);
  289                         *mtod(m, int *) = 1;
  290                         m->m_len = sizeof(int);
  291                         sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
  292                 }
  293                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
  294                                 * 2;
  295                 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
  296                                 * 2;
  297         }
  298         error = soreserve(so, sndreserve, rcvreserve);
  299         if (error)
  300                 goto bad;
  301         so->so_rcv.sb_flags |= SB_NOINTR;
  302         so->so_snd.sb_flags |= SB_NOINTR;
  303 
  304         /* Initialize other non-zero congestion variables */
              /*
               * Timers 1..4 live in slots 0..3 (NFS_RTO, NFS_SRTT and NFS_SDRTT
               * all index with timer - 1) and nothing reads a fifth slot, so only
               * four elements are initialized.  The arrays are expected to be
               * declared with four elements in nfs/nfsmount.h, in which case a
               * store to [4] would land past their end.
               */
  305         nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
  306                 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
  307         nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
  308                 nmp->nm_sdrtt[3] = 0;
  309         nmp->nm_cwnd = NFS_MAXCWND / 2;     /* Initial send window */
  310         nmp->nm_sent = 0;
  311         nmp->nm_timeouts = 0;
  312         return (0);
  313 
  314 bad:
              /* Common failure exit: close whatever socket exists, keep error. */
  315         nfs_disconnect(nmp);
  316         return (error);
  317 }
  318 
  319 /*
  320  * Reconnect routine:
  321  * Called when a connection is broken on a reliable protocol.
  322  * - clean up the old socket
  323  * - nfs_connect() again
  324  * - set R_MUSTRESEND for all outstanding requests on mount point
  325  * If this fails the mount point is DEAD!
  326  * nb: Must be called with the nfs_sndlock() set on the mount point.
       *
       * rep - request whose mount (rep->r_nmp) is to be reconnected; it is also
       *       handed to nfs_connect().
       *
       * Returns 0 once a connection has been made, or EINTR if nfs_connect()
       * fails with EINTR or ERESTART.  Any other nfs_connect() error is retried
       * without limit, sleeping on lbolt between attempts.
  327  */
  328 static int
  329 nfs_reconnect(rep)
  330         register struct nfsreq *rep;
  331 {
  332         register struct nfsreq *rp;
  333         register struct nfsmount *nmp = rep->r_nmp;
  334         int error;
  335 
  336         nfs_disconnect(nmp);
  337         while ((error = nfs_connect(nmp, rep))) {
  338                 if (error == EINTR || error == ERESTART)
  339                         return (EINTR);
                      /* No timeout is passed; we wait for a wakeup on lbolt. */
  340                 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
  341         }
  342 
  343         /*
  344          * Loop through outstanding request list and fix up all requests
  345          * on old socket.
               * nfs_reqq holds requests for every mount, so only those whose
               * r_nmp matches this mount are flagged.
  346          */
  347         for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
  348                 if (rp->r_nmp == nmp)
  349                         rp->r_flags |= R_MUSTRESEND;
  350         }
  351         return (0);
  352 }
  353 
  354 /*
  355  * NFS disconnect. Clean up and unlink.
       *
       * If the mount has a socket, nm_so is cleared first and the socket is
       * then shut down (how = 2) and closed.  Does nothing when nm_so is
       * already NULL, so it is safe to call more than once.
  356  */
  357 void
  358 nfs_disconnect(nmp)
  359         register struct nfsmount *nmp;
  360 {
  361         register struct socket *so;
  362 
  363         if (nmp->nm_so) {
  364                 so = nmp->nm_so;
                      /* Detach from the mount before tearing the socket down. */
  365                 nmp->nm_so = (struct socket *)0;
  366                 soshutdown(so, 2);
  367                 soclose(so);
  368         }
  369 }
  370 
  371 /*
  372  * This is the nfs send routine. For connection based socket types, it
  373  * must be called with an nfs_sndlock() on the socket.
  374  * "rep == NULL" indicates that it has been called from a server.
  375  * For the client side:
  376  * - return EINTR if the RPC is terminated, 0 otherwise
  377  * - set R_MUSTRESEND if the send fails for any reason
  378  * - do any cleanup required by recoverable socket errors (???)
  379  * For the server side:
  380  * - return EINTR or ERESTART if interrupted by a signal
  381  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
  382  * - do any cleanup required by recoverable socket errors (???)
       *
       * so  - socket to send on.  Ignored on the client side: when rep is
       *       non-NULL it is replaced by rep->r_nmp->nm_so.
       * nam - destination address; passed to sosend() only when the protocol
       *       does not require a connection and the socket is not connected.
       * top - mbuf chain holding the message.  Freed here on the two early
       *       client-side returns, otherwise handed to sosend().
       * rep - client request, or NULL when called from a server.
       *
       * As coded, a sosend() error of EINTR, ERESTART, EWOULDBLOCK or EPIPE is
       * returned to the caller on both the client and the server side; every
       * other error is logged and turned into 0.  On the client side a missing
       * socket (nm_so == NULL) also returns 0, with R_MUSTRESEND set.
  383  */
  384 int
  385 nfs_send(so, nam, top, rep)
  386         register struct socket *so;
  387         struct mbuf *nam;
  388         register struct mbuf *top;
  389         struct nfsreq *rep;
  390 {
  391         struct mbuf *sendnam;
  392         int error, soflags, flags;
  393 
  394         if (rep) {
                      /* Request already terminated: drop the data and stop. */
  395                 if (rep->r_flags & R_SOFTTERM) {
  396                         m_freem(top);
  397                         return (EINTR);
  398                 }
                      /* No socket right now: ask for a resend later. */
  399                 if ((so = rep->r_nmp->nm_so) == NULL) {
  400                         rep->r_flags |= R_MUSTRESEND;
  401                         m_freem(top);
  402                         return (0);
  403                 }
  404                 rep->r_flags &= ~R_MUSTRESEND;
  405                 soflags = rep->r_nmp->nm_soflags;
  406         } else
  407                 soflags = so->so_proto->pr_flags;
  408         if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
  409                 sendnam = (struct mbuf *)0;
  410         else
  411                 sendnam = nam;
              /* Seqpacket sockets get MSG_EOR on every message. */
  412         if (so->so_type == SOCK_SEQPACKET)
  413                 flags = MSG_EOR;
  414         else
  415                 flags = 0;
  416 
  417         error = sosend(so, sendnam, (struct uio *)0, top,
  418                 (struct mbuf *)0, flags);
  419         if (error) {
  420                 if (rep) {
  421                         log(LOG_INFO, "nfs send error %d for server %s\n",error,
  422                             rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  423                         /*
  424                          * Deal with errors for the client side.
  425                          */
  426                         if (rep->r_flags & R_SOFTTERM)
  427                                 error = EINTR;
  428                         else
  429                                 rep->r_flags |= R_MUSTRESEND;
  430                 } else
  431                         log(LOG_INFO, "nfsd send error %d\n", error);
  432 
  433                 /*
  434                  * Handle any recoverable (soft) socket errors here. (???)
                       * Only EINTR, ERESTART, EWOULDBLOCK and EPIPE are passed back;
                       * anything else is reported as success.
  435                  */
  436                 if (error != EINTR && error != ERESTART &&
  437                         error != EWOULDBLOCK && error != EPIPE)
  438                         error = 0;
  439         }
  440         return (error);
  441 }
  442 
  443 /*
  444  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
  445  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
  446  * Mark and consolidate the data into a new mbuf list.
  447  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
  448  *     small mbufs.
  449  * For SOCK_STREAM we must be very careful to read an entire record once
  450  * we have read any of it, even if the system call has been interrupted.
       *
       * rep   - request being waited for; must not be NULL (it is dereferenced
       *         unconditionally).  Its mount supplies the socket.
       * aname - out: cleared on entry.  Only the datagram path passes it to
       *         soreceive(), and only when the socket is not connected.
       * mp    - out: the received mbuf chain, or NULL when an error is returned.
       *
       * Returns 0 on success.  EINTR is returned when the reply has already
       * arrived (r_mrep set) or the request was soft-terminated; EACCES when a
       * datagram mount has no socket; otherwise the error from nfs_sndlock(),
       * nfs_reconnect() or soreceive().  The send lock taken here for
       * non-datagram sockets is never held on return.
  451  */
  452 static int
  453 nfs_receive(rep, aname, mp)
  454         register struct nfsreq *rep;
  455         struct mbuf **aname;
  456         struct mbuf **mp;
  457 {
  458         register struct socket *so;
  459         struct uio auio;
  460         struct iovec aio;
  461         register struct mbuf *m;
  462         struct mbuf *control;
  463         u_long len;
  464         struct mbuf **getnam;
  465         int error, sotype, rcvflg;
  466         struct proc *p = curproc;       /* XXX */
  467 
  468         /*
  469          * Set up arguments for soreceive()
  470          */
  471         *mp = (struct mbuf *)0;
  472         *aname = (struct mbuf *)0;
  473         sotype = rep->r_nmp->nm_sotype;
  474 
  475         /*
  476          * For reliable protocols, lock against other senders/receivers
  477          * in case a reconnect is necessary.
  478          * For SOCK_STREAM, first get the Record Mark to find out how much
  479          * more there is to get.
  480          * We must lock the socket against other receivers
  481          * until we have an entire rpc request/reply.
  482          */
  483         if (sotype != SOCK_DGRAM) {
  484                 error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
  485                 if (error)
  486                         return (error);
                      /* Invariant: the send lock is held whenever we are at tryagain. */
  487 tryagain:
  488                 /*
  489                  * Check for fatal errors and resending request.
  490                  */
  491                 /*
  492                  * Ugh: If a reconnect attempt just happened, nm_so
  493                  * would have changed. NULL indicates a failed
  494                  * attempt that has essentially shut down this
  495                  * mount point.
  496                  */
  497                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
  498                         nfs_sndunlock(&rep->r_nmp->nm_flag);
  499                         return (EINTR);
  500                 }
  501                 so = rep->r_nmp->nm_so;
  502                 if (!so) {
  503                         error = nfs_reconnect(rep);
  504                         if (error) {
  505                                 nfs_sndunlock(&rep->r_nmp->nm_flag);
  506                                 return (error);
  507                         }
  508                         goto tryagain;
  509                 }
                      /*
                       * Resend our own request for as long as it is flagged.  A
                       * failed send that was not a signal forces a reconnect.
                       */
  510                 while (rep->r_flags & R_MUSTRESEND) {
  511                         m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
  512                         nfsstats.rpcretries++;
  513                         error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
  514                         if (error) {
  515                                 if (error == EINTR || error == ERESTART ||
  516                                     (error = nfs_reconnect(rep))) {
  517                                         nfs_sndunlock(&rep->r_nmp->nm_flag);
  518                                         return (error);
  519                                 }
  520                                 goto tryagain;
  521                         }
  522                 }
                      /* The send lock is dropped before the blocking receive. */
  523                 nfs_sndunlock(&rep->r_nmp->nm_flag);
  524                 if (sotype == SOCK_STREAM) {
                              /*
                               * Read the record mark into len.
                               * NOTE(review): sizeof(u_long) bytes are read, which
                               * matches the 4-byte record mark only where u_long is
                               * 32 bits wide -- confirm for the target platform.
                               */
  525                         aio.iov_base = (caddr_t) &len;
  526                         aio.iov_len = sizeof(u_long);
  527                         auio.uio_iov = &aio;
  528                         auio.uio_iovcnt = 1;
  529                         auio.uio_segflg = UIO_SYSSPACE;
  530                         auio.uio_rw = UIO_READ;
  531                         auio.uio_offset = 0;
  532                         auio.uio_resid = sizeof(u_long);
  533                         auio.uio_procp = p;
  534                         do {
  535                            rcvflg = MSG_WAITALL;
  536                            error = soreceive(so, (struct mbuf **)0, &auio,
  537                                 (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
  538                            if (error == EWOULDBLOCK && rep) {
  539                                 if (rep->r_flags & R_SOFTTERM)
  540                                         return (EINTR);
  541                            }
  542                         } while (error == EWOULDBLOCK);
  543                         if (!error && auio.uio_resid > 0) {
  544                             log(LOG_INFO,
  545                                  "short receive (%d/%d) from nfs server %s\n",
  546                                  sizeof(u_long) - auio.uio_resid,
  547                                  sizeof(u_long),
  548                                  rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  549                             error = EPIPE;
  550                         }
  551                         if (error)
  552                                 goto errout;
                              /*
                               * Clear the top bit of the mark, leaving the length
                               * (presumably the last-fragment flag of RPC record
                               * marking).
                               */
  553                         len = ntohl(len) & ~0x80000000;
  554                         /*
  555                          * This is SERIOUS! We are out of sync with the sender
  556                          * and forcing a disconnect/reconnect is all I can do.
  557                          */
  558                         if (len > NFS_MAXPACKET) {
  559                             log(LOG_ERR, "%s (%d) from nfs server %s\n",
  560                                 "impossible packet length",
  561                                 len,
  562                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  563                             error = EFBIG;
  564                             goto errout;
  565                         }
                              /*
                               * Now read the record body.  Unlike the mark, this
                               * loop also retries on EINTR and ERESTART so that a
                               * record is never left half read.
                               */
  566                         auio.uio_resid = len;
  567                         do {
  568                             rcvflg = MSG_WAITALL;
  569                             error =  soreceive(so, (struct mbuf **)0,
  570                                 &auio, mp, (struct mbuf **)0, &rcvflg);
  571                         } while (error == EWOULDBLOCK || error == EINTR ||
  572                                  error == ERESTART);
  573                         if (!error && auio.uio_resid > 0) {
  574                             log(LOG_INFO,
  575                                 "short receive (%d/%d) from nfs server %s\n",
  576                                 len - auio.uio_resid, len,
  577                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  578                             error = EPIPE;
  579                         }
  580                 } else {
  581                         /*
  582                          * NB: Since uio_resid is big, MSG_WAITALL is ignored
  583                          * and soreceive() will return when it has either a
  584                          * control msg or a data msg.
  585                          * We have no use for control msg., but must grab them
  586                          * and then throw them away so we know what is going
  587                          * on.
  588                          */
  589                         auio.uio_resid = len = 100000000; /* Anything Big */
  590                         auio.uio_procp = p;
  591                         do {
  592                             rcvflg = 0;
  593                             error =  soreceive(so, (struct mbuf **)0,
  594                                 &auio, mp, &control, &rcvflg);
  595                             if (control)
  596                                 m_freem(control);
  597                             if (error == EWOULDBLOCK && rep) {
  598                                 if (rep->r_flags & R_SOFTTERM)
  599                                         return (EINTR);
  600                             }
  601                         } while (error == EWOULDBLOCK ||
  602                                  (!error && *mp == NULL && control));
  603                         if ((rcvflg & MSG_EOR) == 0)
  604                                 printf("Egad!!\n");
  605                         if (!error && *mp == NULL)
  606                                 error = EPIPE;
                              /* len becomes the number of bytes actually received. */
  607                         len -= auio.uio_resid;
  608                 }
  609 errout:
                      /*
                       * Any error other than EINTR/ERESTART is treated as a broken
                       * connection: discard partial data, retake the send lock,
                       * reconnect and start over.
                       */
  610                 if (error && error != EINTR && error != ERESTART) {
  611                         m_freem(*mp);
  612                         *mp = (struct mbuf *)0;
  613                         if (error != EPIPE)
  614                                 log(LOG_INFO,
  615                                     "receive error %d from nfs server %s\n",
  616                                     error,
  617                                  rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  618                         error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
  619                         if (!error) {
  620                                 error = nfs_reconnect(rep);
  621                                 if (!error)
  622                                         goto tryagain;
                                      /*
                                       * Reconnect failed and we are about to
                                       * return: release the send lock taken just
                                       * above, as every other exit path does.
                                       */
                                      nfs_sndunlock(&rep->r_nmp->nm_flag);
                              }
  623                 }
  624         } else {
                      /* Datagram sockets: no locking and no reconnect here. */
  625                 if ((so = rep->r_nmp->nm_so) == NULL)
  626                         return (EACCES);
                      /* Only ask for the sender's address on unconnected sockets. */
  627                 if (so->so_state & SS_ISCONNECTED)
  628                         getnam = (struct mbuf **)0;
  629                 else
  630                         getnam = aname;
  631                 auio.uio_resid = len = 1000000;
  632                 auio.uio_procp = p;
  633                 do {
  634                         rcvflg = 0;
  635                         error =  soreceive(so, getnam, &auio, mp,
  636                                 (struct mbuf **)0, &rcvflg);
  637                         if (error == EWOULDBLOCK &&
  638                             (rep->r_flags & R_SOFTTERM))
  639                                 return (EINTR);
  640                 } while (error == EWOULDBLOCK);
  641                 len -= auio.uio_resid;
  642         }
  643         if (error) {
  644                 m_freem(*mp);
  645                 *mp = (struct mbuf *)0;
  646         }
  647         /*
  648          * Search for any mbufs that are not a multiple of 4 bytes long
  649          * or with m_data not longword aligned.
  650          * These could cause pointer alignment problems, so copy them to
  651          * well aligned mbufs.
  652          */
  653         nfs_realign(*mp, 5 * NFSX_UNSIGNED);
  654         return (error);
  655 }
  656 
  657 /*
  658  * Implement receipt of reply on a socket.
  659  * We must search through the list of received datagrams matching them
  660  * with outstanding requests using the xid, until ours is found.
  661  */
  662 /* ARGSUSED */
  663 int
  664 nfs_reply(myrep)
  665         struct nfsreq *myrep;
  666 {
  667         register struct nfsreq *rep;
  668         register struct nfsmount *nmp = myrep->r_nmp;
  669         register long t1;
  670         struct mbuf *mrep, *nam, *md;
  671         u_long rxid, *tl;
  672         caddr_t dpos, cp2;
  673         int error;
  674 
  675         /*
  676          * Loop around until we get our own reply
  677          */
  678         for (;;) {
  679                 /*
  680                  * Lock against other receivers so that I don't get stuck in
  681                  * sbwait() after someone else has received my reply for me.
  682                  * Also necessary for connection based protocols to avoid
  683                  * race conditions during a reconnect.
  684                  * If nfs_rcvlock() returns EALREADY, that means that
  685                  * the reply has already been received by another
  686                  * process and we can return immediately.  In this
  687                  * case, the lock is not taken to avoid races with
  688                  * other processes.
  689                  */
  690                 error = nfs_rcvlock(myrep);
  691                 if (error == EALREADY)
  692                         return (0);
  693                 if (error)
  694                         return (error);
  695                 /*
  696                  * Get the next Rpc reply off the socket
  697                  */
  698                 error = nfs_receive(myrep, &nam, &mrep);
  699                 nfs_rcvunlock(&nmp->nm_flag);
  700                 if (error) {
  701 
  702                         /*
  703                          * Ignore routing errors on connectionless protocols??
  704                          */
  705                         if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
  706                                 nmp->nm_so->so_error = 0;
  707                                 if (myrep->r_flags & R_GETONEREP)
  708                                         return (0);
  709                                 continue;
  710                         }
  711                         return (error);
  712                 }
  713                 if (nam)
  714                         m_freem(nam);
  715 
  716                 /*
  717                  * Get the xid and check that it is an rpc reply
  718                  */
  719                 md = mrep;
  720                 dpos = mtod(md, caddr_t);
  721                 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
  722                 rxid = *tl++;
  723                 if (*tl != rpc_reply) {
  724 #ifndef NFS_NOSERVER
  725                         if (nmp->nm_flag & NFSMNT_NQNFS) {
  726                                 if (nqnfs_callback(nmp, mrep, md, dpos))
  727                                         nfsstats.rpcinvalid++;
  728                         } else {
  729                                 nfsstats.rpcinvalid++;
  730                                 m_freem(mrep);
  731                         }
  732 #else
  733                         nfsstats.rpcinvalid++;
  734                         m_freem(mrep);
  735 #endif
  736 nfsmout:
  737                         if (myrep->r_flags & R_GETONEREP)
  738                                 return (0);
  739                         continue;
  740                 }
  741 
  742                 /*
  743                  * Loop through the request list to match up the reply
  744                  * Iff no match, just drop the datagram
  745                  */
  746                 for (rep = nfs_reqq.tqh_first; rep != 0;
  747                     rep = rep->r_chain.tqe_next) {
  748                         if (rep->r_mrep == NULL && rxid == rep->r_xid) {
  749                                 /* Found it.. */
  750                                 rep->r_mrep = mrep;
  751                                 rep->r_md = md;
  752                                 rep->r_dpos = dpos;
  753                                 if (nfsrtton) {
  754                                         struct rttl *rt;
  755 
  756                                         rt = &nfsrtt.rttl[nfsrtt.pos];
  757                                         rt->proc = rep->r_procnum;
  758                                         rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
  759                                         rt->sent = nmp->nm_sent;
  760                                         rt->cwnd = nmp->nm_cwnd;
  761                                         rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
  762                                         rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
  763                                         rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
  764                                         rt->tstamp = time;
  765                                         if (rep->r_flags & R_TIMING)
  766                                                 rt->rtt = rep->r_rtt;
  767                                         else
  768                                                 rt->rtt = 1000000;
  769                                         nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
  770                                 }
  771                                 /*
  772                                  * Update congestion window.
  773                                  * Do the additive increase of
  774                                  * one rpc/rtt.
  775                                  */
  776                                 if (nmp->nm_cwnd <= nmp->nm_sent) {
  777                                         nmp->nm_cwnd +=
  778                                            (NFS_CWNDSCALE * NFS_CWNDSCALE +
  779                                            (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
  780                                         if (nmp->nm_cwnd > NFS_MAXCWND)
  781                                                 nmp->nm_cwnd = NFS_MAXCWND;
  782                                 }
  783                                 rep->r_flags &= ~R_SENT;
  784                                 nmp->nm_sent -= NFS_CWNDSCALE;
  785                                 /*
  786                                  * Update rtt using a gain of 0.125 on the mean
  787                                  * and a gain of 0.25 on the deviation.
  788                                  */
  789                                 if (rep->r_flags & R_TIMING) {
  790                                         /*
  791                                          * Since the timer resolution of
  792                                          * NFS_HZ is so course, it can often
  793                                          * result in r_rtt == 0. Since
  794                                          * r_rtt == N means that the actual
  795                                          * rtt is between N+dt and N+2-dt ticks,
  796                                          * add 1.
  797                                          */
  798                                         t1 = rep->r_rtt + 1;
  799                                         t1 -= (NFS_SRTT(rep) >> 3);
  800                                         NFS_SRTT(rep) += t1;
  801                                         if (t1 < 0)
  802                                                 t1 = -t1;
  803                                         t1 -= (NFS_SDRTT(rep) >> 2);
  804                                         NFS_SDRTT(rep) += t1;
  805                                 }
  806                                 nmp->nm_timeouts = 0;
  807                                 break;
  808                         }
  809                 }
  810                 /*
  811                  * If not matched to a request, drop it.
  812                  * If it's mine, get out.
  813                  */
  814                 if (rep == 0) {
  815                         nfsstats.rpcunexpected++;
  816                         m_freem(mrep);
  817                 } else if (rep == myrep) {
  818                         if (rep->r_mrep == NULL)
  819                                 panic("nfsreply nil");
  820                         return (0);
  821                 }
  822                 if (myrep->r_flags & R_GETONEREP)
  823                         return (0);
  824         }
  825 }
  826 
  827 /*
  828  * nfs_request - goes something like this
  829  *      - fill in request struct
  830  *      - links it into list
  831  *      - calls nfs_send() for first transmit
  832  *      - calls nfs_receive() to get reply
  833  *      - break down rpc header and return with nfs reply pointed to
  834  *        by mrep or error
  835  * nb: always frees up mreq mbuf list
       *
       * Arguments:
       *      vp      - vnode the call is about; its mount gives the nfsmount
       *      mrest   - mbuf chain holding the procedure arguments
       *      procnum - NFS procedure number
       *      procp   - process recorded in the request (r_procp)
       *      cred    - credentials used to build the RPC authenticator
       *      mrp, mdp, dposp - on success receive the reply mbuf chain, the
       *                mbuf being parsed and the parse position within it
       *
       * Returns 0 on success.  A non-zero NFS status is returned as the error;
       * on NFSMNT_NFSV3 mounts it is or'ed with NFSERR_RETERR and the reply is
       * still passed back through mrp/mdp/dposp.  RPC-level failures return
       * EOPNOTSUPP, EAUTH, EACCES or EPROTONOSUPPORT, and errors from
       * nfs_getauth(), the send path or nfs_reply() are returned unchanged.
       *
       * NOTE(review): nfsm_dissect() and nfsm_adv() are macros defined outside
       * this function; they presumably use t1, cp2, md, dpos, mrep and error
       * and jump to nfsmout on failure -- confirm against their definitions.
  836  */
  837 int
  838 nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
  839         struct vnode *vp;
  840         struct mbuf *mrest;
  841         int procnum;
  842         struct proc *procp;
  843         struct ucred *cred;
  844         struct mbuf **mrp;
  845         struct mbuf **mdp;
  846         caddr_t *dposp;
  847 {
  848         register struct mbuf *m, *mrep, *m2;
  849         register struct nfsreq *rep;
  850         register u_long *tl;
  851         register int i;
  852         struct nfsmount *nmp;
  853         struct mbuf *md, *mheadend;
  854         struct nfsnode *np;
  855         char nickv[RPCX_NICKVERF];
  856         time_t reqtime, waituntil;
  857         caddr_t dpos, cp2;
  858         int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
  859         int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
  860         int verf_len, verf_type;
  861         u_long xid;
  862         u_quad_t frev;
  863         char *auth_str, *verf_str;
  864         NFSKERBKEY_T key;               /* save session key */
  865 
  866         nmp = VFSTONFS(vp->v_mount);
  867         MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
  868         rep->r_nmp = nmp;
  869         rep->r_vp = vp;
  870         rep->r_procp = procp;
  871         rep->r_procnum = procnum;
              /* Total the byte length of the caller's argument chain. */
  872         i = 0;
  873         m = mrest;
  874         while (m) {
  875                 i += m->m_len;
  876                 m = m->m_next;
  877         }
  878         mrest_len = i;
  879 
  880         /*
  881          * Get the RPC header with authorization.
  882          */
  883 kerbauth:
  884         verf_str = auth_str = (char *)0;
  885         if (nmp->nm_flag & NFSMNT_KERB) {
  886                 verf_str = nickv;
  887                 verf_len = sizeof (nickv);
  888                 auth_type = RPCAUTH_KERB4;
  889                 bzero((caddr_t)key, sizeof (key));
                      /*
                       * Try the nickname authenticator first; use nfs_getauth()
                       * if that fails or if the previous attempt was rejected.
                       */
  890                 if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
  891                         &auth_len, verf_str, verf_len)) {
  892                         error = nfs_getauth(nmp, rep, cred, &auth_str,
  893                                 &auth_len, verf_str, &verf_len, key);
  894                         if (error) {
  895                                 free((caddr_t)rep, M_NFSREQ);
  896                                 m_freem(mrest);
  897                                 return (error);
  898                         }
  899                 }
  900         } else {
  901                 auth_type = RPCAUTH_UNIX;
  902                 if (cred->cr_ngroups < 1)
  903                         panic("nfsreq nogrps");
                      /*
                       * Four bytes per group sent (cr_ngroups - 1, capped at
                       * nm_numgrps) plus five fixed words.
                       */
  904                 auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
  905                         nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
  906                         5 * NFSX_UNSIGNED;
  907         }
              /*
               * NOTE(review): verf_len is assigned only in the Kerberos branch
               * above; on the RPCAUTH_UNIX path it is passed here unassigned
               * (verf_str is null then) -- confirm nfsm_rpchead() ignores it.
               */
  908         m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
  909              auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
  910         if (auth_str)
  911                 free(auth_str, M_TEMP);
  912 
  913         /*
  914          * For stream protocols, insert a Sun RPC Record Mark.
               * The mark is one word: the high bit (0x80000000) or'ed with the
               * packet length not counting the mark itself.
  915          */
  916         if (nmp->nm_sotype == SOCK_STREAM) {
  917                 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
  918                 *mtod(m, u_long *) = htonl(0x80000000 |
  919                          (m->m_pkthdr.len - NFSX_UNSIGNED));
  920         }
  921         rep->r_mreq = m;
  922         rep->r_xid = xid;
              /* Per-attempt state; also reset when a "try later" status loops back. */
  923 tryagain:
  924         if (nmp->nm_flag & NFSMNT_SOFT)
  925                 rep->r_retry = nmp->nm_retry;
  926         else
  927                 rep->r_retry = NFS_MAXREXMIT + 1;       /* past clip limit */
  928         rep->r_rtt = rep->r_rexmit = 0;
  929         if (proct[procnum] > 0)
  930                 rep->r_flags = R_TIMING;
  931         else
  932                 rep->r_flags = 0;
  933         rep->r_mrep = NULL;
  934 
  935         /*
  936          * Do the client side RPC.
  937          */
  938         nfsstats.rpcrequests++;
  939         /*
  940          * Chain request into list of outstanding requests. Be sure
  941          * to put it LAST so timer finds oldest requests first.
  942          */
  943         s = splsoftclock();
  944         TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
  945 
  946         /* Get send time for nqnfs */
  947         reqtime = time.tv_sec;
  948 
  949         /*
  950          * If backing off another request or avoiding congestion, don't
  951          * send this one now but let timer do it. If not timing a request,
  952          * do it now.
  953          */
  954         if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
  955                 (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
  956                 nmp->nm_sent < nmp->nm_cwnd)) {
  957                 splx(s);
  958                 if (nmp->nm_soflags & PR_CONNREQUIRED)
  959                         error = nfs_sndlock(&nmp->nm_flag, rep);
  960                 if (!error) {
                              /* Send a copy; r_mreq is kept for retransmission. */
  961                         m2 = m_copym(m, 0, M_COPYALL, M_WAIT);
  962                         error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
  963                         if (nmp->nm_soflags & PR_CONNREQUIRED)
  964                                 nfs_sndunlock(&nmp->nm_flag);
  965                 }
  966                 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
  967                         nmp->nm_sent += NFS_CWNDSCALE;
  968                         rep->r_flags |= R_SENT;
  969                 }
  970         } else {
  971                 splx(s);
                      /*
                       * A negative r_rtt makes nfs_timer() skip its timeout check
                       * for this request and go on to attempt the send itself.
                       */
  972                 rep->r_rtt = -1;
  973         }
  974 
  975         /*
  976          * Wait for the reply from our send or the timer's.
               * An EPIPE from the send path does not stop us from waiting.
  977          */
  978         if (!error || error == EPIPE)
  979                 error = nfs_reply(rep);
  980 
  981         /*
  982          * RPC done, unlink the request.
  983          */
  984         s = splsoftclock();
  985         TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
  986         splx(s);
  987 
  988         /*
  989          * Decrement the outstanding request count.
  990          */
  991         if (rep->r_flags & R_SENT) {
  992                 rep->r_flags &= ~R_SENT;        /* paranoia */
  993                 nmp->nm_sent -= NFS_CWNDSCALE;
  994         }
  995 
  996         /*
  997          * If there was a successful reply and a tprintf msg.
  998          * tprintf a response.
  999          */
 1000         if (!error && (rep->r_flags & R_TPRINTFMSG))
 1001                 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
 1002                     "is alive again");
 1003         mrep = rep->r_mrep;
 1004         md = rep->r_md;
 1005         dpos = rep->r_dpos;
 1006         if (error) {
 1007                 m_freem(rep->r_mreq);
 1008                 free((caddr_t)rep, M_NFSREQ);
 1009                 return (error);
 1010         }
 1011 
 1012         /*
 1013          * break down the rpc header and check if ok
 1014          */
 1015         nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
 1016         if (*tl++ == rpc_msgdenied) {
 1017                 if (*tl == rpc_mismatch)
 1018                         error = EOPNOTSUPP;
 1019                 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
                              /*
                               * First Kerberos rejection: rebuild the header with
                               * full authentication and retry once.
                               * NOTE(review): mheadend presumably marks the last
                               * mbuf of the RPC header, so clearing its m_next keeps
                               * the caller's mrest out of the m_freem(rep->r_mreq)
                               * below -- confirm against nfsm_rpchead().
                               */
 1020                         if (!failed_auth) {
 1021                                 failed_auth++;
 1022                                 mheadend->m_next = (struct mbuf *)0;
 1023                                 m_freem(mrep);
 1024                                 m_freem(rep->r_mreq);
 1025                                 goto kerbauth;
 1026                         } else
 1027                                 error = EAUTH;
 1028                 } else
 1029                         error = EACCES;
 1030                 m_freem(mrep);
 1031                 m_freem(rep->r_mreq);
 1032                 free((caddr_t)rep, M_NFSREQ);
 1033                 return (error);
 1034         }
 1035 
 1036         /*
 1037          * Grab any Kerberos verifier, otherwise just throw it away.
 1038          */
 1039         verf_type = fxdr_unsigned(int, *tl++);
 1040         i = fxdr_unsigned(int, *tl);
 1041         if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
 1042                 error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
 1043                 if (error)
 1044                         goto nfsmout;
 1045         } else if (i > 0)
 1046                 nfsm_adv(nfsm_rndup(i));
 1047         nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
 1048         /* 0 == ok */
 1049         if (*tl == 0) {
                      /* The next word is the NFS status of the call itself. */
 1050                 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
 1051                 if (*tl != 0) {
 1052                         error = fxdr_unsigned(int, *tl);
 1053                         if ((nmp->nm_flag & NFSMNT_NFSV3) &&
 1054                                 error == NFSERR_TRYLATER) {
                                      /*
                                       * Server asked us to try later: sleep for
                                       * trylater_delay seconds, scale the delay
                                       * by nfs_backoff[], and reissue the request.
                                       */
 1055                                 m_freem(mrep);
 1056                                 error = 0;
 1057                                 waituntil = time.tv_sec + trylater_delay;
 1058                                 while (time.tv_sec < waituntil)
 1059                                         (void) tsleep((caddr_t)&lbolt,
 1060                                                 PSOCK, "nqnfstry", 0);
 1061                                 trylater_delay *= nfs_backoff[trylater_cnt];
 1062                                 if (trylater_cnt < 7)
 1063                                         trylater_cnt++;
 1064                                 goto tryagain;
 1065                         }
 1066 
 1067                         /*
 1068                          * If the File Handle was stale, invalidate the
 1069                          * lookup cache, just in case.
 1070                          */
 1071                         if (error == ESTALE)
 1072                                 cache_purge(vp);
                              /*
                               * On NFSv3 mounts the reply is still handed to the
                               * caller and the error is tagged with NFSERR_RETERR;
                               * otherwise the reply is freed here.
                               */
 1073                         if (nmp->nm_flag & NFSMNT_NFSV3) {
 1074                                 *mrp = mrep;
 1075                                 *mdp = md;
 1076                                 *dposp = dpos;
 1077                                 error |= NFSERR_RETERR;
 1078                         } else
 1079                                 m_freem(mrep);
 1080                         m_freem(rep->r_mreq);
 1081                         free((caddr_t)rep, M_NFSREQ);
 1082                         return (error);
 1083                 }
 1084 
 1085                 /*
 1086                  * For nqnfs, get any lease in reply
 1087                  */
 1088                 if (nmp->nm_flag & NFSMNT_NQNFS) {
 1089                         nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
 1090                         if (*tl) {
 1091                                 np = VTONFS(vp);
 1092                                 nqlflag = fxdr_unsigned(int, *tl);
 1093                                 nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
 1094                                 cachable = fxdr_unsigned(int, *tl++);
                                      /*
                                       * reqtime becomes send time plus the value
                                       * from the reply; the lease is recorded only
                                       * if that is still in the future.
                                       */
 1095                                 reqtime += fxdr_unsigned(int, *tl++);
 1096                                 if (reqtime > time.tv_sec) {
 1097                                     fxdr_hyper(tl, &frev);
 1098                                     nqnfs_clientlease(nmp, np, nqlflag,
 1099                                         cachable, reqtime, frev);
 1100                                 }
 1101                         }
 1102                 }
 1103                 *mrp = mrep;
 1104                 *mdp = md;
 1105                 *dposp = dpos;
 1106                 m_freem(rep->r_mreq);
 1107                 FREE((caddr_t)rep, M_NFSREQ);
 1108                 return (0);
 1109         }
              /* Any other accept status is reported as EPROTONOSUPPORT. */
 1110         m_freem(mrep);
 1111         error = EPROTONOSUPPORT;
 1112 nfsmout:
 1113         m_freem(rep->r_mreq);
 1114         free((caddr_t)rep, M_NFSREQ);
 1115         return (error);
 1116 }
 1117 
 1118 #ifndef NFS_NOSERVER
 1119 /*
 1120  * Generate the rpc reply header
 1121  * siz arg. is used to decide if adding a cluster is worthwhile
       *
       * Arguments:
       *      siz     - expected reply size; RPC_REPLYSIZ is added and the sum
       *                compared with MINCLSIZE to decide on a cluster
       *      nd      - request descriptor (reply xid, flags, credentials)
       *      slp     - server socket whose uid hash is searched for Kerberos
       *      err     - error to report; selects a denied or an accepted reply
       *      cache, frev - lease values piggybacked for nqnfs
       *      mrq, mbp, bposp - receive the head mbuf, the mbuf being built
       *                and the build position within it
       *
       * Always returns 0.
       *
       * NOTE(review): nfsm_build() is a macro defined outside this function;
       * it presumably uses mb, mb2 and bpos to extend the chain -- confirm
       * against its definition.
 1122  */
 1123 int
 1124 nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
 1125         int siz;
 1126         struct nfsrv_descript *nd;
 1127         struct nfssvc_sock *slp;
 1128         int err;
 1129         int cache;
 1130         u_quad_t *frev;
 1131         struct mbuf **mrq;
 1132         struct mbuf **mbp;
 1133         caddr_t *bposp;
 1134 {
 1135         register u_long *tl;
 1136         register struct mbuf *mreq;
 1137         caddr_t bpos;
 1138         struct mbuf *mb, *mb2;
 1139 
 1140         MGETHDR(mreq, M_WAIT, MT_DATA);
 1141         mb = mreq;
 1142         /*
 1143          * If this is a big reply, use a cluster else
 1144          * try and leave leading space for the lower level headers.
 1145          */
 1146         siz += RPC_REPLYSIZ;
 1147         if (siz >= MINCLSIZE) {
 1148                 MCLGET(mreq, M_WAIT);
 1149         } else
 1150                 mreq->m_data += max_hdr;
              /* Reserve six words up front; every branch below fills five or six. */
 1151         tl = mtod(mreq, u_long *);
 1152         mreq->m_len = 6 * NFSX_UNSIGNED;
 1153         bpos = ((caddr_t)tl) + mreq->m_len;
 1154         *tl++ = txdr_unsigned(nd->nd_retxid);
 1155         *tl++ = rpc_reply;
 1156         if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
 1157                 *tl++ = rpc_msgdenied;
 1158                 if (err & NFSERR_AUTHERR) {
                              /* Only five words are used here, so give one back. */
 1159                         *tl++ = rpc_autherr;
 1160                         *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
 1161                         mreq->m_len -= NFSX_UNSIGNED;
 1162                         bpos -= NFSX_UNSIGNED;
 1163                 } else {
 1164                         *tl++ = rpc_mismatch;
 1165                         *tl++ = txdr_unsigned(RPC_VER2);
 1166                         *tl = txdr_unsigned(RPC_VER2);
 1167                 }
 1168         } else {
 1169                 *tl++ = rpc_msgaccepted;
 1170 
 1171                 /*
 1172                  * For Kerberos authentication, we must send the nickname
 1173                  * verifier back, otherwise just RPCAUTH_NULL.
 1174                  */
 1175                 if (nd->nd_flag & ND_KERBFULL) {
 1176                     register struct nfsuid *nuidp;
 1177                     struct timeval ktvin, ktvout;
 1178 
                          /* Find the cached entry for this uid (and client address). */
 1179                     for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
 1180                         nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
 1181                         if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
 1182                             (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
 1183                              &nuidp->nu_haddr, nd->nd_nam2)))
 1184                             break;
 1185                     }
 1186                     if (nuidp) {
 1187                         ktvin.tv_sec =
 1188                             txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
 1189                         ktvin.tv_usec =
 1190                             txdr_unsigned(nuidp->nu_timestamp.tv_usec);
 1191 
 1192                         /*
 1193                          * Encrypt the timestamp in ecb mode using the
 1194                          * session key.
                               *
                               * NOTE(review): the encryption step is only an XXX
                               * placeholder, so nothing visible here assigns
                               * ktvout before it is copied into the reply below.
 1195                          */
 1196 #ifdef NFSKERB
 1197                         XXX
 1198 #endif
 1199 
 1200                         *tl++ = rpc_auth_kerb;
 1201                         *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
 1202                         *tl = ktvout.tv_sec;
 1203                         nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
 1204                         *tl++ = ktvout.tv_usec;
 1205                         *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
 1206                     } else {
 1207                         *tl++ = 0;
 1208                         *tl++ = 0;
 1209                     }
 1210                 } else {
 1211                         *tl++ = 0;
 1212                         *tl++ = 0;
 1213                 }
                      /* tl now points at the accept-status word. */
 1214                 switch (err) {
 1215                 case EPROGUNAVAIL:
 1216                         *tl = txdr_unsigned(RPC_PROGUNAVAIL);
 1217                         break;
 1218                 case EPROGMISMATCH:
 1219                         *tl = txdr_unsigned(RPC_PROGMISMATCH);
                              /* Two more words: 3 and 3 for nqnfs, else 2 and 3. */
 1220                         nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
 1221                         if (nd->nd_flag & ND_NQNFS) {
 1222                                 *tl++ = txdr_unsigned(3);
 1223                                 *tl = txdr_unsigned(3);
 1224                         } else {
 1225                                 *tl++ = txdr_unsigned(2);
 1226                                 *tl = txdr_unsigned(3);
 1227                         }
 1228                         break;
 1229                 case EPROCUNAVAIL:
 1230                         *tl = txdr_unsigned(RPC_PROCUNAVAIL);
 1231                         break;
 1232                 case EBADRPC:
 1233                         *tl = txdr_unsigned(RPC_GARBAGE);
 1234                         break;
 1235                 default:
                              /*
                               * RPC accept status 0; unless err is NFSERR_RETVOID
                               * one more word follows, holding err mapped through
                               * nfsrv_errmap() or 0 when err is 0.
                               */
 1236                         *tl = 0;
 1237                         if (err != NFSERR_RETVOID) {
 1238                                 nfsm_build(tl, u_long *, NFSX_UNSIGNED);
 1239                                 if (err)
 1240                                     *tl = txdr_unsigned(nfsrv_errmap(nd, err));
 1241                                 else
 1242                                     *tl = 0;
 1243                         }
 1244                         break;
 1245                 };
 1246         }
 1247 
 1248         /*
 1249          * For nqnfs, piggyback lease as requested.
               * With ND_LEASE set five words are appended (the last two hold
               * frev); otherwise a single zero word is appended.
 1250          */
 1251         if ((nd->nd_flag & ND_NQNFS) && err == 0) {
 1252                 if (nd->nd_flag & ND_LEASE) {
 1253                         nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
 1254                         *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
 1255                         *tl++ = txdr_unsigned(cache);
 1256                         *tl++ = txdr_unsigned(nd->nd_duration);
 1257                         txdr_hyper(frev, tl);
 1258                 } else {
 1259                         nfsm_build(tl, u_long *, NFSX_UNSIGNED);
 1260                         *tl = 0;
 1261                 }
 1262         }
 1263         *mrq = mreq;
 1264         *mbp = mb;
 1265         *bposp = bpos;
 1266         if (err != 0 && err != NFSERR_RETVOID)
 1267                 nfsstats.srvrpc_errs++;
 1268         return (0);
 1269 }
 1270 
 1271 
 1272 #endif /* NFS_NOSERVER */
 1273 /*
 1274  * Nfs timer routine
 1275  * Scan the nfsreq list and retransmit any requests that have timed out
 1276  * To avoid retransmission attempts on STREAM sockets (in the future) make
 1277  * sure to set the r_retry field to 0 (implies nm_retry == 0).
       *
       * The routine runs at splnet() and re-arms itself with timeout() for
       * nfs_ticks ticks before returning.  Unless NFS_NOSERVER is defined it
       * also calls nqnfs_serverd() whenever time.tv_sec has changed since the
       * last such call, and wakes nfsd for any socket whose first
       * write-gathering entry is due.
 1278  */
 1279 void
 1280 nfs_timer(arg)
 1281         void *arg;      /* never used */
 1282 {
 1283         register struct nfsreq *rep;
 1284         register struct mbuf *m;
 1285         register struct socket *so;
 1286         register struct nfsmount *nmp;
 1287         register int timeo;
 1288         int s, error;
 1289 #ifndef NFS_NOSERVER
 1290         static long lasttime = 0;
 1291         register struct nfssvc_sock *slp;
 1292         u_quad_t cur_usec;
 1293 #endif /* NFS_NOSERVER */
 1294 
 1295         s = splnet();
 1296         for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
 1297                 nmp = rep->r_nmp;
                      /* Skip requests already answered or already terminated. */
 1298                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
 1299                         continue;
 1300                 if (nfs_sigintr(nmp, rep, rep->r_procp)) {
 1301                         rep->r_flags |= R_SOFTTERM;
 1302                         continue;
 1303                 }
                      /*
                       * r_rtt >= 0 counts ticks since the request was sent; a
                       * negative value skips the timeout test so the send is
                       * attempted below.
                       */
 1304                 if (rep->r_rtt >= 0) {
 1305                         rep->r_rtt++;
 1306                         if (nmp->nm_flag & NFSMNT_DUMBTIMR)
 1307                                 timeo = nmp->nm_timeo;
 1308                         else
 1309                                 timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
 1310                         if (nmp->nm_timeouts > 0)
 1311                                 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
 1312                         if (rep->r_rtt <= timeo)
 1313                                 continue;
 1314                         if (nmp->nm_timeouts < 8)
 1315                                 nmp->nm_timeouts++;
 1316                 }
 1317                 /*
 1318                  * Check for server not responding
 1319                  */
 1320                 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
 1321                      rep->r_rexmit > nmp->nm_deadthresh) {
 1322                         nfs_msg(rep->r_procp,
 1323                             nmp->nm_mountp->mnt_stat.f_mntfromname,
 1324                             "not responding");
 1325                         rep->r_flags |= R_TPRINTFMSG;
 1326                 }
 1327                 if (rep->r_rexmit >= rep->r_retry) {    /* too many */
 1328                         nfsstats.rpctimeouts++;
 1329                         rep->r_flags |= R_SOFTTERM;
 1330                         continue;
 1331                 }
                      /*
                       * Non-datagram sockets are never resent from here; only the
                       * retransmit count advances, clipped at NFS_MAXREXMIT.
                       */
 1332                 if (nmp->nm_sotype != SOCK_DGRAM) {
 1333                         if (++rep->r_rexmit > NFS_MAXREXMIT)
 1334                                 rep->r_rexmit = NFS_MAXREXMIT;
 1335                         continue;
 1336                 }
 1337                 if ((so = nmp->nm_so) == NULL)
 1338                         continue;
 1339 
 1340                 /*
 1341                  * If there is enough space and the window allows..
 1342                  *      Resend it
 1343                  * Set r_rtt to -1 in case we fail to send it now.
                       * The copy is made with M_DONTWAIT; if it fails the send
                       * is skipped and r_rtt stays at -1.
 1344                  */
 1345                 rep->r_rtt = -1;
 1346                 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
 1347                    ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
 1348                     (rep->r_flags & R_SENT) ||
 1349                     nmp->nm_sent < nmp->nm_cwnd) &&
 1350                    (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
                              /*
                               * The destination address nm_nam is passed only when
                               * NFSMNT_NOCONN is set.
                               */
 1351                         if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
 1352                             error = (*so->so_proto->pr_usrreqs->pru_send)
 1353                                     (so, 0, m, (struct mbuf *)0,
 1354                                      (struct mbuf *)0);
 1355                         else
 1356                             error = (*so->so_proto->pr_usrreqs->pru_send)
 1357                                     (so, 0, m, nmp->nm_nam, (struct mbuf *)0);
 1358                         if (error) {
 1359                                 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
 1360                                         so->so_error = 0;
 1361                         } else {
 1362                                 /*
 1363                                  * Iff first send, start timing
 1364                                  * else turn timing off, backoff timer
 1365                                  * and divide congestion window by 2.
 1366                                  */
 1367                                 if (rep->r_flags & R_SENT) {
 1368                                         rep->r_flags &= ~R_TIMING;
 1369                                         if (++rep->r_rexmit > NFS_MAXREXMIT)
 1370                                                 rep->r_rexmit = NFS_MAXREXMIT;
 1371                                         nmp->nm_cwnd >>= 1;
 1372                                         if (nmp->nm_cwnd < NFS_CWNDSCALE)
 1373                                                 nmp->nm_cwnd = NFS_CWNDSCALE;
 1374                                         nfsstats.rpcretries++;
 1375                                 } else {
 1376                                         rep->r_flags |= R_SENT;
 1377                                         nmp->nm_sent += NFS_CWNDSCALE;
 1378                                 }
 1379                                 rep->r_rtt = 0;
 1380                         }
 1381                 }
 1382         }
 1383 #ifndef NFS_NOSERVER
 1384         /*
 1385          * Call the nqnfs server timer once a second to handle leases.
 1386          */
 1387         if (lasttime != time.tv_sec) {
 1388                 lasttime = time.tv_sec;
 1389                 nqnfs_serverd();
 1390         }
 1391 
 1392         /*
 1393          * Scan the write gathering queues for writes that need to be
 1394          * completed now.
               * Only the first entry of each socket's queue is compared with
               * the current time in microseconds.
 1395          */
 1396         cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
 1397         for (slp = nfssvc_sockhead.tqh_first; slp != 0;
 1398             slp = slp->ns_chain.tqe_next) {
 1399             if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
 1400                 nfsrv_wakenfsd(slp);
 1401         }
 1402 #endif /* NFS_NOSERVER */
 1403         splx(s);
              /* Schedule the next run. */
 1404         timeout(nfs_timer, (void *)0, nfs_ticks);
 1405 }
 1406 
 1407 
 1408 /*
 1409  * Test for a termination condition pending on the process.
 1410  * This is used for NFSMNT_INT mounts.
 1411  */
 1412 int
 1413 nfs_sigintr(nmp, rep, p)
 1414         struct nfsmount *nmp;
 1415         struct nfsreq *rep;
 1416         register struct proc *p;
 1417 {
 1418 
 1419         if (rep && (rep->r_flags & R_SOFTTERM))
 1420                 return (EINTR);
 1421         if (!(nmp->nm_flag & NFSMNT_INT))
 1422                 return (0);
 1423         if (p && p->p_siglist &&
 1424             (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
 1425             NFSINT_SIGMASK))
 1426                 return (EINTR);
 1427         return (0);
 1428 }
 1429 
 1430 /*
 1431  * Lock a socket against others.
 1432  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 1433  * and also to avoid race conditions between the processes with nfs requests
 1434  * in progress when a reconnect is necessary.
 1435  */
 1436 int
 1437 nfs_sndlock(flagp, rep)
 1438         register int *flagp;
 1439         struct nfsreq *rep;
 1440 {
 1441         struct proc *p;
 1442         int slpflag = 0, slptimeo = 0;
 1443 
 1444         if (rep) {
 1445                 p = rep->r_procp;
 1446                 if (rep->r_nmp->nm_flag & NFSMNT_INT)
 1447                         slpflag = PCATCH;
 1448         } else
 1449                 p = (struct proc *)0;
 1450         while (*flagp & NFSMNT_SNDLOCK) {
 1451                 if (nfs_sigintr(rep->r_nmp, rep, p))
 1452                         return (EINTR);
 1453                 *flagp |= NFSMNT_WANTSND;
 1454                 (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
 1455                         slptimeo);
 1456                 if (slpflag == PCATCH) {
 1457                         slpflag = 0;
 1458                         slptimeo = 2 * hz;
 1459                 }
 1460         }
 1461         *flagp |= NFSMNT_SNDLOCK;
 1462         return (0);
 1463 }
 1464 
 1465 /*
 1466  * Unlock the stream socket for others.
 1467  */
 1468 void
 1469 nfs_sndunlock(flagp)
 1470         register int *flagp;
 1471 {
 1472 
 1473         if ((*flagp & NFSMNT_SNDLOCK) == 0)
 1474                 panic("nfs sndunlock");
 1475         *flagp &= ~NFSMNT_SNDLOCK;
 1476         if (*flagp & NFSMNT_WANTSND) {
 1477                 *flagp &= ~NFSMNT_WANTSND;
 1478                 wakeup((caddr_t)flagp);
 1479         }
 1480 }
 1481 
 1482 static int
 1483 nfs_rcvlock(rep)
 1484         register struct nfsreq *rep;
 1485 {
 1486         register int *flagp = &rep->r_nmp->nm_flag;
 1487         int slpflag, slptimeo = 0;
 1488 
 1489         if (*flagp & NFSMNT_INT)
 1490                 slpflag = PCATCH;
 1491         else
 1492                 slpflag = 0;
 1493         while (*flagp & NFSMNT_RCVLOCK) {
 1494                 if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
 1495                         return (EINTR);
 1496                 *flagp |= NFSMNT_WANTRCV;
 1497                 (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
 1498                         slptimeo);
 1499                 /*
 1500                  * If our reply was recieved while we were sleeping,
 1501                  * then just return without taking the lock to avoid a
 1502                  * situation where a single iod could 'capture' the
 1503                  * recieve lock.
 1504                  */
 1505                 if (rep->r_mrep != NULL)
 1506                         return (EALREADY);
 1507                 if (slpflag == PCATCH) {
 1508                         slpflag = 0;
 1509                         slptimeo = 2 * hz;
 1510                 }
 1511         }
 1512         *flagp |= NFSMNT_RCVLOCK;
 1513         return (0);
 1514 }
 1515 
 1516 /*
 1517  * Unlock the stream socket for others.
 1518  */
 1519 static void
 1520 nfs_rcvunlock(flagp)
 1521         register int *flagp;
 1522 {
 1523 
 1524         if ((*flagp & NFSMNT_RCVLOCK) == 0)
 1525                 panic("nfs rcvunlock");
 1526         *flagp &= ~NFSMNT_RCVLOCK;
 1527         if (*flagp & NFSMNT_WANTRCV) {
 1528                 *flagp &= ~NFSMNT_WANTRCV;
 1529                 wakeup((caddr_t)flagp);
 1530         }
 1531 }
 1532 
 1533 /*
 1534  * Check for badly aligned mbuf data areas and
 1535  * realign data in an mbuf list by copying the data areas up, as required.
       *
       * m    - head of the mbuf chain to examine.
       * hsiz - byte count the first repacked mbuf is limited to when its
       *        original contents were no longer than that and it has more
       *        room than that.
       *
       * The chain is scanned for the first mbuf whose length or data address
       * is not a multiple of 4.  From that mbuf onwards all data is repacked
       * towards the front of the chain, after which the function returns.
       * Mbufs drained by the repacking stay in the chain with m_len == 0.
       * A chain with no such mbuf is left untouched.
 1536  */
 1537 static void
 1538 nfs_realign(m, hsiz)
 1539         register struct mbuf *m;
 1540         int hsiz;
 1541 {
 1542         register struct mbuf *m2;
 1543         register int siz, mlen, olen;
 1544         register caddr_t tcp, fcp;
 1545         struct mbuf *mnew;
 1546 
 1547         while (m) {
 1548             /*
 1549              * This never happens for UDP, rarely happens for TCP
 1550              * but frequently happens for iso transport.
 1551              */
 1552             if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
                  /* olen/fcp: bytes still to copy from, and read position in, the source mbuf. */
 1553                 olen = m->m_len;
 1554                 fcp = mtod(m, caddr_t);
                  /*
                   * The data pointer itself is misaligned: move the start of
                   * the data area.  For an external buffer, leave olen bytes
                   * of room at the end, rounded down to a multiple of 4;
                   * otherwise restart at m_dat.  M_PKTHDR is cleared first.
                   */
 1555                 if ((int)fcp & 0x3) {
 1556                         m->m_flags &= ~M_PKTHDR;
 1557                         if (m->m_flags & M_EXT)
 1558                                 m->m_data = m->m_ext.ext_buf +
 1559                                         ((m->m_ext.ext_size - olen) & ~0x3);
 1560                         else
 1561                                 m->m_data = m->m_dat;
 1562                 }
                  /*
                   * tcp is the write position, mnew the mbuf currently being
                   * filled and m2 the next mbuf that will be filled.
                   */
 1563                 m->m_len = 0;
 1564                 tcp = mtod(m, caddr_t);
 1565                 mnew = m;
 1566                 m2 = m->m_next;
 1567 
 1568                 /*
 1569                  * If possible, only put the first invariant part
 1570                  * of the RPC header in the first mbuf.
 1571                  */
 1572                 mlen = M_TRAILINGSPACE(m);
 1573                 if (olen <= hsiz && mlen > hsiz)
 1574                         mlen = hsiz;
 1575 
 1576                 /*
 1577                  * Loop through the mbuf list consolidating data.
 1578                  */
 1579                 while (m) {
 1580                         while (olen > 0) {
                                  /*
                                   * Destination mbuf is full: start filling
                                   * the next one from the start of its buffer.
                                   * NOTE(review): m2 is dereferenced without a
                                   * NULL check; this assumes the write position
                                   * never runs past the end of the chain.
                                   */
 1581                                 if (mlen == 0) {
 1582                                         m2->m_flags &= ~M_PKTHDR;
 1583                                         if (m2->m_flags & M_EXT)
 1584                                                 m2->m_data = m2->m_ext.ext_buf;
 1585                                         else
 1586                                                 m2->m_data = m2->m_dat;
 1587                                         m2->m_len = 0;
 1588                                         mlen = M_TRAILINGSPACE(m2);
 1589                                         tcp = mtod(m2, caddr_t);
 1590                                         mnew = m2;
 1591                                         m2 = m2->m_next;
 1592                                 }
                                  /* Copy as much as fits; skipped when source and destination coincide. */
 1593                                 siz = min(mlen, olen);
 1594                                 if (tcp != fcp)
 1595                                         bcopy(fcp, tcp, siz);
 1596                                 mnew->m_len += siz;
 1597                                 mlen -= siz;
 1598                                 olen -= siz;
 1599                                 tcp += siz;
 1600                                 fcp += siz;
 1601                         }
                          /* Source mbuf drained: continue reading from the next one. */
 1602                         m = m->m_next;
 1603                         if (m) {
 1604                                 olen = m->m_len;
 1605                                 fcp = mtod(m, caddr_t);
 1606                         }
 1607                 }
 1608 
 1609                 /*
 1610                  * Finally, set m_len == 0 for any trailing mbufs that have
 1611                  * been copied out of.
 1612                  */
 1613                 while (m2) {
 1614                         m2->m_len = 0;
 1615                         m2 = m2->m_next;
 1616                 }
 1617                 return;
 1618             }
 1619             m = m->m_next;
 1620         }
 1621 }
 1622 
 1623 #ifndef NFS_NOSERVER
 1624 /*
 1625  * Socket upcall routine for the nfsd sockets.
 1626  * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 1627  * Essentially do as much as possible non-blocking, else punt and it will
 1628  * be called with M_WAIT from an nfsd.
       *
       * so       - socket that has data pending.
       * arg      - the struct nfssvc_sock for that socket.
       * waitflag - M_DONTWAIT when nothing here may block; it is also passed
       *            on to nfsrv_getstream().
       *
       * Nothing is returned.  Results are left in *slp: complete requests are
       * queued on ns_rec/ns_recend, unparsed stream data on ns_raw/ns_rawend,
       * and SLP_NEEDQ or SLP_DISCONN is set in ns_flag when more work or a
       * disconnect is pending.  Does nothing if the socket is not SLP_VALID.
 1629  */
 1630 void
 1631 nfsrv_rcv(so, arg, waitflag)
 1632         struct socket *so;
 1633         caddr_t arg;
 1634         int waitflag;
 1635 {
 1636         register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
 1637         register struct mbuf *m;
 1638         struct mbuf *mp, *nam;
 1639         struct uio auio;
 1640         int flags, error;
 1641 
 1642         if ((slp->ns_flag & SLP_VALID) == 0)
 1643                 return;
 1644 #ifdef notdef
 1645         /*
 1646          * Define this to test for nfsds handling this under heavy load.
 1647          */
 1648         if (waitflag == M_DONTWAIT) {
 1649                 slp->ns_flag |= SLP_NEEDQ; goto dorecs;
 1650         }
 1651 #endif
 1652         auio.uio_procp = NULL;
 1653         if (so->so_type == SOCK_STREAM) {
 1654                 /*
 1655                  * If there are already records on the queue, defer soreceive()
 1656                  * to an nfsd so that there is feedback to the TCP layer that
 1657                  * the nfs servers are heavily loaded.
 1658                  */
 1659                 if (slp->ns_rec && waitflag == M_DONTWAIT) {
 1660                         slp->ns_flag |= SLP_NEEDQ;
 1661                         goto dorecs;
 1662                 }
 1663 
 1664                 /*
 1665                  * Do soreceive().
                   * uio_resid is preset to a very large count; the number of
                   * bytes actually received is that count minus the value
                   * left in uio_resid afterwards.
                   * NOTE(review): nam is handed to soreceive() but never used
                   * or freed on this path; confirm no address mbuf is
                   * returned for stream sockets.
 1666                  */
 1667                 auio.uio_resid = 1000000000;
 1668                 flags = MSG_DONTWAIT;
 1669                 error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
 1670                 if (error || mp == (struct mbuf *)0) {
 1671                         if (error == EWOULDBLOCK)
 1672                                 slp->ns_flag |= SLP_NEEDQ;
 1673                         else
 1674                                 slp->ns_flag |= SLP_DISCONN;
 1675                         goto dorecs;
 1676                 }
                  /* Append the new mbufs to the raw stream chain and update the byte count. */
 1677                 m = mp;
 1678                 if (slp->ns_rawend) {
 1679                         slp->ns_rawend->m_next = m;
 1680                         slp->ns_cc += 1000000000 - auio.uio_resid;
 1681                 } else {
 1682                         slp->ns_raw = m;
 1683                         slp->ns_cc = 1000000000 - auio.uio_resid;
 1684                 }
 1685                 while (m->m_next)
 1686                         m = m->m_next;
 1687                 slp->ns_rawend = m;
 1688 
 1689                 /*
 1690                  * Now try and parse record(s) out of the raw stream data.
                   * EPERM is treated as fatal for the connection; any other
                   * error just asks for another pass later (SLP_NEEDQ).
 1691                  */
 1692                 error = nfsrv_getstream(slp, waitflag);
 1693                 if (error) {
 1694                         if (error == EPERM)
 1695                                 slp->ns_flag |= SLP_DISCONN;
 1696                         else
 1697                                 slp->ns_flag |= SLP_NEEDQ;
 1698                 }
 1699         } else {
                  /*
                   * Non-stream socket: keep receiving until soreceive() hands
                   * back no data.  Each receive is queued as one record.
                   */
 1700                 do {
 1701                         auio.uio_resid = 1000000000;
 1702                         flags = MSG_DONTWAIT;
 1703                         error = soreceive(so, &nam, &auio, &mp,
 1704                                                 (struct mbuf **)0, &flags);
 1705                         if (mp) {
 1706                                 nfs_realign(mp, 10 * NFSX_UNSIGNED);
                                  /* If an address mbuf came back, it goes in front of the data. */
 1707                                 if (nam) {
 1708                                         m = nam;
 1709                                         m->m_next = mp;
 1710                                 } else
 1711                                         m = mp;
                                  /* Link the record at the tail of the ns_rec queue via m_nextpkt. */
 1712                                 if (slp->ns_recend)
 1713                                         slp->ns_recend->m_nextpkt = m;
 1714                                 else
 1715                                         slp->ns_rec = m;
 1716                                 slp->ns_recend = m;
 1717                                 m->m_nextpkt = (struct mbuf *)0;
 1718                         }
                          /*
                           * Errors other than EWOULDBLOCK only mark the socket
                           * disconnected for protocols with PR_CONNREQUIRED;
                           * otherwise they are ignored here.
                           */
 1719                         if (error) {
 1720                                 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
 1721                                         && error != EWOULDBLOCK) {
 1722                                         slp->ns_flag |= SLP_DISCONN;
 1723                                         goto dorecs;
 1724                                 }
 1725                         }
 1726                 } while (mp);
 1727         }
 1728 
 1729         /*
 1730          * Now try and process the request records, non-blocking.
            * An nfsd is only woken when called with M_DONTWAIT and there is
            * a queued record or a pending SLP_NEEDQ / SLP_DISCONN condition.
 1731          */
 1732 dorecs:
 1733         if (waitflag == M_DONTWAIT &&
 1734                 (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
 1735                 nfsrv_wakenfsd(slp);
 1736 }
 1737 
 1738 /*
 1739  * Try and extract an RPC request from the mbuf data list received on a
 1740  * stream socket. The "waitflag" argument indicates whether or not it
 1741  * can sleep.
       *
       * slp      - socket state; raw data is taken from ns_raw/ns_cc, fragments
       *            are collected on ns_frag and finished records are appended
       *            to ns_rec/ns_recend.
       * waitflag - passed to m_copym() when an mbuf has to be split.
       *
       * Returns 0 when the buffered data has been consumed as far as possible
       * (more data is needed to continue), EPERM when a record mark carries a
       * length outside NFS_MINPACKET..NFS_MAXPACKET, and EWOULDBLOCK when
       * m_copym() could not supply a copy.
       *
       * SLP_GETSTREAM is held in ns_flag for the duration of the call and the
       * function panics if it is already set on entry.  ns_reclen keeps the
       * length of the fragment being assembled between calls, so a later call
       * carries on where this one stopped.
 1742  */
 1743 static int
 1744 nfsrv_getstream(slp, waitflag)
 1745         register struct nfssvc_sock *slp;
 1746         int waitflag;
 1747 {
 1748         register struct mbuf *m, **mpp;
 1749         register char *cp1, *cp2;
 1750         register int len;
 1751         struct mbuf *om, *m2, *recm = 0;
 1752         u_long recmark;
 1753 
 1754         if (slp->ns_flag & SLP_GETSTREAM)
 1755                 panic("nfs getstream");
 1756         slp->ns_flag |= SLP_GETSTREAM;
 1757         for (;;) {
              /*
               * No fragment in progress: read the next record mark,
               * NFSX_UNSIGNED bytes long, if that much data is buffered.
               */
 1758             if (slp->ns_reclen == 0) {
 1759                 if (slp->ns_cc < NFSX_UNSIGNED) {
 1760                         slp->ns_flag &= ~SLP_GETSTREAM;
 1761                         return (0);
 1762                 }
 1763                 m = slp->ns_raw;
 1764                 if (m->m_len >= NFSX_UNSIGNED) {
 1765                         bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
 1766                         m->m_data += NFSX_UNSIGNED;
 1767                         m->m_len -= NFSX_UNSIGNED;
 1768                 } else {
                          /*
                           * The mark straddles mbufs: gather it one byte at a
                           * time, stepping over mbufs that have run empty.
                           */
 1769                         cp1 = (caddr_t)&recmark;
 1770                         cp2 = mtod(m, caddr_t);
 1771                         while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
 1772                                 while (m->m_len == 0) {
 1773                                         m = m->m_next;
 1774                                         cp2 = mtod(m, caddr_t);
 1775                                 }
 1776                                 *cp1++ = *cp2++;
 1777                                 m->m_data++;
 1778                                 m->m_len--;
 1779                         }
 1780                 }
                  /*
                   * Top bit of the mark flags the last fragment of a record;
                   * the remaining bits are the fragment length.
                   */
 1781                 slp->ns_cc -= NFSX_UNSIGNED;
 1782                 recmark = ntohl(recmark);
 1783                 slp->ns_reclen = recmark & ~0x80000000;
 1784                 if (recmark & 0x80000000)
 1785                         slp->ns_flag |= SLP_LASTFRAG;
 1786                 else
 1787                         slp->ns_flag &= ~SLP_LASTFRAG;
 1788                 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
 1789                         slp->ns_flag &= ~SLP_GETSTREAM;
 1790                         return (EPERM);
 1791                 }
 1792             }
 1793 
 1794             /*
 1795              * Now get the record part.
               * Three cases: the buffered data is exactly one fragment, it
               * holds more than one fragment, or the fragment is not yet
               * complete (return and wait for more data).
 1796              */
 1797             if (slp->ns_cc == slp->ns_reclen) {
 1798                 recm = slp->ns_raw;
 1799                 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
 1800                 slp->ns_cc = slp->ns_reclen = 0;
 1801             } else if (slp->ns_cc > slp->ns_reclen) {
                  /*
                   * Walk the raw chain until ns_reclen bytes are covered.
                   * om trails m by one mbuf so the chain can be cut there.
                   */
 1802                 len = 0;
 1803                 m = slp->ns_raw;
 1804                 om = (struct mbuf *)0;
 1805                 while (len < slp->ns_reclen) {
 1806                         if ((len + m->m_len) > slp->ns_reclen) {
                                  /*
                                   * The fragment ends inside this mbuf: copy
                                   * the leading part for the fragment and
                                   * trim it off the original mbuf, which
                                   * stays at the head of the raw data.
                                   */
 1807                                 m2 = m_copym(m, 0, slp->ns_reclen - len,
 1808                                         waitflag);
 1809                                 if (m2) {
 1810                                         if (om) {
 1811                                                 om->m_next = m2;
 1812                                                 recm = slp->ns_raw;
 1813                                         } else
 1814                                                 recm = m2;
 1815                                         m->m_data += slp->ns_reclen - len;
 1816                                         m->m_len -= slp->ns_reclen - len;
 1817                                         len = slp->ns_reclen;
 1818                                 } else {
                                          /*
                                           * Only locals have been changed in
                                           * this walk so far, so ns_raw, ns_cc
                                           * and ns_reclen are still intact for
                                           * a retry.
                                           */
 1819                                         slp->ns_flag &= ~SLP_GETSTREAM;
 1820                                         return (EWOULDBLOCK);
 1821                                 }
 1822                         } else if ((len + m->m_len) == slp->ns_reclen) {
                                  /* The fragment ends exactly at an mbuf boundary: cut the chain here. */
 1823                                 om = m;
 1824                                 len += m->m_len;
 1825                                 m = m->m_next;
 1826                                 recm = slp->ns_raw;
 1827                                 om->m_next = (struct mbuf *)0;
 1828                         } else {
 1829                                 om = m;
 1830                                 len += m->m_len;
 1831                                 m = m->m_next;
 1832                         }
 1833                 }
 1834                 slp->ns_raw = m;
 1835                 slp->ns_cc -= len;
 1836                 slp->ns_reclen = 0;
 1837             } else {
 1838                 slp->ns_flag &= ~SLP_GETSTREAM;
 1839                 return (0);
 1840             }
 1841 
 1842             /*
 1843              * Accumulate the fragments into a record.
               * The fragment is appended to ns_frag; on the last fragment
               * the whole record is realigned and moved to the tail of the
               * ns_rec queue.
               * NOTE(review): m_nextpkt of the queued record is not cleared
               * here; presumably it is already NULL - confirm.
 1844              */
 1845             mpp = &slp->ns_frag;
 1846             while (*mpp)
 1847                 mpp = &((*mpp)->m_next);
 1848             *mpp = recm;
 1849             if (slp->ns_flag & SLP_LASTFRAG) {
 1850                 nfs_realign(slp->ns_frag, 10 * NFSX_UNSIGNED);
 1851                 if (slp->ns_recend)
 1852                     slp->ns_recend->m_nextpkt = slp->ns_frag;
 1853                 else
 1854                     slp->ns_rec = slp->ns_frag;
 1855                 slp->ns_recend = slp->ns_frag;
 1856                 slp->ns_frag = (struct mbuf *)0;
 1857             }
 1858         }
 1859 }
 1860 
 1861 /*
 1862  * Parse an RPC header.
 1863  */
 1864 int
 1865 nfsrv_dorec(slp, nfsd, ndp)
 1866         register struct nfssvc_sock *slp;
 1867         struct nfsd *nfsd;
 1868         struct nfsrv_descript **ndp;
 1869 {
 1870         register struct mbuf *m, *nam;
 1871         register struct nfsrv_descript *nd;
 1872         int error;
 1873 
 1874         *ndp = NULL;
 1875         if ((slp->ns_flag & SLP_VALID) == 0 ||
 1876             (m = slp->ns_rec) == (struct mbuf *)0)
 1877                 return (ENOBUFS);
 1878         slp->ns_rec = m->m_nextpkt;
 1879         if (slp->ns_rec)
 1880                 m->m_nextpkt = (struct mbuf *)0;
 1881         else
 1882                 slp->ns_recend = (struct mbuf *)0;
 1883         if (m->m_type == MT_SONAME) {
 1884                 nam = m;
 1885                 m = m->m_next;
 1886                 nam->m_next = NULL;
 1887         } else
 1888                 nam = NULL;
 1889         MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
 1890                 M_NFSRVDESC, M_WAITOK);
 1891         nd->nd_md = nd->nd_mrep = m;
 1892         nd->nd_nam2 = nam;
 1893         nd->nd_dpos = mtod(m, caddr_t);
 1894         error = nfs_getreq(nd, nfsd, TRUE);
 1895         if (error) {
 1896                 m_freem(nam);
 1897                 free((caddr_t)nd, M_NFSRVDESC);
 1898                 return (error);
 1899         }
 1900         *ndp = nd;
 1901         nfsd->nfsd_nd = nd;
 1902         return (0);
 1903 }
 1904 
 1905 /*
 1906  * Parse an RPC request
 1907  * - verify it
 1908  * - fill in the cred struct.
       *
       * nd         - request descriptor.  nd_mrep/nd_md/nd_dpos give the data
       *              and parse position on entry; on a fully parsed request
       *              nd_md/nd_dpos are updated to the position after the
       *              header, and nd_retxid (only with has_header),
       *              nd_repstat, nd_flag, nd_procnum, nd_cr and nd_duration
       *              are filled in.
       * nfsd       - server daemon state; receives the Kerberos ticket and
       *              verifier for RPCAKN_FULLNAME and supplies the socket
       *              used for the nickname lookup.
       * has_header - non-zero when the data starts with the xid and message
       *              direction words (10 words are dissected instead of 8).
       *
       * Return value:
       *  - 0 with nd_repstat == 0: request accepted.
       *  - 0 with nd_repstat != 0 and nd_procnum == NFSPROC_NOOP: request
       *    rejected at the RPC level; nd_repstat holds the status for the
       *    reply.
       *  - EBADRPC: malformed request; mrep has been freed here.
       *  - whatever is in `error' when control reaches the nfsmout label
       *    (presumably set by the nfsm_* macros on a parse failure - the
       *    macros are not visible here, confirm).
 1909  */
 1910 int
 1911 nfs_getreq(nd, nfsd, has_header)
 1912         register struct nfsrv_descript *nd;
 1913         struct nfsd *nfsd;
 1914         int has_header;
 1915 {
 1916         register int len, i;
 1917         register u_long *tl;
 1918         register long t1;
 1919         struct uio uio;
 1920         struct iovec iov;
 1921         caddr_t dpos, cp2, cp;
 1922         u_long nfsvers, auth_type;
 1923         uid_t nickuid;
 1924         int error = 0, nqnfs = 0, ticklen;
 1925         struct mbuf *mrep, *md;
 1926         register struct nfsuid *nuidp;
 1927         struct timeval tvin, tvout;
 1928 
          /* Local copies of the parse state; written back to nd only on full success. */
 1929         mrep = nd->nd_mrep;
 1930         md = nd->nd_md;
 1931         dpos = nd->nd_dpos;
 1932         if (has_header) {
 1933                 nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
 1934                 nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
 1935                 if (*tl++ != rpc_call) {
 1936                         m_freem(mrep);
 1937                         return (EBADRPC);
 1938                 }
 1939         } else
 1940                 nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
 1941         nd->nd_repstat = 0;
 1942         nd->nd_flag = 0;
          /* RPC version, then program number (plain NFS or NQNFS). */
 1943         if (*tl++ != rpc_vers) {
 1944                 nd->nd_repstat = ERPCMISMATCH;
 1945                 nd->nd_procnum = NFSPROC_NOOP;
 1946                 return (0);
 1947         }
 1948         if (*tl != nfs_prog) {
 1949                 if (*tl == nqnfs_prog)
 1950                         nqnfs++;
 1951                 else {
 1952                         nd->nd_repstat = EPROGUNAVAIL;
 1953                         nd->nd_procnum = NFSPROC_NOOP;
 1954                         return (0);
 1955                 }
 1956         }
 1957         tl++;
          /* Program version: NFS_VER2..NFS_VER3 for NFS, exactly NQNFS_VER3 for NQNFS. */
 1958         nfsvers = fxdr_unsigned(u_long, *tl++);
 1959         if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
 1960                 (nfsvers != NQNFS_VER3 && nqnfs)) {
 1961                 nd->nd_repstat = EPROGMISMATCH;
 1962                 nd->nd_procnum = NFSPROC_NOOP;
 1963                 return (0);
 1964         }
 1965         if (nqnfs)
 1966                 nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
 1967         else if (nfsvers == NFS_VER3)
 1968                 nd->nd_flag = ND_NFSV3;
          /*
           * Procedure number.  The null procedure returns right away, before
           * any credential is looked at, leaving nd_cr, nd_duration, nd_md
           * and nd_dpos as they were.
           */
 1969         nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
 1970         if (nd->nd_procnum == NFSPROC_NULL)
 1971                 return (0);
 1972         if (nd->nd_procnum >= NFS_NPROCS ||
 1973                 (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
 1974                 (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
 1975                 nd->nd_repstat = EPROCUNAVAIL;
 1976                 nd->nd_procnum = NFSPROC_NOOP;
 1977                 return (0);
 1978         }
          /* Requests without ND_NFSV3 have their procedure number mapped through nfsv3_procid[]. */
 1979         if ((nd->nd_flag & ND_NFSV3) == 0)
 1980                 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
          /* Credential flavor (compared in wire byte order) and credential length. */
 1981         auth_type = *tl++;
 1982         len = fxdr_unsigned(int, *tl++);
 1983         if (len < 0 || len > RPCAUTH_MAXSIZ) {
 1984                 m_freem(mrep);
 1985                 return (EBADRPC);
 1986         }
 1987 
 1988         nd->nd_flag &= ~ND_KERBAUTH;
 1989         /*
 1990          * Handle auth_unix or auth_kerb.
 1991          */
 1992         if (auth_type == rpc_auth_unix) {
                  /*
                   * *++tl steps over one word (presumably the AUTH_UNIX stamp)
                   * and reads the length of the name that follows, which is
                   * then skipped.
                   */
 1993                 len = fxdr_unsigned(int, *++tl);
 1994                 if (len < 0 || len > NFS_MAXNAMLEN) {
 1995                         m_freem(mrep);
 1996                         return (EBADRPC);
 1997                 }
 1998                 nfsm_adv(nfsm_rndup(len));
 1999                 nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
 2000                 bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
 2001                 nd->nd_cr.cr_ref = 1;
 2002                 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
 2003                 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
 2004                 len = fxdr_unsigned(int, *tl);
 2005                 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
 2006                         m_freem(mrep);
 2007                         return (EBADRPC);
 2008                 }
                  /*
                   * Supplementary groups are stored from index 1; entries that
                   * do not fit below NGROUPS are skipped.  Index 0 is
                   * presumably covered by the cr_gid assignment above - confirm
                   * against the ucred definition.  The dissect also covers the
                   * two verifier words that follow the group list.
                   */
 2009                 nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
 2010                 for (i = 1; i <= len; i++)
 2011                     if (i < NGROUPS)
 2012                         nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
 2013                     else
 2014                         tl++;
 2015                 nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
 2016                 if (nd->nd_cr.cr_ngroups > 1)
 2017                     nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
                  /* *++tl steps over the verifier flavor and reads the verifier length; its body is skipped. */
 2018                 len = fxdr_unsigned(int, *++tl);
 2019                 if (len < 0 || len > RPCAUTH_MAXSIZ) {
 2020                         m_freem(mrep);
 2021                         return (EBADRPC);
 2022                 }
 2023                 if (len > 0)
 2024                         nfsm_adv(nfsm_rndup(len));
 2025         } else if (auth_type == rpc_auth_kerb) {
                  /*
                   * The first credential word selects the kind of Kerberos
                   * name.  There is no default case: any other value falls out
                   * of the switch with neither ND_KERBFULL nor ND_KERBNICK set.
                   */
 2026                 switch (fxdr_unsigned(int, *tl++)) {
 2027                 case RPCAKN_FULLNAME:
                          /*
                           * Full name: copy the ticket (length word first) into
                           * nfsd_authstr and the verifier into nfsd_verfstr, then
                           * set NFSD_NEEDAUTH in nfsd_flag.
                           */
 2028                         ticklen = fxdr_unsigned(int, *tl);
 2029                         *((u_long *)nfsd->nfsd_authstr) = *tl;
 2030                         uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
 2031                         nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
 2032                         if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
 2033                                 m_freem(mrep);
 2034                                 return (EBADRPC);
 2035                         }
 2036                         uio.uio_offset = 0;
 2037                         uio.uio_iov = &iov;
 2038                         uio.uio_iovcnt = 1;
 2039                         uio.uio_segflg = UIO_SYSSPACE;
 2040                         iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
 2041                         iov.iov_len = RPCAUTH_MAXSIZ - 4;
 2042                         nfsm_mtouio(&uio, uio.uio_resid);
 2043                         nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
 2044                         if (*tl++ != rpc_auth_kerb ||
 2045                                 fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
 2046                                 printf("Bad kerb verifier\n");
 2047                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
 2048                                 nd->nd_procnum = NFSPROC_NOOP;
 2049                                 return (0);
 2050                         }
 2051                         nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
 2052                         tl = (u_long *)cp;
 2053                         if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
 2054                                 printf("Not fullname kerb verifier\n");
 2055                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
 2056                                 nd->nd_procnum = NFSPROC_NOOP;
 2057                                 return (0);
 2058                         }
 2059                         cp += NFSX_UNSIGNED;
 2060                         bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
 2061                         nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
 2062                         nd->nd_flag |= ND_KERBFULL;
 2063                         nfsd->nfsd_flag |= NFSD_NEEDAUTH;
 2064                         break;
 2065                 case RPCAKN_NICKNAME:
                          /*
                           * Nickname: the credential is just a uid that must match
                           * a cached nfsuid entry on this socket (and the client
                           * address, when one is known).
                           */
 2066                         if (len != 2 * NFSX_UNSIGNED) {
 2067                                 printf("Kerb nickname short\n");
 2068                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
 2069                                 nd->nd_procnum = NFSPROC_NOOP;
 2070                                 return (0);
 2071                         }
 2072                         nickuid = fxdr_unsigned(uid_t, *tl);
 2073                         nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
 2074                         if (*tl++ != rpc_auth_kerb ||
 2075                                 fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
 2076                                 printf("Kerb nick verifier bad\n");
 2077                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
 2078                                 nd->nd_procnum = NFSPROC_NOOP;
 2079                                 return (0);
 2080                         }
 2081                         nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
 2082                         tvin.tv_sec = *tl++;
 2083                         tvin.tv_usec = *tl;
 2084 
 2085                         for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
 2086                             nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
 2087                                 if (nuidp->nu_cr.cr_uid == nickuid &&
 2088                                     (!nd->nd_nam2 ||
 2089                                      netaddr_match(NU_NETFAM(nuidp),
 2090                                       &nuidp->nu_haddr, nd->nd_nam2)))
 2091                                         break;
 2092                         }
 2093                         if (!nuidp) {
 2094                                 nd->nd_repstat =
 2095                                         (NFSERR_AUTHERR|AUTH_REJECTCRED);
 2096                                 nd->nd_procnum = NFSPROC_NOOP;
 2097                                 return (0);
 2098                         }
 2099 
 2100                         /*
 2101                          * Now, decrypt the timestamp using the session key
 2102                          * and validate it.
                           * NOTE(review): the decryption step is only a
                           * placeholder.  tvin is filled in above but never
                           * used, and tvout is never assigned before the
                           * conversions and comparisons below read it.
 2103                          */
 2104 #ifdef NFSKERB
 2105                         XXX
 2106 #endif
 2107 
 2108                         tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
 2109                         tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
                          /*
                           * Reject when the cached entry has expired or the
                           * timestamp is older than the one recorded in it; the
                           * entry's expiry is then zeroed.
                           */
 2110                         if (nuidp->nu_expire < time.tv_sec ||
 2111                             nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
 2112                             (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
 2113                              nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
 2114                                 nuidp->nu_expire = 0;
 2115                                 nd->nd_repstat =
 2116                                     (NFSERR_AUTHERR|AUTH_REJECTVERF);
 2117                                 nd->nd_procnum = NFSPROC_NOOP;
 2118                                 return (0);
 2119                         }
 2120                         nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
 2121                         nd->nd_flag |= ND_KERBNICK;
 2122                 };
 2123         } else {
 2124                 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
 2125                 nd->nd_procnum = NFSPROC_NOOP;
 2126                 return (0);
 2127         }
 2128 
 2129         /*
 2130          * For nqnfs, get piggybacked lease request.
            * The lease word is OR-ed straight into nd_flag; a duration word
            * follows only when that leaves ND_LEASE set.  In every other case
            * nd_duration is NQ_MINLEASE.
 2131          */
 2132         if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
 2133                 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
 2134                 nd->nd_flag |= fxdr_unsigned(int, *tl);
 2135                 if (nd->nd_flag & ND_LEASE) {
 2136                         nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
 2137                         nd->nd_duration = fxdr_unsigned(int, *tl);
 2138                 } else
 2139                         nd->nd_duration = NQ_MINLEASE;
 2140         } else
 2141                 nd->nd_duration = NQ_MINLEASE;
          /* Publish the parse position reached so the caller continues after the header. */
 2142         nd->nd_md = md;
 2143         nd->nd_dpos = dpos;
 2144         return (0);
 2145 nfsmout:
 2146         return (error);
 2147 }
 2148 
 2149 /*
 2150  * Search for a sleeping nfsd and wake it up.
 2151  * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
 2152  * running nfsds will go look for the work in the nfssvc_sock list.
       *
       * slp is the nfssvc_sock to hand to an nfsd.  Nothing is done unless
       * SLP_VALID is set in slp->ns_flag.
       *
       * The first nfsd on the nfsd_head list that has NFSD_WAITING set is
       * chosen: its NFSD_WAITING flag is cleared, slp->ns_sref is incremented,
       * its nfsd_slp is pointed at slp, and wakeup() is called on the nfsd.
       * Only one nfsd is woken per call.
       *
       * When no nfsd is waiting, SLP_DOREC is also set in slp->ns_flag before
       * NFSD_CHECKSLP is set in nfsd_head_flag.
       *
       * Panics with "nfsd wakeup" if the chosen nfsd already has a non-zero
       * nfsd_slp.
 2153  */
 2154 void
 2155 nfsrv_wakenfsd(slp)
 2156         struct nfssvc_sock *slp;
 2157 {
 2158         register struct nfsd *nd;
 2159 
              /* A socket that is not marked valid is left alone. */
 2160         if ((slp->ns_flag & SLP_VALID) == 0)
 2161                 return;
              /* Walk nfsd_head from the front looking for a waiting nfsd. */
 2162         for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) {
 2163                 if (nd->nfsd_flag & NFSD_WAITING) {
                              /* Claim this nfsd before handing it the socket. */
 2164                         nd->nfsd_flag &= ~NFSD_WAITING;
                              /* A waiting nfsd is expected to have no socket yet. */
 2165                         if (nd->nfsd_slp)
 2166                                 panic("nfsd wakeup");
                              /*
                               * NOTE(review): ns_sref is presumably a reference
                               * count held on behalf of the nfsd -- confirm
                               * where it is dropped.
                               */
 2167                         slp->ns_sref++;
 2168                         nd->nfsd_slp = slp;
 2169                         wakeup((caddr_t)nd);
 2170                         return;
 2171                 }
 2172         }
              /*
               * No nfsd was waiting: flag the socket and the nfsd list instead.
               * NOTE(review): SLP_DOREC presumably marks the socket as having
               * a request pending -- confirm against the flag's definition.
               */
 2173         slp->ns_flag |= SLP_DOREC;
 2174         nfsd_head_flag |= NFSD_CHECKSLP;
 2175 }
 2176 #endif /* NFS_NOSERVER */
 2177 
      /*
       * Emit the message "nfs server <server>: <msg>" followed by a newline
       * through tprintf().
       *
       * p      - process passed to tprintf_open() to obtain the tprintf
       *          handle; when p is NULL no handle is opened and a NULL
       *          handle is used instead.
       * server - string substituted for the first %s of the message.
       * msg    - string substituted for the second %s of the message.
       *
       * Always returns 0.
       */
 2178 static int
 2179 nfs_msg(p, server, msg)
 2180         struct proc *p;
 2181         char *server, *msg;
 2182 {
 2183         tpr_t tpr;
 2184 
              /* Only open a tprintf handle when there is a process to open it for. */
 2185         if (p)
 2186                 tpr = tprintf_open(p);
 2187         else
 2188                 tpr = NULL;
              /*
               * NOTE(review): tprintf() and tprintf_close() are called even when
               * tpr is NULL; presumably both accept a NULL handle -- confirm.
               */
 2189         tprintf(tpr, "nfs server %s: %s\n", server, msg);
 2190         tprintf_close(tpr);
 2191         return (0);
 2192 }

Cache object: c02d56b64d31f8baadbce43d21043e9c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.