The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/nfs/nfs_socket.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: nfs_socket.c,v 1.173.4.9 2011/04/24 16:23:49 riz Exp $ */
    2 
    3 /*
    4  * Copyright (c) 1989, 1991, 1993, 1995
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * This code is derived from software contributed to Berkeley by
    8  * Rick Macklem at The University of Guelph.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)nfs_socket.c        8.5 (Berkeley) 3/30/95
   35  */
   36 
   37 /*
   38  * Socket operations for use by nfs
   39  */
   40 
   41 #include <sys/cdefs.h>
   42 __KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.173.4.9 2011/04/24 16:23:49 riz Exp $");
   43 
   44 #include "fs_nfs.h"
   45 #include "opt_nfs.h"
   46 #include "opt_nfsserver.h"
   47 #include "opt_mbuftrace.h"
   48 #include "opt_inet.h"
   49 
   50 #include <sys/param.h>
   51 #include <sys/systm.h>
   52 #include <sys/evcnt.h>
   53 #include <sys/callout.h>
   54 #include <sys/proc.h>
   55 #include <sys/mount.h>
   56 #include <sys/kernel.h>
   57 #include <sys/kmem.h>
   58 #include <sys/mbuf.h>
   59 #include <sys/vnode.h>
   60 #include <sys/domain.h>
   61 #include <sys/protosw.h>
   62 #include <sys/socket.h>
   63 #include <sys/socketvar.h>
   64 #include <sys/syslog.h>
   65 #include <sys/tprintf.h>
   66 #include <sys/namei.h>
   67 #include <sys/signal.h>
   68 #include <sys/signalvar.h>
   69 #include <sys/kauth.h>
   70 
   71 #include <netinet/in.h>
   72 #include <netinet/tcp.h>
   73 
   74 #include <nfs/rpcv2.h>
   75 #include <nfs/nfsproto.h>
   76 #include <nfs/nfs.h>
   77 #include <nfs/xdr_subs.h>
   78 #include <nfs/nfsm_subs.h>
   79 #include <nfs/nfsmount.h>
   80 #include <nfs/nfsnode.h>
   81 #include <nfs/nfsrtt.h>
   82 #include <nfs/nfs_var.h>
   83 
   84 #ifdef MBUFTRACE
   85 struct mowner nfs_mowner = MOWNER_INIT("nfs","");
   86 #endif
   87 
   88 /*
   89  * Estimate rto for an nfs rpc sent via. an unreliable datagram.
   90  * Use the mean and mean deviation of rtt for the appropriate type of rpc
   91  * for the frequent rpcs and a default for the others.
   92  * The justification for doing "other" this way is that these rpcs
   93  * happen so infrequently that timer est. would probably be stale.
   94  * Also, since many of these rpcs are
   95  * non-idempotent, a conservative timeout is desired.
   96  * getattr, lookup - A+2D
   97  * read, write     - A+4D
   98  * other           - nm_timeo
   99  */
  100 #define NFS_RTO(n, t) \
  101         ((t) == 0 ? (n)->nm_timeo : \
  102          ((t) < 3 ? \
  103           (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
  104           ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
  105 #define NFS_SRTT(r)     (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
  106 #define NFS_SDRTT(r)    (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
  107 /*
  108  * External data, mostly RPC constants in XDR form
  109  */
  110 extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
  111         rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
  112         rpc_auth_kerb;
  113 extern u_int32_t nfs_prog;
  114 extern const int nfsv3_procid[NFS_NPROCS];
  115 extern int nfs_ticks;
  116 
  117 #ifdef DEBUG
  118 /*
  119  * Avoid spamming the console with debugging messages.  We only print
  120  * the nfs timer and reply error debugs every 10 seconds.
  121  */
  122 static const struct timeval nfs_err_interval = { 10, 0 };
  123 static struct timeval nfs_reply_last_err_time;
  124 static struct timeval nfs_timer_last_err_time;
  125 #endif
  126 
  127 /*
  128  * Defines which timer to use for the procnum.
  129  * 0 - default
  130  * 1 - getattr
  131  * 2 - lookup
  132  * 3 - read
  133  * 4 - write
  134  */
  135 static const int proct[NFS_NPROCS] = {
  136         [NFSPROC_NULL] = 0,
  137         [NFSPROC_GETATTR] = 1,
  138         [NFSPROC_SETATTR] = 0,
  139         [NFSPROC_LOOKUP] = 2,
  140         [NFSPROC_ACCESS] = 1,
  141         [NFSPROC_READLINK] = 3,
  142         [NFSPROC_READ] = 3,
  143         [NFSPROC_WRITE] = 4,
  144         [NFSPROC_CREATE] = 0,
  145         [NFSPROC_MKDIR] = 0,
  146         [NFSPROC_SYMLINK] = 0,
  147         [NFSPROC_MKNOD] = 0,
  148         [NFSPROC_REMOVE] = 0,
  149         [NFSPROC_RMDIR] = 0,
  150         [NFSPROC_RENAME] = 0,
  151         [NFSPROC_LINK] = 0,
  152         [NFSPROC_READDIR] = 3,
  153         [NFSPROC_READDIRPLUS] = 3,
  154         [NFSPROC_FSSTAT] = 0,
  155         [NFSPROC_FSINFO] = 0,
  156         [NFSPROC_PATHCONF] = 0,
  157         [NFSPROC_COMMIT] = 0,
  158         [NFSPROC_NOOP] = 0,
  159 };
  160 
  161 /*
  162  * There is a congestion window for outstanding rpcs maintained per mount
  163  * point. The cwnd size is adjusted in roughly the way that:
  164  * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
  165  * SIGCOMM '88". ACM, August 1988.
  166  * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
  167  * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
  168  * of rpcs is in progress.
  169  * (The sent count and cwnd are scaled for integer arith.)
  170  * Variants of "slow start" were tried and were found to be too much of a
  171  * performance hit (ave. rtt 3 times larger),
  172  * I suspect due to the large rtt that nfs rpcs have.
  173  */
  174 #define NFS_CWNDSCALE   256
  175 #define NFS_MAXCWND     (NFS_CWNDSCALE * 32)
  176 static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
  177 int nfsrtton = 0;
  178 struct nfsrtt nfsrtt;
  179 struct nfsreqhead nfs_reqq;
  180 static callout_t nfs_timer_ch;
  181 static struct evcnt nfs_timer_ev;
  182 static struct evcnt nfs_timer_start_ev;
  183 static struct evcnt nfs_timer_stop_ev;
  184 
  185 #ifdef NFS
  186 static int nfs_sndlock(struct nfsmount *, struct nfsreq *);
  187 static void nfs_sndunlock(struct nfsmount *);
  188 #endif
  189 static int nfs_rcvlock(struct nfsmount *, struct nfsreq *);
  190 static void nfs_rcvunlock(struct nfsmount *);
  191 
  192 #if defined(NFSSERVER)
  193 static void nfsrv_wakenfsd_locked(struct nfssvc_sock *);
  194 #endif /* defined(NFSSERVER) */
  195 
  196 /*
  197  * Initialize sockets and congestion for a new NFS connection.
  198  * We do not free the sockaddr if error.
  199  */
  200 int
  201 nfs_connect(nmp, rep, l)
  202         struct nfsmount *nmp;
  203         struct nfsreq *rep;
  204         struct lwp *l;
  205 {
  206         struct socket *so;
  207         int error, rcvreserve, sndreserve;
  208         struct sockaddr *saddr;
  209         struct sockaddr_in *sin;
  210 #ifdef INET6
  211         struct sockaddr_in6 *sin6;
  212 #endif
  213         struct mbuf *m;
  214         int val;
  215 
  216         nmp->nm_so = (struct socket *)0;
  217         saddr = mtod(nmp->nm_nam, struct sockaddr *);
  218         error = socreate(saddr->sa_family, &nmp->nm_so,
  219                 nmp->nm_sotype, nmp->nm_soproto, l, NULL);
  220         if (error)
  221                 goto bad;
  222         so = nmp->nm_so;
  223 #ifdef MBUFTRACE
  224         so->so_mowner = &nfs_mowner;
  225         so->so_rcv.sb_mowner = &nfs_mowner;
  226         so->so_snd.sb_mowner = &nfs_mowner;
  227 #endif
  228         nmp->nm_soflags = so->so_proto->pr_flags;
  229 
  230         /*
  231          * Some servers require that the client port be a reserved port number.
  232          */
  233         if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
  234                 val = IP_PORTRANGE_LOW;
  235 
  236                 if ((error = so_setsockopt(NULL, so, IPPROTO_IP, IP_PORTRANGE,
  237                     &val, sizeof(val))))
  238                         goto bad;
  239                 m = m_get(M_WAIT, MT_SONAME);
  240                 MCLAIM(m, so->so_mowner);
  241                 sin = mtod(m, struct sockaddr_in *);
  242                 sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
  243                 sin->sin_family = AF_INET;
  244                 sin->sin_addr.s_addr = INADDR_ANY;
  245                 sin->sin_port = 0;
  246                 error = sobind(so, m, &lwp0);
  247                 m_freem(m);
  248                 if (error)
  249                         goto bad;
  250         }
  251 #ifdef INET6
  252         if (saddr->sa_family == AF_INET6 && (nmp->nm_flag & NFSMNT_RESVPORT)) {
  253                 val = IPV6_PORTRANGE_LOW;
  254 
  255                 if ((error = so_setsockopt(NULL, so, IPPROTO_IPV6,
  256                     IPV6_PORTRANGE, &val, sizeof(val))))
  257                         goto bad;
  258                 m = m_get(M_WAIT, MT_SONAME);
  259                 MCLAIM(m, so->so_mowner);
  260                 sin6 = mtod(m, struct sockaddr_in6 *);
  261                 sin6->sin6_len = m->m_len = sizeof (struct sockaddr_in6);
  262                 sin6->sin6_family = AF_INET6;
  263                 sin6->sin6_addr = in6addr_any;
  264                 sin6->sin6_port = 0;
  265                 error = sobind(so, m, &lwp0);
  266                 m_freem(m);
  267                 if (error)
  268                         goto bad;
  269         }
  270 #endif
  271 
  272         /*
  273          * Protocols that do not require connections may be optionally left
  274          * unconnected for servers that reply from a port other than NFS_PORT.
  275          */
  276         solock(so);
  277         if (nmp->nm_flag & NFSMNT_NOCONN) {
  278                 if (nmp->nm_soflags & PR_CONNREQUIRED) {
  279                         sounlock(so);
  280                         error = ENOTCONN;
  281                         goto bad;
  282                 }
  283         } else {
  284                 error = soconnect(so, nmp->nm_nam, l);
  285                 if (error) {
  286                         sounlock(so);
  287                         goto bad;
  288                 }
  289 
  290                 /*
  291                  * Wait for the connection to complete. Cribbed from the
  292                  * connect system call but with the wait timing out so
  293                  * that interruptible mounts don't hang here for a long time.
  294                  */
  295                 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
  296                         (void)sowait(so, false, 2 * hz);
  297                         if ((so->so_state & SS_ISCONNECTING) &&
  298                             so->so_error == 0 && rep &&
  299                             (error = nfs_sigintr(nmp, rep, rep->r_lwp)) != 0){
  300                                 so->so_state &= ~SS_ISCONNECTING;
  301                                 sounlock(so);
  302                                 goto bad;
  303                         }
  304                 }
  305                 if (so->so_error) {
  306                         error = so->so_error;
  307                         so->so_error = 0;
  308                         sounlock(so);
  309                         goto bad;
  310                 }
  311         }
  312         if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
  313                 so->so_rcv.sb_timeo = (5 * hz);
  314                 so->so_snd.sb_timeo = (5 * hz);
  315         } else {
  316                 /*
  317                  * enable receive timeout to detect server crash and reconnect.
  318                  * otherwise, we can be stuck in soreceive forever.
  319                  */
  320                 so->so_rcv.sb_timeo = (5 * hz);
  321                 so->so_snd.sb_timeo = 0;
  322         }
  323         if (nmp->nm_sotype == SOCK_DGRAM) {
  324                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
  325                 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
  326                     NFS_MAXPKTHDR) * 2;
  327         } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
  328                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3;
  329                 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
  330                     NFS_MAXPKTHDR) * 3;
  331         } else {
  332                 sounlock(so);
  333                 if (nmp->nm_sotype != SOCK_STREAM)
  334                         panic("nfscon sotype");
  335                 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
  336                         val = 1;
  337                         so_setsockopt(NULL, so, SOL_SOCKET, SO_KEEPALIVE, &val,
  338                             sizeof(val));
  339                 }
  340                 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
  341                         val = 1;
  342                         so_setsockopt(NULL, so, IPPROTO_TCP, TCP_NODELAY, &val,
  343                             sizeof(val));
  344                 }
  345                 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
  346                     sizeof (u_int32_t)) * 3;
  347                 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
  348                     sizeof (u_int32_t)) * 3;
  349                 solock(so);
  350         }
  351         error = soreserve(so, sndreserve, rcvreserve);
  352         if (error) {
  353                 sounlock(so);
  354                 goto bad;
  355         }
  356         so->so_rcv.sb_flags |= SB_NOINTR;
  357         so->so_snd.sb_flags |= SB_NOINTR;
  358         sounlock(so);
  359 
  360         /* Initialize other non-zero congestion variables */
  361         nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
  362                 NFS_TIMEO << 3;
  363         nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
  364                 nmp->nm_sdrtt[3] = 0;
  365         nmp->nm_cwnd = NFS_MAXCWND / 2;     /* Initial send window */
  366         nmp->nm_sent = 0;
  367         nmp->nm_timeouts = 0;
  368         return (0);
  369 
  370 bad:
  371         nfs_disconnect(nmp);
  372         return (error);
  373 }
  374 
  375 /*
  376  * Reconnect routine:
  377  * Called when a connection is broken on a reliable protocol.
  378  * - clean up the old socket
  379  * - nfs_connect() again
  380  * - set R_MUSTRESEND for all outstanding requests on mount point
  381  * If this fails the mount point is DEAD!
  382  * nb: Must be called with the nfs_sndlock() set on the mount point.
  383  */
  384 int
  385 nfs_reconnect(struct nfsreq *rep)
  386 {
  387         struct nfsreq *rp;
  388         struct nfsmount *nmp = rep->r_nmp;
  389         int error;
  390 
  391         nfs_disconnect(nmp);
  392         while ((error = nfs_connect(nmp, rep, &lwp0)) != 0) {
  393                 if (error == EINTR || error == ERESTART)
  394                         return (EINTR);
  395                 kpause("nfscn2", false, hz, NULL);
  396         }
  397 
  398         /*
  399          * Loop through outstanding request list and fix up all requests
  400          * on old socket.
  401          */
  402         TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
  403                 if (rp->r_nmp == nmp) {
  404                         if ((rp->r_flags & R_MUSTRESEND) == 0)
  405                                 rp->r_flags |= R_MUSTRESEND | R_REXMITTED;
  406                         rp->r_rexmit = 0;
  407                 }
  408         }
  409         return (0);
  410 }
  411 
  412 /*
  413  * NFS disconnect. Clean up and unlink.
  414  */
  415 void
  416 nfs_disconnect(nmp)
  417         struct nfsmount *nmp;
  418 {
  419         struct socket *so;
  420         int drain = 0;
  421 
  422         if (nmp->nm_so) {
  423                 so = nmp->nm_so;
  424                 nmp->nm_so = (struct socket *)0;
  425                 solock(so);
  426                 soshutdown(so, SHUT_RDWR);
  427                 sounlock(so);
  428                 drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0;
  429                 if (drain) {
  430                         /*
  431                          * soshutdown() above should wake up the current
  432                          * listener.
  433                          * Now wake up those waiting for the receive lock, and
  434                          * wait for them to go away unhappy, to prevent *nmp
  435                          * from evaporating while they're sleeping.
  436                          */
  437                         mutex_enter(&nmp->nm_lock);
  438                         while (nmp->nm_waiters > 0) {
  439                                 cv_broadcast(&nmp->nm_rcvcv);
  440                                 cv_broadcast(&nmp->nm_sndcv);
  441                                 cv_wait(&nmp->nm_disconcv, &nmp->nm_lock);
  442                         }
  443                         mutex_exit(&nmp->nm_lock);
  444                 }
  445                 soclose(so);
  446         }
  447 #ifdef DIAGNOSTIC
  448         if (drain && (nmp->nm_waiters > 0))
  449                 panic("nfs_disconnect: waiters left after drain?");
  450 #endif
  451 }
  452 
  453 void
  454 nfs_safedisconnect(nmp)
  455         struct nfsmount *nmp;
  456 {
  457         struct nfsreq dummyreq;
  458 
  459         memset(&dummyreq, 0, sizeof(dummyreq));
  460         dummyreq.r_nmp = nmp;
  461         nfs_rcvlock(nmp, &dummyreq); /* XXX ignored error return */
  462         nfs_disconnect(nmp);
  463         nfs_rcvunlock(nmp);
  464 }
  465 
  466 /*
  467  * This is the nfs send routine. For connection based socket types, it
  468  * must be called with an nfs_sndlock() on the socket.
  469  * "rep == NULL" indicates that it has been called from a server.
  470  * For the client side:
  471  * - return EINTR if the RPC is terminated, 0 otherwise
  472  * - set R_MUSTRESEND if the send fails for any reason
  473  * - do any cleanup required by recoverable socket errors (? ? ?)
  474  * For the server side:
  475  * - return EINTR or ERESTART if interrupted by a signal
  476  * - return EPIPE if a connection is lost for connection based sockets (TCP...)
  477  * - do any cleanup required by recoverable socket errors (? ? ?)
  478  */
  479 int
  480 nfs_send(so, nam, top, rep, l)
  481         struct socket *so;
  482         struct mbuf *nam;
  483         struct mbuf *top;
  484         struct nfsreq *rep;
  485         struct lwp *l;
  486 {
  487         struct mbuf *sendnam;
  488         int error, soflags, flags;
  489 
  490         /* XXX nfs_doio()/nfs_request() calls with  rep->r_lwp == NULL */
  491         if (l == NULL && rep->r_lwp == NULL)
  492                 l = curlwp;
  493 
  494         if (rep) {
  495                 if (rep->r_flags & R_SOFTTERM) {
  496                         m_freem(top);
  497                         return (EINTR);
  498                 }
  499                 if ((so = rep->r_nmp->nm_so) == NULL) {
  500                         rep->r_flags |= R_MUSTRESEND;
  501                         m_freem(top);
  502                         return (0);
  503                 }
  504                 rep->r_flags &= ~R_MUSTRESEND;
  505                 soflags = rep->r_nmp->nm_soflags;
  506         } else
  507                 soflags = so->so_proto->pr_flags;
  508         if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
  509                 sendnam = (struct mbuf *)0;
  510         else
  511                 sendnam = nam;
  512         if (so->so_type == SOCK_SEQPACKET)
  513                 flags = MSG_EOR;
  514         else
  515                 flags = 0;
  516 
  517         error = (*so->so_send)(so, sendnam, NULL, top, NULL, flags,  l);
  518         if (error) {
  519                 if (rep) {
  520                         if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
  521                                 /*
  522                                  * We're too fast for the network/driver,
  523                                  * and UDP isn't flowcontrolled.
  524                                  * We need to resend. This is not fatal,
  525                                  * just try again.
  526                                  *
  527                                  * Could be smarter here by doing some sort
  528                                  * of a backoff, but this is rare.
  529                                  */
  530                                 rep->r_flags |= R_MUSTRESEND;
  531                         } else {
  532                                 if (error != EPIPE)
  533                                         log(LOG_INFO,
  534                                             "nfs send error %d for %s\n",
  535                                             error,
  536                                             rep->r_nmp->nm_mountp->
  537                                                     mnt_stat.f_mntfromname);
  538                                 /*
  539                                  * Deal with errors for the client side.
  540                                  */
  541                                 if (rep->r_flags & R_SOFTTERM)
  542                                         error = EINTR;
  543                                 else if (error != EMSGSIZE)
  544                                         rep->r_flags |= R_MUSTRESEND;
  545                         }
  546                 } else {
  547                         /*
  548                          * See above. This error can happen under normal
  549                          * circumstances and the log is too noisy.
  550                          * The error will still show up in nfsstat.
  551                          */
  552                         if (error != ENOBUFS || so->so_type != SOCK_DGRAM)
  553                                 log(LOG_INFO, "nfsd send error %d\n", error);
  554                 }
  555 
  556                 /*
  557                  * Handle any recoverable (soft) socket errors here. (? ? ?)
  558                  */
  559                 if (error != EINTR && error != ERESTART &&
  560                     error != EWOULDBLOCK && error != EPIPE &&
  561                     error != EMSGSIZE)
  562                         error = 0;
  563         }
  564         return (error);
  565 }
  566 
  567 #ifdef NFS
  568 /*
  569  * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
  570  * done by soreceive(), but for SOCK_STREAM we must deal with the Record
  571  * Mark and consolidate the data into a new mbuf list.
  572  * nb: Sometimes TCP passes the data up to soreceive() in long lists of
  573  *     small mbufs.
  574  * For SOCK_STREAM we must be very careful to read an entire record once
  575  * we have read any of it, even if the system call has been interrupted.
  576  */
  577 static int
  578 nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp,
  579     struct lwp *l)
  580 {
  581         struct socket *so;
  582         struct uio auio;
  583         struct iovec aio;
  584         struct mbuf *m;
  585         struct mbuf *control;
  586         u_int32_t len;
  587         struct mbuf **getnam;
  588         int error, sotype, rcvflg;
  589 
  590         /*
  591          * Set up arguments for soreceive()
  592          */
  593         *mp = (struct mbuf *)0;
  594         *aname = (struct mbuf *)0;
  595         sotype = rep->r_nmp->nm_sotype;
  596 
  597         /*
  598          * For reliable protocols, lock against other senders/receivers
  599          * in case a reconnect is necessary.
  600          * For SOCK_STREAM, first get the Record Mark to find out how much
  601          * more there is to get.
  602          * We must lock the socket against other receivers
  603          * until we have an entire rpc request/reply.
  604          */
  605         if (sotype != SOCK_DGRAM) {
  606                 error = nfs_sndlock(rep->r_nmp, rep);
  607                 if (error)
  608                         return (error);
  609 tryagain:
  610                 /*
  611                  * Check for fatal errors and resending request.
  612                  */
  613                 /*
  614                  * Ugh: If a reconnect attempt just happened, nm_so
  615                  * would have changed. NULL indicates a failed
  616                  * attempt that has essentially shut down this
  617                  * mount point.
  618                  */
  619                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
  620                         nfs_sndunlock(rep->r_nmp);
  621                         return (EINTR);
  622                 }
  623                 so = rep->r_nmp->nm_so;
  624                 if (!so) {
  625                         error = nfs_reconnect(rep);
  626                         if (error) {
  627                                 nfs_sndunlock(rep->r_nmp);
  628                                 return (error);
  629                         }
  630                         goto tryagain;
  631                 }
  632                 while (rep->r_flags & R_MUSTRESEND) {
  633                         m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
  634                         nfsstats.rpcretries++;
  635                         rep->r_rtt = 0;
  636                         rep->r_flags &= ~R_TIMING;
  637                         error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l);
  638                         if (error) {
  639                                 if (error == EINTR || error == ERESTART ||
  640                                     (error = nfs_reconnect(rep)) != 0) {
  641                                         nfs_sndunlock(rep->r_nmp);
  642                                         return (error);
  643                                 }
  644                                 goto tryagain;
  645                         }
  646                 }
  647                 nfs_sndunlock(rep->r_nmp);
  648                 if (sotype == SOCK_STREAM) {
  649                         aio.iov_base = (void *) &len;
  650                         aio.iov_len = sizeof(u_int32_t);
  651                         auio.uio_iov = &aio;
  652                         auio.uio_iovcnt = 1;
  653                         auio.uio_rw = UIO_READ;
  654                         auio.uio_offset = 0;
  655                         auio.uio_resid = sizeof(u_int32_t);
  656                         UIO_SETUP_SYSSPACE(&auio);
  657                         do {
  658                            rcvflg = MSG_WAITALL;
  659                            error = (*so->so_receive)(so, (struct mbuf **)0, &auio,
  660                                 (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
  661                            if (error == EWOULDBLOCK && rep) {
  662                                 if (rep->r_flags & R_SOFTTERM)
  663                                         return (EINTR);
  664                                 /*
  665                                  * if it seems that the server died after it
  666                                  * received our request, set EPIPE so that
  667                                  * we'll reconnect and retransmit requests.
  668                                  */
  669                                 if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
  670                                         nfsstats.rpctimeouts++;
  671                                         error = EPIPE;
  672                                 }
  673                            }
  674                         } while (error == EWOULDBLOCK);
  675                         if (!error && auio.uio_resid > 0) {
  676                             /*
  677                              * Don't log a 0 byte receive; it means
  678                              * that the socket has been closed, and
  679                              * can happen during normal operation
  680                              * (forcible unmount or Solaris server).
  681                              */
  682                             if (auio.uio_resid != sizeof (u_int32_t))
  683                               log(LOG_INFO,
  684                                  "short receive (%lu/%lu) from nfs server %s\n",
  685                                  (u_long)sizeof(u_int32_t) - auio.uio_resid,
  686                                  (u_long)sizeof(u_int32_t),
  687                                  rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  688                             error = EPIPE;
  689                         }
  690                         if (error)
  691                                 goto errout;
  692                         len = ntohl(len) & ~0x80000000;
  693                         /*
  694                          * This is SERIOUS! We are out of sync with the sender
  695                          * and forcing a disconnect/reconnect is all I can do.
  696                          */
  697                         if (len > NFS_MAXPACKET) {
  698                             log(LOG_ERR, "%s (%d) from nfs server %s\n",
  699                                 "impossible packet length",
  700                                 len,
  701                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  702                             error = EFBIG;
  703                             goto errout;
  704                         }
  705                         auio.uio_resid = len;
  706                         do {
  707                             rcvflg = MSG_WAITALL;
  708                             error =  (*so->so_receive)(so, (struct mbuf **)0,
  709                                 &auio, mp, (struct mbuf **)0, &rcvflg);
  710                         } while (error == EWOULDBLOCK || error == EINTR ||
  711                                  error == ERESTART);
  712                         if (!error && auio.uio_resid > 0) {
  713                             if (len != auio.uio_resid)
  714                               log(LOG_INFO,
  715                                 "short receive (%lu/%d) from nfs server %s\n",
  716                                 (u_long)len - auio.uio_resid, len,
  717                                 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  718                             error = EPIPE;
  719                         }
  720                 } else {
  721                         /*
  722                          * NB: Since uio_resid is big, MSG_WAITALL is ignored
  723                          * and soreceive() will return when it has either a
  724                          * control msg or a data msg.
  725                          * We have no use for control msg., but must grab them
  726                          * and then throw them away so we know what is going
  727                          * on.
  728                          */
  729                         auio.uio_resid = len = 100000000; /* Anything Big */
  730                         /* not need to setup uio_vmspace */
  731                         do {
  732                             rcvflg = 0;
  733                             error =  (*so->so_receive)(so, (struct mbuf **)0,
  734                                 &auio, mp, &control, &rcvflg);
  735                             if (control)
  736                                 m_freem(control);
  737                             if (error == EWOULDBLOCK && rep) {
  738                                 if (rep->r_flags & R_SOFTTERM)
  739                                         return (EINTR);
  740                             }
  741                         } while (error == EWOULDBLOCK ||
  742                                  (!error && *mp == NULL && control));
  743                         if ((rcvflg & MSG_EOR) == 0)
  744                                 printf("Egad!!\n");
  745                         if (!error && *mp == NULL)
  746                                 error = EPIPE;
  747                         len -= auio.uio_resid;
  748                 }
  749 errout:
  750                 if (error && error != EINTR && error != ERESTART) {
  751                         m_freem(*mp);
  752                         *mp = (struct mbuf *)0;
  753                         if (error != EPIPE)
  754                                 log(LOG_INFO,
  755                                     "receive error %d from nfs server %s\n",
  756                                     error,
  757                                  rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
  758                         error = nfs_sndlock(rep->r_nmp, rep);
  759                         if (!error)
  760                                 error = nfs_reconnect(rep);
  761                         if (!error)
  762                                 goto tryagain;
  763                         else
  764                                 nfs_sndunlock(rep->r_nmp);
  765                 }
  766         } else {
  767                 if ((so = rep->r_nmp->nm_so) == NULL)
  768                         return (EACCES);
  769                 if (so->so_state & SS_ISCONNECTED)
  770                         getnam = (struct mbuf **)0;
  771                 else
  772                         getnam = aname;
  773                 auio.uio_resid = len = 1000000;
  774                 /* not need to setup uio_vmspace */
  775                 do {
  776                         rcvflg = 0;
  777                         error =  (*so->so_receive)(so, getnam, &auio, mp,
  778                                 (struct mbuf **)0, &rcvflg);
  779                         if (error == EWOULDBLOCK &&
  780                             (rep->r_flags & R_SOFTTERM))
  781                                 return (EINTR);
  782                 } while (error == EWOULDBLOCK);
  783                 len -= auio.uio_resid;
  784                 if (!error && *mp == NULL)
  785                         error = EPIPE;
  786         }
  787         if (error) {
  788                 m_freem(*mp);
  789                 *mp = (struct mbuf *)0;
  790         }
  791         return (error);
  792 }
  793 
  794 /*
  795  * Implement receipt of reply on a socket.
  796  * We must search through the list of received datagrams matching them
  797  * with outstanding requests using the xid, until ours is found.
       *
       * myrep is the request this caller is waiting on; lwp is handed
       * through to nfs_receive().
       *
       * Each pass takes the receive lock, reads one reply with
       * nfs_receive(), and scans nfs_reqq for a request that has no reply
       * yet and whose r_xid equals the reply's xid.  A matching request gets
       * the reply attached (r_mrep/r_md/r_dpos) and the mount's congestion
       * window and RTT estimates are updated; a reply matching nothing is
       * counted in nfsstats.rpcunexpected and freed.
       *
       * Returns 0 when the matched request is myrep, or when nfs_rcvlock()
       * returns EALREADY.  Otherwise returns the error from nfs_rcvlock() or
       * nfs_receive(), except that receive errors accepted by
       * NFSIGNORE_SOERROR() are cleared and the loop continues.
  798  */
  799 /* ARGSUSED */
  800 static int
  801 nfs_reply(struct nfsreq *myrep, struct lwp *lwp)
  802 {
  803         struct nfsreq *rep;
  804         struct nfsmount *nmp = myrep->r_nmp;
  805         int32_t t1;
  806         struct mbuf *mrep, *nam, *md;
  807         u_int32_t rxid, *tl;
              /*
               * NOTE(review): cp2 is not referenced directly in this function;
               * presumably the nfsm_dissect() macro uses it -- confirm.
               */
  808         char *dpos, *cp2;
  809         int error;
  810 
  811         /*
  812          * Loop around until we get our own reply
  813          */
  814         for (;;) {
  815                 /*
  816                  * Lock against other receivers so that I don't get stuck in
  817                  * sbwait() after someone else has received my reply for me.
  818                  * Also necessary for connection based protocols to avoid
  819                  * race conditions during a reconnect.
                       *
                       * EALREADY is reported to the caller as success;
                       * presumably it means another receiver has already
                       * attached our reply -- confirm in nfs_rcvlock().
  820                  */
  821                 error = nfs_rcvlock(nmp, myrep);
  822                 if (error == EALREADY)
  823                         return (0);
  824                 if (error)
  825                         return (error);
  826                 /*
  827                  * Get the next Rpc reply off the socket
                       *
                       * nm_waiters is raised around the receive and
                       * nm_disconcv is signalled once it is lowered again.
                       * NOTE(review): presumably a disconnect path sleeps on
                       * nm_disconcv until nm_waiters drains -- confirm.
  828                  */
  829 
  830                 mutex_enter(&nmp->nm_lock);
  831                 nmp->nm_waiters++;
  832                 mutex_exit(&nmp->nm_lock);
  833 
  834                 error = nfs_receive(myrep, &nam, &mrep, lwp);
  835 
  836                 mutex_enter(&nmp->nm_lock);
  837                 nmp->nm_waiters--;
  838                 cv_signal(&nmp->nm_disconcv);
  839                 mutex_exit(&nmp->nm_lock);
  840 
  841                 if (error) {
                              /* The receive lock is dropped on every error path. */
  842                         nfs_rcvunlock(nmp);
  843 
  844                         if (nmp->nm_iflag & NFSMNT_DISMNT) {
  845                                 /*
  846                                  * Oops, we're going away now..
  847                                  */
  848                                 return error;
  849                         }
  850                         /*
  851                          * Ignore routing errors on connectionless protocols?
                               * The pending socket error is cleared and we go
                               * around again to re-take the lock and receive.
  852                          */
  853                         if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
  854                                 nmp->nm_so->so_error = 0;
  855 #ifdef DEBUG
  856                                 if (ratecheck(&nfs_reply_last_err_time,
  857                                     &nfs_err_interval))
  858                                         printf("%s: ignoring error %d\n",
  859                                                __func__, error);
  860 #endif
  861                                 continue;
  862                         }
  863                         return (error);
  864                 }
                      /* The sender's address is not used here; just release it. */
  865                 if (nam)
  866                         m_freem(nam);
  867 
  868                 /*
  869                  * Get the xid and check that it is an rpc reply
                       *
                       * The two words are compared as read from the mbuf; no
                       * byte-order conversion is applied to rxid before it is
                       * matched against r_xid below.
                       *
                       * NOTE(review): the nfsmout label below has no visible
                       * goto in this function; presumably nfsm_dissect()
                       * branches there when the reply is too short -- confirm
                       * against the macro definition.
  870                  */
  871                 md = mrep;
  872                 dpos = mtod(md, void *);
  873                 nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
  874                 rxid = *tl++;
  875                 if (*tl != rpc_reply) {
  876                         nfsstats.rpcinvalid++;
  877                         m_freem(mrep);
  878 nfsmout:
  879                         nfs_rcvunlock(nmp);
  880                         continue;
  881                 }
  882 
  883                 /*
  884                  * Loop through the request list to match up the reply
  885                  * Iff no match, just drop the datagram
  886                  */
  887                 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
  888                         if (rep->r_mrep == NULL && rxid == rep->r_xid) {
  889                                 /* Found it.. */
  890                                 rep->r_mrep = mrep;
  891                                 rep->r_md = md;
  892                                 rep->r_dpos = dpos;
                                      /*
                                       * When nfsrtton is set, record a sample
                                       * in the nfsrtt.rttl[] ring at nfsrtt.pos
                                       * and advance pos modulo NFSRTTLOGSIZ.
                                       *
                                       * NOTE(review): srtt/sdrtt are indexed
                                       * with proct[] - 1 without checking that
                                       * proct[] is non-zero here -- confirm
                                       * this cannot index element -1.
                                       */
  893                                 if (nfsrtton) {
  894                                         struct rttl *rt;
  895 
  896                                         rt = &nfsrtt.rttl[nfsrtt.pos];
  897                                         rt->proc = rep->r_procnum;
  898                                         rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
  899                                         rt->sent = nmp->nm_sent;
  900                                         rt->cwnd = nmp->nm_cwnd;
  901                                         rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
  902                                         rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
  903                                         rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx;
  904                                         getmicrotime(&rt->tstamp);
                                              /*
                                               * Requests that were not being
                                               * timed log a fixed rtt of
                                               * 1000000.
                                               */
  905                                         if (rep->r_flags & R_TIMING)
  906                                                 rt->rtt = rep->r_rtt;
  907                                         else
  908                                                 rt->rtt = 1000000;
  909                                         nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
  910                                 }
  911                                 /*
  912                                  * Update congestion window.
  913                                  * Do the additive increase of
  914                                  * one rpc/rtt.
                                       * The window only grows while it is not
                                       * larger than the amount outstanding, and
                                       * is clamped to NFS_MAXCWND.
  915                                  */
  916                                 if (nmp->nm_cwnd <= nmp->nm_sent) {
  917                                         nmp->nm_cwnd +=
  918                                            (NFS_CWNDSCALE * NFS_CWNDSCALE +
  919                                            (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
  920                                         if (nmp->nm_cwnd > NFS_MAXCWND)
  921                                                 nmp->nm_cwnd = NFS_MAXCWND;
  922                                 }
                                      /*
                                       * This request is no longer outstanding:
                                       * clear R_SENT and give back its share of
                                       * nm_sent.
                                       */
  923                                 rep->r_flags &= ~R_SENT;
  924                                 nmp->nm_sent -= NFS_CWNDSCALE;
  925                                 /*
  926                                  * Update rtt using a gain of 0.125 on the mean
  927                                  * and a gain of 0.25 on the deviation.
  928                                  */
  929                                 if (rep->r_flags & R_TIMING) {
  930                                         /*
  931                                          * Since the timer resolution of
  932                                          * NFS_HZ is so coarse, it can often
  933                                          * result in r_rtt == 0. Since
  934                                          * r_rtt == N means that the actual
  935                                          * rtt is between N+dt and N+2-dt ticks,
  936                                          * add 1.
  937                                          */
  938                                         t1 = rep->r_rtt + 1;
  939                                         t1 -= (NFS_SRTT(rep) >> 3);
  940                                         NFS_SRTT(rep) += t1;
  941                                         if (t1 < 0)
  942                                                 t1 = -t1;
  943                                         t1 -= (NFS_SDRTT(rep) >> 2);
  944                                         NFS_SDRTT(rep) += t1;
  945                                 }
  946                                 nmp->nm_timeouts = 0;
  947                                 break;
  948                         }
  949                 }
  950                 nfs_rcvunlock(nmp);
  951                 /*
  952                  * If not matched to a request, drop it.
  953                  * If it's mine, get out.
                       * If it matched some other request, that request now
                       * holds the reply and we loop to receive the next one.
  954                  */
  955                 if (rep == 0) {
  956                         nfsstats.rpcunexpected++;
  957                         m_freem(mrep);
  958                 } else if (rep == myrep) {
  959                         if (rep->r_mrep == NULL)
  960                                 panic("nfsreply nil");
  961                         return (0);
  962                 }
  963         }
  964 }
  965 
  966 /*
  967  * nfs_request - goes something like this
  968  *      - fill in request struct
  969  *      - links it into list
  970  *      - calls nfs_send() for first transmit
  971  *      - calls nfs_reply() to wait for the reply
  972  *      - break down rpc header and return with nfs reply pointed to
  973  *        by mrep or error
  974  * nb: always frees up mreq mbuf list
       *
       * Parameters:
       *      np       nfsnode the request is for; supplies the mount, the
       *               cached owner uid/gid and the NUSEOPENCRED flag.
       *      mrest    mbuf chain holding the procedure arguments.  It is
       *               freed here if building the Kerberos authenticator
       *               fails; otherwise it is handed to nfsm_rpchead().
       *               NOTE(review): presumably it becomes part of r_mreq,
       *               which is freed at nfsmout -- confirm.
       *      procnum  NFS procedure number.
       *      lwp      requesting lwp; must be NULL or curlwp (asserted).
       *      cred     credential to use; must not be NULL (asserted).
       *      mrp, mdp, dposp
       *               out: reply mbuf chain, current mbuf and position in
       *               it.  Written on success, and on an NFSv3 mount also
       *               when the server returned an NFS error status.
       *      rexmitp  optional out: for SOCK_DGRAM mounts the request's
       *               r_rexmit value, otherwise 1 if R_REXMITTED was set.
       *
       * Returns 0 on success or an error number.  On an NFSv3 mount a
       * server-reported NFS error is returned with NFSERR_RETERR or'ed in
       * and the reply is still passed back through mrp/mdp/dposp.
  975  */
  976 int
  977 nfs_request(np, mrest, procnum, lwp, cred, mrp, mdp, dposp, rexmitp)
  978         struct nfsnode *np;
  979         struct mbuf *mrest;
  980         int procnum;
  981         struct lwp *lwp;
  982         kauth_cred_t cred;
  983         struct mbuf **mrp;
  984         struct mbuf **mdp;
  985         char **dposp;
  986         int *rexmitp;
  987 {
  988         struct mbuf *m, *mrep;
  989         struct nfsreq *rep;
  990         u_int32_t *tl;
  991         int i;
  992         struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
  993         struct mbuf *md, *mheadend;
  994         char nickv[RPCX_NICKVERF];
  995         time_t waituntil;
  996         char *dpos, *cp2;
  997         int t1, s, error = 0, mrest_len, auth_len, auth_type;
  998         int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
  999         int verf_len, verf_type;
 1000         u_int32_t xid;
 1001         char *auth_str, *verf_str;
 1002         NFSKERBKEY_T key;               /* save session key */
 1003         kauth_cred_t acred;
 1004         struct mbuf *mrest_backup = NULL;
 1005         kauth_cred_t origcred = NULL; /* XXX: gcc */
              /*
               * retry_cred: an NFSERR_ACCES reply may be retried once with the
               * other credential choice.  use_opencred: send with the caller's
               * credential rather than the file owner's ids for
               * READ/WRITE/COMMIT; seeded from the node's NUSEOPENCRED flag.
               */
 1006         bool retry_cred = true;
 1007         bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0;
 1008 
 1009         if (rexmitp != NULL)
 1010                 *rexmitp = 0;
 1011 
              /*
               * Scratch credential that may be filled in with the file owner's
               * ids below; released on every exit path.
               */
 1012         acred = kauth_cred_alloc();
 1013 
              /*
               * Re-entered from the NFSERR_ACCES retry below, after the previous
               * request structure has been freed.
               */
 1014 tryagain_cred:
 1015         KASSERT(cred != NULL);
 1016         rep = kmem_alloc(sizeof(*rep), KM_SLEEP);
 1017         rep->r_nmp = nmp;
 1018         KASSERT(lwp == NULL || lwp == curlwp);
 1019         rep->r_lwp = lwp;
 1020         rep->r_procnum = procnum;
              /* Total the m_len of every mbuf in the argument chain. */
 1021         i = 0;
 1022         m = mrest;
 1023         while (m) {
 1024                 i += m->m_len;
 1025                 m = m->m_next;
 1026         }
 1027         mrest_len = i;
 1028 
 1029         /*
 1030          * Get the RPC header with authorization.
               *
               * Re-entered once (with failed_auth set) when a Kerberos mount
               * gets an authentication error from the server.
 1031          */
 1032 kerbauth:
 1033         verf_str = auth_str = (char *)0;
 1034         if (nmp->nm_flag & NFSMNT_KERB) {
 1035                 verf_str = nickv;
 1036                 verf_len = sizeof (nickv);
 1037                 auth_type = RPCAUTH_KERB4;
 1038                 memset((void *)key, 0, sizeof (key));
                      /*
                       * Try nfs_getnickauth() first; fall back to nfs_getauth()
                       * if that fails or a previous attempt was rejected.
                       */
 1039                 if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
 1040                         &auth_len, verf_str, verf_len)) {
 1041                         error = nfs_getauth(nmp, rep, cred, &auth_str,
 1042                                 &auth_len, verf_str, &verf_len, key);
 1043                         if (error) {
 1044                                 kmem_free(rep, sizeof(*rep));
 1045                                 m_freem(mrest);
 1046                                 KASSERT(kauth_cred_getrefcnt(acred) == 1);
 1047                                 kauth_cred_free(acred);
 1048                                 return (error);
 1049                         }
 1050                 }
 1051                 retry_cred = false;
 1052         } else {
 1053                 /* AUTH_UNIX */
 1054                 uid_t uid;
 1055                 gid_t gid;
 1056 
 1057                 /*
 1058                  * on the most unix filesystems, permission checks are
 1059                  * done when the file is open(2)'ed.
 1060                  * ie. once a file is successfully open'ed,
 1061                  * following i/o operations never fail with EACCES.
 1062                  * we try to follow the semantics as far as possible.
 1063                  *
 1064                  * note that we expect that the nfs server always grant
 1065                  * accesses by the file's owner.
                       *
                       * For READ/WRITE/COMMIT:
                       *  - caller's effective uid/gid equal the cached owner
                       *    ids: send as the caller, no retry;
                       *  - use_opencred set: send as the caller, retry allowed;
                       *  - otherwise: send as acred carrying the owner's
                       *    uid/gid, retry allowed.
                       * Every other procedure is sent as the caller, no retry.
 1066                  */
 1067                 origcred = cred;
 1068                 switch (procnum) {
 1069                 case NFSPROC_READ:
 1070                 case NFSPROC_WRITE:
 1071                 case NFSPROC_COMMIT:
 1072                         uid = np->n_vattr->va_uid;
 1073                         gid = np->n_vattr->va_gid;
 1074                         if (kauth_cred_geteuid(cred) == uid &&
 1075                             kauth_cred_getegid(cred) == gid) {
 1076                                 retry_cred = false;
 1077                                 break;
 1078                         }
 1079                         if (use_opencred)
 1080                                 break;
 1081                         kauth_cred_setuid(acred, uid);
 1082                         kauth_cred_seteuid(acred, uid);
 1083                         kauth_cred_setsvuid(acred, uid);
 1084                         kauth_cred_setgid(acred, gid);
 1085                         kauth_cred_setegid(acred, gid);
 1086                         kauth_cred_setsvgid(acred, gid);
 1087                         cred = acred;
 1088                         break;
 1089                 default:
 1090                         retry_cred = false;
 1091                         break;
 1092                 }
 1093                 /*
 1094                  * backup mbuf chain if we can need it later to retry.
 1095                  *
 1096                  * XXX maybe we can keep a direct reference to
 1097                  * mrest without doing m_copym, but it's ...ugly.
 1098                  */
 1099                 if (retry_cred)
 1100                         mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
 1101                 auth_type = RPCAUTH_UNIX;
 1102                 /* XXX elad - ngroups */
                      /*
                       * Group count clamped to nm_numgrps, times 4, plus
                       * 5 * NFSX_UNSIGNED.
                       *
                       * NOTE(review): verf_len is never assigned on this
                       * AUTH_UNIX path but is still passed to nfsm_rpchead()
                       * below (with verf_str NULL) -- confirm it is ignored
                       * in that case.
                       */
 1103                 auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ?
 1104                         nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) +
 1105                         5 * NFSX_UNSIGNED;
 1106         }
 1107         m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
 1108              auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
 1109         if (auth_str)
 1110                 free(auth_str, M_TEMP);
 1111 
 1112         /*
 1113          * For stream protocols, insert a Sun RPC Record Mark.
               * The mark is one word: 0x80000000 or'ed with the packet length
               * excluding the mark itself, in network byte order.
 1114          */
 1115         if (nmp->nm_sotype == SOCK_STREAM) {
 1116                 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
 1117                 *mtod(m, u_int32_t *) = htonl(0x80000000 |
 1118                          (m->m_pkthdr.len - NFSX_UNSIGNED));
 1119         }
 1120         rep->r_mreq = m;
 1121         rep->r_xid = xid;
              /*
               * Per-attempt state is reset from here; also re-entered from the
               * NFSERR_TRYLATER case with the same request and a renewed xid.
               */
 1122 tryagain:
 1123         if (nmp->nm_flag & NFSMNT_SOFT)
 1124                 rep->r_retry = nmp->nm_retry;
 1125         else
 1126                 rep->r_retry = NFS_MAXREXMIT + 1;       /* past clip limit */
 1127         rep->r_rtt = rep->r_rexmit = 0;
              /* Only procedures with a positive proct[] entry are timed. */
 1128         if (proct[procnum] > 0)
 1129                 rep->r_flags = R_TIMING;
 1130         else
 1131                 rep->r_flags = 0;
 1132         rep->r_mrep = NULL;
 1133 
 1134         /*
 1135          * Do the client side RPC.
 1136          */
 1137         nfsstats.rpcrequests++;
 1138         /*
 1139          * Chain request into list of outstanding requests. Be sure
 1140          * to put it LAST so timer finds oldest requests first.
 1141          */
 1142         s = splsoftnet();
 1143         TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
 1144         nfs_timer_start();
 1145 
 1146         /*
 1147          * If backing off another request or avoiding congestion, don't
 1148          * send this one now but let timer do it. If not timing a request,
 1149          * do it now.
               *
               * We send now only if a socket exists and either the transport is
               * not SOCK_DGRAM, NFSMNT_DUMBTIMR is set, or nm_sent is still
               * below nm_cwnd.
 1150          */
 1151         if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
 1152             (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) {
 1153                 splx(s);
                      /* The send lock is only taken for PR_CONNREQUIRED sockets. */
 1154                 if (nmp->nm_soflags & PR_CONNREQUIRED)
 1155                         error = nfs_sndlock(nmp, rep);
 1156                 if (!error) {
                              /*
                               * Send a copy; r_mreq itself stays attached to the
                               * request.
                               */
 1157                         m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
 1158                         error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp);
 1159                         if (nmp->nm_soflags & PR_CONNREQUIRED)
 1160                                 nfs_sndunlock(nmp);
 1161                 }
 1162                 s = splsoftnet();
                      /*
                       * Account for the request in nm_sent once, unless the send
                       * failed or the request is flagged R_MUSTRESEND.
                       */
 1163                 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
 1164                         if ((rep->r_flags & R_SENT) == 0) {
 1165                                 nmp->nm_sent += NFS_CWNDSCALE;
 1166                                 rep->r_flags |= R_SENT;
 1167                         }
 1168                 }
 1169                 splx(s);
 1170         } else {
 1171                 splx(s);
                      /* Not sent now; r_rtt is set to -1 for this case. */
 1172                 rep->r_rtt = -1;
 1173         }
 1174 
 1175         /*
 1176          * Wait for the reply from our send or the timer's.
               * EPIPE and EWOULDBLOCK from the send path do not prevent the
               * wait; nfs_reply()'s result replaces them.
 1177          */
 1178         if (!error || error == EPIPE || error == EWOULDBLOCK)
 1179                 error = nfs_reply(rep, lwp);
 1180 
 1181         /*
 1182          * RPC done, unlink the request.
 1183          */
 1184         s = splsoftnet();
 1185         TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
 1186 
 1187         /*
 1188          * Decrement the outstanding request count.
 1189          */
 1190         if (rep->r_flags & R_SENT) {
 1191                 rep->r_flags &= ~R_SENT;        /* paranoia */
 1192                 nmp->nm_sent -= NFS_CWNDSCALE;
 1193         }
 1194         splx(s);
 1195 
              /* Report retransmission information if the caller asked for it. */
 1196         if (rexmitp != NULL) {
 1197                 int rexmit;
 1198 
 1199                 if (nmp->nm_sotype != SOCK_DGRAM)
 1200                         rexmit = (rep->r_flags & R_REXMITTED) != 0;
 1201                 else
 1202                         rexmit = rep->r_rexmit;
 1203                 *rexmitp = rexmit;
 1204         }
 1205 
 1206         /*
 1207          * If there was a successful reply and a tprintf msg.
 1208          * tprintf a response.
 1209          */
 1210         if (!error && (rep->r_flags & R_TPRINTFMSG))
 1211                 nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname,
 1212                     "is alive again");
 1213         mrep = rep->r_mrep;
 1214         md = rep->r_md;
 1215         dpos = rep->r_dpos;
 1216         if (error)
 1217                 goto nfsmout;
 1218 
 1219         /*
 1220          * break down the rpc header and check if ok
               *
               * NOTE(review): cp2 and t1 are not referenced directly in this
               * function; presumably the nfsm_dissect()/nfsm_adv() macros use
               * them and branch to nfsmout on a short reply -- confirm against
               * the macro definitions.
 1221          */
 1222         nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 1223         if (*tl++ == rpc_msgdenied) {
 1224                 if (*tl == rpc_mismatch)
 1225                         error = EOPNOTSUPP;
 1226                 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
 1227                         if (!failed_auth) {
                                      /*
                                       * First authentication failure: cut the
                                       * request chain after mheadend so that
                                       * freeing r_mreq leaves the rest intact,
                                       * then rebuild the header at kerbauth.
                                       */
 1228                                 failed_auth++;
 1229                                 mheadend->m_next = (struct mbuf *)0;
 1230                                 m_freem(mrep);
 1231                                 m_freem(rep->r_mreq);
 1232                                 goto kerbauth;
 1233                         } else
 1234                                 error = EAUTH;
 1235                 } else
 1236                         error = EACCES;
 1237                 m_freem(mrep);
 1238                 goto nfsmout;
 1239         }
 1240 
 1241         /*
 1242          * Grab any Kerberos verifier, otherwise just throw it away.
               * i is the verifier length taken from the reply; a non-Kerberos
               * verifier is skipped by advancing past its rounded-up length.
 1243          */
 1244         verf_type = fxdr_unsigned(int, *tl++);
 1245         i = fxdr_unsigned(int32_t, *tl);
 1246         if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
 1247                 error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
 1248                 if (error)
 1249                         goto nfsmout;
 1250         } else if (i > 0)
 1251                 nfsm_adv(nfsm_rndup(i));
 1252         nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 1253         /* 0 == ok */
 1254         if (*tl == 0) {
                      /*
                       * The next word is the NFS status: non-zero values are
                       * translated to an errno by the switch below.
                       */
 1255                 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 1256                 if (*tl != 0) {
 1257                         error = fxdr_unsigned(int, *tl);
 1258                         switch (error) {
 1259                         case NFSERR_PERM:
 1260                                 error = EPERM;
 1261                                 break;
 1262 
 1263                         case NFSERR_NOENT:
 1264                                 error = ENOENT;
 1265                                 break;
 1266 
 1267                         case NFSERR_IO:
 1268                                 error = EIO;
 1269                                 break;
 1270 
 1271                         case NFSERR_NXIO:
 1272                                 error = ENXIO;
 1273                                 break;
 1274 
 1275                         case NFSERR_ACCES:
 1276                                 error = EACCES;
 1277                                 if (!retry_cred)
 1278                                         break;
                                      /*
                                       * Retry once with the other credential
                                       * choice: discard this reply and request,
                                       * flip use_opencred, restore the argument
                                       * chain from the backup copy and the
                                       * caller's original credential, then
                                       * rebuild the request from scratch.
                                       */
 1279                                 m_freem(mrep);
 1280                                 m_freem(rep->r_mreq);
 1281                                 kmem_free(rep, sizeof(*rep));
 1282                                 use_opencred = !use_opencred;
 1283                                 if (mrest_backup == NULL) {
 1284                                         /* m_copym failure */
 1285                                         KASSERT(
 1286                                             kauth_cred_getrefcnt(acred) == 1);
 1287                                         kauth_cred_free(acred);
 1288                                         return ENOMEM;
 1289                                 }
 1290                                 mrest = mrest_backup;
 1291                                 mrest_backup = NULL;
 1292                                 cred = origcred;
 1293                                 error = 0;
 1294                                 retry_cred = false;
 1295                                 goto tryagain_cred;
 1296 
 1297                         case NFSERR_EXIST:
 1298                                 error = EEXIST;
 1299                                 break;
 1300 
 1301                         case NFSERR_XDEV:
 1302                                 error = EXDEV;
 1303                                 break;
 1304 
 1305                         case NFSERR_NODEV:
 1306                                 error = ENODEV;
 1307                                 break;
 1308 
 1309                         case NFSERR_NOTDIR:
 1310                                 error = ENOTDIR;
 1311                                 break;
 1312 
 1313                         case NFSERR_ISDIR:
 1314                                 error = EISDIR;
 1315                                 break;
 1316 
 1317                         case NFSERR_INVAL:
 1318                                 error = EINVAL;
 1319                                 break;
 1320 
 1321                         case NFSERR_FBIG:
 1322                                 error = EFBIG;
 1323                                 break;
 1324 
 1325                         case NFSERR_NOSPC:
 1326                                 error = ENOSPC;
 1327                                 break;
 1328 
 1329                         case NFSERR_ROFS:
 1330                                 error = EROFS;
 1331                                 break;
 1332 
 1333                         case NFSERR_MLINK:
 1334                                 error = EMLINK;
 1335                                 break;
 1336 
 1337                         case NFSERR_TIMEDOUT:
 1338                                 error = ETIMEDOUT;
 1339                                 break;
 1340 
 1341                         case NFSERR_NAMETOL:
 1342                                 error = ENAMETOOLONG;
 1343                                 break;
 1344 
 1345                         case NFSERR_NOTEMPTY:
 1346                                 error = ENOTEMPTY;
 1347                                 break;
 1348 
 1349                         case NFSERR_DQUOT:
 1350                                 error = EDQUOT;
 1351                                 break;
 1352 
 1353                         case NFSERR_STALE:
 1354                                 /*
 1355                                  * If the File Handle was stale, invalidate the
 1356                                  * lookup cache, just in case.
 1357                                  */
 1358                                 error = ESTALE;
 1359                                 cache_purge(NFSTOV(np));
 1360                                 break;
 1361 
 1362                         case NFSERR_REMOTE:
 1363                                 error = EREMOTE;
 1364                                 break;
 1365 
 1366                         case NFSERR_WFLUSH:
 1367                         case NFSERR_BADHANDLE:
 1368                         case NFSERR_NOT_SYNC:
 1369                         case NFSERR_BAD_COOKIE:
 1370                                 error = EINVAL;
 1371                                 break;
 1372 
 1373                         case NFSERR_NOTSUPP:
 1374                                 error = ENOTSUP;
 1375                                 break;
 1376 
 1377                         case NFSERR_TOOSMALL:
 1378                         case NFSERR_SERVERFAULT:
 1379                         case NFSERR_BADTYPE:
 1380                                 error = EINVAL;
 1381                                 break;
 1382 
 1383                         case NFSERR_TRYLATER:
                                      /*
                                       * Only NFSv3 mounts wait and retry.
                                       * NOTE(review): on other mounts this
                                       * break leaves error holding the status
                                       * value decoded from the reply, with no
                                       * errno mapping applied -- confirm that
                                       * is intended.
                                       */
 1384                                 if ((nmp->nm_flag & NFSMNT_NFSV3) == 0)
 1385                                         break;
 1386                                 m_freem(mrep);
 1387                                 error = 0;
                                      /*
                                       * Sleep in hz-tick steps until
                                       * trylater_delay seconds have passed, then
                                       * grow the delay for the next round by
                                       * NFS_TRYLATERDELMUL, capped at
                                       * NFS_TRYLATERDELMAX.
                                       */
 1388                                 waituntil = time_second + trylater_delay;
 1389                                 while (time_second < waituntil) {
 1390                                         kpause("nfstrylater", false, hz, NULL);
 1391                                 }
 1392                                 trylater_delay *= NFS_TRYLATERDELMUL;
 1393                                 if (trylater_delay > NFS_TRYLATERDELMAX)
 1394                                         trylater_delay = NFS_TRYLATERDELMAX;
 1395                                 /*
 1396                                  * RFC1813:
 1397                                  * The client should wait and then try
 1398                                  * the request with a new RPC transaction ID.
 1399                                  */
 1400                                 nfs_renewxid(rep);
 1401                                 goto tryagain;
 1402 
 1403                         default:
 1404 #ifdef DIAGNOSTIC
 1405                                 printf("Invalid rpc error code %d\n", error);
 1406 #endif
 1407                                 error = EINVAL;
 1408                                 break;
 1409                         }
 1410 
                              /*
                               * On NFSv3 the reply is handed back to the caller
                               * along with the error, flagged by NFSERR_RETERR;
                               * otherwise the reply is freed here.
                               */
 1411                         if (nmp->nm_flag & NFSMNT_NFSV3) {
 1412                                 *mrp = mrep;
 1413                                 *mdp = md;
 1414                                 *dposp = dpos;
 1415                                 error |= NFSERR_RETERR;
 1416                         } else
 1417                                 m_freem(mrep);
 1418                         goto nfsmout;
 1419                 }
 1420 
 1421                 /*
 1422                  * note which credential worked to minimize number of retries.
 1423                  */
 1424                 if (use_opencred)
 1425                         np->n_flag |= NUSEOPENCRED;
 1426                 else
 1427                         np->n_flag &= ~NUSEOPENCRED;
 1428 
                      /* Success: pass the reply and parse position to the caller. */
 1429                 *mrp = mrep;
 1430                 *mdp = md;
 1431                 *dposp = dpos;
 1432 
 1433                 KASSERT(error == 0);
 1434                 goto nfsmout;
 1435         }
              /* The word checked above ("0 == ok") was non-zero. */
 1436         m_freem(mrep);
 1437         error = EPROTONOSUPPORT;
              /*
               * Common exit: release the scratch credential, the request mbuf
               * chain, the request structure and any unused backup copy.
               */
 1438 nfsmout:
 1439         KASSERT(kauth_cred_getrefcnt(acred) == 1);
 1440         kauth_cred_free(acred);
 1441         m_freem(rep->r_mreq);
 1442         kmem_free(rep, sizeof(*rep));
 1443         m_freem(mrest_backup);
 1444         return (error);
 1445 }
 1446 #endif /* NFS */
 1447 
 1448 /*
 1449  * Generate the rpc reply header
 1450  * siz arg. is used to decide if adding a cluster is worthwhile
       *
       * siz    - expected size of the reply body; RPC_REPLYSIZ is added and
       *          the sum is compared against max_datalen to choose between
       *          a cluster and a plain header mbuf
       * nd     - request descriptor; supplies the xid (nd_retxid), the
       *          flags (nd_flag), the credential (nd_cr) and nd_nam2
       * slp    - server socket whose uid hash is searched when the request
       *          carries ND_KERBFULL
       * err    - status to encode: ERPCMISMATCH, or any value with the
       *          NFSERR_AUTHERR bit set, produces a "message denied" reply;
       *          everything else produces a "message accepted" reply
       * cache, frev - not referenced in this function body
       * mrq    - if non-NULL, receives the head mbuf of the reply
       * mbp    - receives the function's mb pointer (initially the head mbuf)
       * bposp  - receives bpos, the position just past the bytes built so far
       *
       * Always returns 0.  nfsstats.srvrpc_errs is incremented when err is
       * neither 0 nor NFSERR_RETVOID.
 1451  */
 1452 int
 1453 nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
 1454         int siz;
 1455         struct nfsrv_descript *nd;
 1456         struct nfssvc_sock *slp;
 1457         int err;
 1458         int cache;
 1459         u_quad_t *frev;
 1460         struct mbuf **mrq;
 1461         struct mbuf **mbp;
 1462         char **bposp;
 1463 {
 1464         u_int32_t *tl;
 1465         struct mbuf *mreq;
 1466         char *bpos;
 1467         struct mbuf *mb;
 1468 
 1469         mreq = m_gethdr(M_WAIT, MT_DATA);
 1470         MCLAIM(mreq, &nfs_mowner);
 1471         mb = mreq;
 1472         /*
 1473          * If this is a big reply, use a cluster else
 1474          * try and leave leading space for the lower level headers.
 1475          */
 1476         siz += RPC_REPLYSIZ;
 1477         if (siz >= max_datalen) {
 1478                 m_clget(mreq, M_WAIT);
 1479         } else
 1480                 mreq->m_data += max_hdr;
 1481         tl = mtod(mreq, u_int32_t *);
              /*
               * Claim six NFSX_UNSIGNED-sized words for the fixed part of the
               * header; bpos marks the first byte after them.
               */
 1482         mreq->m_len = 6 * NFSX_UNSIGNED;
 1483         bpos = ((char *)tl) + mreq->m_len;
 1484         *tl++ = txdr_unsigned(nd->nd_retxid);
 1485         *tl++ = rpc_reply;
 1486         if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
 1487                 *tl++ = rpc_msgdenied;
 1488                 if (err & NFSERR_AUTHERR) {
 1489                         *tl++ = rpc_autherr;
 1490                         *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
                              /*
                               * The auth-error form uses only five of the
                               * six words, so give the last one back.
                               */
 1491                         mreq->m_len -= NFSX_UNSIGNED;
 1492                         bpos -= NFSX_UNSIGNED;
 1493                 } else {
                              /*
                               * RPC version mismatch: two version words
                               * follow, both RPC_VER2.
                               */
 1494                         *tl++ = rpc_mismatch;
 1495                         *tl++ = txdr_unsigned(RPC_VER2);
 1496                         *tl = txdr_unsigned(RPC_VER2);
 1497                 }
 1498         } else {
 1499                 *tl++ = rpc_msgaccepted;
 1500 
 1501                 /*
 1502                  * For Kerberos authentication, we must send the nickname
 1503                  * verifier back, otherwise just RPCAUTH_NULL.
 1504                  */
 1505                 if (nd->nd_flag & ND_KERBFULL) {
 1506                         struct nfsuid *nuidp;
 1507                         struct timeval ktvin, ktvout;
 1508 
 1509                         memset(&ktvout, 0, sizeof ktvout);      /* XXX gcc */
 1510 
                              /*
                               * Find the uid entry whose effective uid matches
                               * the request credential and, when nd_nam2 is
                               * set, whose address matches it as well.
                               */
 1511                         LIST_FOREACH(nuidp,
 1512                             NUIDHASH(slp, kauth_cred_geteuid(nd->nd_cr)),
 1513                             nu_hash) {
 1514                                 if (kauth_cred_geteuid(nuidp->nu_cr) ==
 1515                                 kauth_cred_geteuid(nd->nd_cr) &&
 1516                                     (!nd->nd_nam2 || netaddr_match(
 1517                                     NU_NETFAM(nuidp), &nuidp->nu_haddr,
 1518                                     nd->nd_nam2)))
 1519                                         break;
 1520                         }
 1521                         if (nuidp) {
 1522                                 ktvin.tv_sec =
 1523                                     txdr_unsigned(nuidp->nu_timestamp.tv_sec
 1524                                         - 1);
 1525                                 ktvin.tv_usec =
 1526                                     txdr_unsigned(nuidp->nu_timestamp.tv_usec);
 1527 
 1528                                 /*
 1529                                  * Encrypt the timestamp in ecb mode using the
 1530                                  * session key.
                                       *
                                       * NOTE(review): the encryption step is only
                                       * the NFSKERB placeholder below, so nothing
                                       * in this function reads ktvin; the zeroed
                                       * ktvout is what is written to the reply.
 1531                                  */
 1532 #ifdef NFSKERB
 1533                                 XXX
 1534 #endif
 1535 
 1536                                 *tl++ = rpc_auth_kerb;
 1537                                 *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
 1538                                 *tl = ktvout.tv_sec;
                                      /*
                                       * nfsm_build is a macro defined elsewhere;
                                       * presumably it extends the reply by the
                                       * given byte count and repoints tl at the
                                       * new space - TODO confirm.
                                       */
 1539                                 nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 1540                                 *tl++ = ktvout.tv_usec;
 1541                                 *tl++ = txdr_unsigned(
 1542                                     kauth_cred_geteuid(nuidp->nu_cr));
 1543                         } else {
                                      /* No matching uid entry: two zero verifier words. */
 1544                                 *tl++ = 0;
 1545                                 *tl++ = 0;
 1546                         }
 1547                 } else {
 1548                         *tl++ = 0;
 1549                         *tl++ = 0;
 1550                 }
                      /*
                       * tl now points at the accept-status word.  Map the
                       * RPC-level errors to their RPC_* codes; anything else is
                       * reported as 0 at this level.
                       */
 1551                 switch (err) {
 1552                 case EPROGUNAVAIL:
 1553                         *tl = txdr_unsigned(RPC_PROGUNAVAIL);
 1554                         break;
 1555                 case EPROGMISMATCH:
 1556                         *tl = txdr_unsigned(RPC_PROGMISMATCH);
                              /* Two extra words follow the status: 2 and 3. */
 1557                         nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 1558                         *tl++ = txdr_unsigned(2);
 1559                         *tl = txdr_unsigned(3);
 1560                         break;
 1561                 case EPROCUNAVAIL:
 1562                         *tl = txdr_unsigned(RPC_PROCUNAVAIL);
 1563                         break;
 1564                 case EBADRPC:
 1565                         *tl = txdr_unsigned(RPC_GARBAGE);
 1566                         break;
 1567                 default:
 1568                         *tl = 0;
                              /*
                               * Unless the caller asked for no status word
                               * (NFSERR_RETVOID), append one more word: the
                               * result of nfsrv_errmap(nd, err), or 0 when
                               * err is 0.
                               */
 1569                         if (err != NFSERR_RETVOID) {
 1570                                 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 1571                                 if (err)
 1572                                     *tl = txdr_unsigned(nfsrv_errmap(nd, err));
 1573                                 else
 1574                                     *tl = 0;
 1575                         }
 1576                         break;
 1577                 };
 1578         }
 1579 
              /* Hand the reply chain and build position back to the caller. */
 1580         if (mrq != NULL)
 1581                 *mrq = mreq;
 1582         *mbp = mb;
 1583         *bposp = bpos;
 1584         if (err != 0 && err != NFSERR_RETVOID)
 1585                 nfsstats.srvrpc_errs++;
 1586         return (0);
 1587 }
 1588 
 1589 static void
 1590 nfs_timer_schedule(void)
 1591 {
 1592 
 1593         callout_schedule(&nfs_timer_ch, nfs_ticks);
 1594 }
 1595 
 1596 void
 1597 nfs_timer_start(void)
 1598 {
 1599 
 1600         if (callout_pending(&nfs_timer_ch))
 1601                 return;
 1602 
 1603         nfs_timer_start_ev.ev_count++;
 1604         nfs_timer_schedule();
 1605 }
 1606 
 1607 void
 1608 nfs_timer_init(void)
 1609 {
 1610 
 1611         callout_init(&nfs_timer_ch, 0);
 1612         callout_setfunc(&nfs_timer_ch, nfs_timer, NULL);
 1613         evcnt_attach_dynamic(&nfs_timer_ev, EVCNT_TYPE_MISC, NULL,
 1614             "nfs", "timer");
 1615         evcnt_attach_dynamic(&nfs_timer_start_ev, EVCNT_TYPE_MISC, NULL,
 1616             "nfs", "timer start");
 1617         evcnt_attach_dynamic(&nfs_timer_stop_ev, EVCNT_TYPE_MISC, NULL,
 1618             "nfs", "timer stop");
 1619 }
 1620 
 1621 /*
 1622  * Nfs timer routine
 1623  * Scan the nfsreq list and retransmit any requests that have timed out
 1624  * To avoid retransmission attempts on STREAM sockets (in the future) make
 1625  * sure to set the r_retry field to 0 (implies nm_retry == 0).
       *
       * The arg parameter is not used.  The routine re-arms itself through
       * nfs_timer_schedule() as long as there is at least one queued request
       * or, with NFSSERVER, at least one socket with a queued write
       * descriptor; otherwise it only bumps nfs_timer_stop_ev.
 1626  */
 1627 void
 1628 nfs_timer(void *arg)
 1629 {
 1630         struct nfsreq *rep;
 1631         struct mbuf *m;
 1632         struct socket *so;
 1633         struct nfsmount *nmp;
 1634         int timeo;
 1635         int error;
 1636         bool more = false;
 1637 #ifdef NFSSERVER
 1638         struct timeval tv;
 1639         struct nfssvc_sock *slp;
 1640         u_quad_t cur_usec;
 1641 #endif
 1642 
 1643         nfs_timer_ev.ev_count++;
 1644 
 1645         mutex_enter(softnet_lock);      /* XXX PR 40491 */
 1646         TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
                      /* Any queued request at all keeps the timer running. */
 1647                 more = true;
 1648                 nmp = rep->r_nmp;
                      /* Already answered or already terminated: nothing to do. */
 1649                 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
 1650                         continue;
 1651                 if (nfs_sigintr(nmp, rep, rep->r_lwp)) {
 1652                         rep->r_flags |= R_SOFTTERM;
 1653                         continue;
 1654                 }
                      /*
                       * r_rtt counts timer ticks since the last successful
                       * send (it is reset to 0 below).  A negative value means
                       * a send is still owed, so the timeout test is skipped
                       * and the code falls through to the resend attempt.
                       */
 1655                 if (rep->r_rtt >= 0) {
 1656                         rep->r_rtt++;
 1657                         if (nmp->nm_flag & NFSMNT_DUMBTIMR)
 1658                                 timeo = nmp->nm_timeo;
 1659                         else
 1660                                 timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
 1661                         if (nmp->nm_timeouts > 0)
 1662                                 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
 1663                         if (timeo > NFS_MAXTIMEO)
 1664                                 timeo = NFS_MAXTIMEO;
 1665                         if (rep->r_rtt <= timeo)
 1666                                 continue;
                              /*
                               * Timed out.  nm_timeouts is capped at the size
                               * of nfs_backoff so the index used above stays
                               * inside the table.
                               */
 1667                         if (nmp->nm_timeouts <
 1668                             (sizeof(nfs_backoff) / sizeof(nfs_backoff[0])))
 1669                                 nmp->nm_timeouts++;
 1670                 }
 1671                 /*
 1672                  * Check for server not responding
                       * (R_TPRINTFMSG ensures the message is issued only once
                       * per request from here).
 1673                  */
 1674                 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
 1675                      rep->r_rexmit > nmp->nm_deadthresh) {
 1676                         nfs_msg(rep->r_lwp,
 1677                             nmp->nm_mountp->mnt_stat.f_mntfromname,
 1678                             "not responding");
 1679                         rep->r_flags |= R_TPRINTFMSG;
 1680                 }
 1681                 if (rep->r_rexmit >= rep->r_retry) {    /* too many */
 1682                         nfsstats.rpctimeouts++;
 1683                         rep->r_flags |= R_SOFTTERM;
 1684                         continue;
 1685                 }
                      /*
                       * Non-datagram sockets are not resent from here; only
                       * the retransmit counter advances (capped at
                       * NFS_MAXREXMIT).
                       */
 1686                 if (nmp->nm_sotype != SOCK_DGRAM) {
 1687                         if (++rep->r_rexmit > NFS_MAXREXMIT)
 1688                                 rep->r_rexmit = NFS_MAXREXMIT;
 1689                         continue;
 1690                 }
 1691                 if ((so = nmp->nm_so) == NULL)
 1692                         continue;
 1693 
 1694                 /*
 1695                  * If there is enough space and the window allows..
 1696                  *      Resend it
 1697                  * Set r_rtt to -1 in case we fail to send it now.
 1698                  */
 1699                 /* solock(so);          XXX PR 40491 */
 1700                 rep->r_rtt = -1;
 1701                 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
 1702                    ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
 1703                     (rep->r_flags & R_SENT) ||
 1704                     nmp->nm_sent < nmp->nm_cwnd) &&
 1705                    (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
                              /*
                               * Send a copy of the request.  A connected
                               * socket gets a null fourth argument; otherwise
                               * nmp->nm_nam is passed there (presumably the
                               * destination address).
                               */
 1706                         if (so->so_state & SS_ISCONNECTED)
 1707                             error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
 1708                             (struct mbuf *)0, (struct mbuf *)0, (struct lwp *)0);
 1709                         else
 1710                             error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
 1711                             nmp->nm_nam, (struct mbuf *)0, (struct lwp *)0);
 1712                         if (error) {
                                      /*
                                       * Send failed: r_rtt stays at -1.  For
                                       * errors NFSIGNORE_SOERROR accepts, the
                                       * socket error is also cleared.
                                       */
 1713                                 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
 1714 #ifdef DEBUG
 1715                                         if (ratecheck(&nfs_timer_last_err_time,
 1716                                             &nfs_err_interval))
 1717                                                 printf("%s: ignoring error "
 1718                                                        "%d\n", __func__, error);
 1719 #endif
 1720                                         so->so_error = 0;
 1721                                 }
 1722                         } else {
 1723                                 /*
 1724                                  * Iff first send, start timing
 1725                                  * else turn timing off, backoff timer
 1726                                  * and divide congestion window by 2.
 1727                                  */
 1728                                 if (rep->r_flags & R_SENT) {
 1729                                         rep->r_flags &= ~R_TIMING;
 1730                                         if (++rep->r_rexmit > NFS_MAXREXMIT)
 1731                                                 rep->r_rexmit = NFS_MAXREXMIT;
 1732                                         nmp->nm_cwnd >>= 1;
 1733                                         if (nmp->nm_cwnd < NFS_CWNDSCALE)
 1734                                                 nmp->nm_cwnd = NFS_CWNDSCALE;
 1735                                         nfsstats.rpcretries++;
 1736                                 } else {
 1737                                         rep->r_flags |= R_SENT;
 1738                                         nmp->nm_sent += NFS_CWNDSCALE;
 1739                                 }
                                      /* Sent: restart the tick count. */
 1740                                 rep->r_rtt = 0;
 1741                         }
 1742                 }
 1743                 /* sounlock(so);        XXX PR 40491 */
 1744         }
 1745         mutex_exit(softnet_lock);       /* XXX PR 40491 */
 1746 
 1747 #ifdef NFSSERVER
 1748         /*
 1749          * Scan the write gathering queues for writes that need to be
 1750          * completed now.
               *
               * Only the first descriptor on each socket's ns_tq list is
               * examined; the socket's nfsd is woken when that descriptor's
               * nd_time is not later than the current time in microseconds.
               * A non-empty list also keeps the timer running.
 1751          */
 1752         getmicrotime(&tv);
 1753         cur_usec = (u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec;
 1754         mutex_enter(&nfsd_lock);
 1755         TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
 1756                 struct nfsrv_descript *nd;
 1757 
 1758                 nd = LIST_FIRST(&slp->ns_tq);
 1759                 if (nd != NULL) {
 1760                         if (nd->nd_time <= cur_usec) {
 1761                                 nfsrv_wakenfsd_locked(slp);
 1762                         }
 1763                         more = true;
 1764                 }
 1765         }
 1766         mutex_exit(&nfsd_lock);
 1767 #endif /* NFSSERVER */
              /* Re-arm only while there is work left; otherwise count a stop. */
 1768         if (more) {
 1769                 nfs_timer_schedule();
 1770         } else {
 1771                 nfs_timer_stop_ev.ev_count++;
 1772         }
 1773 }
 1774 
 1775 /*
 1776  * Test for a termination condition pending on the process.
 1777  * This is used for NFSMNT_INT mounts.
 1778  */
 1779 int
 1780 nfs_sigintr(nmp, rep, l)
 1781         struct nfsmount *nmp;
 1782         struct nfsreq *rep;
 1783         struct lwp *l;
 1784 {
 1785         sigset_t ss;
 1786 
 1787         if (rep && (rep->r_flags & R_SOFTTERM))
 1788                 return (EINTR);
 1789         if (!(nmp->nm_flag & NFSMNT_INT))
 1790                 return (0);
 1791         if (l) {
 1792                 sigpending1(l, &ss);
 1793 #if 0
 1794                 sigminusset(&l->l_proc->p_sigctx.ps_sigignore, &ss);
 1795 #endif
 1796                 if (sigismember(&ss, SIGINT) || sigismember(&ss, SIGTERM) ||
 1797                     sigismember(&ss, SIGKILL) || sigismember(&ss, SIGHUP) ||
 1798                     sigismember(&ss, SIGQUIT))
 1799                         return (EINTR);
 1800         }
 1801         return (0);
 1802 }
 1803 
 1804 #ifdef NFS
 1805 /*
 1806  * Lock a socket against others.
 1807  * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 1808  * and also to avoid race conditions between the processes with nfs requests
 1809  * in progress when a reconnect is necessary.
 1810  */
 1811 static int
 1812 nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep)
 1813 {
 1814         struct lwp *l;
 1815         int timeo = 0;
 1816         bool catch = false;
 1817         int error = 0;
 1818 
 1819         if (rep) {
 1820                 l = rep->r_lwp;
 1821                 if (rep->r_nmp->nm_flag & NFSMNT_INT)
 1822                         catch = true;
 1823         } else
 1824                 l = NULL;
 1825         mutex_enter(&nmp->nm_lock);
 1826         while ((nmp->nm_iflag & NFSMNT_SNDLOCK) != 0) {
 1827                 if (rep && nfs_sigintr(rep->r_nmp, rep, l)) {
 1828                         error = EINTR;
 1829                         goto quit;
 1830                 }
 1831                 if (catch) {
 1832                         cv_timedwait_sig(&nmp->nm_sndcv, &nmp->nm_lock, timeo);
 1833                 } else {
 1834                         cv_timedwait(&nmp->nm_sndcv, &nmp->nm_lock, timeo);
 1835                 }
 1836                 if (catch) {
 1837                         catch = false;
 1838                         timeo = 2 * hz;
 1839                 }
 1840         }
 1841         nmp->nm_iflag |= NFSMNT_SNDLOCK;
 1842 quit:
 1843         mutex_exit(&nmp->nm_lock);
 1844         return error;
 1845 }
 1846 
 1847 /*
 1848  * Unlock the stream socket for others.
 1849  */
 1850 static void
 1851 nfs_sndunlock(struct nfsmount *nmp)
 1852 {
 1853 
 1854         mutex_enter(&nmp->nm_lock);
 1855         if ((nmp->nm_iflag & NFSMNT_SNDLOCK) == 0)
 1856                 panic("nfs sndunlock");
 1857         nmp->nm_iflag &= ~NFSMNT_SNDLOCK;
 1858         cv_signal(&nmp->nm_sndcv);
 1859         mutex_exit(&nmp->nm_lock);
 1860 }
 1861 #endif /* NFS */
 1862 
 1863 static int
 1864 nfs_rcvlock(struct nfsmount *nmp, struct nfsreq *rep)
 1865 {
 1866         int *flagp = &nmp->nm_iflag;
 1867         int slptimeo = 0;
 1868         bool catch;
 1869         int error = 0;
 1870 
 1871         KASSERT(nmp == rep->r_nmp);
 1872 
 1873         catch = (nmp->nm_flag & NFSMNT_INT) != 0;
 1874         mutex_enter(&nmp->nm_lock);
 1875         while (/* CONSTCOND */ true) {
 1876                 if (*flagp & NFSMNT_DISMNT) {
 1877                         cv_signal(&nmp->nm_disconcv);
 1878                         error = EIO;
 1879                         break;
 1880                 }
 1881                 /* If our reply was received while we were sleeping,
 1882                  * then just return without taking the lock to avoid a
 1883                  * situation where a single iod could 'capture' the
 1884                  * receive lock.
 1885                  */
 1886                 if (rep->r_mrep != NULL) {
 1887                         error = EALREADY;
 1888                         break;
 1889                 }
 1890                 if (nfs_sigintr(rep->r_nmp, rep, rep->r_lwp)) {
 1891                         error = EINTR;
 1892                         break;
 1893                 }
 1894                 if ((*flagp & NFSMNT_RCVLOCK) == 0) {
 1895                         *flagp |= NFSMNT_RCVLOCK;
 1896                         break;
 1897                 }
 1898                 if (catch) {
 1899                         cv_timedwait_sig(&nmp->nm_rcvcv, &nmp->nm_lock,
 1900                             slptimeo);
 1901                 } else {
 1902                         cv_timedwait(&nmp->nm_rcvcv, &nmp->nm_lock,
 1903                             slptimeo);
 1904                 }
 1905                 if (catch) {
 1906                         catch = false;
 1907                         slptimeo = 2 * hz;
 1908                 }
 1909         }
 1910         mutex_exit(&nmp->nm_lock);
 1911         return error;
 1912 }
 1913 
 1914 /*
 1915  * Unlock the stream socket for others.
 1916  */
 1917 static void
 1918 nfs_rcvunlock(struct nfsmount *nmp)
 1919 {
 1920 
 1921         mutex_enter(&nmp->nm_lock);
 1922         if ((nmp->nm_iflag & NFSMNT_RCVLOCK) == 0)
 1923                 panic("nfs rcvunlock");
 1924         nmp->nm_iflag &= ~NFSMNT_RCVLOCK;
 1925         cv_broadcast(&nmp->nm_rcvcv);
 1926         mutex_exit(&nmp->nm_lock);
 1927 }
 1928 
 1929 /*
 1930  * Parse an RPC request
 1931  * - verify it
 1932  * - allocate and fill in the cred.
 1933  */
 1934 int
 1935 nfs_getreq(nd, nfsd, has_header)
 1936         struct nfsrv_descript *nd;
 1937         struct nfsd *nfsd;
 1938         int has_header;
 1939 {
 1940         int len, i;
 1941         u_int32_t *tl;
 1942         int32_t t1;
 1943         struct uio uio;
 1944         struct iovec iov;
 1945         char *dpos, *cp2, *cp;
 1946         u_int32_t nfsvers, auth_type;
 1947         uid_t nickuid;
 1948         int error = 0, ticklen;
 1949         struct mbuf *mrep, *md;
 1950         struct nfsuid *nuidp;
 1951         struct timeval tvin, tvout;
 1952 
 1953         memset(&tvout, 0, sizeof tvout);        /* XXX gcc */
 1954 
 1955         KASSERT(nd->nd_cr == NULL);
 1956         mrep = nd->nd_mrep;
 1957         md = nd->nd_md;
 1958         dpos = nd->nd_dpos;
 1959         if (has_header) {
 1960                 nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
 1961                 nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
 1962                 if (*tl++ != rpc_call) {
 1963                         m_freem(mrep);
 1964                         return (EBADRPC);
 1965                 }
 1966         } else
 1967                 nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
 1968         nd->nd_repstat = 0;
 1969         nd->nd_flag = 0;
 1970         if (*tl++ != rpc_vers) {
 1971                 nd->nd_repstat = ERPCMISMATCH;
 1972                 nd->nd_procnum = NFSPROC_NOOP;
 1973                 return (0);
 1974         }
 1975         if (*tl != nfs_prog) {
 1976                 nd->nd_repstat = EPROGUNAVAIL;
 1977                 nd->nd_procnum = NFSPROC_NOOP;
 1978                 return (0);
 1979         }
 1980         tl++;
 1981         nfsvers = fxdr_unsigned(u_int32_t, *tl++);
 1982         if (nfsvers < NFS_VER2 || nfsvers > NFS_VER3) {
 1983                 nd->nd_repstat = EPROGMISMATCH;
 1984                 nd->nd_procnum = NFSPROC_NOOP;
 1985                 return (0);
 1986         }
 1987         if (nfsvers == NFS_VER3)
 1988                 nd->nd_flag = ND_NFSV3;
 1989         nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
 1990         if (nd->nd_procnum == NFSPROC_NULL)
 1991                 return (0);
 1992         if (nd->nd_procnum > NFSPROC_COMMIT ||
 1993             (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
 1994                 nd->nd_repstat = EPROCUNAVAIL;
 1995                 nd->nd_procnum = NFSPROC_NOOP;
 1996                 return (0);
 1997         }
 1998         if ((nd->nd_flag & ND_NFSV3) == 0)
 1999                 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
 2000         auth_type = *tl++;
 2001         len = fxdr_unsigned(int, *tl++);
 2002         if (len < 0 || len > RPCAUTH_MAXSIZ) {
 2003                 m_freem(mrep);
 2004                 return (EBADRPC);
 2005         }
 2006 
 2007         nd->nd_flag &= ~ND_KERBAUTH;
 2008         /*
 2009          * Handle auth_unix or auth_kerb.
 2010          */
 2011         if (auth_type == rpc_auth_unix) {
 2012                 uid_t uid;
 2013                 gid_t gid;
 2014 
 2015                 nd->nd_cr = kauth_cred_alloc();
 2016                 len = fxdr_unsigned(int, *++tl);
 2017                 if (len < 0 || len > NFS_MAXNAMLEN) {
 2018                         m_freem(mrep);
 2019                         error = EBADRPC;
 2020                         goto errout;
 2021                 }
 2022                 nfsm_adv(nfsm_rndup(len));
 2023                 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 2024 
 2025                 uid = fxdr_unsigned(uid_t, *tl++);
 2026                 gid = fxdr_unsigned(gid_t, *tl++);
 2027                 kauth_cred_setuid(nd->nd_cr, uid);
 2028                 kauth_cred_seteuid(nd->nd_cr, uid);
 2029                 kauth_cred_setsvuid(nd->nd_cr, uid);
 2030                 kauth_cred_setgid(nd->nd_cr, gid);
 2031                 kauth_cred_setegid(nd->nd_cr, gid);
 2032                 kauth_cred_setsvgid(nd->nd_cr, gid);
 2033 
 2034                 len = fxdr_unsigned(int, *tl);
 2035                 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
 2036                         m_freem(mrep);
 2037                         error = EBADRPC;
 2038                         goto errout;
 2039                 }
 2040                 nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
 2041 
 2042                 if (len > 0) {
 2043                         size_t grbuf_size = min(len, NGROUPS) * sizeof(gid_t);
 2044                         gid_t *grbuf = kmem_alloc(grbuf_size, KM_SLEEP);
 2045 
 2046                         for (i = 0; i < len; i++) {
 2047                                 if (i < NGROUPS) /* XXX elad */
 2048                                         grbuf[i] = fxdr_unsigned(gid_t, *tl++);
 2049                                 else
 2050                                         tl++;
 2051                         }
 2052                         kauth_cred_setgroups(nd->nd_cr, grbuf,
 2053                             min(len, NGROUPS), -1, UIO_SYSSPACE);
 2054                         kmem_free(grbuf, grbuf_size);
 2055                 }
 2056 
 2057                 len = fxdr_unsigned(int, *++tl);
 2058                 if (len < 0 || len > RPCAUTH_MAXSIZ) {
 2059                         m_freem(mrep);
 2060                         error = EBADRPC;
 2061                         goto errout;
 2062                 }
 2063                 if (len > 0)
 2064                         nfsm_adv(nfsm_rndup(len));
 2065         } else if (auth_type == rpc_auth_kerb) {
 2066                 switch (fxdr_unsigned(int, *tl++)) {
 2067                 case RPCAKN_FULLNAME:
 2068                         ticklen = fxdr_unsigned(int, *tl);
 2069                         *((u_int32_t *)nfsd->nfsd_authstr) = *tl;
 2070                         uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
 2071                         nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
 2072                         if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
 2073                                 m_freem(mrep);
 2074                                 error = EBADRPC;
 2075                                 goto errout;
 2076                         }
 2077                         uio.uio_offset = 0;
 2078                         uio.uio_iov = &iov;
 2079                         uio.uio_iovcnt = 1;
 2080                         UIO_SETUP_SYSSPACE(&uio);
 2081                         iov.iov_base = (void *)&nfsd->nfsd_authstr[4];
 2082                         iov.iov_len = RPCAUTH_MAXSIZ - 4;
 2083                         nfsm_mtouio(&uio, uio.uio_resid);
 2084                         nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2085                         if (*tl++ != rpc_auth_kerb ||
 2086                                 fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
 2087                                 printf("Bad kerb verifier\n");
 2088                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
 2089                                 nd->nd_procnum = NFSPROC_NOOP;
 2090                                 return (0);
 2091                         }
 2092                         nfsm_dissect(cp, void *, 4 * NFSX_UNSIGNED);
 2093                         tl = (u_int32_t *)cp;
 2094                         if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
 2095                                 printf("Not fullname kerb verifier\n");
 2096                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
 2097                                 nd->nd_procnum = NFSPROC_NOOP;
 2098                                 return (0);
 2099                         }
 2100                         cp += NFSX_UNSIGNED;
 2101                         memcpy(nfsd->nfsd_verfstr, cp, 3 * NFSX_UNSIGNED);
 2102                         nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
 2103                         nd->nd_flag |= ND_KERBFULL;
 2104                         nfsd->nfsd_flag |= NFSD_NEEDAUTH;
 2105                         break;
 2106                 case RPCAKN_NICKNAME:
 2107                         if (len != 2 * NFSX_UNSIGNED) {
 2108                                 printf("Kerb nickname short\n");
 2109                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
 2110                                 nd->nd_procnum = NFSPROC_NOOP;
 2111                                 return (0);
 2112                         }
 2113                         nickuid = fxdr_unsigned(uid_t, *tl);
 2114                         nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 2115                         if (*tl++ != rpc_auth_kerb ||
 2116                                 fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
 2117                                 printf("Kerb nick verifier bad\n");
 2118                                 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
 2119                                 nd->nd_procnum = NFSPROC_NOOP;
 2120                                 return (0);
 2121                         }
 2122                         nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 2123                         tvin.tv_sec = *tl++;
 2124                         tvin.tv_usec = *tl;
 2125 
 2126                         LIST_FOREACH(nuidp, NUIDHASH(nfsd->nfsd_slp, nickuid),
 2127                             nu_hash) {
 2128                                 if (kauth_cred_geteuid(nuidp->nu_cr) == nickuid &&
 2129                                     (!nd->nd_nam2 ||
 2130                                      netaddr_match(NU_NETFAM(nuidp),
 2131                                       &nuidp->nu_haddr, nd->nd_nam2)))
 2132                                         break;
 2133                         }
 2134                         if (!nuidp) {
 2135                                 nd->nd_repstat =
 2136                                         (NFSERR_AUTHERR|AUTH_REJECTCRED);
 2137                                 nd->nd_procnum = NFSPROC_NOOP;
 2138                                 return (0);
 2139                         }
 2140 
 2141                         /*
 2142                          * Now, decrypt the timestamp using the session key
 2143                          * and validate it.
 2144                          */
 2145 #ifdef NFSKERB
 2146                         XXX
 2147 #endif
 2148 
 2149                         tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
 2150                         tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
 2151                         if (nuidp->nu_expire < time_second ||
 2152                             nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
 2153                             (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
 2154                              nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
 2155                                 nuidp->nu_expire = 0;
 2156                                 nd->nd_repstat =
 2157                                     (NFSERR_AUTHERR|AUTH_REJECTVERF);
 2158                                 nd->nd_procnum = NFSPROC_NOOP;
 2159                                 return (0);
 2160                         }
 2161                         kauth_cred_hold(nuidp->nu_cr);
 2162                         nd->nd_cr = nuidp->nu_cr;
 2163                         nd->nd_flag |= ND_KERBNICK;
 2164                 }
 2165         } else {
 2166                 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
 2167                 nd->nd_procnum = NFSPROC_NOOP;
 2168                 return (0);
 2169         }
 2170 
 2171         nd->nd_md = md;
 2172         nd->nd_dpos = dpos;
 2173         KASSERT((nd->nd_cr == NULL && (nfsd->nfsd_flag & NFSD_NEEDAUTH) != 0)
 2174              || (nd->nd_cr != NULL && (nfsd->nfsd_flag & NFSD_NEEDAUTH) == 0));
 2175         return (0);
 2176 nfsmout:
 2177 errout:
 2178         KASSERT(error != 0);
 2179         if (nd->nd_cr != NULL) {
 2180                 kauth_cred_free(nd->nd_cr);
 2181                 nd->nd_cr = NULL;
 2182         }
 2183         return (error);
 2184 }
 2185 
 2186 int
 2187 nfs_msg(l, server, msg)
 2188         struct lwp *l;
 2189         const char *server, *msg;
 2190 {
 2191         tpr_t tpr;
 2192 
 2193 #if 0 /* XXX nfs_timer can't block on proc_lock */
 2194         if (l)
 2195                 tpr = tprintf_open(l->l_proc);
 2196         else
 2197 #endif
 2198                 tpr = NULL;
 2199         tprintf(tpr, "nfs server %s: %s\n", server, msg);
 2200         tprintf_close(tpr);
 2201         return (0);
 2202 }
 2203 
 2204 #ifdef NFSSERVER
 2205 int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *,
 2206                                     struct nfssvc_sock *, struct lwp *,
 2207                                     struct mbuf **)) = {
 2208         nfsrv_null,
 2209         nfsrv_getattr,
 2210         nfsrv_setattr,
 2211         nfsrv_lookup,
 2212         nfsrv3_access,
 2213         nfsrv_readlink,
 2214         nfsrv_read,
 2215         nfsrv_write,
 2216         nfsrv_create,
 2217         nfsrv_mkdir,
 2218         nfsrv_symlink,
 2219         nfsrv_mknod,
 2220         nfsrv_remove,
 2221         nfsrv_rmdir,
 2222         nfsrv_rename,
 2223         nfsrv_link,
 2224         nfsrv_readdir,
 2225         nfsrv_readdirplus,
 2226         nfsrv_statfs,
 2227         nfsrv_fsinfo,
 2228         nfsrv_pathconf,
 2229         nfsrv_commit,
 2230         nfsrv_noop
 2231 };
 2232 
 2233 /*
 2234  * Socket upcall routine for the nfsd sockets.
 2235  * The void *arg is a pointer to the "struct nfssvc_sock".
 2236  */
 2237 void
 2238 nfsrv_soupcall(struct socket *so, void *arg, int waitflag)
 2239 {
 2240         struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
 2241 
 2242         nfsdsock_setbits(slp, SLP_A_NEEDQ);
 2243         nfsrv_wakenfsd(slp);
 2244 }
 2245 
 2246 void
 2247 nfsrv_rcv(struct nfssvc_sock *slp)
 2248 {
 2249         struct socket *so;
 2250         struct mbuf *m;
 2251         struct mbuf *mp, *nam;
 2252         struct uio auio;
 2253         int flags;
 2254         int error;
 2255         int setflags = 0;
 2256 
 2257         error = nfsdsock_lock(slp, true);
 2258         if (error) {
 2259                 setflags |= SLP_A_NEEDQ;
 2260                 goto dorecs_unlocked;
 2261         }
 2262 
 2263         nfsdsock_clearbits(slp, SLP_A_NEEDQ);
 2264 
 2265         so = slp->ns_so;
 2266         if (so->so_type == SOCK_STREAM) {
 2267                 /*
 2268                  * Do soreceive().
 2269                  */
 2270                 auio.uio_resid = 1000000000;
 2271                 /* not need to setup uio_vmspace */
 2272                 flags = MSG_DONTWAIT;
 2273                 error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags);
 2274                 if (error || mp == NULL) {
 2275                         if (error == EWOULDBLOCK)
 2276                                 setflags |= SLP_A_NEEDQ;
 2277                         else
 2278                                 setflags |= SLP_A_DISCONN;
 2279                         goto dorecs;
 2280                 }
 2281                 m = mp;
 2282                 m_claimm(m, &nfs_mowner);
 2283                 if (slp->ns_rawend) {
 2284                         slp->ns_rawend->m_next = m;
 2285                         slp->ns_cc += 1000000000 - auio.uio_resid;
 2286                 } else {
 2287                         slp->ns_raw = m;
 2288                         slp->ns_cc = 1000000000 - auio.uio_resid;
 2289                 }
 2290                 while (m->m_next)
 2291                         m = m->m_next;
 2292                 slp->ns_rawend = m;
 2293 
 2294                 /*
 2295                  * Now try and parse record(s) out of the raw stream data.
 2296                  */
 2297                 error = nfsrv_getstream(slp, M_WAIT);
 2298                 if (error) {
 2299                         if (error == EPERM)
 2300                                 setflags |= SLP_A_DISCONN;
 2301                         else
 2302                                 setflags |= SLP_A_NEEDQ;
 2303                 }
 2304         } else {
 2305                 do {
 2306                         auio.uio_resid = 1000000000;
 2307                         /* not need to setup uio_vmspace */
 2308                         flags = MSG_DONTWAIT;
 2309                         error = (*so->so_receive)(so, &nam, &auio, &mp, NULL,
 2310                             &flags);
 2311                         if (mp) {
 2312                                 if (nam) {
 2313                                         m = nam;
 2314                                         m->m_next = mp;
 2315                                 } else
 2316                                         m = mp;
 2317                                 m_claimm(m, &nfs_mowner);
 2318                                 if (slp->ns_recend)
 2319                                         slp->ns_recend->m_nextpkt = m;
 2320                                 else
 2321                                         slp->ns_rec = m;
 2322                                 slp->ns_recend = m;
 2323                                 m->m_nextpkt = (struct mbuf *)0;
 2324                         }
 2325                         if (error) {
 2326                                 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
 2327                                     && error != EWOULDBLOCK) {
 2328                                         setflags |= SLP_A_DISCONN;
 2329                                         goto dorecs;
 2330                                 }
 2331                         }
 2332                 } while (mp);
 2333         }
 2334 dorecs:
 2335         nfsdsock_unlock(slp);
 2336 
 2337 dorecs_unlocked:
 2338         if (setflags) {
 2339                 nfsdsock_setbits(slp, setflags);
 2340         }
 2341 }
 2342 
 2343 int
 2344 nfsdsock_lock(struct nfssvc_sock *slp, bool waitok)
 2345 {
 2346 
 2347         mutex_enter(&slp->ns_lock);
 2348         while ((~slp->ns_flags & (SLP_BUSY|SLP_VALID)) == 0) {
 2349                 if (!waitok) {
 2350                         mutex_exit(&slp->ns_lock);
 2351                         return EWOULDBLOCK;
 2352                 }
 2353                 cv_wait(&slp->ns_cv, &slp->ns_lock);
 2354         }
 2355         if ((slp->ns_flags & SLP_VALID) == 0) {
 2356                 mutex_exit(&slp->ns_lock);
 2357                 return EINVAL;
 2358         }
 2359         KASSERT((slp->ns_flags & SLP_BUSY) == 0);
 2360         slp->ns_flags |= SLP_BUSY;
 2361         mutex_exit(&slp->ns_lock);
 2362 
 2363         return 0;
 2364 }
 2365 
 2366 void
 2367 nfsdsock_unlock(struct nfssvc_sock *slp)
 2368 {
 2369 
 2370         mutex_enter(&slp->ns_lock);
 2371         KASSERT((slp->ns_flags & SLP_BUSY) != 0);
 2372         cv_broadcast(&slp->ns_cv);
 2373         slp->ns_flags &= ~SLP_BUSY;
 2374         mutex_exit(&slp->ns_lock);
 2375 }
 2376 
 2377 int
 2378 nfsdsock_drain(struct nfssvc_sock *slp)
 2379 {
 2380         int error = 0;
 2381 
 2382         mutex_enter(&slp->ns_lock);
 2383         if ((slp->ns_flags & SLP_VALID) == 0) {
 2384                 error = EINVAL;
 2385                 goto done;
 2386         }
 2387         slp->ns_flags &= ~SLP_VALID;
 2388         while ((slp->ns_flags & SLP_BUSY) != 0) {
 2389                 cv_wait(&slp->ns_cv, &slp->ns_lock);
 2390         }
 2391 done:
 2392         mutex_exit(&slp->ns_lock);
 2393 
 2394         return error;
 2395 }
 2396 
 2397 /*
 2398  * Try and extract an RPC request from the mbuf data list received on a
 2399  * stream socket. The "waitflag" argument indicates whether or not it
 2400  * can sleep.
 2401  */
 2402 int
 2403 nfsrv_getstream(slp, waitflag)
 2404         struct nfssvc_sock *slp;
 2405         int waitflag;
 2406 {
 2407         struct mbuf *m, **mpp;
 2408         struct mbuf *recm;
 2409         u_int32_t recmark;
 2410         int error = 0;
 2411 
 2412         KASSERT((slp->ns_flags & SLP_BUSY) != 0);
 2413         for (;;) {
 2414                 if (slp->ns_reclen == 0) {
 2415                         if (slp->ns_cc < NFSX_UNSIGNED) {
 2416                                 break;
 2417                         }
 2418                         m = slp->ns_raw;
 2419                         m_copydata(m, 0, NFSX_UNSIGNED, (void *)&recmark);
 2420                         m_adj(m, NFSX_UNSIGNED);
 2421                         slp->ns_cc -= NFSX_UNSIGNED;
 2422                         recmark = ntohl(recmark);
 2423                         slp->ns_reclen = recmark & ~0x80000000;
 2424                         if (recmark & 0x80000000)
 2425                                 slp->ns_sflags |= SLP_S_LASTFRAG;
 2426                         else
 2427                                 slp->ns_sflags &= ~SLP_S_LASTFRAG;
 2428                         if (slp->ns_reclen > NFS_MAXPACKET) {
 2429                                 error = EPERM;
 2430                                 break;
 2431                         }
 2432                 }
 2433 
 2434                 /*
 2435                  * Now get the record part.
 2436                  *
 2437                  * Note that slp->ns_reclen may be 0.  Linux sometimes
 2438                  * generates 0-length records.
 2439                  */
 2440                 if (slp->ns_cc == slp->ns_reclen) {
 2441                         recm = slp->ns_raw;
 2442                         slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
 2443                         slp->ns_cc = slp->ns_reclen = 0;
 2444                 } else if (slp->ns_cc > slp->ns_reclen) {
 2445                         recm = slp->ns_raw;
 2446                         m = m_split(recm, slp->ns_reclen, waitflag);
 2447                         if (m == NULL) {
 2448                                 error = EWOULDBLOCK;
 2449                                 break;
 2450                         }
 2451                         m_claimm(recm, &nfs_mowner);
 2452                         slp->ns_raw = m;
 2453                         if (m->m_next == NULL)
 2454                                 slp->ns_rawend = m;
 2455                         slp->ns_cc -= slp->ns_reclen;
 2456                         slp->ns_reclen = 0;
 2457                 } else {
 2458                         break;
 2459                 }
 2460 
 2461                 /*
 2462                  * Accumulate the fragments into a record.
 2463                  */
 2464                 mpp = &slp->ns_frag;
 2465                 while (*mpp)
 2466                         mpp = &((*mpp)->m_next);
 2467                 *mpp = recm;
 2468                 if (slp->ns_sflags & SLP_S_LASTFRAG) {
 2469                         if (slp->ns_recend)
 2470                                 slp->ns_recend->m_nextpkt = slp->ns_frag;
 2471                         else
 2472                                 slp->ns_rec = slp->ns_frag;
 2473                         slp->ns_recend = slp->ns_frag;
 2474                         slp->ns_frag = NULL;
 2475                 }
 2476         }
 2477 
 2478         return error;
 2479 }
 2480 
 2481 /*
 2482  * Parse an RPC header.
 2483  */
 2484 int
 2485 nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
 2486     struct nfsrv_descript **ndp, bool *more)
 2487 {
 2488         struct mbuf *m, *nam;
 2489         struct nfsrv_descript *nd;
 2490         int error;
 2491 
 2492         *ndp = NULL;
 2493         *more = false;
 2494 
 2495         if (nfsdsock_lock(slp, true)) {
 2496                 return ENOBUFS;
 2497         }
 2498         m = slp->ns_rec;
 2499         if (m == NULL) {
 2500                 nfsdsock_unlock(slp);
 2501                 return ENOBUFS;
 2502         }
 2503         slp->ns_rec = m->m_nextpkt;
 2504         if (slp->ns_rec) {
 2505                 m->m_nextpkt = NULL;
 2506                 *more = true;
 2507         } else {
 2508                 slp->ns_recend = NULL;
 2509         }
 2510         nfsdsock_unlock(slp);
 2511 
 2512         if (m->m_type == MT_SONAME) {
 2513                 nam = m;
 2514                 m = m->m_next;
 2515                 nam->m_next = NULL;
 2516         } else
 2517                 nam = NULL;
 2518         nd = nfsdreq_alloc();
 2519         nd->nd_md = nd->nd_mrep = m;
 2520         nd->nd_nam2 = nam;
 2521         nd->nd_dpos = mtod(m, void *);
 2522         error = nfs_getreq(nd, nfsd, true);
 2523         if (error) {
 2524                 m_freem(nam);
 2525                 nfsdreq_free(nd);
 2526                 return (error);
 2527         }
 2528         *ndp = nd;
 2529         nfsd->nfsd_nd = nd;
 2530         return (0);
 2531 }
 2532 
 2533 /*
 2534  * Search for a sleeping nfsd and wake it up.
 2535  * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
 2536  * running nfsds will go look for the work in the nfssvc_sock list.
 2537  */
 2538 static void
 2539 nfsrv_wakenfsd_locked(struct nfssvc_sock *slp)
 2540 {
 2541         struct nfsd *nd;
 2542 
 2543         KASSERT(mutex_owned(&nfsd_lock));
 2544 
 2545         if ((slp->ns_flags & SLP_VALID) == 0)
 2546                 return;
 2547         if (slp->ns_gflags & SLP_G_DOREC)
 2548                 return;
 2549         nd = SLIST_FIRST(&nfsd_idle_head);
 2550         if (nd) {
 2551                 SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle);
 2552                 if (nd->nfsd_slp)
 2553                         panic("nfsd wakeup");
 2554                 slp->ns_sref++;
 2555                 KASSERT(slp->ns_sref > 0);
 2556                 nd->nfsd_slp = slp;
 2557                 cv_signal(&nd->nfsd_cv);
 2558         } else {
 2559                 slp->ns_gflags |= SLP_G_DOREC;
 2560                 nfsd_head_flag |= NFSD_CHECKSLP;
 2561                 TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending);
 2562         }
 2563 }
 2564 
 2565 void
 2566 nfsrv_wakenfsd(struct nfssvc_sock *slp)
 2567 {
 2568 
 2569         mutex_enter(&nfsd_lock);
 2570         nfsrv_wakenfsd_locked(slp);
 2571         mutex_exit(&nfsd_lock);
 2572 }
 2573 
 2574 int
 2575 nfsdsock_sendreply(struct nfssvc_sock *slp, struct nfsrv_descript *nd)
 2576 {
 2577         int error;
 2578 
 2579         if (nd->nd_mrep != NULL) {
 2580                 m_freem(nd->nd_mrep);
 2581                 nd->nd_mrep = NULL;
 2582         }
 2583 
 2584         mutex_enter(&slp->ns_lock);
 2585         if ((slp->ns_flags & SLP_SENDING) != 0) {
 2586                 SIMPLEQ_INSERT_TAIL(&slp->ns_sendq, nd, nd_sendq);
 2587                 mutex_exit(&slp->ns_lock);
 2588                 return 0;
 2589         }
 2590         KASSERT(SIMPLEQ_EMPTY(&slp->ns_sendq));
 2591         slp->ns_flags |= SLP_SENDING;
 2592         mutex_exit(&slp->ns_lock);
 2593 
 2594 again:
 2595         error = nfs_send(slp->ns_so, nd->nd_nam2, nd->nd_mreq, NULL, curlwp);
 2596         if (nd->nd_nam2) {
 2597                 m_free(nd->nd_nam2);
 2598         }
 2599         nfsdreq_free(nd);
 2600 
 2601         mutex_enter(&slp->ns_lock);
 2602         KASSERT((slp->ns_flags & SLP_SENDING) != 0);
 2603         nd = SIMPLEQ_FIRST(&slp->ns_sendq);
 2604         if (nd != NULL) {
 2605                 SIMPLEQ_REMOVE_HEAD(&slp->ns_sendq, nd_sendq);
 2606                 mutex_exit(&slp->ns_lock);
 2607                 goto again;
 2608         }
 2609         slp->ns_flags &= ~SLP_SENDING;
 2610         mutex_exit(&slp->ns_lock);
 2611 
 2612         return error;
 2613 }
 2614 
 2615 void
 2616 nfsdsock_setbits(struct nfssvc_sock *slp, int bits)
 2617 {
 2618 
 2619         mutex_enter(&slp->ns_alock);
 2620         slp->ns_aflags |= bits;
 2621         mutex_exit(&slp->ns_alock);
 2622 }
 2623 
 2624 void
 2625 nfsdsock_clearbits(struct nfssvc_sock *slp, int bits)
 2626 {
 2627 
 2628         mutex_enter(&slp->ns_alock);
 2629         slp->ns_aflags &= ~bits;
 2630         mutex_exit(&slp->ns_alock);
 2631 }
 2632 
 2633 bool
 2634 nfsdsock_testbits(struct nfssvc_sock *slp, int bits)
 2635 {
 2636 
 2637         return (slp->ns_aflags & bits);
 2638 }
 2639 #endif /* NFSSERVER */
 2640 
 2641 #if defined(NFSSERVER) || (defined(NFS) && !defined(NFS_V2_ONLY))
 2642 static struct pool nfs_srvdesc_pool;
 2643 
 2644 void
 2645 nfsdreq_init(void)
 2646 {
 2647 
 2648         pool_init(&nfs_srvdesc_pool, sizeof(struct nfsrv_descript),
 2649             0, 0, 0, "nfsrvdescpl", &pool_allocator_nointr, IPL_NONE);
 2650 }
 2651 
 2652 struct nfsrv_descript *
 2653 nfsdreq_alloc(void)
 2654 {
 2655         struct nfsrv_descript *nd;
 2656 
 2657         nd = pool_get(&nfs_srvdesc_pool, PR_WAITOK);
 2658         nd->nd_cr = NULL;
 2659         return nd;
 2660 }
 2661 
 2662 void
 2663 nfsdreq_free(struct nfsrv_descript *nd)
 2664 {
 2665         kauth_cred_t cr;
 2666 
 2667         cr = nd->nd_cr;
 2668         if (cr != NULL) {
 2669                 kauth_cred_free(cr);
 2670         }
 2671         pool_put(&nfs_srvdesc_pool, nd);
 2672 }
 2673 #endif /* defined(NFSSERVER) || (defined(NFS) && !defined(NFS_V2_ONLY)) */

Cache object: 2122fa3207278351f67ec5d1c746b58f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.