FreeBSD/Linux Kernel Cross Reference
sys/nfs/nfs_socket.c
1 /* $NetBSD: nfs_socket.c,v 1.102.2.4 2005/01/11 06:39:49 jmc Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1991, 1993, 1995
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Rick Macklem at The University of Guelph.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
35 */
36
37 /*
38 * Socket operations for use by nfs
39 */
40
41 #include <sys/cdefs.h>
42 __KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.102.2.4 2005/01/11 06:39:49 jmc Exp $");
43
44 #include "fs_nfs.h"
45 #include "opt_nfs.h"
46 #include "opt_nfsserver.h"
47 #include "opt_mbuftrace.h"
48 #include "opt_inet.h"
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/callout.h>
53 #include <sys/proc.h>
54 #include <sys/mount.h>
55 #include <sys/kernel.h>
56 #include <sys/mbuf.h>
57 #include <sys/vnode.h>
58 #include <sys/domain.h>
59 #include <sys/protosw.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/syslog.h>
63 #include <sys/tprintf.h>
64 #include <sys/namei.h>
65 #include <sys/signal.h>
66 #include <sys/signalvar.h>
67
68 #include <netinet/in.h>
69 #include <netinet/tcp.h>
70
71 #include <nfs/rpcv2.h>
72 #include <nfs/nfsproto.h>
73 #include <nfs/nfs.h>
74 #include <nfs/xdr_subs.h>
75 #include <nfs/nfsm_subs.h>
76 #include <nfs/nfsmount.h>
77 #include <nfs/nfsnode.h>
78 #include <nfs/nfsrtt.h>
79 #include <nfs/nqnfs.h>
80 #include <nfs/nfs_var.h>
81
82 MALLOC_DEFINE(M_NFSREQ, "NFS req", "NFS request header");
83 #ifdef MBUFTRACE
84 struct mowner nfs_mowner = { "nfs" };
85 #endif
86
87 /*
88 * Estimate rto for an nfs rpc sent via an unreliable datagram.
89 * Use the mean and mean deviation of rtt for the appropriate type of rpc
90 * for the frequent rpcs and a default for the others.
91 * The justification for doing "other" this way is that these rpcs
92 * happen so infrequently that timer estimates would probably be stale.
93 * Also, since many of these rpcs are
94 * non-idempotent, a conservative timeout is desired.
95 * getattr, lookup - A+2D
96 * read, write - A+4D
97 * other - nm_timeo
98 */
99 #define NFS_RTO(n, t) \
100 ((t) == 0 ? (n)->nm_timeo : \
101 ((t) < 3 ? \
102 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
103 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
104 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
105 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
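
[Editorial sketch] As a worked example of what NFS_RTO computes, here is a small user-space sketch (not kernel code; the struct and sample values are hypothetical stand-ins for the nfsmount fields). With nm_srtt[] kept scaled by 8 and nm_sdrtt[] scaled by 4, as the NFS_TIMEO << 3 initialization in nfs_connect() below suggests, the two timer-class expressions reduce to roughly A + 2D for getattr/lookup and A + 4D for read/write:

#include <stdio.h>

/* hypothetical stand-in for the nfsmount fields NFS_RTO reads */
struct rto_state {
	int srtt;	/* smoothed rtt, scaled by 8 (A = srtt / 8) */
	int sdrtt;	/* smoothed deviation, scaled by 4 (D = sdrtt / 4) */
	int timeo;	/* default timeout for "other" rpcs */
};

static int
rto(const struct rto_state *n, int t)
{
	if (t == 0)		/* "other": the mount's default timeout */
		return n->timeo;
	if (t < 3)		/* getattr, lookup: roughly A + 2D */
		return ((((n->srtt + 3) >> 2) + n->sdrtt + 1) >> 1);
	/* read, write: roughly A + 4D */
	return (((n->srtt + 7) >> 3) + n->sdrtt + 1);
}

int
main(void)
{
	/* A = 10 ticks, D = 4 ticks */
	struct rto_state n = { 10 << 3, 4 << 2, 60 };

	printf("lookup rto = %d ticks\n", rto(&n, 2));	/* prints 18 */
	printf("read   rto = %d ticks\n", rto(&n, 3));	/* prints 27 */
	return 0;
}
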
106 /*
107 * External data, mostly RPC constants in XDR form
108 */
109 extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
110 rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
111 rpc_auth_kerb;
112 extern u_int32_t nfs_prog, nqnfs_prog;
113 extern time_t nqnfsstarttime;
114 extern const int nfsv3_procid[NFS_NPROCS];
115 extern int nfs_ticks;
116
117 /*
118 * Defines which timer to use for the procnum.
119 * 0 - default
120 * 1 - getattr
121 * 2 - lookup
122 * 3 - read
123 * 4 - write
124 */
125 static const int proct[NFS_NPROCS] = {
126 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
127 0, 0, 0,
128 };
129
130 /*
131 * There is a congestion window for outstanding rpcs maintained per mount
132 * point. The cwnd size is adjusted in roughly the way that:
133 * Van Jacobson, Congestion Avoidance and Control, In "Proceedings of
134 * SIGCOMM '88". ACM, August 1988.
135 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
136 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
137 * of rpcs is in progress.
138 * (The sent count and cwnd are scaled for integer arith.)
139 * Variants of "slow start" were tried and were found to be too much of a
140 * performance hit (ave. rtt 3 times larger);
141 * I suspect due to the large rtt that nfs rpcs have.
142 */
143 #define NFS_CWNDSCALE 256
144 #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
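
[Editorial sketch] The scaled congestion-window arithmetic is easier to see in isolation. Below is a minimal user-space model that reuses the exact update expressions appearing in nfs_reply() and nfs_timer() later in this file; the driver loop and printout are illustrative only, not the kernel's code paths:

#include <stdio.h>

#define NFS_CWNDSCALE	256		/* one rpc "slot" */
#define NFS_MAXCWND	(NFS_CWNDSCALE * 32)

/* additive increase: ~one rpc per cwnd of replies (from nfs_reply()) */
static int
cwnd_on_reply(int cwnd)
{
	cwnd += (NFS_CWNDSCALE * NFS_CWNDSCALE + (cwnd >> 1)) / cwnd;
	return cwnd > NFS_MAXCWND ? NFS_MAXCWND : cwnd;
}

/* multiplicative decrease: halve on retransmit timeout (from nfs_timer()) */
static int
cwnd_on_timeout(int cwnd)
{
	cwnd >>= 1;
	return cwnd < NFS_CWNDSCALE ? NFS_CWNDSCALE : cwnd;
}

int
main(void)
{
	int cwnd = NFS_MAXCWND / 2;	/* initial window, as in nfs_connect() */
	int i;

	for (i = 0; i < 16; i++)
		cwnd = cwnd_on_reply(cwnd);
	printf("after 16 replies: %d/256 rpc slots\n", cwnd);
	printf("after a timeout:  %d/256 rpc slots\n", cwnd_on_timeout(cwnd));
	return 0;
}
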
145 static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
146 int nfsrtton = 0;
147 struct nfsrtt nfsrtt;
148 struct nfsreqhead nfs_reqq;
149
150 struct callout nfs_timer_ch = CALLOUT_INITIALIZER_SETFUNC(nfs_timer, NULL);
151
152 /*
153 * Initialize sockets and congestion for a new NFS connection.
154 * We do not free the sockaddr if error.
155 */
156 int
157 nfs_connect(nmp, rep)
158 struct nfsmount *nmp;
159 struct nfsreq *rep;
160 {
161 struct socket *so;
162 int s, error, rcvreserve, sndreserve;
163 struct sockaddr *saddr;
164 struct sockaddr_in *sin;
165 #ifdef INET6
166 struct sockaddr_in6 *sin6;
167 #endif
168 struct mbuf *m;
169
170 nmp->nm_so = (struct socket *)0;
171 saddr = mtod(nmp->nm_nam, struct sockaddr *);
172 error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
173 nmp->nm_soproto);
174 if (error)
175 goto bad;
176 so = nmp->nm_so;
177 #ifdef MBUFTRACE
178 so->so_mowner = &nfs_mowner;
179 so->so_rcv.sb_mowner = &nfs_mowner;
180 so->so_snd.sb_mowner = &nfs_mowner;
181 #endif
182 nmp->nm_soflags = so->so_proto->pr_flags;
183
184 /*
185 * Some servers require that the client port be a reserved port number.
186 */
187 if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
188 m = m_get(M_WAIT, MT_SOOPTS);
189 MCLAIM(m, so->so_mowner);
190 *mtod(m, int32_t *) = IP_PORTRANGE_LOW;
191 m->m_len = sizeof(int32_t);
192 if ((error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, m)))
193 goto bad;
194 m = m_get(M_WAIT, MT_SONAME);
195 MCLAIM(m, so->so_mowner);
196 sin = mtod(m, struct sockaddr_in *);
197 sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
198 sin->sin_family = AF_INET;
199 sin->sin_addr.s_addr = INADDR_ANY;
200 sin->sin_port = 0;
201 error = sobind(so, m, &proc0);
202 m_freem(m);
203 if (error)
204 goto bad;
205 }
206 #ifdef INET6
207 if (saddr->sa_family == AF_INET6 && (nmp->nm_flag & NFSMNT_RESVPORT)) {
208 m = m_get(M_WAIT, MT_SOOPTS);
209 MCLAIM(m, so->so_mowner);
210 *mtod(m, int32_t *) = IPV6_PORTRANGE_LOW;
211 m->m_len = sizeof(int32_t);
212 if ((error = sosetopt(so, IPPROTO_IPV6, IPV6_PORTRANGE, m)))
213 goto bad;
214 m = m_get(M_WAIT, MT_SONAME);
215 MCLAIM(m, so->so_mowner);
216 sin6 = mtod(m, struct sockaddr_in6 *);
217 sin6->sin6_len = m->m_len = sizeof (struct sockaddr_in6);
218 sin6->sin6_family = AF_INET6;
219 sin6->sin6_addr = in6addr_any;
220 sin6->sin6_port = 0;
221 error = sobind(so, m, &proc0);
222 m_freem(m);
223 if (error)
224 goto bad;
225 }
226 #endif
227
228 /*
229 * Protocols that do not require connections may be optionally left
230 * unconnected for servers that reply from a port other than NFS_PORT.
231 */
232 if (nmp->nm_flag & NFSMNT_NOCONN) {
233 if (nmp->nm_soflags & PR_CONNREQUIRED) {
234 error = ENOTCONN;
235 goto bad;
236 }
237 } else {
238 error = soconnect(so, nmp->nm_nam);
239 if (error)
240 goto bad;
241
242 /*
243 * Wait for the connection to complete. Cribbed from the
244 * connect system call but with the wait timing out so
245 * that interruptible mounts don't hang here for a long time.
246 */
247 s = splsoftnet();
248 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
249 (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
250 "nfscn1", 2 * hz);
251 if ((so->so_state & SS_ISCONNECTING) &&
252 so->so_error == 0 && rep &&
253 (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){
254 so->so_state &= ~SS_ISCONNECTING;
255 splx(s);
256 goto bad;
257 }
258 }
259 if (so->so_error) {
260 error = so->so_error;
261 so->so_error = 0;
262 splx(s);
263 goto bad;
264 }
265 splx(s);
266 }
267 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
268 so->so_rcv.sb_timeo = (5 * hz);
269 so->so_snd.sb_timeo = (5 * hz);
270 } else {
271 /*
272 * enable receive timeout to detect server crash and reconnect.
273 * otherwise, we can be stuck in soreceive forever.
274 */
275 so->so_rcv.sb_timeo = (5 * hz);
276 so->so_snd.sb_timeo = 0;
277 }
278 if (nmp->nm_sotype == SOCK_DGRAM) {
279 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
280 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
281 NFS_MAXPKTHDR) * 2;
282 } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
283 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
284 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
285 NFS_MAXPKTHDR) * 2;
286 } else {
287 if (nmp->nm_sotype != SOCK_STREAM)
288 panic("nfscon sotype");
289 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
290 m = m_get(M_WAIT, MT_SOOPTS);
291 MCLAIM(m, so->so_mowner);
292 *mtod(m, int32_t *) = 1;
293 m->m_len = sizeof(int32_t);
294 sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
295 }
296 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
297 m = m_get(M_WAIT, MT_SOOPTS);
298 MCLAIM(m, so->so_mowner);
299 *mtod(m, int32_t *) = 1;
300 m->m_len = sizeof(int32_t);
301 sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
302 }
303 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
304 sizeof (u_int32_t)) * 2;
305 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
306 sizeof (u_int32_t)) * 2;
307 }
308 error = soreserve(so, sndreserve, rcvreserve);
309 if (error)
310 goto bad;
311 so->so_rcv.sb_flags |= SB_NOINTR;
312 so->so_snd.sb_flags |= SB_NOINTR;
313
314 /* Initialize other non-zero congestion variables */
315 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
316 NFS_TIMEO << 3;
317 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
318 nmp->nm_sdrtt[3] = 0;
319 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
320 nmp->nm_sent = 0;
321 nmp->nm_timeouts = 0;
322 return (0);
323
324 bad:
325 nfs_disconnect(nmp);
326 return (error);
327 }
328
329 /*
330 * Reconnect routine:
331 * Called when a connection is broken on a reliable protocol.
332 * - clean up the old socket
333 * - nfs_connect() again
334 * - set R_MUSTRESEND for all outstanding requests on mount point
335 * If this fails the mount point is DEAD!
336 * nb: Must be called with the nfs_sndlock() set on the mount point.
337 */
338 int
339 nfs_reconnect(rep)
340 struct nfsreq *rep;
341 {
342 struct nfsreq *rp;
343 struct nfsmount *nmp = rep->r_nmp;
344 int error;
345
346 nfs_disconnect(nmp);
347 while ((error = nfs_connect(nmp, rep)) != 0) {
348 if (error == EINTR || error == ERESTART)
349 return (EINTR);
350 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscn2", 0);
351 }
352
353 /*
354 * Loop through outstanding request list and fix up all requests
355 * on old socket.
356 */
357 TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
358 if (rp->r_nmp == nmp) {
359 if ((rp->r_flags & R_MUSTRESEND) == 0)
360 rp->r_flags |= R_MUSTRESEND | R_REXMITTED;
361 rp->r_rexmit = 0;
362 }
363 }
364 return (0);
365 }
366
367 /*
368 * NFS disconnect. Clean up and unlink.
369 */
370 void
371 nfs_disconnect(nmp)
372 struct nfsmount *nmp;
373 {
374 struct socket *so;
375 int drain = 0;
376
377 if (nmp->nm_so) {
378 so = nmp->nm_so;
379 nmp->nm_so = (struct socket *)0;
380 soshutdown(so, 2);
381 drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0;
382 if (drain) {
383 /*
384 * soshutdown() above should wake up the current
385 * listener.
386 * Now wake up those waiting for the receive lock, and
387 * wait for them to go away unhappy, to prevent *nmp
388 * from evaporating while they're sleeping.
389 */
390 while (nmp->nm_waiters > 0) {
391 wakeup (&nmp->nm_iflag);
392 (void) tsleep(&nmp->nm_waiters, PVFS,
393 "nfsdis", 0);
394 }
395 }
396 soclose(so);
397 }
398 #ifdef DIAGNOSTIC
399 if (drain && (nmp->nm_waiters > 0))
400 panic("nfs_disconnect: waiters left after drain?");
401 #endif
402 }
403
404 void
405 nfs_safedisconnect(nmp)
406 struct nfsmount *nmp;
407 {
408 struct nfsreq dummyreq;
409
410 memset(&dummyreq, 0, sizeof(dummyreq));
411 dummyreq.r_nmp = nmp;
412 nfs_rcvlock(&dummyreq); /* XXX ignored error return */
413 nfs_disconnect(nmp);
414 nfs_rcvunlock(nmp);
415 }
416
417 /*
418 * This is the nfs send routine. For connection based socket types, it
419 * must be called with an nfs_sndlock() on the socket.
420 * "rep == NULL" indicates that it has been called from a server.
421 * For the client side:
422 * - return EINTR if the RPC is terminated, 0 otherwise
423 * - set R_MUSTRESEND if the send fails for any reason
424 * - do any cleanup required by recoverable socket errors (???)
425 * For the server side:
426 * - return EINTR or ERESTART if interrupted by a signal
427 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
428 * - do any cleanup required by recoverable socket errors (???)
429 */
430 int
431 nfs_send(so, nam, top, rep)
432 struct socket *so;
433 struct mbuf *nam;
434 struct mbuf *top;
435 struct nfsreq *rep;
436 {
437 struct mbuf *sendnam;
438 int error, soflags, flags;
439
440 if (rep) {
441 if (rep->r_flags & R_SOFTTERM) {
442 m_freem(top);
443 return (EINTR);
444 }
445 if ((so = rep->r_nmp->nm_so) == NULL) {
446 rep->r_flags |= R_MUSTRESEND;
447 m_freem(top);
448 return (0);
449 }
450 rep->r_flags &= ~R_MUSTRESEND;
451 soflags = rep->r_nmp->nm_soflags;
452 } else
453 soflags = so->so_proto->pr_flags;
454 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
455 sendnam = (struct mbuf *)0;
456 else
457 sendnam = nam;
458 if (so->so_type == SOCK_SEQPACKET)
459 flags = MSG_EOR;
460 else
461 flags = 0;
462
463 error = (*so->so_send)(so, sendnam, (struct uio *)0, top,
464 (struct mbuf *)0, flags);
465 if (error) {
466 if (rep) {
467 if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
468 /*
469 * We're too fast for the network/driver,
470 * and UDP isn't flowcontrolled.
471 * We need to resend. This is not fatal,
472 * just try again.
473 *
474 * Could be smarter here by doing some sort
475 * of a backoff, but this is rare.
476 */
477 rep->r_flags |= R_MUSTRESEND;
478 } else {
479 if (error != EPIPE)
480 log(LOG_INFO,
481 "nfs send error %d for %s\n",
482 error,
483 rep->r_nmp->nm_mountp->
484 mnt_stat.f_mntfromname);
485 /*
486 * Deal with errors for the client side.
487 */
488 if (rep->r_flags & R_SOFTTERM)
489 error = EINTR;
490 else
491 rep->r_flags |= R_MUSTRESEND;
492 }
493 } else {
494 /*
495 * See above. This error can happen under normal
496 * circumstances and the log is too noisy.
497 * The error will still show up in nfsstat.
498 */
499 if (error != ENOBUFS || so->so_type != SOCK_DGRAM)
500 log(LOG_INFO, "nfsd send error %d\n", error);
501 }
502
503 /*
504 * Handle any recoverable (soft) socket errors here. (???)
505 */
506 if (error != EINTR && error != ERESTART &&
507 error != EWOULDBLOCK && error != EPIPE)
508 error = 0;
509 }
510 return (error);
511 }
512
513 #ifdef NFS
514 /*
515 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
516 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
517 * Mark and consolidate the data into a new mbuf list.
518 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
519 * small mbufs.
520 * For SOCK_STREAM we must be very careful to read an entire record once
521 * we have read any of it, even if the system call has been interrupted.
522 */
523 int
524 nfs_receive(rep, aname, mp)
525 struct nfsreq *rep;
526 struct mbuf **aname;
527 struct mbuf **mp;
528 {
529 struct socket *so;
530 struct uio auio;
531 struct iovec aio;
532 struct mbuf *m;
533 struct mbuf *control;
534 u_int32_t len;
535 struct mbuf **getnam;
536 int error, sotype, rcvflg;
537 struct proc *p = curproc; /* XXX */
538
539 /*
540 * Set up arguments for soreceive()
541 */
542 *mp = (struct mbuf *)0;
543 *aname = (struct mbuf *)0;
544 sotype = rep->r_nmp->nm_sotype;
545
546 /*
547 * For reliable protocols, lock against other senders/receivers
548 * in case a reconnect is necessary.
549 * For SOCK_STREAM, first get the Record Mark to find out how much
550 * more there is to get.
551 * We must lock the socket against other receivers
552 * until we have an entire rpc request/reply.
553 */
554 if (sotype != SOCK_DGRAM) {
555 error = nfs_sndlock(&rep->r_nmp->nm_iflag, rep);
556 if (error)
557 return (error);
558 tryagain:
559 /*
560 * Check for fatal errors and resending request.
561 */
562 /*
563 * Ugh: If a reconnect attempt just happened, nm_so
564 * would have changed. NULL indicates a failed
565 * attempt that has essentially shut down this
566 * mount point.
567 */
568 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
569 nfs_sndunlock(&rep->r_nmp->nm_iflag);
570 return (EINTR);
571 }
572 so = rep->r_nmp->nm_so;
573 if (!so) {
574 error = nfs_reconnect(rep);
575 if (error) {
576 nfs_sndunlock(&rep->r_nmp->nm_iflag);
577 return (error);
578 }
579 goto tryagain;
580 }
581 while (rep->r_flags & R_MUSTRESEND) {
582 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
583 nfsstats.rpcretries++;
584 rep->r_rtt = 0;
585 rep->r_flags &= ~R_TIMING;
586 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
587 if (error) {
588 if (error == EINTR || error == ERESTART ||
589 (error = nfs_reconnect(rep)) != 0) {
590 nfs_sndunlock(&rep->r_nmp->nm_iflag);
591 return (error);
592 }
593 goto tryagain;
594 }
595 }
596 nfs_sndunlock(&rep->r_nmp->nm_iflag);
597 if (sotype == SOCK_STREAM) {
598 aio.iov_base = (caddr_t) &len;
599 aio.iov_len = sizeof(u_int32_t);
600 auio.uio_iov = &aio;
601 auio.uio_iovcnt = 1;
602 auio.uio_segflg = UIO_SYSSPACE;
603 auio.uio_rw = UIO_READ;
604 auio.uio_offset = 0;
605 auio.uio_resid = sizeof(u_int32_t);
606 auio.uio_procp = p;
607 do {
608 rcvflg = MSG_WAITALL;
609 error = (*so->so_receive)(so, (struct mbuf **)0, &auio,
610 (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
611 if (error == EWOULDBLOCK && rep) {
612 if (rep->r_flags & R_SOFTTERM)
613 return (EINTR);
614 /*
615 * if it seems that the server died after it
616 * received our request, set EPIPE so that
617 * we'll reconnect and retransmit requests.
618 */
619 if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
620 nfsstats.rpctimeouts++;
621 error = EPIPE;
622 }
623 }
624 } while (error == EWOULDBLOCK);
625 if (!error && auio.uio_resid > 0) {
626 /*
627 * Don't log a 0 byte receive; it means
628 * that the socket has been closed, and
629 * can happen during normal operation
630 * (forcible unmount or Solaris server).
631 */
632 if (auio.uio_resid != sizeof (u_int32_t))
633 log(LOG_INFO,
634 "short receive (%lu/%lu) from nfs server %s\n",
635 (u_long)sizeof(u_int32_t) - auio.uio_resid,
636 (u_long)sizeof(u_int32_t),
637 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
638 error = EPIPE;
639 }
640 if (error)
641 goto errout;
642 len = ntohl(len) & ~0x80000000;
643 /*
644 * This is SERIOUS! We are out of sync with the sender
645 * and forcing a disconnect/reconnect is all I can do.
646 */
647 if (len > NFS_MAXPACKET) {
648 log(LOG_ERR, "%s (%d) from nfs server %s\n",
649 "impossible packet length",
650 len,
651 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
652 error = EFBIG;
653 goto errout;
654 }
655 auio.uio_resid = len;
656 do {
657 rcvflg = MSG_WAITALL;
658 error = (*so->so_receive)(so, (struct mbuf **)0,
659 &auio, mp, (struct mbuf **)0, &rcvflg);
660 } while (error == EWOULDBLOCK || error == EINTR ||
661 error == ERESTART);
662 if (!error && auio.uio_resid > 0) {
663 if (len != auio.uio_resid)
664 log(LOG_INFO,
665 "short receive (%lu/%d) from nfs server %s\n",
666 (u_long)len - auio.uio_resid, len,
667 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
668 error = EPIPE;
669 }
670 } else {
671 /*
672 * NB: Since uio_resid is big, MSG_WAITALL is ignored
673 * and soreceive() will return when it has either a
674 * control msg or a data msg.
675 * We have no use for control msg., but must grab them
676 * and then throw them away so we know what is going
677 * on.
678 */
679 auio.uio_resid = len = 100000000; /* Anything Big */
680 auio.uio_procp = p;
681 do {
682 rcvflg = 0;
683 error = (*so->so_receive)(so, (struct mbuf **)0,
684 &auio, mp, &control, &rcvflg);
685 if (control)
686 m_freem(control);
687 if (error == EWOULDBLOCK && rep) {
688 if (rep->r_flags & R_SOFTTERM)
689 return (EINTR);
690 }
691 } while (error == EWOULDBLOCK ||
692 (!error && *mp == NULL && control));
693 if ((rcvflg & MSG_EOR) == 0)
694 printf("Egad!!\n");
695 if (!error && *mp == NULL)
696 error = EPIPE;
697 len -= auio.uio_resid;
698 }
699 errout:
700 if (error && error != EINTR && error != ERESTART) {
701 m_freem(*mp);
702 *mp = (struct mbuf *)0;
703 if (error != EPIPE)
704 log(LOG_INFO,
705 "receive error %d from nfs server %s\n",
706 error,
707 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
708 error = nfs_sndlock(&rep->r_nmp->nm_iflag, rep);
709 if (!error)
710 error = nfs_reconnect(rep);
711 if (!error)
712 goto tryagain;
713 else
714 nfs_sndunlock(&rep->r_nmp->nm_iflag);
715 }
716 } else {
717 if ((so = rep->r_nmp->nm_so) == NULL)
718 return (EACCES);
719 if (so->so_state & SS_ISCONNECTED)
720 getnam = (struct mbuf **)0;
721 else
722 getnam = aname;
723 auio.uio_resid = len = 1000000;
724 auio.uio_procp = p;
725 do {
726 rcvflg = 0;
727 error = (*so->so_receive)(so, getnam, &auio, mp,
728 (struct mbuf **)0, &rcvflg);
729 if (error == EWOULDBLOCK &&
730 (rep->r_flags & R_SOFTTERM))
731 return (EINTR);
732 } while (error == EWOULDBLOCK);
733 len -= auio.uio_resid;
734 if (!error && *mp == NULL)
735 error = EPIPE;
736 }
737 if (error) {
738 m_freem(*mp);
739 *mp = (struct mbuf *)0;
740 }
741 return (error);
742 }
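
[Editorial sketch] The Record Mark handled above is a 4-byte big-endian word: the high bit flags the final fragment of a record and the low 31 bits carry the fragment length, which is why nfs_receive() masks with ~0x80000000 after ntohl(), and why nfs_request() later builds the mark with htonl(0x80000000 | len). A small self-contained sketch of that framing, mirroring those expressions:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htonl, ntohl */

/* encode a record mark for a final fragment of 'len' bytes */
static uint32_t
rm_encode(uint32_t len)
{
	return htonl(0x80000000U | len);
}

/* decode: extract length and the last-fragment flag, as nfs_receive() does */
static uint32_t
rm_decode(uint32_t rm, int *last)
{
	uint32_t v = ntohl(rm);

	*last = (v & 0x80000000U) != 0;
	return v & ~0x80000000U;
}

int
main(void)
{
	int last;
	uint32_t rm = rm_encode(132);	/* e.g. a 132-byte rpc record */
	uint32_t len = rm_decode(rm, &last);

	printf("len=%u last=%d\n", len, last);
	return 0;
}
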
743
744 /*
745 * Implement receipt of reply on a socket.
746 * We must search through the list of received datagrams matching them
747 * with outstanding requests using the xid, until ours is found.
748 */
749 /* ARGSUSED */
750 int
751 nfs_reply(myrep)
752 struct nfsreq *myrep;
753 {
754 struct nfsreq *rep;
755 struct nfsmount *nmp = myrep->r_nmp;
756 int32_t t1;
757 struct mbuf *mrep, *nam, *md;
758 u_int32_t rxid, *tl;
759 caddr_t dpos, cp2;
760 int error;
761
762 /*
763 * Loop around until we get our own reply
764 */
765 for (;;) {
766 /*
767 * Lock against other receivers so that I don't get stuck in
768 * sbwait() after someone else has received my reply for me.
769 * Also necessary for connection based protocols to avoid
770 * race conditions during a reconnect.
771 */
772 error = nfs_rcvlock(myrep);
773 if (error == EALREADY)
774 return (0);
775 if (error)
776 return (error);
777 /*
778 * Get the next Rpc reply off the socket
779 */
780 nmp->nm_waiters++;
781 error = nfs_receive(myrep, &nam, &mrep);
782 nfs_rcvunlock(nmp);
783 if (error) {
784
785 if (nmp->nm_iflag & NFSMNT_DISMNT) {
786 /*
787 * Oops, we're going away now..
788 */
789 nmp->nm_waiters--;
790 wakeup (&nmp->nm_waiters);
791 return error;
792 }
793 nmp->nm_waiters--;
794 /*
795 * Ignore routing errors on connectionless protocols??
796 */
797 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
798 nmp->nm_so->so_error = 0;
799 #ifdef DEBUG
800 printf("nfs_reply: ignoring error %d\n", error);
801 #endif
802 if (myrep->r_flags & R_GETONEREP)
803 return (0);
804 continue;
805 }
806 return (error);
807 }
808 nmp->nm_waiters--;
809 if (nam)
810 m_freem(nam);
811
812 /*
813 * Get the xid and check that it is an rpc reply
814 */
815 md = mrep;
816 dpos = mtod(md, caddr_t);
817 nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
818 rxid = *tl++;
819 if (*tl != rpc_reply) {
820 #ifndef NFS_V2_ONLY
821 if (nmp->nm_flag & NFSMNT_NQNFS) {
822 if (nqnfs_callback(nmp, mrep, md, dpos))
823 nfsstats.rpcinvalid++;
824 } else
825 #endif
826 {
827 nfsstats.rpcinvalid++;
828 m_freem(mrep);
829 }
830 nfsmout:
831 if (myrep->r_flags & R_GETONEREP)
832 return (0);
833 continue;
834 }
835
836 /*
837 * Loop through the request list to match up the reply
838 * Iff no match, just drop the datagram
839 */
840 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
841 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
842 /* Found it.. */
843 rep->r_mrep = mrep;
844 rep->r_md = md;
845 rep->r_dpos = dpos;
846 if (nfsrtton) {
847 struct rttl *rt;
848
849 rt = &nfsrtt.rttl[nfsrtt.pos];
850 rt->proc = rep->r_procnum;
851 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
852 rt->sent = nmp->nm_sent;
853 rt->cwnd = nmp->nm_cwnd;
854 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
855 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
856 rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
857 rt->tstamp = time;
858 if (rep->r_flags & R_TIMING)
859 rt->rtt = rep->r_rtt;
860 else
861 rt->rtt = 1000000;
862 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
863 }
864 /*
865 * Update congestion window.
866 * Do the additive increase of
867 * one rpc/rtt.
868 */
869 if (nmp->nm_cwnd <= nmp->nm_sent) {
870 nmp->nm_cwnd +=
871 (NFS_CWNDSCALE * NFS_CWNDSCALE +
872 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
873 if (nmp->nm_cwnd > NFS_MAXCWND)
874 nmp->nm_cwnd = NFS_MAXCWND;
875 }
876 rep->r_flags &= ~R_SENT;
877 nmp->nm_sent -= NFS_CWNDSCALE;
878 /*
879 * Update rtt using a gain of 0.125 on the mean
880 * and a gain of 0.25 on the deviation.
881 */
882 if (rep->r_flags & R_TIMING) {
883 /*
884 * Since the timer resolution of
885 * NFS_HZ is so coarse, it can often
886 * result in r_rtt == 0. Since
887 * r_rtt == N means that the actual
888 * rtt is between N+dt and N+2-dt ticks,
889 * add 1.
890 */
891 t1 = rep->r_rtt + 1;
892 t1 -= (NFS_SRTT(rep) >> 3);
893 NFS_SRTT(rep) += t1;
894 if (t1 < 0)
895 t1 = -t1;
896 t1 -= (NFS_SDRTT(rep) >> 2);
897 NFS_SDRTT(rep) += t1;
898 }
899 nmp->nm_timeouts = 0;
900 break;
901 }
902 }
903 /*
904 * If not matched to a request, drop it.
905 * If it's mine, get out.
906 */
907 if (rep == 0) {
908 nfsstats.rpcunexpected++;
909 m_freem(mrep);
910 } else if (rep == myrep) {
911 if (rep->r_mrep == NULL)
912 panic("nfsreply nil");
913 return (0);
914 }
915 if (myrep->r_flags & R_GETONEREP)
916 return (0);
917 }
918 }
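
[Editorial sketch] The gain-0.125/0.25 update in the R_TIMING branch above is the classic Van Jacobson estimator run on fixed-point state (srtt scaled by 8, sdrtt by 4). Pulled out into a standalone user-space sketch with hypothetical names and sample round-trip times:

#include <stdio.h>

/* srtt scaled by 8, sdrtt by 4, matching nfs_connect()'s initialization */
static void
vj_update(int *srtt, int *sdrtt, int rtt_ticks)
{
	int t1 = rtt_ticks + 1;		/* +1 for the coarse NFS_HZ resolution */

	t1 -= (*srtt >> 3);		/* error vs. the mean, gain 1/8 */
	*srtt += t1;
	if (t1 < 0)
		t1 = -t1;
	t1 -= (*sdrtt >> 2);		/* |error| vs. the deviation, gain 1/4 */
	*sdrtt += t1;
}

int
main(void)
{
	int srtt = 2 << 3, sdrtt = 0, i;
	const int samples[] = { 2, 3, 2, 8, 2, 2 };	/* one rtt spike */

	for (i = 0; i < 6; i++) {
		vj_update(&srtt, &sdrtt, samples[i]);
		printf("rtt=%d -> A~%d D~%d\n",
		    samples[i], srtt >> 3, sdrtt >> 2);
	}
	return 0;
}
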
919
920 /*
921 * nfs_request - goes something like this
922 * - fill in request struct
923 * - links it into list
924 * - calls nfs_send() for first transmit
925 * - calls nfs_receive() to get reply
926 * - break down rpc header and return with nfs reply pointed to
927 * by mrep or error
928 * nb: always frees up mreq mbuf list
929 */
930 int
931 nfs_request(np, mrest, procnum, procp, cred, mrp, mdp, dposp, rexmitp)
932 struct nfsnode *np;
933 struct mbuf *mrest;
934 int procnum;
935 struct proc *procp;
936 struct ucred *cred;
937 struct mbuf **mrp;
938 struct mbuf **mdp;
939 caddr_t *dposp;
940 int *rexmitp;
941 {
942 struct mbuf *m, *mrep;
943 struct nfsreq *rep;
944 u_int32_t *tl;
945 int i;
946 struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
947 struct mbuf *md, *mheadend;
948 char nickv[RPCX_NICKVERF];
949 time_t reqtime, waituntil;
950 caddr_t dpos, cp2;
951 int t1, s, error = 0, mrest_len, auth_len, auth_type;
952 int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
953 int verf_len, verf_type;
954 u_int32_t xid;
955 char *auth_str, *verf_str;
956 NFSKERBKEY_T key; /* save session key */
957 struct ucred acred;
958 #ifndef NFS_V2_ONLY
959 int nqlflag, cachable;
960 u_quad_t frev;
961 #endif
962 struct mbuf *mrest_backup = NULL;
963 struct ucred *origcred = NULL; /* XXX: gcc */
964 boolean_t retry_cred = TRUE;
965 boolean_t use_opencred = (np->n_flag & NUSEOPENCRED) != 0;
966
967 if (rexmitp != NULL)
968 *rexmitp = 0;
969
970 tryagain_cred:
971 KASSERT(cred != NULL);
972 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
973 rep->r_nmp = nmp;
974 rep->r_procp = procp;
975 rep->r_procnum = procnum;
976 i = 0;
977 m = mrest;
978 while (m) {
979 i += m->m_len;
980 m = m->m_next;
981 }
982 mrest_len = i;
983
984 /*
985 * Get the RPC header with authorization.
986 */
987 kerbauth:
988 verf_str = auth_str = (char *)0;
989 if (nmp->nm_flag & NFSMNT_KERB) {
990 verf_str = nickv;
991 verf_len = sizeof (nickv);
992 auth_type = RPCAUTH_KERB4;
993 memset((caddr_t)key, 0, sizeof (key));
994 if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
995 &auth_len, verf_str, verf_len)) {
996 error = nfs_getauth(nmp, rep, cred, &auth_str,
997 &auth_len, verf_str, &verf_len, key);
998 if (error) {
999 free((caddr_t)rep, M_NFSREQ);
1000 m_freem(mrest);
1001 return (error);
1002 }
1003 }
1004 retry_cred = FALSE;
1005 } else {
1006 /* AUTH_UNIX */
1007 uid_t uid;
1008 gid_t gid;
1009
1010 /*
1011 * on most unix filesystems, permission checks are
1012 * done when the file is open(2)'ed;
1013 * i.e. once a file is successfully opened,
1014 * following i/o operations never fail with EACCES.
1015 * we try to follow the semantics as far as possible.
1016 *
1017 * note that we expect that the nfs server always grants
1018 * access to the file's owner.
1019 */
1020 origcred = cred;
1021 switch (procnum) {
1022 case NFSPROC_READ:
1023 case NFSPROC_WRITE:
1024 case NFSPROC_COMMIT:
1025 uid = np->n_vattr->va_uid;
1026 gid = np->n_vattr->va_gid;
1027 if (cred->cr_uid == uid && cred->cr_gid == gid) {
1028 retry_cred = FALSE;
1029 break;
1030 }
1031 if (use_opencred)
1032 break;
1033 acred.cr_uid = uid;
1034 acred.cr_gid = gid;
1035 acred.cr_ngroups = 0;
1036 acred.cr_ref = 2; /* Just to be safe.. */
1037 cred = &acred;
1038 break;
1039 default:
1040 retry_cred = FALSE;
1041 break;
1042 }
1043 /*
1044 * back up the mbuf chain in case we need it later to retry.
1045 *
1046 * XXX maybe we can keep a direct reference to
1047 * mrest without doing m_copym, but it's ...ugly.
1048 */
1049 if (retry_cred)
1050 mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
1051 auth_type = RPCAUTH_UNIX;
1052 auth_len = (((cred->cr_ngroups > nmp->nm_numgrps) ?
1053 nmp->nm_numgrps : cred->cr_ngroups) << 2) +
1054 5 * NFSX_UNSIGNED;
1055 }
1056 m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
1057 auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
1058 if (auth_str)
1059 free(auth_str, M_TEMP);
1060
1061 /*
1062 * For stream protocols, insert a Sun RPC Record Mark.
1063 */
1064 if (nmp->nm_sotype == SOCK_STREAM) {
1065 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
1066 *mtod(m, u_int32_t *) = htonl(0x80000000 |
1067 (m->m_pkthdr.len - NFSX_UNSIGNED));
1068 }
1069 rep->r_mreq = m;
1070 rep->r_xid = xid;
1071 tryagain:
1072 if (nmp->nm_flag & NFSMNT_SOFT)
1073 rep->r_retry = nmp->nm_retry;
1074 else
1075 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
1076 rep->r_rtt = rep->r_rexmit = 0;
1077 if (proct[procnum] > 0)
1078 rep->r_flags = R_TIMING;
1079 else
1080 rep->r_flags = 0;
1081 rep->r_mrep = NULL;
1082
1083 /*
1084 * Do the client side RPC.
1085 */
1086 nfsstats.rpcrequests++;
1087 /*
1088 * Chain request into list of outstanding requests. Be sure
1089 * to put it LAST so timer finds oldest requests first.
1090 */
1091 s = splsoftnet();
1092 TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
1093
1094 /* Get send time for nqnfs */
1095 reqtime = time.tv_sec;
1096
1097 /*
1098 * If backing off another request or avoiding congestion, don't
1099 * send this one now but let timer do it. If not timing a request,
1100 * do it now.
1101 */
1102 if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
1103 (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1104 nmp->nm_sent < nmp->nm_cwnd)) {
1105 splx(s);
1106 if (nmp->nm_soflags & PR_CONNREQUIRED)
1107 error = nfs_sndlock(&nmp->nm_iflag, rep);
1108 if (!error) {
1109 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
1110 error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
1111 if (nmp->nm_soflags & PR_CONNREQUIRED)
1112 nfs_sndunlock(&nmp->nm_iflag);
1113 }
1114 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
1115 nmp->nm_sent += NFS_CWNDSCALE;
1116 rep->r_flags |= R_SENT;
1117 }
1118 } else {
1119 splx(s);
1120 rep->r_rtt = -1;
1121 }
1122
1123 /*
1124 * Wait for the reply from our send or the timer's.
1125 */
1126 if (!error || error == EPIPE)
1127 error = nfs_reply(rep);
1128
1129 /*
1130 * RPC done, unlink the request.
1131 */
1132 s = splsoftnet();
1133 TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
1134 splx(s);
1135
1136 /*
1137 * Decrement the outstanding request count.
1138 */
1139 if (rep->r_flags & R_SENT) {
1140 rep->r_flags &= ~R_SENT; /* paranoia */
1141 nmp->nm_sent -= NFS_CWNDSCALE;
1142 }
1143
1144 if (rexmitp != NULL) {
1145 int rexmit;
1146
1147 if (nmp->nm_sotype != SOCK_DGRAM)
1148 rexmit = (rep->r_flags & R_REXMITTED) != 0;
1149 else
1150 rexmit = rep->r_rexmit;
1151 *rexmitp = rexmit;
1152 }
1153
1154 /*
1155 * If there was a successful reply and a tprintf msg was
1156 * printed, tprintf a response.
1157 */
1158 if (!error && (rep->r_flags & R_TPRINTFMSG))
1159 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
1160 "is alive again");
1161 mrep = rep->r_mrep;
1162 md = rep->r_md;
1163 dpos = rep->r_dpos;
1164 if (error)
1165 goto nfsmout;
1166
1167 /*
1168 * break down the rpc header and check if ok
1169 */
1170 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1171 if (*tl++ == rpc_msgdenied) {
1172 if (*tl == rpc_mismatch)
1173 error = EOPNOTSUPP;
1174 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
1175 if (!failed_auth) {
1176 failed_auth++;
1177 mheadend->m_next = (struct mbuf *)0;
1178 m_freem(mrep);
1179 m_freem(rep->r_mreq);
1180 goto kerbauth;
1181 } else
1182 error = EAUTH;
1183 } else
1184 error = EACCES;
1185 m_freem(mrep);
1186 goto nfsmout;
1187 }
1188
1189 /*
1190 * Grab any Kerberos verifier, otherwise just throw it away.
1191 */
1192 verf_type = fxdr_unsigned(int, *tl++);
1193 i = fxdr_unsigned(int32_t, *tl);
1194 if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
1195 error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
1196 if (error)
1197 goto nfsmout;
1198 } else if (i > 0)
1199 nfsm_adv(nfsm_rndup(i));
1200 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1201 /* 0 == ok */
1202 if (*tl == 0) {
1203 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1204 if (*tl != 0) {
1205 error = fxdr_unsigned(int, *tl);
1206 if (error == NFSERR_ACCES && retry_cred) {
1207 m_freem(mrep);
1208 m_freem(rep->r_mreq);
1209 FREE(rep, M_NFSREQ);
1210 use_opencred = !use_opencred;
1211 if (mrest_backup == NULL)
1212 return ENOMEM; /* m_copym failure */
1213 mrest = mrest_backup;
1214 mrest_backup = NULL;
1215 cred = origcred;
1216 error = 0;
1217 retry_cred = FALSE;
1218 goto tryagain_cred;
1219 }
1220 if ((nmp->nm_flag & NFSMNT_NFSV3) &&
1221 error == NFSERR_TRYLATER) {
1222 m_freem(mrep);
1223 error = 0;
1224 waituntil = time.tv_sec + trylater_delay;
1225 while (time.tv_sec < waituntil)
1226 (void) tsleep((caddr_t)&lbolt,
1227 PSOCK, "nqnfstry", 0);
1228 trylater_delay *= NFS_TRYLATERDELMUL;
1229 if (trylater_delay > NFS_TRYLATERDELMAX)
1230 trylater_delay = NFS_TRYLATERDELMAX;
1231 /*
1232 * RFC1813:
1233 * The client should wait and then try
1234 * the request with a new RPC transaction ID.
1235 */
1236 nfs_renewxid(rep);
1237 goto tryagain;
1238 }
1239
1240 /*
1241 * If the File Handle was stale, invalidate the
1242 * lookup cache, just in case.
1243 */
1244 if (error == ESTALE)
1245 cache_purge(NFSTOV(np));
1246 if (nmp->nm_flag & NFSMNT_NFSV3) {
1247 *mrp = mrep;
1248 *mdp = md;
1249 *dposp = dpos;
1250 error |= NFSERR_RETERR;
1251 } else
1252 m_freem(mrep);
1253 goto nfsmout;
1254 }
1255
1256 /*
1257 * note which credential worked to minimize number of retries.
1258 */
1259 if (use_opencred)
1260 np->n_flag |= NUSEOPENCRED;
1261 else
1262 np->n_flag &= ~NUSEOPENCRED;
1263
1264 #ifndef NFS_V2_ONLY
1265 /*
1266 * For nqnfs, get any lease in reply
1267 */
1268 if (nmp->nm_flag & NFSMNT_NQNFS) {
1269 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1270 if (*tl) {
1271 nqlflag = fxdr_unsigned(int, *tl);
1272 nfsm_dissect(tl, u_int32_t *, 4*NFSX_UNSIGNED);
1273 cachable = fxdr_unsigned(int, *tl++);
1274 reqtime += fxdr_unsigned(int, *tl++);
1275 if (reqtime > time.tv_sec) {
1276 frev = fxdr_hyper(tl);
1277 nqnfs_clientlease(nmp, np, nqlflag,
1278 cachable, reqtime, frev);
1279 }
1280 }
1281 }
1282 #endif
1283 *mrp = mrep;
1284 *mdp = md;
1285 *dposp = dpos;
1286
1287 KASSERT(error == 0);
1288 goto nfsmout;
1289 }
1290 m_freem(mrep);
1291 error = EPROTONOSUPPORT;
1292 nfsmout:
1293 m_freem(rep->r_mreq);
1294 free((caddr_t)rep, M_NFSREQ);
1295 m_freem(mrest_backup);
1296 return (error);
1297 }
1298 #endif /* NFS */
1299
1300 /*
1301 * Generate the rpc reply header
1302 * siz arg. is used to decide if adding a cluster is worthwhile
1303 */
1304 int
1305 nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
1306 int siz;
1307 struct nfsrv_descript *nd;
1308 struct nfssvc_sock *slp;
1309 int err;
1310 int cache;
1311 u_quad_t *frev;
1312 struct mbuf **mrq;
1313 struct mbuf **mbp;
1314 caddr_t *bposp;
1315 {
1316 u_int32_t *tl;
1317 struct mbuf *mreq;
1318 caddr_t bpos;
1319 struct mbuf *mb;
1320
1321 mreq = m_gethdr(M_WAIT, MT_DATA);
1322 MCLAIM(mreq, &nfs_mowner);
1323 mb = mreq;
1324 /*
1325 * If this is a big reply, use a cluster; else
1326 * try and leave leading space for the lower level headers.
1327 */
1328 siz += RPC_REPLYSIZ;
1329 if (siz >= max_datalen) {
1330 m_clget(mreq, M_WAIT);
1331 } else
1332 mreq->m_data += max_hdr;
1333 tl = mtod(mreq, u_int32_t *);
1334 mreq->m_len = 6 * NFSX_UNSIGNED;
1335 bpos = ((caddr_t)tl) + mreq->m_len;
1336 *tl++ = txdr_unsigned(nd->nd_retxid);
1337 *tl++ = rpc_reply;
1338 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
1339 *tl++ = rpc_msgdenied;
1340 if (err & NFSERR_AUTHERR) {
1341 *tl++ = rpc_autherr;
1342 *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
1343 mreq->m_len -= NFSX_UNSIGNED;
1344 bpos -= NFSX_UNSIGNED;
1345 } else {
1346 *tl++ = rpc_mismatch;
1347 *tl++ = txdr_unsigned(RPC_VER2);
1348 *tl = txdr_unsigned(RPC_VER2);
1349 }
1350 } else {
1351 *tl++ = rpc_msgaccepted;
1352
1353 /*
1354 * For Kerberos authentication, we must send the nickname
1355 * verifier back, otherwise just RPCAUTH_NULL.
1356 */
1357 if (nd->nd_flag & ND_KERBFULL) {
1358 struct nfsuid *nuidp;
1359 struct timeval ktvin, ktvout;
1360
1361 LIST_FOREACH(nuidp, NUIDHASH(slp, nd->nd_cr.cr_uid),
1362 nu_hash) {
1363 if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
1364 (!nd->nd_nam2 || netaddr_match(
1365 NU_NETFAM(nuidp), &nuidp->nu_haddr,
1366 nd->nd_nam2)))
1367 break;
1368 }
1369 if (nuidp) {
1370 ktvin.tv_sec =
1371 txdr_unsigned(nuidp->nu_timestamp.tv_sec
1372 - 1);
1373 ktvin.tv_usec =
1374 txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1375
1376 /*
1377 * Encrypt the timestamp in ecb mode using the
1378 * session key.
1379 */
1380 #ifdef NFSKERB
1381 XXX
1382 #endif
1383
1384 *tl++ = rpc_auth_kerb;
1385 *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
1386 *tl = ktvout.tv_sec;
1387 nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1388 *tl++ = ktvout.tv_usec;
1389 *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
1390 } else {
1391 *tl++ = 0;
1392 *tl++ = 0;
1393 }
1394 } else {
1395 *tl++ = 0;
1396 *tl++ = 0;
1397 }
1398 switch (err) {
1399 case EPROGUNAVAIL:
1400 *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1401 break;
1402 case EPROGMISMATCH:
1403 *tl = txdr_unsigned(RPC_PROGMISMATCH);
1404 nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1405 if (nd->nd_flag & ND_NQNFS) {
1406 *tl++ = txdr_unsigned(3);
1407 *tl = txdr_unsigned(3);
1408 } else {
1409 *tl++ = txdr_unsigned(2);
1410 *tl = txdr_unsigned(3);
1411 }
1412 break;
1413 case EPROCUNAVAIL:
1414 *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1415 break;
1416 case EBADRPC:
1417 *tl = txdr_unsigned(RPC_GARBAGE);
1418 break;
1419 default:
1420 *tl = 0;
1421 if (err != NFSERR_RETVOID) {
1422 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1423 if (err)
1424 *tl = txdr_unsigned(nfsrv_errmap(nd, err));
1425 else
1426 *tl = 0;
1427 }
1428 break;
1429 };
1430 }
1431
1432 /*
1433 * For nqnfs, piggyback lease as requested.
1434 */
1435 if ((nd->nd_flag & ND_NQNFS) && err == 0) {
1436 if (nd->nd_flag & ND_LEASE) {
1437 nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1438 *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
1439 *tl++ = txdr_unsigned(cache);
1440 *tl++ = txdr_unsigned(nd->nd_duration);
1441 txdr_hyper(*frev, tl);
1442 } else {
1443 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
1444 *tl = 0;
1445 }
1446 }
1447 if (mrq != NULL)
1448 *mrq = mreq;
1449 *mbp = mb;
1450 *bposp = bpos;
1451 if (err != 0 && err != NFSERR_RETVOID)
1452 nfsstats.srvrpc_errs++;
1453 return (0);
1454 }
1455
1456 /*
1457 * Nfs timer routine
1458 * Scan the nfsreq list and retransmit any requests that have timed out.
1459 * To avoid retransmission attempts on STREAM sockets (in the future), make
1460 * sure to set the r_retry field to 0 (implies nm_retry == 0).
1461 */
1462 void
1463 nfs_timer(arg)
1464 void *arg; /* never used */
1465 {
1466 struct nfsreq *rep;
1467 struct mbuf *m;
1468 struct socket *so;
1469 struct nfsmount *nmp;
1470 int timeo;
1471 int s, error;
1472 #ifdef NFSSERVER
1473 struct nfssvc_sock *slp;
1474 static long lasttime = 0;
1475 u_quad_t cur_usec;
1476 #endif
1477
1478 s = splsoftnet();
1479 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
1480 nmp = rep->r_nmp;
1481 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
1482 continue;
1483 if (nfs_sigintr(nmp, rep, rep->r_procp)) {
1484 rep->r_flags |= R_SOFTTERM;
1485 continue;
1486 }
1487 if (rep->r_rtt >= 0) {
1488 rep->r_rtt++;
1489 if (nmp->nm_flag & NFSMNT_DUMBTIMR)
1490 timeo = nmp->nm_timeo;
1491 else
1492 timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
1493 if (nmp->nm_timeouts > 0)
1494 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
1495 if (rep->r_rtt <= timeo)
1496 continue;
1497 if (nmp->nm_timeouts <
1498 (sizeof(nfs_backoff) / sizeof(nfs_backoff[0])))
1499 nmp->nm_timeouts++;
1500 }
1501 /*
1502 * Check for server not responding
1503 */
1504 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
1505 rep->r_rexmit > nmp->nm_deadthresh) {
1506 nfs_msg(rep->r_procp,
1507 nmp->nm_mountp->mnt_stat.f_mntfromname,
1508 "not responding");
1509 rep->r_flags |= R_TPRINTFMSG;
1510 }
1511 if (rep->r_rexmit >= rep->r_retry) { /* too many */
1512 nfsstats.rpctimeouts++;
1513 rep->r_flags |= R_SOFTTERM;
1514 continue;
1515 }
1516 if (nmp->nm_sotype != SOCK_DGRAM) {
1517 if (++rep->r_rexmit > NFS_MAXREXMIT)
1518 rep->r_rexmit = NFS_MAXREXMIT;
1519 continue;
1520 }
1521 if ((so = nmp->nm_so) == NULL)
1522 continue;
1523
1524 /*
1525 * If there is enough space and the window allows..
1526 * Resend it
1527 * Set r_rtt to -1 in case we fail to send it now.
1528 */
1529 rep->r_rtt = -1;
1530 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1531 ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1532 (rep->r_flags & R_SENT) ||
1533 nmp->nm_sent < nmp->nm_cwnd) &&
1534 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
1535 if (so->so_state & SS_ISCONNECTED)
1536 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
1537 (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);
1538 else
1539 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
1540 nmp->nm_nam, (struct mbuf *)0, (struct proc *)0);
1541 if (error) {
1542 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
1543 #ifdef DEBUG
1544 printf("nfs_timer: ignoring error %d\n",
1545 error);
1546 #endif
1547 so->so_error = 0;
1548 }
1549 } else {
1550 /*
1551 * Iff first send, start timing
1552 * else turn timing off, backoff timer
1553 * and divide congestion window by 2.
1554 */
1555 if (rep->r_flags & R_SENT) {
1556 rep->r_flags &= ~R_TIMING;
1557 if (++rep->r_rexmit > NFS_MAXREXMIT)
1558 rep->r_rexmit = NFS_MAXREXMIT;
1559 nmp->nm_cwnd >>= 1;
1560 if (nmp->nm_cwnd < NFS_CWNDSCALE)
1561 nmp->nm_cwnd = NFS_CWNDSCALE;
1562 nfsstats.rpcretries++;
1563 } else {
1564 rep->r_flags |= R_SENT;
1565 nmp->nm_sent += NFS_CWNDSCALE;
1566 }
1567 rep->r_rtt = 0;
1568 }
1569 }
1570 }
1571
1572 #ifdef NFSSERVER
1573 /*
1574 * Call the nqnfs server timer once a second to handle leases.
1575 */
1576 if (lasttime != time.tv_sec) {
1577 lasttime = time.tv_sec;
1578 nqnfs_serverd();
1579 }
1580
1581 /*
1582 * Scan the write gathering queues for writes that need to be
1583 * completed now.
1584 */
1585 cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
1586 TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
1587 if (LIST_FIRST(&slp->ns_tq) &&
1588 LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
1589 nfsrv_wakenfsd(slp);
1590 }
1591 #endif /* NFSSERVER */
1592 splx(s);
1593 callout_schedule(&nfs_timer_ch, nfs_ticks);
1594 }
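
[Editorial sketch] Because the retransmit threshold above is the base RTO estimate multiplied by an nfs_backoff[] entry indexed by nm_timeouts, consecutive unanswered timeouts stretch the wait geometrically (2x, 4x, ... up to 256x). A sketch of that schedule under an assumed base timeout of 7 ticks (the base value is hypothetical; the table is the one defined earlier in this file):

#include <stdio.h>

static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256 };

int
main(void)
{
	int base = 7;	/* assumed base RTO in ticks */
	int timeouts, timeo;

	for (timeouts = 0; timeouts <= 8; timeouts++) {
		timeo = base;
		if (timeouts > 0)	/* mirrors the nm_timeouts scaling */
			timeo *= nfs_backoff[timeouts - 1];
		printf("timeouts=%d -> wait %d ticks\n", timeouts, timeo);
	}
	return 0;
}
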
1595
1596 /*ARGSUSED*/
1597 void
1598 nfs_exit(p, v)
1599 struct proc *p;
1600 void *v;
1601 {
1602 struct nfsreq *rp;
1603 int s = splsoftnet();
1604
1605 TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
1606 if (rp->r_procp == p)
1607 TAILQ_REMOVE(&nfs_reqq, rp, r_chain);
1608 }
1609 splx(s);
1610 }
1611
1612 /*
1613 * Test for a termination condition pending on the process.
1614 * This is used for NFSMNT_INT mounts.
1615 */
1616 int
1617 nfs_sigintr(nmp, rep, p)
1618 struct nfsmount *nmp;
1619 struct nfsreq *rep;
1620 struct proc *p;
1621 {
1622 sigset_t ss;
1623
1624 if (rep && (rep->r_flags & R_SOFTTERM))
1625 return (EINTR);
1626 if (!(nmp->nm_flag & NFSMNT_INT))
1627 return (0);
1628 if (p) {
1629 sigpending1(p, &ss);
1630 #if 0
1631 sigminusset(&p->p_sigctx.ps_sigignore, &ss);
1632 #endif
1633 if (sigismember(&ss, SIGINT) || sigismember(&ss, SIGTERM) ||
1634 sigismember(&ss, SIGKILL) || sigismember(&ss, SIGHUP) ||
1635 sigismember(&ss, SIGQUIT))
1636 return (EINTR);
1637 }
1638 return (0);
1639 }
1640
1641 /*
1642 * Lock a socket against others.
1643 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1644 * and also to avoid race conditions between the processes with nfs requests
1645 * in progress when a reconnect is necessary.
1646 */
1647 int
1648 nfs_sndlock(flagp, rep)
1649 int *flagp;
1650 struct nfsreq *rep;
1651 {
1652 struct proc *p;
1653 int slpflag = 0, slptimeo = 0;
1654
1655 if (rep) {
1656 p = rep->r_procp;
1657 if (rep->r_nmp->nm_flag & NFSMNT_INT)
1658 slpflag = PCATCH;
1659 } else
1660 p = (struct proc *)0;
1661 while (*flagp & NFSMNT_SNDLOCK) {
1662 if (rep && nfs_sigintr(rep->r_nmp, rep, p))
1663 return (EINTR);
1664 *flagp |= NFSMNT_WANTSND;
1665 (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
1666 slptimeo);
1667 if (slpflag == PCATCH) {
1668 slpflag = 0;
1669 slptimeo = 2 * hz;
1670 }
1671 }
1672 *flagp |= NFSMNT_SNDLOCK;
1673 return (0);
1674 }
1675
1676 /*
1677 * Unlock the stream socket for others.
1678 */
1679 void
1680 nfs_sndunlock(flagp)
1681 int *flagp;
1682 {
1683
1684 if ((*flagp & NFSMNT_SNDLOCK) == 0)
1685 panic("nfs sndunlock");
1686 *flagp &= ~NFSMNT_SNDLOCK;
1687 if (*flagp & NFSMNT_WANTSND) {
1688 *flagp &= ~NFSMNT_WANTSND;
1689 wakeup((caddr_t)flagp);
1690 }
1691 }
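
[Editorial sketch] nfs_sndlock()/nfs_sndunlock() implement a hand-rolled sleep lock from two flag bits: NFSMNT_SNDLOCK marks the lock held and NFSMNT_WANTSND records that a waiter is sleeping, so the unlock side only issues a wakeup when someone is actually waiting. A rough user-space analogue of the idiom, with pthreads standing in for tsleep()/wakeup() (a model of the pattern, not the kernel's primitives, and without the signal-interruption checks):

#include <pthread.h>
#include <stdio.h>

#define SNDLOCK	0x01	/* lock is held */
#define WANTSND	0x02	/* someone is sleeping on the lock */

static int flags;
static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;

static void
sndlock(void)
{
	pthread_mutex_lock(&mtx);
	while (flags & SNDLOCK) {	/* held: note interest, then sleep */
		flags |= WANTSND;
		pthread_cond_wait(&cv, &mtx);
	}
	flags |= SNDLOCK;
	pthread_mutex_unlock(&mtx);
}

static void
sndunlock(void)
{
	pthread_mutex_lock(&mtx);
	flags &= ~SNDLOCK;
	if (flags & WANTSND) {		/* only wake if someone waits */
		flags &= ~WANTSND;
		pthread_cond_broadcast(&cv);
	}
	pthread_mutex_unlock(&mtx);
}

int
main(void)
{
	sndlock();
	printf("locked\n");
	sndunlock();
	printf("unlocked\n");
	return 0;
}
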
1692
1693 int
1694 nfs_rcvlock(rep)
1695 struct nfsreq *rep;
1696 {
1697 struct nfsmount *nmp = rep->r_nmp;
1698 int *flagp = &nmp->nm_iflag;
1699 int slpflag, slptimeo = 0;
1700 int error = 0;
1701
1702 if (*flagp & NFSMNT_DISMNT)
1703 return EIO;
1704
1705 if (*flagp & NFSMNT_INT)
1706 slpflag = PCATCH;
1707 else
1708 slpflag = 0;
1709 simple_lock(&nmp->nm_slock);
1710 while (*flagp & NFSMNT_RCVLOCK) {
1711 if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) {
1712 error = EINTR;
1713 goto quit;
1714 }
1715 *flagp |= NFSMNT_WANTRCV;
1716 nmp->nm_waiters++;
1717 (void) ltsleep(flagp, slpflag | (PZERO - 1), "nfsrcvlk",
1718 slptimeo, &nmp->nm_slock);
1719 nmp->nm_waiters--;
1720 if (*flagp & NFSMNT_DISMNT) {
1721 wakeup(&nmp->nm_waiters);
1722 error = EIO;
1723 goto quit;
1724 }
1725 /* If our reply was received while we were sleeping,
1726 * then just return without taking the lock to avoid a
1727 * situation where a single iod could 'capture' the
1728 * receive lock.
1729 */
1730 if (rep->r_mrep != NULL) {
1731 error = EALREADY;
1732 goto quit;
1733 }
1734 if (slpflag == PCATCH) {
1735 slpflag = 0;
1736 slptimeo = 2 * hz;
1737 }
1738 }
1739 *flagp |= NFSMNT_RCVLOCK;
1740 quit:
1741 simple_unlock(&nmp->nm_slock);
1742 return error;
1743 }
1744
1745 /*
1746 * Unlock the stream socket for others.
1747 */
1748 void
1749 nfs_rcvunlock(nmp)
1750 struct nfsmount *nmp;
1751 {
1752 int *flagp = &nmp->nm_iflag;
1753
1754 simple_lock(&nmp->nm_slock);
1755 if ((*flagp & NFSMNT_RCVLOCK) == 0)
1756 panic("nfs rcvunlock");
1757 *flagp &= ~NFSMNT_RCVLOCK;
1758 if (*flagp & NFSMNT_WANTRCV) {
1759 *flagp &= ~NFSMNT_WANTRCV;
1760 wakeup((caddr_t)flagp);
1761 }
1762 simple_unlock(&nmp->nm_slock);
1763 }
1764
1765 /*
1766 * Parse an RPC request
1767 * - verify it
1768 * - fill in the cred struct.
1769 */
1770 int
1771 nfs_getreq(nd, nfsd, has_header)
1772 struct nfsrv_descript *nd;
1773 struct nfsd *nfsd;
1774 int has_header;
1775 {
1776 int len, i;
1777 u_int32_t *tl;
1778 int32_t t1;
1779 struct uio uio;
1780 struct iovec iov;
1781 caddr_t dpos, cp2, cp;
1782 u_int32_t nfsvers, auth_type;
1783 uid_t nickuid;
1784 int error = 0, nqnfs = 0, ticklen;
1785 struct mbuf *mrep, *md;
1786 struct nfsuid *nuidp;
1787 struct timeval tvin, tvout;
1788
1789 mrep = nd->nd_mrep;
1790 md = nd->nd_md;
1791 dpos = nd->nd_dpos;
1792 if (has_header) {
1793 nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
1794 nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
1795 if (*tl++ != rpc_call) {
1796 m_freem(mrep);
1797 return (EBADRPC);
1798 }
1799 } else
1800 nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
1801 nd->nd_repstat = 0;
1802 nd->nd_flag = 0;
1803 if (*tl++ != rpc_vers) {
1804 nd->nd_repstat = ERPCMISMATCH;
1805 nd->nd_procnum = NFSPROC_NOOP;
1806 return (0);
1807 }
1808 if (*tl != nfs_prog) {
1809 if (*tl == nqnfs_prog)
1810 nqnfs++;
1811 else {
1812 nd->nd_repstat = EPROGUNAVAIL;
1813 nd->nd_procnum = NFSPROC_NOOP;
1814 return (0);
1815 }
1816 }
1817 tl++;
1818 nfsvers = fxdr_unsigned(u_int32_t, *tl++);
1819 if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
1820 (nfsvers != NQNFS_VER3 && nqnfs)) {
1821 nd->nd_repstat = EPROGMISMATCH;
1822 nd->nd_procnum = NFSPROC_NOOP;
1823 return (0);
1824 }
1825 if (nqnfs)
1826 nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
1827 else if (nfsvers == NFS_VER3)
1828 nd->nd_flag = ND_NFSV3;
1829 nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
1830 if (nd->nd_procnum == NFSPROC_NULL)
1831 return (0);
1832 if (nd->nd_procnum >= NFS_NPROCS ||
1833 (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
1834 (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
1835 nd->nd_repstat = EPROCUNAVAIL;
1836 nd->nd_procnum = NFSPROC_NOOP;
1837 return (0);
1838 }
1839 if ((nd->nd_flag & ND_NFSV3) == 0)
1840 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
1841 auth_type = *tl++;
1842 len = fxdr_unsigned(int, *tl++);
1843 if (len < 0 || len > RPCAUTH_MAXSIZ) {
1844 m_freem(mrep);
1845 return (EBADRPC);
1846 }
1847
1848 nd->nd_flag &= ~ND_KERBAUTH;
1849 /*
1850 * Handle auth_unix or auth_kerb.
1851 */
1852 if (auth_type == rpc_auth_unix) {
1853 len = fxdr_unsigned(int, *++tl);
1854 if (len < 0 || len > NFS_MAXNAMLEN) {
1855 m_freem(mrep);
1856 return (EBADRPC);
1857 }
1858 nfsm_adv(nfsm_rndup(len));
1859 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1860 memset((caddr_t)&nd->nd_cr, 0, sizeof (struct ucred));
1861 nd->nd_cr.cr_ref = 1;
1862 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
1863 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
1864 len = fxdr_unsigned(int, *tl);
1865 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
1866 m_freem(mrep);
1867 return (EBADRPC);
1868 }
1869 nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
1870 for (i = 0; i < len; i++)
1871 if (i < NGROUPS)
1872 nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
1873 else
1874 tl++;
1875 nd->nd_cr.cr_ngroups = (len > NGROUPS) ? NGROUPS : len;
1876 if (nd->nd_cr.cr_ngroups > 1)
1877 nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
1878 len = fxdr_unsigned(int, *++tl);
1879 if (len < 0 || len > RPCAUTH_MAXSIZ) {
1880 m_freem(mrep);
1881 return (EBADRPC);
1882 }
1883 if (len > 0)
1884 nfsm_adv(nfsm_rndup(len));
1885 } else if (auth_type == rpc_auth_kerb) {
1886 switch (fxdr_unsigned(int, *tl++)) {
1887 case RPCAKN_FULLNAME:
1888 ticklen = fxdr_unsigned(int, *tl);
1889 *((u_int32_t *)nfsd->nfsd_authstr) = *tl;
1890 uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
1891 nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
1892 if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
1893 m_freem(mrep);
1894 return (EBADRPC);
1895 }
1896 uio.uio_offset = 0;
1897 uio.uio_iov = &iov;
1898 uio.uio_iovcnt = 1;
1899 uio.uio_segflg = UIO_SYSSPACE;
1900 iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
1901 iov.iov_len = RPCAUTH_MAXSIZ - 4;
1902 nfsm_mtouio(&uio, uio.uio_resid);
1903 nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1904 if (*tl++ != rpc_auth_kerb ||
1905 fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
1906 printf("Bad kerb verifier\n");
1907 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
1908 nd->nd_procnum = NFSPROC_NOOP;
1909 return (0);
1910 }
1911 nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
1912 tl = (u_int32_t *)cp;
1913 if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
1914 printf("Not fullname kerb verifier\n");
1915 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
1916 nd->nd_procnum = NFSPROC_NOOP;
1917 return (0);
1918 }
1919 cp += NFSX_UNSIGNED;
1920 memcpy(nfsd->nfsd_verfstr, cp, 3 * NFSX_UNSIGNED);
1921 nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
1922 nd->nd_flag |= ND_KERBFULL;
1923 nfsd->nfsd_flag |= NFSD_NEEDAUTH;
1924 break;
1925 case RPCAKN_NICKNAME:
1926 if (len != 2 * NFSX_UNSIGNED) {
1927 printf("Kerb nickname short\n");
1928 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
1929 nd->nd_procnum = NFSPROC_NOOP;
1930 return (0);
1931 }
1932 nickuid = fxdr_unsigned(uid_t, *tl);
1933 nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1934 if (*tl++ != rpc_auth_kerb ||
1935 fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
1936 printf("Kerb nick verifier bad\n");
1937 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
1938 nd->nd_procnum = NFSPROC_NOOP;
1939 return (0);
1940 }
1941 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1942 tvin.tv_sec = *tl++;
1943 tvin.tv_usec = *tl;
1944
1945 LIST_FOREACH(nuidp, NUIDHASH(nfsd->nfsd_slp, nickuid),
1946 nu_hash) {
1947 if (nuidp->nu_cr.cr_uid == nickuid &&
1948 (!nd->nd_nam2 ||
1949 netaddr_match(NU_NETFAM(nuidp),
1950 &nuidp->nu_haddr, nd->nd_nam2)))
1951 break;
1952 }
1953 if (!nuidp) {
1954 nd->nd_repstat =
1955 (NFSERR_AUTHERR|AUTH_REJECTCRED);
1956 nd->nd_procnum = NFSPROC_NOOP;
1957 return (0);
1958 }
1959
1960 /*
1961 * Now, decrypt the timestamp using the session key
1962 * and validate it.
1963 */
1964 #ifdef NFSKERB
1965 XXX
1966 #endif
1967
1968 tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
1969 tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
1970 if (nuidp->nu_expire < time.tv_sec ||
1971 nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
1972 (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
1973 nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
1974 nuidp->nu_expire = 0;
1975 nd->nd_repstat =
1976 (NFSERR_AUTHERR|AUTH_REJECTVERF);
1977 nd->nd_procnum = NFSPROC_NOOP;
1978 return (0);
1979 }
1980 nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
1981 nd->nd_flag |= ND_KERBNICK;
1982 };
1983 } else {
1984 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
1985 nd->nd_procnum = NFSPROC_NOOP;
1986 return (0);
1987 }
1988
1989 /*
1990 * For nqnfs, get piggybacked lease request.
1991 */
1992 if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
1993 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1994 nd->nd_flag |= fxdr_unsigned(int, *tl);
1995 if (nd->nd_flag & ND_LEASE) {
1996 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
1997 nd->nd_duration = fxdr_unsigned(u_int32_t, *tl);
1998 } else
1999 nd->nd_duration = NQ_MINLEASE;
2000 } else
2001 nd->nd_duration = NQ_MINLEASE;
2002 nd->nd_md = md;
2003 nd->nd_dpos = dpos;
2004 return (0);
2005 nfsmout:
2006 return (error);
2007 }
2008
2009 int
2010 nfs_msg(p, server, msg)
2011 struct proc *p;
2012 char *server, *msg;
2013 {
2014 tpr_t tpr;
2015
2016 if (p)
2017 tpr = tprintf_open(p);
2018 else
2019 tpr = NULL;
2020 tprintf(tpr, "nfs server %s: %s\n", server, msg);
2021 tprintf_close(tpr);
2022 return (0);
2023 }
2024
2025 #ifdef NFSSERVER
2026 int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *,
2027 struct nfssvc_sock *, struct proc *,
2028 struct mbuf **)) = {
2029 nfsrv_null,
2030 nfsrv_getattr,
2031 nfsrv_setattr,
2032 nfsrv_lookup,
2033 nfsrv3_access,
2034 nfsrv_readlink,
2035 nfsrv_read,
2036 nfsrv_write,
2037 nfsrv_create,
2038 nfsrv_mkdir,
2039 nfsrv_symlink,
2040 nfsrv_mknod,
2041 nfsrv_remove,
2042 nfsrv_rmdir,
2043 nfsrv_rename,
2044 nfsrv_link,
2045 nfsrv_readdir,
2046 nfsrv_readdirplus,
2047 nfsrv_statfs,
2048 nfsrv_fsinfo,
2049 nfsrv_pathconf,
2050 nfsrv_commit,
2051 nqnfsrv_getlease,
2052 nqnfsrv_vacated,
2053 nfsrv_noop,
2054 nfsrv_noop
2055 };
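
/*
 * The table above is indexed by the RPC procedure number stored in
 * nd_procnum, so the nfsd service loop dispatches a parsed request
 * roughly as:
 *
 *	error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, slp, p, &mreq);
 *
 * Malformed or unauthenticated requests were remapped to NFSPROC_NOOP
 * by nfs_getreq() above, which keeps the index in range and makes the
 * dispatch return the saved nd_repstat error.
 */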
2056
2057 /*
2058 * Socket upcall routine for the nfsd sockets.
2059 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2060  * Do as much as possible without blocking; otherwise punt, and this
2061  * routine will be called again with M_WAIT from an nfsd.
2062 */
2063 void
2064 nfsrv_rcv(so, arg, waitflag)
2065 struct socket *so;
2066 caddr_t arg;
2067 int waitflag;
2068 {
2069 struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
2070 struct mbuf *m;
2071 struct mbuf *mp, *nam;
2072 struct uio auio;
2073 int flags, error;
2074
2075 if ((slp->ns_flag & SLP_VALID) == 0)
2076 return;
2077 #ifdef notdef
2078 /*
2079 * Define this to test for nfsds handling this under heavy load.
2080 */
2081 if (waitflag == M_DONTWAIT) {
2082 slp->ns_flag |= SLP_NEEDQ; goto dorecs;
2083 }
2084 #endif
2085 auio.uio_procp = NULL;
2086 if (so->so_type == SOCK_STREAM) {
2087 /*
2088 * If there are already records on the queue, defer soreceive()
2089 * to an nfsd so that there is feedback to the TCP layer that
2090 * the nfs servers are heavily loaded.
2091 */
2092 if (slp->ns_rec && waitflag == M_DONTWAIT) {
2093 slp->ns_flag |= SLP_NEEDQ;
2094 goto dorecs;
2095 }
2096
2097 /*
2098 * Do soreceive().
2099 */
2100 auio.uio_resid = 1000000000;
2101 flags = MSG_DONTWAIT;
2102 		error = (*so->so_receive)(so, &nam, &auio, &mp,
 		    (struct mbuf **)0, &flags);
2103 if (error || mp == (struct mbuf *)0) {
2104 if (error == EWOULDBLOCK)
2105 slp->ns_flag |= SLP_NEEDQ;
2106 else
2107 slp->ns_flag |= SLP_DISCONN;
2108 goto dorecs;
2109 }
2110 m = mp;
2111 if (slp->ns_rawend) {
2112 slp->ns_rawend->m_next = m;
2113 slp->ns_cc += 1000000000 - auio.uio_resid;
2114 } else {
2115 slp->ns_raw = m;
2116 slp->ns_cc = 1000000000 - auio.uio_resid;
2117 }
2118 while (m->m_next)
2119 m = m->m_next;
2120 slp->ns_rawend = m;
2121
2122 /*
2123 	 * Now try to parse record(s) out of the raw stream data.
2124 */
2125 error = nfsrv_getstream(slp, waitflag);
2126 if (error) {
2127 if (error == EPERM)
2128 slp->ns_flag |= SLP_DISCONN;
2129 else
2130 slp->ns_flag |= SLP_NEEDQ;
2131 }
2132 } else {
2133 do {
2134 auio.uio_resid = 1000000000;
2135 flags = MSG_DONTWAIT;
2136 error = (*so->so_receive)(so, &nam, &auio, &mp,
2137 (struct mbuf **)0, &flags);
2138 if (mp) {
2139 if (nam) {
2140 m = nam;
2141 m->m_next = mp;
2142 } else
2143 m = mp;
2144 if (slp->ns_recend)
2145 slp->ns_recend->m_nextpkt = m;
2146 else
2147 slp->ns_rec = m;
2148 slp->ns_recend = m;
2149 m->m_nextpkt = (struct mbuf *)0;
2150 }
2151 if (error) {
2152 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
2153 && error != EWOULDBLOCK) {
2154 slp->ns_flag |= SLP_DISCONN;
2155 goto dorecs;
2156 }
2157 }
2158 } while (mp);
2159 }
2160
2161 /*
2162 	 * Now try to process the request records without blocking.
2163 */
2164 dorecs:
2165 if (waitflag == M_DONTWAIT &&
2166 (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
2167 nfsrv_wakenfsd(slp);
2168 }
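
/*
 * nfsrv_rcv() is installed as the receive upcall when a socket is added
 * to the server (cf. nfsrv_addsock() in nfs_syscalls.c), roughly:
 *
 *	so->so_upcallarg = (caddr_t)slp;
 *	so->so_upcall = nfsrv_rcv;
 *	so->so_rcv.sb_flags |= SB_UPCALL;
 *
 * It therefore runs at socket-interrupt time with waitflag M_DONTWAIT
 * and must not sleep; work that would block is flagged via SLP_NEEDQ or
 * SLP_DISCONN and deferred to an nfsd through nfsrv_wakenfsd().
 */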
2169
2170 /*
2171  * Try to extract an RPC request from the mbuf data list received on a
2172 * stream socket. The "waitflag" argument indicates whether or not it
2173 * can sleep.
2174 */
2175 int
2176 nfsrv_getstream(slp, waitflag)
2177 struct nfssvc_sock *slp;
2178 int waitflag;
2179 {
2180 struct mbuf *m, **mpp;
2181 struct mbuf *recm;
2182 u_int32_t recmark;
2183
2184 if (slp->ns_flag & SLP_GETSTREAM)
2185 panic("nfs getstream");
2186 slp->ns_flag |= SLP_GETSTREAM;
2187 for (;;) {
2188 if (slp->ns_reclen == 0) {
2189 if (slp->ns_cc < NFSX_UNSIGNED) {
2190 slp->ns_flag &= ~SLP_GETSTREAM;
2191 return (0);
2192 }
2193 m = slp->ns_raw;
2194 m_copydata(m, 0, NFSX_UNSIGNED, (caddr_t)&recmark);
2195 m_adj(m, NFSX_UNSIGNED);
2196 slp->ns_cc -= NFSX_UNSIGNED;
2197 recmark = ntohl(recmark);
2198 slp->ns_reclen = recmark & ~0x80000000;
2199 if (recmark & 0x80000000)
2200 slp->ns_flag |= SLP_LASTFRAG;
2201 else
2202 slp->ns_flag &= ~SLP_LASTFRAG;
2203 if (slp->ns_reclen > NFS_MAXPACKET) {
2204 slp->ns_flag &= ~SLP_GETSTREAM;
2205 return (EPERM);
2206 }
2207 }
2208
2209 /*
2210 * Now get the record part.
2211 *
2212 * Note that slp->ns_reclen may be 0. Linux sometimes
2213 * generates 0-length records.
2214 */
2215 if (slp->ns_cc == slp->ns_reclen) {
2216 recm = slp->ns_raw;
2217 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
2218 slp->ns_cc = slp->ns_reclen = 0;
2219 } else if (slp->ns_cc > slp->ns_reclen) {
2220 recm = slp->ns_raw;
2221 m = m_split(recm, slp->ns_reclen, waitflag);
2222 if (m == NULL) {
2223 slp->ns_flag &= ~SLP_GETSTREAM;
2224 return (EWOULDBLOCK);
2225 }
2226 m_claimm(recm, &nfs_mowner);
2227 slp->ns_raw = m;
2228 if (m->m_next == NULL)
2229 slp->ns_rawend = m;
2230 slp->ns_cc -= slp->ns_reclen;
2231 slp->ns_reclen = 0;
2232 } else {
2233 slp->ns_flag &= ~SLP_GETSTREAM;
2234 return (0);
2235 }
2236
2237 /*
2238 * Accumulate the fragments into a record.
2239 */
2240 mpp = &slp->ns_frag;
2241 while (*mpp)
2242 mpp = &((*mpp)->m_next);
2243 *mpp = recm;
2244 if (slp->ns_flag & SLP_LASTFRAG) {
2245 if (slp->ns_recend)
2246 slp->ns_recend->m_nextpkt = slp->ns_frag;
2247 else
2248 slp->ns_rec = slp->ns_frag;
2249 slp->ns_recend = slp->ns_frag;
2250 slp->ns_frag = (struct mbuf *)0;
2251 }
2252 }
2253 }
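
/*
 * The 32-bit record mark parsed above is standard RPC-over-TCP record
 * marking (RFC 1057): the high bit flags the final fragment of a record
 * and the low 31 bits carry the fragment length, i.e.
 *
 *	recmark  = ntohl(recmark);
 *	lastfrag = (recmark & 0x80000000) != 0;
 *	reclen   = recmark & ~0x80000000;
 *
 * Fragments accumulate on ns_frag until the last one arrives, at which
 * point the completed record is appended to the ns_rec request queue.
 */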
2254
2255 /*
2256  * Dequeue the next request record and parse its RPC header.
2257 */
2258 int
2259 nfsrv_dorec(slp, nfsd, ndp)
2260 struct nfssvc_sock *slp;
2261 struct nfsd *nfsd;
2262 struct nfsrv_descript **ndp;
2263 {
2264 struct mbuf *m, *nam;
2265 struct nfsrv_descript *nd;
2266 int error;
2267
2268 *ndp = NULL;
2269 if ((slp->ns_flag & SLP_VALID) == 0 ||
2270 (m = slp->ns_rec) == (struct mbuf *)0)
2271 return (ENOBUFS);
2272 slp->ns_rec = m->m_nextpkt;
2273 if (slp->ns_rec)
2274 m->m_nextpkt = (struct mbuf *)0;
2275 else
2276 slp->ns_recend = (struct mbuf *)0;
2277 if (m->m_type == MT_SONAME) {
2278 nam = m;
2279 m = m->m_next;
2280 nam->m_next = NULL;
2281 } else
2282 nam = NULL;
2283 nd = pool_get(&nfs_srvdesc_pool, PR_WAITOK);
2284 nd->nd_md = nd->nd_mrep = m;
2285 nd->nd_nam2 = nam;
2286 nd->nd_dpos = mtod(m, caddr_t);
2287 error = nfs_getreq(nd, nfsd, TRUE);
2288 if (error) {
2289 m_freem(nam);
2290 pool_put(&nfs_srvdesc_pool, nd);
2291 return (error);
2292 }
2293 *ndp = nd;
2294 nfsd->nfsd_nd = nd;
2295 return (0);
2296 }
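
/*
 * An nfsd thread drains a socket by calling nfsrv_dorec() until it
 * returns ENOBUFS, roughly:
 *
 *	while (nfsrv_dorec(slp, nfsd, &nd) == 0) {
 *		(dispatch nd through nfsrv3_procs[] and send the reply)
 *	}
 *
 * ENOBUFS here means "no complete record queued" rather than an
 * allocation failure.
 */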
2297
2298
2299 /*
2300 * Search for a sleeping nfsd and wake it up.
2301 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
2302 * running nfsds will go look for the work in the nfssvc_sock list.
2303 */
2304 void
2305 nfsrv_wakenfsd(slp)
2306 struct nfssvc_sock *slp;
2307 {
2308 struct nfsd *nd;
2309
2310 if ((slp->ns_flag & SLP_VALID) == 0)
2311 return;
2312 simple_lock(&nfsd_slock);
2313 if (slp->ns_flag & SLP_DOREC) {
2314 simple_unlock(&nfsd_slock);
2315 return;
2316 }
2317 nd = SLIST_FIRST(&nfsd_idle_head);
2318 if (nd) {
2319 SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle);
2320 simple_unlock(&nfsd_slock);
2321
2322 KASSERT(nd->nfsd_flag & NFSD_WAITING);
2323 nd->nfsd_flag &= ~NFSD_WAITING;
2324 if (nd->nfsd_slp)
2325 panic("nfsd wakeup");
2326 slp->ns_sref++;
2327 nd->nfsd_slp = slp;
2328 wakeup(nd);
2329 return;
2330 }
2331 slp->ns_flag |= SLP_DOREC;
2332 nfsd_head_flag |= NFSD_CHECKSLP;
2333 TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending);
2334 simple_unlock(&nfsd_slock);
2335 }
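
/*
 * The matching half of this handshake is in the nfsd service loop: an
 * idle thread puts itself on nfsd_idle_head with NFSD_WAITING set
 * before sleeping, and on wakeup takes its socket either directly from
 * nfsd_slp, as handed over above, or from the nfssvc_sockpending queue
 * when NFSD_CHECKSLP is set.
 */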
2336 #endif /* NFSSERVER */