1 /*
2 * Copyright (c) 1989, 1991, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
37 */
38
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD: releng/5.2/sys/nfsclient/nfs_socket.c 122698 2003-11-14 20:54:10Z alfred $");
41
42 /*
43 * Socket operations for use by nfs
44 */
45
46 #include "opt_inet6.h"
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/lock.h>
52 #include <sys/malloc.h>
53 #include <sys/mbuf.h>
54 #include <sys/mount.h>
55 #include <sys/mutex.h>
56 #include <sys/proc.h>
57 #include <sys/protosw.h>
58 #include <sys/signalvar.h>
59 #include <sys/socket.h>
60 #include <sys/socketvar.h>
61 #include <sys/sysctl.h>
62 #include <sys/syslog.h>
63 #include <sys/vnode.h>
64
65 #include <netinet/in.h>
66 #include <netinet/tcp.h>
67
68 #include <rpc/rpcclnt.h>
69
70 #include <nfs/rpcv2.h>
71 #include <nfs/nfsproto.h>
72 #include <nfsclient/nfs.h>
73 #include <nfs/xdr_subs.h>
74 #include <nfsclient/nfsm_subs.h>
75 #include <nfsclient/nfsmount.h>
76 #include <nfsclient/nfsnode.h>
77
78 #include <nfs4client/nfs4.h>
79
80 #define TRUE 1
81 #define FALSE 0
82
83 /*
84  * Estimate rto for an nfs rpc sent via an unreliable datagram.
85 * Use the mean and mean deviation of rtt for the appropriate type of rpc
86 * for the frequent rpcs and a default for the others.
87 * The justification for doing "other" this way is that these rpcs
88  * happen so infrequently that a timer estimate would probably be stale.
89 * Also, since many of these rpcs are
90 * non-idempotent, a conservative timeout is desired.
91 * getattr, lookup - A+2D
92 * read, write - A+4D
93 * other - nm_timeo
94 */
95 #define NFS_RTO(n, t) \
96 ((t) == 0 ? (n)->nm_timeo : \
97 ((t) < 3 ? \
98 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
99 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
100 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
101 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
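/*
 * Illustrative example (assumed values): nm_srtt[] holds the smoothed rtt
 * scaled by 8 and nm_sdrtt[] the smoothed mean deviation scaled by 4 (see
 * the update code in nfs_reply()).  For a smoothed rtt A of 2 ticks and a
 * deviation D of 1 tick, srtt == 16 and sdrtt == 4, so a getattr or lookup
 * (timer 1 or 2) gets ((((16 + 3) >> 2) + 4 + 1) >> 1) == 4 ticks (~A + 2D)
 * and a read or write (timer 3 or 4) gets (((16 + 7) >> 3) + 4 + 1) == 7
 * ticks (~A + 4D, rounded up), matching the estimates listed above.
 */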
102
103 /*
104 * Defines which timer to use for the procnum.
105 * 0 - default
106 * 1 - getattr
107 * 2 - lookup
108 * 3 - read
109 * 4 - write
110 */
111 static int proct[NFS_NPROCS] = {
112 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
113 };
114
115 static int nfs_realign_test;
116 static int nfs_realign_count;
117 static int nfs_bufpackets = 4;
118
119 SYSCTL_DECL(_vfs_nfs);
120
121 SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "");
122 SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "");
123 SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, "");
124
125
126 /*
127 * There is a congestion window for outstanding rpcs maintained per mount
128 * point. The cwnd size is adjusted in roughly the way that:
129  * Van Jacobson, Congestion Avoidance and Control, in "Proceedings of
130 * SIGCOMM '88". ACM, August 1988.
131 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
132 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
133 * of rpcs is in progress.
134 * (The sent count and cwnd are scaled for integer arith.)
135 * Variants of "slow start" were tried and were found to be too much of a
136 * performance hit (ave. rtt 3 times larger),
137 * I suspect due to the large rtt that nfs rpcs have.
138 */
139 #define NFS_CWNDSCALE 256
140 #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
141 #define NFS_NBACKOFF 8
142 static int nfs_backoff[NFS_NBACKOFF] = { 2, 4, 8, 16, 32, 64, 128, 256, };
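/*
 * Rough illustration of the scaling: with NFS_CWNDSCALE == 256, the initial
 * window of NFS_MAXCWND / 2 == 4096 allows 16 outstanding rpcs, since
 * nm_sent grows by NFS_CWNDSCALE per request.  While a full window is
 * outstanding, each reply adds about NFS_CWNDSCALE * NFS_CWNDSCALE / nm_cwnd
 * to nm_cwnd (roughly one rpc per window's worth of replies), whereas a
 * retransmit timeout halves nm_cwnd in nfs_timer().  nfs_backoff[] is
 * indexed by nm_timeouts - 1, so successive timeouts stretch the rto by
 * 2, 4, 8, ... up to 256 times.
 */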
143 struct callout_handle nfs_timer_handle;
144
145 static int nfs_msg(struct thread *, char *, char *);
146 static int nfs_rcvlock(struct nfsreq *);
147 static void nfs_rcvunlock(struct nfsreq *);
148 static void nfs_realign(struct mbuf **pm, int hsiz);
149 static int nfs_receive(struct nfsreq *rep, struct sockaddr **aname,
150 struct mbuf **mp);
151 static int nfs_reply(struct nfsreq *);
152 static void nfs_softterm(struct nfsreq *rep);
153 static int nfs_reconnect(struct nfsreq *rep);
154
155 /*
156 * Initialize sockets and congestion for a new NFS connection.
157  * We do not free the sockaddr on error.
158 */
159 int
160 nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
161 {
162 struct socket *so;
163 int s, error, rcvreserve, sndreserve;
164 int pktscale;
165 struct sockaddr *saddr;
166 struct thread *td = &thread0; /* only used for socreate and sobind */
167
168 GIANT_REQUIRED; /* XXX until socket locking done */
169
170 nmp->nm_so = NULL;
171 saddr = nmp->nm_nam;
172 error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
173 nmp->nm_soproto, nmp->nm_mountp->mnt_cred, td);
174 if (error)
175 goto bad;
176 so = nmp->nm_so;
177 nmp->nm_soflags = so->so_proto->pr_flags;
178
179 /*
180 * Some servers require that the client port be a reserved port number.
181 */
182 if (nmp->nm_flag & NFSMNT_RESVPORT) {
183 struct sockopt sopt;
184 int ip, ip2, len;
185 struct sockaddr_in6 ssin;
186 struct sockaddr *sa;
187
188 bzero(&sopt, sizeof sopt);
189 switch(saddr->sa_family) {
190 case AF_INET:
191 sopt.sopt_level = IPPROTO_IP;
192 sopt.sopt_name = IP_PORTRANGE;
193 ip = IP_PORTRANGE_LOW;
194 ip2 = IP_PORTRANGE_DEFAULT;
195 len = sizeof (struct sockaddr_in);
196 break;
197 #ifdef INET6
198 case AF_INET6:
199 sopt.sopt_level = IPPROTO_IPV6;
200 sopt.sopt_name = IPV6_PORTRANGE;
201 ip = IPV6_PORTRANGE_LOW;
202 ip2 = IPV6_PORTRANGE_DEFAULT;
203 len = sizeof (struct sockaddr_in6);
204 break;
205 #endif
206 default:
207 goto noresvport;
208 }
209 sa = (struct sockaddr *)&ssin;
210 bzero(sa, len);
211 sa->sa_len = len;
212 sa->sa_family = saddr->sa_family;
213 sopt.sopt_dir = SOPT_SET;
214 sopt.sopt_val = (void *)&ip;
215 sopt.sopt_valsize = sizeof(ip);
216 error = sosetopt(so, &sopt);
217 if (error)
218 goto bad;
219 error = sobind(so, sa, td);
220 if (error)
221 goto bad;
222 ip = ip2;
223 error = sosetopt(so, &sopt);
224 if (error)
225 goto bad;
226 noresvport: ;
227 }
228
229 /*
230 * Protocols that do not require connections may be optionally left
231 * unconnected for servers that reply from a port other than NFS_PORT.
232 */
233 if (nmp->nm_flag & NFSMNT_NOCONN) {
234 if (nmp->nm_soflags & PR_CONNREQUIRED) {
235 error = ENOTCONN;
236 goto bad;
237 }
238 } else {
239 error = soconnect(so, nmp->nm_nam, td);
240 if (error)
241 goto bad;
242
243 /*
244 * Wait for the connection to complete. Cribbed from the
245 * connect system call but with the wait timing out so
246 * that interruptible mounts don't hang here for a long time.
247 */
248 s = splnet();
249 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
250 (void) tsleep(&so->so_timeo,
251 PSOCK, "nfscon", 2 * hz);
252 if ((so->so_state & SS_ISCONNECTING) &&
253 so->so_error == 0 && rep &&
254 (error = nfs_sigintr(nmp, rep, rep->r_td)) != 0) {
255 so->so_state &= ~SS_ISCONNECTING;
256 splx(s);
257 goto bad;
258 }
259 }
260 if (so->so_error) {
261 error = so->so_error;
262 so->so_error = 0;
263 splx(s);
264 goto bad;
265 }
266 splx(s);
267 }
268 so->so_rcv.sb_timeo = 5 * hz;
269 so->so_snd.sb_timeo = 5 * hz;
270
271 /*
272 * Get buffer reservation size from sysctl, but impose reasonable
273 * limits.
274 */
275 pktscale = nfs_bufpackets;
276 if (pktscale < 2)
277 pktscale = 2;
278 if (pktscale > 64)
279 pktscale = 64;
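	/*
	 * With the default nfs_bufpackets of 4, for example, the reservations
	 * below size the socket buffers for roughly four maximum-sized
	 * requests/replies plus headers; raising the sysctl trades kernel
	 * memory for more data in flight per mount.
	 */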
280
281 if (nmp->nm_sotype == SOCK_DGRAM) {
282 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
283 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
284 NFS_MAXPKTHDR) * pktscale;
285 } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
286 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
287 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
288 NFS_MAXPKTHDR) * pktscale;
289 } else {
290 if (nmp->nm_sotype != SOCK_STREAM)
291 panic("nfscon sotype");
292 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
293 struct sockopt sopt;
294 int val;
295
296 bzero(&sopt, sizeof sopt);
297 sopt.sopt_dir = SOPT_SET;
298 sopt.sopt_level = SOL_SOCKET;
299 sopt.sopt_name = SO_KEEPALIVE;
300 sopt.sopt_val = &val;
301 sopt.sopt_valsize = sizeof val;
302 val = 1;
303 sosetopt(so, &sopt);
304 }
305 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
306 struct sockopt sopt;
307 int val;
308
309 bzero(&sopt, sizeof sopt);
310 sopt.sopt_dir = SOPT_SET;
311 sopt.sopt_level = IPPROTO_TCP;
312 sopt.sopt_name = TCP_NODELAY;
313 sopt.sopt_val = &val;
314 sopt.sopt_valsize = sizeof val;
315 val = 1;
316 sosetopt(so, &sopt);
317 }
318 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
319 sizeof (u_int32_t)) * pktscale;
320 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
321 sizeof (u_int32_t)) * pktscale;
322 }
323 error = soreserve(so, sndreserve, rcvreserve);
324 if (error)
325 goto bad;
326 so->so_rcv.sb_flags |= SB_NOINTR;
327 so->so_snd.sb_flags |= SB_NOINTR;
328
329 /* Initialize other non-zero congestion variables */
330 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
331 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
332 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
333 nmp->nm_sdrtt[3] = 0;
334 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
335 nmp->nm_sent = 0;
336 nmp->nm_timeouts = 0;
337 return (0);
338
339 bad:
340 nfs_disconnect(nmp);
341 return (error);
342 }
343
344 /*
345 * Reconnect routine:
346 * Called when a connection is broken on a reliable protocol.
347 * - clean up the old socket
348 * - nfs_connect() again
349 * - set R_MUSTRESEND for all outstanding requests on mount point
350 * If this fails the mount point is DEAD!
351 * nb: Must be called with the nfs_sndlock() set on the mount point.
352 */
353 static int
354 nfs_reconnect(struct nfsreq *rep)
355 {
356 struct nfsreq *rp;
357 struct nfsmount *nmp = rep->r_nmp;
358 int error;
359
360 nfs_disconnect(nmp);
361 while ((error = nfs_connect(nmp, rep)) != 0) {
362 if (error == EINTR || error == ERESTART)
363 return (EINTR);
364 (void) tsleep(&lbolt, PSOCK, "nfscon", 0);
365 }
366
367 /*
368 * Loop through outstanding request list and fix up all requests
369 * on old socket.
370 */
371 TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
372 if (rp->r_nmp == nmp)
373 rp->r_flags |= R_MUSTRESEND;
374 }
375 return (0);
376 }
377
378 /*
379 * NFS disconnect. Clean up and unlink.
380 */
381 void
382 nfs_disconnect(struct nfsmount *nmp)
383 {
384 struct socket *so;
385
386 GIANT_REQUIRED; /* XXX until socket locking done */
387
388 if (nmp->nm_so) {
389 so = nmp->nm_so;
390 nmp->nm_so = NULL;
391 soshutdown(so, 2);
392 soclose(so);
393 }
394 }
395
396 void
397 nfs_safedisconnect(struct nfsmount *nmp)
398 {
399 struct nfsreq dummyreq;
400
401 bzero(&dummyreq, sizeof(dummyreq));
402 dummyreq.r_nmp = nmp;
403 nfs_rcvlock(&dummyreq);
404 nfs_disconnect(nmp);
405 nfs_rcvunlock(&dummyreq);
406 }
407
408 /*
409 * This is the nfs send routine. For connection based socket types, it
410 * must be called with an nfs_sndlock() on the socket.
411 * - return EINTR if the RPC is terminated, 0 otherwise
412 * - set R_MUSTRESEND if the send fails for any reason
413 * - do any cleanup required by recoverable socket errors (?)
414 */
415 int
416 nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
417 struct nfsreq *rep)
418 {
419 struct sockaddr *sendnam;
420 int error, soflags, flags;
421
422 GIANT_REQUIRED; /* XXX until socket locking done */
423
424 KASSERT(rep, ("nfs_send: called with rep == NULL"));
425
426 if (rep->r_flags & R_SOFTTERM) {
427 m_freem(top);
428 return (EINTR);
429 }
430 if ((so = rep->r_nmp->nm_so) == NULL) {
431 rep->r_flags |= R_MUSTRESEND;
432 m_freem(top);
433 return (0);
434 }
435 rep->r_flags &= ~R_MUSTRESEND;
436 soflags = rep->r_nmp->nm_soflags;
437
438 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
439 sendnam = NULL;
440 else
441 sendnam = nam;
442 if (so->so_type == SOCK_SEQPACKET)
443 flags = MSG_EOR;
444 else
445 flags = 0;
446
447 error = so->so_proto->pr_usrreqs->pru_sosend(so, sendnam, 0, top, 0,
448 flags, curthread /*XXX*/);
449 if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
450 error = 0;
451 rep->r_flags |= R_MUSTRESEND;
452 }
453
454 if (error) {
455 log(LOG_INFO, "nfs send error %d for server %s\n", error,
456 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
457 /*
458 * Deal with errors for the client side.
459 */
460 if (rep->r_flags & R_SOFTTERM)
461 error = EINTR;
462 else
463 rep->r_flags |= R_MUSTRESEND;
464
465 /*
466 * Handle any recoverable (soft) socket errors here. (?)
467 */
468 if (error != EINTR && error != ERESTART &&
469 error != EWOULDBLOCK && error != EPIPE)
470 error = 0;
471 }
472 return (error);
473 }
474
475 /*
476 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
477 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
478 * Mark and consolidate the data into a new mbuf list.
479 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
480 * small mbufs.
481 * For SOCK_STREAM we must be very careful to read an entire record once
482 * we have read any of it, even if the system call has been interrupted.
483 */
484 static int
485 nfs_receive(struct nfsreq *rep, struct sockaddr **aname, struct mbuf **mp)
486 {
487 struct socket *so;
488 struct uio auio;
489 struct iovec aio;
490 struct mbuf *m;
491 struct mbuf *control;
492 u_int32_t len;
493 struct sockaddr **getnam;
494 int error, sotype, rcvflg;
495 struct thread *td = curthread; /* XXX */
496
497 GIANT_REQUIRED; /* XXX until socket locking done */
498
499 /*
500 * Set up arguments for soreceive()
501 */
502 *mp = NULL;
503 *aname = NULL;
504 sotype = rep->r_nmp->nm_sotype;
505
506 /*
507 * For reliable protocols, lock against other senders/receivers
508 * in case a reconnect is necessary.
509 * For SOCK_STREAM, first get the Record Mark to find out how much
510 * more there is to get.
511 * We must lock the socket against other receivers
512 * until we have an entire rpc request/reply.
513 */
514 if (sotype != SOCK_DGRAM) {
515 error = nfs_sndlock(rep);
516 if (error)
517 return (error);
518 tryagain:
519 /*
520 		 * Check for fatal errors and resend the request if necessary.
521 */
522 /*
523 * Ugh: If a reconnect attempt just happened, nm_so
524 * would have changed. NULL indicates a failed
525 * attempt that has essentially shut down this
526 * mount point.
527 */
528 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
529 nfs_sndunlock(rep);
530 return (EINTR);
531 }
532 so = rep->r_nmp->nm_so;
533 if (!so) {
534 error = nfs_reconnect(rep);
535 if (error) {
536 nfs_sndunlock(rep);
537 return (error);
538 }
539 goto tryagain;
540 }
541 while (rep->r_flags & R_MUSTRESEND) {
542 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT);
543 nfsstats.rpcretries++;
544 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
545 if (error) {
546 if (error == EINTR || error == ERESTART ||
547 (error = nfs_reconnect(rep)) != 0) {
548 nfs_sndunlock(rep);
549 return (error);
550 }
551 goto tryagain;
552 }
553 }
554 nfs_sndunlock(rep);
555 if (sotype == SOCK_STREAM) {
556 aio.iov_base = (caddr_t) &len;
557 aio.iov_len = sizeof(u_int32_t);
558 auio.uio_iov = &aio;
559 auio.uio_iovcnt = 1;
560 auio.uio_segflg = UIO_SYSSPACE;
561 auio.uio_rw = UIO_READ;
562 auio.uio_offset = 0;
563 auio.uio_resid = sizeof(u_int32_t);
564 auio.uio_td = td;
565 do {
566 rcvflg = MSG_WAITALL;
567 error = so->so_proto->pr_usrreqs->pru_soreceive
568 (so, NULL, &auio, NULL, NULL, &rcvflg);
569 if (error == EWOULDBLOCK && rep) {
570 if (rep->r_flags & R_SOFTTERM)
571 return (EINTR);
572 }
573 } while (error == EWOULDBLOCK);
574 if (!error && auio.uio_resid > 0) {
575 /*
576 * Don't log a 0 byte receive; it means
577 * that the socket has been closed, and
578 * can happen during normal operation
579 * (forcible unmount or Solaris server).
580 */
581 if (auio.uio_resid != sizeof (u_int32_t))
582 log(LOG_INFO,
583 "short receive (%d/%d) from nfs server %s\n",
584 (int)(sizeof(u_int32_t) - auio.uio_resid),
585 (int)sizeof(u_int32_t),
586 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
587 error = EPIPE;
588 }
589 if (error)
590 goto errout;
591 len = ntohl(len) & ~0x80000000;
592 /*
593 * This is SERIOUS! We are out of sync with the sender
594 * and forcing a disconnect/reconnect is all I can do.
595 */
596 if (len > NFS_MAXPACKET) {
597 log(LOG_ERR, "%s (%d) from nfs server %s\n",
598 "impossible packet length",
599 len,
600 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
601 error = EFBIG;
602 goto errout;
603 }
604 auio.uio_resid = len;
605 do {
606 rcvflg = MSG_WAITALL;
607 error = so->so_proto->pr_usrreqs->pru_soreceive
608 (so, NULL,
609 &auio, mp, NULL, &rcvflg);
610 } while (error == EWOULDBLOCK || error == EINTR ||
611 error == ERESTART);
612 if (!error && auio.uio_resid > 0) {
613 if (len != auio.uio_resid)
614 log(LOG_INFO,
615 "short receive (%d/%d) from nfs server %s\n",
616 len - auio.uio_resid, len,
617 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
618 error = EPIPE;
619 }
620 } else {
621 /*
622 * NB: Since uio_resid is big, MSG_WAITALL is ignored
623 * and soreceive() will return when it has either a
624 * control msg or a data msg.
625 			 * We have no use for control messages, but must grab them
626 * and then throw them away so we know what is going
627 * on.
628 */
629 auio.uio_resid = len = 100000000; /* Anything Big */
630 auio.uio_td = td;
631 do {
632 rcvflg = 0;
633 error = so->so_proto->pr_usrreqs->pru_soreceive
634 (so, NULL,
635 &auio, mp, &control, &rcvflg);
636 if (control)
637 m_freem(control);
638 if (error == EWOULDBLOCK && rep) {
639 if (rep->r_flags & R_SOFTTERM)
640 return (EINTR);
641 }
642 } while (error == EWOULDBLOCK ||
643 (!error && *mp == NULL && control));
644 if ((rcvflg & MSG_EOR) == 0)
645 printf("Egad!!\n");
646 if (!error && *mp == NULL)
647 error = EPIPE;
648 len -= auio.uio_resid;
649 }
650 errout:
651 if (error && error != EINTR && error != ERESTART) {
652 m_freem(*mp);
653 *mp = NULL;
654 if (error != EPIPE)
655 log(LOG_INFO,
656 "receive error %d from nfs server %s\n",
657 error,
658 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
659 error = nfs_sndlock(rep);
660 if (!error) {
661 error = nfs_reconnect(rep);
662 if (!error)
663 goto tryagain;
664 else
665 nfs_sndunlock(rep);
666 }
667 }
668 } else {
669 if ((so = rep->r_nmp->nm_so) == NULL)
670 return (EACCES);
671 if (so->so_state & SS_ISCONNECTED)
672 getnam = NULL;
673 else
674 getnam = aname;
675 auio.uio_resid = len = 1000000;
676 auio.uio_td = td;
677 do {
678 rcvflg = 0;
679 error = so->so_proto->pr_usrreqs->pru_soreceive
680 (so, getnam, &auio, mp,
681 NULL, &rcvflg);
682 if (error == EWOULDBLOCK &&
683 (rep->r_flags & R_SOFTTERM))
684 return (EINTR);
685 } while (error == EWOULDBLOCK);
686 len -= auio.uio_resid;
687 }
688 if (error) {
689 m_freem(*mp);
690 *mp = NULL;
691 }
692 /*
693 * Search for any mbufs that are not a multiple of 4 bytes long
694 * or with m_data not longword aligned.
695 * These could cause pointer alignment problems, so copy them to
696 * well aligned mbufs.
697 */
698 nfs_realign(mp, 5 * NFSX_UNSIGNED);
699 return (error);
700 }
701
702 /*
703 * Implement receipt of reply on a socket.
704 * We must search through the list of received datagrams matching them
705 * with outstanding requests using the xid, until ours is found.
706 */
707 /* ARGSUSED */
708 static int
709 nfs_reply(struct nfsreq *myrep)
710 {
711 struct nfsreq *rep;
712 struct nfsmount *nmp = myrep->r_nmp;
713 int32_t t1;
714 struct mbuf *mrep, *md;
715 struct sockaddr *nam;
716 u_int32_t rxid, *tl;
717 caddr_t dpos;
718 int error;
719
720 /*
721 * Loop around until we get our own reply
722 */
723 for (;;) {
724 /*
725 * Lock against other receivers so that I don't get stuck in
726 * sbwait() after someone else has received my reply for me.
727 * Also necessary for connection based protocols to avoid
728 * race conditions during a reconnect.
729 * If nfs_rcvlock() returns EALREADY, that means that
730 		 * the reply has already been received by another
731 * process and we can return immediately. In this
732 * case, the lock is not taken to avoid races with
733 * other processes.
734 */
735 error = nfs_rcvlock(myrep);
736 if (error == EALREADY)
737 return (0);
738 if (error)
739 return (error);
740 /*
741 * Get the next Rpc reply off the socket
742 */
743 error = nfs_receive(myrep, &nam, &mrep);
744 nfs_rcvunlock(myrep);
745 if (error) {
746
747 /*
748 * Ignore routing errors on connectionless protocols??
749 */
750 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
751 nmp->nm_so->so_error = 0;
752 if (myrep->r_flags & R_GETONEREP)
753 return (0);
754 continue;
755 }
756 return (error);
757 }
758 if (nam)
759 FREE(nam, M_SONAME);
760
761 /*
762 * Get the xid and check that it is an rpc reply
763 */
764 md = mrep;
765 dpos = mtod(md, caddr_t);
766 tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
767 rxid = *tl++;
768 if (*tl != rpc_reply) {
769 nfsstats.rpcinvalid++;
770 m_freem(mrep);
771 nfsmout:
772 if (myrep->r_flags & R_GETONEREP)
773 return (0);
774 continue;
775 }
776
777 /*
778 * Loop through the request list to match up the reply
779 * Iff no match, just drop the datagram
780 */
781 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
782 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
783 /* Found it.. */
784 rep->r_mrep = mrep;
785 rep->r_md = md;
786 rep->r_dpos = dpos;
787 /*
788 * Update congestion window.
789 * Do the additive increase of
790 * one rpc/rtt.
791 */
792 if (nmp->nm_cwnd <= nmp->nm_sent) {
793 nmp->nm_cwnd +=
794 (NFS_CWNDSCALE * NFS_CWNDSCALE +
795 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
796 if (nmp->nm_cwnd > NFS_MAXCWND)
797 nmp->nm_cwnd = NFS_MAXCWND;
798 }
799 if (rep->r_flags & R_SENT) {
800 rep->r_flags &= ~R_SENT;
801 nmp->nm_sent -= NFS_CWNDSCALE;
802 }
803 /*
804 * Update rtt using a gain of 0.125 on the mean
805 * and a gain of 0.25 on the deviation.
806 */
807 if (rep->r_flags & R_TIMING) {
808 /*
809 * Since the timer resolution of
810 				 * NFS_HZ is so coarse, it can often
811 * result in r_rtt == 0. Since
812 * r_rtt == N means that the actual
813 * rtt is between N+dt and N+2-dt ticks,
814 * add 1.
815 */
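				/*
				 * Worked example (assumed numbers): with
				 * NFS_SRTT == 16 (a smoothed rtt of 2 ticks
				 * scaled by 8) and a measured r_rtt of 3,
				 * t1 becomes 4 - 2 = 2 and NFS_SRTT moves to
				 * 18; |t1| minus a quarter of NFS_SDRTT is
				 * then folded into the deviation, giving the
				 * 0.125/0.25 gains noted above.
				 */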
816 t1 = rep->r_rtt + 1;
817 t1 -= (NFS_SRTT(rep) >> 3);
818 NFS_SRTT(rep) += t1;
819 if (t1 < 0)
820 t1 = -t1;
821 t1 -= (NFS_SDRTT(rep) >> 2);
822 NFS_SDRTT(rep) += t1;
823 }
824 nmp->nm_timeouts = 0;
825 break;
826 }
827 }
828 /*
829 * If not matched to a request, drop it.
830 * If it's mine, get out.
831 */
832 if (rep == 0) {
833 nfsstats.rpcunexpected++;
834 m_freem(mrep);
835 } else if (rep == myrep) {
836 if (rep->r_mrep == NULL)
837 panic("nfsreply nil");
838 return (0);
839 }
840 if (myrep->r_flags & R_GETONEREP)
841 return (0);
842 }
843 }
844
845 /*
846 * nfs_request - goes something like this
847 * - fill in request struct
848 * - links it into list
849 * - calls nfs_send() for first transmit
850 * - calls nfs_receive() to get reply
851 * - break down rpc header and return with nfs reply pointed to
852 * by mrep or error
853 * nb: always frees up mreq mbuf list
854 */
855 /* XXX overloaded before */
856 #define NQ_TRYLATERDEL 15 /* Initial try later delay (sec) */
857
858 int
859 nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
860 struct thread *td, struct ucred *cred, struct mbuf **mrp,
861 struct mbuf **mdp, caddr_t *dposp)
862 {
863 struct mbuf *mrep, *m2;
864 struct nfsreq *rep;
865 u_int32_t *tl;
866 int i;
867 struct nfsmount *nmp;
868 struct mbuf *m, *md, *mheadend;
869 time_t waituntil;
870 caddr_t dpos;
871 int s, error = 0, mrest_len, auth_len, auth_type;
872 int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0;
873 u_int32_t xid;
874
875 /* Reject requests while attempting a forced unmount. */
876 if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) {
877 m_freem(mrest);
878 return (ESTALE);
879 }
880 nmp = VFSTONFS(vp->v_mount);
881 if ((nmp->nm_flag & NFSMNT_NFSV4) != 0)
882 return nfs4_request(vp, mrest, procnum, td, cred, mrp, mdp, dposp);
883 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
884 rep->r_nmp = nmp;
885 rep->r_vp = vp;
886 rep->r_td = td;
887 rep->r_procnum = procnum;
888 mrest_len = m_length(mrest, NULL);
889
890 /*
891 * Get the RPC header with authorization.
892 */
893 auth_type = RPCAUTH_UNIX;
894 if (cred->cr_ngroups < 1)
895 panic("nfsreq nogrps");
896 auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
897 nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
898 5 * NFSX_UNSIGNED;
899 m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
900 mrest, mrest_len, &mheadend, &xid);
901
902 /*
903 * For stream protocols, insert a Sun RPC Record Mark.
904 */
905 if (nmp->nm_sotype == SOCK_STREAM) {
906 M_PREPEND(m, NFSX_UNSIGNED, M_TRYWAIT);
907 *mtod(m, u_int32_t *) = htonl(0x80000000 |
908 (m->m_pkthdr.len - NFSX_UNSIGNED));
909 }
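	/*
	 * Example (illustrative): the record mark is a 4-byte big-endian
	 * header whose high bit flags the last fragment and whose low 31
	 * bits give the fragment length, so a 124-byte RPC message is
	 * preceded by htonl(0x80000000 | 124).  nfs_receive() strips the
	 * flag again with ntohl(len) & ~0x80000000.
	 */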
910 rep->r_mreq = m;
911 rep->r_xid = xid;
912 tryagain:
913 if (nmp->nm_flag & NFSMNT_SOFT)
914 rep->r_retry = nmp->nm_retry;
915 else
916 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
917 rep->r_rtt = rep->r_rexmit = 0;
918 if (proct[procnum] > 0)
919 rep->r_flags = R_TIMING;
920 else
921 rep->r_flags = 0;
922 rep->r_mrep = NULL;
923
924 /*
925 * Do the client side RPC.
926 */
927 nfsstats.rpcrequests++;
928 /*
929 * Chain request into list of outstanding requests. Be sure
930 * to put it LAST so timer finds oldest requests first.
931 */
932 s = splsoftclock();
933 TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
934
935 /*
936 * If backing off another request or avoiding congestion, don't
937 * send this one now but let timer do it. If not timing a request,
938 * do it now.
939 */
940 if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
941 (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
942 nmp->nm_sent < nmp->nm_cwnd)) {
943 splx(s);
944 if (nmp->nm_soflags & PR_CONNREQUIRED)
945 error = nfs_sndlock(rep);
946 if (!error) {
947 m2 = m_copym(m, 0, M_COPYALL, M_TRYWAIT);
948 error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
949 if (nmp->nm_soflags & PR_CONNREQUIRED)
950 nfs_sndunlock(rep);
951 }
952 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
953 nmp->nm_sent += NFS_CWNDSCALE;
954 rep->r_flags |= R_SENT;
955 }
956 } else {
957 splx(s);
958 rep->r_rtt = -1;
959 }
960
961 /*
962 * Wait for the reply from our send or the timer's.
963 */
964 if (!error || error == EPIPE)
965 error = nfs_reply(rep);
966
967 /*
968 * RPC done, unlink the request.
969 */
970 s = splsoftclock();
971 TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
972 splx(s);
973
974 /*
975 * Decrement the outstanding request count.
976 */
977 if (rep->r_flags & R_SENT) {
978 rep->r_flags &= ~R_SENT; /* paranoia */
979 nmp->nm_sent -= NFS_CWNDSCALE;
980 }
981
982 /*
983 	 * If there was a successful reply and a "not responding" tprintf
984 	 * message was issued earlier, tprintf that the server is alive again.
985 */
986 if (!error && (rep->r_flags & R_TPRINTFMSG))
987 nfs_msg(rep->r_td, nmp->nm_mountp->mnt_stat.f_mntfromname,
988 "is alive again");
989 mrep = rep->r_mrep;
990 md = rep->r_md;
991 dpos = rep->r_dpos;
992 if (error) {
993 m_freem(rep->r_mreq);
994 free((caddr_t)rep, M_NFSREQ);
995 return (error);
996 }
997
998 /*
999 * break down the rpc header and check if ok
1000 */
1001 tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED);
1002 if (*tl++ == rpc_msgdenied) {
1003 if (*tl == rpc_mismatch)
1004 error = EOPNOTSUPP;
1005 else
1006 error = EACCES;
1007 m_freem(mrep);
1008 m_freem(rep->r_mreq);
1009 free((caddr_t)rep, M_NFSREQ);
1010 return (error);
1011 }
1012
1013 /*
1014 	 * Just throw away any verifier (i.e. kerberos etc).
1015 */
1016 i = fxdr_unsigned(int, *tl++); /* verf type */
1017 i = fxdr_unsigned(int32_t, *tl); /* len */
1018 if (i > 0)
1019 nfsm_adv(nfsm_rndup(i));
1020 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
1021 /* 0 == ok */
1022 if (*tl == 0) {
1023 tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
1024 if (*tl != 0) {
1025 error = fxdr_unsigned(int, *tl);
1026 if ((nmp->nm_flag & NFSMNT_NFSV3) &&
1027 error == NFSERR_TRYLATER) {
1028 m_freem(mrep);
1029 error = 0;
1030 waituntil = time_second + trylater_delay;
1031 while (time_second < waituntil)
1032 (void) tsleep(&lbolt,
1033 PSOCK, "nqnfstry", 0);
1034 trylater_delay *= nfs_backoff[trylater_cnt];
1035 if (trylater_cnt < NFS_NBACKOFF - 1)
1036 trylater_cnt++;
1037 goto tryagain;
1038 }
1039
1040 /*
1041 * If the File Handle was stale, invalidate the
1042 * lookup cache, just in case.
1043 */
1044 if (error == ESTALE)
1045 cache_purge(vp);
1046 if (nmp->nm_flag & NFSMNT_NFSV3) {
1047 *mrp = mrep;
1048 *mdp = md;
1049 *dposp = dpos;
1050 error |= NFSERR_RETERR;
1051 } else
1052 m_freem(mrep);
1053 m_freem(rep->r_mreq);
1054 free((caddr_t)rep, M_NFSREQ);
1055 return (error);
1056 }
1057
1058 *mrp = mrep;
1059 *mdp = md;
1060 *dposp = dpos;
1061 m_freem(rep->r_mreq);
1062 FREE((caddr_t)rep, M_NFSREQ);
1063 return (0);
1064 }
1065 m_freem(mrep);
1066 error = EPROTONOSUPPORT;
1067 nfsmout:
1068 m_freem(rep->r_mreq);
1069 free((caddr_t)rep, M_NFSREQ);
1070 return (error);
1071 }
1072
1073 /*
1074 * Nfs timer routine
1075  * Scan the nfsreq list and retransmit any requests that have timed out.
1076 * To avoid retransmission attempts on STREAM sockets (in the future) make
1077 * sure to set the r_retry field to 0 (implies nm_retry == 0).
1078 */
1079 void
1080 nfs_timer(void *arg)
1081 {
1082 struct nfsreq *rep;
1083 struct mbuf *m;
1084 struct socket *so;
1085 struct nfsmount *nmp;
1086 int timeo;
1087 int s, error;
1088 struct thread *td;
1089
1090 td = &thread0; /* XXX for credentials, may break if sleep */
1091 s = splnet();
1092 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
1093 nmp = rep->r_nmp;
1094 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
1095 continue;
1096 if (nfs_sigintr(nmp, rep, rep->r_td)) {
1097 nfs_softterm(rep);
1098 continue;
1099 }
1100 if (rep->r_rtt >= 0) {
1101 rep->r_rtt++;
1102 if (nmp->nm_flag & NFSMNT_DUMBTIMR)
1103 timeo = nmp->nm_timeo;
1104 else
1105 timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
1106 if (nmp->nm_timeouts > 0)
1107 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
1108 if (rep->r_rtt <= timeo)
1109 continue;
1110 if (nmp->nm_timeouts < NFS_NBACKOFF)
1111 nmp->nm_timeouts++;
1112 }
1113 /*
1114 * Check for server not responding
1115 */
1116 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
1117 rep->r_rexmit > nmp->nm_deadthresh) {
1118 nfs_msg(rep->r_td,
1119 nmp->nm_mountp->mnt_stat.f_mntfromname,
1120 "not responding");
1121 rep->r_flags |= R_TPRINTFMSG;
1122 }
1123 if (rep->r_rexmit >= rep->r_retry) { /* too many */
1124 nfsstats.rpctimeouts++;
1125 nfs_softterm(rep);
1126 continue;
1127 }
1128 if (nmp->nm_sotype != SOCK_DGRAM) {
1129 if (++rep->r_rexmit > NFS_MAXREXMIT)
1130 rep->r_rexmit = NFS_MAXREXMIT;
1131 continue;
1132 }
1133 if ((so = nmp->nm_so) == NULL)
1134 continue;
1135
1136 /*
1137 		 * If there is enough space and the window allows,
1138 		 * resend it.
1139 		 * Set r_rtt to -1 in case we fail to send it now.
1140 */
1141 rep->r_rtt = -1;
1142 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1143 ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1144 (rep->r_flags & R_SENT) ||
1145 nmp->nm_sent < nmp->nm_cwnd) &&
1146 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
1147 if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
1148 error = (*so->so_proto->pr_usrreqs->pru_send)
1149 (so, 0, m, NULL, NULL, td);
1150 else
1151 error = (*so->so_proto->pr_usrreqs->pru_send)
1152 (so, 0, m, nmp->nm_nam, NULL, td);
1153 if (error) {
1154 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
1155 so->so_error = 0;
1156 } else {
1157 /*
1158 				 * If this is the first send, start timing;
1159 				 * otherwise turn timing off, back off the timer
1160 				 * and divide the congestion window by 2.
1161 */
1162 if (rep->r_flags & R_SENT) {
1163 rep->r_flags &= ~R_TIMING;
1164 if (++rep->r_rexmit > NFS_MAXREXMIT)
1165 rep->r_rexmit = NFS_MAXREXMIT;
1166 nmp->nm_cwnd >>= 1;
1167 if (nmp->nm_cwnd < NFS_CWNDSCALE)
1168 nmp->nm_cwnd = NFS_CWNDSCALE;
1169 nfsstats.rpcretries++;
1170 } else {
1171 rep->r_flags |= R_SENT;
1172 nmp->nm_sent += NFS_CWNDSCALE;
1173 }
1174 rep->r_rtt = 0;
1175 }
1176 }
1177 }
1178 splx(s);
1179 nfs_timer_handle = timeout(nfs_timer, NULL, nfs_ticks);
1180 }
1181
1182 /*
1183 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
1184 * wait for all requests to complete. This is used by forced unmounts
1185 * to terminate any outstanding RPCs.
1186 */
1187 int
1188 nfs_nmcancelreqs(nmp)
1189 struct nfsmount *nmp;
1190 {
1191 struct nfsreq *req;
1192 int i, s;
1193
1194 s = splnet();
1195 TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
1196 if (nmp != req->r_nmp || req->r_mrep != NULL ||
1197 (req->r_flags & R_SOFTTERM))
1198 continue;
1199 nfs_softterm(req);
1200 }
1201 splx(s);
1202
1203 for (i = 0; i < 30; i++) {
1204 s = splnet();
1205 TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
1206 if (nmp == req->r_nmp)
1207 break;
1208 }
1209 splx(s);
1210 if (req == NULL)
1211 return (0);
1212 tsleep(&lbolt, PSOCK, "nfscancel", 0);
1213 }
1214 return (EBUSY);
1215 }
1216
1217 /*
1218 * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT).
1219  * The nm_sent count is decremented now to avoid deadlocks when the process in
1220 * soreceive() hasn't yet managed to send its own request.
1221 */
1222
1223 static void
1224 nfs_softterm(struct nfsreq *rep)
1225 {
1226
1227 rep->r_flags |= R_SOFTTERM;
1228 if (rep->r_flags & R_SENT) {
1229 rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
1230 rep->r_flags &= ~R_SENT;
1231 }
1232 }
1233
1234 /*
1235 * Test for a termination condition pending on the process.
1236 * This is used for NFSMNT_INT mounts.
1237 */
1238 int
1239 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td)
1240 {
1241 struct proc *p;
1242 sigset_t tmpset;
1243
1244 if ((nmp->nm_flag & NFSMNT_NFSV4) != 0)
1245 return nfs4_sigintr(nmp, rep, td);
1246 if (rep && (rep->r_flags & R_SOFTTERM))
1247 return (EINTR);
1248 /* Terminate all requests while attempting a forced unmount. */
1249 if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
1250 return (EINTR);
1251 if (!(nmp->nm_flag & NFSMNT_INT))
1252 return (0);
1253 if (td == NULL)
1254 return (0);
1255
1256 p = td->td_proc;
1257 PROC_LOCK(p);
1258 tmpset = p->p_siglist;
1259 SIGSETNAND(tmpset, td->td_sigmask);
1260 mtx_lock(&p->p_sigacts->ps_mtx);
1261 SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
1262 mtx_unlock(&p->p_sigacts->ps_mtx);
1263 if (SIGNOTEMPTY(p->p_siglist) && NFSINT_SIGMASK(tmpset)) {
1264 PROC_UNLOCK(p);
1265 return (EINTR);
1266 }
1267 PROC_UNLOCK(p);
1268
1269 return (0);
1270 }
1271
1272 /*
1273 * Lock a socket against others.
1274 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1275 * and also to avoid race conditions between the processes with nfs requests
1276 * in progress when a reconnect is necessary.
1277 */
1278 int
1279 nfs_sndlock(struct nfsreq *rep)
1280 {
1281 int *statep = &rep->r_nmp->nm_state;
1282 struct thread *td;
1283 int slpflag = 0, slptimeo = 0;
1284
1285 td = rep->r_td;
1286 if (rep->r_nmp->nm_flag & NFSMNT_INT)
1287 slpflag = PCATCH;
1288 while (*statep & NFSSTA_SNDLOCK) {
1289 if (nfs_sigintr(rep->r_nmp, rep, td))
1290 return (EINTR);
1291 *statep |= NFSSTA_WANTSND;
1292 (void) tsleep(statep, slpflag | (PZERO - 1),
1293 "nfsndlck", slptimeo);
1294 if (slpflag == PCATCH) {
1295 slpflag = 0;
1296 slptimeo = 2 * hz;
1297 }
1298 }
1299 *statep |= NFSSTA_SNDLOCK;
1300 return (0);
1301 }
1302
1303 /*
1304 * Unlock the stream socket for others.
1305 */
1306 void
1307 nfs_sndunlock(struct nfsreq *rep)
1308 {
1309 int *statep = &rep->r_nmp->nm_state;
1310
1311 if ((*statep & NFSSTA_SNDLOCK) == 0)
1312 panic("nfs sndunlock");
1313 *statep &= ~NFSSTA_SNDLOCK;
1314 if (*statep & NFSSTA_WANTSND) {
1315 *statep &= ~NFSSTA_WANTSND;
1316 wakeup(statep);
1317 }
1318 }
1319
1320 static int
1321 nfs_rcvlock(struct nfsreq *rep)
1322 {
1323 int *statep = &rep->r_nmp->nm_state;
1324 int slpflag, slptimeo = 0;
1325
1326 if (rep->r_nmp->nm_flag & NFSMNT_INT)
1327 slpflag = PCATCH;
1328 else
1329 slpflag = 0;
1330 while (*statep & NFSSTA_RCVLOCK) {
1331 if (nfs_sigintr(rep->r_nmp, rep, rep->r_td))
1332 return (EINTR);
1333 *statep |= NFSSTA_WANTRCV;
1334 (void) tsleep(statep, slpflag | (PZERO - 1), "nfsrcvlk",
1335 slptimeo);
1336 /*
1337 		 * If our reply was received while we were sleeping,
1338 		 * then just return without taking the lock to avoid a
1339 		 * situation where a single iod could 'capture' the
1340 		 * receive lock.
1341 */
1342 if (rep->r_mrep != NULL)
1343 return (EALREADY);
1344 if (slpflag == PCATCH) {
1345 slpflag = 0;
1346 slptimeo = 2 * hz;
1347 }
1348 }
1349 /* Always fail if our request has been cancelled. */
1350 if (rep != NULL && (rep->r_flags & R_SOFTTERM))
1351 return (EINTR);
1352 *statep |= NFSSTA_RCVLOCK;
1353 return (0);
1354 }
1355
1356 /*
1357  * Release the receive lock for others.
1358 */
1359 static void
1360 nfs_rcvunlock(struct nfsreq *rep)
1361 {
1362 int *statep = &rep->r_nmp->nm_state;
1363
1364 if ((*statep & NFSSTA_RCVLOCK) == 0)
1365 panic("nfs rcvunlock");
1366 *statep &= ~NFSSTA_RCVLOCK;
1367 if (*statep & NFSSTA_WANTRCV) {
1368 *statep &= ~NFSSTA_WANTRCV;
1369 wakeup(statep);
1370 }
1371 }
1372
1373 /*
1374 * nfs_realign:
1375 *
1376 * Check for badly aligned mbuf data and realign by copying the unaligned
1377 * portion of the data into a new mbuf chain and freeing the portions
1378 * of the old chain that were replaced.
1379 *
1380 * We cannot simply realign the data within the existing mbuf chain
1381 * because the underlying buffers may contain other rpc commands and
1382 * we cannot afford to overwrite them.
1383 *
1384 * We would prefer to avoid this situation entirely. The situation does
1385  * not occur with NFS/UDP and is supposed to only occasionally occur
1386 * with TCP. Use vfs.nfs.realign_count and realign_test to check this.
1387 */
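/*
 * For illustration (assumed input): if TCP hands up a chain in which some
 * mbuf has m_len == 6 or m_data not 4-byte aligned, every 32-bit XDR field
 * from that point on would be misaligned, so the rest of the chain is
 * copied into the freshly allocated (and therefore aligned) mbuf and
 * spliced back in at *pm below.
 */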
1388 static void
1389 nfs_realign(struct mbuf **pm, int hsiz)
1390 {
1391 struct mbuf *m;
1392 struct mbuf *n = NULL;
1393 int off = 0;
1394
1395 ++nfs_realign_test;
1396 while ((m = *pm) != NULL) {
1397 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
1398 MGET(n, M_TRYWAIT, MT_DATA);
1399 if (m->m_len >= MINCLSIZE) {
1400 MCLGET(n, M_TRYWAIT);
1401 }
1402 n->m_len = 0;
1403 break;
1404 }
1405 pm = &m->m_next;
1406 }
1407 /*
1408 * If n is non-NULL, loop on m copying data, then replace the
1409 * portion of the chain that had to be realigned.
1410 */
1411 if (n != NULL) {
1412 ++nfs_realign_count;
1413 while (m) {
1414 m_copyback(n, off, m->m_len, mtod(m, caddr_t));
1415 off += m->m_len;
1416 m = m->m_next;
1417 }
1418 m_freem(*pm);
1419 *pm = n;
1420 }
1421 }
1422
1423
1424 static int
1425 nfs_msg(struct thread *td, char *server, char *msg)
1426 {
1427
1428 tprintf(td ? td->td_proc : NULL, LOG_INFO,
1429 "nfs server %s: %s\n", server, msg);
1430 return (0);
1431 }