FreeBSD/Linux Kernel Cross Reference
sys/rpc/rpcclnt.c
1 /* $FreeBSD: src/sys/rpc/rpcclnt.c,v 1.24 2008/10/23 15:53:51 des Exp $ */
2 /* $Id: rpcclnt.c,v 1.9 2003/11/05 14:59:03 rees Exp $ */
3
4 /*-
5 * copyright (c) 2003
6 * the regents of the university of michigan
7 * all rights reserved
8 *
9 * permission is granted to use, copy, create derivative works and redistribute
10 * this software and such derivative works for any purpose, so long as the name
11 * of the university of michigan is not used in any advertising or publicity
12 * pertaining to the use or distribution of this software without specific,
13 * written prior authorization. if the above copyright notice or any other
14 * identification of the university of michigan is included in any copy of any
15 * portion of this software, then the disclaimer below must also be included.
16 *
17 * this software is provided as is, without representation from the university
18 * of michigan as to its fitness for any purpose, and without warranty by the
19 * university of michigan of any kind, either express or implied, including
20 * without limitation the implied warranties of merchantability and fitness for
21 * a particular purpose. the regents of the university of michigan shall not be
22 * liable for any damages, including special, indirect, incidental, or
23 * consequential damages, with respect to any claim arising out of or in
24 * connection with the use of the software, even if it has been or is hereafter
25 * advised of the possibility of such damages.
26 */
27
28 /*-
29 * Copyright (c) 1989, 1991, 1993, 1995 The Regents of the University of
30 * California. All rights reserved.
31 *
32 * This code is derived from software contributed to Berkeley by Rick Macklem at
33 * The University of Guelph.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions are
37 * met: 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer. 2.
39 * Redistributions in binary form must reproduce the above copyright notice,
40 * this list of conditions and the following disclaimer in the documentation
41 * and/or other materials provided with the distribution. 3. All advertising
42 * materials mentioning features or use of this software must display the
43 * following acknowledgement: This product includes software developed by the
44 * University of California, Berkeley and its contributors. 4. Neither the
45 * name of the University nor the names of its contributors may be used to
46 * endorse or promote products derived from this software without specific
47 * prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
50 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
51 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
52 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
53 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
55 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
56 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
62 */
63
64 /* XXX: kill ugly debug strings */
65 /* XXX: get rid of proct, as it is not even being used... (or keep it so
66 * v{2,3} can run, but clean it up!) */
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/proc.h>
71 #include <sys/mount.h>
72 #include <sys/kernel.h>
73 #include <sys/mbuf.h>
74 #include <sys/syslog.h>
75 #include <sys/malloc.h>
76 #include <sys/uio.h>
77 #include <sys/lock.h>
78 #include <sys/signalvar.h>
79 #include <sys/sysent.h>
80 #include <sys/syscall.h>
81 #include <sys/sysctl.h>
82
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/mutex.h>
88
89 #include <netinet/in.h>
90 #include <netinet/tcp.h>
91
92 #include <nfs/rpcv2.h>
93
94 #include <rpc/rpcm_subs.h>
95 #include <rpc/rpcclnt.h>
96
97 /* memory management */
98 #ifdef __OpenBSD__
99 struct pool rpctask_pool;
100 struct pool rpcclnt_pool;
101 #define RPCTASKPOOL_LWM 10
102 #define RPCTASKPOOL_HWM 40
103 #else
104 static MALLOC_DEFINE(M_RPCCLNT, "rpcclnt", "rpc state");
105 #endif
106
/*
 * Report the value through RPCDEBUG and then return it from the enclosing
 * function.  X appears twice in the expansion, so callers should pass a
 * side-effect-free expression.
 */
#define RPC_RETURN(X)							\
	do {								\
		RPCDEBUG("returning %d", X);				\
		return X;						\
	} while (0)
108
/*
 * Estimate the retransmit timeout (rto) for an rpc sent via an unreliable
 * datagram.  Use the mean and mean deviation of rtt for the appropriate
 * type of rpc for the frequent rpcs and a default for the others.  The
 * justification for doing "other" this way is that these rpcs happen so
 * infrequently that a timer estimate would probably be stale.  Also, since
 * many of these rpcs are non-idempotent, a conservative timeout is desired.
 *
 *	t == 0		(n)->rc_timeo
 *	t == 1, 2	(((srtt[t-1] + 3) >> 2) + sdrtt[t-1] + 1) >> 1
 *	t >= 3		((srtt[t-1] + 7) >> 3) + sdrtt[t-1] + 1
 *
 * Both arguments are fully parenthesized, so "t" may be any expression;
 * note that it is evaluated more than once.
 */
#define RPC_RTO(n, t)							\
	((t) == 0 ? (n)->rc_timeo :					\
	 ((t) < 3 ?							\
	  (((((n)->rc_srtt[(t) - 1] + 3) >> 2) +			\
	    (n)->rc_sdrtt[(t) - 1] + 1) >> 1) :				\
	  ((((n)->rc_srtt[(t) - 1] + 7) >> 3) +				\
	    (n)->rc_sdrtt[(t) - 1] + 1)))
123
/*
 * Select the rc_srtt[] / rc_sdrtt[] slot belonging to task "r": the index
 * is what rpcclnt_proct() returns for the task's procedure number, minus
 * one.  Both macros expand to lvalues.
 */
#define RPC_SRTT(s, r)							\
	((r)->r_rpcclnt->rc_srtt[rpcclnt_proct((s), (r)->r_procnum) - 1])

#define RPC_SDRTT(s, r)							\
	((r)->r_rpcclnt->rc_sdrtt[rpcclnt_proct((s), (r)->r_procnum) - 1])

129
130
/*
 * A congestion window for outstanding rpcs is maintained per mount point.
 * Its size is adjusted in roughly the way described for TCP in:
 *
 *	Van Jacobson, "Congestion Avoidance and Control",
 *	Proceedings of SIGCOMM '88, ACM, August 1988.
 *
 * The window is chopped in half on a retransmit timeout and incremented by
 * 1/cwnd when each rpc reply is received and a full cwnd of rpcs is in
 * progress.  (The sent count and cwnd are scaled for integer arithmetic.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (average rtt 3 times larger), I suspect due to the large
 * rtt that nfs rpcs have.
 */
#define RPC_CWNDSCALE	256
#define RPC_MAXCWND	(RPC_CWNDSCALE * 32)
static const int rpcclnt_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256 };
145
/*
 * XXX ugly debug strings.
 *
 * Human-readable messages for rpc reply status codes; the slot number is
 * noted next to each entry.
 */
#define RPC_ERRSTR_ACCEPTED_SIZE 6
char *rpc_errstr_accepted[RPC_ERRSTR_ACCEPTED_SIZE] = {
	/* 0 */ "",		/* no good message... */
	/* 1 */ "remote server hasn't exported program.",
	/* 2 */ "remote server can't support version number.",
	/* 3 */ "program can't support procedure.",
	/* 4 */ "procedure can't decode params.",
	/* 5 */ "remote error. remote side memory allocation failure?"
};

char *rpc_errstr_denied[2] = {
	/* 0 */ "remote server doesnt support rpc version 2!",
	/* 1 */ "remote server authentication error."
};

#define RPC_ERRSTR_AUTH_SIZE 6
char *rpc_errstr_auth[RPC_ERRSTR_AUTH_SIZE] = {
	/* 0 */ "",
	/* 1 */ "auth error: bad credential (seal broken).",
	/* 2 */ "auth error: client must begin new session.",
	/* 3 */ "auth error: bad verifier (seal broken).",
	/* 4 */ "auth error: verifier expired or replayed.",
	/* 5 */ "auth error: rejected for security reasons.",
};
171
172 /*
173 * Static data, mostly RPC constants in XDR form
174 */
175 static u_int32_t rpc_reply, rpc_call, rpc_vers;
176
177 /*
178 * rpc_msgdenied, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted,
179 * rpc_autherr, rpc_auth_kerb;
180 */
181
182 static u_int32_t rpcclnt_xid = 0;
183 static u_int32_t rpcclnt_xid_touched = 0;
184 struct rpcstats rpcstats;
185 int rpcclnt_ticks;
186 static int fake_wchan;
187
188 SYSCTL_NODE(_kern, OID_AUTO, rpc, CTLFLAG_RD, 0, "RPC Subsystem");
189
190 SYSCTL_UINT(_kern_rpc, OID_AUTO, retries, CTLFLAG_RD, &rpcstats.rpcretries, 0, "retries");
191 SYSCTL_UINT(_kern_rpc, OID_AUTO, request, CTLFLAG_RD, &rpcstats.rpcrequests, 0, "request");
192 SYSCTL_UINT(_kern_rpc, OID_AUTO, timeouts, CTLFLAG_RD, &rpcstats.rpctimeouts, 0, "timeouts");
193 SYSCTL_UINT(_kern_rpc, OID_AUTO, unexpected, CTLFLAG_RD, &rpcstats.rpcunexpected, 0, "unexpected");
194 SYSCTL_UINT(_kern_rpc, OID_AUTO, invalid, CTLFLAG_RD, &rpcstats.rpcinvalid, 0, "invalid");
195
196
197 #ifdef RPCCLNT_DEBUG
198 int rpcdebugon = 0;
199 SYSCTL_UINT(_kern_rpc, OID_AUTO, debug_on, CTLFLAG_RW, &rpcdebugon, 0, "RPC Debug messages");
200 #endif
201
202 /*
203 * Queue head for rpctask's
204 */
205 static
206 TAILQ_HEAD(, rpctask) rpctask_q;
207 struct callout rpcclnt_callout;
208
209 #ifdef __OpenBSD__
210 static int rpcclnt_send(struct socket *, struct mbuf *, struct mbuf *, struct rpctask *);
211 static int rpcclnt_receive(struct rpctask *, struct mbuf **, struct mbuf **, RPC_EXEC_CTX);
212 #else
213 static int rpcclnt_send(struct socket *, struct sockaddr *, struct mbuf *, struct rpctask *);
214 static int rpcclnt_receive(struct rpctask *, struct sockaddr **, struct mbuf **, RPC_EXEC_CTX);
215 #endif
216
217 static int rpcclnt_msg(RPC_EXEC_CTX, const char *, char *);
218
219 static int rpcclnt_reply(struct rpctask *, RPC_EXEC_CTX);
220 static void rpcclnt_timer(void *);
221 static int rpcclnt_sndlock(int *, struct rpctask *);
222 static void rpcclnt_sndunlock(int *);
223 static int rpcclnt_rcvlock(struct rpctask *);
224 static void rpcclnt_rcvunlock(int *);
225 #if 0
226 void rpcclnt_realign(struct mbuf *, int);
227 #else
228 static void rpcclnt_realign(struct mbuf **, int);
229 #endif
230
231 static struct mbuf *rpcclnt_buildheader(struct rpcclnt *, int, struct mbuf *, u_int32_t, int *, struct mbuf **, struct ucred *);
232 static int rpcm_disct(struct mbuf **, caddr_t *, int, int, caddr_t *);
233 static u_int32_t rpcclnt_proct(struct rpcclnt *, u_int32_t);
234 static int rpc_adv(struct mbuf **, caddr_t *, int, int);
235 static void rpcclnt_softterm(struct rpctask * task);
236
237 static int rpcauth_buildheader(struct rpc_auth * auth, struct ucred *, struct mbuf **, caddr_t *);
238
239 void
240 rpcclnt_init(void)
241 {
242 #ifdef __OpenBSD__
243 static struct timeout rpcclnt_timer_to;
244 #endif
245
246 rpcclnt_ticks = (hz * RPC_TICKINTVL + 500) / 1000;
247 if (rpcclnt_ticks < 1)
248 rpcclnt_ticks = 1;
249 rpcstats.rpcretries = 0;
250 rpcstats.rpcrequests = 0;
251 rpcstats.rpctimeouts = 0;
252 rpcstats.rpcunexpected = 0;
253 rpcstats.rpcinvalid = 0;
254
255 /*
256 * rpc constants how about actually using more than one of these!
257 */
258
259 rpc_reply = txdr_unsigned(RPC_REPLY);
260 rpc_vers = txdr_unsigned(RPC_VER2);
261 rpc_call = txdr_unsigned(RPC_CALL);
262 #if 0
263 rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
264 rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
265 rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
266 rpc_autherr = txdr_unsigned(RPC_AUTHERR);
267 rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
268 rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4);
269 #endif
270
271 /* initialize rpctask queue */
272 TAILQ_INIT(&rpctask_q);
273
274 #ifdef __OpenBSD__
275 /* initialize pools */
276 pool_init(&rpctask_pool, sizeof(struct rpctask), 0, 0, RPCTASKPOOL_LWM,
277 "rpctask_p", NULL);
278 pool_setlowat(&rpctask_pool, RPCTASKPOOL_LWM);
279 pool_sethiwat(&rpctask_pool, RPCTASKPOOL_HWM);
280
281 pool_init(&rpcclnt_pool, sizeof(struct rpcclnt), 0, 0, 1, "rpcclnt_p", NULL);
282
283 /* initialize timers */
284 timeout_set(&rpcclnt_timer_to, rpcclnt_timer, &rpcclnt_timer_to);
285 rpcclnt_timer(&rpcclnt_timer_to);
286 #else /* !__OpenBSD__ */
287 callout_init(&rpcclnt_callout, 0);
288 #endif /* !__OpenBSD__ */
289
290 RPCDEBUG("rpc initialed");
291
292 return;
293 }
294
295 void
296 rpcclnt_uninit(void)
297 {
298 RPCDEBUG("uninit");
299 /* XXX delete sysctl variables? */
300 callout_stop(&rpcclnt_callout);
301 }
302
303 int
304 rpcclnt_setup(clnt, program, addr, sotype, soproto, auth, max_read_size, max_write_size, flags)
305 struct rpcclnt * clnt;
306 struct rpc_program * program;
307 struct sockaddr * addr;
308 int sotype;
309 int soproto;
310 struct rpc_auth * auth;
311 int max_read_size;
312 int max_write_size;
313 int flags;
314 {
315 if (clnt == NULL || program == NULL || addr == NULL || auth == NULL)
316 RPC_RETURN (EFAULT);
317
318 if (program->prog_name == NULL)
319 RPC_RETURN (EFAULT);
320 clnt->rc_prog = program;
321
322 clnt->rc_name = addr;
323 clnt->rc_sotype = sotype;
324 clnt->rc_soproto = soproto;
325 clnt->rc_auth = auth;
326 clnt->rc_rsize = max_read_size;
327 clnt->rc_wsize = max_write_size;
328 clnt->rc_flag = flags;
329
330 clnt->rc_proctlen = 0;
331 clnt->rc_proct = NULL;
332
333 RPC_RETURN (0);
334 }
335
336 /*
337 * Initialize sockets and congestion for a new RPC connection. We do not free
338 * the sockaddr if error.
339 */
340 int
341 rpcclnt_connect(rpc, td)
342 struct rpcclnt *rpc;
343 RPC_EXEC_CTX td;
344 {
345 struct socket *so;
346 int s, error, rcvreserve, sndreserve;
347 struct sockaddr *saddr;
348
349 #ifdef __OpenBSD__
350 struct sockaddr_in *sin;
351 struct mbuf *m;
352 #else
353 struct sockaddr_in sin;
354
355 int soarg;
356 struct sockopt opt;
357 #endif
358
359 if (rpc == NULL) {
360 RPCDEBUG("no rpcclnt struct!\n");
361 RPC_RETURN(EFAULT);
362 }
363
364 /* create the socket */
365 rpc->rc_so = NULL;
366
367 saddr = rpc->rc_name;
368
369 error = socreate(saddr->sa_family, &rpc->rc_so, rpc->rc_sotype,
370 rpc->rc_soproto, td->td_ucred, td);
371 if (error) {
372 RPCDEBUG("error %d in socreate()", error);
373 RPC_RETURN(error);
374 }
375 so = rpc->rc_so;
376 rpc->rc_soflags = so->so_proto->pr_flags;
377
378 /*
379 * Some servers require that the client port be a reserved port
380 * number. We always allocate a reserved port, as this prevents
381 * filehandle disclosure through UDP port capture.
382 */
383 if (saddr->sa_family == AF_INET) {
384 #ifdef __OpenBSD__
385 struct mbuf *mopt;
386 int *ip;
387 #endif
388
389 #ifdef __OpenBSD__
390 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
391 mopt->m_len = sizeof(int);
392 ip = mtod(mopt, int *);
393 *ip = IP_PORTRANGE_LOW;
394
395 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
396 #else
397 soarg = IP_PORTRANGE_LOW;
398 bzero(&opt, sizeof(struct sockopt));
399 opt.sopt_dir = SOPT_SET;
400 opt.sopt_level = IPPROTO_IP;
401 opt.sopt_name = IP_PORTRANGE;
402 opt.sopt_val = &soarg;
403 opt.sopt_valsize = sizeof(soarg);
404
405 error = sosetopt(so, &opt);
406 #endif
407 if (error)
408 goto bad;
409
410 #ifdef __OpenBSD__
411 MGET(m, M_TRYWAIT, MT_SONAME);
412 sin = mtod(m, struct sockaddr_in *);
413 sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
414 sin->sin_family = AF_INET;
415 sin->sin_addr.s_addr = INADDR_ANY;
416 sin->sin_port = htons(0);
417 error = sobind(so, m);
418 m_freem(m);
419 #else
420 sin.sin_len = sizeof(struct sockaddr_in);
421 sin.sin_family = AF_INET;
422 sin.sin_addr.s_addr = INADDR_ANY;
423 sin.sin_port = htons(0);
424 /*
425 * &thread0 gives us root credentials to ensure sobind
426 * will give us a reserved ephemeral port.
427 */
428 error = sobind(so, (struct sockaddr *) & sin, &thread0);
429 #endif
430 if (error)
431 goto bad;
432
433 #ifdef __OpenBSD__
434 MGET(mopt, M_TRYWAIT, MT_SOOPTS);
435 mopt->m_len = sizeof(int);
436 ip = mtod(mopt, int *);
437 *ip = IP_PORTRANGE_DEFAULT;
438 error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
439 #else
440 soarg = IP_PORTRANGE_DEFAULT;
441 bzero(&opt, sizeof(struct sockopt));
442 opt.sopt_dir = SOPT_SET;
443 opt.sopt_level = IPPROTO_IP;
444 opt.sopt_name = IP_PORTRANGE;
445 opt.sopt_val = &soarg;
446 opt.sopt_valsize = sizeof(soarg);
447 error = sosetopt(so, &opt);
448 #endif
449 if (error)
450 goto bad;
451 }
452 /*
453 * Protocols that do not require connections may be optionally left
454 * unconnected for servers that reply from a port other than
455 * NFS_PORT.
456 */
457 if (rpc->rc_flag & RPCCLNT_NOCONN) {
458 if (rpc->rc_soflags & PR_CONNREQUIRED) {
459 error = ENOTCONN;
460 goto bad;
461 }
462 } else {
463 error = soconnect(so, saddr, td);
464 if (error)
465 goto bad;
466
467 /*
468 * Wait for the connection to complete. Cribbed from the
469 * connect system call but with the wait timing out so that
470 * interruptible mounts don't hang here for a long time.
471 */
472 #ifdef __OpenBSD__
473 s = splsoftnet();
474 #else
475 s = splnet();
476 #endif
477 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
478 (void)tsleep((caddr_t) & so->so_timeo, PSOCK,
479 "rpc", 2 * hz);
480
481 /*
482 * XXX needs to catch interrupt signals. something
483 * like this: if ((so->so_state & SS_ISCONNECTING) &&
484 * so->so_error == 0 && rep && (error =
485 * nfs_sigintr(nmp, rep, rep->r_td)) != 0) {
486 * so->so_state &= ~SS_ISCONNECTING; splx(s); goto
487 * bad; }
488 */
489 }
490 if (so->so_error) {
491 error = so->so_error;
492 so->so_error = 0;
493 splx(s);
494 goto bad;
495 }
496 splx(s);
497 }
498 if (rpc->rc_flag & (RPCCLNT_SOFT | RPCCLNT_INT)) {
499 so->so_rcv.sb_timeo = (5 * hz);
500 so->so_snd.sb_timeo = (5 * hz);
501 } else {
502 so->so_rcv.sb_timeo = 0;
503 so->so_snd.sb_timeo = 0;
504 }
505
506
507 if (rpc->rc_sotype == SOCK_DGRAM) {
508 sndreserve = rpc->rc_wsize + RPC_MAXPKTHDR;
509 rcvreserve = rpc->rc_rsize + RPC_MAXPKTHDR;
510 } else if (rpc->rc_sotype == SOCK_SEQPACKET) {
511 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR) * 2;
512 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR) * 2;
513 } else {
514 if (rpc->rc_sotype != SOCK_STREAM)
515 panic("rpcclnt_connect() bad sotype");
516 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
517 #ifdef __OpenBSD__
518 MGET(m, M_TRYWAIT, MT_SOOPTS);
519 *mtod(m, int32_t *) = 1;
520 m->m_len = sizeof(int32_t);
521 sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
522 #else
523 soarg = 1;
524
525 bzero(&opt, sizeof(struct sockopt));
526 opt.sopt_dir = SOPT_SET;
527 opt.sopt_level = SOL_SOCKET;
528 opt.sopt_name = SO_KEEPALIVE;
529 opt.sopt_val = &soarg;
530 opt.sopt_valsize = sizeof(soarg);
531 sosetopt(so, &opt);
532 #endif
533 }
534 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
535 #ifdef __OpenBSD__
536 MGET(m, M_TRYWAIT, MT_SOOPTS);
537 *mtod(m, int32_t *) = 1;
538 m->m_len = sizeof(int32_t);
539 sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
540 #else
541 soarg = 1;
542
543 bzero(&opt, sizeof(struct sockopt));
544 opt.sopt_dir = SOPT_SET;
545 opt.sopt_level = IPPROTO_TCP;
546 opt.sopt_name = TCP_NODELAY;
547 opt.sopt_val = &soarg;
548 opt.sopt_valsize = sizeof(soarg);
549 sosetopt(so, &opt);
550 #endif
551 }
552 sndreserve = (rpc->rc_wsize + RPC_MAXPKTHDR +
553 sizeof(u_int32_t)) * 2;
554 rcvreserve = (rpc->rc_rsize + RPC_MAXPKTHDR +
555 sizeof(u_int32_t)) * 2;
556 }
557 error = soreserve(so, sndreserve, rcvreserve);
558 if (error)
559 goto bad;
560 so->so_rcv.sb_flags |= SB_NOINTR;
561 so->so_snd.sb_flags |= SB_NOINTR;
562
563 /* Initialize other non-zero congestion variables */
564 rpc->rc_srtt[0] = rpc->rc_srtt[1] = rpc->rc_srtt[2] =
565 rpc->rc_srtt[3] = (RPC_TIMEO << 3);
566 rpc->rc_sdrtt[0] = rpc->rc_sdrtt[1] = rpc->rc_sdrtt[2] =
567 rpc->rc_sdrtt[3] = 0;
568 rpc->rc_cwnd = RPC_MAXCWND / 2; /* Initial send window */
569 rpc->rc_sent = 0;
570 rpc->rc_timeouts = 0;
571 RPC_RETURN(0);
572
573 bad:
574 rpcclnt_disconnect(rpc);
575 RPC_RETURN(error);
576 }
577
578
579 /*
580 * Reconnect routine:
581 * Called when a connection is broken on a reliable protocol.
582 * - clean up the old socket
583 * - rpcclnt_connect() again
584 * - set R_MUSTRESEND for all outstanding requests on mount point
585 * If this fails the mount point is DEAD!
586 * nb: Must be called with the rpcclnt_sndlock() set on the mount point.
587 */
588 int
589 rpcclnt_reconnect(rep, td)
590 struct rpctask *rep;
591 RPC_EXEC_CTX td;
592 {
593 struct rpctask *rp;
594 struct rpcclnt *rpc = rep->r_rpcclnt;
595 int error;
596
597 rpcclnt_disconnect(rpc);
598 while ((error = rpcclnt_connect(rpc, td)) != 0) {
599 if (error == EINTR || error == ERESTART)
600 RPC_RETURN(EINTR);
601 tsleep(&fake_wchan, PSOCK, "rpccon", hz);
602 }
603
604 /*
605 * Loop through outstanding request list and fix up all requests on
606 * old socket.
607 */
608 for (rp = TAILQ_FIRST(&rpctask_q); rp != NULL;
609 rp = TAILQ_NEXT(rp, r_chain)) {
610 if (rp->r_rpcclnt == rpc)
611 rp->r_flags |= R_MUSTRESEND;
612 }
613 RPC_RETURN(0);
614 }
615
616 /*
617 * RPC transport disconnect. Clean up and unlink.
618 */
619 void
620 rpcclnt_disconnect(rpc)
621 struct rpcclnt *rpc;
622 {
623 struct socket *so;
624
625 if (rpc->rc_so) {
626 so = rpc->rc_so;
627 rpc->rc_so = NULL;
628 soshutdown(so, 2);
629 soclose(so);
630 }
631 }
632
633 void
634 rpcclnt_safedisconnect(struct rpcclnt * rpc)
635 {
636 struct rpctask dummytask;
637
638 bzero(&dummytask, sizeof(dummytask));
639 dummytask.r_rpcclnt = rpc;
640 rpcclnt_rcvlock(&dummytask);
641 rpcclnt_disconnect(rpc);
642 rpcclnt_rcvunlock(&rpc->rc_flag);
643 }
644
645 /*
646 * This is the rpc send routine. For connection based socket types, it
647 * must be called with an rpcclnt_sndlock() on the socket.
648 * "rep == NULL" indicates that it has been called from a server.
649 * For the client side:
650 * - return EINTR if the RPC is terminated, 0 otherwise
651 * - set R_MUSTRESEND if the send fails for any reason
652 * - do any cleanup required by recoverable socket errors (?)
653 * For the server side:
654 * - return EINTR or ERESTART if interrupted by a signal
655 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
656 * - do any cleanup required by recoverable socket errors (?)
657 */
658 static int
659 rpcclnt_send(so, nam, top, rep)
660 struct socket *so;
661 #ifdef __OpenBSD__
662 struct mbuf *nam;
663 #else
664 struct sockaddr *nam;
665 #endif
666 struct mbuf *top;
667 struct rpctask *rep;
668 {
669 #ifdef __OpenBSD__
670 struct mbuf *sendnam;
671 #else
672 struct sockaddr *sendnam;
673 struct thread *td = curthread;
674 #endif
675 int error, soflags, flags;
676
677 if (rep) {
678 if (rep->r_flags & R_SOFTTERM) {
679 m_freem(top);
680 RPC_RETURN(EINTR);
681 }
682 if ((so = rep->r_rpcclnt->rc_so) == NULL) {
683 rep->r_flags |= R_MUSTRESEND;
684 m_freem(top);
685 RPC_RETURN(0);
686 }
687 rep->r_flags &= ~R_MUSTRESEND;
688 soflags = rep->r_rpcclnt->rc_soflags;
689 } else
690 soflags = so->so_proto->pr_flags;
691
692 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
693 sendnam = NULL;
694 else
695 sendnam = nam;
696
697 if (so->so_type == SOCK_SEQPACKET)
698 flags = MSG_EOR;
699 else
700 flags = 0;
701
702 /*
703 * XXXRW: If/when this code becomes MPSAFE itself, Giant might have
704 * to be conditionally acquired earlier for the stack so has to avoid
705 * lock order reversals with any locks held over rpcclnt_send().
706 */
707 error = sosend(so, sendnam, NULL, top, NULL, flags, td);
708 if (error) {
709 if (rep) {
710 log(LOG_INFO, "rpc send error %d for service %s\n", error,
711 rep->r_rpcclnt->rc_prog->prog_name);
712 /*
713 * Deal with errors for the client side.
714 */
715 if (rep->r_flags & R_SOFTTERM)
716 error = EINTR;
717 else
718 rep->r_flags |= R_MUSTRESEND;
719 } else
720 log(LOG_INFO, "rpc service send error %d\n", error);
721
722 /*
723 * Handle any recoverable (soft) socket errors here.
724 */
725 if (error != EINTR && error != ERESTART &&
726 error != EWOULDBLOCK && error != EPIPE)
727 error = 0;
728 }
729 RPC_RETURN(error);
730 }
731
732 /*
733 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all done by
734 * soreceive(), but for SOCK_STREAM we must deal with the Record Mark and
735 * consolidate the data into a new mbuf list. nb: Sometimes TCP passes the
736 * data up to soreceive() in long lists of small mbufs. For SOCK_STREAM we
737 * must be very careful to read an entire record once we have read any of it,
738 * even if the system call has been interrupted.
739 */
740 static int
741 rpcclnt_receive(rep, aname, mp, td)
742 struct rpctask *rep;
743 #ifdef __OpenBSD__
744 struct mbuf **aname;
745 #else
746 struct sockaddr **aname;
747 #endif
748 struct mbuf **mp;
749 RPC_EXEC_CTX td;
750 {
751 struct socket *so;
752 struct uio auio;
753 struct iovec aio;
754 struct mbuf *m;
755 struct mbuf *control;
756 u_int32_t len;
757 #ifdef __OpenBSD__
758 struct mbuf **getnam;
759 #else
760 struct sockaddr **getnam;
761 #endif
762 int error, sotype, rcvflg;
763
764 /*
765 * Set up arguments for soreceive()
766 */
767 *mp = NULL;
768 *aname = NULL;
769 sotype = rep->r_rpcclnt->rc_sotype;
770
771 /*
772 * For reliable protocols, lock against other senders/receivers in
773 * case a reconnect is necessary. For SOCK_STREAM, first get the
774 * Record Mark to find out how much more there is to get. We must
775 * lock the socket against other receivers until we have an entire
776 * rpc request/reply.
777 */
778 if (sotype != SOCK_DGRAM) {
779 error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
780 if (error)
781 RPC_RETURN(error);
782 tryagain:
783 /*
784 * Check for fatal errors and resending request.
785 */
786 /*
787 * Ugh: If a reconnect attempt just happened, rc_so would
788 * have changed. NULL indicates a failed attempt that has
789 * essentially shut down this mount point.
790 */
791 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
792 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
793 RPC_RETURN(EINTR);
794 }
795 so = rep->r_rpcclnt->rc_so;
796 if (!so) {
797 error = rpcclnt_reconnect(rep, td);
798 if (error) {
799 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
800 RPC_RETURN(error);
801 }
802 goto tryagain;
803 }
804 while (rep->r_flags & R_MUSTRESEND) {
805 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
806 rpcstats.rpcretries++;
807 error = rpcclnt_send(so, rep->r_rpcclnt->rc_name, m, rep);
808 if (error) {
809 if (error == EINTR || error == ERESTART ||
810 (error = rpcclnt_reconnect(rep, td)) != 0) {
811 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
812 RPC_RETURN(error);
813 }
814 goto tryagain;
815 }
816 }
817 rpcclnt_sndunlock(&rep->r_rpcclnt->rc_flag);
818 if (sotype == SOCK_STREAM) {
819 aio.iov_base = (caddr_t) & len;
820 aio.iov_len = sizeof(u_int32_t);
821 auio.uio_iov = &aio;
822 auio.uio_iovcnt = 1;
823 auio.uio_segflg = UIO_SYSSPACE;
824 auio.uio_rw = UIO_READ;
825 auio.uio_offset = 0;
826 auio.uio_resid = sizeof(u_int32_t);
827 #ifdef __OpenBSD__
828 auio.uio_procp = td;
829 #else
830 auio.uio_td = td;
831 #endif
832 do {
833 rcvflg = MSG_WAITALL;
834 error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg);
835 if (error == EWOULDBLOCK && rep) {
836 if (rep->r_flags & R_SOFTTERM)
837 RPC_RETURN(EINTR);
838 }
839 } while (error == EWOULDBLOCK);
840 if (!error && auio.uio_resid > 0) {
841 log(LOG_INFO,
842 "short receive (%zu/%zu) from rpc server %s\n",
843 sizeof(u_int32_t) - auio.uio_resid,
844 sizeof(u_int32_t),
845 rep->r_rpcclnt->rc_prog->prog_name);
846 error = EPIPE;
847 }
848 if (error)
849 goto errout;
850 len = ntohl(len) & ~0x80000000;
851 /*
852 * This is SERIOUS! We are out of sync with the
853 * sender and forcing a disconnect/reconnect is all I
854 * can do.
855 */
856 if (len > RPC_MAXPACKET) {
857 log(LOG_ERR, "%s (%d) from rpc server %s\n",
858 "impossible packet length",
859 len,
860 rep->r_rpcclnt->rc_prog->prog_name);
861 error = EFBIG;
862 goto errout;
863 }
864 auio.uio_resid = len;
865 do {
866 rcvflg = MSG_WAITALL;
867 error = soreceive(so, NULL, &auio, mp, NULL, &rcvflg);
868 } while (error == EWOULDBLOCK || error == EINTR ||
869 error == ERESTART);
870 if (!error && auio.uio_resid > 0) {
871 log(LOG_INFO,
872 "short receive (%d/%d) from rpc server %s\n",
873 len - auio.uio_resid, len,
874 rep->r_rpcclnt->rc_prog->prog_name);
875 error = EPIPE;
876 }
877 } else {
878 /*
879 * NB: Since uio_resid is big, MSG_WAITALL is ignored
880 * and soreceive() will return when it has either a
881 * control msg or a data msg. We have no use for
882 * control msg., but must grab them and then throw
883 * them away so we know what is going on.
884 */
885 auio.uio_resid = len = 100000000; /* Anything Big */
886 #ifdef __OpenBSD__
887 auio.uio_procp = td;
888 #else
889 auio.uio_td = td;
890 #endif
891 do {
892 rcvflg = 0;
893 error = soreceive(so, NULL, &auio, mp, &control, &rcvflg);
894 if (control)
895 m_freem(control);
896 if (error == EWOULDBLOCK && rep) {
897 if (rep->r_flags & R_SOFTTERM)
898 RPC_RETURN(EINTR);
899 }
900 } while (error == EWOULDBLOCK ||
901 (!error && *mp == NULL && control));
902 if ((rcvflg & MSG_EOR) == 0)
903 printf("Egad!!\n");
904 if (!error && *mp == NULL)
905 error = EPIPE;
906 len -= auio.uio_resid;
907 }
908 errout:
909 if (error && error != EINTR && error != ERESTART) {
910 m_freem(*mp);
911 *mp = (struct mbuf *) 0;
912 if (error != EPIPE)
913 log(LOG_INFO,
914 "receive error %d from rpc server %s\n",
915 error,
916 rep->r_rpcclnt->rc_prog->prog_name);
917 error = rpcclnt_sndlock(&rep->r_rpcclnt->rc_flag, rep);
918 if (!error)
919 error = rpcclnt_reconnect(rep, td);
920 if (!error)
921 goto tryagain;
922 }
923 } else {
924 if ((so = rep->r_rpcclnt->rc_so) == NULL)
925 RPC_RETURN(EACCES);
926 if (so->so_state & SS_ISCONNECTED)
927 getnam = NULL;
928 else
929 getnam = aname;
930 auio.uio_resid = len = 1000000;
931 #ifdef __OpenBSD__
932 auio.uio_procp = td;
933 #else
934 auio.uio_td = td;
935 #endif
936
937 do {
938 rcvflg = 0;
939 error = soreceive(so, getnam, &auio, mp, NULL, &rcvflg);
940 RPCDEBUG("soreceive returns %d", error);
941 if (error == EWOULDBLOCK && (rep->r_flags & R_SOFTTERM)) {
942 RPCDEBUG("wouldblock && softerm -> EINTR");
943 RPC_RETURN(EINTR);
944 }
945 } while (error == EWOULDBLOCK);
946 len -= auio.uio_resid;
947 }
948 if (error) {
949 m_freem(*mp);
950 *mp = NULL;
951 } else {
952 /*
953 * Search for any mbufs that are not a multiple of 4 bytes
954 * long or with m_data not longword aligned. These could
955 * cause pointer alignment problems, so copy them to well
956 * aligned mbufs.
957 */
958 rpcclnt_realign(mp, 5 * RPCX_UNSIGNED);
959 }
960 RPC_RETURN(error);
961 }
962
963
964 /*
965 * Implement receipt of reply on a socket. We must search through the list of
966 * received datagrams matching them with outstanding requests using the xid,
967 * until ours is found.
968 */
969 /* ARGSUSED */
970 static int
971 rpcclnt_reply(myrep, td)
972 struct rpctask *myrep;
973 RPC_EXEC_CTX td;
974 {
975 struct rpctask *rep;
976 struct rpcclnt *rpc = myrep->r_rpcclnt;
977 int32_t t1;
978 struct mbuf *mrep, *md;
979 #ifdef __OpenBSD__
980 struct mbuf *nam;
981 #else
982 struct sockaddr *nam;
983 #endif
984 u_int32_t rxid, *tl;
985 caddr_t dpos, cp2;
986 int error;
987
988 /*
989 * Loop around until we get our own reply
990 */
991 for (;;) {
992 /*
993 * Lock against other receivers so that I don't get stuck in
994 * sbwait() after someone else has received my reply for me.
995 * Also necessary for connection based protocols to avoid
996 |