1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1988, 1993
5 * The Regents of the University of California.
6 * Copyright (c) 2006-2007 Robert N. M. Watson
7 * Copyright (c) 2010-2011 Juniper Networks, Inc.
8 * All rights reserved.
9 *
10 * Portions of this software were developed by Robert N. M. Watson under
11 * contract to Juniper Networks, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
38 */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD: releng/12.0/sys/netinet/tcp_usrreq.c 338291 2018-08-24 10:50:19Z tuexen $");
42
43 #include "opt_ddb.h"
44 #include "opt_inet.h"
45 #include "opt_inet6.h"
46 #include "opt_ipsec.h"
47 #include "opt_tcpdebug.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/limits.h>
52 #include <sys/malloc.h>
53 #include <sys/refcount.h>
54 #include <sys/kernel.h>
55 #include <sys/sysctl.h>
56 #include <sys/mbuf.h>
57 #ifdef INET6
58 #include <sys/domain.h>
59 #endif /* INET6 */
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/protosw.h>
63 #include <sys/proc.h>
64 #include <sys/jail.h>
65 #include <sys/syslog.h>
66
67 #ifdef DDB
68 #include <ddb/ddb.h>
69 #endif
70
71 #include <net/if.h>
72 #include <net/if_var.h>
73 #include <net/route.h>
74 #include <net/vnet.h>
75
76 #include <netinet/in.h>
77 #include <netinet/in_kdtrace.h>
78 #include <netinet/in_pcb.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/in_var.h>
81 #include <netinet/ip_var.h>
82 #ifdef INET6
83 #include <netinet/ip6.h>
84 #include <netinet6/in6_pcb.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/scope6_var.h>
87 #endif
88 #include <netinet/tcp.h>
89 #include <netinet/tcp_fsm.h>
90 #include <netinet/tcp_seq.h>
91 #include <netinet/tcp_timer.h>
92 #include <netinet/tcp_var.h>
93 #include <netinet/tcp_log_buf.h>
94 #include <netinet/tcpip.h>
95 #include <netinet/cc/cc.h>
96 #include <netinet/tcp_fastopen.h>
97 #include <netinet/tcp_hpts.h>
98 #ifdef TCPPCAP
99 #include <netinet/tcp_pcap.h>
100 #endif
101 #ifdef TCPDEBUG
102 #include <netinet/tcp_debug.h>
103 #endif
104 #ifdef TCP_OFFLOAD
105 #include <netinet/tcp_offload.h>
106 #endif
107 #include <netipsec/ipsec_support.h>
108
109 /*
110 * TCP protocol interface to socket abstraction.
111 */
/* Forward declarations for the file-local helpers used by the pru_*() handlers. */
static int	tcp_attach(struct socket *so);
#ifdef INET
static int	tcp_connect(struct tcpcb *tp, struct sockaddr *nam,
		    struct thread *td);
#endif /* INET */
#ifdef INET6
static int	tcp6_connect(struct tcpcb *tp, struct sockaddr *nam,
		    struct thread *td);
#endif /* INET6 */
static void	tcp_disconnect(struct tcpcb *tp);
static void	tcp_usrclosed(struct tcpcb *tp);
static void	tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti);
124
/*
 * Debug tracing helpers for the user-request handlers.
 *
 * TCPDEBUG0 declares the local "ostate", TCPDEBUG1() snapshots the current
 * TCP state into it (0 when tp is NULL), and TCPDEBUG2(req) emits a trace
 * record for request "req" when the socket has SO_DEBUG set.  The macros
 * rely on locals named "tp" and "so" being in scope at the expansion site.
 * Without TCPDEBUG they all expand to nothing.
 *
 * TCPDEBUG2() is wrapped in do { } while (0) so that it behaves as a single
 * statement and cannot capture an "else" that follows the call site.
 */
#ifdef TCPDEBUG
#define	TCPDEBUG0	int ostate = 0
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	do {						\
	if (tp && (so->so_options & SO_DEBUG))				\
		tcp_trace(TA_USER, ostate, tp, 0, 0, (req));		\
} while (0)
#else
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#endif
135
136 /*
137 * TCP attaches to socket via pru_attach(), reserving space,
138 * and an internet control block.
139 */
140 static int
141 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
142 {
143 struct inpcb *inp;
144 struct tcpcb *tp = NULL;
145 int error;
146 TCPDEBUG0;
147
148 inp = sotoinpcb(so);
149 KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
150 TCPDEBUG1();
151
152 error = tcp_attach(so);
153 if (error)
154 goto out;
155
156 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
157 so->so_linger = TCP_LINGERTIME;
158
159 inp = sotoinpcb(so);
160 tp = intotcpcb(inp);
161 out:
162 TCPDEBUG2(PRU_ATTACH);
163 TCP_PROBE2(debug__user, tp, PRU_ATTACH);
164 return error;
165 }
166
167 /*
168 * tcp_detach is called when the socket layer loses its final reference
169 * to the socket, be it a file descriptor reference, a reference from TCP,
170 * etc. At this point, there is only one case in which we will keep around
171 * inpcb state: time wait.
172 *
173 * This function can probably be re-absorbed back into tcp_usr_detach() now
174 * that there is a single detach path.
175 */
176 static void
177 tcp_detach(struct socket *so, struct inpcb *inp)
178 {
179 struct tcpcb *tp;
180
181 INP_INFO_LOCK_ASSERT(&V_tcbinfo);
182 INP_WLOCK_ASSERT(inp);
183
184 KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
185 KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
186
187 tp = intotcpcb(inp);
188
189 if (inp->inp_flags & INP_TIMEWAIT) {
190 /*
191 * There are two cases to handle: one in which the time wait
192 * state is being discarded (INP_DROPPED), and one in which
193 * this connection will remain in timewait. In the former,
194 * it is time to discard all state (except tcptw, which has
195 * already been discarded by the timewait close code, which
196 * should be further up the call stack somewhere). In the
197 * latter case, we detach from the socket, but leave the pcb
198 * present until timewait ends.
199 *
200 * XXXRW: Would it be cleaner to free the tcptw here?
201 *
202 * Astute question indeed, from twtcp perspective there are
203 * four cases to consider:
204 *
205 * #1 tcp_detach is called at tcptw creation time by
206 * tcp_twstart, then do not discard the newly created tcptw
207 * and leave inpcb present until timewait ends
208 * #2 tcp_detach is called at tcptw creation time by
209 * tcp_twstart, but connection is local and tw will be
210 * discarded immediately
211 * #3 tcp_detach is called at timewait end (or reuse) by
212 * tcp_twclose, then the tcptw has already been discarded
213 * (or reused) and inpcb is freed here
214 * #4 tcp_detach is called() after timewait ends (or reuse)
215 * (e.g. by soclose), then tcptw has already been discarded
216 * (or reused) and inpcb is freed here
217 *
218 * In all three cases the tcptw should not be freed here.
219 */
220 if (inp->inp_flags & INP_DROPPED) {
221 in_pcbdetach(inp);
222 if (__predict_true(tp == NULL)) {
223 in_pcbfree(inp);
224 } else {
225 /*
226 * This case should not happen as in TIMEWAIT
227 * state the inp should not be destroyed before
228 * its tcptw. If INVARIANTS is defined, panic.
229 */
230 #ifdef INVARIANTS
231 panic("%s: Panic before an inp double-free: "
232 "INP_TIMEWAIT && INP_DROPPED && tp != NULL"
233 , __func__);
234 #else
235 log(LOG_ERR, "%s: Avoid an inp double-free: "
236 "INP_TIMEWAIT && INP_DROPPED && tp != NULL"
237 , __func__);
238 #endif
239 INP_WUNLOCK(inp);
240 }
241 } else {
242 in_pcbdetach(inp);
243 INP_WUNLOCK(inp);
244 }
245 } else {
246 /*
247 * If the connection is not in timewait, we consider two
248 * two conditions: one in which no further processing is
249 * necessary (dropped || embryonic), and one in which TCP is
250 * not yet done, but no longer requires the socket, so the
251 * pcb will persist for the time being.
252 *
253 * XXXRW: Does the second case still occur?
254 */
255 if (inp->inp_flags & INP_DROPPED ||
256 tp->t_state < TCPS_SYN_SENT) {
257 tcp_discardcb(tp);
258 in_pcbdetach(inp);
259 in_pcbfree(inp);
260 } else {
261 in_pcbdetach(inp);
262 INP_WUNLOCK(inp);
263 }
264 }
265 }
266
267 /*
268 * pru_detach() detaches the TCP protocol from the socket.
269 * If the protocol state is non-embryonic, then can't
270 * do this directly: have to initiate a pru_disconnect(),
271 * which may finish later; embryonic TCB's can just
272 * be discarded here.
273 */
274 static void
275 tcp_usr_detach(struct socket *so)
276 {
277 struct inpcb *inp;
278 int rlock = 0;
279 struct epoch_tracker et;
280
281 inp = sotoinpcb(so);
282 KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
283 if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
284 INP_INFO_RLOCK_ET(&V_tcbinfo, et);
285 rlock = 1;
286 }
287 INP_WLOCK(inp);
288 KASSERT(inp->inp_socket != NULL,
289 ("tcp_usr_detach: inp_socket == NULL"));
290 tcp_detach(so, inp);
291 if (rlock)
292 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
293 }
294
#ifdef INET
/*
 * pru_bind() handler for IPv4: give the socket a local address.
 *
 * Returns EINVAL when the sockaddr length is not that of a sockaddr_in or
 * the connection is in timewait / has been dropped, EAFNOSUPPORT for an
 * AF_INET multicast address, and otherwise the result of in_pcbbind().
 */
static int
tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_in *sinp = (struct sockaddr_in *)nam;
	struct tcpcb *tp = NULL;
	struct inpcb *inp;
	int error = 0;
	TCPDEBUG0;

	if (nam->sa_len != sizeof(*sinp))
		return (EINVAL);

	/* Binding to a multicast address is not allowed. */
	if (sinp->sin_family == AF_INET &&
	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
		return (EAFNOSUPPORT);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
	INP_WLOCK(inp);
	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {
		error = EINVAL;
	} else {
		tp = intotcpcb(inp);
		TCPDEBUG1();
		INP_HASH_WLOCK(&V_tcbinfo);
		error = in_pcbbind(inp, nam, td->td_ucred);
		INP_HASH_WUNLOCK(&V_tcbinfo);
	}
	TCPDEBUG2(PRU_BIND);
	TCP_PROBE2(debug__user, tp, PRU_BIND);
	INP_WUNLOCK(inp);

	return (error);
}
#endif /* INET */
339
#ifdef INET6
/*
 * pru_bind() handler for IPv6 sockets: give the socket a local address.
 *
 * The inpcb's address-family flags (inp_vflag) are adjusted to match the
 * requested address before binding: IPv6 only by default, IPv4 as well for
 * the unspecified address on a non-V6ONLY socket, and IPv4 only for a
 * v4-mapped address (which is then bound through in_pcbbind()).
 *
 * Because inp_vflag is rewritten before the bind is attempted, the saved
 * value is put back whenever the request fails; otherwise a failed bind
 * would leave the socket with altered address-family flags.
 *
 * Returns EINVAL for a wrong sockaddr length or a timewait/dropped
 * connection, EAFNOSUPPORT for multicast addresses (native or v4-mapped),
 * and otherwise the result of in6_pcbbind() / in_pcbbind().
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp = NULL;
	struct sockaddr_in6 *sin6p;
	u_char vflagsav;
	TCPDEBUG0;

	sin6p = (struct sockaddr_in6 *)nam;
	if (nam->sa_len != sizeof (*sin6p))
		return (EINVAL);
	/*
	 * Must check for multicast addresses and disallow binding
	 * to them.
	 */
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
		return (EAFNOSUPPORT);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
	INP_WLOCK(inp);
	/* Remember the flags so they can be restored if the bind fails. */
	vflagsav = inp->inp_vflag;
	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
		error = EINVAL;
		goto out;
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();
	INP_HASH_WLOCK(&V_tcbinfo);
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
#ifdef INET
	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
			inp->inp_vflag |= INP_IPV4;
		else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
			struct sockaddr_in sin;

			in6_sin6_2_sin(&sin, sin6p);
			if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
				error = EAFNOSUPPORT;
				INP_HASH_WUNLOCK(&V_tcbinfo);
				goto out;
			}
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			error = in_pcbbind(inp, (struct sockaddr *)&sin,
			    td->td_ucred);
			INP_HASH_WUNLOCK(&V_tcbinfo);
			goto out;
		}
	}
#endif
	error = in6_pcbbind(inp, nam, td->td_ucred);
	INP_HASH_WUNLOCK(&V_tcbinfo);
out:
	/* A failed bind must not leave the address-family flags modified. */
	if (error != 0)
		inp->inp_vflag = vflagsav;
	TCPDEBUG2(PRU_BIND);
	TCP_PROBE2(debug__user, tp, PRU_BIND);
	INP_WUNLOCK(inp);
	return (error);
}
#endif /* INET6 */
404
#ifdef INET
/*
 * pru_listen() handler for IPv4: prepare to accept connections.
 *
 * After solisten_proto_check() succeeds, an unbound socket is implicitly
 * bound via in_pcbbind() with a NULL address; the connection then enters
 * TCPS_LISTEN and the socket layer is told about the backlog.
 *
 * For a Fast Open socket the pending-connection counter is allocated only
 * when the listen actually succeeded and no counter is attached yet.
 * Allocating it unconditionally would overwrite (and thereby leak or
 * orphan) an existing counter on a repeated listen() or on a failed one.
 *
 * Returns EINVAL for a timewait/dropped connection, otherwise the error
 * from solisten_proto_check() or in_pcbbind(), or 0.
 */
static int
tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
{
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp = NULL;

	TCPDEBUG0;
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
		error = EINVAL;
		goto out;
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();
	SOCK_LOCK(so);
	error = solisten_proto_check(so);
	INP_HASH_WLOCK(&V_tcbinfo);
	if (error == 0 && inp->inp_lport == 0)
		error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error == 0) {
		tcp_state_change(tp, TCPS_LISTEN);
		solisten_proto(so, backlog);
#ifdef TCP_OFFLOAD
		if ((so->so_options & SO_NO_OFFLOAD) == 0)
			tcp_offload_listen_start(tp);
#endif
	}
	SOCK_UNLOCK(so);

	/* Only a real listener needs a counter, and only one of them. */
	if (error == 0 && IS_FASTOPEN(tp->t_flags) &&
	    tp->t_tfo_pending == NULL)
		tp->t_tfo_pending = tcp_fastopen_alloc_counter();

out:
	TCPDEBUG2(PRU_LISTEN);
	TCP_PROBE2(debug__user, tp, PRU_LISTEN);
	INP_WUNLOCK(inp);
	return (error);
}
#endif /* INET */
452
#ifdef INET6
/*
 * pru_listen() handler for IPv6 sockets: prepare to accept connections.
 *
 * After solisten_proto_check() succeeds, an unbound socket is implicitly
 * bound via in6_pcbbind() with a NULL address; before that bind INP_IPV4
 * is set in inp_vflag only when the socket is not V6ONLY.  The saved
 * inp_vflag is restored if the request fails, so an unsuccessful listen
 * leaves the address-family flags as they were.
 *
 * For a Fast Open socket the pending-connection counter is allocated only
 * when the listen actually succeeded and no counter is attached yet.
 * Allocating it unconditionally would overwrite (and thereby leak or
 * orphan) an existing counter on a repeated listen() or on a failed one.
 *
 * Returns EINVAL for a timewait/dropped connection, otherwise the error
 * from solisten_proto_check() or in6_pcbbind(), or 0.
 */
static int
tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
{
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp = NULL;
	u_char vflagsav;

	TCPDEBUG0;
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
	INP_WLOCK(inp);
	/* Remember the flags so they can be restored on failure. */
	vflagsav = inp->inp_vflag;
	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
		error = EINVAL;
		goto out;
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();
	SOCK_LOCK(so);
	error = solisten_proto_check(so);
	INP_HASH_WLOCK(&V_tcbinfo);
	if (error == 0 && inp->inp_lport == 0) {
		inp->inp_vflag &= ~INP_IPV4;
		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
			inp->inp_vflag |= INP_IPV4;
		error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
	}
	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error == 0) {
		tcp_state_change(tp, TCPS_LISTEN);
		solisten_proto(so, backlog);
#ifdef TCP_OFFLOAD
		if ((so->so_options & SO_NO_OFFLOAD) == 0)
			tcp_offload_listen_start(tp);
#endif
	}
	SOCK_UNLOCK(so);

	/* Only a real listener needs a counter, and only one of them. */
	if (error == 0 && IS_FASTOPEN(tp->t_flags) &&
	    tp->t_tfo_pending == NULL)
		tp->t_tfo_pending = tcp_fastopen_alloc_counter();

out:
	/* A failed listen must not leave the address-family flags modified. */
	if (error != 0)
		inp->inp_vflag = vflagsav;
	TCPDEBUG2(PRU_LISTEN);
	TCP_PROBE2(debug__user, tp, PRU_LISTEN);
	INP_WUNLOCK(inp);
	return (error);
}
#endif /* INET6 */
501
#ifdef INET
/*
 * pru_connect() handler for IPv4: initiate a connection to a peer.
 *
 * The address is validated (length, no multicast, jail policy) before the
 * inpcb is locked.  tcp_connect() then sets the connection up; unless a
 * TCP offload device takes the connection over, the keep-alive timer is
 * armed with the connection-establishment timeout and the first segment
 * is sent through the stack's output routine.
 *
 * Returns EINVAL for a wrong sockaddr length, EAFNOSUPPORT for a
 * multicast destination, EADDRINUSE when the inpcb is in timewait,
 * ECONNREFUSED when it has been dropped, or the error of the jail check,
 * tcp_connect() or the output routine.
 */
static int
tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_in *sinp = (struct sockaddr_in *)nam;
	struct tcpcb *tp = NULL;
	struct inpcb *inp;
	int error = 0;
	TCPDEBUG0;

	if (nam->sa_len != sizeof(*sinp))
		return (EINVAL);

	/* Must disallow TCP ``connections'' to multicast addresses. */
	if (sinp->sin_family == AF_INET &&
	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
		return (EAFNOSUPPORT);

	error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr);
	if (error != 0)
		return (error);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_TIMEWAIT) != 0)
		error = EADDRINUSE;
	else if ((inp->inp_flags & INP_DROPPED) != 0)
		error = ECONNREFUSED;
	if (error != 0)
		goto out;

	tp = intotcpcb(inp);
	TCPDEBUG1();
	error = tcp_connect(tp, nam, td);
	if (error != 0)
		goto out;
#ifdef TCP_OFFLOAD
	/* A successful offload hands the connection to the TOE device. */
	if (registered_toedevs > 0 &&
	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
	    (error = tcp_offload_connect(so, nam)) == 0)
		goto out;
#endif
	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
	error = tp->t_fb->tfb_tcp_output(tp);
out:
	TCPDEBUG2(PRU_CONNECT);
	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
	INP_WUNLOCK(inp);
	return (error);
}
#endif /* INET */
561
#ifdef INET6
/*
 * pru_connect() handler for IPv6 sockets: initiate a connection to a peer.
 *
 * A v4-mapped destination on a socket that permits IPv4 is converted to a
 * sockaddr_in and connected through tcp_connect(); any other destination
 * goes through tcp6_connect().  In both cases the inpcb's address-family
 * flags (inp_vflag, and inc_flags for native IPv6) are switched to the
 * chosen family before connecting.
 *
 * Those flags are saved on entry and restored at exit when the request
 * failed and the socket still has no local port, i.e. when the implicit
 * bind performed by the connect did not take effect.  Without this a
 * failed connect would leave the socket restricted to one family.
 *
 * Returns EINVAL for a wrong sockaddr length or a v4-mapped destination on
 * a V6ONLY socket, EAFNOSUPPORT for multicast destinations or a family the
 * socket does not support, EADDRINUSE in timewait, ECONNREFUSED when
 * dropped, or the error of the jail check, connect or output routine.
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp = NULL;
	struct sockaddr_in6 *sin6p;
	u_char vflagsav;
	uint8_t incflagsav;

	TCPDEBUG0;

	sin6p = (struct sockaddr_in6 *)nam;
	if (nam->sa_len != sizeof (*sin6p))
		return (EINVAL);
	/*
	 * Must disallow TCP ``connections'' to multicast addresses.
	 */
	if (sin6p->sin6_family == AF_INET6
	    && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
		return (EAFNOSUPPORT);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
	INP_WLOCK(inp);
	/* Remember the flags so they can be restored on failure. */
	vflagsav = inp->inp_vflag;
	incflagsav = inp->inp_inc.inc_flags;
	if (inp->inp_flags & INP_TIMEWAIT) {
		error = EADDRINUSE;
		goto out;
	}
	if (inp->inp_flags & INP_DROPPED) {
		error = ECONNREFUSED;
		goto out;
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();
#ifdef INET
	/*
	 * XXXRW: Some confusion: V4/V6 flags relate to binding, and
	 * therefore probably require the hash lock, which isn't held here.
	 * Is this a significant problem?
	 */
	if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
		struct sockaddr_in sin;

		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			error = EINVAL;
			goto out;
		}
		if ((inp->inp_vflag & INP_IPV4) == 0) {
			error = EAFNOSUPPORT;
			goto out;
		}

		in6_sin6_2_sin(&sin, sin6p);
		if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto out;
		}
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		if ((error = prison_remote_ip4(td->td_ucred,
		    &sin.sin_addr)) != 0)
			goto out;
		if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
			goto out;
#ifdef TCP_OFFLOAD
		if (registered_toedevs > 0 &&
		    (so->so_options & SO_NO_OFFLOAD) == 0 &&
		    (error = tcp_offload_connect(so, nam)) == 0)
			goto out;
#endif
		/*
		 * NOTE(review): unlike the native IPv4 and IPv6 paths, no
		 * TT_KEEP timer is armed before the first output here --
		 * confirm whether that is intentional.
		 */
		error = tp->t_fb->tfb_tcp_output(tp);
		goto out;
	} else {
		if ((inp->inp_vflag & INP_IPV6) == 0) {
			error = EAFNOSUPPORT;
			goto out;
		}
	}
#endif
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	inp->inp_inc.inc_flags |= INC_ISIPV6;
	if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
		goto out;
	if ((error = tcp6_connect(tp, nam, td)) != 0)
		goto out;
#ifdef TCP_OFFLOAD
	if (registered_toedevs > 0 &&
	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
	    (error = tcp_offload_connect(so, nam)) == 0)
		goto out;
#endif
	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
	error = tp->t_fb->tfb_tcp_output(tp);

out:
	/*
	 * If the request failed and the socket is still unbound, the
	 * implicit bind in the connect call did not happen: restore the
	 * flags we modified.
	 */
	if (error != 0 && inp->inp_lport == 0) {
		inp->inp_vflag = vflagsav;
		inp->inp_inc.inc_flags = incflagsav;
	}
	TCPDEBUG2(PRU_CONNECT);
	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
	INP_WUNLOCK(inp);
	return (error);
}
#endif /* INET6 */
664
665 /*
666 * Initiate disconnect from peer.
667 * If connection never passed embryonic stage, just drop;
668 * else if don't need to let data drain, then can just drop anyways,
669 * else have to begin TCP shutdown process: mark socket disconnecting,
670 * drain unread data, state switch to reflect user close, and
671 * send segment (e.g. FIN) to peer. Socket will be really disconnected
672 * when peer sends FIN and acks ours.
673 *
674 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
675 */
676 static int
677 tcp_usr_disconnect(struct socket *so)
678 {
679 struct inpcb *inp;
680 struct tcpcb *tp = NULL;
681 struct epoch_tracker et;
682 int error = 0;
683
684 TCPDEBUG0;
685 INP_INFO_RLOCK_ET(&V_tcbinfo, et);
686 inp = sotoinpcb(so);
687 KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
688 INP_WLOCK(inp);
689 if (inp->inp_flags & INP_TIMEWAIT)
690 goto out;
691 if (inp->inp_flags & INP_DROPPED) {
692 error = ECONNRESET;
693 goto out;
694 }
695 tp = intotcpcb(inp);
696 TCPDEBUG1();
697 tcp_disconnect(tp);
698 out:
699 TCPDEBUG2(PRU_DISCONNECT);
700 TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
701 INP_WUNLOCK(inp);
702 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
703 return (error);
704 }
705
#ifdef INET
/*
 * Accept a connection.  Essentially all the work is done at higher levels;
 * just return the address of the peer, storing through nam.
 *
 * Returns ECONNABORTED when the socket is already disconnected or the
 * inpcb is in timewait / dropped; *nam is written only on success.
 */
static int
tcp_usr_accept(struct socket *so, struct sockaddr **nam)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = NULL;
	struct in_addr addr;
	in_port_t port = 0;
	int error = 0;
	TCPDEBUG0;

	if ((so->so_state & SS_ISDISCONNECTED) != 0)
		return (ECONNABORTED);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
	INP_WLOCK(inp);
	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {
		error = ECONNABORTED;
	} else {
		tp = intotcpcb(inp);
		TCPDEBUG1();

		/*
		 * We inline in_getpeeraddr and COMMON_END here, so that we
		 * can copy the data of interest and defer the malloc until
		 * after we release the lock.
		 */
		port = inp->inp_fport;
		addr = inp->inp_faddr;
	}
	TCPDEBUG2(PRU_ACCEPT);
	TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
	INP_WUNLOCK(inp);

	if (error != 0)
		return (error);
	*nam = in_sockaddr(port, &addr);
	return (0);
}
#endif /* INET */
751
#ifdef INET6
/*
 * Accept a connection on an IPv6 socket: return the peer's address through
 * nam.  When the inpcb is flagged INP_IPV4 the IPv4 peer address is
 * returned as a v4-mapped sockaddr_in6, otherwise the native IPv6 peer
 * address is returned.
 *
 * Returns ECONNABORTED when the socket is already disconnected or the
 * inpcb is in timewait / dropped; *nam is written only on success.
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
	struct epoch_tracker et;
	struct tcpcb *tp = NULL;
	struct inpcb *inp = NULL;
	struct in6_addr addr6;
	struct in_addr addr;
	in_port_t port = 0;
	int peer_is_v4 = 0;
	int error = 0;
	TCPDEBUG0;

	if ((so->so_state & SS_ISDISCONNECTED) != 0)
		return (ECONNABORTED);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
	INP_INFO_RLOCK_ET(&V_tcbinfo, et);
	INP_WLOCK(inp);
	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {
		error = ECONNABORTED;
	} else {
		tp = intotcpcb(inp);
		TCPDEBUG1();

		/*
		 * We inline in6_mapped_peeraddr and COMMON_END here, so that
		 * we can copy the data of interest and defer the malloc
		 * until after we release the lock.
		 */
		port = inp->inp_fport;
		if ((inp->inp_vflag & INP_IPV4) != 0) {
			peer_is_v4 = 1;
			addr = inp->inp_faddr;
		} else {
			addr6 = inp->in6p_faddr;
		}
	}
	TCPDEBUG2(PRU_ACCEPT);
	TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
	INP_WUNLOCK(inp);
	INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);

	if (error != 0)
		return (error);
	if (peer_is_v4)
		*nam = in6_v4mapsin6_sockaddr(port, &addr);
	else
		*nam = in6_sockaddr(port, &addr6);
	return (0);
}
#endif /* INET6 */
808
809 /*
810 * Mark the connection as being incapable of further output.
811 */
812 static int
813 tcp_usr_shutdown(struct socket *so)
814 {
815 int error = 0;
816 struct inpcb *inp;
817 struct tcpcb *tp = NULL;
818 struct epoch_tracker et;
819
820 TCPDEBUG0;
821 INP_INFO_RLOCK_ET(&V_tcbinfo, et);
822 inp = sotoinpcb(so);
823 KASSERT(inp != NULL, ("inp == NULL"));
824 INP_WLOCK(inp);
825 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
826 error = ECONNRESET;
827 goto out;
828 }
829 tp = intotcpcb(inp);
830 TCPDEBUG1();
831 socantsendmore(so);
832 tcp_usrclosed(tp);
833 if (!(inp->inp_flags & INP_DROPPED))
834 error = tp->t_fb->tfb_tcp_output(tp);
835
836 out:
837 TCPDEBUG2(PRU_SHUTDOWN);
838 TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
839 INP_WUNLOCK(inp);
840 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
841
842 return (error);
843 }
844
845 /*
846 * After a receive, possibly send window update to peer.
847 */
848 static int
849 tcp_usr_rcvd(struct socket *so, int flags)
850 {
851 struct inpcb *inp;
852 struct tcpcb *tp = NULL;
853 int error = 0;
854
855 TCPDEBUG0;
856 inp = sotoinpcb(so);
857 KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
858 INP_WLOCK(inp);
859 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
860 error = ECONNRESET;
861 goto out;
862 }
863 tp = intotcpcb(inp);
864 TCPDEBUG1();
865 /*
866 * For passively-created TFO connections, don't attempt a window
867 * update while still in SYN_RECEIVED as this may trigger an early
868 * SYN|ACK. It is preferable to have the SYN|ACK be sent along with
869 * application response data, or failing that, when the DELACK timer
870 * expires.
871 */
872 if (IS_FASTOPEN(tp->t_flags) &&
873 (tp->t_state == TCPS_SYN_RECEIVED))
874 goto out;
875 #ifdef TCP_OFFLOAD
876 if (tp->t_flags & TF_TOE)
877 tcp_offload_rcvd(tp);
878 else
879 #endif
880 tp->t_fb->tfb_tcp_output(tp);
881
882 out:
883 TCPDEBUG2(PRU_RCVD);
884 TCP_PROBE2(debug__user, tp, PRU_RCVD);
885 INP_WUNLOCK(inp);
886 return (error);
887 }
888
889 /*
890 * Do a send by putting data in output queue and updating urgent
891 * marker if URG set. Possibly send more data. Unlike the other
892 * pru_*() routines, the mbuf chains are our responsibility. We
893 * must either enqueue them or free them. The other pru_* routines
894 * generally are caller-frees.
895 */
896 static int
897 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
898 struct sockaddr *nam, struct mbuf *control, struct thread *td)
899 {
900 int error = 0;
901 struct inpcb *inp;
902 struct tcpcb *tp = NULL;
903 struct epoch_tracker net_et;
904 #ifdef INET
905 #ifdef INET6
906 struct sockaddr_in sin;
907 #endif
908 struct sockaddr_in *sinp;
909 #endif
910 #ifdef INET6
911 int isipv6;
912 #endif
913 TCPDEBUG0;
914
915 /*
916 * We require the pcbinfo lock if we will close the socket as part of
917 * this call.
918 */
919 if (flags & PRUS_EOF)
920 INP_INFO_RLOCK_ET(&V_tcbinfo, net_et);
921 inp = sotoinpcb(so);
922 KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
923 INP_WLOCK(inp);
924 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
925 if (control)
926 m_freem(control);
927 /*
928 * In case of PRUS_NOTREADY, tcp_usr_ready() is responsible
929 * for freeing memory.
930 */
931 if (m && (flags & PRUS_NOTREADY) == 0)
932 m_freem(m);
933 error = ECONNRESET;
934 goto out;
935 }
936 tp = intotcpcb(inp);
937 TCPDEBUG1();
938 if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
939 switch (nam->sa_family) {
940 #ifdef INET
941 case AF_INET:
942 sinp = (struct sockaddr_in *)nam;
943 if (sinp->sin_len != sizeof(struct sockaddr_in)) {
944 if (m)
945 m_freem(m);
946 error = EINVAL;
947 goto out;
948 }
949 if ((inp->inp_vflag & INP_IPV6) != 0) {
950 if (m)
951 m_freem(m);
952 error = EAFNOSUPPORT;
953 goto out;
954 }
955 if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
956 if (m)
957 m_freem(m);
958 error = EAFNOSUPPORT;
959 goto out;
960 }
961 if ((error = prison_remote_ip4(td->td_ucred,
962 &sinp->sin_addr))) {
963 if (m)
964 m_freem(m);
965 goto out;
966 }
967 #ifdef INET6
968 isipv6 = 0;
969 #endif
970 break;
971 #endif /* INET */
972 #ifdef INET6
973 case AF_INET6:
974 {
975 struct sockaddr_in6 *sin6p;
976
977 sin6p = (struct sockaddr_in6 *)nam;
978 if (sin6p->sin6_len != sizeof(struct sockaddr_in6)) {
979 if (m)
980 m_freem(m);
981 error = EINVAL;
982 goto out;
983 }
984 if (IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
985 if (m)
986 m_freem(m);
987 error = EAFNOSUPPORT;
988 goto out;
989 }
990 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
991 #ifdef INET
992 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
993 error = EINVAL;
994 if (m)
995 m_freem(m);
996 goto out;
997 }
998 if ((inp->inp_vflag & INP_IPV4) == 0) {
999 error = EAFNOSUPPORT;
1000 if (m)
1001 m_freem(m);
1002 goto out;
1003 }
1004 inp->inp_vflag &= ~INP_IPV6;
1005 sinp = &sin;
1006 in6_sin6_2_sin(sinp, sin6p);
1007 if (IN_MULTICAST(
1008 ntohl(sinp->sin_addr.s_addr))) {
1009 error = EAFNOSUPPORT;
1010 if (m)
1011 m_freem(m);
1012 goto out;
1013 }
1014 if ((error = prison_remote_ip4(td->td_ucred,
1015 &sinp->sin_addr))) {
1016 if (m)
1017 m_freem(m);
1018 goto out;
1019 }
1020 isipv6 = 0;
1021 #else /* !INET */
1022 error = EAFNOSUPPORT;
1023 if (m)
1024 m_freem(m);
1025 goto out;
1026 #endif /* INET */
1027 } else {
1028 if ((inp->inp_vflag & INP_IPV6) == 0) {
1029 if (m)
1030 m_freem(m);
1031 error = EAFNOSUPPORT;
1032 goto out;
1033 }
1034 inp->inp_vflag &= ~INP_IPV4;
1035 inp->inp_inc.inc_flags |= INC_ISIPV6;
1036 if ((error = prison_remote_ip6(td->td_ucred,
1037 &sin6p->sin6_addr))) {
1038 if (m)
1039 m_freem(m);
1040 goto out;
1041 }
1042 isipv6 = 1;
1043 }
1044 break;
1045 }
1046 #endif /* INET6 */
1047 default:
1048 if (m)
1049 m_freem(m);
1050 error = EAFNOSUPPORT;
1051 goto out;
1052 }
1053 }
1054 if (control) {
1055 /* TCP doesn't do control messages (rights, creds, etc) */
1056 if (control->m_len) {
1057 m_freem(control);
1058 if (m)
1059 m_freem(m);
1060 error = EINVAL;
1061 goto out;
1062 }
1063 m_freem(control); /* empty control, just free it */
1064 }
1065 if (!(flags & PRUS_OOB)) {
1066 sbappendstream(&so->so_snd, m, flags);
1067 if (nam && tp->t_state < TCPS_SYN_SENT) {
1068 /*
1069 * Do implied connect if not yet connected,
1070 * initialize window to default value, and
1071 * initialize maxseg using peer's cached MSS.
1072 */
1073 #ifdef INET6
1074 if (isipv6)
1075 error = tcp6_connect(tp, nam, td);
1076 #endif /* INET6 */
1077 #if defined(INET6) && defined(INET)
1078 else
1079 #endif
1080 #ifdef INET
1081 error = tcp_connect(tp,
1082 (struct sockaddr *)sinp, td);
1083 #endif
1084 if (error)
1085 goto out;
1086 if (IS_FASTOPEN(tp->t_flags))
1087 tcp_fastopen_connect(tp);
1088 else {
1089 tp->snd_wnd = TTCP_CLIENT_SND_WND;
1090 tcp_mss(tp, -1);
1091 }
1092 }
1093 if (flags & PRUS_EOF) {
1094 /*
1095 * Close the send side of the connection after
1096 * the data is sent.
1097 */
1098 INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
1099 socantsendmore(so);
1100 tcp_usrclosed(tp);
1101 }
1102 if (!(inp->inp_flags & INP_DROPPED) &&
1103 !(flags & PRUS_NOTREADY)) {
1104 if (flags & PRUS_MORETOCOME)
1105 tp->t_flags |= TF_MORETOCOME;
1106 error = tp->t_fb->tfb_tcp_output(tp);
1107 if (flags & PRUS_MORETOCOME)
1108 tp->t_flags &= ~TF_MORETOCOME;
1109 }
1110 } else {
1111 /*
1112 * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
1113 */
1114 SOCKBUF_LOCK(&so->so_snd);
1115 if (sbspace(&so->so_snd) < -512) {
1116 SOCKBUF_UNLOCK(&so->so_snd);
1117 m_freem(m);
1118 error = ENOBUFS;
1119 goto out;
1120 }
1121 /*
1122 * According to RFC961 (Assigned Protocols),
1123 * the urgent pointer points to the last octet
1124 * of urgent data. We continue, however,
1125 * to consider it to indicate the first octet
1126 * of data past the urgent section.
1127 * Otherwise, snd_up should be one lower.
1128 */
1129 sbappendstream_locked(&so->so_snd, m, flags);
1130 SOCKBUF_UNLOCK(&so->so_snd);
1131 if (nam && tp->t_state < TCPS_SYN_SENT) {
1132 /*
1133 * Do implied connect if not yet connected,
1134 * initialize window to default value, and
1135 * initialize maxseg using peer's cached MSS.
1136 */
1137
1138 /*
1139 * Not going to contemplate SYN|URG
1140 */
1141 if (IS_FASTOPEN(tp->t_flags))
1142 tp->t_flags &= ~TF_FASTOPEN;
1143 #ifdef INET6
1144 if (isipv6)
1145 error = tcp6_connect(tp, nam, td);
1146 #endif /* INET6 */
1147 #if defined(INET6) && defined(INET)
1148 else
1149 #endif
1150 #ifdef INET
1151 error = tcp_connect(tp,
1152 (struct sockaddr *)sinp, td);
1153 #endif
1154 if (error)
1155 goto out;
1156 tp->snd_wnd = TTCP_CLIENT_SND_WND;
1157 tcp_mss(tp, -1);
1158 }
1159 tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
1160 if (!(flags & PRUS_NOTREADY)) {
1161 tp->t_flags |= TF_FORCEDATA;
1162 error = tp->t_fb->tfb_tcp_output(tp);
1163 tp->t_flags &= ~TF_FORCEDATA;
1164 }
1165 }
1166 TCP_LOG_EVENT(tp, NULL,
1167 &inp->inp_socket->so_rcv,
1168 &inp->inp_socket->so_snd,
1169 TCP_LOG_USERSEND, error,
1170 0, NULL, false);
1171 out:
1172 TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
1173 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1174 TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
1175 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1176 INP_WUNLOCK(inp);
1177 if (flags & PRUS_EOF)
1178 INP_INFO_RUNLOCK_ET(&V_tcbinfo, net_et);
1179 return (error);
1180 }
1181
1182 static int
1183 tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
1184 {
1185 struct inpcb *inp;
1186 struct tcpcb *tp;
1187 int error;
1188
1189 inp = sotoinpcb(so);
1190 INP_WLOCK(inp);
1191 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
1192 INP_WUNLOCK(inp);
1193 for (int i = 0; i < count; i++)
1194 m = m_free(m);
1195 return (ECONNRESET);
1196 }
1197 tp = intotcpcb(inp);
1198
1199 SOCKBUF_LOCK(&so->so_snd);
1200 error = sbready(&so->so_snd, m, count);
1201 SOCKBUF_UNLOCK(&so->so_snd);
1202 if (error == 0)
1203 error = tp->t_fb->tfb_tcp_output(tp);
1204 INP_WUNLOCK(inp);
1205
1206 return (error);
1207 }
1208
1209 /*
1210 * Abort the TCP. Drop the connection abruptly.
1211 */
1212 static void
1213 tcp_usr_abort(struct socket *so)
1214 {
1215 struct inpcb *inp;
1216 struct tcpcb *tp = NULL;
1217 struct epoch_tracker et;
1218 TCPDEBUG0;
1219
1220 inp = sotoinpcb(so);
1221 KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
1222
1223 INP_INFO_RLOCK_ET(&V_tcbinfo, et);
1224 INP_WLOCK(inp);
1225 KASSERT(inp->inp_socket != NULL,
1226 ("tcp_usr_abort: inp_socket == NULL"));
1227
1228 /*
1229 * If we still have full TCP state, and we're not dropped, drop.
1230 */
1231 if (!(inp->inp_flags & INP_TIMEWAIT) &&
1232 !(inp->inp_flags & INP_DROPPED)) {
1233 tp = intotcpcb(inp);
1234 TCPDEBUG1();
1235 tp = tcp_drop(tp, ECONNABORTED);
1236 if (tp == NULL)
1237 goto dropped;
1238 TCPDEBUG2(PRU_ABORT);
1239 TCP_PROBE2(debug__user, tp, PRU_ABORT);
1240 }
1241 if (!(inp->inp_flags & INP_DROPPED)) {
1242 SOCK_LOCK(so);
1243 so->so_state |= SS_PROTOREF;
1244 SOCK_UNLOCK(so);
1245 inp->inp_flags |= INP_SOCKREF;
1246 }
1247 INP_WUNLOCK(inp);
1248 dropped:
1249 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
1250 }
1251
1252 /*
1253 * TCP socket is closed. Start friendly disconnect.
1254 */
1255 static void
1256 tcp_usr_close(struct socket *so)
1257 {
1258 struct inpcb *inp;
1259 struct tcpcb *tp = NULL;
1260 struct epoch_tracker et;
1261 TCPDEBUG0;
1262
1263 inp = sotoinpcb(so);
1264 KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
1265
1266 INP_INFO_RLOCK_ET(&V_tcbinfo, et);
1267 INP_WLOCK(inp);
1268 KASSERT(inp->inp_socket != NULL,
1269 ("tcp_usr_close: inp_socket == NULL"));
1270
1271 /*
1272 * If we still have full TCP state, and we're not dropped, initiate
1273 * a disconnect.
1274 */
1275 if (!(inp->inp_flags & INP_TIMEWAIT) &&
1276 !(inp->inp_flags & INP_DROPPED)) {
1277 tp = intotcpcb(inp);
1278 TCPDEBUG1();
1279 tcp_disconnect(tp);
1280 TCPDEBUG2(PRU_CLOSE);
1281 TCP_PROBE2(debug__user, tp, PRU_CLOSE);
1282 }
1283 if (!(inp->inp_flags & INP_DROPPED)) {
1284 SOCK_LOCK(so);
1285 so->so_state |= SS_PROTOREF;
1286 SOCK_UNLOCK(so);
1287 inp->inp_flags |= INP_SOCKREF;
1288 }
1289 INP_WUNLOCK(inp);
1290 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
1291 }
1292
1293 /*
1294 * Receive out-of-band data.
1295 */
1296 static int
1297 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
1298 {
1299 int error = 0;
1300 struct inpcb *inp;
1301 struct tcpcb *tp = NULL;
1302
1303 TCPDEBUG0;
1304 inp = sotoinpcb(so);
1305 KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
1306 INP_WLOCK(inp);
1307 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
1308 error = ECONNRESET;
1309 goto out;
1310 }
1311 tp = intotcpcb(inp);
1312 TCPDEBUG1();
1313 if ((so->so_oobmark == 0 &&
1314 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1315 so->so_options & SO_OOBINLINE ||
1316 tp->t_oobflags & TCPOOB_HADDATA) {
1317 error = EINVAL;
1318 goto out;
1319 }
1320 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1321 error = EWOULDBLOCK;
1322 goto out;
1323 }
1324 m->m_len = 1;
1325 *mtod(m, caddr_t) = tp->t_iobc;
1326 if ((flags & MSG_PEEK) == 0)
1327 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1328
1329 out:
1330 TCPDEBUG2(PRU_RCVOOB);
1331 TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
1332 INP_WUNLOCK(inp);
1333 return (error);
1334 }
1335
#ifdef INET
/*
 * User-request switch for TCP over IPv4.  The initializers are designated,
 * so their order here is purely for readability.
 */
struct pr_usrreqs tcp_usrreqs = {
	/* Socket lifecycle. */
	.pru_attach =		tcp_usr_attach,
	.pru_detach =		tcp_usr_detach,
	.pru_close =		tcp_usr_close,
	.pru_abort =		tcp_usr_abort,

	/* Connection setup and teardown. */
	.pru_bind =		tcp_usr_bind,
	.pru_listen =		tcp_usr_listen,
	.pru_accept =		tcp_usr_accept,
	.pru_connect =		tcp_usr_connect,
	.pru_disconnect =	tcp_usr_disconnect,
	.pru_shutdown =		tcp_usr_shutdown,

	/* Data transfer. */
	.pru_send =		tcp_usr_send,
	.pru_ready =		tcp_usr_ready,
	.pru_rcvd =		tcp_usr_rcvd,
	.pru_rcvoob =		tcp_usr_rcvoob,

	/* Addresses, ioctl-style control and labels. */
	.pru_sockaddr =		in_getsockaddr,
	.pru_peeraddr =		in_getpeeraddr,
	.pru_control =		in_control,
	.pru_sosetlabel =	in_pcbsosetlabel,
};
#endif /* INET */
1358
#ifdef INET6
/*
 * User-request switch for TCP over IPv6.  It shares the address-family
 * independent handlers with the IPv4 table and substitutes the tcp6_ and
 * in6_ variants where they exist.  The initializers are designated, so
 * their order here is purely for readability.
 */
struct pr_usrreqs tcp6_usrreqs = {
	/* Socket lifecycle. */
	.pru_attach =		tcp_usr_attach,
	.pru_detach =		tcp_usr_detach,
	.pru_close =		tcp_usr_close,
	.pru_abort =		tcp_usr_abort,

	/* Connection setup and teardown. */
	.pru_bind =		tcp6_usr_bind,
	.pru_listen =		tcp6_usr_listen,
	.pru_accept =		tcp6_usr_accept,
	.pru_connect =		tcp6_usr_connect,
	.pru_disconnect =	tcp_usr_disconnect,
	.pru_shutdown =		tcp_usr_shutdown,

	/* Data transfer. */
	.pru_send =		tcp_usr_send,
	.pru_ready =		tcp_usr_ready,
	.pru_rcvd =		tcp_usr_rcvd,
	.pru_rcvoob =		tcp_usr_rcvoob,

	/* Addresses, ioctl-style control and labels. */
	.pru_sockaddr =		in6_mapped_sockaddr,
	.pru_peeraddr =		in6_mapped_peeraddr,
	.pru_control =		in6_control,
	.pru_sosetlabel =	in_pcbsosetlabel,
};
#endif /* INET6 */
1381
#ifdef INET
/*
 * Common subroutine to open a TCP connection to the remote host described
 * by "nam".  Binds a local port through in_pcbbind() when none is set, then
 * uses in_pcbconnect_setup() to pick the local address and fill in the
 * foreign address and port.  If in_pcbconnect_setup() reports an existing
 * inpcb for the same connection, EADDRINUSE is returned.  On success the
 * connection parameters are initialized and the tcpcb enters SYN_SENT.
 *
 * The caller must hold the inpcb write lock.  Returns 0 or an errno value.
 */
static int
tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	struct in_addr laddr;
	u_short lport;
	int error;

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK(&V_tcbinfo);

	if (inp->inp_lport == 0) {
		error = in_pcbbind(inp, NULL, td->td_ucred);
		if (error != 0)
			goto fail;
	}

	/*
	 * Cannot simply call in_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 */
	laddr = inp->inp_laddr;
	lport = inp->inp_lport;
	error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
	    &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
	/* A clashing inpcb always maps to EADDRINUSE, whatever was returned. */
	if (oinp != NULL)
		error = EADDRINUSE;
	if (error != 0)
		goto fail;

	inp->inp_laddr = laddr;
	in_pcbrehash(inp);
	INP_HASH_WUNLOCK(&V_tcbinfo);

	/*
	 * Compute window scaling to request:
	 * Scale to fit into sweet spot.  See tcp_syncache.c.
	 * XXX: This should move to tcp_output().
	 */
	for (; tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max;
	    tp->request_r_scale++)
		continue;

	soisconnecting(so);
	TCPSTAT_INC(tcps_connattempt);
	tcp_state_change(tp, TCPS_SYN_SENT);
	tp->iss = tcp_new_isn(&inp->inp_inc);
	if ((tp->t_flags & TF_REQ_TSTMP) != 0)
		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
	tcp_sendseqinit(tp);

	return (0);

fail:
	INP_HASH_WUNLOCK(&V_tcbinfo);
	return (error);
}
#endif /* INET */
1454
#ifdef INET6
/*
 * IPv6 counterpart of tcp_connect(): bind a local port through
 * in6_pcbbind() if none is set, connect the inpcb with in6_pcbconnect(),
 * then initialize the connection parameters and enter SYN_SENT.
 *
 * The caller must hold the inpcb write lock.  Returns 0 or an errno value.
 */
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp = tp->t_inpcb;
	int error;

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK(&V_tcbinfo);

	error = 0;
	if (inp->inp_lport == 0)
		error = in6_pcbbind(inp, NULL, td->td_ucred);
	if (error == 0)
		error = in6_pcbconnect(inp, nam, td->td_ucred);
	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error != 0)
		return (error);

	/* Compute window scaling to request. */
	for (; tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max;
	    tp->request_r_scale++)
		continue;

	soisconnecting(inp->inp_socket);
	TCPSTAT_INC(tcps_connattempt);
	tcp_state_change(tp, TCPS_SYN_SENT);
	tp->iss = tcp_new_isn(&inp->inp_inc);
	if ((tp->t_flags & TF_REQ_TSTMP) != 0)
		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
	tcp_sendseqinit(tp);

	return (0);
}
#endif /* INET6 */
1495
1496 /*
1497 * Export TCP internal state information via a struct tcp_info, based on the
1498 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently
1499 * (TCP state machine, etc). We export all information using FreeBSD-native
1500 * constants -- for example, the numeric values for tcpi_state will differ
1501 * from Linux.
1502 */
1503 static void
1504 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
1505 {
1506
1507 INP_WLOCK_ASSERT(tp->t_inpcb);
1508 bzero(ti, sizeof(*ti));
1509
1510 ti->tcpi_state = tp->t_state;
1511 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
1512 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1513 if (tp->t_flags & TF_SACK_PERMIT)
1514 ti->tcpi_options |= TCPI_OPT_SACK;
1515 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
1516 ti->tcpi_options |= TCPI_OPT_WSCALE;
1517 ti->tcpi_snd_wscale = tp->snd_scale;
1518 ti->tcpi_rcv_wscale = tp->rcv_scale;
1519 }
1520 if (tp->t_flags & TF_ECN_PERMIT)
1521 ti->tcpi_options |= TCPI_OPT_ECN;
1522
1523 ti->tcpi_rto = tp->t_rxtcur * tick;
1524 ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
1525 ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
1526 ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
1527
1528 ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1529 ti->tcpi_snd_cwnd = tp->snd_cwnd;
1530
1531 /*
1532 * FreeBSD-specific extension fields for tcp_info.
1533 */
1534 ti->tcpi_rcv_space = tp->rcv_wnd;
1535 ti->tcpi_rcv_nxt = tp->rcv_nxt;
1536 ti->tcpi_snd_wnd = tp->snd_wnd;
1537 ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */
1538 ti->tcpi_snd_nxt = tp->snd_nxt;
1539 ti->tcpi_snd_mss = tp->t_maxseg;
1540 ti->tcpi_rcv_mss = tp->t_maxseg;
1541 ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
1542 ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
1543 ti->tcpi_snd_zerowin = tp->t_sndzerowin;
1544 #ifdef TCP_OFFLOAD
1545 if (tp->t_flags & TF_TOE) {
1546 ti->tcpi_options |= TCPI_OPT_TOE;
1547 tcp_offload_tcp_info(tp, ti);
1548 }
1549 #endif
1550 }
1551
/*
 * tcp_ctloutput() must drop the inpcb lock before performing copyin on
 * socket option arguments.  When it re-acquires the lock after the copy, it
 * has to revalidate that the connection is still valid for the socket
 * option.
 *
 * INP_WLOCK_RECHECK_CLEANUP() takes the inpcb write lock again.  If the
 * connection has meanwhile entered TIME_WAIT or been dropped it unlocks,
 * runs "cleanup" and makes the *enclosing function* return ECONNRESET;
 * otherwise it reloads the caller's local variable "tp" from the inpcb.
 * INP_WLOCK_RECHECK() is the same with no cleanup statement.
 */
#define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do {			\
	INP_WLOCK(inp);							\
	if (((inp)->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {	\
		INP_WUNLOCK(inp);					\
		cleanup;						\
		return (ECONNRESET);					\
	}								\
	tp = intotcpcb(inp);						\
} while (0)
#define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */)
1568
/*
 * Socket-option entry point for TCP sockets.
 *
 * Options whose level is not IPPROTO_TCP are forwarded to ip6_ctloutput()
 * or ip_ctloutput().  At the TCP level, TCP_FUNCTION_BLK (switching or
 * querying the TCP function block in use) is handled here so that a
 * function block can never override it; every other option is passed to
 * the current function block's tfb_tcp_ctloutput() method.
 *
 * Returns 0 or an errno value; ECONNRESET is returned when the connection
 * is in TIME_WAIT or has been dropped.
 */
int
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int error;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct tcp_function_block *blk;
	struct tcp_function_set fsn;

	error = 0;
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
	INP_WLOCK(inp);
	if (sopt->sopt_level != IPPROTO_TCP) {
#ifdef INET6
		if (inp->inp_vflag & INP_IPV6PROTO) {
			/* The lock is dropped before calling into the IPv6 layer. */
			INP_WUNLOCK(inp);
			error = ip6_ctloutput(so, sopt);
			/*
			 * In case of the IPV6_USE_MIN_MTU socket option,
			 * set the INC_IPV6MINMTU flag to announce a
			 * corresponding MSS during the initial handshake.
			 * If the TCP connection is not in the front states,
			 * just reduce the MSS being used.
			 * This avoids the sending of TCP segments which will
			 * be fragmented at the IPv6 layer.
			 */
			if ((error == 0) &&
			    (sopt->sopt_dir == SOPT_SET) &&
			    (sopt->sopt_level == IPPROTO_IPV6) &&
			    (sopt->sopt_name == IPV6_USE_MIN_MTU)) {
				/* Re-lock and revalidate the connection. */
				INP_WLOCK(inp);
				if ((inp->inp_flags &
				    (INP_TIMEWAIT | INP_DROPPED))) {
					INP_WUNLOCK(inp);
					return (ECONNRESET);
				}
				inp->inp_inc.inc_flags |= INC_IPV6MINMTU;
				tp = intotcpcb(inp);
				if ((tp->t_state >= TCPS_SYN_SENT) &&
				    (inp->inp_inc.inc_flags & INC_ISIPV6)) {
					struct ip6_pktopts *opt;

					opt = inp->in6p_outputopts;
					if ((opt != NULL) &&
					    (opt->ip6po_minmtu ==
					    IP6PO_MINMTU_ALL)) {
						/* Only ever lower the MSS, never raise it. */
						if (tp->t_maxseg > TCP6_MSS) {
							tp->t_maxseg = TCP6_MSS;
						}
					}
				}
				INP_WUNLOCK(inp);
			}
		}
#endif /* INET6 */
#if defined(INET6) && defined(INET)
		else
#endif
#ifdef INET
		{
			INP_WUNLOCK(inp);
			error = ip_ctloutput(so, sopt);
		}
#endif
		return (error);
	}
	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
		INP_WUNLOCK(inp);
		return (ECONNRESET);
	}
	tp = intotcpcb(inp);
	/*
	 * Protect the TCP option TCP_FUNCTION_BLK so
	 * that a sub-function can *never* overwrite this.
	 */
	if ((sopt->sopt_dir == SOPT_SET) &&
	    (sopt->sopt_name == TCP_FUNCTION_BLK)) {
		/* Drop the lock around the copyin, then revalidate. */
		INP_WUNLOCK(inp);
		error = sooptcopyin(sopt, &fsn, sizeof fsn,
		    sizeof fsn);
		if (error)
			return (error);
		INP_WLOCK_RECHECK(inp);
		/*
		 * From here on a reference on blk is held; every exit that
		 * does not install blk must release it.
		 */
		blk = find_and_ref_tcp_functions(&fsn);
		if (blk == NULL) {
			INP_WUNLOCK(inp);
			return (ENOENT);
		}
		if (tp->t_fb == blk) {
			/* You already have this */
			refcount_release(&blk->tfb_refcnt);
			INP_WUNLOCK(inp);
			return (0);
		}
		if (tp->t_state != TCPS_CLOSED) {
			/*
			 * The user has advanced the state
			 * past the initial point, we may not
			 * be able to switch.
			 */
			if (blk->tfb_tcp_handoff_ok != NULL) {
				/*
				 * Does the stack provide a
				 * query mechanism, if so it may
				 * still be possible?
				 */
				error = (*blk->tfb_tcp_handoff_ok)(tp);
			} else
				error = EINVAL;
			if (error) {
				refcount_release(&blk->tfb_refcnt);
				INP_WUNLOCK(inp);
				return(error);
			}
		}
		if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
			refcount_release(&blk->tfb_refcnt);
			INP_WUNLOCK(inp);
			return (ENOENT);
		}
		/*
		 * Release the old refcnt, the
		 * lookup acquired a ref on the
		 * new one already.
		 */
		if (tp->t_fb->tfb_tcp_fb_fini) {
			/*
			 * Tell the stack to cleanup with 0 i.e.
			 * the tcb is not going away.
			 */
			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
		}
#ifdef TCPHPTS
		/* Assure that we are not on any hpts */
		tcp_hpts_remove(tp->t_inpcb, HPTS_REMOVE_ALL);
#endif
		if (blk->tfb_tcp_fb_init) {
			error = (*blk->tfb_tcp_fb_init)(tp);
			if (error) {
				/*
				 * The new stack refused the connection: give
				 * its reference back and try to re-initialize
				 * the old stack, which was already told to
				 * clean up above.
				 */
				refcount_release(&blk->tfb_refcnt);
				if (tp->t_fb->tfb_tcp_fb_init) {
					if((*tp->t_fb->tfb_tcp_fb_init)(tp) != 0) {
						/* Fall back failed, drop the connection */
						INP_WUNLOCK(inp);
						soabort(so);
						return(error);
					}
				}
				goto err_out;
			}
		}
		/* Success: swap the function block, dropping the old reference. */
		refcount_release(&tp->t_fb->tfb_refcnt);
		tp->t_fb = blk;
#ifdef TCP_OFFLOAD
		if (tp->t_flags & TF_TOE) {
			tcp_offload_ctloutput(tp, sopt->sopt_dir,
			    sopt->sopt_name);
		}
#endif
err_out:
		INP_WUNLOCK(inp);
		return (error);
	} else if ((sopt->sopt_dir == SOPT_GET) &&
	    (sopt->sopt_name == TCP_FUNCTION_BLK)) {
		/*
		 * strncpy() does not guarantee termination, so the last
		 * byte is forced to NUL explicitly.
		 */
		strncpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name,
		    TCP_FUNCTION_NAME_LEN_MAX);
		fsn.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
		fsn.pcbcnt = tp->t_fb->tfb_refcnt;
		INP_WUNLOCK(inp);
		error = sooptcopyout(sopt, &fsn, sizeof fsn);
		return (error);
	}
	/* Pass in the INP locked, callee must unlock it */
	return (tp->t_fb->tfb_tcp_ctloutput(so, sopt, inp, tp));
}
1745
1746 /*
1747 * If this assert becomes untrue, we need to change the size of the buf
1748 * variable in tcp_default_ctloutput().
1749 */
1750 #ifdef CTASSERT
1751 CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN);
1752 CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN);
1753 #endif
1754
1755 int
1756 tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
1757 {
1758 int error, opt, optval;
1759 u_int ui;
1760 struct tcp_info ti;
1761 struct cc_algo *algo;
1762 char *pbuf, buf[TCP_LOG_ID_LEN];
1763 size_t len;
1764
1765 /*
1766 * For TCP_CCALGOOPT forward the control to CC module, for both
1767 * SOPT_SET and SOPT_GET.
1768 */
1769 switch (sopt->sopt_name) {
1770 case TCP_CCALGOOPT:
1771 INP_WUNLOCK(inp);
1772 pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
1773 error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize,
1774 sopt->sopt_valsize);
1775 if (error) {
1776 free(pbuf, M_TEMP);
1777 return (error);
1778 }
1779 INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP));
1780 if (CC_ALGO(tp)->ctl_output != NULL)
1781 error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, pbuf);
1782 else
1783 error = ENOENT;
1784 INP_WUNLOCK(inp);
1785 if (error == 0 && sopt->sopt_dir == SOPT_GET)
1786 error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize);
1787 free(pbuf, M_TEMP);
1788 return (error);
1789 }
1790
1791 switch (sopt->sopt_dir) {
1792 case SOPT_SET:
1793 switch (sopt->sopt_name) {
1794 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
1795 case TCP_MD5SIG:
1796 if (!TCPMD5_ENABLED()) {
1797 INP_WUNLOCK(inp);
1798 return (ENOPROTOOPT);
1799 }
1800 error = TCPMD5_PCBCTL(inp, sopt);
1801 if (error)
1802 return (error);
1803 goto unlock_and_done;
1804 #endif /* IPSEC */
1805
1806 case TCP_NODELAY:
1807 case TCP_NOOPT:
1808 INP_WUNLOCK(inp);
1809 error = sooptcopyin(sopt, &optval, sizeof optval,
1810 sizeof optval);
1811 if (error)
1812 return (error);
1813
1814 INP_WLOCK_RECHECK(inp);
1815 switch (sopt->sopt_name) {
1816 case TCP_NODELAY:
1817 opt = TF_NODELAY;
1818 break;
1819 case TCP_NOOPT:
1820 opt = TF_NOOPT;
1821 break;
1822 default:
1823 opt = 0; /* dead code to fool gcc */
1824 break;
1825 }
1826
1827 if (optval)
1828 tp->t_flags |= opt;
1829 else
1830 tp->t_flags &= ~opt;
1831 unlock_and_done:
1832 #ifdef TCP_OFFLOAD
1833 if (tp->t_flags & TF_TOE) {
1834 tcp_offload_ctloutput(tp, sopt->sopt_dir,
1835 sopt->sopt_name);
1836 }
1837 #endif
1838 INP_WUNLOCK(inp);
1839 break;
1840
1841 case TCP_NOPUSH:
1842 INP_WUNLOCK(inp);
1843 error = sooptcopyin(sopt, &optval, sizeof optval,
1844 sizeof optval);
1845 if (error)
1846 return (error);
1847
1848 INP_WLOCK_RECHECK(inp);
1849 if (optval)
1850 tp->t_flags |= TF_NOPUSH;
1851 else if (tp->t_flags & TF_NOPUSH) {
1852 tp->t_flags &= ~TF_NOPUSH;
1853 if (TCPS_HAVEESTABLISHED(tp->t_state))
1854 error = tp->t_fb->tfb_tcp_output(tp);
1855 }
1856 goto unlock_and_done;
1857
1858 case TCP_MAXSEG:
1859 INP_WUNLOCK(inp);
1860 error = sooptcopyin(sopt, &optval, sizeof optval,
1861 sizeof optval);
1862 if (error)
1863 return (error);
1864
1865 INP_WLOCK_RECHECK(inp);
1866 if (optval > 0 && optval <= tp->t_maxseg &&
1867 optval + 40 >= V_tcp_minmss)
1868 tp->t_maxseg = optval;
1869 else
1870 error = EINVAL;
1871 goto unlock_and_done;
1872
1873 case TCP_INFO:
1874 INP_WUNLOCK(inp);
1875 error = EINVAL;
1876 break;
1877
1878 case TCP_CONGESTION:
1879 INP_WUNLOCK(inp);
1880 error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
1881 if (error)
1882 break;
1883 buf[sopt->sopt_valsize] = '\0';
1884 INP_WLOCK_RECHECK(inp);
1885 CC_LIST_RLOCK();
1886 STAILQ_FOREACH(algo, &cc_list, entries)
1887 if (strncmp(buf, algo->name,
1888 TCP_CA_NAME_MAX) == 0)
1889 break;
1890 CC_LIST_RUNLOCK();
1891 if (algo == NULL) {
1892 INP_WUNLOCK(inp);
1893 error = EINVAL;
1894 break;
1895 }
1896 /*
1897 * We hold a write lock over the tcb so it's safe to
1898 * do these things without ordering concerns.
1899 */
1900 if (CC_ALGO(tp)->cb_destroy != NULL)
1901 CC_ALGO(tp)->cb_destroy(tp->ccv);
1902 CC_DATA(tp) = NULL;
1903 CC_ALGO(tp) = algo;
1904 /*
1905 * If something goes pear shaped initialising the new
1906 * algo, fall back to newreno (which does not
1907 * require initialisation).
1908 */
1909 if (algo->cb_init != NULL &&
1910 algo->cb_init(tp->ccv) != 0) {
1911 CC_ALGO(tp) = &newreno_cc_algo;
1912 /*
1913 * The only reason init should fail is
1914 * because of malloc.
1915 */
1916 error = ENOMEM;
1917 }
1918 INP_WUNLOCK(inp);
1919 break;
1920
1921 case TCP_KEEPIDLE:
1922 case TCP_KEEPINTVL:
1923 case TCP_KEEPINIT:
1924 INP_WUNLOCK(inp);
1925 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
1926 if (error)
1927 return (error);
1928
1929 if (ui > (UINT_MAX / hz)) {
1930 error = EINVAL;
1931 break;
1932 }
1933 ui *= hz;
1934
1935 INP_WLOCK_RECHECK(inp);
1936 switch (sopt->sopt_name) {
1937 case TCP_KEEPIDLE:
1938 tp->t_keepidle = ui;
1939 /*
1940 * XXX: better check current remaining
1941 * timeout and "merge" it with new value.
1942 */
1943 if ((tp->t_state > TCPS_LISTEN) &&
1944 (tp->t_state <= TCPS_CLOSING))
1945 tcp_timer_activate(tp, TT_KEEP,
1946 TP_KEEPIDLE(tp));
1947 break;
1948 case TCP_KEEPINTVL:
1949 tp->t_keepintvl = ui;
1950 if ((tp->t_state == TCPS_FIN_WAIT_2) &&
1951 (TP_MAXIDLE(tp) > 0))
1952 tcp_timer_activate(tp, TT_2MSL,
1953 TP_MAXIDLE(tp));
1954 break;
1955 case TCP_KEEPINIT:
1956 tp->t_keepinit = ui;
1957 if (tp->t_state == TCPS_SYN_RECEIVED ||
1958 tp->t_state == TCPS_SYN_SENT)
1959 tcp_timer_activate(tp, TT_KEEP,
1960 TP_KEEPINIT(tp));
1961 break;
1962 }
1963 goto unlock_and_done;
1964
1965 case TCP_KEEPCNT:
1966 INP_WUNLOCK(inp);
1967 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
1968 if (error)
1969 return (error);
1970
1971 INP_WLOCK_RECHECK(inp);
1972 tp->t_keepcnt = ui;
1973 if ((tp->t_state == TCPS_FIN_WAIT_2) &&
1974 (TP_MAXIDLE(tp) > 0))
1975 tcp_timer_activate(tp, TT_2MSL,
1976 TP_MAXIDLE(tp));
1977 goto unlock_and_done;
1978
1979 #ifdef TCPPCAP
1980 case TCP_PCAP_OUT:
1981 case TCP_PCAP_IN:
1982 INP_WUNLOCK(inp);
1983 error = sooptcopyin(sopt, &optval, sizeof optval,
1984 sizeof optval);
1985 if (error)
1986 return (error);
1987
1988 INP_WLOCK_RECHECK(inp);
1989 if (optval >= 0)
1990 tcp_pcap_set_sock_max(TCP_PCAP_OUT ?
1991 &(tp->t_outpkts) : &(tp->t_inpkts),
1992 optval);
1993 else
1994 error = EINVAL;
1995 goto unlock_and_done;
1996 #endif
1997
1998 case TCP_FASTOPEN: {
1999 struct tcp_fastopen tfo_optval;
2000
2001 INP_WUNLOCK(inp);
2002 if (!V_tcp_fastopen_client_enable &&
2003 !V_tcp_fastopen_server_enable)
2004 return (EPERM);
2005
2006 error = sooptcopyin(sopt, &tfo_optval,
2007 sizeof(tfo_optval), sizeof(int));
2008 if (error)
2009 return (error);
2010
2011 INP_WLOCK_RECHECK(inp);
2012 if (tfo_optval.enable) {
2013 if (tp->t_state == TCPS_LISTEN) {
2014 if (!V_tcp_fastopen_server_enable) {
2015 error = EPERM;
2016 goto unlock_and_done;
2017 }
2018
2019 tp->t_flags |= TF_FASTOPEN;
2020 if (tp->t_tfo_pending == NULL)
2021 tp->t_tfo_pending =
2022 tcp_fastopen_alloc_counter();
2023 } else {
2024 /*
2025 * If a pre-shared key was provided,
2026 * stash it in the client cookie
2027 * field of the tcpcb for use during
2028 * connect.
2029 */
2030 if (sopt->sopt_valsize ==
2031 sizeof(tfo_optval)) {
2032 memcpy(tp->t_tfo_cookie.client,
2033 tfo_optval.psk,
2034 TCP_FASTOPEN_PSK_LEN);
2035 tp->t_tfo_client_cookie_len =
2036 TCP_FASTOPEN_PSK_LEN;
2037 }
2038 tp->t_flags |= TF_FASTOPEN;
2039 }
2040 } else
2041 tp->t_flags &= ~TF_FASTOPEN;
2042 goto unlock_and_done;
2043 }
2044
2045 #ifdef TCP_BLACKBOX
2046 case TCP_LOG:
2047 INP_WUNLOCK(inp);
2048 error = sooptcopyin(sopt, &optval, sizeof optval,
2049 sizeof optval);
2050 if (error)
2051 return (error);
2052
2053 INP_WLOCK_RECHECK(inp);
2054 error = tcp_log_state_change(tp, optval);
2055 goto unlock_and_done;
2056
2057 case TCP_LOGBUF:
2058 INP_WUNLOCK(inp);
2059 error = EINVAL;
2060 break;
2061
2062 case TCP_LOGID:
2063 INP_WUNLOCK(inp);
2064 error = sooptcopyin(sopt, buf, TCP_LOG_ID_LEN - 1, 0);
2065 if (error)
2066 break;
2067 buf[sopt->sopt_valsize] = '\0';
2068 INP_WLOCK_RECHECK(inp);
2069 error = tcp_log_set_id(tp, buf);
2070 /* tcp_log_set_id() unlocks the INP. */
2071 break;
2072
2073 case TCP_LOGDUMP:
2074 case TCP_LOGDUMPID:
2075 INP_WUNLOCK(inp);
2076 error =
2077 sooptcopyin(sopt, buf, TCP_LOG_REASON_LEN - 1, 0);
2078 if (error)
2079 break;
2080 buf[sopt->sopt_valsize] = '\0';
2081 INP_WLOCK_RECHECK(inp);
2082 if (sopt->sopt_name == TCP_LOGDUMP) {
2083 error = tcp_log_dump_tp_logbuf(tp, buf,
2084 M_WAITOK, true);
2085 INP_WUNLOCK(inp);
2086 } else {
2087 tcp_log_dump_tp_bucket_logbufs(tp, buf);
2088 /*
2089 * tcp_log_dump_tp_bucket_logbufs() drops the
2090 * INP lock.
2091 */
2092 }
2093 break;
2094 #endif
2095
2096 default:
2097 INP_WUNLOCK(inp);
2098 error = ENOPROTOOPT;
2099 break;
2100 }
2101 break;
2102
2103 case SOPT_GET:
2104 tp = intotcpcb(inp);
2105 switch (sopt->sopt_name) {
2106 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
2107 case TCP_MD5SIG:
2108 if (!TCPMD5_ENABLED()) {
2109 INP_WUNLOCK(inp);
2110 return (ENOPROTOOPT);
2111 }
2112 error = TCPMD5_PCBCTL(inp, sopt);
2113 break;
2114 #endif
2115
2116 case TCP_NODELAY:
2117 optval = tp->t_flags & TF_NODELAY;
2118 INP_WUNLOCK(inp);
2119 error = sooptcopyout(sopt, &optval, sizeof optval);
2120 break;
2121 case TCP_MAXSEG:
2122 optval = tp->t_maxseg;
2123 INP_WUNLOCK(inp);
2124 error = sooptcopyout(sopt, &optval, sizeof optval);
2125 break;
2126 case TCP_NOOPT:
2127 optval = tp->t_flags & TF_NOOPT;
2128 INP_WUNLOCK(inp);
2129 error = sooptcopyout(sopt, &optval, sizeof optval);
2130 break;
2131 case TCP_NOPUSH:
2132 optval = tp->t_flags & TF_NOPUSH;
2133 INP_WUNLOCK(inp);
2134 error = sooptcopyout(sopt, &optval, sizeof optval);
2135 break;
2136 case TCP_INFO:
2137 tcp_fill_info(tp, &ti);
2138 INP_WUNLOCK(inp);
2139 error = sooptcopyout(sopt, &ti, sizeof ti);
2140 break;
2141 case TCP_CONGESTION:
2142 len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
2143 INP_WUNLOCK(inp);
2144 error = sooptcopyout(sopt, buf, len + 1);
2145 break;
2146 case TCP_KEEPIDLE:
2147 case TCP_KEEPINTVL:
2148 case TCP_KEEPINIT:
2149 case TCP_KEEPCNT:
2150 switch (sopt->sopt_name) {
2151 case TCP_KEEPIDLE:
2152 ui = TP_KEEPIDLE(tp) / hz;
2153 break;
2154 case TCP_KEEPINTVL:
2155 ui = TP_KEEPINTVL(tp) / hz;
2156 break;
2157 case TCP_KEEPINIT:
2158 ui = TP_KEEPINIT(tp) / hz;
2159 break;
2160 case TCP_KEEPCNT:
2161 ui = TP_KEEPCNT(tp);
2162 break;
2163 }
2164 INP_WUNLOCK(inp);
2165 error = sooptcopyout(sopt, &ui, sizeof(ui));
2166 break;
2167 #ifdef TCPPCAP
2168 case TCP_PCAP_OUT:
2169 case TCP_PCAP_IN:
2170 optval = tcp_pcap_get_sock_max(TCP_PCAP_OUT ?
2171 &(tp->t_outpkts) : &(tp->t_inpkts));
2172 INP_WUNLOCK(inp);
2173 error = sooptcopyout(sopt, &optval, sizeof optval);
2174 break;
2175 #endif
2176 case TCP_FASTOPEN:
2177 optval = tp->t_flags & TF_FASTOPEN;
2178 INP_WUNLOCK(inp);
2179 error = sooptcopyout(sopt, &optval, sizeof optval);
2180 break;
2181 #ifdef TCP_BLACKBOX
2182 case TCP_LOG:
2183 optval = tp->t_logstate;
2184 INP_WUNLOCK(inp);
2185 error = sooptcopyout(sopt, &optval, sizeof(optval));
2186 break;
2187 case TCP_LOGBUF:
2188 /* tcp_log_getlogbuf() does INP_WUNLOCK(inp) */
2189 error = tcp_log_getlogbuf(sopt, tp);
2190 break;
2191 case TCP_LOGID:
2192 len = tcp_log_get_id(tp, buf);
2193 INP_WUNLOCK(inp);
2194 error = sooptcopyout(sopt, buf, len + 1);
2195 break;
2196 case TCP_LOGDUMP:
2197 case TCP_LOGDUMPID:
2198 INP_WUNLOCK(inp);
2199 error = EINVAL;
2200 break;
2201 #endif
2202 default:
2203 INP_WUNLOCK(inp);
2204 error = ENOPROTOOPT;
2205 break;
2206 }
2207 break;
2208 }
2209 return (error);
2210 }
2211 #undef INP_WLOCK_RECHECK
2212 #undef INP_WLOCK_RECHECK_CLEANUP
2213
2214 /*
2215 * Attach TCP protocol to socket, allocating
2216 * internet protocol control block, tcp control block,
2217 * bufer space, and entering LISTEN state if to accept connections.
2218 */
2219 static int
2220 tcp_attach(struct socket *so)
2221 {
2222 struct tcpcb *tp;
2223 struct inpcb *inp;
2224 struct epoch_tracker et;
2225 int error;
2226
2227 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
2228 error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
2229 if (error)
2230 return (error);
2231 }
2232 so->so_rcv.sb_flags |= SB_AUTOSIZE;
2233 so->so_snd.sb_flags |= SB_AUTOSIZE;
2234 INP_INFO_RLOCK_ET(&V_tcbinfo, et);
2235 error = in_pcballoc(so, &V_tcbinfo);
2236 if (error) {
2237 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
2238 return (error);
2239 }
2240 inp = sotoinpcb(so);
2241 #ifdef INET6
2242 if (inp->inp_vflag & INP_IPV6PROTO) {
2243 inp->inp_vflag |= INP_IPV6;
2244 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
2245 inp->inp_vflag |= INP_IPV4;
2246 inp->in6p_hops = -1; /* use kernel default */
2247 }
2248 else
2249 #endif
2250 inp->inp_vflag |= INP_IPV4;
2251 tp = tcp_newtcpcb(inp);
2252 if (tp == NULL) {
2253 in_pcbdetach(inp);
2254 in_pcbfree(inp);
2255 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
2256 return (ENOBUFS);
2257 }
2258 tp->t_state = TCPS_CLOSED;
2259 INP_WUNLOCK(inp);
2260 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
2261 TCPSTATES_INC(TCPS_CLOSED);
2262 return (0);
2263 }
2264
2265 /*
2266 * Initiate (or continue) disconnect.
2267 * If embryonic state, just send reset (once).
2268 * If in ``let data drain'' option and linger null, just drop.
2269 * Otherwise (hard), mark socket disconnecting and drop
2270 * current input data; switch states based on user close, and
2271 * send segment to peer (with FIN).
2272 */
2273 static void
2274 tcp_disconnect(struct tcpcb *tp)
2275 {
2276 struct inpcb *inp = tp->t_inpcb;
2277 struct socket *so = inp->inp_socket;
2278
2279 INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
2280 INP_WLOCK_ASSERT(inp);
2281
2282 /*
2283 * Neither tcp_close() nor tcp_drop() should return NULL, as the
2284 * socket is still open.
2285 */
2286 if (tp->t_state < TCPS_ESTABLISHED &&
2287 !(tp->t_state > TCPS_LISTEN && IS_FASTOPEN(tp->t_flags))) {
2288 tp = tcp_close(tp);
2289 KASSERT(tp != NULL,
2290 ("tcp_disconnect: tcp_close() returned NULL"));
2291 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
2292 tp = tcp_drop(tp, 0);
2293 KASSERT(tp != NULL,
2294 ("tcp_disconnect: tcp_drop() returned NULL"));
2295 } else {
2296 soisdisconnecting(so);
2297 sbflush(&so->so_rcv);
2298 tcp_usrclosed(tp);
2299 if (!(inp->inp_flags & INP_DROPPED))
2300 tp->t_fb->tfb_tcp_output(tp);
2301 }
2302 }
2303
2304 /*
2305 * User issued close, and wish to trail through shutdown states:
2306 * if never received SYN, just forget it. If got a SYN from peer,
2307 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
2308 * If already got a FIN from peer, then almost done; go to LAST_ACK
2309 * state. In all other cases, have already sent FIN to peer (e.g.
2310 * after PRU_SHUTDOWN), and just have to play tedious game waiting
2311 * for peer to send FIN or not respond to keep-alives, etc.
2312 * We can let the user exit from the close as soon as the FIN is acked.
2313 */
2314 static void
2315 tcp_usrclosed(struct tcpcb *tp)
2316 {
2317
2318 INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
2319 INP_WLOCK_ASSERT(tp->t_inpcb);
2320
2321 switch (tp->t_state) {
2322 case TCPS_LISTEN:
2323 #ifdef TCP_OFFLOAD
2324 tcp_offload_listen_stop(tp);
2325 #endif
2326 tcp_state_change(tp, TCPS_CLOSED);
2327 /* FALLTHROUGH */
2328 case TCPS_CLOSED:
2329 tp = tcp_close(tp);
2330 /*
2331 * tcp_close() should never return NULL here as the socket is
2332 * still open.
2333 */
2334 KASSERT(tp != NULL,
2335 ("tcp_usrclosed: tcp_close() returned NULL"));
2336 break;
2337
2338 case TCPS_SYN_SENT:
2339 case TCPS_SYN_RECEIVED:
2340 tp->t_flags |= TF_NEEDFIN;
2341 break;
2342
2343 case TCPS_ESTABLISHED:
2344 tcp_state_change(tp, TCPS_FIN_WAIT_1);
2345 break;
2346
2347 case TCPS_CLOSE_WAIT:
2348 tcp_state_change(tp, TCPS_LAST_ACK);
2349 break;
2350 }
2351 if (tp->t_state >= TCPS_FIN_WAIT_2) {
2352 soisdisconnected(tp->t_inpcb->inp_socket);
2353 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
2354 if (tp->t_state == TCPS_FIN_WAIT_2) {
2355 int timeout;
2356
2357 timeout = (tcp_fast_finwait2_recycle) ?
2358 tcp_finwait2_timeout : TP_MAXIDLE(tp);
2359 tcp_timer_activate(tp, TT_2MSL, timeout);
2360 }
2361 }
2362 }
2363
2364 #ifdef DDB
/*
 * Emit `indent' copies of the indentation string to the debugger console.
 * A zero or negative count prints nothing.
 */
static void
db_print_indent(int indent)
{
	int remaining;

	for (remaining = indent; remaining > 0; remaining--)
		db_printf(" ");
}
2373
2374 static void
2375 db_print_tstate(int t_state)
2376 {
2377
2378 switch (t_state) {
2379 case TCPS_CLOSED:
2380 db_printf("TCPS_CLOSED");
2381 return;
2382
2383 case TCPS_LISTEN:
2384 db_printf("TCPS_LISTEN");
2385 return;
2386
2387 case TCPS_SYN_SENT:
2388 db_printf("TCPS_SYN_SENT");
2389 return;
2390
2391 case TCPS_SYN_RECEIVED:
2392 db_printf("TCPS_SYN_RECEIVED");
2393 return;
2394
2395 case TCPS_ESTABLISHED:
2396 db_printf("TCPS_ESTABLISHED");
2397 return;
2398
2399 case TCPS_CLOSE_WAIT:
2400 db_printf("TCPS_CLOSE_WAIT");
2401 return;
2402
2403 case TCPS_FIN_WAIT_1:
2404 db_printf("TCPS_FIN_WAIT_1");
2405 return;
2406
2407 case TCPS_CLOSING:
2408 db_printf("TCPS_CLOSING");
2409 return;
2410
2411 case TCPS_LAST_ACK:
2412 db_printf("TCPS_LAST_ACK");
2413 return;
2414
2415 case TCPS_FIN_WAIT_2:
2416 db_printf("TCPS_FIN_WAIT_2");
2417 return;
2418
2419 case TCPS_TIME_WAIT:
2420 db_printf("TCPS_TIME_WAIT");
2421 return;
2422
2423 default:
2424 db_printf("unknown");
2425 return;
2426 }
2427 }
2428
2429 static void
2430 db_print_tflags(u_int t_flags)
2431 {
2432 int comma;
2433
2434 comma = 0;
2435 if (t_flags & TF_ACKNOW) {
2436 db_printf("%sTF_ACKNOW", comma ? ", " : "");
2437 comma = 1;
2438 }
2439 if (t_flags & TF_DELACK) {
2440 db_printf("%sTF_DELACK", comma ? ", " : "");
2441 comma = 1;
2442 }
2443 if (t_flags & TF_NODELAY) {
2444 db_printf("%sTF_NODELAY", comma ? ", " : "");
2445 comma = 1;
2446 }
2447 if (t_flags & TF_NOOPT) {
2448 db_printf("%sTF_NOOPT", comma ? ", " : "");
2449 comma = 1;
2450 }
2451 if (t_flags & TF_SENTFIN) {
2452 db_printf("%sTF_SENTFIN", comma ? ", " : "");
2453 comma = 1;
2454 }
2455 if (t_flags & TF_REQ_SCALE) {
2456 db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
2457 comma = 1;
2458 }
2459 if (t_flags & TF_RCVD_SCALE) {
2460 db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");
2461 comma = 1;
2462 }
2463 if (t_flags & TF_REQ_TSTMP) {
2464 db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
2465 comma = 1;
2466 }
2467 if (t_flags & TF_RCVD_TSTMP) {
2468 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
2469 comma = 1;
2470 }
2471 if (t_flags & TF_SACK_PERMIT) {
2472 db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
2473 comma = 1;
2474 }
2475 if (t_flags & TF_NEEDSYN) {
2476 db_printf("%sTF_NEEDSYN", comma ? ", " : "");
2477 comma = 1;
2478 }
2479 if (t_flags & TF_NEEDFIN) {
2480 db_printf("%sTF_NEEDFIN", comma ? ", " : "");
2481 comma = 1;
2482 }
2483 if (t_flags & TF_NOPUSH) {
2484 db_printf("%sTF_NOPUSH", comma ? ", " : "");
2485 comma = 1;
2486 }
2487 if (t_flags & TF_MORETOCOME) {
2488 db_printf("%sTF_MORETOCOME", comma ? ", " : "");
2489 comma = 1;
2490 }
2491 if (t_flags & TF_LQ_OVERFLOW) {
2492 db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
2493 comma = 1;
2494 }
2495 if (t_flags & TF_LASTIDLE) {
2496 db_printf("%sTF_LASTIDLE", comma ? ", " : "");
2497 comma = 1;
2498 }
2499 if (t_flags & TF_RXWIN0SENT) {
2500 db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
2501 comma = 1;
2502 }
2503 if (t_flags & TF_FASTRECOVERY) {
2504 db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
2505 comma = 1;
2506 }
2507 if (t_flags & TF_CONGRECOVERY) {
2508 db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
2509 comma = 1;
2510 }
2511 if (t_flags & TF_WASFRECOVERY) {
2512 db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
2513 comma = 1;
2514 }
2515 if (t_flags & TF_SIGNATURE) {
2516 db_printf("%sTF_SIGNATURE", comma ? ", " : "");
2517 comma = 1;
2518 }
2519 if (t_flags & TF_FORCEDATA) {
2520 db_printf("%sTF_FORCEDATA", comma ? ", " : "");
2521 comma = 1;
2522 }
2523 if (t_flags & TF_TSO) {
2524 db_printf("%sTF_TSO", comma ? ", " : "");
2525 comma = 1;
2526 }
2527 if (t_flags & TF_ECN_PERMIT) {
2528 db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
2529 comma = 1;
2530 }
2531 if (t_flags & TF_FASTOPEN) {
2532 db_printf("%sTF_FASTOPEN", comma ? ", " : "");
2533 comma = 1;
2534 }
2535 }
2536
2537 static void
2538 db_print_toobflags(char t_oobflags)
2539 {
2540 int comma;
2541
2542 comma = 0;
2543 if (t_oobflags & TCPOOB_HAVEDATA) {
2544 db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
2545 comma = 1;
2546 }
2547 if (t_oobflags & TCPOOB_HADDATA) {
2548 db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
2549 comma = 1;
2550 }
2551 }
2552
2553 static void
2554 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
2555 {
2556
2557 db_print_indent(indent);
2558 db_printf("%s at %p\n", name, tp);
2559
2560 indent += 2;
2561
2562 db_print_indent(indent);
2563 db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
2564 TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
2565
2566 db_print_indent(indent);
2567 db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
2568 &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
2569
2570 db_print_indent(indent);
2571 db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl,
2572 &tp->t_timers->tt_delack, tp->t_inpcb);
2573
2574 db_print_indent(indent);
2575 db_printf("t_state: %d (", tp->t_state);
2576 db_print_tstate(tp->t_state);
2577 db_printf(")\n");
2578
2579 db_print_indent(indent);
2580 db_printf("t_flags: 0x%x (", tp->t_flags);
2581 db_print_tflags(tp->t_flags);
2582 db_printf(")\n");
2583
2584 db_print_indent(indent);
2585 db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: x0%08x\n",
2586 tp->snd_una, tp->snd_max, tp->snd_nxt);
2587
2588 db_print_indent(indent);
2589 db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n",
2590 tp->snd_up, tp->snd_wl1, tp->snd_wl2);
2591
2592 db_print_indent(indent);
2593 db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n",
2594 tp->iss, tp->irs, tp->rcv_nxt);
2595
2596 db_print_indent(indent);
2597 db_printf("rcv_adv: 0x%08x rcv_wnd: %u rcv_up: 0x%08x\n",
2598 tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
2599
2600 db_print_indent(indent);
2601 db_printf("snd_wnd: %u snd_cwnd: %u\n",
2602 tp->snd_wnd, tp->snd_cwnd);
2603
2604 db_print_indent(indent);
2605 db_printf("snd_ssthresh: %u snd_recover: "
2606 "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
2607
2608 db_print_indent(indent);
2609 db_printf("t_rcvtime: %u t_startime: %u\n",
2610 tp->t_rcvtime, tp->t_starttime);
2611
2612 db_print_indent(indent);
2613 db_printf("t_rttime: %u t_rtsq: 0x%08x\n",
2614 tp->t_rtttime, tp->t_rtseq);
2615
2616 db_print_indent(indent);
2617 db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n",
2618 tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
2619
2620 db_print_indent(indent);
2621 db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u "
2622 "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
2623 tp->t_rttbest);
2624
2625 db_print_indent(indent);
2626 db_printf("t_rttupdated: %lu max_sndwnd: %u t_softerror: %d\n",
2627 tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
2628
2629 db_print_indent(indent);
2630 db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
2631 db_print_toobflags(tp->t_oobflags);
2632 db_printf(") t_iobc: 0x%02x\n", tp->t_iobc);
2633
2634 db_print_indent(indent);
2635 db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n",
2636 tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
2637
2638 db_print_indent(indent);
2639 db_printf("ts_recent: %u ts_recent_age: %u\n",
2640 tp->ts_recent, tp->ts_recent_age);
2641
2642 db_print_indent(indent);
2643 db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: "
2644 "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
2645
2646 db_print_indent(indent);
2647 db_printf("snd_ssthresh_prev: %u snd_recover_prev: 0x%08x "
2648 "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
2649 tp->snd_recover_prev, tp->t_badrxtwin);
2650
2651 db_print_indent(indent);
2652 db_printf("snd_numholes: %d snd_holes first: %p\n",
2653 tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
2654
2655 db_print_indent(indent);
2656 db_printf("snd_fack: 0x%08x rcv_numsacks: %d sack_newdata: "
2657 "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata);
2658
2659 /* Skip sackblks, sackhint. */
2660
2661 db_print_indent(indent);
2662 db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n",
2663 tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
2664 }
2665
2666 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
2667 {
2668 struct tcpcb *tp;
2669
2670 if (!have_addr) {
2671 db_printf("usage: show tcpcb <addr>\n");
2672 return;
2673 }
2674 tp = (struct tcpcb *)addr;
2675
2676 db_print_tcpcb(tp, "tcpcb", 0);
2677 }
2678 #endif
Cache object: f65798cc4d32c5e6f35dab296c2d11b3
|