The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1988, 1993
    5  *      The Regents of the University of California.
    6  * Copyright (c) 2006-2007 Robert N. M. Watson
    7  * Copyright (c) 2010-2011 Juniper Networks, Inc.
    8  * All rights reserved.
    9  *
   10  * Portions of this software were developed by Robert N. M. Watson under
   11  * contract to Juniper Networks, Inc.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      From: @(#)tcp_usrreq.c  8.2 (Berkeley) 1/3/94
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD$");
   42 
   43 #include "opt_ddb.h"
   44 #include "opt_inet.h"
   45 #include "opt_inet6.h"
   46 #include "opt_ipsec.h"
   47 #include "opt_kern_tls.h"
   48 
   49 #include <sys/param.h>
   50 #include <sys/systm.h>
   51 #include <sys/arb.h>
   52 #include <sys/limits.h>
   53 #include <sys/malloc.h>
   54 #include <sys/refcount.h>
   55 #include <sys/kernel.h>
   56 #include <sys/ktls.h>
   57 #include <sys/qmath.h>
   58 #include <sys/sysctl.h>
   59 #include <sys/mbuf.h>
   60 #ifdef INET6
   61 #include <sys/domain.h>
   62 #endif /* INET6 */
   63 #include <sys/socket.h>
   64 #include <sys/socketvar.h>
   65 #include <sys/protosw.h>
   66 #include <sys/proc.h>
   67 #include <sys/jail.h>
   68 #include <sys/stats.h>
   69 
   70 #ifdef DDB
   71 #include <ddb/ddb.h>
   72 #endif
   73 
   74 #include <net/if.h>
   75 #include <net/if_var.h>
   76 #include <net/route.h>
   77 #include <net/vnet.h>
   78 
   79 #include <netinet/in.h>
   80 #include <netinet/in_kdtrace.h>
   81 #include <netinet/in_pcb.h>
   82 #include <netinet/in_systm.h>
   83 #include <netinet/in_var.h>
   84 #include <netinet/ip.h>
   85 #include <netinet/ip_var.h>
   86 #ifdef INET6
   87 #include <netinet/ip6.h>
   88 #include <netinet6/in6_pcb.h>
   89 #include <netinet6/ip6_var.h>
   90 #include <netinet6/scope6_var.h>
   91 #endif
   92 #include <netinet/tcp.h>
   93 #include <netinet/tcp_fsm.h>
   94 #include <netinet/tcp_seq.h>
   95 #include <netinet/tcp_timer.h>
   96 #include <netinet/tcp_var.h>
   97 #include <netinet/tcp_log_buf.h>
   98 #include <netinet/tcpip.h>
   99 #include <netinet/cc/cc.h>
  100 #include <netinet/tcp_fastopen.h>
  101 #include <netinet/tcp_hpts.h>
  102 #ifdef TCPPCAP
  103 #include <netinet/tcp_pcap.h>
  104 #endif
  105 #ifdef TCP_OFFLOAD
  106 #include <netinet/tcp_offload.h>
  107 #endif
  108 #include <netipsec/ipsec_support.h>
  109 
  110 #include <vm/vm.h>
  111 #include <vm/vm_param.h>
  112 #include <vm/pmap.h>
  113 #include <vm/vm_extern.h>
  114 #include <vm/vm_map.h>
  115 #include <vm/vm_page.h>
  116 
  117 /*
  118  * TCP protocol interface to socket abstraction.
       *
       * Forward declarations of helpers private to this file (all are static).
       * The address-family specific connect helpers are only declared when the
       * matching INET / INET6 option is configured.
  119  */
  120 #ifdef INET
  121 static int      tcp_connect(struct tcpcb *, struct sockaddr *,
  122                     struct thread *td);
  123 #endif /* INET */
  124 #ifdef INET6
  125 static int      tcp6_connect(struct tcpcb *, struct sockaddr *,
  126                     struct thread *td);
  127 #endif /* INET6 */
  128 static void     tcp_disconnect(struct tcpcb *);
  129 static void     tcp_usrclosed(struct tcpcb *);
  130 static void     tcp_fill_info(struct tcpcb *, struct tcp_info *);
  131 
  132 static int      tcp_pru_options_support(struct tcpcb *tp, int flags);
  133 
  134 /*
  135  * tcp_require_unique_port requires a globally-unique source port for each
  136  * outgoing connection.  The default is to require the 4-tuple to be unique.
       *
       * The knob is a per-VNET integer initialised to 0 and exported read-write
       * as the require_unique_port sysctl below _net_inet_tcp.  Code reads it
       * through the V_tcp_require_unique_port accessor defined at the end of
       * this block.
  137  */
  138 VNET_DEFINE(int, tcp_require_unique_port) = 0;
  139 SYSCTL_INT(_net_inet_tcp, OID_AUTO, require_unique_port,
  140     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_require_unique_port), 0,
  141     "Require globally-unique ephemeral port for outgoing connections");
  142 #define V_tcp_require_unique_port       VNET(tcp_require_unique_port)
  143 
  144 /*
  145  * pru_attach() for TCP: reserve socket buffer space, then allocate an
  146  * internet control block and a TCP control block in the CLOSED state.
  147  */
  148 static int
  149 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
  150 {
  151         struct tcpcb *tp = NULL;
  152         struct inpcb *inp;
  153         int error;
  154 
  155         inp = sotoinpcb(so);
  156         KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
  157 
  158         error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
  159         if (error == 0) {
  160                 /* Space reserved: flag both buffers, then get a PCB. */
  161                 so->so_rcv.sb_flags |= SB_AUTOSIZE;
  162                 so->so_snd.sb_flags |= SB_AUTOSIZE;
  163                 error = in_pcballoc(so, &V_tcbinfo);
  164         }
  165         if (error == 0) {
  166                 inp = sotoinpcb(so);
  167                 tp = tcp_newtcpcb(inp);
  168                 if (tp != NULL) {
  169                         tp->t_state = TCPS_CLOSED;
  170                         INP_WUNLOCK(inp);
  171                         TCPSTATES_INC(TCPS_CLOSED);
  172                 } else {
  173                         /* No tcpcb: undo the PCB allocation. */
  174                         error = ENOBUFS;
  175                         in_pcbdetach(inp);
  176                         in_pcbfree(inp);
  177                 }
  178         }
  179         TCP_PROBE2(debug__user, tp, PRU_ATTACH);
  180         return (error);
  181 }
  182 
  183 /*
  184  * pru_detach() for TCP, called once the socket layer has lost its final
  185  * reference to the socket.  The connection must by now be dropped or
  186  * still embryonic (asserted below); the tcpcb is discarded and the
  187  * inpcb is detached and freed.  The inpcb write lock taken here is not
  188  * released in this function.
  189  */
  190 static void
  191 tcp_usr_detach(struct socket *so)
  192 {
  193         struct inpcb *inp = sotoinpcb(so);
  194         struct tcpcb *tp;
  195 
  196         KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
  197         INP_WLOCK(inp);
  198         KASSERT(inp->inp_socket == so && so->so_pcb == inp,
  199                 ("%s: socket %p inp %p mismatch", __func__, so, inp));
  200 
  201         tp = intotcpcb(inp);
  202         KASSERT((inp->inp_flags & INP_DROPPED) != 0 ||
  203             tp->t_state < TCPS_SYN_SENT,
  204             ("%s: inp %p not dropped or embryonic", __func__, inp));
  205 
  206         /* Tear down the tcpcb first, then the inpcb itself. */
  207         tcp_discardcb(tp);
  208         in_pcbdetach(inp);
  209         in_pcbfree(inp);
  210 }
  211 
  212 #ifdef INET
  213 /*
  214  * pru_bind() for IPv4 TCP: give the socket a local address.  The
  215  * sockaddr is validated first (family, length, no multicast); the bind
  216  * is then performed on the socket's PCB unless it was already dropped.
  217  * Returns 0 on success or an errno value.
  218  */
  219 static int
  220 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  221 {
  222         struct sockaddr_in *sinp = (struct sockaddr_in *)nam;
  223         struct inpcb *inp;
  224 #ifdef KDTRACE_HOOKS
  225         struct tcpcb *tp = NULL;
  226 #endif
  227         int error;
  228 
  229         if (nam->sa_family != AF_INET) {
  230                 /*
  231                  * Old programs may pass AF_UNSPEC together with
  232                  * INADDR_ANY; accept that and treat it as AF_INET.
  233                  */
  234                 if (nam->sa_family != AF_UNSPEC ||
  235                     nam->sa_len < offsetof(struct sockaddr_in, sin_zero) ||
  236                     sinp->sin_addr.s_addr != INADDR_ANY)
  237                         return (EAFNOSUPPORT);
  238                 nam->sa_family = AF_INET;
  239         }
  240         if (nam->sa_len != sizeof(*sinp))
  241                 return (EINVAL);
  242 
  243         /* Binding to a multicast address is not allowed. */
  244         if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
  245                 return (EAFNOSUPPORT);
  246 
  247         inp = sotoinpcb(so);
  248         KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
  249         INP_WLOCK(inp);
  250         if ((inp->inp_flags & INP_DROPPED) == 0) {
  251 #ifdef KDTRACE_HOOKS
  252                 tp = intotcpcb(inp);
  253 #endif
  254                 /* The bind itself runs under the PCB hash write lock. */
  255                 INP_HASH_WLOCK(&V_tcbinfo);
  256                 error = in_pcbbind(inp, nam, td->td_ucred);
  257                 INP_HASH_WUNLOCK(&V_tcbinfo);
  258         } else {
  259                 error = EINVAL;
  260         }
  261         TCP_PROBE2(debug__user, tp, PRU_BIND);
  262         INP_WUNLOCK(inp);
  263 
  264         return (error);
  265 }
  266 #endif /* INET */
  267 
  268 #ifdef INET6
      /*
       * pru_bind() for IPv6 TCP sockets.
       *
       * The sockaddr must be AF_INET6 (else EAFNOSUPPORT), exactly
       * sizeof(struct sockaddr_in6) long (else EINVAL) and must not name a
       * multicast address (EAFNOSUPPORT).  A dropped PCB yields EINVAL.
       *
       * With INET configured and IN6P_IPV6_V6ONLY clear, an unspecified
       * address leaves INP_IPV4 set alongside INP_IPV6, and a v4-mapped
       * address is converted with in6_sin6_2_sin() and bound through
       * in_pcbbind() with only INP_IPV4 set.  Everything else is bound with
       * in6_pcbbind().  On any error inp_vflag is restored to its entry value.
       */
  269 static int
  270 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  271 {
  272         int error = 0;
  273         struct inpcb *inp;
  274 #ifdef KDTRACE_HOOKS
  275         struct tcpcb *tp = NULL;
  276 #endif
  277         struct sockaddr_in6 *sin6;
  278         u_char vflagsav;
  279 
  280         sin6 = (struct sockaddr_in6 *)nam;
  281         if (nam->sa_family != AF_INET6)
  282                 return (EAFNOSUPPORT);
  283         if (nam->sa_len != sizeof(*sin6))
  284                 return (EINVAL);
  285 
  286         /*
  287          * Must check for multicast addresses and disallow binding
  288          * to them.
  289          */
  290         if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
  291                 return (EAFNOSUPPORT);
  292 
  293         inp = sotoinpcb(so);
  294         KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
  295         INP_WLOCK(inp);
              /* Saved so the inp_vflag edits below can be undone on error. */
  296         vflagsav = inp->inp_vflag;
  297         if (inp->inp_flags & INP_DROPPED) {
  298                 error = EINVAL;
  299                 goto out;
  300         }
  301 #ifdef KDTRACE_HOOKS
  302         tp = intotcpcb(inp);
  303 #endif
  304         INP_HASH_WLOCK(&V_tcbinfo);
              /* Start as IPv6 only; the INET block below may adjust this. */
  305         inp->inp_vflag &= ~INP_IPV4;
  306         inp->inp_vflag |= INP_IPV6;
  307 #ifdef INET
  308         if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
  309                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
  310                         inp->inp_vflag |= INP_IPV4;
  311                 else if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
  312                         struct sockaddr_in sin;
  313 
  314                         in6_sin6_2_sin(&sin, sin6);
                              /* Same multicast rule, applied to the IPv4 form. */
  315                         if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
  316                                 error = EAFNOSUPPORT;
  317                                 INP_HASH_WUNLOCK(&V_tcbinfo);
  318                                 goto out;
  319                         }
  320                         inp->inp_vflag |= INP_IPV4;
  321                         inp->inp_vflag &= ~INP_IPV6;
  322                         error = in_pcbbind(inp, (struct sockaddr *)&sin,
  323                             td->td_ucred);
  324                         INP_HASH_WUNLOCK(&V_tcbinfo);
  325                         goto out;
  326                 }
  327         }
  328 #endif
  329         error = in6_pcbbind(inp, nam, td->td_ucred);
  330         INP_HASH_WUNLOCK(&V_tcbinfo);
  331 out:
              /* Every path arrives here with only the inp lock still held. */
  332         if (error != 0)
  333                 inp->inp_vflag = vflagsav;
  334         TCP_PROBE2(debug__user, tp, PRU_BIND);
  335         INP_WUNLOCK(inp);
  336         return (error);
  337 }
  338 #endif /* INET6 */
  339 
  340 #ifdef INET
  341 /*
  342  * Prepare to accept connections.
       *
       * Fails with EINVAL if the PCB has been dropped, or with whatever
       * solisten_proto_check() reports.  If no local port is bound yet,
       * in_pcbbind() is first called with a NULL address.  On success the
       * tcpcb is moved to TCPS_LISTEN and solisten_proto() is invoked with the
       * requested backlog; if the bind fails, solisten_proto_abort() is called
       * and the bind error is returned.
  343  */
  344 static int
  345 tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
  346 {
  347         int error = 0;
  348         struct inpcb *inp;
  349         struct tcpcb *tp = NULL;
  350 
  351         inp = sotoinpcb(so);
  352         KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
  353         INP_WLOCK(inp);
  354         if (inp->inp_flags & INP_DROPPED) {
  355                 error = EINVAL;
  356                 goto out;
  357         }
  358         tp = intotcpcb(inp);
              /* The socket lock is taken inside the inp lock from here on. */
  359         SOCK_LOCK(so);
  360         error = solisten_proto_check(so);
  361         if (error != 0) {
  362                 SOCK_UNLOCK(so);
  363                 goto out;
  364         }
              /* No local port yet: bind with a NULL address under the hash lock. */
  365         if (inp->inp_lport == 0) {
  366                 INP_HASH_WLOCK(&V_tcbinfo);
  367                 error = in_pcbbind(inp, NULL, td->td_ucred);
  368                 INP_HASH_WUNLOCK(&V_tcbinfo);
  369         }
  370         if (error == 0) {
  371                 tcp_state_change(tp, TCPS_LISTEN);
  372                 solisten_proto(so, backlog);
  373 #ifdef TCP_OFFLOAD
  374                 if ((so->so_options & SO_NO_OFFLOAD) == 0)
  375                         tcp_offload_listen_start(tp);
  376 #endif
  377         } else {
  378                 solisten_proto_abort(so);
  379         }
  380         SOCK_UNLOCK(so);
  381 
              /*
               * NOTE(review): this runs whether or not the bind above succeeded,
               * and unconditionally overwrites t_tfo_pending -- confirm that a
               * failed or repeated listen() cannot leak a counter.
               */
  382         if (IS_FASTOPEN(tp->t_flags))
  383                 tp->t_tfo_pending = tcp_fastopen_alloc_counter();
  384 
  385 out:
  386         TCP_PROBE2(debug__user, tp, PRU_LISTEN);
  387         INP_WUNLOCK(inp);
  388         return (error);
  389 }
  390 #endif /* INET */
  391 
  392 #ifdef INET6
      /*
       * pru_listen() for IPv6 TCP sockets.
       *
       * Fails with EINVAL if the PCB has been dropped, or with whatever
       * solisten_proto_check() reports.  If no local port is bound yet, INP_IPV4
       * is set in inp_vflag only when IN6P_IPV6_V6ONLY is clear, and
       * in6_pcbbind() is called with a NULL address.  On success the tcpcb is
       * moved to TCPS_LISTEN and solisten_proto() is invoked with the requested
       * backlog; if the bind fails, solisten_proto_abort() is called and
       * inp_vflag is restored to the value saved before the bind attempt.
       */
  393 static int
  394 tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
  395 {
  396         int error = 0;
  397         struct inpcb *inp;
  398         struct tcpcb *tp = NULL;
  399         u_char vflagsav;
  400 
  401         inp = sotoinpcb(so);
  402         KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
  403         INP_WLOCK(inp);
  404         if (inp->inp_flags & INP_DROPPED) {
  405                 error = EINVAL;
  406                 goto out;
  407         }
              /* Saved so a failed implicit bind can put inp_vflag back. */
  408         vflagsav = inp->inp_vflag;
  409         tp = intotcpcb(inp);
  410         SOCK_LOCK(so);
  411         error = solisten_proto_check(so);
  412         if (error != 0) {
  413                 SOCK_UNLOCK(so);
  414                 goto out;
  415         }
  416         INP_HASH_WLOCK(&V_tcbinfo);
  417         if (inp->inp_lport == 0) {
                      /* Allow IPv4 on this PCB only when the socket is not V6ONLY. */
  418                 inp->inp_vflag &= ~INP_IPV4;
  419                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
  420                         inp->inp_vflag |= INP_IPV4;
  421                 error = in6_pcbbind(inp, NULL, td->td_ucred);
  422         }
  423         INP_HASH_WUNLOCK(&V_tcbinfo);
  424         if (error == 0) {
  425                 tcp_state_change(tp, TCPS_LISTEN);
  426                 solisten_proto(so, backlog);
  427 #ifdef TCP_OFFLOAD
  428                 if ((so->so_options & SO_NO_OFFLOAD) == 0)
  429                         tcp_offload_listen_start(tp);
  430 #endif
  431         } else {
  432                 solisten_proto_abort(so);
  433         }
  434         SOCK_UNLOCK(so);
  435 
              /*
               * NOTE(review): this runs whether or not the bind above succeeded,
               * and unconditionally overwrites t_tfo_pending -- confirm that a
               * failed or repeated listen() cannot leak a counter.
               */
  436         if (IS_FASTOPEN(tp->t_flags))
  437                 tp->t_tfo_pending = tcp_fastopen_alloc_counter();
  438 
              /* The early exits above jump past this; they changed no flags. */
  439         if (error != 0)
  440                 inp->inp_vflag = vflagsav;
  441 
  442 out:
  443         TCP_PROBE2(debug__user, tp, PRU_LISTEN);
  444         INP_WUNLOCK(inp);
  445         return (error);
  446 }
  447 #endif /* INET6 */
  448 
  449 #ifdef INET
  450 /*
  451  * Initiate connection to peer.
  452  * Create a template for use in transmissions on this connection.
  453  * Enter SYN_SENT state, and mark socket as connecting.
  454  * Start keep-alive timer, and seed output sequence space.
  455  * Send initial segment on connection.
       *
       * Error returns produced directly here: EAFNOSUPPORT for a non-AF_INET
       * or multicast destination, EINVAL for a wrong sockaddr length, EACCES
       * for INADDR_BROADCAST, the prison_remote_ip4() error, ECONNREFUSED when
       * the PCB has been dropped and EOPNOTSUPP on a listening socket.
       * Otherwise the result of tcp_connect(), tcp_offload_connect() or
       * tcp_output() is returned.
  456  */
  457 static int
  458 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  459 {
  460         struct epoch_tracker et;
  461         int error = 0;
  462         struct inpcb *inp;
  463         struct tcpcb *tp = NULL;
  464         struct sockaddr_in *sinp;
  465 
  466         sinp = (struct sockaddr_in *)nam;
  467         if (nam->sa_family != AF_INET)
  468                 return (EAFNOSUPPORT);
  469         if (nam->sa_len != sizeof (*sinp))
  470                 return (EINVAL);
  471 
  472         /*
  473          * Must disallow TCP ``connections'' to multicast addresses.
  474          */
  475         if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
  476                 return (EAFNOSUPPORT);
  477         if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST)
  478                 return (EACCES);
  479         if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
  480                 return (error);
  481 
  482         inp = sotoinpcb(so);
  483         KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
  484         INP_WLOCK(inp);
  485         if (inp->inp_flags & INP_DROPPED) {
  486                 error = ECONNREFUSED;
  487                 goto out;
  488         }
  489         if (SOLISTENING(so)) {
  490                 error = EOPNOTSUPP;
  491                 goto out;
  492         }
  493         tp = intotcpcb(inp);
              /* tcp_connect() and the output path below run inside the net epoch. */
  494         NET_EPOCH_ENTER(et);
  495         if ((error = tcp_connect(tp, nam, td)) != 0)
  496                 goto out_in_epoch;
  497 #ifdef TCP_OFFLOAD
              /*
               * When offload is permitted and tcp_offload_connect() returns 0,
               * the keep timer and tcp_output() below are skipped.
               */
  498         if (registered_toedevs > 0 &&
  499             (so->so_options & SO_NO_OFFLOAD) == 0 &&
  500             (error = tcp_offload_connect(so, nam)) == 0)
  501                 goto out_in_epoch;
  502 #endif
  503         tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
  504         error = tcp_output(tp);
              /* A negative tcp_output() result is not expected at connect time. */
  505         KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
  506             ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
  507 out_in_epoch:
  508         NET_EPOCH_EXIT(et);
  509 out:
  510         TCP_PROBE2(debug__user, tp, PRU_CONNECT);
  511         INP_WUNLOCK(inp);
  512         return (error);
  513 }
  514 #endif /* INET */
  515 
  516 #ifdef INET6
      /*
       * pru_connect() for IPv6 TCP sockets.
       *
       * The destination must be AF_INET6 (else EAFNOSUPPORT), exactly
       * sizeof(struct sockaddr_in6) long (else EINVAL) and not multicast
       * (EAFNOSUPPORT).  A dropped PCB yields ECONNREFUSED and a listening
       * socket EINVAL.
       *
       * With INET configured, a v4-mapped destination is converted with
       * in6_sin6_2_sin(), re-validated with the IPv4 rules and connected
       * through tcp_connect() with the PCB switched to INP_IPV4; this requires
       * IN6P_IPV6_V6ONLY to be clear and INP_IPV4 to be set already.  Any other
       * destination requires INP_IPV6 and goes through tcp6_connect().
       *
       * If the call fails while no local port is bound, inp_vflag and
       * inc_flags are restored to the values saved on entry.
       *
       * NOTE(review): the IPv4 handler in this file returns EOPNOTSUPP for a
       * listening socket where this one returns EINVAL -- confirm the
       * difference is intended.
       */
  517 static int
  518 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  519 {
  520         struct epoch_tracker et;
  521         int error = 0;
  522         struct inpcb *inp;
  523         struct tcpcb *tp = NULL;
  524         struct sockaddr_in6 *sin6;
  525         u_int8_t incflagsav;
  526         u_char vflagsav;
  527 
  528         sin6 = (struct sockaddr_in6 *)nam;
  529         if (nam->sa_family != AF_INET6)
  530                 return (EAFNOSUPPORT);
  531         if (nam->sa_len != sizeof (*sin6))
  532                 return (EINVAL);
  533 
  534         /*
  535          * Must disallow TCP ``connections'' to multicast addresses.
  536          */
  537         if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
  538                 return (EAFNOSUPPORT);
  539 
  540         inp = sotoinpcb(so);
  541         KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
  542         INP_WLOCK(inp);
              /* Saved so the flag edits below can be undone at "out". */
  543         vflagsav = inp->inp_vflag;
  544         incflagsav = inp->inp_inc.inc_flags;
  545         if (inp->inp_flags & INP_DROPPED) {
  546                 error = ECONNREFUSED;
  547                 goto out;
  548         }
  549         if (SOLISTENING(so)) {
  550                 error = EINVAL;
  551                 goto out;
  552         }
  553         tp = intotcpcb(inp);
  554 #ifdef INET
  555         /*
  556          * XXXRW: Some confusion: V4/V6 flags relate to binding, and
  557          * therefore probably require the hash lock, which isn't held here.
  558          * Is this a significant problem?
  559          */
  560         if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
  561                 struct sockaddr_in sin;
  562 
  563                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
  564                         error = EINVAL;
  565                         goto out;
  566                 }
  567                 if ((inp->inp_vflag & INP_IPV4) == 0) {
  568                         error = EAFNOSUPPORT;
  569                         goto out;
  570                 }
  571 
                      /* Repeat the IPv4 destination checks on the mapped address. */
  572                 in6_sin6_2_sin(&sin, sin6);
  573                 if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
  574                         error = EAFNOSUPPORT;
  575                         goto out;
  576                 }
  577                 if (ntohl(sin.sin_addr.s_addr) == INADDR_BROADCAST) {
  578                         error = EACCES;
  579                         goto out;
  580                 }
  581                 if ((error = prison_remote_ip4(td->td_ucred,
  582                     &sin.sin_addr)) != 0)
  583                         goto out;
  584                 inp->inp_vflag |= INP_IPV4;
  585                 inp->inp_vflag &= ~INP_IPV6;
  586                 NET_EPOCH_ENTER(et);
  587                 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
  588                         goto out_in_epoch;
  589 #ifdef TCP_OFFLOAD
  590                 if (registered_toedevs > 0 &&
  591                     (so->so_options & SO_NO_OFFLOAD) == 0 &&
  592                     (error = tcp_offload_connect(so, nam)) == 0)
  593                         goto out_in_epoch;
  594 #endif
                      /*
                       * NOTE(review): unlike the native IPv6 path below and the
                       * IPv4 handler, no TT_KEEP timer is armed before
                       * tcp_output() here -- confirm this is intended.
                       */
  595                 error = tcp_output(tp);
  596                 goto out_in_epoch;
  597         } else {
  598                 if ((inp->inp_vflag & INP_IPV6) == 0) {
  599                         error = EAFNOSUPPORT;
  600                         goto out;
  601                 }
  602         }
  603 #endif
  604         if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr)) != 0)
  605                 goto out;
              /* Native IPv6 destination: make the PCB IPv6 only. */
  606         inp->inp_vflag &= ~INP_IPV4;
  607         inp->inp_vflag |= INP_IPV6;
  608         inp->inp_inc.inc_flags |= INC_ISIPV6;
  609         NET_EPOCH_ENTER(et);
  610         if ((error = tcp6_connect(tp, nam, td)) != 0)
  611                 goto out_in_epoch;
  612 #ifdef TCP_OFFLOAD
              /*
               * When offload is permitted and tcp_offload_connect() returns 0,
               * the keep timer and tcp_output() below are skipped.
               */
  613         if (registered_toedevs > 0 &&
  614             (so->so_options & SO_NO_OFFLOAD) == 0 &&
  615             (error = tcp_offload_connect(so, nam)) == 0)
  616                 goto out_in_epoch;
  617 #endif
  618         tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
  619         error = tcp_output(tp);
  620 out_in_epoch:
  621         NET_EPOCH_EXIT(et);
  622 out:
  623         KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
  624             ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
  625         /*
  626          * If the implicit bind in the connect call fails, restore
  627          * the flags we modified.
  628          */
  629         if (error != 0 && inp->inp_lport == 0) {
  630                 inp->inp_vflag = vflagsav;
  631                 inp->inp_inc.inc_flags = incflagsav;
  632         }
  633 
  634         TCP_PROBE2(debug__user, tp, PRU_CONNECT);
  635         INP_WUNLOCK(inp);
  636         return (error);
  637 }
  638 #endif /* INET6 */
  639 
  640 /*
  641  * pru_disconnect() for TCP: start tearing down the connection to the
  642  * peer.  Fails with ECONNRESET if the PCB has already been dropped;
  643  * otherwise the work is delegated to tcp_disconnect(), which (per the
  644  * historical description of this routine) drops a connection that is
  645  * embryonic or whose data need not drain, and otherwise begins the TCP
  646  * shutdown: mark the socket disconnecting, drain unread data and send a
  647  * FIN, with the socket really disconnected once the peer FINs and ACKs.
  648  *
  649  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
  650  */
  651 static int
  652 tcp_usr_disconnect(struct socket *so)
  653 {
  654         struct epoch_tracker et;
  655         struct tcpcb *tp = NULL;
  656         struct inpcb *inp;
  657         int error = 0;
  658 
  659         NET_EPOCH_ENTER(et);
  660         inp = sotoinpcb(so);
  661         KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
  662         INP_WLOCK(inp);
  663         if ((inp->inp_flags & INP_DROPPED) == 0) {
  664                 tp = intotcpcb(inp);
  665                 tcp_disconnect(tp);
  666         } else {
  667                 error = ECONNRESET;
  668         }
  669 
  670         /* tp is still NULL here when the PCB had been dropped. */
  671         TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
  672         INP_WUNLOCK(inp);
  673         NET_EPOCH_EXIT(et);
  674         return (error);
  675 }
  675 
  676 #ifdef INET
  677 /*
  678  * pru_accept() for IPv4 TCP: report the peer's address through *nam.
  679  * Returns ECONNABORTED if the socket is already disconnected or its
  680  * PCB has been dropped, otherwise 0.
  681  */
  682 static int
  683 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
  684 {
  685         struct in_addr addr;
  686         struct inpcb *inp;
  687 #ifdef KDTRACE_HOOKS
  688         struct tcpcb *tp = NULL;
  689 #endif
  690         in_port_t port = 0;
  691         int error = 0;
  692 
  693         if ((so->so_state & SS_ISDISCONNECTED) != 0)
  694                 return (ECONNABORTED);
  695 
  696         inp = sotoinpcb(so);
  697         KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
  698         INP_WLOCK(inp);
  699         if ((inp->inp_flags & INP_DROPPED) != 0) {
  700                 error = ECONNABORTED;
  701         } else {
  702 #ifdef KDTRACE_HOOKS
  703                 tp = intotcpcb(inp);
  704 #endif
  705                 /*
  706                  * Only snapshot the foreign port and address while the
  707                  * PCB is locked; the sockaddr is built from the copies
  708                  * after the lock has been released.
  709                  */
  710                 port = inp->inp_fport;
  711                 addr = inp->inp_faddr;
  712         }
  713         TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
  714         INP_WUNLOCK(inp);
  715 
  716         if (error != 0)
  717                 return (error);
  718         *nam = in_sockaddr(port, &addr);
  719         return (0);
  720 }
  721 #endif /* INET */
  722 
  723 #ifdef INET6
  724 /*
  725  * pru_accept() for IPv6 TCP: report the peer's address through *nam,
  726  * built with in6_v4mapsin6_sockaddr() when the PCB has INP_IPV4 set and
  727  * with in6_sockaddr() otherwise.  Returns ECONNABORTED if the socket is
  728  * already disconnected or its PCB has been dropped, otherwise 0.
  729  */
  730 static int
  731 tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
  732 {
  733         struct epoch_tracker et;
  734         struct in6_addr addr6;
  735         struct in_addr addr;
  736         struct inpcb *inp;
  737 #ifdef KDTRACE_HOOKS
  738         struct tcpcb *tp = NULL;
  739 #endif
  740         in_port_t port = 0;
  741         int error = 0;
  742         int v4 = 0;
  743 
  744         if ((so->so_state & SS_ISDISCONNECTED) != 0)
  745                 return (ECONNABORTED);
  746 
  747         inp = sotoinpcb(so);
  748         KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
  749         NET_EPOCH_ENTER(et);
  750         INP_WLOCK(inp);
  751         if ((inp->inp_flags & INP_DROPPED) != 0) {
  752                 error = ECONNABORTED;
  753         } else {
  754 #ifdef KDTRACE_HOOKS
  755                 tp = intotcpcb(inp);
  756 #endif
  757                 /* Copy under the lock; build the sockaddr after unlock. */
  758                 port = inp->inp_fport;
  759                 if ((inp->inp_vflag & INP_IPV4) != 0) {
  760                         v4 = 1;
  761                         addr = inp->inp_faddr;
  762                 } else {
  763                         addr6 = inp->in6p_faddr;
  764                 }
  765         }
  766         TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
  767         INP_WUNLOCK(inp);
  768         NET_EPOCH_EXIT(et);
  769 
  770         if (error != 0)
  771                 return (error);
  772         if (v4)
  773                 *nam = in6_v4mapsin6_sockaddr(port, &addr);
  774         else
  775                 *nam = in6_sockaddr(port, &addr6);
  776 
  777         return (0);
  778 }
  779 #endif /* INET6 */
  780 
  781 /*
  782  * pru_shutdown() for TCP: mark the connection as incapable of further
  783  * output.  Fails with ECONNRESET if the PCB was already dropped.
  784  */
  785 static int
  786 tcp_usr_shutdown(struct socket *so)
  787 {
  788         struct epoch_tracker et;
  789         struct tcpcb *tp;
  790         struct inpcb *inp;
  791         int error = 0;
  792 
  793         inp = sotoinpcb(so);
  794         KASSERT(inp != NULL, ("inp == NULL"));
  795         INP_WLOCK(inp);
  796         if ((inp->inp_flags & INP_DROPPED) != 0) {
  797                 INP_WUNLOCK(inp);
  798                 return (ECONNRESET);
  799         }
  800         tp = intotcpcb(inp);
  801         NET_EPOCH_ENTER(et);
  802         socantsendmore(so);
  803         tcp_usrclosed(tp);
  804         if ((inp->inp_flags & INP_DROPPED) == 0)
  805                 error = tcp_output_nodrop(tp);
  806         TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
  807         error = tcp_unlock_or_drop(tp, error);
  808         NET_EPOCH_EXIT(et);
  809         return (error);
  810 }
  811 
  812 /*
  813  * pru_rcvd() for TCP: after a receive, possibly send a window update to
  814  * the peer.  Returns ECONNRESET if the PCB was dropped, otherwise 0.
  815  */
  816 static int
  817 tcp_usr_rcvd(struct socket *so, int flags)
  818 {
  819         struct epoch_tracker et;
  820         struct tcpcb *tp;
  821         struct inpcb *inp;
  822         int outrv = 0;
  823 
  824         inp = sotoinpcb(so);
  825         KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
  826         INP_WLOCK(inp);
  827         if ((inp->inp_flags & INP_DROPPED) != 0) {
  828                 INP_WUNLOCK(inp);
  829                 return (ECONNRESET);
  830         }
  831         tp = intotcpcb(inp);
  832         NET_EPOCH_ENTER(et);
  833         /*
  834          * A passively-created TFO connection that is still in
  835          * SYN_RECEIVED gets no window update here, as that may trigger
  836          * an early SYN|ACK.  It is preferable to send the SYN|ACK along
  837          * with application response data or, failing that, when the
  838          * DELACK timer expires.
  839          */
  840         if (!IS_FASTOPEN(tp->t_flags) || tp->t_state != TCPS_SYN_RECEIVED) {
  841 #ifdef TCP_OFFLOAD
  842                 if (tp->t_flags & TF_TOE)
  843                         tcp_offload_rcvd(tp);
  844                 else
  845 #endif
  846                         outrv = tcp_output_nodrop(tp);
  847         }
  848         TCP_PROBE2(debug__user, tp, PRU_RCVD);
  849         /* outrv only feeds tcp_unlock_or_drop(); it is not returned. */
  850         (void) tcp_unlock_or_drop(tp, outrv);
  851         NET_EPOCH_EXIT(et);
  852         return (0);
  853 }
  854 
  855 /*
  856  * Do a send by putting data in output queue and updating urgent
  857  * marker if URG set.  Possibly send more data.  Unlike the other
  858  * pru_*() routines, the mbuf chains are our responsibility.  We
  859  * must either enqueue them or free them.  The other pru_* routines
  860  * generally are caller-frees.
       *
       * so      - socket being written to.
       * flags   - PRUS_* bits; this function tests PRUS_OOB, PRUS_EOF,
       *           PRUS_MORETOCOME and PRUS_NOTREADY.
       * m       - data to queue.  On failure it is freed here unless
       *           PRUS_NOTREADY is set.
       * nam     - optional destination; when non-NULL and the tcpcb state is
       *           below TCPS_SYN_SENT an implied connect is performed.
       * control - control mbuf; a non-empty one is rejected with EINVAL.  It
       *           is always freed here.
       * td      - calling thread; its credentials are used for the
       *           prison_remote_ip4()/prison_remote_ip6() checks and for the
       *           implied connect.
       *
       * Returns EINVAL for a non-empty control mbuf, ECONNRESET when the
       * inpcb is already INP_DROPPED, and otherwise whatever
       * tcp_unlock_or_drop() returns for the accumulated error.
  861  */
  862 static int
  863 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
  864     struct sockaddr *nam, struct mbuf *control, struct thread *td)
  865 {
  866         struct epoch_tracker et;
  867         int error = 0;
  868         struct inpcb *inp;
  869         struct tcpcb *tp = NULL;
  870 #ifdef INET
  871 #ifdef INET6
  872         struct sockaddr_in sin;
  873 #endif
  874         struct sockaddr_in *sinp;
  875 #endif
  876 #ifdef INET6
  877         int isipv6;
  878 #endif
  879         u_int8_t incflagsav;
  880         u_char vflagsav;
  881         bool restoreflags;
  882 
  883         if (control != NULL) {
  884                 /* TCP doesn't do control messages (rights, creds, etc) */
  885                 if (control->m_len) {
  886                         m_freem(control);
                              /*
                               * Honor the "enqueue or free" contract on this
                               * early exit too, so the data chain is not
                               * leaked.  With PRUS_NOTREADY the caller or
                               * tcp_usr_ready() still owns m.
                               */
                              if (m != NULL && (flags & PRUS_NOTREADY) == 0)
                                      m_freem(m);
  887                         return (EINVAL);
  888                 }
  889                 m_freem(control);       /* empty control, just free it */
  890         }
  891 
  892         inp = sotoinpcb(so);
  893         KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
  894         INP_WLOCK(inp);
  895         if (inp->inp_flags & INP_DROPPED) {
  896                 if (m != NULL && (flags & PRUS_NOTREADY) == 0)
  897                         m_freem(m);
  898                 INP_WUNLOCK(inp);
  899                 return (ECONNRESET);
  900         }
  901 
              /*
               * Remember the address-family flags; the code at out: puts them
               * back if an implied connect changed them and the request failed.
               */
  902         vflagsav = inp->inp_vflag;
  903         incflagsav = inp->inp_inc.inc_flags;
  904         restoreflags = false;
  905         tp = intotcpcb(inp);
  906 
  907         NET_EPOCH_ENTER(et);
  908         if ((flags & PRUS_OOB) != 0 &&
  909             (error = tcp_pru_options_support(tp, PRUS_OOB)) != 0)
  910                 goto out;
  911 
              /*
               * Validate the destination of an implied connect before any
               * data is queued.
               */
  912         if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
  913                 if (tp->t_state == TCPS_LISTEN) {
  914                         error = EINVAL;
  915                         goto out;
  916                 }
  917                 switch (nam->sa_family) {
  918 #ifdef INET
  919                 case AF_INET:
  920                         sinp = (struct sockaddr_in *)nam;
  921                         if (sinp->sin_len != sizeof(struct sockaddr_in)) {
  922                                 error = EINVAL;
  923                                 goto out;
  924                         }
  925                         if ((inp->inp_vflag & INP_IPV6) != 0) {
  926                                 error = EAFNOSUPPORT;
  927                                 goto out;
  928                         }
  929                         if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
  930                                 error = EAFNOSUPPORT;
  931                                 goto out;
  932                         }
  933                         if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) {
  934                                 error = EACCES;
  935                                 goto out;
  936                         }
  937                         if ((error = prison_remote_ip4(td->td_ucred,
  938                             &sinp->sin_addr)))
  939                                 goto out;
  940 #ifdef INET6
  941                         isipv6 = 0;
  942 #endif
  943                         break;
  944 #endif /* INET */
  945 #ifdef INET6
  946                 case AF_INET6:
  947                 {
  948                         struct sockaddr_in6 *sin6;
  949 
  950                         sin6 = (struct sockaddr_in6 *)nam;
  951                         if (sin6->sin6_len != sizeof(*sin6)) {
  952                                 error = EINVAL;
  953                                 goto out;
  954                         }
  955                         if ((inp->inp_vflag & INP_IPV6PROTO) == 0) {
  956                                 error = EAFNOSUPPORT;
  957                                 goto out;
  958                         }
  959                         if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
  960                                 error = EAFNOSUPPORT;
  961                                 goto out;
  962                         }
  963                         if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
  964 #ifdef INET
                                      /*
                                       * V4-mapped destination: convert it into
                                       * the local 'sin' and continue as IPv4.
                                       */
  965                                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
  966                                         error = EINVAL;
  967                                         goto out;
  968                                 }
  969                                 if ((inp->inp_vflag & INP_IPV4) == 0) {
  970                                         error = EAFNOSUPPORT;
  971                                         goto out;
  972                                 }
  973                                 restoreflags = true;
  974                                 inp->inp_vflag &= ~INP_IPV6;
  975                                 sinp = &sin;
  976                                 in6_sin6_2_sin(sinp, sin6);
  977                                 if (IN_MULTICAST(
  978                                     ntohl(sinp->sin_addr.s_addr))) {
  979                                         error = EAFNOSUPPORT;
  980                                         goto out;
  981                                 }
  982                                 if ((error = prison_remote_ip4(td->td_ucred,
  983                                     &sinp->sin_addr)))
  984                                         goto out;
  985                                 isipv6 = 0;
  986 #else /* !INET */
  987                                 error = EAFNOSUPPORT;
  988                                 goto out;
  989 #endif /* INET */
  990                         } else {
  991                                 if ((inp->inp_vflag & INP_IPV6) == 0) {
  992                                         error = EAFNOSUPPORT;
  993                                         goto out;
  994                                 }
  995                                 restoreflags = true;
  996                                 inp->inp_vflag &= ~INP_IPV4;
  997                                 inp->inp_inc.inc_flags |= INC_ISIPV6;
  998                                 if ((error = prison_remote_ip6(td->td_ucred,
  999                                     &sin6->sin6_addr)))
 1000                                         goto out;
 1001                                 isipv6 = 1;
 1002                         }
 1003                         break;
 1004                 }
 1005 #endif /* INET6 */
 1006                 default:
 1007                         error = EAFNOSUPPORT;
 1008                         goto out;
 1009                 }
 1010         }
 1011         if (!(flags & PRUS_OOB)) {
 1012                 if (tp->t_acktime == 0)
 1013                         tp->t_acktime = ticks;
                      /*
                       * Once appended, m is cleared so that the code at out:
                       * does not free it.
                       */
 1014                 sbappendstream(&so->so_snd, m, flags);
 1015                 m = NULL;
 1016                 if (nam && tp->t_state < TCPS_SYN_SENT) {
 1017                         KASSERT(tp->t_state == TCPS_CLOSED,
 1018                             ("%s: tp %p is listening", __func__, tp));
 1019 
 1020                         /*
 1021                          * Do implied connect if not yet connected,
 1022                          * initialize window to default value, and
 1023                          * initialize maxseg using peer's cached MSS.
 1024                          */
 1025 #ifdef INET6
 1026                         if (isipv6)
 1027                                 error = tcp6_connect(tp, nam, td);
 1028 #endif /* INET6 */
 1029 #if defined(INET6) && defined(INET)
 1030                         else
 1031 #endif
 1032 #ifdef INET
 1033                                 error = tcp_connect(tp,
 1034                                     (struct sockaddr *)sinp, td);
 1035 #endif
 1036                         /*
 1037                          * The bind operation in tcp_connect succeeded. We
 1038                          * no longer want to restore the flags if later
 1039                          * operations fail.
 1040                          */
 1041                         if (error == 0 || inp->inp_lport != 0)
 1042                                 restoreflags = false;
 1043 
 1044                         if (error) {
 1045                                 /* m is freed if PRUS_NOTREADY is unset. */
 1046                                 sbflush(&so->so_snd);
 1047                                 goto out;
 1048                         }
 1049                         if (IS_FASTOPEN(tp->t_flags))
 1050                                 tcp_fastopen_connect(tp);
 1051                         else {
 1052                                 tp->snd_wnd = TTCP_CLIENT_SND_WND;
 1053                                 tcp_mss(tp, -1);
 1054                         }
 1055                 }
 1056                 if (flags & PRUS_EOF) {
 1057                         /*
 1058                          * Close the send side of the connection after
 1059                          * the data is sent.
 1060                          */
 1061                         socantsendmore(so);
 1062                         tcp_usrclosed(tp);
 1063                 }
                      /*
                       * Stamp t_fbyte_out with the current tick the first time
                       * data is queued on an established connection.  Zero
                       * means "unset", so a tick value of 0 is stored as 1.
                       */
 1064                 if (TCPS_HAVEESTABLISHED(tp->t_state) &&
 1065                     ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
 1066                     (tp->t_fbyte_out == 0) &&
 1067                     (so->so_snd.sb_ccc > 0)) {
 1068                         tp->t_fbyte_out = ticks;
 1069                         if (tp->t_fbyte_out == 0)
 1070                                 tp->t_fbyte_out = 1;
 1071                         if (tp->t_fbyte_out && tp->t_fbyte_in)
 1072                                 tp->t_flags2 |= TF2_FBYTES_COMPLETE;
 1073                 }
 1074                 if (!(inp->inp_flags & INP_DROPPED) &&
 1075                     !(flags & PRUS_NOTREADY)) {
                              /* TF_MORETOCOME is set only around this call. */
 1076                         if (flags & PRUS_MORETOCOME)
 1077                                 tp->t_flags |= TF_MORETOCOME;
 1078                         error = tcp_output_nodrop(tp);
 1079                         if (flags & PRUS_MORETOCOME)
 1080                                 tp->t_flags &= ~TF_MORETOCOME;
 1081                 }
 1082         } else {
 1083                 /*
 1084                  * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
 1085                  */
 1086                 SOCKBUF_LOCK(&so->so_snd);
                      /*
                       * NOTE(review): the -512 bound presumably lets urgent
                       * data overcommit the send buffer by up to 512 bytes --
                       * confirm.
                       */
 1087                 if (sbspace(&so->so_snd) < -512) {
 1088                         SOCKBUF_UNLOCK(&so->so_snd);
 1089                         error = ENOBUFS;
 1090                         goto out;
 1091                 }
 1092                 /*
 1093                  * According to RFC961 (Assigned Protocols),
 1094                  * the urgent pointer points to the last octet
 1095                  * of urgent data.  We continue, however,
 1096                  * to consider it to indicate the first octet
 1097                  * of data past the urgent section.
 1098                  * Otherwise, snd_up should be one lower.
 1099                  */
 1100                 if (tp->t_acktime == 0)
 1101                         tp->t_acktime = ticks;
 1102                 sbappendstream_locked(&so->so_snd, m, flags);
 1103                 SOCKBUF_UNLOCK(&so->so_snd);
 1104                 m = NULL;
 1105                 if (nam && tp->t_state < TCPS_SYN_SENT) {
 1106                         /*
 1107                          * Do implied connect if not yet connected,
 1108                          * initialize window to default value, and
 1109                          * initialize maxseg using peer's cached MSS.
 1110                          */
 1111 
 1112                         /*
 1113                          * Not going to contemplate SYN|URG
 1114                          */
 1115                         if (IS_FASTOPEN(tp->t_flags))
 1116                                 tp->t_flags &= ~TF_FASTOPEN;
 1117 #ifdef INET6
 1118                         if (isipv6)
 1119                                 error = tcp6_connect(tp, nam, td);
 1120 #endif /* INET6 */
 1121 #if defined(INET6) && defined(INET)
 1122                         else
 1123 #endif
 1124 #ifdef INET
 1125                                 error = tcp_connect(tp,
 1126                                     (struct sockaddr *)sinp, td);
 1127 #endif
 1128                         /*
 1129                          * The bind operation in tcp_connect succeeded. We
 1130                          * no longer want to restore the flags if later
 1131                          * operations fail.
 1132                          */
 1133                         if (error == 0 || inp->inp_lport != 0)
 1134                                 restoreflags = false;
 1135 
 1136                         if (error != 0) {
 1137                                 /* m is freed if PRUS_NOTREADY is unset. */
 1138                                 sbflush(&so->so_snd);
 1139                                 goto out;
 1140                         }
 1141                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
 1142                         tcp_mss(tp, -1);
 1143                 }
 1144                 tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
 1145                 if ((flags & PRUS_NOTREADY) == 0) {
                              /* TF_FORCEDATA is set only around this call. */
 1146                         tp->t_flags |= TF_FORCEDATA;
 1147                         error = tcp_output_nodrop(tp);
 1148                         tp->t_flags &= ~TF_FORCEDATA;
 1149                 }
 1150         }
 1151         TCP_LOG_EVENT(tp, NULL,
 1152             &inp->inp_socket->so_rcv,
 1153             &inp->inp_socket->so_snd,
 1154             TCP_LOG_USERSEND, error,
 1155             0, NULL, false);
 1156 
 1157 out:
 1158         /*
 1159          * In case of PRUS_NOTREADY, the caller or tcp_usr_ready() is
 1160          * responsible for freeing memory.
 1161          */
 1162         if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 1163                 m_freem(m);
 1164 
 1165         /*
 1166          * If the request was unsuccessful and we changed flags,
 1167          * restore the original flags.
 1168          */
 1169         if (error != 0 && restoreflags) {
 1170                 inp->inp_vflag = vflagsav;
 1171                 inp->inp_inc.inc_flags = incflagsav;
 1172         }
 1173         TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
 1174                    ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
 1175         error = tcp_unlock_or_drop(tp, error);
 1176         NET_EPOCH_EXIT(et);
 1177         return (error);
 1178 }
 1179 
 1180 static int
 1181 tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
 1182 {
              /*
               * Hand 'count' mbufs starting at 'm' to sbready() on the send
               * buffer and, if that succeeds, attempt output.
               *
               * Returns ECONNRESET (after releasing the mbufs with
               * mb_free_notready()) when the inpcb is already INP_DROPPED,
               * the sbready() error if it fails, and otherwise the result
               * of tcp_output_unlock().
               */
 1183         struct epoch_tracker et;
 1184         struct inpcb *inp;
 1185         struct tcpcb *tp;
 1186         int error;
 1187 
 1188         inp = sotoinpcb(so);
 1189         INP_WLOCK(inp);
 1190         if (inp->inp_flags & INP_DROPPED) {
 1191                 INP_WUNLOCK(inp);
 1192                 mb_free_notready(m, count);
 1193                 return (ECONNRESET);
 1194         }
 1195         tp = intotcpcb(inp);
 1196 
 1197         SOCKBUF_LOCK(&so->so_snd);
 1198         error = sbready(&so->so_snd, m, count);
 1199         SOCKBUF_UNLOCK(&so->so_snd);
 1200         if (error) {
 1201                 INP_WUNLOCK(inp);
 1202                 return (error);
 1203         }
 1204         NET_EPOCH_ENTER(et);
              /*
               * NOTE(review): there is no INP_WUNLOCK() on this path;
               * presumably tcp_output_unlock() releases the inpcb lock
               * itself -- confirm.
               */
 1205         error = tcp_output_unlock(tp);
 1206         NET_EPOCH_EXIT(et);
 1207 
 1208         return (error);
 1209 }
 1210 
 1211 /*
 1212  * Abort the TCP.  Drop the connection abruptly.
       *
       * Runs with the inpcb write-locked inside a network epoch section.
       * An inpcb that is not yet INP_DROPPED is dropped with ECONNABORTED.
       * If it is still not INP_DROPPED afterwards, a socket reference is
       * taken and INP_SOCKREF is set on the inpcb.
 1213  */
 1214 static void
 1215 tcp_usr_abort(struct socket *so)
 1216 {
 1217         struct inpcb *inp;
 1218         struct tcpcb *tp = NULL;
 1219         struct epoch_tracker et;
 1220 
 1221         inp = sotoinpcb(so);
 1222         KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
 1223 
 1224         NET_EPOCH_ENTER(et);
 1225         INP_WLOCK(inp);
 1226         KASSERT(inp->inp_socket != NULL,
 1227             ("tcp_usr_abort: inp_socket == NULL"));
 1228 
 1229         /*
 1230          * If we still have full TCP state, and we're not dropped, drop.
 1231          */
 1232         if (!(inp->inp_flags & INP_DROPPED)) {
 1233                 tp = intotcpcb(inp);
 1234                 tp = tcp_drop(tp, ECONNABORTED);
                      /*
                       * A NULL return skips the INP_WUNLOCK() below.
                       * NOTE(review): presumably tcp_drop() has already
                       * released the inpcb in that case -- confirm.
                       */
 1235                 if (tp == NULL)
 1236                         goto dropped;
 1237                 TCP_PROBE2(debug__user, tp, PRU_ABORT);
 1238         }
              /* Re-test the flag: the drop above may have changed it. */
 1239         if (!(inp->inp_flags & INP_DROPPED)) {
 1240                 soref(so);
 1241                 inp->inp_flags |= INP_SOCKREF;
 1242         }
 1243         INP_WUNLOCK(inp);
 1244 dropped:
 1245         NET_EPOCH_EXIT(et);
 1246 }
 1247 
 1248 /*
 1249  * TCP socket is closed.  Start friendly disconnect.
       *
       * Runs with the inpcb write-locked inside a network epoch section.
       * An inpcb that is not yet INP_DROPPED gets TF_CLOSED set on its tcpcb
       * and is passed to tcp_disconnect().  If it is still not INP_DROPPED
       * afterwards, a socket reference is taken and INP_SOCKREF is set on
       * the inpcb.
 1250  */
 1251 static void
 1252 tcp_usr_close(struct socket *so)
 1253 {
 1254         struct inpcb *inp;
 1255         struct tcpcb *tp = NULL;
 1256         struct epoch_tracker et;
 1257 
 1258         inp = sotoinpcb(so);
 1259         KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
 1260 
 1261         NET_EPOCH_ENTER(et);
 1262         INP_WLOCK(inp);
 1263         KASSERT(inp->inp_socket != NULL,
 1264             ("tcp_usr_close: inp_socket == NULL"));
 1265 
 1266         /*
 1267          * If we still have full TCP state, and we're not dropped, initiate
 1268          * a disconnect.
 1269          */
 1270         if (!(inp->inp_flags & INP_DROPPED)) {
 1271                 tp = intotcpcb(inp);
 1272                 tp->t_flags |= TF_CLOSED;
 1273                 tcp_disconnect(tp);
 1274                 TCP_PROBE2(debug__user, tp, PRU_CLOSE);
 1275         }
              /* Re-test the flag: the disconnect above may have changed it. */
 1276         if (!(inp->inp_flags & INP_DROPPED)) {
 1277                 soref(so);
 1278                 inp->inp_flags |= INP_SOCKREF;
 1279         }
 1280         INP_WUNLOCK(inp);
 1281         NET_EPOCH_EXIT(et);
 1282 }
 1283 
 1284 static int
 1285 tcp_pru_options_support(struct tcpcb *tp, int flags)
 1286 {
 1287         /*
 1288          * If the specific TCP stack has a pru_options
 1289          * specified then it does not always support
 1290          * all the PRU_XX options and we must ask it.
 1291          * If the function is not specified then all
 1292          * of the PRU_XX options are supported.
               *
               * Returns 0 when the stack installs no tfb_pru_options
               * hook, otherwise whatever the hook returns for (tp, flags).
               * Callers in this file treat a non-zero result as an error
               * code.
 1293          */
 1294         int ret = 0;
 1295 
 1296         if (tp->t_fb->tfb_pru_options) {
 1297                 ret = (*tp->t_fb->tfb_pru_options)(tp, flags);
 1298         }
 1299         return (ret);
 1300 }
 1301 
 1302 /*
 1303  * Receive out-of-band data.
       *
       * On success the single saved byte tp->t_iobc is stored into 'm', whose
       * length is set to 1, and 0 is returned.  Unless MSG_PEEK is set in
       * 'flags', TCPOOB_HAVEDATA and TCPOOB_HADDATA are both toggled.
       *
       * Errors: ECONNRESET when the inpcb is INP_DROPPED; the result of
       * tcp_pru_options_support() when it is non-zero; EINVAL when there is
       * no OOB mark, SO_OOBINLINE is set, or TCPOOB_HADDATA is already set;
       * EWOULDBLOCK when TCPOOB_HAVEDATA is not set.
 1304  */
 1305 static int
 1306 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
 1307 {
 1308         int error = 0;
 1309         struct inpcb *inp;
 1310         struct tcpcb *tp = NULL;
 1311 
 1312         inp = sotoinpcb(so);
 1313         KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
 1314         INP_WLOCK(inp);
 1315         if (inp->inp_flags & INP_DROPPED) {
 1316                 error = ECONNRESET;
 1317                 goto out;
 1318         }
 1319         tp = intotcpcb(inp);
 1320         error = tcp_pru_options_support(tp, PRUS_OOB);
 1321         if (error) {
 1322                 goto out;
 1323         }
 1324         if ((so->so_oobmark == 0 &&
 1325              (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
 1326             so->so_options & SO_OOBINLINE ||
 1327             tp->t_oobflags & TCPOOB_HADDATA) {
 1328                 error = EINVAL;
 1329                 goto out;
 1330         }
 1331         if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
 1332                 error = EWOULDBLOCK;
 1333                 goto out;
 1334         }
 1335         m->m_len = 1;
 1336         *mtod(m, caddr_t) = tp->t_iobc;
              /*
               * At this point HAVEDATA is set and HADDATA is clear (both were
               * checked above), so the XOR clears the former and sets the
               * latter.
               */
 1337         if ((flags & MSG_PEEK) == 0)
 1338                 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 1339 
 1340 out:
              /* tp is still NULL here when the inpcb was found dropped. */
 1341         TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
 1342         INP_WUNLOCK(inp);
 1343         return (error);
 1344 }
 1345 
 1346 #ifdef INET
      /*
       * Protocol switch entry for TCP stream sockets, built when INET is
       * defined.  It wires the pr_* hooks to the tcp_usr_*() handlers in
       * this file and to the in_*() helpers for control, peer/socket address
       * lookup and label handling.
       */
 1347 struct protosw tcp_protosw = {
 1348         .pr_type =              SOCK_STREAM,
 1349         .pr_protocol =          IPPROTO_TCP,
 1350         .pr_flags =             PR_CONNREQUIRED | PR_IMPLOPCL | PR_WANTRCVD |
 1351                                     PR_CAPATTACH,
 1352         .pr_ctloutput =         tcp_ctloutput,
 1353         .pr_abort =             tcp_usr_abort,
 1354         .pr_accept =            tcp_usr_accept,
 1355         .pr_attach =            tcp_usr_attach,
 1356         .pr_bind =              tcp_usr_bind,
 1357         .pr_connect =           tcp_usr_connect,
 1358         .pr_control =           in_control,
 1359         .pr_detach =            tcp_usr_detach,
 1360         .pr_disconnect =        tcp_usr_disconnect,
 1361         .pr_listen =            tcp_usr_listen,
 1362         .pr_peeraddr =          in_getpeeraddr,
 1363         .pr_rcvd =              tcp_usr_rcvd,
 1364         .pr_rcvoob =            tcp_usr_rcvoob,
 1365         .pr_send =              tcp_usr_send,
 1366         .pr_ready =             tcp_usr_ready,
 1367         .pr_shutdown =          tcp_usr_shutdown,
 1368         .pr_sockaddr =          in_getsockaddr,
 1369         .pr_sosetlabel =        in_pcbsosetlabel,
 1370         .pr_close =             tcp_usr_close,
 1371 };
 1372 #endif /* INET */
 1373 
 1374 #ifdef INET6
      /*
       * Protocol switch entry for TCP stream sockets, built when INET6 is
       * defined.  It differs from tcp_protosw only in the accept, bind,
       * connect, control, listen, peeraddr and sockaddr hooks, which point
       * at tcp6_*() / in6_*() routines; every other hook is shared.
       */
 1375 struct protosw tcp6_protosw = {
 1376         .pr_type =              SOCK_STREAM,
 1377         .pr_protocol =          IPPROTO_TCP,
 1378         .pr_flags =             PR_CONNREQUIRED | PR_IMPLOPCL |PR_WANTRCVD |
 1379                                     PR_CAPATTACH,
 1380         .pr_ctloutput =         tcp_ctloutput,
 1381         .pr_abort =             tcp_usr_abort,
 1382         .pr_accept =            tcp6_usr_accept,
 1383         .pr_attach =            tcp_usr_attach,
 1384         .pr_bind =              tcp6_usr_bind,
 1385         .pr_connect =           tcp6_usr_connect,
 1386         .pr_control =           in6_control,
 1387         .pr_detach =            tcp_usr_detach,
 1388         .pr_disconnect =        tcp_usr_disconnect,
 1389         .pr_listen =            tcp6_usr_listen,
 1390         .pr_peeraddr =          in6_mapped_peeraddr,
 1391         .pr_rcvd =              tcp_usr_rcvd,
 1392         .pr_rcvoob =            tcp_usr_rcvoob,
 1393         .pr_send =              tcp_usr_send,
 1394         .pr_ready =             tcp_usr_ready,
 1395         .pr_shutdown =          tcp_usr_shutdown,
 1396         .pr_sockaddr =          in6_mapped_sockaddr,
 1397         .pr_sosetlabel =        in_pcbsosetlabel,
 1398         .pr_close =             tcp_usr_close,
 1399 };
 1400 #endif /* INET6 */
 1401 
 1402 #ifdef INET
 1403 /*
 1404  * Common subroutine to open a TCP connection to remote host specified
 1405  * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
 1406  * port number if needed.  Call in_pcbconnect_setup to do the routing and
 1407  * to choose a local host address (interface).  If there is an existing
 1408  * incarnation of the same connection in TIME-WAIT state and if the remote
 1409  * host was sending CC options and if the connection duration was < MSL, then
 1410  * truncate the previous TIME-WAIT state and proceed.
 1411  * Initialize connection parameters and enter SYN-SENT state.
       *
       * As written below, in_pcbbind() is only called when
       * V_tcp_require_unique_port is set and no local port is bound yet, and
       * any existing inpcb reported by in_pcbconnect_setup() (oinp) results
       * in EADDRINUSE.
       *
       * The caller must be in a network epoch section and hold the inpcb
       * write lock (both asserted).  The V_tcbinfo hash lock is taken on
       * entry and released on every return path.
       *
       * Returns 0 on success; otherwise the in_pcbbind() or
       * in_pcbconnect_setup() error, EADDRINUSE, or EAGAIN when
       * in_pcbinshash() fails.
 1412  */
 1413 static int
 1414 tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
 1415 {
 1416         struct inpcb *inp = tptoinpcb(tp), *oinp;
 1417         struct socket *so = tptosocket(tp);
 1418         struct in_addr laddr;
 1419         u_short lport;
 1420         int error;
 1421 
 1422         NET_EPOCH_ASSERT();
 1423         INP_WLOCK_ASSERT(inp);
 1424         INP_HASH_WLOCK(&V_tcbinfo);
 1425 
 1426         if (V_tcp_require_unique_port && inp->inp_lport == 0) {
 1427                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 1428                 if (error)
 1429                         goto out;
 1430         }
 1431 
 1432         /*
 1433          * Cannot simply call in_pcbconnect, because there might be an
 1434          * earlier incarnation of this same connection still in
 1435          * TIME_WAIT state, creating an ADDRINUSE error.
 1436          */
              /*
               * The local address and port go through local copies; the
               * foreign address and port are written into the inpcb directly.
               */
 1437         laddr = inp->inp_laddr;
 1438         lport = inp->inp_lport;
 1439         error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
 1440             &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
 1441         if (error && oinp == NULL)
 1442                 goto out;
 1443         if (oinp) {
 1444                 error = EADDRINUSE;
 1445                 goto out;
 1446         }
 1447         /* Handle initial bind if it hadn't been done in advance. */
 1448         if (inp->inp_lport == 0) {
 1449                 inp->inp_lport = lport;
 1450                 if (in_pcbinshash(inp) != 0) {
                              /* Undo the port assignment made just above. */
 1451                         inp->inp_lport = 0;
 1452                         error = EAGAIN;
 1453                         goto out;
 1454                 }
 1455         }
 1456         inp->inp_laddr = laddr;
 1457         in_pcbrehash(inp);
 1458         INP_HASH_WUNLOCK(&V_tcbinfo);
 1459 
 1460         /*
 1461          * Compute window scaling to request:
 1462          * Scale to fit into sweet spot.  See tcp_syncache.c.
 1463          * XXX: This should move to tcp_output().
 1464          */
 1465         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
 1466             (TCP_MAXWIN << tp->request_r_scale) < sb_max)
 1467                 tp->request_r_scale++;
 1468 
 1469         soisconnecting(so);
 1470         TCPSTAT_INC(tcps_connattempt);
 1471         tcp_state_change(tp, TCPS_SYN_SENT);
 1472         tp->iss = tcp_new_isn(&inp->inp_inc);
 1473         if (tp->t_flags & TF_REQ_TSTMP)
 1474                 tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
 1475         tcp_sendseqinit(tp);
 1476 
 1477         return 0;
 1478 
 1479 out:
 1480         INP_HASH_WUNLOCK(&V_tcbinfo);
 1481         return (error);
 1482 }
 1483 #endif /* INET */
 1484 
 1485 #ifdef INET6
      /*
       * IPv6 counterpart of tcp_connect().  A local port is bound with
       * in6_pcbbind() when V_tcp_require_unique_port is set and none is
       * bound yet, then in6_pcbconnect() attaches the inpcb to 'nam'.  On
       * success the requested window scale is computed, the tcpcb moves to
       * TCPS_SYN_SENT and the initial send sequence state is set up.
       *
       * The caller must hold the inpcb write lock (asserted).  The V_tcbinfo
       * hash lock is taken on entry and released on every return path.
       *
       * Returns 0 on success, otherwise the in6_pcbbind() or in6_pcbconnect()
       * error.
       */
 1486 static int
 1487 tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
 1488 {
 1489         struct inpcb *inp = tptoinpcb(tp);
 1490         int error;
 1491 
 1492         INP_WLOCK_ASSERT(inp);
 1493         INP_HASH_WLOCK(&V_tcbinfo);
 1494 
 1495         if (V_tcp_require_unique_port && inp->inp_lport == 0) {
 1496                 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 1497                 if (error)
 1498                         goto out;
 1499         }
 1500         error = in6_pcbconnect(inp, nam, td->td_ucred);
 1501         if (error != 0)
 1502                 goto out;
 1503         INP_HASH_WUNLOCK(&V_tcbinfo);
 1504 
 1505         /* Compute window scaling to request.  */
 1506         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
 1507             (TCP_MAXWIN << tp->request_r_scale) < sb_max)
 1508                 tp->request_r_scale++;
 1509 
 1510         soisconnecting(inp->inp_socket);
 1511         TCPSTAT_INC(tcps_connattempt);
 1512         tcp_state_change(tp, TCPS_SYN_SENT);
 1513         tp->iss = tcp_new_isn(&inp->inp_inc);
 1514         if (tp->t_flags & TF_REQ_TSTMP)
 1515                 tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
 1516         tcp_sendseqinit(tp);
 1517 
 1518         return 0;
 1519 
 1520 out:
 1521         INP_HASH_WUNLOCK(&V_tcbinfo);
 1522         return error;
 1523 }
 1524 #endif /* INET6 */
 1525 
 1526 /*
 1527  * Export TCP internal state information via a struct tcp_info, based on the
 1528  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
 1529  * (TCP state machine, etc).  We export all information using FreeBSD-native
 1530  * constants -- for example, the numeric values for tcpi_state will differ
 1531  * from Linux.
       *
       * The caller must hold the inpcb write lock (asserted).  *ti is zeroed
       * first, so any field not assigned below reads as 0.
 1532  */
 1533 static void
 1534 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
 1535 {
 1536 
 1537         INP_WLOCK_ASSERT(tptoinpcb(tp));
 1538         bzero(ti, sizeof(*ti));
 1539 
 1540         ti->tcpi_state = tp->t_state;
              /* Timestamps and window scaling need both the REQ and RCVD flag. */
 1541         if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
 1542                 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
 1543         if (tp->t_flags & TF_SACK_PERMIT)
 1544                 ti->tcpi_options |= TCPI_OPT_SACK;
 1545         if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
 1546                 ti->tcpi_options |= TCPI_OPT_WSCALE;
 1547                 ti->tcpi_snd_wscale = tp->snd_scale;
 1548                 ti->tcpi_rcv_wscale = tp->rcv_scale;
 1549         }
 1550         if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
 1551                 ti->tcpi_options |= TCPI_OPT_ECN;
 1552 
              /*
               * NOTE(review): multiplying by 'tick' presumably converts tick
               * counts into microseconds -- confirm.
               */
 1553         ti->tcpi_rto = tp->t_rxtcur * tick;
 1554         ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
 1555         ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
 1556         ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
 1557 
 1558         ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
 1559         ti->tcpi_snd_cwnd = tp->snd_cwnd;
 1560 
 1561         /*
 1562          * FreeBSD-specific extension fields for tcp_info.
 1563          */
 1564         ti->tcpi_rcv_space = tp->rcv_wnd;
 1565         ti->tcpi_rcv_nxt = tp->rcv_nxt;
 1566         ti->tcpi_snd_wnd = tp->snd_wnd;
 1567         ti->tcpi_snd_bwnd = 0;          /* Unused, kept for compat. */
 1568         ti->tcpi_snd_nxt = tp->snd_nxt;
              /* Both MSS fields are filled from t_maxseg. */
 1569         ti->tcpi_snd_mss = tp->t_maxseg;
 1570         ti->tcpi_rcv_mss = tp->t_maxseg;
 1571         ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
 1572         ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
 1573         ti->tcpi_snd_zerowin = tp->t_sndzerowin;
 1574 #ifdef TCP_OFFLOAD
 1575         if (tp->t_flags & TF_TOE) {
 1576                 ti->tcpi_options |= TCPI_OPT_TOE;
 1577                 tcp_offload_tcp_info(tp, ti);
 1578         }
 1579 #endif
 1580         /*
 1581          * AccECN related counters.
 1582          */
 1583         if ((tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) ==
 1584             (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
 1585                 /*
 1586                  * Internal counter starts at 5 for AccECN
 1587                  * but 0 for RFC3168 ECN.
 1588                  */
 1589                 ti->tcpi_delivered_ce = tp->t_scep - 5;
 1590         else
 1591                 ti->tcpi_delivered_ce = tp->t_scep;
 1592         ti->tcpi_received_ce = tp->t_rcep;
 1593 }
 1594 
 1595 /*
 1596  * tcp_ctloutput() must drop the inpcb lock before performing copyin on
 1597  * socket option arguments.  When it re-acquires the lock after the copy, it
 1598  * has to revalidate that the connection is still valid for the socket
 1599  * option.
       *
       * INP_WLOCK_RECHECK_CLEANUP() re-takes the inpcb write lock.  If the
       * inpcb has been marked INP_DROPPED in the meantime it unlocks again,
       * runs 'cleanup' and makes the enclosing function return ECONNRESET.
       * Otherwise it reloads the local 'tp' from the inpcb, so it can only be
       * used where a variable named 'tp' is in scope.  The 'inp' argument is
       * parenthesized where it is dereferenced so that any pointer-valued
       * expression may be passed.
       *
       * INP_WLOCK_RECHECK() is the same check with no cleanup action.
 1600  */
 1601 #define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do {                    \
 1602         INP_WLOCK(inp);                                                 \
 1603         if ((inp)->inp_flags & INP_DROPPED) {                           \
 1604                 INP_WUNLOCK(inp);                                       \
 1605                 cleanup;                                                \
 1606                 return (ECONNRESET);                                    \
 1607         }                                                               \
 1608         tp = intotcpcb(inp);                                            \
 1609 } while (0)
 1610 #define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */)
 1611 
 1612 int
 1613 tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
 1614 {
 1615         struct socket *so = inp->inp_socket;
 1616         struct tcpcb *tp = intotcpcb(inp);
 1617         int error = 0;
 1618 
 1619         MPASS(sopt->sopt_dir == SOPT_SET);
 1620         INP_WLOCK_ASSERT(inp);
 1621         KASSERT((inp->inp_flags & INP_DROPPED) == 0,
 1622             ("inp_flags == %x", inp->inp_flags));
 1623         KASSERT(so != NULL, ("inp_socket == NULL"));
 1624 
 1625         if (sopt->sopt_level != IPPROTO_TCP) {
 1626                 INP_WUNLOCK(inp);
 1627 #ifdef INET6
 1628                 if (inp->inp_vflag & INP_IPV6PROTO)
 1629                         error = ip6_ctloutput(so, sopt);
 1630 #endif
 1631 #if defined(INET6) && defined(INET)
 1632                 else
 1633 #endif
 1634 #ifdef INET
 1635                         error = ip_ctloutput(so, sopt);
 1636 #endif
 1637                 /*
 1638                  * When an IP-level socket option affects TCP, pass control
 1639                  * down to stack tfb_tcp_ctloutput, otherwise return what
 1640                  * IP level returned.
 1641                  */
 1642                 switch (sopt->sopt_level) {
 1643 #ifdef INET6
 1644                 case IPPROTO_IPV6:
 1645                         if ((inp->inp_vflag & INP_IPV6PROTO) == 0)
 1646                                 return (error);
 1647                         switch (sopt->sopt_name) {
 1648                         case IPV6_TCLASS:
 1649                                 /* Notify tcp stacks that care (e.g. RACK). */
 1650                                 break;
 1651                         case IPV6_USE_MIN_MTU:
 1652                                 /* Update t_maxseg accordingly. */
 1653                                 break;
 1654                         default:
 1655                                 return (error);
 1656                         }
 1657                         break;
 1658 #endif
 1659 #ifdef INET
 1660                 case IPPROTO_IP:
 1661                         switch (sopt->sopt_name) {
 1662                         case IP_TOS:
 1663                                 inp->inp_ip_tos &= ~IPTOS_ECN_MASK;
 1664                                 break;
 1665                         case IP_TTL:
 1666                                 /* Notify tcp stacks that care (e.g. RACK). */
 1667                                 break;
 1668                         default:
 1669                                 return (error);
 1670                         }
 1671                         break;
 1672 #endif
 1673                 default:
 1674                         return (error);
 1675                 }
 1676                 INP_WLOCK(inp);
 1677                 if (inp->inp_flags & INP_DROPPED) {
 1678                         INP_WUNLOCK(inp);
 1679                         return (ECONNRESET);
 1680                 }
 1681         } else if (sopt->sopt_name == TCP_FUNCTION_BLK) {
 1682                 /*
 1683                  * Protect the TCP option TCP_FUNCTION_BLK so
 1684                  * that a sub-function can *never* overwrite this.
 1685                  */
 1686                 struct tcp_function_set fsn;
 1687                 struct tcp_function_block *blk;
 1688 
 1689                 INP_WUNLOCK(inp);
 1690                 error = sooptcopyin(sopt, &fsn, sizeof fsn, sizeof fsn);
 1691                 if (error)
 1692                         return (error);
 1693 
 1694                 INP_WLOCK(inp);
 1695                 if (inp->inp_flags & INP_DROPPED) {
 1696                         INP_WUNLOCK(inp);
 1697                         return (ECONNRESET);
 1698                 }
 1699                 tp = intotcpcb(inp);
 1700 
 1701                 blk = find_and_ref_tcp_functions(&fsn);
 1702                 if (blk == NULL) {
 1703                         INP_WUNLOCK(inp);
 1704                         return (ENOENT);
 1705                 }
 1706                 if (tp->t_fb == blk) {
 1707                         /* You already have this */
 1708                         refcount_release(&blk->tfb_refcnt);
 1709                         INP_WUNLOCK(inp);
 1710                         return (0);
 1711                 }
 1712                 if (tp->t_state != TCPS_CLOSED) {
 1713                         /*
 1714                          * The user has advanced the state
 1715                          * past the initial point, we may not
 1716                          * be able to switch.
 1717                          */
 1718                         if (blk->tfb_tcp_handoff_ok != NULL) {
 1719                                 /*
 1720                                  * Does the stack provide a
 1721                                  * query mechanism, if so it may
 1722                                  * still be possible?
 1723                                  */
 1724                                 error = (*blk->tfb_tcp_handoff_ok)(tp);
 1725                         } else
 1726                                 error = EINVAL;
 1727                         if (error) {
 1728                                 refcount_release(&blk->tfb_refcnt);
 1729                                 INP_WUNLOCK(inp);
 1730                                 return(error);
 1731                         }
 1732                 }
 1733                 if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
 1734                         refcount_release(&blk->tfb_refcnt);
 1735                         INP_WUNLOCK(inp);
 1736                         return (ENOENT);
 1737                 }
 1738                 /*
 1739                  * Release the old refcnt, the
 1740                  * lookup acquired a ref on the
 1741                  * new one already.
 1742                  */
 1743                 if (tp->t_fb->tfb_tcp_fb_fini) {
 1744                         struct epoch_tracker et;
 1745                         /*
 1746                          * Tell the stack to cleanup with 0 i.e.
 1747                          * the tcb is not going away.
 1748                          */
 1749                         NET_EPOCH_ENTER(et);
 1750                         (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
 1751                         NET_EPOCH_EXIT(et);
 1752                 }
 1753 #ifdef TCPHPTS
 1754                 /* Assure that we are not on any hpts */
 1755                 tcp_hpts_remove(tptoinpcb(tp));
 1756 #endif
 1757                 if (blk->tfb_tcp_fb_init) {
 1758                         error = (*blk->tfb_tcp_fb_init)(tp);
 1759                         if (error) {
 1760                                 refcount_release(&blk->tfb_refcnt);
 1761                                 if (tp->t_fb->tfb_tcp_fb_init) {
 1762                                         if((*tp->t_fb->tfb_tcp_fb_init)(tp) != 0)  {
 1763                                                 /* Fall back failed, drop the connection */
 1764                                                 INP_WUNLOCK(inp);
 1765                                                 soabort(so);
 1766                                                 return (error);
 1767                                         }
 1768                                 }
 1769                                 goto err_out;
 1770                         }
 1771                 }
 1772                 refcount_release(&tp->t_fb->tfb_refcnt);
 1773                 tp->t_fb = blk;
 1774 #ifdef TCP_OFFLOAD
 1775                 if (tp->t_flags & TF_TOE) {
 1776                         tcp_offload_ctloutput(tp, sopt->sopt_dir,
 1777                              sopt->sopt_name);
 1778                 }
 1779 #endif
 1780 err_out:
 1781                 INP_WUNLOCK(inp);
 1782                 return (error);
 1783         }
 1784 
 1785         /* Pass in the INP locked, callee must unlock it. */
 1786         return (tp->t_fb->tfb_tcp_ctloutput(inp, sopt));
 1787 }
 1788 
 1789 static int
 1790 tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt)
 1791 {
 1792         struct socket *so = inp->inp_socket;
 1793         struct tcpcb *tp = intotcpcb(inp);
 1794         int error = 0;
 1795 
 1796         MPASS(sopt->sopt_dir == SOPT_GET);
 1797         INP_WLOCK_ASSERT(inp);
 1798         KASSERT((inp->inp_flags & INP_DROPPED) == 0,
 1799             ("inp_flags == %x", inp->inp_flags));
 1800         KASSERT(so != NULL, ("inp_socket == NULL"));
 1801 
 1802         if (sopt->sopt_level != IPPROTO_TCP) {
 1803                 INP_WUNLOCK(inp);
 1804 #ifdef INET6
 1805                 if (inp->inp_vflag & INP_IPV6PROTO)
 1806                         error = ip6_ctloutput(so, sopt);
 1807 #endif /* INET6 */
 1808 #if defined(INET6) && defined(INET)
 1809                 else
 1810 #endif
 1811 #ifdef INET
 1812                         error = ip_ctloutput(so, sopt);
 1813 #endif
 1814                 return (error);
 1815         }
 1816         if (((sopt->sopt_name == TCP_FUNCTION_BLK) ||
 1817              (sopt->sopt_name == TCP_FUNCTION_ALIAS))) {
 1818                 struct tcp_function_set fsn;
 1819 
 1820                 if (sopt->sopt_name == TCP_FUNCTION_ALIAS) {
 1821                         memset(&fsn, 0, sizeof(fsn));
 1822                         find_tcp_function_alias(tp->t_fb, &fsn);
 1823                 } else {
 1824                         strncpy(fsn.function_set_name,
 1825                             tp->t_fb->tfb_tcp_block_name,
 1826                             TCP_FUNCTION_NAME_LEN_MAX);
 1827                         fsn.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
 1828                 }
 1829                 fsn.pcbcnt = tp->t_fb->tfb_refcnt;
 1830                 INP_WUNLOCK(inp);
 1831                 error = sooptcopyout(sopt, &fsn, sizeof fsn);
 1832                 return (error);
 1833         }
 1834 
 1835         /* Pass in the INP locked, callee must unlock it. */
 1836         return (tp->t_fb->tfb_tcp_ctloutput(inp, sopt));
 1837 }
 1838 
 1839 int
 1840 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 1841 {
 1842         struct  inpcb *inp;
 1843 
 1844         inp = sotoinpcb(so);
 1845         KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
 1846 
 1847         INP_WLOCK(inp);
 1848         if (inp->inp_flags & INP_DROPPED) {
 1849                 INP_WUNLOCK(inp);
 1850                 return (ECONNRESET);
 1851         }
 1852         if (sopt->sopt_dir == SOPT_SET)
 1853                 return (tcp_ctloutput_set(inp, sopt));
 1854         else if (sopt->sopt_dir == SOPT_GET)
 1855                 return (tcp_ctloutput_get(inp, sopt));
 1856         else
 1857                 panic("%s: sopt_dir $%d", __func__, sopt->sopt_dir);
 1858 }
 1859 
 1860 /*
 1861  * If this assert becomes untrue, we need to change the size of the buf
 1862  * variable in tcp_default_ctloutput().
 1863  */
 1864 #ifdef CTASSERT
 1865 CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN);
 1866 CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN);
 1867 #endif
 1868 
 1869 #ifdef KERN_TLS
 1870 static int
 1871 copyin_tls_enable(struct sockopt *sopt, struct tls_enable *tls)
 1872 {
 1873         struct tls_enable_v0 tls_v0;
 1874         int error;
 1875 
 1876         if (sopt->sopt_valsize == sizeof(tls_v0)) {
 1877                 error = sooptcopyin(sopt, &tls_v0, sizeof(tls_v0),
 1878                     sizeof(tls_v0));
 1879                 if (error)
 1880                         return (error);
 1881                 memset(tls, 0, sizeof(*tls));
 1882                 tls->cipher_key = tls_v0.cipher_key;
 1883                 tls->iv = tls_v0.iv;
 1884                 tls->auth_key = tls_v0.auth_key;
 1885                 tls->cipher_algorithm = tls_v0.cipher_algorithm;
 1886                 tls->cipher_key_len = tls_v0.cipher_key_len;
 1887                 tls->iv_len = tls_v0.iv_len;
 1888                 tls->auth_algorithm = tls_v0.auth_algorithm;
 1889                 tls->auth_key_len = tls_v0.auth_key_len;
 1890                 tls->flags = tls_v0.flags;
 1891                 tls->tls_vmajor = tls_v0.tls_vmajor;
 1892                 tls->tls_vminor = tls_v0.tls_vminor;
 1893                 return (0);
 1894         }
 1895 
 1896         return (sooptcopyin(sopt, tls, sizeof(*tls), sizeof(*tls)));
 1897 }
 1898 #endif
 1899 
 1900 extern struct cc_algo newreno_cc_algo;
 1901 
 1902 static int
 1903 tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt)
 1904 {
 1905         struct cc_algo *algo;
 1906         void *ptr = NULL;
 1907         struct tcpcb *tp;
 1908         struct cc_var cc_mem;
 1909         char    buf[TCP_CA_NAME_MAX];
 1910         size_t mem_sz;
 1911         int error;
 1912 
 1913         INP_WUNLOCK(inp);
 1914         error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
 1915         if (error)
 1916                 return(error);
 1917         buf[sopt->sopt_valsize] = '\0';
 1918         CC_LIST_RLOCK();
 1919         STAILQ_FOREACH(algo, &cc_list, entries) {
 1920                 if (strncmp(buf, algo->name,
 1921                             TCP_CA_NAME_MAX) == 0) {
 1922                         if (algo->flags & CC_MODULE_BEING_REMOVED) {
 1923                                 /* We can't "see" modules being unloaded */
 1924                                 continue;
 1925                         }
 1926                         break;
 1927                 }
 1928         }
 1929         if (algo == NULL) {
 1930                 CC_LIST_RUNLOCK();
 1931                 return(ESRCH);
 1932         }
 1933         /* 
 1934          * With a reference the algorithm cannot be removed
 1935          * so we hold a reference through the change process.
 1936          */
 1937         cc_refer(algo);
 1938         CC_LIST_RUNLOCK();
 1939         if (algo->cb_init != NULL) {
 1940                 /* We can now pre-get the memory for the CC */
 1941                 mem_sz = (*algo->cc_data_sz)();
 1942                 if (mem_sz == 0) {
 1943                         goto no_mem_needed;
 1944                 }
 1945                 ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK);
 1946         } else {
 1947 no_mem_needed:
 1948                 mem_sz = 0;
 1949                 ptr = NULL;
 1950         }
 1951         /*
 1952          * Make sure its all clean and zero and also get
 1953          * back the inplock.
 1954          */
 1955         memset(&cc_mem, 0, sizeof(cc_mem));
 1956         INP_WLOCK(inp);
 1957         if (inp->inp_flags & INP_DROPPED) {
 1958                 INP_WUNLOCK(inp);
 1959                 if (ptr)
 1960                         free(ptr, M_CC_MEM);
 1961                 /* Release our temp reference */
 1962                 CC_LIST_RLOCK();
 1963                 cc_release(algo);
 1964                 CC_LIST_RUNLOCK();
 1965                 return (ECONNRESET);
 1966         }
 1967         tp = intotcpcb(inp);
 1968         if (ptr != NULL)
 1969                 memset(ptr, 0, mem_sz);
 1970         cc_mem.ccvc.tcp = tp;
 1971         /*
 1972          * We once again hold a write lock over the tcb so it's
 1973          * safe to do these things without ordering concerns.
 1974          * Note here we init into stack memory.
 1975          */
 1976         if (algo->cb_init != NULL)
 1977                 error = algo->cb_init(&cc_mem, ptr);
 1978         else
 1979                 error = 0;
 1980         /*
 1981          * The CC algorithms, when given their memory
 1982          * should not fail we could in theory have a
 1983          * KASSERT here.
 1984          */
 1985         if (error == 0) {
 1986                 /*
 1987                  * Touchdown, lets go ahead and move the
 1988                  * connection to the new CC module by
 1989                  * copying in the cc_mem after we call
 1990                  * the old ones cleanup (if any).
 1991                  */
 1992                 if (CC_ALGO(tp)->cb_destroy != NULL)
 1993                         CC_ALGO(tp)->cb_destroy(&tp->t_ccv);
 1994                 /* Detach the old CC from the tcpcb  */
 1995                 cc_detach(tp);
 1996                 /* Copy in our temp memory that was inited */
 1997                 memcpy(&tp->t_ccv, &cc_mem, sizeof(struct cc_var));
 1998                 /* Now attach the new, which takes a reference */
 1999                 cc_attach(tp, algo);
 2000                 /* Ok now are we where we have gotten past any conn_init? */
 2001                 if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) {
 2002                         /* Yep run the connection init for the new CC */
 2003                         CC_ALGO(tp)->conn_init(&tp->t_ccv);
 2004                 }
 2005         } else if (ptr)
 2006                 free(ptr, M_CC_MEM);
 2007         INP_WUNLOCK(inp);
 2008         /* Now lets release our temp reference */
 2009         CC_LIST_RLOCK();
 2010         cc_release(algo);
 2011         CC_LIST_RUNLOCK();
 2012         return (error);
 2013 }
 2014 
 2015 int
 2016 tcp_default_ctloutput(struct inpcb *inp, struct sockopt *sopt)
 2017 {
 2018         struct tcpcb *tp = intotcpcb(inp);
 2019         int     error, opt, optval;
 2020         u_int   ui;
 2021         struct  tcp_info ti;
 2022 #ifdef KERN_TLS
 2023         struct tls_enable tls;
 2024         struct socket *so = inp->inp_socket;
 2025 #endif
 2026         char    *pbuf, buf[TCP_LOG_ID_LEN];
 2027 #ifdef STATS
 2028         struct statsblob *sbp;
 2029 #endif
 2030         size_t  len;
 2031 
 2032         INP_WLOCK_ASSERT(inp);
 2033         KASSERT((inp->inp_flags & INP_DROPPED) == 0,
 2034             ("inp_flags == %x", inp->inp_flags));
 2035         KASSERT(inp->inp_socket != NULL, ("inp_socket == NULL"));
 2036 
 2037         switch (sopt->sopt_level) {
 2038 #ifdef INET6
 2039         case IPPROTO_IPV6:
 2040                 MPASS(inp->inp_vflag & INP_IPV6PROTO);
 2041                 switch (sopt->sopt_name) {
 2042                 case IPV6_USE_MIN_MTU:
 2043                         tcp6_use_min_mtu(tp);
 2044                         /* FALLTHROUGH */
 2045                 }
 2046                 INP_WUNLOCK(inp);
 2047                 return (0);
 2048 #endif
 2049 #ifdef INET
 2050         case IPPROTO_IP:
 2051                 INP_WUNLOCK(inp);
 2052                 return (0);
 2053 #endif
 2054         }
 2055 
 2056         /*
 2057          * For TCP_CCALGOOPT forward the control to CC module, for both
 2058          * SOPT_SET and SOPT_GET.
 2059          */
 2060         switch (sopt->sopt_name) {
 2061         case TCP_CCALGOOPT:
 2062                 INP_WUNLOCK(inp);
 2063                 if (sopt->sopt_valsize > CC_ALGOOPT_LIMIT)
 2064                         return (EINVAL);
 2065                 pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
 2066                 error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize,
 2067                     sopt->sopt_valsize);
 2068                 if (error) {
 2069                         free(pbuf, M_TEMP);
 2070                         return (error);
 2071                 }
 2072                 INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP));
 2073                 if (CC_ALGO(tp)->ctl_output != NULL)
 2074                         error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, sopt, pbuf);
 2075                 else
 2076                         error = ENOENT;
 2077                 INP_WUNLOCK(inp);
 2078                 if (error == 0 && sopt->sopt_dir == SOPT_GET)
 2079                         error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize);
 2080                 free(pbuf, M_TEMP);
 2081                 return (error);
 2082         }
 2083 
 2084         switch (sopt->sopt_dir) {
 2085         case SOPT_SET:
 2086                 switch (sopt->sopt_name) {
 2087 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 2088                 case TCP_MD5SIG:
 2089                         INP_WUNLOCK(inp);
 2090                         if (!TCPMD5_ENABLED())
 2091                                 return (ENOPROTOOPT);
 2092                         error = TCPMD5_PCBCTL(inp, sopt);
 2093                         if (error)
 2094                                 return (error);
 2095                         INP_WLOCK_RECHECK(inp);
 2096                         goto unlock_and_done;
 2097 #endif /* IPSEC */
 2098 
 2099                 case TCP_NODELAY:
 2100                 case TCP_NOOPT:
 2101                 case TCP_LRD:
 2102                         INP_WUNLOCK(inp);
 2103                         error = sooptcopyin(sopt, &optval, sizeof optval,
 2104                             sizeof optval);
 2105                         if (error)
 2106                                 return (error);
 2107 
 2108                         INP_WLOCK_RECHECK(inp);
 2109                         switch (sopt->sopt_name) {
 2110                         case TCP_NODELAY:
 2111                                 opt = TF_NODELAY;
 2112                                 break;
 2113                         case TCP_NOOPT:
 2114                                 opt = TF_NOOPT;
 2115                                 break;
 2116                         case TCP_LRD:
 2117                                 opt = TF_LRD;
 2118                                 break;
 2119                         default:
 2120                                 opt = 0; /* dead code to fool gcc */
 2121                                 break;
 2122                         }
 2123 
 2124                         if (optval)
 2125                                 tp->t_flags |= opt;
 2126                         else
 2127                                 tp->t_flags &= ~opt;
 2128 unlock_and_done:
 2129 #ifdef TCP_OFFLOAD
 2130                         if (tp->t_flags & TF_TOE) {
 2131                                 tcp_offload_ctloutput(tp, sopt->sopt_dir,
 2132                                     sopt->sopt_name);
 2133                         }
 2134 #endif
 2135                         INP_WUNLOCK(inp);
 2136                         break;
 2137 
 2138                 case TCP_NOPUSH:
 2139                         INP_WUNLOCK(inp);
 2140                         error = sooptcopyin(sopt, &optval, sizeof optval,
 2141                             sizeof optval);
 2142                         if (error)
 2143                                 return (error);
 2144 
 2145                         INP_WLOCK_RECHECK(inp);
 2146                         if (optval)
 2147                                 tp->t_flags |= TF_NOPUSH;
 2148                         else if (tp->t_flags & TF_NOPUSH) {
 2149                                 tp->t_flags &= ~TF_NOPUSH;
 2150                                 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
 2151                                         struct epoch_tracker et;
 2152 
 2153                                         NET_EPOCH_ENTER(et);
 2154                                         error = tcp_output_nodrop(tp);
 2155                                         NET_EPOCH_EXIT(et);
 2156                                 }
 2157                         }
 2158                         goto unlock_and_done;
 2159 
 2160                 case TCP_REMOTE_UDP_ENCAPS_PORT:
 2161                         INP_WUNLOCK(inp);
 2162                         error = sooptcopyin(sopt, &optval, sizeof optval,
 2163                             sizeof optval);
 2164                         if (error)
 2165                                 return (error);
 2166                         if ((optval < TCP_TUNNELING_PORT_MIN) ||
 2167                             (optval > TCP_TUNNELING_PORT_MAX)) {
 2168                                 /* Its got to be in range */
 2169                                 return (EINVAL);
 2170                         }
 2171                         if ((V_tcp_udp_tunneling_port == 0) && (optval != 0)) {
 2172                                 /* You have to have enabled a UDP tunneling port first */
 2173                                 return (EINVAL);
 2174                         }
 2175                         INP_WLOCK_RECHECK(inp);
 2176                         if (tp->t_state != TCPS_CLOSED) {
 2177                                 /* You can't change after you are connected */
 2178                                 error = EINVAL;
 2179                         } else {
 2180                                 /* Ok we are all good set the port */
 2181                                 tp->t_port = htons(optval);
 2182                         }
 2183                         goto unlock_and_done;
 2184 
 2185                 case TCP_MAXSEG:
 2186                         INP_WUNLOCK(inp);
 2187                         error = sooptcopyin(sopt, &optval, sizeof optval,
 2188                             sizeof optval);
 2189                         if (error)
 2190                                 return (error);
 2191 
 2192                         INP_WLOCK_RECHECK(inp);
 2193                         if (optval > 0 && optval <= tp->t_maxseg &&
 2194                             optval + 40 >= V_tcp_minmss)
 2195                                 tp->t_maxseg = optval;
 2196                         else
 2197                                 error = EINVAL;
 2198                         goto unlock_and_done;
 2199 
 2200                 case TCP_INFO:
 2201                         INP_WUNLOCK(inp);
 2202                         error = EINVAL;
 2203                         break;
 2204 
 2205                 case TCP_STATS:
 2206                         INP_WUNLOCK(inp);
 2207 #ifdef STATS
 2208                         error = sooptcopyin(sopt, &optval, sizeof optval,
 2209                             sizeof optval);
 2210                         if (error)
 2211                                 return (error);
 2212 
 2213                         if (optval > 0)
 2214                                 sbp = stats_blob_alloc(
 2215                                     V_tcp_perconn_stats_dflt_tpl, 0);
 2216                         else
 2217                                 sbp = NULL;
 2218 
 2219                         INP_WLOCK_RECHECK(inp);
 2220                         if ((tp->t_stats != NULL && sbp == NULL) ||
 2221                             (tp->t_stats == NULL && sbp != NULL)) {
 2222                                 struct statsblob *t = tp->t_stats;
 2223                                 tp->t_stats = sbp;
 2224                                 sbp = t;
 2225                         }
 2226                         INP_WUNLOCK(inp);
 2227 
 2228                         stats_blob_destroy(sbp);
 2229 #else
 2230                         return (EOPNOTSUPP);
 2231 #endif /* !STATS */
 2232                         break;
 2233 
 2234                 case TCP_CONGESTION:
 2235                         error = tcp_set_cc_mod(inp, sopt);
 2236                         break;
 2237 
 2238                 case TCP_REUSPORT_LB_NUMA:
 2239                         INP_WUNLOCK(inp);
 2240                         error = sooptcopyin(sopt, &optval, sizeof(optval),
 2241                             sizeof(optval));
 2242                         INP_WLOCK_RECHECK(inp);
 2243                         if (!error)
 2244                                 error = in_pcblbgroup_numa(inp, optval);
 2245                         INP_WUNLOCK(inp);
 2246                         break;
 2247 
 2248 #ifdef KERN_TLS
 2249                 case TCP_TXTLS_ENABLE:
 2250                         INP_WUNLOCK(inp);
 2251                         error = copyin_tls_enable(sopt, &tls);
 2252                         if (error)
 2253                                 break;
 2254                         error = ktls_enable_tx(so, &tls);
 2255                         break;
 2256                 case TCP_TXTLS_MODE:
 2257                         INP_WUNLOCK(inp);
 2258                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 2259                         if (error)
 2260                                 return (error);
 2261 
 2262                         INP_WLOCK_RECHECK(inp);
 2263                         error = ktls_set_tx_mode(so, ui);
 2264                         INP_WUNLOCK(inp);
 2265                         break;
 2266                 case TCP_RXTLS_ENABLE:
 2267                         INP_WUNLOCK(inp);
 2268                         error = sooptcopyin(sopt, &tls, sizeof(tls),
 2269                             sizeof(tls));
 2270                         if (error)
 2271                                 break;
 2272                         error = ktls_enable_rx(so, &tls);
 2273                         break;
 2274 #endif
 2275                 case TCP_MAXUNACKTIME:
 2276                 case TCP_KEEPIDLE:
 2277                 case TCP_KEEPINTVL:
 2278                 case TCP_KEEPINIT:
 2279                         INP_WUNLOCK(inp);
 2280                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 2281                         if (error)
 2282                                 return (error);
 2283 
 2284                         if (ui > (UINT_MAX / hz)) {
 2285                                 error = EINVAL;
 2286                                 break;
 2287                         }
 2288                         ui *= hz;
 2289 
 2290                         INP_WLOCK_RECHECK(inp);
 2291                         switch (sopt->sopt_name) {
 2292                         case TCP_MAXUNACKTIME:
 2293                                 tp->t_maxunacktime = ui;
 2294                                 break;
 2295 
 2296                         case TCP_KEEPIDLE:
 2297                                 tp->t_keepidle = ui;
 2298                                 /*
 2299                                  * XXX: better check current remaining
 2300                                  * timeout and "merge" it with new value.
 2301                                  */
 2302                                 if ((tp->t_state > TCPS_LISTEN) &&
 2303                                     (tp->t_state <= TCPS_CLOSING))
 2304                                         tcp_timer_activate(tp, TT_KEEP,
 2305                                             TP_KEEPIDLE(tp));
 2306                                 break;
 2307                         case TCP_KEEPINTVL:
 2308                                 tp->t_keepintvl = ui;
 2309                                 if ((tp->t_state == TCPS_FIN_WAIT_2) &&
 2310                                     (TP_MAXIDLE(tp) > 0))
 2311                                         tcp_timer_activate(tp, TT_2MSL,
 2312                                             TP_MAXIDLE(tp));
 2313                                 break;
 2314                         case TCP_KEEPINIT:
 2315                                 tp->t_keepinit = ui;
 2316                                 if (tp->t_state == TCPS_SYN_RECEIVED ||
 2317                                     tp->t_state == TCPS_SYN_SENT)
 2318                                         tcp_timer_activate(tp, TT_KEEP,
 2319                                             TP_KEEPINIT(tp));
 2320                                 break;
 2321                         }
 2322                         goto unlock_and_done;
 2323 
 2324                 case TCP_KEEPCNT:
 2325                         INP_WUNLOCK(inp);
 2326                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 2327                         if (error)
 2328                                 return (error);
 2329 
 2330                         INP_WLOCK_RECHECK(inp);
 2331                         tp->t_keepcnt = ui;
 2332                         if ((tp->t_state == TCPS_FIN_WAIT_2) &&
 2333                             (TP_MAXIDLE(tp) > 0))
 2334                                 tcp_timer_activate(tp, TT_2MSL,
 2335                                     TP_MAXIDLE(tp));
 2336                         goto unlock_and_done;
 2337 
 2338 #ifdef TCPPCAP
 2339                 case TCP_PCAP_OUT:
 2340                 case TCP_PCAP_IN:
 2341                         INP_WUNLOCK(inp);
 2342                         error = sooptcopyin(sopt, &optval, sizeof optval,
 2343                             sizeof optval);
 2344                         if (error)
 2345                                 return (error);
 2346 
 2347                         INP_WLOCK_RECHECK(inp);
 2348                         if (optval >= 0)
 2349                                 tcp_pcap_set_sock_max(TCP_PCAP_OUT ?
 2350                                         &(tp->t_outpkts) : &(tp->t_inpkts),
 2351                                         optval);
 2352                         else
 2353                                 error = EINVAL;
 2354                         goto unlock_and_done;
 2355 #endif
 2356 
 2357                 case TCP_FASTOPEN: {
 2358                         struct tcp_fastopen tfo_optval;
 2359 
 2360                         INP_WUNLOCK(inp);
 2361                         if (!V_tcp_fastopen_client_enable &&
 2362                             !V_tcp_fastopen_server_enable)
 2363                                 return (EPERM);
 2364 
 2365                         error = sooptcopyin(sopt, &tfo_optval,
 2366                                     sizeof(tfo_optval), sizeof(int));
 2367                         if (error)
 2368                                 return (error);
 2369 
 2370                         INP_WLOCK_RECHECK(inp);
 2371                         if ((tp->t_state != TCPS_CLOSED) &&
 2372                             (tp->t_state != TCPS_LISTEN)) {
 2373                                 error = EINVAL;
 2374                                 goto unlock_and_done;
 2375                         }
 2376                         if (tfo_optval.enable) {
 2377                                 if (tp->t_state == TCPS_LISTEN) {
 2378                                         if (!V_tcp_fastopen_server_enable) {
 2379                                                 error = EPERM;
 2380                                                 goto unlock_and_done;
 2381                                         }
 2382 
 2383                                         if (tp->t_tfo_pending == NULL)
 2384                                                 tp->t_tfo_pending =
 2385                                                     tcp_fastopen_alloc_counter();
 2386                                 } else {
 2387                                         /*
 2388                                          * If a pre-shared key was provided,
 2389                                          * stash it in the client cookie
 2390                                          * field of the tcpcb for use during
 2391                                          * connect.
 2392                                          */
 2393                                         if (sopt->sopt_valsize ==
 2394                                             sizeof(tfo_optval)) {
 2395                                                 memcpy(tp->t_tfo_cookie.client,
 2396                                                        tfo_optval.psk,
 2397                                                        TCP_FASTOPEN_PSK_LEN);
 2398                                                 tp->t_tfo_client_cookie_len =
 2399                                                     TCP_FASTOPEN_PSK_LEN;
 2400                                         }
 2401                                 }
 2402                                 tp->t_flags |= TF_FASTOPEN;
 2403                         } else
 2404                                 tp->t_flags &= ~TF_FASTOPEN;
 2405                         goto unlock_and_done;
 2406                 }
 2407 
 2408 #ifdef TCP_BLACKBOX
 2409                 case TCP_LOG:
 2410                         INP_WUNLOCK(inp);
 2411                         error = sooptcopyin(sopt, &optval, sizeof optval,
 2412                             sizeof optval);
 2413                         if (error)
 2414                                 return (error);
 2415 
 2416                         INP_WLOCK_RECHECK(inp);
 2417                         error = tcp_log_state_change(tp, optval);
 2418                         goto unlock_and_done;
 2419 
 2420                 case TCP_LOGBUF:
 2421                         INP_WUNLOCK(inp);
 2422                         error = EINVAL;
 2423                         break;
 2424 
 2425                 case TCP_LOGID:
 2426                         INP_WUNLOCK(inp);
 2427                         error = sooptcopyin(sopt, buf, TCP_LOG_ID_LEN - 1, 0);
 2428                         if (error)
 2429                                 break;
 2430                         buf[sopt->sopt_valsize] = '\0';
 2431                         INP_WLOCK_RECHECK(inp);
 2432                         error = tcp_log_set_id(tp, buf);
 2433                         /* tcp_log_set_id() unlocks the INP. */
 2434                         break;
 2435 
 2436                 case TCP_LOGDUMP:
 2437                 case TCP_LOGDUMPID:
 2438                         INP_WUNLOCK(inp);
 2439                         error =
 2440                             sooptcopyin(sopt, buf, TCP_LOG_REASON_LEN - 1, 0);
 2441                         if (error)
 2442                                 break;
 2443                         buf[sopt->sopt_valsize] = '\0';
 2444                         INP_WLOCK_RECHECK(inp);
 2445                         if (sopt->sopt_name == TCP_LOGDUMP) {
 2446                                 error = tcp_log_dump_tp_logbuf(tp, buf,
 2447                                     M_WAITOK, true);
 2448                                 INP_WUNLOCK(inp);
 2449                         } else {
 2450                                 tcp_log_dump_tp_bucket_logbufs(tp, buf);
 2451                                 /*
 2452                                  * tcp_log_dump_tp_bucket_logbufs() drops the
 2453                                  * INP lock.
 2454                                  */
 2455                         }
 2456                         break;
 2457 #endif
 2458 
 2459                 default:
 2460                         INP_WUNLOCK(inp);
 2461                         error = ENOPROTOOPT;
 2462                         break;
 2463                 }
 2464                 break;
 2465 
 2466         case SOPT_GET:
 2467                 tp = intotcpcb(inp);
 2468                 switch (sopt->sopt_name) {
 2469 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 2470                 case TCP_MD5SIG:
 2471                         INP_WUNLOCK(inp);
 2472                         if (!TCPMD5_ENABLED())
 2473                                 return (ENOPROTOOPT);
 2474                         error = TCPMD5_PCBCTL(inp, sopt);
 2475                         break;
 2476 #endif
 2477 
 2478                 case TCP_NODELAY:
 2479                         optval = tp->t_flags & TF_NODELAY;
 2480                         INP_WUNLOCK(inp);
 2481                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2482                         break;
 2483                 case TCP_MAXSEG:
 2484                         optval = tp->t_maxseg;
 2485                         INP_WUNLOCK(inp);
 2486                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2487                         break;
 2488                 case TCP_REMOTE_UDP_ENCAPS_PORT:
 2489                         optval = ntohs(tp->t_port);
 2490                         INP_WUNLOCK(inp);
 2491                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2492                         break;
 2493                 case TCP_NOOPT:
 2494                         optval = tp->t_flags & TF_NOOPT;
 2495                         INP_WUNLOCK(inp);
 2496                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2497                         break;
 2498                 case TCP_NOPUSH:
 2499                         optval = tp->t_flags & TF_NOPUSH;
 2500                         INP_WUNLOCK(inp);
 2501                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2502                         break;
 2503                 case TCP_INFO:
 2504                         tcp_fill_info(tp, &ti);
 2505                         INP_WUNLOCK(inp);
 2506                         error = sooptcopyout(sopt, &ti, sizeof ti);
 2507                         break;
 2508                 case TCP_STATS:
 2509                         {
 2510 #ifdef STATS
 2511                         int nheld;
 2512                         TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0;
 2513 
 2514                         error = 0;
 2515                         socklen_t outsbsz = sopt->sopt_valsize;
 2516                         if (tp->t_stats == NULL)
 2517                                 error = ENOENT;
 2518                         else if (outsbsz >= tp->t_stats->cursz)
 2519                                 outsbsz = tp->t_stats->cursz;
 2520                         else if (outsbsz >= sizeof(struct statsblob))
 2521                                 outsbsz = sizeof(struct statsblob);
 2522                         else
 2523                                 error = EINVAL;
 2524                         INP_WUNLOCK(inp);
 2525                         if (error)
 2526                                 break;
 2527 
 2528                         sbp = sopt->sopt_val;
 2529                         nheld = atop(round_page(((vm_offset_t)sbp) +
 2530                             (vm_size_t)outsbsz) - trunc_page((vm_offset_t)sbp));
 2531                         vm_page_t ma[nheld];
 2532                         if (vm_fault_quick_hold_pages(
 2533                             &curproc->p_vmspace->vm_map, (vm_offset_t)sbp,
 2534                             outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma,
 2535                             nheld) < 0) {
 2536                                 error = EFAULT;
 2537                                 break;
 2538                         }
 2539 
 2540                         if ((error = copyin_nofault(&(sbp->flags), &sbflags,
 2541                             SIZEOF_MEMBER(struct statsblob, flags))))
 2542                                 goto unhold;
 2543 
 2544                         INP_WLOCK_RECHECK(inp);
 2545                         error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats,
 2546                             sbflags | SB_CLONE_USRDSTNOFAULT);
 2547                         INP_WUNLOCK(inp);
 2548                         sopt->sopt_valsize = outsbsz;
 2549 unhold:
 2550                         vm_page_unhold_pages(ma, nheld);
 2551 #else
 2552                         INP_WUNLOCK(inp);
 2553                         error = EOPNOTSUPP;
 2554 #endif /* !STATS */
 2555                         break;
 2556                         }
 2557                 case TCP_CONGESTION:
 2558                         len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
 2559                         INP_WUNLOCK(inp);
 2560                         error = sooptcopyout(sopt, buf, len + 1);
 2561                         break;
 2562                 case TCP_MAXUNACKTIME:
 2563                 case TCP_KEEPIDLE:
 2564                 case TCP_KEEPINTVL:
 2565                 case TCP_KEEPINIT:
 2566                 case TCP_KEEPCNT:
 2567                         switch (sopt->sopt_name) {
 2568                         case TCP_MAXUNACKTIME:
 2569                                 ui = TP_MAXUNACKTIME(tp) / hz;
 2570                                 break;
 2571                         case TCP_KEEPIDLE:
 2572                                 ui = TP_KEEPIDLE(tp) / hz;
 2573                                 break;
 2574                         case TCP_KEEPINTVL:
 2575                                 ui = TP_KEEPINTVL(tp) / hz;
 2576                                 break;
 2577                         case TCP_KEEPINIT:
 2578                                 ui = TP_KEEPINIT(tp) / hz;
 2579                                 break;
 2580                         case TCP_KEEPCNT:
 2581                                 ui = TP_KEEPCNT(tp);
 2582                                 break;
 2583                         }
 2584                         INP_WUNLOCK(inp);
 2585                         error = sooptcopyout(sopt, &ui, sizeof(ui));
 2586                         break;
 2587 #ifdef TCPPCAP
 2588                 case TCP_PCAP_OUT:
 2589                 case TCP_PCAP_IN:
 2590                         optval = tcp_pcap_get_sock_max(TCP_PCAP_OUT ?
 2591                                         &(tp->t_outpkts) : &(tp->t_inpkts));
 2592                         INP_WUNLOCK(inp);
 2593                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2594                         break;
 2595 #endif
 2596                 case TCP_FASTOPEN:
 2597                         optval = tp->t_flags & TF_FASTOPEN;
 2598                         INP_WUNLOCK(inp);
 2599                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2600                         break;
 2601 #ifdef TCP_BLACKBOX
 2602                 case TCP_LOG:
 2603                         optval = tp->t_logstate;
 2604                         INP_WUNLOCK(inp);
 2605                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 2606                         break;
 2607                 case TCP_LOGBUF:
 2608                         /* tcp_log_getlogbuf() does INP_WUNLOCK(inp) */
 2609                         error = tcp_log_getlogbuf(sopt, tp);
 2610                         break;
 2611                 case TCP_LOGID:
 2612                         len = tcp_log_get_id(tp, buf);
 2613                         INP_WUNLOCK(inp);
 2614                         error = sooptcopyout(sopt, buf, len + 1);
 2615                         break;
 2616                 case TCP_LOGDUMP:
 2617                 case TCP_LOGDUMPID:
 2618                         INP_WUNLOCK(inp);
 2619                         error = EINVAL;
 2620                         break;
 2621 #endif
 2622 #ifdef KERN_TLS
 2623                 case TCP_TXTLS_MODE:
 2624                         error = ktls_get_tx_mode(so, &optval);
 2625                         INP_WUNLOCK(inp);
 2626                         if (error == 0)
 2627                                 error = sooptcopyout(sopt, &optval,
 2628                                     sizeof(optval));
 2629                         break;
 2630                 case TCP_RXTLS_MODE:
 2631                         error = ktls_get_rx_mode(so, &optval);
 2632                         INP_WUNLOCK(inp);
 2633                         if (error == 0)
 2634                                 error = sooptcopyout(sopt, &optval,
 2635                                     sizeof(optval));
 2636                         break;
 2637 #endif
 2638                 case TCP_LRD:
 2639                         optval = tp->t_flags & TF_LRD;
 2640                         INP_WUNLOCK(inp);
 2641                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2642                         break;
 2643                 default:
 2644                         INP_WUNLOCK(inp);
 2645                         error = ENOPROTOOPT;
 2646                         break;
 2647                 }
 2648                 break;
 2649         }
 2650         return (error);
 2651 }
 2652 #undef INP_WLOCK_RECHECK
 2653 #undef INP_WLOCK_RECHECK_CLEANUP
 2654 
 2655 /*
 2656  * Initiate (or continue) disconnect.
 2657  * If embryonic state, just send reset (once).
 2658  * If in ``let data drain'' option and linger null, just drop.
 2659  * Otherwise (hard), mark socket disconnecting and drop
 2660  * current input data; switch states based on user close, and
 2661  * send segment to peer (with FIN).
 2662  */
 2663 static void
 2664 tcp_disconnect(struct tcpcb *tp)
 2665 {
 2666         struct inpcb *inp = tptoinpcb(tp);
 2667         struct socket *so = tptosocket(tp);
 2668 
 2669         NET_EPOCH_ASSERT();
 2670         INP_WLOCK_ASSERT(inp);
 2671 
 2672         /*
 2673          * Neither tcp_close() nor tcp_drop() should return NULL, as the
 2674          * socket is still open.
 2675          */
 2676         if (tp->t_state < TCPS_ESTABLISHED &&
 2677             !(tp->t_state > TCPS_LISTEN && IS_FASTOPEN(tp->t_flags))) {
 2678                 tp = tcp_close(tp);
 2679                 KASSERT(tp != NULL,
 2680                     ("tcp_disconnect: tcp_close() returned NULL"));
 2681         } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
 2682                 tp = tcp_drop(tp, 0);
 2683                 KASSERT(tp != NULL,
 2684                     ("tcp_disconnect: tcp_drop() returned NULL"));
 2685         } else {
 2686                 soisdisconnecting(so);
 2687                 sbflush(&so->so_rcv);
 2688                 tcp_usrclosed(tp);
 2689                 if (!(inp->inp_flags & INP_DROPPED))
 2690                         /* Ignore stack's drop request, we already at it. */
 2691                         (void)tcp_output_nodrop(tp);
 2692         }
 2693 }
 2694 
 2695 /*
 2696  * User issued close, and wish to trail through shutdown states:
 2697  * if never received SYN, just forget it.  If got a SYN from peer,
 2698  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
 2699  * If already got a FIN from peer, then almost done; go to LAST_ACK
 2700  * state.  In all other cases, have already sent FIN to peer (e.g.
 2701  * after PRU_SHUTDOWN), and just have to play tedious game waiting
 2702  * for peer to send FIN or not respond to keep-alives, etc.
 2703  * We can let the user exit from the close as soon as the FIN is acked.
 2704  */
 2705 static void
 2706 tcp_usrclosed(struct tcpcb *tp)
 2707 {
 2708 
 2709         NET_EPOCH_ASSERT();
 2710         INP_WLOCK_ASSERT(tptoinpcb(tp));
 2711 
 2712         switch (tp->t_state) {
 2713         case TCPS_LISTEN:
 2714 #ifdef TCP_OFFLOAD
 2715                 tcp_offload_listen_stop(tp);
 2716 #endif
 2717                 tcp_state_change(tp, TCPS_CLOSED);
 2718                 /* FALLTHROUGH */
 2719         case TCPS_CLOSED:
 2720                 tp = tcp_close(tp);
 2721                 /*
 2722                  * tcp_close() should never return NULL here as the socket is
 2723                  * still open.
 2724                  */
 2725                 KASSERT(tp != NULL,
 2726                     ("tcp_usrclosed: tcp_close() returned NULL"));
 2727                 break;
 2728 
 2729         case TCPS_SYN_SENT:
 2730         case TCPS_SYN_RECEIVED:
 2731                 tp->t_flags |= TF_NEEDFIN;
 2732                 break;
 2733 
 2734         case TCPS_ESTABLISHED:
 2735                 tcp_state_change(tp, TCPS_FIN_WAIT_1);
 2736                 break;
 2737 
 2738         case TCPS_CLOSE_WAIT:
 2739                 tcp_state_change(tp, TCPS_LAST_ACK);
 2740                 break;
 2741         }
 2742         if (tp->t_acktime == 0)
 2743                 tp->t_acktime = ticks;
 2744         if (tp->t_state >= TCPS_FIN_WAIT_2) {
 2745                 soisdisconnected(tptosocket(tp));
 2746                 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
 2747                 if (tp->t_state == TCPS_FIN_WAIT_2) {
 2748                         int timeout;
 2749 
 2750                         timeout = (tcp_fast_finwait2_recycle) ?
 2751                             tcp_finwait2_timeout : TP_MAXIDLE(tp);
 2752                         tcp_timer_activate(tp, TT_2MSL, timeout);
 2753                 }
 2754         }
 2755 }
 2756 
 2757 #ifdef DDB
 2758 static void
 2759 db_print_indent(int indent)
 2760 {
 2761         int i;
 2762 
 2763         for (i = 0; i < indent; i++)
 2764                 db_printf(" ");
 2765 }
 2766 
 2767 static void
 2768 db_print_tstate(int t_state)
 2769 {
 2770 
 2771         switch (t_state) {
 2772         case TCPS_CLOSED:
 2773                 db_printf("TCPS_CLOSED");
 2774                 return;
 2775 
 2776         case TCPS_LISTEN:
 2777                 db_printf("TCPS_LISTEN");
 2778                 return;
 2779 
 2780         case TCPS_SYN_SENT:
 2781                 db_printf("TCPS_SYN_SENT");
 2782                 return;
 2783 
 2784         case TCPS_SYN_RECEIVED:
 2785                 db_printf("TCPS_SYN_RECEIVED");
 2786                 return;
 2787 
 2788         case TCPS_ESTABLISHED:
 2789                 db_printf("TCPS_ESTABLISHED");
 2790                 return;
 2791 
 2792         case TCPS_CLOSE_WAIT:
 2793                 db_printf("TCPS_CLOSE_WAIT");
 2794                 return;
 2795 
 2796         case TCPS_FIN_WAIT_1:
 2797                 db_printf("TCPS_FIN_WAIT_1");
 2798                 return;
 2799 
 2800         case TCPS_CLOSING:
 2801                 db_printf("TCPS_CLOSING");
 2802                 return;
 2803 
 2804         case TCPS_LAST_ACK:
 2805                 db_printf("TCPS_LAST_ACK");
 2806                 return;
 2807 
 2808         case TCPS_FIN_WAIT_2:
 2809                 db_printf("TCPS_FIN_WAIT_2");
 2810                 return;
 2811 
 2812         case TCPS_TIME_WAIT:
 2813                 db_printf("TCPS_TIME_WAIT");
 2814                 return;
 2815 
 2816         default:
 2817                 db_printf("unknown");
 2818                 return;
 2819         }
 2820 }
 2821 
 2822 static void
 2823 db_print_tflags(u_int t_flags)
 2824 {
 2825         int comma;
 2826 
 2827         comma = 0;
 2828         if (t_flags & TF_ACKNOW) {
 2829                 db_printf("%sTF_ACKNOW", comma ? ", " : "");
 2830                 comma = 1;
 2831         }
 2832         if (t_flags & TF_DELACK) {
 2833                 db_printf("%sTF_DELACK", comma ? ", " : "");
 2834                 comma = 1;
 2835         }
 2836         if (t_flags & TF_NODELAY) {
 2837                 db_printf("%sTF_NODELAY", comma ? ", " : "");
 2838                 comma = 1;
 2839         }
 2840         if (t_flags & TF_NOOPT) {
 2841                 db_printf("%sTF_NOOPT", comma ? ", " : "");
 2842                 comma = 1;
 2843         }
 2844         if (t_flags & TF_SENTFIN) {
 2845                 db_printf("%sTF_SENTFIN", comma ? ", " : "");
 2846                 comma = 1;
 2847         }
 2848         if (t_flags & TF_REQ_SCALE) {
 2849                 db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
 2850                 comma = 1;
 2851         }
 2852         if (t_flags & TF_RCVD_SCALE) {
 2853                 db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");
 2854                 comma = 1;
 2855         }
 2856         if (t_flags & TF_REQ_TSTMP) {
 2857                 db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
 2858                 comma = 1;
 2859         }
 2860         if (t_flags & TF_RCVD_TSTMP) {
 2861                 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
 2862                 comma = 1;
 2863         }
 2864         if (t_flags & TF_SACK_PERMIT) {
 2865                 db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
 2866                 comma = 1;
 2867         }
 2868         if (t_flags & TF_NEEDSYN) {
 2869                 db_printf("%sTF_NEEDSYN", comma ? ", " : "");
 2870                 comma = 1;
 2871         }
 2872         if (t_flags & TF_NEEDFIN) {
 2873                 db_printf("%sTF_NEEDFIN", comma ? ", " : "");
 2874                 comma = 1;
 2875         }
 2876         if (t_flags & TF_NOPUSH) {
 2877                 db_printf("%sTF_NOPUSH", comma ? ", " : "");
 2878                 comma = 1;
 2879         }
 2880         if (t_flags & TF_PREVVALID) {
 2881                 db_printf("%sTF_PREVVALID", comma ? ", " : "");
 2882                 comma = 1;
 2883         }
 2884         if (t_flags & TF_MORETOCOME) {
 2885                 db_printf("%sTF_MORETOCOME", comma ? ", " : "");
 2886                 comma = 1;
 2887         }
 2888         if (t_flags & TF_SONOTCONN) {
 2889                 db_printf("%sTF_SONOTCONN", comma ? ", " : "");
 2890                 comma = 1;
 2891         }
 2892         if (t_flags & TF_LASTIDLE) {
 2893                 db_printf("%sTF_LASTIDLE", comma ? ", " : "");
 2894                 comma = 1;
 2895         }
 2896         if (t_flags & TF_RXWIN0SENT) {
 2897                 db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
 2898                 comma = 1;
 2899         }
 2900         if (t_flags & TF_FASTRECOVERY) {
 2901                 db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
 2902                 comma = 1;
 2903         }
 2904         if (t_flags & TF_CONGRECOVERY) {
 2905                 db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
 2906                 comma = 1;
 2907         }
 2908         if (t_flags & TF_WASFRECOVERY) {
 2909                 db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
 2910                 comma = 1;
 2911         }
 2912         if (t_flags & TF_WASCRECOVERY) {
 2913                 db_printf("%sTF_WASCRECOVERY", comma ? ", " : "");
 2914                 comma = 1;
 2915         }
 2916         if (t_flags & TF_SIGNATURE) {
 2917                 db_printf("%sTF_SIGNATURE", comma ? ", " : "");
 2918                 comma = 1;
 2919         }
 2920         if (t_flags & TF_FORCEDATA) {
 2921                 db_printf("%sTF_FORCEDATA", comma ? ", " : "");
 2922                 comma = 1;
 2923         }
 2924         if (t_flags & TF_TSO) {
 2925                 db_printf("%sTF_TSO", comma ? ", " : "");
 2926                 comma = 1;
 2927         }
 2928         if (t_flags & TF_FASTOPEN) {
 2929                 db_printf("%sTF_FASTOPEN", comma ? ", " : "");
 2930                 comma = 1;
 2931         }
 2932 }
 2933 
 2934 static void
 2935 db_print_tflags2(u_int t_flags2)
 2936 {
 2937         int comma;
 2938 
 2939         comma = 0;
 2940         if (t_flags2 & TF2_PLPMTU_BLACKHOLE) {
 2941                 db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : "");
 2942                 comma = 1;
 2943         }
 2944         if (t_flags2 & TF2_PLPMTU_PMTUD) {
 2945                 db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : "");
 2946                 comma = 1;
 2947         }
 2948         if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) {
 2949                 db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : "");
 2950                 comma = 1;
 2951         }
 2952         if (t_flags2 & TF2_LOG_AUTO) {
 2953                 db_printf("%sTF2_LOG_AUTO", comma ? ", " : "");
 2954                 comma = 1;
 2955         }
 2956         if (t_flags2 & TF2_DROP_AF_DATA) {
 2957                 db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : "");
 2958                 comma = 1;
 2959         }
 2960         if (t_flags2 & TF2_ECN_PERMIT) {
 2961                 db_printf("%sTF2_ECN_PERMIT", comma ? ", " : "");
 2962                 comma = 1;
 2963         }
 2964         if (t_flags2 & TF2_ECN_SND_CWR) {
 2965                 db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : "");
 2966                 comma = 1;
 2967         }
 2968         if (t_flags2 & TF2_ECN_SND_ECE) {
 2969                 db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : "");
 2970                 comma = 1;
 2971         }
 2972         if (t_flags2 & TF2_ACE_PERMIT) {
 2973                 db_printf("%sTF2_ACE_PERMIT", comma ? ", " : "");
 2974                 comma = 1;
 2975         }
 2976         if (t_flags2 & TF2_FBYTES_COMPLETE) {
 2977                 db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : "");
 2978                 comma = 1;
 2979         }
 2980 }
 2981 
 2982 static void
 2983 db_print_toobflags(char t_oobflags)
 2984 {
 2985         int comma;
 2986 
 2987         comma = 0;
 2988         if (t_oobflags & TCPOOB_HAVEDATA) {
 2989                 db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
 2990                 comma = 1;
 2991         }
 2992         if (t_oobflags & TCPOOB_HADDATA) {
 2993                 db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
 2994                 comma = 1;
 2995         }
 2996 }
 2997 
 2998 static void
 2999 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
 3000 {
 3001 
 3002         db_print_indent(indent);
 3003         db_printf("%s at %p\n", name, tp);
 3004 
 3005         indent += 2;
 3006 
 3007         db_print_indent(indent);
 3008         db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
 3009            TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
 3010 
 3011         db_print_indent(indent);
 3012         db_printf("t_callout: %p   t_timers: %p\n",
 3013             &tp->t_callout, &tp->t_timers);
 3014 
 3015         db_print_indent(indent);
 3016         db_printf("t_state: %d (", tp->t_state);
 3017         db_print_tstate(tp->t_state);
 3018         db_printf(")\n");
 3019 
 3020         db_print_indent(indent);
 3021         db_printf("t_flags: 0x%x (", tp->t_flags);
 3022         db_print_tflags(tp->t_flags);
 3023         db_printf(")\n");
 3024 
 3025         db_print_indent(indent);
 3026         db_printf("t_flags2: 0x%x (", tp->t_flags2);
 3027         db_print_tflags2(tp->t_flags2);
 3028         db_printf(")\n");
 3029 
 3030         db_print_indent(indent);
 3031         db_printf("snd_una: 0x%08x   snd_max: 0x%08x   snd_nxt: x0%08x\n",
 3032             tp->snd_una, tp->snd_max, tp->snd_nxt);
 3033 
 3034         db_print_indent(indent);
 3035         db_printf("snd_up: 0x%08x   snd_wl1: 0x%08x   snd_wl2: 0x%08x\n",
 3036            tp->snd_up, tp->snd_wl1, tp->snd_wl2);
 3037 
 3038         db_print_indent(indent);
 3039         db_printf("iss: 0x%08x   irs: 0x%08x   rcv_nxt: 0x%08x\n",
 3040             tp->iss, tp->irs, tp->rcv_nxt);
 3041 
 3042         db_print_indent(indent);
 3043         db_printf("rcv_adv: 0x%08x   rcv_wnd: %u   rcv_up: 0x%08x\n",
 3044             tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
 3045 
 3046         db_print_indent(indent);
 3047         db_printf("snd_wnd: %u   snd_cwnd: %u\n",
 3048            tp->snd_wnd, tp->snd_cwnd);
 3049 
 3050         db_print_indent(indent);
 3051         db_printf("snd_ssthresh: %u   snd_recover: "
 3052             "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
 3053 
 3054         db_print_indent(indent);
 3055         db_printf("t_rcvtime: %u   t_startime: %u\n",
 3056             tp->t_rcvtime, tp->t_starttime);
 3057 
 3058         db_print_indent(indent);
 3059         db_printf("t_rttime: %u   t_rtsq: 0x%08x\n",
 3060             tp->t_rtttime, tp->t_rtseq);
 3061 
 3062         db_print_indent(indent);
 3063         db_printf("t_rxtcur: %d   t_maxseg: %u   t_srtt: %d\n",
 3064             tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
 3065 
 3066         db_print_indent(indent);
 3067         db_printf("t_rttvar: %d   t_rxtshift: %d   t_rttmin: %u\n",
 3068             tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin);
 3069 
 3070         db_print_indent(indent);
 3071         db_printf("t_rttupdated: %u   max_sndwnd: %u   t_softerror: %d\n",
 3072             tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
 3073 
 3074         db_print_indent(indent);
 3075         db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
 3076         db_print_toobflags(tp->t_oobflags);
 3077         db_printf(")   t_iobc: 0x%02x\n", tp->t_iobc);
 3078 
 3079         db_print_indent(indent);
 3080         db_printf("snd_scale: %u   rcv_scale: %u   request_r_scale: %u\n",
 3081             tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
 3082 
 3083         db_print_indent(indent);
 3084         db_printf("ts_recent: %u   ts_recent_age: %u\n",
 3085             tp->ts_recent, tp->ts_recent_age);
 3086 
 3087         db_print_indent(indent);
 3088         db_printf("ts_offset: %u   last_ack_sent: 0x%08x   snd_cwnd_prev: "
 3089             "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
 3090 
 3091         db_print_indent(indent);
 3092         db_printf("snd_ssthresh_prev: %u   snd_recover_prev: 0x%08x   "
 3093             "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
 3094             tp->snd_recover_prev, tp->t_badrxtwin);
 3095 
 3096         db_print_indent(indent);
 3097         db_printf("snd_numholes: %d  snd_holes first: %p\n",
 3098             tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
 3099 
 3100         db_print_indent(indent);
 3101         db_printf("snd_fack: 0x%08x   rcv_numsacks: %d\n",
 3102             tp->snd_fack, tp->rcv_numsacks);
 3103 
 3104         /* Skip sackblks, sackhint. */
 3105 
 3106         db_print_indent(indent);
 3107         db_printf("t_rttlow: %d   rfbuf_ts: %u   rfbuf_cnt: %d\n",
 3108             tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
 3109 }
 3110 
 3111 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
 3112 {
 3113         struct tcpcb *tp;
 3114 
 3115         if (!have_addr) {
 3116                 db_printf("usage: show tcpcb <addr>\n");
 3117                 return;
 3118         }
 3119         tp = (struct tcpcb *)addr;
 3120 
 3121         db_print_tcpcb(tp, "tcpcb", 0);
 3122 }
 3123 #endif

Cache object: 356cdb73eddd202f2e7d2886d2e00836


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.