The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1988, 1993
    5  *      The Regents of the University of California.
    6  * Copyright (c) 2006-2007 Robert N. M. Watson
    7  * Copyright (c) 2010-2011 Juniper Networks, Inc.
    8  * All rights reserved.
    9  *
   10  * Portions of this software were developed by Robert N. M. Watson under
   11  * contract to Juniper Networks, Inc.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      From: @(#)tcp_usrreq.c  8.2 (Berkeley) 1/3/94
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD: releng/12.0/sys/netinet/tcp_usrreq.c 338291 2018-08-24 10:50:19Z tuexen $");
   42 
   43 #include "opt_ddb.h"
   44 #include "opt_inet.h"
   45 #include "opt_inet6.h"
   46 #include "opt_ipsec.h"
   47 #include "opt_tcpdebug.h"
   48 
   49 #include <sys/param.h>
   50 #include <sys/systm.h>
   51 #include <sys/limits.h>
   52 #include <sys/malloc.h>
   53 #include <sys/refcount.h>
   54 #include <sys/kernel.h>
   55 #include <sys/sysctl.h>
   56 #include <sys/mbuf.h>
   57 #ifdef INET6
   58 #include <sys/domain.h>
   59 #endif /* INET6 */
   60 #include <sys/socket.h>
   61 #include <sys/socketvar.h>
   62 #include <sys/protosw.h>
   63 #include <sys/proc.h>
   64 #include <sys/jail.h>
   65 #include <sys/syslog.h>
   66 
   67 #ifdef DDB
   68 #include <ddb/ddb.h>
   69 #endif
   70 
   71 #include <net/if.h>
   72 #include <net/if_var.h>
   73 #include <net/route.h>
   74 #include <net/vnet.h>
   75 
   76 #include <netinet/in.h>
   77 #include <netinet/in_kdtrace.h>
   78 #include <netinet/in_pcb.h>
   79 #include <netinet/in_systm.h>
   80 #include <netinet/in_var.h>
   81 #include <netinet/ip_var.h>
   82 #ifdef INET6
   83 #include <netinet/ip6.h>
   84 #include <netinet6/in6_pcb.h>
   85 #include <netinet6/ip6_var.h>
   86 #include <netinet6/scope6_var.h>
   87 #endif
   88 #include <netinet/tcp.h>
   89 #include <netinet/tcp_fsm.h>
   90 #include <netinet/tcp_seq.h>
   91 #include <netinet/tcp_timer.h>
   92 #include <netinet/tcp_var.h>
   93 #include <netinet/tcp_log_buf.h>
   94 #include <netinet/tcpip.h>
   95 #include <netinet/cc/cc.h>
   96 #include <netinet/tcp_fastopen.h>
   97 #include <netinet/tcp_hpts.h>
   98 #ifdef TCPPCAP
   99 #include <netinet/tcp_pcap.h>
  100 #endif
  101 #ifdef TCPDEBUG
  102 #include <netinet/tcp_debug.h>
  103 #endif
  104 #ifdef TCP_OFFLOAD
  105 #include <netinet/tcp_offload.h>
  106 #endif
  107 #include <netipsec/ipsec_support.h>
  108 
  /*
   * TCP protocol interface to the socket abstraction.
   *
   * Forward declarations of the file-local (static) helpers used by the
   * user-request handlers below.  The per-family connect helpers are only
   * declared when the matching address family is compiled in.
   */
  static int      tcp_attach(struct socket *so);
  #ifdef INET
  static int      tcp_connect(struct tcpcb *tp, struct sockaddr *nam,
                      struct thread *td);
  #endif /* INET */
  #ifdef INET6
  static int      tcp6_connect(struct tcpcb *tp, struct sockaddr *nam,
                      struct thread *td);
  #endif /* INET6 */
  static void     tcp_disconnect(struct tcpcb *tp);
  static void     tcp_usrclosed(struct tcpcb *tp);
  static void     tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti);
  124 
  /*
   * User-request tracing hooks.
   *
   * With TCPDEBUG compiled in, TCPDEBUG0 declares a local `ostate',
   * TCPDEBUG1() stores the current connection state (or 0 when there is no
   * tcpcb) in it, and TCPDEBUG2(req) passes the request to tcp_trace() when
   * the socket has SO_DEBUG set.  Without TCPDEBUG all three expand to
   * nothing.
   *
   * The macros depend on locals named `tp', `so' and `ostate' in the
   * calling function.  TCPDEBUG2() is wrapped in do { } while (0) so that it
   * is a single statement and cannot capture an `else' that follows it.
   */
  #ifdef TCPDEBUG
  #define TCPDEBUG0       int ostate = 0
  #define TCPDEBUG1()     ostate = tp ? tp->t_state : 0
  #define TCPDEBUG2(req)  do {                                            \
          if (tp && (so->so_options & SO_DEBUG))                          \
                  tcp_trace(TA_USER, ostate, tp, 0, 0, req);              \
  } while (0)
  #else
  #define TCPDEBUG0
  #define TCPDEBUG1()
  #define TCPDEBUG2(req)
  #endif
  135 
  136 /*
  137  * TCP attaches to socket via pru_attach(), reserving space,
  138  * and an internet control block.
  139  */
  140 static int
  141 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
  142 {
  143         struct inpcb *inp;
  144         struct tcpcb *tp = NULL;
  145         int error;
  146         TCPDEBUG0;
  147 
  148         inp = sotoinpcb(so);
  149         KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
  150         TCPDEBUG1();
  151 
  152         error = tcp_attach(so);
  153         if (error)
  154                 goto out;
  155 
  156         if ((so->so_options & SO_LINGER) && so->so_linger == 0)
  157                 so->so_linger = TCP_LINGERTIME;
  158 
  159         inp = sotoinpcb(so);
  160         tp = intotcpcb(inp);
  161 out:
  162         TCPDEBUG2(PRU_ATTACH);
  163         TCP_PROBE2(debug__user, tp, PRU_ATTACH);
  164         return error;
  165 }
  166 
  167 /*
  168  * tcp_detach is called when the socket layer loses its final reference
  169  * to the socket, be it a file descriptor reference, a reference from TCP,
  170  * etc.  At this point, there is only one case in which we will keep around
  171  * inpcb state: time wait.
  172  *
  173  * This function can probably be re-absorbed back into tcp_usr_detach() now
  174  * that there is a single detach path.
  175  */
  176 static void
  177 tcp_detach(struct socket *so, struct inpcb *inp)
  178 {
  179         struct tcpcb *tp;
  180 
  181         INP_INFO_LOCK_ASSERT(&V_tcbinfo);
  182         INP_WLOCK_ASSERT(inp);
  183 
  184         KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
  185         KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
  186 
  187         tp = intotcpcb(inp);
  188 
  189         if (inp->inp_flags & INP_TIMEWAIT) {
  190                 /*
  191                  * There are two cases to handle: one in which the time wait
  192                  * state is being discarded (INP_DROPPED), and one in which
  193                  * this connection will remain in timewait.  In the former,
  194                  * it is time to discard all state (except tcptw, which has
  195                  * already been discarded by the timewait close code, which
  196                  * should be further up the call stack somewhere).  In the
  197                  * latter case, we detach from the socket, but leave the pcb
  198                  * present until timewait ends.
  199                  *
  200                  * XXXRW: Would it be cleaner to free the tcptw here?
  201                  *
  202                  * Astute question indeed, from twtcp perspective there are
  203                  * four cases to consider:
  204                  *
  205                  * #1 tcp_detach is called at tcptw creation time by
  206                  *  tcp_twstart, then do not discard the newly created tcptw
  207                  *  and leave inpcb present until timewait ends
  208                  * #2 tcp_detach is called at tcptw creation time by
  209                  *  tcp_twstart, but connection is local and tw will be
  210                  *  discarded immediately
  211                  * #3 tcp_detach is called at timewait end (or reuse) by
  212                  *  tcp_twclose, then the tcptw has already been discarded
  213                  *  (or reused) and inpcb is freed here
  214                  * #4 tcp_detach is called() after timewait ends (or reuse)
  215                  *  (e.g. by soclose), then tcptw has already been discarded
  216                  *  (or reused) and inpcb is freed here
  217                  *
  218                  *  In all three cases the tcptw should not be freed here.
  219                  */
  220                 if (inp->inp_flags & INP_DROPPED) {
  221                         in_pcbdetach(inp);
  222                         if (__predict_true(tp == NULL)) {
  223                                 in_pcbfree(inp);
  224                         } else {
  225                                 /*
  226                                  * This case should not happen as in TIMEWAIT
  227                                  * state the inp should not be destroyed before
  228                                  * its tcptw.  If INVARIANTS is defined, panic.
  229                                  */
  230 #ifdef INVARIANTS
  231                                 panic("%s: Panic before an inp double-free: "
  232                                     "INP_TIMEWAIT && INP_DROPPED && tp != NULL"
  233                                     , __func__);
  234 #else
  235                                 log(LOG_ERR, "%s: Avoid an inp double-free: "
  236                                     "INP_TIMEWAIT && INP_DROPPED && tp != NULL"
  237                                     , __func__);
  238 #endif
  239                                 INP_WUNLOCK(inp);
  240                         }
  241                 } else {
  242                         in_pcbdetach(inp);
  243                         INP_WUNLOCK(inp);
  244                 }
  245         } else {
  246                 /*
  247                  * If the connection is not in timewait, we consider two
  248                  * two conditions: one in which no further processing is
  249                  * necessary (dropped || embryonic), and one in which TCP is
  250                  * not yet done, but no longer requires the socket, so the
  251                  * pcb will persist for the time being.
  252                  *
  253                  * XXXRW: Does the second case still occur?
  254                  */
  255                 if (inp->inp_flags & INP_DROPPED ||
  256                     tp->t_state < TCPS_SYN_SENT) {
  257                         tcp_discardcb(tp);
  258                         in_pcbdetach(inp);
  259                         in_pcbfree(inp);
  260                 } else {
  261                         in_pcbdetach(inp);
  262                         INP_WUNLOCK(inp);
  263                 }
  264         }
  265 }
  266 
  267 /*
  268  * pru_detach() detaches the TCP protocol from the socket.
  269  * If the protocol state is non-embryonic, then can't
  270  * do this directly: have to initiate a pru_disconnect(),
  271  * which may finish later; embryonic TCB's can just
  272  * be discarded here.
  273  */
  274 static void
  275 tcp_usr_detach(struct socket *so)
  276 {
  277         struct inpcb *inp;
  278         int rlock = 0;
  279         struct epoch_tracker et;
  280 
  281         inp = sotoinpcb(so);
  282         KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
  283         if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
  284                 INP_INFO_RLOCK_ET(&V_tcbinfo, et);
  285                 rlock = 1;
  286         }
  287         INP_WLOCK(inp);
  288         KASSERT(inp->inp_socket != NULL,
  289             ("tcp_usr_detach: inp_socket == NULL"));
  290         tcp_detach(so, inp);
  291         if (rlock)
  292                 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
  293 }
  294 
  295 #ifdef INET
  296 /*
  297  * Give the socket an address.
  298  */
  299 static int
  300 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  301 {
  302         int error = 0;
  303         struct inpcb *inp;
  304         struct tcpcb *tp = NULL;
  305         struct sockaddr_in *sinp;
  306 
  307         sinp = (struct sockaddr_in *)nam;
  308         if (nam->sa_len != sizeof (*sinp))
  309                 return (EINVAL);
  310         /*
  311          * Must check for multicast addresses and disallow binding
  312          * to them.
  313          */
  314         if (sinp->sin_family == AF_INET &&
  315             IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
  316                 return (EAFNOSUPPORT);
  317 
  318         TCPDEBUG0;
  319         inp = sotoinpcb(so);
  320         KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
  321         INP_WLOCK(inp);
  322         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  323                 error = EINVAL;
  324                 goto out;
  325         }
  326         tp = intotcpcb(inp);
  327         TCPDEBUG1();
  328         INP_HASH_WLOCK(&V_tcbinfo);
  329         error = in_pcbbind(inp, nam, td->td_ucred);
  330         INP_HASH_WUNLOCK(&V_tcbinfo);
  331 out:
  332         TCPDEBUG2(PRU_BIND);
  333         TCP_PROBE2(debug__user, tp, PRU_BIND);
  334         INP_WUNLOCK(inp);
  335 
  336         return (error);
  337 }
  338 #endif /* INET */
  339 
  340 #ifdef INET6
  341 static int
  342 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  343 {
  344         int error = 0;
  345         struct inpcb *inp;
  346         struct tcpcb *tp = NULL;
  347         struct sockaddr_in6 *sin6p;
  348 
  349         sin6p = (struct sockaddr_in6 *)nam;
  350         if (nam->sa_len != sizeof (*sin6p))
  351                 return (EINVAL);
  352         /*
  353          * Must check for multicast addresses and disallow binding
  354          * to them.
  355          */
  356         if (sin6p->sin6_family == AF_INET6 &&
  357             IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
  358                 return (EAFNOSUPPORT);
  359 
  360         TCPDEBUG0;
  361         inp = sotoinpcb(so);
  362         KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
  363         INP_WLOCK(inp);
  364         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  365                 error = EINVAL;
  366                 goto out;
  367         }
  368         tp = intotcpcb(inp);
  369         TCPDEBUG1();
  370         INP_HASH_WLOCK(&V_tcbinfo);
  371         inp->inp_vflag &= ~INP_IPV4;
  372         inp->inp_vflag |= INP_IPV6;
  373 #ifdef INET
  374         if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
  375                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
  376                         inp->inp_vflag |= INP_IPV4;
  377                 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
  378                         struct sockaddr_in sin;
  379 
  380                         in6_sin6_2_sin(&sin, sin6p);
  381                         if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
  382                                 error = EAFNOSUPPORT;
  383                                 INP_HASH_WUNLOCK(&V_tcbinfo);
  384                                 goto out;
  385                         }
  386                         inp->inp_vflag |= INP_IPV4;
  387                         inp->inp_vflag &= ~INP_IPV6;
  388                         error = in_pcbbind(inp, (struct sockaddr *)&sin,
  389                             td->td_ucred);
  390                         INP_HASH_WUNLOCK(&V_tcbinfo);
  391                         goto out;
  392                 }
  393         }
  394 #endif
  395         error = in6_pcbbind(inp, nam, td->td_ucred);
  396         INP_HASH_WUNLOCK(&V_tcbinfo);
  397 out:
  398         TCPDEBUG2(PRU_BIND);
  399         TCP_PROBE2(debug__user, tp, PRU_BIND);
  400         INP_WUNLOCK(inp);
  401         return (error);
  402 }
  403 #endif /* INET6 */
  404 
  405 #ifdef INET
  406 /*
  407  * Prepare to accept connections.
  408  */
  409 static int
  410 tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
  411 {
  412         int error = 0;
  413         struct inpcb *inp;
  414         struct tcpcb *tp = NULL;
  415 
  416         TCPDEBUG0;
  417         inp = sotoinpcb(so);
  418         KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
  419         INP_WLOCK(inp);
  420         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  421                 error = EINVAL;
  422                 goto out;
  423         }
  424         tp = intotcpcb(inp);
  425         TCPDEBUG1();
  426         SOCK_LOCK(so);
  427         error = solisten_proto_check(so);
  428         INP_HASH_WLOCK(&V_tcbinfo);
  429         if (error == 0 && inp->inp_lport == 0)
  430                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
  431         INP_HASH_WUNLOCK(&V_tcbinfo);
  432         if (error == 0) {
  433                 tcp_state_change(tp, TCPS_LISTEN);
  434                 solisten_proto(so, backlog);
  435 #ifdef TCP_OFFLOAD
  436                 if ((so->so_options & SO_NO_OFFLOAD) == 0)
  437                         tcp_offload_listen_start(tp);
  438 #endif
  439         }
  440         SOCK_UNLOCK(so);
  441 
  442         if (IS_FASTOPEN(tp->t_flags))
  443                 tp->t_tfo_pending = tcp_fastopen_alloc_counter();
  444 
  445 out:
  446         TCPDEBUG2(PRU_LISTEN);
  447         TCP_PROBE2(debug__user, tp, PRU_LISTEN);
  448         INP_WUNLOCK(inp);
  449         return (error);
  450 }
  451 #endif /* INET */
  452 
  453 #ifdef INET6
  454 static int
  455 tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
  456 {
  457         int error = 0;
  458         struct inpcb *inp;
  459         struct tcpcb *tp = NULL;
  460 
  461         TCPDEBUG0;
  462         inp = sotoinpcb(so);
  463         KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
  464         INP_WLOCK(inp);
  465         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  466                 error = EINVAL;
  467                 goto out;
  468         }
  469         tp = intotcpcb(inp);
  470         TCPDEBUG1();
  471         SOCK_LOCK(so);
  472         error = solisten_proto_check(so);
  473         INP_HASH_WLOCK(&V_tcbinfo);
  474         if (error == 0 && inp->inp_lport == 0) {
  475                 inp->inp_vflag &= ~INP_IPV4;
  476                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
  477                         inp->inp_vflag |= INP_IPV4;
  478                 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
  479         }
  480         INP_HASH_WUNLOCK(&V_tcbinfo);
  481         if (error == 0) {
  482                 tcp_state_change(tp, TCPS_LISTEN);
  483                 solisten_proto(so, backlog);
  484 #ifdef TCP_OFFLOAD
  485                 if ((so->so_options & SO_NO_OFFLOAD) == 0)
  486                         tcp_offload_listen_start(tp);
  487 #endif
  488         }
  489         SOCK_UNLOCK(so);
  490 
  491         if (IS_FASTOPEN(tp->t_flags))
  492                 tp->t_tfo_pending = tcp_fastopen_alloc_counter();
  493 
  494 out:
  495         TCPDEBUG2(PRU_LISTEN);
  496         TCP_PROBE2(debug__user, tp, PRU_LISTEN);
  497         INP_WUNLOCK(inp);
  498         return (error);
  499 }
  500 #endif /* INET6 */
  501 
  502 #ifdef INET
  /*
   * pru_connect() handler for AF_INET: initiate a connection to a peer.
   *
   * The destination is validated first (sockaddr length, no multicast, jail
   * check through prison_remote_ip4()).  tcp_connect() then sets up the
   * connection; unless a TCP offload device takes it over, the keep timer is
   * armed with the connection-establishment timeout and the stack's output
   * routine is called to send the initial segment.
   *
   * Returns EINVAL, EAFNOSUPPORT or the jail error for a rejected address,
   * EADDRINUSE for a pcb in timewait, ECONNREFUSED for a dropped pcb, and
   * otherwise the result of the connect/output calls.
   */
  static int
  tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  {
          struct sockaddr_in *sinp = (struct sockaddr_in *)nam;
          struct tcpcb *tp = NULL;
          struct inpcb *inp;
          int error;
          TCPDEBUG0;

          if (nam->sa_len != sizeof(*sinp))
                  return (EINVAL);

          /* TCP ``connections'' to multicast addresses are not allowed. */
          if (sinp->sin_family == AF_INET &&
              IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
                  return (EAFNOSUPPORT);

          error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr);
          if (error != 0)
                  return (error);

          inp = sotoinpcb(so);
          KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
          INP_WLOCK(inp);
          if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
                  error = EADDRINUSE;
                  goto out;
          }
          if ((inp->inp_flags & INP_DROPPED) != 0) {
                  error = ECONNREFUSED;
                  goto out;
          }
          tp = intotcpcb(inp);
          TCPDEBUG1();

          error = tcp_connect(tp, nam, td);
          if (error != 0)
                  goto out;
  #ifdef TCP_OFFLOAD
          /*
           * Try the offload path first.  If it accepts the connection we are
           * done; if it fails, continue with the software path below.
           */
          if (registered_toedevs > 0 &&
              (so->so_options & SO_NO_OFFLOAD) == 0) {
                  error = tcp_offload_connect(so, nam);
                  if (error == 0)
                          goto out;
          }
  #endif
          tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
          error = tp->t_fb->tfb_tcp_output(tp);
  out:
          TCPDEBUG2(PRU_CONNECT);
          TCP_PROBE2(debug__user, tp, PRU_CONNECT);
          INP_WUNLOCK(inp);
          return (error);
  }
  560 #endif /* INET */
  561 
  562 #ifdef INET6
  /*
   * pru_connect() handler for AF_INET6: initiate a connection to a peer.
   *
   * A v4-mapped destination (with INET compiled in) is converted to a
   * sockaddr_in and connected through tcp_connect() after switching the pcb
   * to IPv4; this is refused with EINVAL when IPV6_V6ONLY is set and with
   * EAFNOSUPPORT when the pcb does not allow IPv4.  Any other destination is
   * connected through tcp6_connect() after switching the pcb to IPv6.
   *
   * Those switches modify inp_vflag and inp_inc.inc_flags before the jail
   * check and the connect call.  If the attempt fails while the pcb still
   * has no local port, both fields are restored to their values on entry so
   * that a failed connect() does not change which address families the
   * socket accepts on a later call.
   *
   * Returns EINVAL/EAFNOSUPPORT for a rejected address, EADDRINUSE for a pcb
   * in timewait, ECONNREFUSED for a dropped pcb, and otherwise the result of
   * the jail check, connect and output calls.
   */
  static int
  tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  {
          int error = 0;
          struct inpcb *inp;
          struct tcpcb *tp = NULL;
          struct sockaddr_in6 *sin6p;
          uint8_t incflagsav;
          u_char vflagsav;

          TCPDEBUG0;

          sin6p = (struct sockaddr_in6 *)nam;
          if (nam->sa_len != sizeof (*sin6p))
                  return (EINVAL);
          /*
           * Must disallow TCP ``connections'' to multicast addresses.
           */
          if (sin6p->sin6_family == AF_INET6
              && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
                  return (EAFNOSUPPORT);

          inp = sotoinpcb(so);
          KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
          INP_WLOCK(inp);
          /* Saved before any exit path so the restore below is always valid. */
          vflagsav = inp->inp_vflag;
          incflagsav = inp->inp_inc.inc_flags;
          if (inp->inp_flags & INP_TIMEWAIT) {
                  error = EADDRINUSE;
                  goto out;
          }
          if (inp->inp_flags & INP_DROPPED) {
                  error = ECONNREFUSED;
                  goto out;
          }
          tp = intotcpcb(inp);
          TCPDEBUG1();
  #ifdef INET
          /*
           * XXXRW: Some confusion: V4/V6 flags relate to binding, and
           * therefore probably require the hash lock, which isn't held here.
           * Is this a significant problem?
           */
          if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
                  struct sockaddr_in sin;

                  if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
                          error = EINVAL;
                          goto out;
                  }
                  if ((inp->inp_vflag & INP_IPV4) == 0) {
                          error = EAFNOSUPPORT;
                          goto out;
                  }

                  in6_sin6_2_sin(&sin, sin6p);
                  if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
                          error = EAFNOSUPPORT;
                          goto out;
                  }
                  inp->inp_vflag |= INP_IPV4;
                  inp->inp_vflag &= ~INP_IPV6;
                  if ((error = prison_remote_ip4(td->td_ucred,
                      &sin.sin_addr)) != 0)
                          goto out;
                  if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
                          goto out;
  #ifdef TCP_OFFLOAD
                  if (registered_toedevs > 0 &&
                      (so->so_options & SO_NO_OFFLOAD) == 0 &&
                      (error = tcp_offload_connect(so, nam)) == 0)
                          goto out;
  #endif
                  error = tp->t_fb->tfb_tcp_output(tp);
                  goto out;
          } else {
                  if ((inp->inp_vflag & INP_IPV6) == 0) {
                          error = EAFNOSUPPORT;
                          goto out;
                  }
          }
  #endif
          inp->inp_vflag &= ~INP_IPV4;
          inp->inp_vflag |= INP_IPV6;
          inp->inp_inc.inc_flags |= INC_ISIPV6;
          if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
                  goto out;
          if ((error = tcp6_connect(tp, nam, td)) != 0)
                  goto out;
  #ifdef TCP_OFFLOAD
          if (registered_toedevs > 0 &&
              (so->so_options & SO_NO_OFFLOAD) == 0 &&
              (error = tcp_offload_connect(so, nam)) == 0)
                  goto out;
  #endif
          tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
          error = tp->t_fb->tfb_tcp_output(tp);

  out:
          /*
           * A failure with no local port assigned means the attempt never
           * bound the pcb, so the address-family flags changed above must not
           * stick.  An error reported after the pcb was bound (for example by
           * the output routine) leaves the flags as set for the connection.
           */
          if (error != 0 && inp->inp_lport == 0) {
                  inp->inp_vflag = vflagsav;
                  inp->inp_inc.inc_flags = incflagsav;
          }
          TCPDEBUG2(PRU_CONNECT);
          TCP_PROBE2(debug__user, tp, PRU_CONNECT);
          INP_WUNLOCK(inp);
          return (error);
  }
  663 #endif /* INET6 */
  664 
  665 /*
  666  * Initiate disconnect from peer.
  667  * If connection never passed embryonic stage, just drop;
  668  * else if don't need to let data drain, then can just drop anyways,
  669  * else have to begin TCP shutdown process: mark socket disconnecting,
  670  * drain unread data, state switch to reflect user close, and
  671  * send segment (e.g. FIN) to peer.  Socket will be really disconnected
  672  * when peer sends FIN and acks ours.
  673  *
  674  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
  675  */
  676 static int
  677 tcp_usr_disconnect(struct socket *so)
  678 {
  679         struct inpcb *inp;
  680         struct tcpcb *tp = NULL;
  681         struct epoch_tracker et;
  682         int error = 0;
  683 
  684         TCPDEBUG0;
  685         INP_INFO_RLOCK_ET(&V_tcbinfo, et);
  686         inp = sotoinpcb(so);
  687         KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
  688         INP_WLOCK(inp);
  689         if (inp->inp_flags & INP_TIMEWAIT)
  690                 goto out;
  691         if (inp->inp_flags & INP_DROPPED) {
  692                 error = ECONNRESET;
  693                 goto out;
  694         }
  695         tp = intotcpcb(inp);
  696         TCPDEBUG1();
  697         tcp_disconnect(tp);
  698 out:
  699         TCPDEBUG2(PRU_DISCONNECT);
  700         TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
  701         INP_WUNLOCK(inp);
  702         INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
  703         return (error);
  704 }
  705 
  706 #ifdef INET
  707 /*
  708  * Accept a connection.  Essentially all the work is done at higher levels;
  709  * just return the address of the peer, storing through addr.
  710  */
  711 static int
  712 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
  713 {
  714         int error = 0;
  715         struct inpcb *inp = NULL;
  716         struct tcpcb *tp = NULL;
  717         struct in_addr addr;
  718         in_port_t port = 0;
  719         TCPDEBUG0;
  720 
  721         if (so->so_state & SS_ISDISCONNECTED)
  722                 return (ECONNABORTED);
  723 
  724         inp = sotoinpcb(so);
  725         KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
  726         INP_WLOCK(inp);
  727         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  728                 error = ECONNABORTED;
  729                 goto out;
  730         }
  731         tp = intotcpcb(inp);
  732         TCPDEBUG1();
  733 
  734         /*
  735          * We inline in_getpeeraddr and COMMON_END here, so that we can
  736          * copy the data of interest and defer the malloc until after we
  737          * release the lock.
  738          */
  739         port = inp->inp_fport;
  740         addr = inp->inp_faddr;
  741 
  742 out:
  743         TCPDEBUG2(PRU_ACCEPT);
  744         TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
  745         INP_WUNLOCK(inp);
  746         if (error == 0)
  747                 *nam = in_sockaddr(port, &addr);
  748         return error;
  749 }
  750 #endif /* INET */
  751 
  752 #ifdef INET6
  753 static int
  754 tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
  755 {
  756         struct inpcb *inp = NULL;
  757         int error = 0;
  758         struct tcpcb *tp = NULL;
  759         struct in_addr addr;
  760         struct in6_addr addr6;
  761         struct epoch_tracker et;
  762         in_port_t port = 0;
  763         int v4 = 0;
  764         TCPDEBUG0;
  765 
  766         if (so->so_state & SS_ISDISCONNECTED)
  767                 return (ECONNABORTED);
  768 
  769         inp = sotoinpcb(so);
  770         KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
  771         INP_INFO_RLOCK_ET(&V_tcbinfo, et);
  772         INP_WLOCK(inp);
  773         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  774                 error = ECONNABORTED;
  775                 goto out;
  776         }
  777         tp = intotcpcb(inp);
  778         TCPDEBUG1();
  779 
  780         /*
  781          * We inline in6_mapped_peeraddr and COMMON_END here, so that we can
  782          * copy the data of interest and defer the malloc until after we
  783          * release the lock.
  784          */
  785         if (inp->inp_vflag & INP_IPV4) {
  786                 v4 = 1;
  787                 port = inp->inp_fport;
  788                 addr = inp->inp_faddr;
  789         } else {
  790                 port = inp->inp_fport;
  791                 addr6 = inp->in6p_faddr;
  792         }
  793 
  794 out:
  795         TCPDEBUG2(PRU_ACCEPT);
  796         TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
  797         INP_WUNLOCK(inp);
  798         INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
  799         if (error == 0) {
  800                 if (v4)
  801                         *nam = in6_v4mapsin6_sockaddr(port, &addr);
  802                 else
  803                         *nam = in6_sockaddr(port, &addr6);
  804         }
  805         return error;
  806 }
  807 #endif /* INET6 */
  808 
  809 /*
  810  * Mark the connection as being incapable of further output.
  811  */
  812 static int
  813 tcp_usr_shutdown(struct socket *so)
  814 {
  815         int error = 0;
  816         struct inpcb *inp;
  817         struct tcpcb *tp = NULL;
  818         struct epoch_tracker et;
  819 
  820         TCPDEBUG0;
  821         INP_INFO_RLOCK_ET(&V_tcbinfo, et);
  822         inp = sotoinpcb(so);
  823         KASSERT(inp != NULL, ("inp == NULL"));
  824         INP_WLOCK(inp);
  825         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  826                 error = ECONNRESET;
  827                 goto out;
  828         }
  829         tp = intotcpcb(inp);
  830         TCPDEBUG1();
  831         socantsendmore(so);
  832         tcp_usrclosed(tp);
  833         if (!(inp->inp_flags & INP_DROPPED))
  834                 error = tp->t_fb->tfb_tcp_output(tp);
  835 
  836 out:
  837         TCPDEBUG2(PRU_SHUTDOWN);
  838         TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
  839         INP_WUNLOCK(inp);
  840         INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
  841 
  842         return (error);
  843 }
  844 
  845 /*
  846  * After a receive, possibly send window update to peer.
  847  */
  848 static int
  849 tcp_usr_rcvd(struct socket *so, int flags)
  850 {
  851         struct inpcb *inp;
  852         struct tcpcb *tp = NULL;
  853         int error = 0;
  854 
  855         TCPDEBUG0;
  856         inp = sotoinpcb(so);
  857         KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
  858         INP_WLOCK(inp);
  859         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  860                 error = ECONNRESET;
  861                 goto out;
  862         }
  863         tp = intotcpcb(inp);
  864         TCPDEBUG1();
  865         /*
  866          * For passively-created TFO connections, don't attempt a window
  867          * update while still in SYN_RECEIVED as this may trigger an early
  868          * SYN|ACK.  It is preferable to have the SYN|ACK be sent along with
  869          * application response data, or failing that, when the DELACK timer
  870          * expires.
  871          */
  872         if (IS_FASTOPEN(tp->t_flags) &&
  873             (tp->t_state == TCPS_SYN_RECEIVED))
  874                 goto out;
  875 #ifdef TCP_OFFLOAD
  876         if (tp->t_flags & TF_TOE)
  877                 tcp_offload_rcvd(tp);
  878         else
  879 #endif
  880         tp->t_fb->tfb_tcp_output(tp);
  881 
  882 out:
  883         TCPDEBUG2(PRU_RCVD);
  884         TCP_PROBE2(debug__user, tp, PRU_RCVD);
  885         INP_WUNLOCK(inp);
  886         return (error);
  887 }
  888 
  889 /*
  890  * Do a send by putting data in output queue and updating urgent
  891  * marker if URG set.  Possibly send more data.  Unlike the other
  892  * pru_*() routines, the mbuf chains are our responsibility.  We
  893  * must either enqueue them or free them.  The other pru_* routines
  894  * generally are caller-frees.
  895  */
  896 static int
  897 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
  898     struct sockaddr *nam, struct mbuf *control, struct thread *td)
  899 {
  900         int error = 0;
  901         struct inpcb *inp;
  902         struct tcpcb *tp = NULL;
  903         struct epoch_tracker net_et;
  904 #ifdef INET
  905 #ifdef INET6
  906         struct sockaddr_in sin;
  907 #endif
  908         struct sockaddr_in *sinp;
  909 #endif
  910 #ifdef INET6
  911         int isipv6;
  912 #endif
  913         TCPDEBUG0;
  914 
  915         /*
  916          * We require the pcbinfo lock if we will close the socket as part of
  917          * this call.
  918          */
  919         if (flags & PRUS_EOF)
  920                 INP_INFO_RLOCK_ET(&V_tcbinfo, net_et);
  921         inp = sotoinpcb(so);
  922         KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
  923         INP_WLOCK(inp);
  924         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  925                 if (control)
  926                         m_freem(control);
  927                 /*
  928                  * In case of PRUS_NOTREADY, tcp_usr_ready() is responsible
  929                  * for freeing memory.
  930                  */
  931                 if (m && (flags & PRUS_NOTREADY) == 0)
  932                         m_freem(m);
  933                 error = ECONNRESET;
  934                 goto out;
  935         }
  936         tp = intotcpcb(inp);
  937         TCPDEBUG1();
  938         if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
  939                 switch (nam->sa_family) {
  940 #ifdef INET
  941                 case AF_INET:
  942                         sinp = (struct sockaddr_in *)nam;
  943                         if (sinp->sin_len != sizeof(struct sockaddr_in)) {
  944                                 if (m)
  945                                         m_freem(m);
  946                                 error = EINVAL;
  947                                 goto out;
  948                         }
  949                         if ((inp->inp_vflag & INP_IPV6) != 0) {
  950                                 if (m)
  951                                         m_freem(m);
  952                                 error = EAFNOSUPPORT;
  953                                 goto out;
  954                         }
  955                         if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
  956                                 if (m)
  957                                         m_freem(m);
  958                                 error = EAFNOSUPPORT;
  959                                 goto out;
  960                         }
  961                         if ((error = prison_remote_ip4(td->td_ucred,
  962                             &sinp->sin_addr))) {
  963                                 if (m)
  964                                         m_freem(m);
  965                                 goto out;
  966                         }
  967 #ifdef INET6
  968                         isipv6 = 0;
  969 #endif
  970                         break;
  971 #endif /* INET */
  972 #ifdef INET6
  973                 case AF_INET6:
  974                 {
  975                         struct sockaddr_in6 *sin6p;
  976 
  977                         sin6p = (struct sockaddr_in6 *)nam;
  978                         if (sin6p->sin6_len != sizeof(struct sockaddr_in6)) {
  979                                 if (m)
  980                                         m_freem(m);
  981                                 error = EINVAL;
  982                                 goto out;
  983                         }
  984                         if (IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
  985                                 if (m)
  986                                         m_freem(m);
  987                                 error = EAFNOSUPPORT;
  988                                 goto out;
  989                         }
  990                         if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
  991 #ifdef INET
  992                                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
  993                                         error = EINVAL;
  994                                         if (m)
  995                                                 m_freem(m);
  996                                         goto out;
  997                                 }
  998                                 if ((inp->inp_vflag & INP_IPV4) == 0) {
  999                                         error = EAFNOSUPPORT;
 1000                                         if (m)
 1001                                                 m_freem(m);
 1002                                         goto out;
 1003                                 }
 1004                                 inp->inp_vflag &= ~INP_IPV6;
 1005                                 sinp = &sin;
 1006                                 in6_sin6_2_sin(sinp, sin6p);
 1007                                 if (IN_MULTICAST(
 1008                                     ntohl(sinp->sin_addr.s_addr))) {
 1009                                         error = EAFNOSUPPORT;
 1010                                         if (m)
 1011                                                 m_freem(m);
 1012                                         goto out;
 1013                                 }
 1014                                 if ((error = prison_remote_ip4(td->td_ucred,
 1015                                     &sinp->sin_addr))) {
 1016                                         if (m)
 1017                                                 m_freem(m);
 1018                                         goto out;
 1019                                 }
 1020                                 isipv6 = 0;
 1021 #else /* !INET */
 1022                                 error = EAFNOSUPPORT;
 1023                                 if (m)
 1024                                         m_freem(m);
 1025                                 goto out;
 1026 #endif /* INET */
 1027                         } else {
 1028                                 if ((inp->inp_vflag & INP_IPV6) == 0) {
 1029                                         if (m)
 1030                                                 m_freem(m);
 1031                                         error = EAFNOSUPPORT;
 1032                                         goto out;
 1033                                 }
 1034                                 inp->inp_vflag &= ~INP_IPV4;
 1035                                 inp->inp_inc.inc_flags |= INC_ISIPV6;
 1036                                 if ((error = prison_remote_ip6(td->td_ucred,
 1037                                     &sin6p->sin6_addr))) {
 1038                                         if (m)
 1039                                                 m_freem(m);
 1040                                         goto out;
 1041                                 }
 1042                                 isipv6 = 1;
 1043                         }
 1044                         break;
 1045                 }
 1046 #endif /* INET6 */
 1047                 default:
 1048                         if (m)
 1049                                 m_freem(m);
 1050                         error = EAFNOSUPPORT;
 1051                         goto out;
 1052                 }
 1053         }
 1054         if (control) {
 1055                 /* TCP doesn't do control messages (rights, creds, etc) */
 1056                 if (control->m_len) {
 1057                         m_freem(control);
 1058                         if (m)
 1059                                 m_freem(m);
 1060                         error = EINVAL;
 1061                         goto out;
 1062                 }
 1063                 m_freem(control);       /* empty control, just free it */
 1064         }
 1065         if (!(flags & PRUS_OOB)) {
 1066                 sbappendstream(&so->so_snd, m, flags);
 1067                 if (nam && tp->t_state < TCPS_SYN_SENT) {
 1068                         /*
 1069                          * Do implied connect if not yet connected,
 1070                          * initialize window to default value, and
 1071                          * initialize maxseg using peer's cached MSS.
 1072                          */
 1073 #ifdef INET6
 1074                         if (isipv6)
 1075                                 error = tcp6_connect(tp, nam, td);
 1076 #endif /* INET6 */
 1077 #if defined(INET6) && defined(INET)
 1078                         else
 1079 #endif
 1080 #ifdef INET
 1081                                 error = tcp_connect(tp,
 1082                                     (struct sockaddr *)sinp, td);
 1083 #endif
 1084                         if (error)
 1085                                 goto out;
 1086                         if (IS_FASTOPEN(tp->t_flags))
 1087                                 tcp_fastopen_connect(tp);
 1088                         else {
 1089                                 tp->snd_wnd = TTCP_CLIENT_SND_WND;
 1090                                 tcp_mss(tp, -1);
 1091                         }
 1092                 }
 1093                 if (flags & PRUS_EOF) {
 1094                         /*
 1095                          * Close the send side of the connection after
 1096                          * the data is sent.
 1097                          */
 1098                         INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 1099                         socantsendmore(so);
 1100                         tcp_usrclosed(tp);
 1101                 }
 1102                 if (!(inp->inp_flags & INP_DROPPED) &&
 1103                     !(flags & PRUS_NOTREADY)) {
 1104                         if (flags & PRUS_MORETOCOME)
 1105                                 tp->t_flags |= TF_MORETOCOME;
 1106                         error = tp->t_fb->tfb_tcp_output(tp);
 1107                         if (flags & PRUS_MORETOCOME)
 1108                                 tp->t_flags &= ~TF_MORETOCOME;
 1109                 }
 1110         } else {
 1111                 /*
 1112                  * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
 1113                  */
 1114                 SOCKBUF_LOCK(&so->so_snd);
 1115                 if (sbspace(&so->so_snd) < -512) {
 1116                         SOCKBUF_UNLOCK(&so->so_snd);
 1117                         m_freem(m);
 1118                         error = ENOBUFS;
 1119                         goto out;
 1120                 }
 1121                 /*
 1122                  * According to RFC961 (Assigned Protocols),
 1123                  * the urgent pointer points to the last octet
 1124                  * of urgent data.  We continue, however,
 1125                  * to consider it to indicate the first octet
 1126                  * of data past the urgent section.
 1127                  * Otherwise, snd_up should be one lower.
 1128                  */
 1129                 sbappendstream_locked(&so->so_snd, m, flags);
 1130                 SOCKBUF_UNLOCK(&so->so_snd);
 1131                 if (nam && tp->t_state < TCPS_SYN_SENT) {
 1132                         /*
 1133                          * Do implied connect if not yet connected,
 1134                          * initialize window to default value, and
 1135                          * initialize maxseg using peer's cached MSS.
 1136                          */
 1137 
 1138                         /*
 1139                          * Not going to contemplate SYN|URG
 1140                          */
 1141                         if (IS_FASTOPEN(tp->t_flags))
 1142                                 tp->t_flags &= ~TF_FASTOPEN;
 1143 #ifdef INET6
 1144                         if (isipv6)
 1145                                 error = tcp6_connect(tp, nam, td);
 1146 #endif /* INET6 */
 1147 #if defined(INET6) && defined(INET)
 1148                         else
 1149 #endif
 1150 #ifdef INET
 1151                                 error = tcp_connect(tp,
 1152                                     (struct sockaddr *)sinp, td);
 1153 #endif
 1154                         if (error)
 1155                                 goto out;
 1156                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
 1157                         tcp_mss(tp, -1);
 1158                 }
 1159                 tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
 1160                 if (!(flags & PRUS_NOTREADY)) {
 1161                         tp->t_flags |= TF_FORCEDATA;
 1162                         error = tp->t_fb->tfb_tcp_output(tp);
 1163                         tp->t_flags &= ~TF_FORCEDATA;
 1164                 }
 1165         }
 1166         TCP_LOG_EVENT(tp, NULL,
 1167             &inp->inp_socket->so_rcv,
 1168             &inp->inp_socket->so_snd,
 1169             TCP_LOG_USERSEND, error,
 1170             0, NULL, false);
 1171 out:
 1172         TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
 1173                   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
 1174         TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
 1175                    ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
 1176         INP_WUNLOCK(inp);
 1177         if (flags & PRUS_EOF)
 1178                 INP_INFO_RUNLOCK_ET(&V_tcbinfo, net_et);
 1179         return (error);
 1180 }
 1181 
 1182 static int
 1183 tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
 1184 {
 1185         struct inpcb *inp;
 1186         struct tcpcb *tp;
 1187         int error;
 1188 
 1189         inp = sotoinpcb(so);
 1190         INP_WLOCK(inp);
 1191         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 1192                 INP_WUNLOCK(inp);
 1193                 for (int i = 0; i < count; i++)
 1194                         m = m_free(m);
 1195                 return (ECONNRESET);
 1196         }
 1197         tp = intotcpcb(inp);
 1198 
 1199         SOCKBUF_LOCK(&so->so_snd);
 1200         error = sbready(&so->so_snd, m, count);
 1201         SOCKBUF_UNLOCK(&so->so_snd);
 1202         if (error == 0)
 1203                 error = tp->t_fb->tfb_tcp_output(tp);
 1204         INP_WUNLOCK(inp);
 1205 
 1206         return (error);
 1207 }
 1208 
 1209 /*
 1210  * Abort the TCP.  Drop the connection abruptly.
 1211  */
 1212 static void
 1213 tcp_usr_abort(struct socket *so)
 1214 {
 1215         struct inpcb *inp;
 1216         struct tcpcb *tp = NULL;
 1217         struct epoch_tracker et;
 1218         TCPDEBUG0;
 1219 
 1220         inp = sotoinpcb(so);
 1221         KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
 1222 
 1223         INP_INFO_RLOCK_ET(&V_tcbinfo, et);
 1224         INP_WLOCK(inp);
 1225         KASSERT(inp->inp_socket != NULL,
 1226             ("tcp_usr_abort: inp_socket == NULL"));
 1227 
 1228         /*
 1229          * If we still have full TCP state, and we're not dropped, drop.
 1230          */
 1231         if (!(inp->inp_flags & INP_TIMEWAIT) &&
 1232             !(inp->inp_flags & INP_DROPPED)) {
 1233                 tp = intotcpcb(inp);
 1234                 TCPDEBUG1();
 1235                 tp = tcp_drop(tp, ECONNABORTED);
 1236                 if (tp == NULL)
 1237                         goto dropped;
 1238                 TCPDEBUG2(PRU_ABORT);
 1239                 TCP_PROBE2(debug__user, tp, PRU_ABORT);
 1240         }
 1241         if (!(inp->inp_flags & INP_DROPPED)) {
 1242                 SOCK_LOCK(so);
 1243                 so->so_state |= SS_PROTOREF;
 1244                 SOCK_UNLOCK(so);
 1245                 inp->inp_flags |= INP_SOCKREF;
 1246         }
 1247         INP_WUNLOCK(inp);
 1248 dropped:
 1249         INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 1250 }
 1251 
 1252 /*
 1253  * TCP socket is closed.  Start friendly disconnect.
 1254  */
 1255 static void
 1256 tcp_usr_close(struct socket *so)
 1257 {
 1258         struct inpcb *inp;
 1259         struct tcpcb *tp = NULL;
 1260         struct epoch_tracker et;
 1261         TCPDEBUG0;
 1262 
 1263         inp = sotoinpcb(so);
 1264         KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
 1265 
 1266         INP_INFO_RLOCK_ET(&V_tcbinfo, et);
 1267         INP_WLOCK(inp);
 1268         KASSERT(inp->inp_socket != NULL,
 1269             ("tcp_usr_close: inp_socket == NULL"));
 1270 
 1271         /*
 1272          * If we still have full TCP state, and we're not dropped, initiate
 1273          * a disconnect.
 1274          */
 1275         if (!(inp->inp_flags & INP_TIMEWAIT) &&
 1276             !(inp->inp_flags & INP_DROPPED)) {
 1277                 tp = intotcpcb(inp);
 1278                 TCPDEBUG1();
 1279                 tcp_disconnect(tp);
 1280                 TCPDEBUG2(PRU_CLOSE);
 1281                 TCP_PROBE2(debug__user, tp, PRU_CLOSE);
 1282         }
 1283         if (!(inp->inp_flags & INP_DROPPED)) {
 1284                 SOCK_LOCK(so);
 1285                 so->so_state |= SS_PROTOREF;
 1286                 SOCK_UNLOCK(so);
 1287                 inp->inp_flags |= INP_SOCKREF;
 1288         }
 1289         INP_WUNLOCK(inp);
 1290         INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 1291 }
 1292 
 1293 /*
 1294  * Receive out-of-band data.
 1295  */
 1296 static int
 1297 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
 1298 {
 1299         int error = 0;
 1300         struct inpcb *inp;
 1301         struct tcpcb *tp = NULL;
 1302 
 1303         TCPDEBUG0;
 1304         inp = sotoinpcb(so);
 1305         KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
 1306         INP_WLOCK(inp);
 1307         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 1308                 error = ECONNRESET;
 1309                 goto out;
 1310         }
 1311         tp = intotcpcb(inp);
 1312         TCPDEBUG1();
 1313         if ((so->so_oobmark == 0 &&
 1314              (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
 1315             so->so_options & SO_OOBINLINE ||
 1316             tp->t_oobflags & TCPOOB_HADDATA) {
 1317                 error = EINVAL;
 1318                 goto out;
 1319         }
 1320         if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
 1321                 error = EWOULDBLOCK;
 1322                 goto out;
 1323         }
 1324         m->m_len = 1;
 1325         *mtod(m, caddr_t) = tp->t_iobc;
 1326         if ((flags & MSG_PEEK) == 0)
 1327                 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 1328 
 1329 out:
 1330         TCPDEBUG2(PRU_RCVOOB);
 1331         TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
 1332         INP_WUNLOCK(inp);
 1333         return (error);
 1334 }
 1335 
 1336 #ifdef INET
 1337 struct pr_usrreqs tcp_usrreqs = {
 1338         .pru_abort =            tcp_usr_abort,
 1339         .pru_accept =           tcp_usr_accept,
 1340         .pru_attach =           tcp_usr_attach,
 1341         .pru_bind =             tcp_usr_bind,
 1342         .pru_connect =          tcp_usr_connect,
 1343         .pru_control =          in_control,
 1344         .pru_detach =           tcp_usr_detach,
 1345         .pru_disconnect =       tcp_usr_disconnect,
 1346         .pru_listen =           tcp_usr_listen,
 1347         .pru_peeraddr =         in_getpeeraddr,
 1348         .pru_rcvd =             tcp_usr_rcvd,
 1349         .pru_rcvoob =           tcp_usr_rcvoob,
 1350         .pru_send =             tcp_usr_send,
 1351         .pru_ready =            tcp_usr_ready,
 1352         .pru_shutdown =         tcp_usr_shutdown,
 1353         .pru_sockaddr =         in_getsockaddr,
 1354         .pru_sosetlabel =       in_pcbsosetlabel,
 1355         .pru_close =            tcp_usr_close,
 1356 };
 1357 #endif /* INET */
 1358 
 1359 #ifdef INET6
 1360 struct pr_usrreqs tcp6_usrreqs = {
 1361         .pru_abort =            tcp_usr_abort,
 1362         .pru_accept =           tcp6_usr_accept,
 1363         .pru_attach =           tcp_usr_attach,
 1364         .pru_bind =             tcp6_usr_bind,
 1365         .pru_connect =          tcp6_usr_connect,
 1366         .pru_control =          in6_control,
 1367         .pru_detach =           tcp_usr_detach,
 1368         .pru_disconnect =       tcp_usr_disconnect,
 1369         .pru_listen =           tcp6_usr_listen,
 1370         .pru_peeraddr =         in6_mapped_peeraddr,
 1371         .pru_rcvd =             tcp_usr_rcvd,
 1372         .pru_rcvoob =           tcp_usr_rcvoob,
 1373         .pru_send =             tcp_usr_send,
 1374         .pru_ready =            tcp_usr_ready,
 1375         .pru_shutdown =         tcp_usr_shutdown,
 1376         .pru_sockaddr =         in6_mapped_sockaddr,
 1377         .pru_sosetlabel =       in_pcbsosetlabel,
 1378         .pru_close =            tcp_usr_close,
 1379 };
 1380 #endif /* INET6 */
 1381 
 1382 #ifdef INET
 1383 /*
 1384  * Common subroutine to open a TCP connection to remote host specified
 1385  * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
 1386  * port number if needed.  Call in_pcbconnect_setup to do the routing and
 1387  * to choose a local host address (interface).  If there is an existing
 1388  * incarnation of the same connection in TIME-WAIT state and if the remote
 1389  * host was sending CC options and if the connection duration was < MSL, then
 1390  * truncate the previous TIME-WAIT state and proceed.
 1391  * Initialize connection parameters and enter SYN-SENT state.
 1392  */
 1393 static int
 1394 tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
 1395 {
 1396         struct inpcb *inp = tp->t_inpcb, *oinp;
 1397         struct socket *so = inp->inp_socket;
 1398         struct in_addr laddr;
 1399         u_short lport;
 1400         int error;
 1401 
 1402         INP_WLOCK_ASSERT(inp);
 1403         INP_HASH_WLOCK(&V_tcbinfo);
 1404 
 1405         if (inp->inp_lport == 0) {
 1406                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 1407                 if (error)
 1408                         goto out;
 1409         }
 1410 
 1411         /*
 1412          * Cannot simply call in_pcbconnect, because there might be an
 1413          * earlier incarnation of this same connection still in
 1414          * TIME_WAIT state, creating an ADDRINUSE error.
 1415          */
 1416         laddr = inp->inp_laddr;
 1417         lport = inp->inp_lport;
 1418         error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
 1419             &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
 1420         if (error && oinp == NULL)
 1421                 goto out;
 1422         if (oinp) {
 1423                 error = EADDRINUSE;
 1424                 goto out;
 1425         }
 1426         inp->inp_laddr = laddr;
 1427         in_pcbrehash(inp);
 1428         INP_HASH_WUNLOCK(&V_tcbinfo);
 1429 
 1430         /*
 1431          * Compute window scaling to request:
 1432          * Scale to fit into sweet spot.  See tcp_syncache.c.
 1433          * XXX: This should move to tcp_output().
 1434          */
 1435         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
 1436             (TCP_MAXWIN << tp->request_r_scale) < sb_max)
 1437                 tp->request_r_scale++;
 1438 
 1439         soisconnecting(so);
 1440         TCPSTAT_INC(tcps_connattempt);
 1441         tcp_state_change(tp, TCPS_SYN_SENT);
 1442         tp->iss = tcp_new_isn(&inp->inp_inc);
 1443         if (tp->t_flags & TF_REQ_TSTMP)
 1444                 tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
 1445         tcp_sendseqinit(tp);
 1446 
 1447         return 0;
 1448 
 1449 out:
 1450         INP_HASH_WUNLOCK(&V_tcbinfo);
 1451         return (error);
 1452 }
 1453 #endif /* INET */
 1454 
 1455 #ifdef INET6
 1456 static int
 1457 tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
 1458 {
 1459         struct inpcb *inp = tp->t_inpcb;
 1460         int error;
 1461 
 1462         INP_WLOCK_ASSERT(inp);
 1463         INP_HASH_WLOCK(&V_tcbinfo);
 1464 
 1465         if (inp->inp_lport == 0) {
 1466                 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 1467                 if (error)
 1468                         goto out;
 1469         }
 1470         error = in6_pcbconnect(inp, nam, td->td_ucred);
 1471         if (error != 0)
 1472                 goto out;
 1473         INP_HASH_WUNLOCK(&V_tcbinfo);
 1474 
 1475         /* Compute window scaling to request.  */
 1476         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
 1477             (TCP_MAXWIN << tp->request_r_scale) < sb_max)
 1478                 tp->request_r_scale++;
 1479 
 1480         soisconnecting(inp->inp_socket);
 1481         TCPSTAT_INC(tcps_connattempt);
 1482         tcp_state_change(tp, TCPS_SYN_SENT);
 1483         tp->iss = tcp_new_isn(&inp->inp_inc);
 1484         if (tp->t_flags & TF_REQ_TSTMP)
 1485                 tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
 1486         tcp_sendseqinit(tp);
 1487 
 1488         return 0;
 1489 
 1490 out:
 1491         INP_HASH_WUNLOCK(&V_tcbinfo);
 1492         return error;
 1493 }
 1494 #endif /* INET6 */
 1495 
 1496 /*
 1497  * Export TCP internal state information via a struct tcp_info, based on the
 1498  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
 1499  * (TCP state machine, etc).  We export all information using FreeBSD-native
 1500  * constants -- for example, the numeric values for tcpi_state will differ
 1501  * from Linux.
 1502  */
 1503 static void
 1504 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
 1505 {
 1506 
 1507         INP_WLOCK_ASSERT(tp->t_inpcb);
 1508         bzero(ti, sizeof(*ti));
 1509 
 1510         ti->tcpi_state = tp->t_state;
 1511         if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
 1512                 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
 1513         if (tp->t_flags & TF_SACK_PERMIT)
 1514                 ti->tcpi_options |= TCPI_OPT_SACK;
 1515         if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
 1516                 ti->tcpi_options |= TCPI_OPT_WSCALE;
 1517                 ti->tcpi_snd_wscale = tp->snd_scale;
 1518                 ti->tcpi_rcv_wscale = tp->rcv_scale;
 1519         }
 1520         if (tp->t_flags & TF_ECN_PERMIT)
 1521                 ti->tcpi_options |= TCPI_OPT_ECN;
 1522 
 1523         ti->tcpi_rto = tp->t_rxtcur * tick;
 1524         ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
 1525         ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
 1526         ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
 1527 
 1528         ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
 1529         ti->tcpi_snd_cwnd = tp->snd_cwnd;
 1530 
 1531         /*
 1532          * FreeBSD-specific extension fields for tcp_info.
 1533          */
 1534         ti->tcpi_rcv_space = tp->rcv_wnd;
 1535         ti->tcpi_rcv_nxt = tp->rcv_nxt;
 1536         ti->tcpi_snd_wnd = tp->snd_wnd;
 1537         ti->tcpi_snd_bwnd = 0;          /* Unused, kept for compat. */
 1538         ti->tcpi_snd_nxt = tp->snd_nxt;
 1539         ti->tcpi_snd_mss = tp->t_maxseg;
 1540         ti->tcpi_rcv_mss = tp->t_maxseg;
 1541         ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
 1542         ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
 1543         ti->tcpi_snd_zerowin = tp->t_sndzerowin;
 1544 #ifdef TCP_OFFLOAD
 1545         if (tp->t_flags & TF_TOE) {
 1546                 ti->tcpi_options |= TCPI_OPT_TOE;
 1547                 tcp_offload_tcp_info(tp, ti);
 1548         }
 1549 #endif
 1550 }
 1551 
 1552 /*
 1553  * tcp_ctloutput() must drop the inpcb lock before performing copyin on
 1554  * socket option arguments.  When it re-acquires the lock after the copy, it
 1555  * has to revalidate that the connection is still valid for the socket
 1556  * option.
 1557  */
 1558 #define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do {                    \
 1559         INP_WLOCK(inp);                                                 \
 1560         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {            \
 1561                 INP_WUNLOCK(inp);                                       \
 1562                 cleanup;                                                \
 1563                 return (ECONNRESET);                                    \
 1564         }                                                               \
 1565         tp = intotcpcb(inp);                                            \
 1566 } while(0)
 1567 #define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */)
 1568 
      /*
       * Handle a socket option request (sopt->sopt_dir is SOPT_SET or SOPT_GET)
       * on a TCP socket.
       *
       * - Options whose level is not IPPROTO_TCP are forwarded, with the inpcb
       *   unlocked, to ip6_ctloutput() (inpcb has INP_IPV6PROTO) or ip_ctloutput().
       *   A successful IPV6_USE_MIN_MTU set additionally records INC_IPV6MINMTU
       *   and may lower t_maxseg to TCP6_MSS.
       * - TCP_FUNCTION_BLK is handled here: a set switches tp->t_fb to the named
       *   function block, a get copies out the current block's name and refcount.
       * - Every other IPPROTO_TCP option is passed to the current function block's
       *   tfb_tcp_ctloutput method with the inpcb still write-locked.
       *
       * Returns 0 or an errno value; ECONNRESET is returned whenever the inpcb is
       * found flagged INP_TIMEWAIT or INP_DROPPED.
       */
 1569 int
 1570 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 1571 {
 1572         int     error;
 1573         struct  inpcb *inp;
 1574         struct  tcpcb *tp;
 1575         struct tcp_function_block *blk;
 1576         struct tcp_function_set fsn;
 1577 
 1578         error = 0;
 1579         inp = sotoinpcb(so);
 1580         KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
 1581         INP_WLOCK(inp);
 1582         if (sopt->sopt_level != IPPROTO_TCP) {
 1583 #ifdef INET6
 1584                 if (inp->inp_vflag & INP_IPV6PROTO) {
 1585                         INP_WUNLOCK(inp);
 1586                         error = ip6_ctloutput(so, sopt);
 1587                         /*
 1588                          * In case of the IPV6_USE_MIN_MTU socket option,
 1589                          * set the INC_IPV6MINMTU flag to announce a
 1590                          * corresponding MSS during the initial handshake.
 1591                          * If the TCP connection is not in the front states,
 1592                          * just reduce the MSS being used.
 1593                          * This avoids the sending of TCP segments which will
 1594                          * be fragmented at the IPv6 layer.
 1595                          */
 1596                         if ((error == 0) &&
 1597                             (sopt->sopt_dir == SOPT_SET) &&
 1598                             (sopt->sopt_level == IPPROTO_IPV6) &&
 1599                             (sopt->sopt_name == IPV6_USE_MIN_MTU)) {
                                      /*
                                       * The lock was dropped around ip6_ctloutput(),
                                       * so the inpcb state has to be checked again.
                                       */
 1600                                 INP_WLOCK(inp);
 1601                                 if ((inp->inp_flags &
 1602                                     (INP_TIMEWAIT | INP_DROPPED))) {
 1603                                         INP_WUNLOCK(inp);
 1604                                         return (ECONNRESET);
 1605                                 }
 1606                                 inp->inp_inc.inc_flags |= INC_IPV6MINMTU;
 1607                                 tp = intotcpcb(inp);
 1608                                 if ((tp->t_state >= TCPS_SYN_SENT) &&
 1609                                     (inp->inp_inc.inc_flags & INC_ISIPV6)) {
 1610                                         struct ip6_pktopts *opt;
 1611 
 1612                                         opt = inp->in6p_outputopts;
 1613                                         if ((opt != NULL) &&
 1614                                             (opt->ip6po_minmtu ==
 1615                                             IP6PO_MINMTU_ALL)) {
                                                      /* Only ever shrink the MSS. */
 1616                                                 if (tp->t_maxseg > TCP6_MSS) {
 1617                                                         tp->t_maxseg = TCP6_MSS;
 1618                                                 }
 1619                                         }
 1620                                 }
 1621                                 INP_WUNLOCK(inp);
 1622                         }
 1623                 }
 1624 #endif /* INET6 */
 1625 #if defined(INET6) && defined(INET)
 1626                 else
 1627 #endif
 1628 #ifdef INET
 1629                 {
 1630                         INP_WUNLOCK(inp);
 1631                         error = ip_ctloutput(so, sopt);
 1632                 }
 1633 #endif
 1634                 return (error);
 1635         }
 1636         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 1637                 INP_WUNLOCK(inp);
 1638                 return (ECONNRESET);
 1639         }
 1640         tp = intotcpcb(inp);
 1641         /*
 1642          * Protect the TCP option TCP_FUNCTION_BLK so
 1643          * that a sub-function can *never* overwrite this.
 1644          */
 1645         if ((sopt->sopt_dir == SOPT_SET) && 
 1646             (sopt->sopt_name == TCP_FUNCTION_BLK)) {
 1647                 INP_WUNLOCK(inp);
 1648                 error = sooptcopyin(sopt, &fsn, sizeof fsn,
 1649                     sizeof fsn);
 1650                 if (error)
 1651                         return (error);
                      /*
                       * Re-locks the inpcb and reloads tp; returns ECONNRESET from
                       * this function if the connection went away meanwhile.
                       */
 1652                 INP_WLOCK_RECHECK(inp);
 1653                 blk = find_and_ref_tcp_functions(&fsn);
 1654                 if (blk == NULL) {
 1655                         INP_WUNLOCK(inp);
 1656                         return (ENOENT);
 1657                 }
                      /*
                       * From here on every exit that does not install blk must
                       * release the reference taken by the lookup above.
                       */
 1658                 if (tp->t_fb == blk) {
 1659                         /* You already have this */
 1660                         refcount_release(&blk->tfb_refcnt);
 1661                         INP_WUNLOCK(inp);
 1662                         return (0);
 1663                 }
 1664                 if (tp->t_state != TCPS_CLOSED) {
 1665                         /* 
 1666                          * The user has advanced the state
 1667                          * past the initial point, we may not
 1668                          * be able to switch. 
 1669                          */
 1670                         if (blk->tfb_tcp_handoff_ok != NULL) {
 1671                                 /* 
 1672                                  * Does the stack provide a
 1673                                  * query mechanism, if so it may
 1674                                  * still be possible?
 1675                                  */
 1676                                 error = (*blk->tfb_tcp_handoff_ok)(tp);
 1677                         } else
 1678                                 error = EINVAL;
 1679                         if (error) {
 1680                                 refcount_release(&blk->tfb_refcnt);
 1681                                 INP_WUNLOCK(inp);
 1682                                 return(error);
 1683                         }
 1684                 }
 1685                 if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
 1686                         refcount_release(&blk->tfb_refcnt);
 1687                         INP_WUNLOCK(inp);
 1688                         return (ENOENT);
 1689                 }
 1690                 /* 
 1691                  * Release the old refcnt, the
 1692                  * lookup acquired a ref on the
 1693                  * new one already.
 1694                  */
 1695                 if (tp->t_fb->tfb_tcp_fb_fini) {
 1696                         /* 
 1697                          * Tell the stack to cleanup with 0 i.e.
 1698                          * the tcb is not going away.
 1699                          */
 1700                         (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
 1701                 }
 1702 #ifdef TCPHPTS 
 1703                 /* Assure that we are not on any hpts */
 1704                 tcp_hpts_remove(tp->t_inpcb, HPTS_REMOVE_ALL);
 1705 #endif
 1706                 if (blk->tfb_tcp_fb_init) {
 1707                         error = (*blk->tfb_tcp_fb_init)(tp);
 1708                         if (error) {
                                      /*
                                       * The new stack refused the connection: give
                                       * its reference back and re-initialise the old
                                       * stack (tp->t_fb has not been changed yet).
                                       */
 1709                                 refcount_release(&blk->tfb_refcnt);
 1710                                 if (tp->t_fb->tfb_tcp_fb_init) {
 1711                                         if((*tp->t_fb->tfb_tcp_fb_init)(tp) != 0)  {
 1712                                                 /* Fall back failed, drop the connection */
 1713                                                 INP_WUNLOCK(inp);
 1714                                                 soabort(so);
 1715                                                 return(error);
 1716                                         }
 1717                                 }
 1718                                 goto err_out;
 1719                         }
 1720                 }
                      /* Switch over: drop the old stack's reference, install blk. */
 1721                 refcount_release(&tp->t_fb->tfb_refcnt);
 1722                 tp->t_fb = blk;
 1723 #ifdef TCP_OFFLOAD
 1724                 if (tp->t_flags & TF_TOE) {
 1725                         tcp_offload_ctloutput(tp, sopt->sopt_dir,
 1726                              sopt->sopt_name);
 1727                 }
 1728 #endif
 1729 err_out:
 1730                 INP_WUNLOCK(inp);
 1731                 return (error);
 1732         } else if ((sopt->sopt_dir == SOPT_GET) && 
 1733             (sopt->sopt_name == TCP_FUNCTION_BLK)) {
                      /*
                       * Snapshot name and refcount under the lock; the copyout is
                       * done after the lock has been released.
                       */
 1734                 strncpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name,
 1735                     TCP_FUNCTION_NAME_LEN_MAX);
 1736                 fsn.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
 1737                 fsn.pcbcnt = tp->t_fb->tfb_refcnt;
 1738                 INP_WUNLOCK(inp);
 1739                 error = sooptcopyout(sopt, &fsn, sizeof fsn);
 1740                 return (error);
 1741         }
 1742         /* Pass in the INP locked, callee must unlock it */
 1743         return (tp->t_fb->tfb_tcp_ctloutput(so, sopt, inp, tp));
 1744 }
 1745 
 1746 /*
 1747  * If this assert becomes untrue, we need to change the size of the buf
 1748  * variable in tcp_default_ctloutput().
       *
       * That buffer is declared as char[TCP_LOG_ID_LEN] and is also used to hold
       * congestion control algorithm names (up to TCP_CA_NAME_MAX) and log dump
       * reasons (up to TCP_LOG_REASON_LEN), so neither may exceed TCP_LOG_ID_LEN.
 1749  */
 1750 #ifdef CTASSERT
 1751 CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN);
 1752 CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN);
 1753 #endif
 1754 
 1755 int
 1756 tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
 1757 {
 1758         int     error, opt, optval;
 1759         u_int   ui;
 1760         struct  tcp_info ti;
 1761         struct cc_algo *algo;
 1762         char    *pbuf, buf[TCP_LOG_ID_LEN];
 1763         size_t  len;
 1764 
 1765         /*
 1766          * For TCP_CCALGOOPT forward the control to CC module, for both
 1767          * SOPT_SET and SOPT_GET.
 1768          */
 1769         switch (sopt->sopt_name) {
 1770         case TCP_CCALGOOPT:
 1771                 INP_WUNLOCK(inp);
 1772                 pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
 1773                 error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize,
 1774                     sopt->sopt_valsize);
 1775                 if (error) {
 1776                         free(pbuf, M_TEMP);
 1777                         return (error);
 1778                 }
 1779                 INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP));
 1780                 if (CC_ALGO(tp)->ctl_output != NULL)
 1781                         error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, pbuf);
 1782                 else
 1783                         error = ENOENT;
 1784                 INP_WUNLOCK(inp);
 1785                 if (error == 0 && sopt->sopt_dir == SOPT_GET)
 1786                         error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize);
 1787                 free(pbuf, M_TEMP);
 1788                 return (error);
 1789         }
 1790 
 1791         switch (sopt->sopt_dir) {
 1792         case SOPT_SET:
 1793                 switch (sopt->sopt_name) {
 1794 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 1795                 case TCP_MD5SIG:
 1796                         if (!TCPMD5_ENABLED()) {
 1797                                 INP_WUNLOCK(inp);
 1798                                 return (ENOPROTOOPT);
 1799                         }
 1800                         error = TCPMD5_PCBCTL(inp, sopt);
 1801                         if (error)
 1802                                 return (error);
 1803                         goto unlock_and_done;
 1804 #endif /* IPSEC */
 1805 
 1806                 case TCP_NODELAY:
 1807                 case TCP_NOOPT:
 1808                         INP_WUNLOCK(inp);
 1809                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1810                             sizeof optval);
 1811                         if (error)
 1812                                 return (error);
 1813 
 1814                         INP_WLOCK_RECHECK(inp);
 1815                         switch (sopt->sopt_name) {
 1816                         case TCP_NODELAY:
 1817                                 opt = TF_NODELAY;
 1818                                 break;
 1819                         case TCP_NOOPT:
 1820                                 opt = TF_NOOPT;
 1821                                 break;
 1822                         default:
 1823                                 opt = 0; /* dead code to fool gcc */
 1824                                 break;
 1825                         }
 1826 
 1827                         if (optval)
 1828                                 tp->t_flags |= opt;
 1829                         else
 1830                                 tp->t_flags &= ~opt;
 1831 unlock_and_done:
 1832 #ifdef TCP_OFFLOAD
 1833                         if (tp->t_flags & TF_TOE) {
 1834                                 tcp_offload_ctloutput(tp, sopt->sopt_dir,
 1835                                     sopt->sopt_name);
 1836                         }
 1837 #endif
 1838                         INP_WUNLOCK(inp);
 1839                         break;
 1840 
 1841                 case TCP_NOPUSH:
 1842                         INP_WUNLOCK(inp);
 1843                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1844                             sizeof optval);
 1845                         if (error)
 1846                                 return (error);
 1847 
 1848                         INP_WLOCK_RECHECK(inp);
 1849                         if (optval)
 1850                                 tp->t_flags |= TF_NOPUSH;
 1851                         else if (tp->t_flags & TF_NOPUSH) {
 1852                                 tp->t_flags &= ~TF_NOPUSH;
 1853                                 if (TCPS_HAVEESTABLISHED(tp->t_state))
 1854                                         error = tp->t_fb->tfb_tcp_output(tp);
 1855                         }
 1856                         goto unlock_and_done;
 1857 
 1858                 case TCP_MAXSEG:
 1859                         INP_WUNLOCK(inp);
 1860                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1861                             sizeof optval);
 1862                         if (error)
 1863                                 return (error);
 1864 
 1865                         INP_WLOCK_RECHECK(inp);
 1866                         if (optval > 0 && optval <= tp->t_maxseg &&
 1867                             optval + 40 >= V_tcp_minmss)
 1868                                 tp->t_maxseg = optval;
 1869                         else
 1870                                 error = EINVAL;
 1871                         goto unlock_and_done;
 1872 
 1873                 case TCP_INFO:
 1874                         INP_WUNLOCK(inp);
 1875                         error = EINVAL;
 1876                         break;
 1877 
 1878                 case TCP_CONGESTION:
 1879                         INP_WUNLOCK(inp);
 1880                         error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
 1881                         if (error)
 1882                                 break;
 1883                         buf[sopt->sopt_valsize] = '\0';
 1884                         INP_WLOCK_RECHECK(inp);
 1885                         CC_LIST_RLOCK();
 1886                         STAILQ_FOREACH(algo, &cc_list, entries)
 1887                                 if (strncmp(buf, algo->name,
 1888                                     TCP_CA_NAME_MAX) == 0)
 1889                                         break;
 1890                         CC_LIST_RUNLOCK();
 1891                         if (algo == NULL) {
 1892                                 INP_WUNLOCK(inp);
 1893                                 error = EINVAL;
 1894                                 break;
 1895                         }
 1896                         /*
 1897                          * We hold a write lock over the tcb so it's safe to
 1898                          * do these things without ordering concerns.
 1899                          */
 1900                         if (CC_ALGO(tp)->cb_destroy != NULL)
 1901                                 CC_ALGO(tp)->cb_destroy(tp->ccv);
 1902                         CC_DATA(tp) = NULL;
 1903                         CC_ALGO(tp) = algo;
 1904                         /*
 1905                          * If something goes pear shaped initialising the new
 1906                          * algo, fall back to newreno (which does not
 1907                          * require initialisation).
 1908                          */
 1909                         if (algo->cb_init != NULL &&
 1910                             algo->cb_init(tp->ccv) != 0) {
 1911                                 CC_ALGO(tp) = &newreno_cc_algo;
 1912                                 /*
 1913                                  * The only reason init should fail is
 1914                                  * because of malloc.
 1915                                  */
 1916                                 error = ENOMEM;
 1917                         }
 1918                         INP_WUNLOCK(inp);
 1919                         break;
 1920 
 1921                 case TCP_KEEPIDLE:
 1922                 case TCP_KEEPINTVL:
 1923                 case TCP_KEEPINIT:
 1924                         INP_WUNLOCK(inp);
 1925                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 1926                         if (error)
 1927                                 return (error);
 1928 
 1929                         if (ui > (UINT_MAX / hz)) {
 1930                                 error = EINVAL;
 1931                                 break;
 1932                         }
 1933                         ui *= hz;
 1934 
 1935                         INP_WLOCK_RECHECK(inp);
 1936                         switch (sopt->sopt_name) {
 1937                         case TCP_KEEPIDLE:
 1938                                 tp->t_keepidle = ui;
 1939                                 /*
 1940                                  * XXX: better check current remaining
 1941                                  * timeout and "merge" it with new value.
 1942                                  */
 1943                                 if ((tp->t_state > TCPS_LISTEN) &&
 1944                                     (tp->t_state <= TCPS_CLOSING))
 1945                                         tcp_timer_activate(tp, TT_KEEP,
 1946                                             TP_KEEPIDLE(tp));
 1947                                 break;
 1948                         case TCP_KEEPINTVL:
 1949                                 tp->t_keepintvl = ui;
 1950                                 if ((tp->t_state == TCPS_FIN_WAIT_2) &&
 1951                                     (TP_MAXIDLE(tp) > 0))
 1952                                         tcp_timer_activate(tp, TT_2MSL,
 1953                                             TP_MAXIDLE(tp));
 1954                                 break;
 1955                         case TCP_KEEPINIT:
 1956                                 tp->t_keepinit = ui;
 1957                                 if (tp->t_state == TCPS_SYN_RECEIVED ||
 1958                                     tp->t_state == TCPS_SYN_SENT)
 1959                                         tcp_timer_activate(tp, TT_KEEP,
 1960                                             TP_KEEPINIT(tp));
 1961                                 break;
 1962                         }
 1963                         goto unlock_and_done;
 1964 
 1965                 case TCP_KEEPCNT:
 1966                         INP_WUNLOCK(inp);
 1967                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 1968                         if (error)
 1969                                 return (error);
 1970 
 1971                         INP_WLOCK_RECHECK(inp);
 1972                         tp->t_keepcnt = ui;
 1973                         if ((tp->t_state == TCPS_FIN_WAIT_2) &&
 1974                             (TP_MAXIDLE(tp) > 0))
 1975                                 tcp_timer_activate(tp, TT_2MSL,
 1976                                     TP_MAXIDLE(tp));
 1977                         goto unlock_and_done;
 1978 
 1979 #ifdef TCPPCAP
 1980                 case TCP_PCAP_OUT:
 1981                 case TCP_PCAP_IN:
 1982                         INP_WUNLOCK(inp);
 1983                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1984                             sizeof optval);
 1985                         if (error)
 1986                                 return (error);
 1987 
 1988                         INP_WLOCK_RECHECK(inp);
 1989                         if (optval >= 0)
 1990                                 tcp_pcap_set_sock_max(TCP_PCAP_OUT ?
 1991                                         &(tp->t_outpkts) : &(tp->t_inpkts),
 1992                                         optval);
 1993                         else
 1994                                 error = EINVAL;
 1995                         goto unlock_and_done;
 1996 #endif
 1997 
 1998                 case TCP_FASTOPEN: {
 1999                         struct tcp_fastopen tfo_optval;
 2000 
 2001                         INP_WUNLOCK(inp);
 2002                         if (!V_tcp_fastopen_client_enable &&
 2003                             !V_tcp_fastopen_server_enable)
 2004                                 return (EPERM);
 2005 
 2006                         error = sooptcopyin(sopt, &tfo_optval,
 2007                                     sizeof(tfo_optval), sizeof(int));
 2008                         if (error)
 2009                                 return (error);
 2010 
 2011                         INP_WLOCK_RECHECK(inp);
 2012                         if (tfo_optval.enable) {
 2013                                 if (tp->t_state == TCPS_LISTEN) {
 2014                                         if (!V_tcp_fastopen_server_enable) {
 2015                                                 error = EPERM;
 2016                                                 goto unlock_and_done;
 2017                                         }
 2018 
 2019                                         tp->t_flags |= TF_FASTOPEN;
 2020                                         if (tp->t_tfo_pending == NULL)
 2021                                                 tp->t_tfo_pending =
 2022                                                     tcp_fastopen_alloc_counter();
 2023                                 } else {
 2024                                         /*
 2025                                          * If a pre-shared key was provided,
 2026                                          * stash it in the client cookie
 2027                                          * field of the tcpcb for use during
 2028                                          * connect.
 2029                                          */
 2030                                         if (sopt->sopt_valsize ==
 2031                                             sizeof(tfo_optval)) {
 2032                                                 memcpy(tp->t_tfo_cookie.client,
 2033                                                        tfo_optval.psk,
 2034                                                        TCP_FASTOPEN_PSK_LEN);
 2035                                                 tp->t_tfo_client_cookie_len =
 2036                                                     TCP_FASTOPEN_PSK_LEN;
 2037                                         }
 2038                                         tp->t_flags |= TF_FASTOPEN;
 2039                                 }
 2040                         } else
 2041                                 tp->t_flags &= ~TF_FASTOPEN;
 2042                         goto unlock_and_done;
 2043                 }
 2044 
 2045 #ifdef TCP_BLACKBOX
 2046                 case TCP_LOG:
 2047                         INP_WUNLOCK(inp);
 2048                         error = sooptcopyin(sopt, &optval, sizeof optval,
 2049                             sizeof optval);
 2050                         if (error)
 2051                                 return (error);
 2052 
 2053                         INP_WLOCK_RECHECK(inp);
 2054                         error = tcp_log_state_change(tp, optval);
 2055                         goto unlock_and_done;
 2056 
 2057                 case TCP_LOGBUF:
 2058                         INP_WUNLOCK(inp);
 2059                         error = EINVAL;
 2060                         break;
 2061 
 2062                 case TCP_LOGID:
 2063                         INP_WUNLOCK(inp);
 2064                         error = sooptcopyin(sopt, buf, TCP_LOG_ID_LEN - 1, 0);
 2065                         if (error)
 2066                                 break;
 2067                         buf[sopt->sopt_valsize] = '\0';
 2068                         INP_WLOCK_RECHECK(inp);
 2069                         error = tcp_log_set_id(tp, buf);
 2070                         /* tcp_log_set_id() unlocks the INP. */
 2071                         break;
 2072 
 2073                 case TCP_LOGDUMP:
 2074                 case TCP_LOGDUMPID:
 2075                         INP_WUNLOCK(inp);
 2076                         error =
 2077                             sooptcopyin(sopt, buf, TCP_LOG_REASON_LEN - 1, 0);
 2078                         if (error)
 2079                                 break;
 2080                         buf[sopt->sopt_valsize] = '\0';
 2081                         INP_WLOCK_RECHECK(inp);
 2082                         if (sopt->sopt_name == TCP_LOGDUMP) {
 2083                                 error = tcp_log_dump_tp_logbuf(tp, buf,
 2084                                     M_WAITOK, true);
 2085                                 INP_WUNLOCK(inp);
 2086                         } else {
 2087                                 tcp_log_dump_tp_bucket_logbufs(tp, buf);
 2088                                 /*
 2089                                  * tcp_log_dump_tp_bucket_logbufs() drops the
 2090                                  * INP lock.
 2091                                  */
 2092                         }
 2093                         break;
 2094 #endif
 2095 
 2096                 default:
 2097                         INP_WUNLOCK(inp);
 2098                         error = ENOPROTOOPT;
 2099                         break;
 2100                 }
 2101                 break;
 2102 
 2103         case SOPT_GET:
 2104                 tp = intotcpcb(inp);
 2105                 switch (sopt->sopt_name) {
 2106 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 2107                 case TCP_MD5SIG:
 2108                         if (!TCPMD5_ENABLED()) {
 2109                                 INP_WUNLOCK(inp);
 2110                                 return (ENOPROTOOPT);
 2111                         }
 2112                         error = TCPMD5_PCBCTL(inp, sopt);
 2113                         break;
 2114 #endif
 2115 
 2116                 case TCP_NODELAY:
 2117                         optval = tp->t_flags & TF_NODELAY;
 2118                         INP_WUNLOCK(inp);
 2119                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2120                         break;
 2121                 case TCP_MAXSEG:
 2122                         optval = tp->t_maxseg;
 2123                         INP_WUNLOCK(inp);
 2124                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2125                         break;
 2126                 case TCP_NOOPT:
 2127                         optval = tp->t_flags & TF_NOOPT;
 2128                         INP_WUNLOCK(inp);
 2129                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2130                         break;
 2131                 case TCP_NOPUSH:
 2132                         optval = tp->t_flags & TF_NOPUSH;
 2133                         INP_WUNLOCK(inp);
 2134                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2135                         break;
 2136                 case TCP_INFO:
 2137                         tcp_fill_info(tp, &ti);
 2138                         INP_WUNLOCK(inp);
 2139                         error = sooptcopyout(sopt, &ti, sizeof ti);
 2140                         break;
 2141                 case TCP_CONGESTION:
 2142                         len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
 2143                         INP_WUNLOCK(inp);
 2144                         error = sooptcopyout(sopt, buf, len + 1);
 2145                         break;
 2146                 case TCP_KEEPIDLE:
 2147                 case TCP_KEEPINTVL:
 2148                 case TCP_KEEPINIT:
 2149                 case TCP_KEEPCNT:
 2150                         switch (sopt->sopt_name) {
 2151                         case TCP_KEEPIDLE:
 2152                                 ui = TP_KEEPIDLE(tp) / hz;
 2153                                 break;
 2154                         case TCP_KEEPINTVL:
 2155                                 ui = TP_KEEPINTVL(tp) / hz;
 2156                                 break;
 2157                         case TCP_KEEPINIT:
 2158                                 ui = TP_KEEPINIT(tp) / hz;
 2159                                 break;
 2160                         case TCP_KEEPCNT:
 2161                                 ui = TP_KEEPCNT(tp);
 2162                                 break;
 2163                         }
 2164                         INP_WUNLOCK(inp);
 2165                         error = sooptcopyout(sopt, &ui, sizeof(ui));
 2166                         break;
 2167 #ifdef TCPPCAP
 2168                 case TCP_PCAP_OUT:
 2169                 case TCP_PCAP_IN:
 2170                         optval = tcp_pcap_get_sock_max(TCP_PCAP_OUT ?
 2171                                         &(tp->t_outpkts) : &(tp->t_inpkts));
 2172                         INP_WUNLOCK(inp);
 2173                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2174                         break;
 2175 #endif
 2176                 case TCP_FASTOPEN:
 2177                         optval = tp->t_flags & TF_FASTOPEN;
 2178                         INP_WUNLOCK(inp);
 2179                         error = sooptcopyout(sopt, &optval, sizeof optval);
 2180                         break;
 2181 #ifdef TCP_BLACKBOX
 2182                 case TCP_LOG:
 2183                         optval = tp->t_logstate;
 2184                         INP_WUNLOCK(inp);
 2185                         error = sooptcopyout(sopt, &optval, sizeof(optval));
 2186                         break;
 2187                 case TCP_LOGBUF:
 2188                         /* tcp_log_getlogbuf() does INP_WUNLOCK(inp) */
 2189                         error = tcp_log_getlogbuf(sopt, tp);
 2190                         break;
 2191                 case TCP_LOGID:
 2192                         len = tcp_log_get_id(tp, buf);
 2193                         INP_WUNLOCK(inp);
 2194                         error = sooptcopyout(sopt, buf, len + 1);
 2195                         break;
 2196                 case TCP_LOGDUMP:
 2197                 case TCP_LOGDUMPID:
 2198                         INP_WUNLOCK(inp);
 2199                         error = EINVAL;
 2200                         break;
 2201 #endif
 2202                 default:
 2203                         INP_WUNLOCK(inp);
 2204                         error = ENOPROTOOPT;
 2205                         break;
 2206                 }
 2207                 break;
 2208         }
 2209         return (error);
 2210 }
 2211 #undef INP_WLOCK_RECHECK
 2212 #undef INP_WLOCK_RECHECK_CLEANUP
 2213 
 2214 /*
 2215  * Attach TCP protocol to socket, allocating
 2216  * internet protocol control block, tcp control block,
 2217  * and buffer space.  The new tcpcb is left in the TCPS_CLOSED state.
       *
       * Send/receive buffer space is reserved only when either high-water mark
       * is still zero; both socket buffers are marked SB_AUTOSIZE.
       *
       * Returns 0 on success, the error from soreserve() or in_pcballoc(), or
       * ENOBUFS when tcp_newtcpcb() fails (the inpcb is detached and freed again
       * in that case).
 2218  */
 2219 static int
 2220 tcp_attach(struct socket *so)
 2221 {
 2222         struct tcpcb *tp;
 2223         struct inpcb *inp;
 2224         struct epoch_tracker et;
 2225         int error;
 2226 
 2227         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 2228                 error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
 2229                 if (error)
 2230                         return (error);
 2231         }
 2232         so->so_rcv.sb_flags |= SB_AUTOSIZE;
 2233         so->so_snd.sb_flags |= SB_AUTOSIZE;
 2234         INP_INFO_RLOCK_ET(&V_tcbinfo, et);
 2235         error = in_pcballoc(so, &V_tcbinfo);
 2236         if (error) {
 2237                 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 2238                 return (error);
 2239         }
 2240         inp = sotoinpcb(so);
              /*
               * With INET6 compiled in, the INP_IPV4 assignment after the #endif
               * is the else-branch of this if; without INET6 it is unconditional.
               */
 2241 #ifdef INET6
 2242         if (inp->inp_vflag & INP_IPV6PROTO) {
 2243                 inp->inp_vflag |= INP_IPV6;
                      /* Unless the socket is v6-only, also flag it for IPv4. */
 2244                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
 2245                         inp->inp_vflag |= INP_IPV4;
 2246                 inp->in6p_hops = -1;    /* use kernel default */
 2247         }
 2248         else
 2249 #endif
 2250         inp->inp_vflag |= INP_IPV4;
 2251         tp = tcp_newtcpcb(inp);
 2252         if (tp == NULL) {
 2253                 in_pcbdetach(inp);
 2254                 in_pcbfree(inp);
 2255                 INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 2256                 return (ENOBUFS);
 2257         }
 2258         tp->t_state = TCPS_CLOSED;
              /*
               * NOTE(review): no INP_WLOCK() is visible in this function, so
               * in_pcballoc() presumably returns the inpcb write-locked -- confirm.
               */
 2259         INP_WUNLOCK(inp);
 2260         INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
 2261         TCPSTATES_INC(TCPS_CLOSED);
 2262         return (0);
 2263 }
 2264 
 2265 /*
 2266  * Initiate (or continue) disconnect.
 2267  * If embryonic state, just send reset (once).
 2268  * If in ``let data drain'' option and linger null, just drop.
 2269  * Otherwise (hard), mark socket disconnecting and drop
 2270  * current input data; switch states based on user close, and
 2271  * send segment to peer (with FIN).
 2272  */
 2273 static void
 2274 tcp_disconnect(struct tcpcb *tp)
 2275 {
 2276         struct inpcb *inp = tp->t_inpcb;
 2277         struct socket *so = inp->inp_socket;
 2278 
 2279         INP_INFO_RLOCK_ASSERT(&V_tcbinfo);     /* lock assertions: V_tcbinfo (read) ... */
 2280         INP_WLOCK_ASSERT(inp);                 /* ... and this connection's inpcb (write) */
 2281 
 2282         /*
 2283          * Neither tcp_close() nor tcp_drop() should return NULL, as the
 2284          * socket is still open.
 2285          */
 2286         if (tp->t_state < TCPS_ESTABLISHED &&  /* before ESTABLISHED, except ... */
 2287             !(tp->t_state > TCPS_LISTEN && IS_FASTOPEN(tp->t_flags))) {        /* ... fast-open past LISTEN */
 2288                 tp = tcp_close(tp);
 2289                 KASSERT(tp != NULL,
 2290                     ("tcp_disconnect: tcp_close() returned NULL"));
 2291         } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {       /* SO_LINGER with zero linger time */
 2292                 tp = tcp_drop(tp, 0);
 2293                 KASSERT(tp != NULL,
 2294                     ("tcp_disconnect: tcp_drop() returned NULL"));
 2295         } else {
 2296                 soisdisconnecting(so);
 2297                 sbflush(&so->so_rcv);   /* discard data still queued for receive */
 2298                 tcp_usrclosed(tp);
 2299                 if (!(inp->inp_flags & INP_DROPPED))    /* skip output if the inpcb is flagged dropped */
 2300                         tp->t_fb->tfb_tcp_output(tp);
 2301         }
 2302 }
 2303 
 2304 /*
 2305  * User issued close, and wish to trail through shutdown states:
 2306  * if never received SYN, just forget it.  If got a SYN from peer,
 2307  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
 2308  * If already got a FIN from peer, then almost done; go to LAST_ACK
 2309  * state.  In all other cases, have already sent FIN to peer (e.g.
 2310  * after PRU_SHUTDOWN), and just have to play tedious game waiting
 2311  * for peer to send FIN or not respond to keep-alives, etc.
 2312  * We can let the user exit from the close as soon as the FIN is acked.
 2313  */
 2314 static void
 2315 tcp_usrclosed(struct tcpcb *tp)
 2316 {
 2317 
 2318         INP_INFO_RLOCK_ASSERT(&V_tcbinfo);     /* lock assertions: V_tcbinfo (read) ... */
 2319         INP_WLOCK_ASSERT(tp->t_inpcb);         /* ... and the connection's inpcb (write) */
 2320 
 2321         switch (tp->t_state) {         /* states without a case below are left as they are */
 2322         case TCPS_LISTEN:
 2323 #ifdef TCP_OFFLOAD
 2324                 tcp_offload_listen_stop(tp);
 2325 #endif
 2326                 tcp_state_change(tp, TCPS_CLOSED);     /* then share the CLOSED path */
 2327                 /* FALLTHROUGH */
 2328         case TCPS_CLOSED:
 2329                 tp = tcp_close(tp);
 2330                 /*
 2331                  * tcp_close() should never return NULL here as the socket is
 2332                  * still open.
 2333                  */
 2334                 KASSERT(tp != NULL,
 2335                     ("tcp_usrclosed: tcp_close() returned NULL"));
 2336                 break;
 2337 
 2338         case TCPS_SYN_SENT:
 2339         case TCPS_SYN_RECEIVED:
 2340                 tp->t_flags |= TF_NEEDFIN;     /* only sets the flag; t_state is not changed here */
 2341                 break;
 2342 
 2343         case TCPS_ESTABLISHED:
 2344                 tcp_state_change(tp, TCPS_FIN_WAIT_1);
 2345                 break;
 2346 
 2347         case TCPS_CLOSE_WAIT:
 2348                 tcp_state_change(tp, TCPS_LAST_ACK);
 2349                 break;
 2350         }
 2351         if (tp->t_state >= TCPS_FIN_WAIT_2) {  /* t_state is re-read after the switch */
 2352                 soisdisconnected(tp->t_inpcb->inp_socket);
 2353                 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
 2354                 if (tp->t_state == TCPS_FIN_WAIT_2) {
 2355                         int timeout;
 2356 
 2357                         timeout = (tcp_fast_finwait2_recycle) ? 
 2358                             tcp_finwait2_timeout : TP_MAXIDLE(tp);     /* fast recycle picks tcp_finwait2_timeout */
 2359                         tcp_timer_activate(tp, TT_2MSL, timeout);      /* arm TT_2MSL with the chosen value */
 2360                 }
 2361         }
 2362 }
 2363 
 2364 #ifdef DDB
 2365 /* DDB helper: print 'indent' spaces via db_printf(); nothing if indent <= 0. */
 2366 static void
 2367 db_print_indent(int indent)
 2368 {
 2369         /* 'indent' is a by-value copy, so it can be counted down directly. */
 2370         for (; indent > 0; indent--)
 2371                 db_printf(" ");
 2372 }
 2373 
 2374 /*
 2375  * DDB helper: print the symbolic name of TCP state 't_state' via
 2376  * db_printf(), with no trailing newline.  A value matching none of the
 2377  * TCPS_* cases below is printed as "unknown".
 2378  */
 2379 static void
 2380 db_print_tstate(int t_state)
 2381 {
 2382         const char *label;
 2383 
 2384         switch (t_state) {
 2385         case TCPS_CLOSED:
 2386                 label = "TCPS_CLOSED";
 2387                 break;
 2388         case TCPS_LISTEN:
 2389                 label = "TCPS_LISTEN";
 2390                 break;
 2391         case TCPS_SYN_SENT:
 2392                 label = "TCPS_SYN_SENT";
 2393                 break;
 2394         case TCPS_SYN_RECEIVED:
 2395                 label = "TCPS_SYN_RECEIVED";
 2396                 break;
 2397         case TCPS_ESTABLISHED:
 2398                 label = "TCPS_ESTABLISHED";
 2399                 break;
 2400         case TCPS_CLOSE_WAIT:
 2401                 label = "TCPS_CLOSE_WAIT";
 2402                 break;
 2403         case TCPS_FIN_WAIT_1:
 2404                 label = "TCPS_FIN_WAIT_1";
 2405                 break;
 2406         case TCPS_CLOSING:
 2407                 label = "TCPS_CLOSING";
 2408                 break;
 2409         case TCPS_LAST_ACK:
 2410                 label = "TCPS_LAST_ACK";
 2411                 break;
 2412         case TCPS_FIN_WAIT_2:
 2413                 label = "TCPS_FIN_WAIT_2";
 2414                 break;
 2415         case TCPS_TIME_WAIT:
 2416                 label = "TCPS_TIME_WAIT";
 2417                 break;
 2418         default:
 2419                 label = "unknown";
 2420                 break;
 2421         }
 2422 
 2423         /*
 2424          * Every case assigned 'label'; emit it with a single call.
 2425          */
 2426         db_printf("%s", label);
 2427 }
 2428 
 2429 /* DDB helper: print the name of each TF_* bit tested below that is set in 't_flags'. */
 2430 static void
 2431 db_print_tflags(u_int t_flags)
 2432 {
 2433         int comma = 0;          /* 1 once a name is printed, so later names get ", " first */
 2434 
 2435         if (t_flags & TF_ACKNOW) {
 2436                 db_printf("%sTF_ACKNOW", comma ? ", " : "");
 2437                 comma = 1;
 2438         }
 2439         if (t_flags & TF_DELACK) {
 2440                 db_printf("%sTF_DELACK", comma ? ", " : "");
 2441                 comma = 1;
 2442         }
 2443         if (t_flags & TF_NODELAY) {
 2444                 db_printf("%sTF_NODELAY", comma ? ", " : "");
 2445                 comma = 1;
 2446         }
 2447         if (t_flags & TF_NOOPT) {
 2448                 db_printf("%sTF_NOOPT", comma ? ", " : "");
 2449                 comma = 1;
 2450         }
 2451         if (t_flags & TF_SENTFIN) {
 2452                 db_printf("%sTF_SENTFIN", comma ? ", " : "");
 2453                 comma = 1;
 2454         }
 2455         if (t_flags & TF_REQ_SCALE) {
 2456                 db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
 2457                 comma = 1;
 2458         }
 2459         if (t_flags & TF_RCVD_SCALE) {
 2460                 db_printf("%sTF_RCVD_SCALE", comma ? ", " : "");
 2461                 comma = 1;
 2462         }
 2463         if (t_flags & TF_REQ_TSTMP) {
 2464                 db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
 2465                 comma = 1;
 2466         }
 2467         if (t_flags & TF_RCVD_TSTMP) {
 2468                 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
 2469                 comma = 1;
 2470         }
 2471         if (t_flags & TF_SACK_PERMIT) {
 2472                 db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
 2473                 comma = 1;
 2474         }
 2475         if (t_flags & TF_NEEDSYN) {
 2476                 db_printf("%sTF_NEEDSYN", comma ? ", " : "");
 2477                 comma = 1;
 2478         }
 2479         if (t_flags & TF_NEEDFIN) {
 2480                 db_printf("%sTF_NEEDFIN", comma ? ", " : "");
 2481                 comma = 1;
 2482         }
 2483         if (t_flags & TF_NOPUSH) {
 2484                 db_printf("%sTF_NOPUSH", comma ? ", " : "");
 2485                 comma = 1;
 2486         }
 2487         if (t_flags & TF_MORETOCOME) {
 2488                 db_printf("%sTF_MORETOCOME", comma ? ", " : "");
 2489                 comma = 1;
 2490         }
 2491         if (t_flags & TF_LQ_OVERFLOW) {
 2492                 db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
 2493                 comma = 1;
 2494         }
 2495         if (t_flags & TF_LASTIDLE) {
 2496                 db_printf("%sTF_LASTIDLE", comma ? ", " : "");
 2497                 comma = 1;
 2498         }
 2499         if (t_flags & TF_RXWIN0SENT) {
 2500                 db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
 2501                 comma = 1;
 2502         }
 2503         if (t_flags & TF_FASTRECOVERY) {
 2504                 db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
 2505                 comma = 1;
 2506         }
 2507         if (t_flags & TF_CONGRECOVERY) {
 2508                 db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
 2509                 comma = 1;
 2510         }
 2511         if (t_flags & TF_WASFRECOVERY) {
 2512                 db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
 2513                 comma = 1;
 2514         }
 2515         if (t_flags & TF_SIGNATURE) {
 2516                 db_printf("%sTF_SIGNATURE", comma ? ", " : "");
 2517                 comma = 1;
 2518         }
 2519         if (t_flags & TF_FORCEDATA) {
 2520                 db_printf("%sTF_FORCEDATA", comma ? ", " : "");
 2521                 comma = 1;
 2522         }
 2523         if (t_flags & TF_TSO) {
 2524                 db_printf("%sTF_TSO", comma ? ", " : "");
 2525                 comma = 1;
 2526         }
 2527         if (t_flags & TF_ECN_PERMIT) {
 2528                 db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
 2529                 comma = 1;
 2530         }
 2531         if (t_flags & TF_FASTOPEN) {
 2532                 db_printf("%sTF_FASTOPEN", comma ? ", " : "");
 2533                 comma = 1;
 2534         }
 2535 }
 2536 
 2537 /*
 2538  * DDB helper: name the TCPOOB_* bits set in 't_oobflags', ", "-separated.
 2539  */
 2540 static void
 2541 db_print_toobflags(char t_oobflags)
 2542 {
 2543         const char *sep = "";
 2544 
 2545         if ((t_oobflags & TCPOOB_HAVEDATA) != 0) {
 2546                 db_printf("%sTCPOOB_HAVEDATA", sep);
 2547                 sep = ", ";
 2548         }
 2549         if ((t_oobflags & TCPOOB_HADDATA) != 0)
 2550                 db_printf("%sTCPOOB_HADDATA", sep);
 2551 }
 2552 
 2553 /* DDB helper: dump fields of 'tp' under the heading 'name', indented by 'indent'. */
 2554 static void
 2555 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
 2556 {
 2557         db_print_indent(indent);
 2558         db_printf("%s at %p\n", name, tp);
 2559 
 2560         indent += 2;    /* field lines sit two columns in from the heading */
 2561 
 2562         db_print_indent(indent);
 2563         db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
 2564             TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
 2565 
 2566         db_print_indent(indent);
 2567         db_printf("tt_rexmt: %p   tt_persist: %p   tt_keep: %p\n",
 2568             &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
 2569 
 2570         db_print_indent(indent);
 2571         db_printf("tt_2msl: %p   tt_delack: %p   t_inpcb: %p\n", &tp->t_timers->tt_2msl,
 2572             &tp->t_timers->tt_delack, tp->t_inpcb);
 2573 
 2574         db_print_indent(indent);
 2575         db_printf("t_state: %d (", tp->t_state);
 2576         db_print_tstate(tp->t_state);
 2577         db_printf(")\n");
 2578 
 2579         db_print_indent(indent);
 2580         db_printf("t_flags: 0x%x (", tp->t_flags);
 2581         db_print_tflags(tp->t_flags);
 2582         db_printf(")\n");
 2583 
 2584         db_print_indent(indent);
 2585         db_printf("snd_una: 0x%08x   snd_max: 0x%08x   snd_nxt: 0x%08x\n",
 2586             tp->snd_una, tp->snd_max, tp->snd_nxt);
 2587 
 2588         db_print_indent(indent);
 2589         db_printf("snd_up: 0x%08x   snd_wl1: 0x%08x   snd_wl2: 0x%08x\n",
 2590             tp->snd_up, tp->snd_wl1, tp->snd_wl2);
 2591 
 2592         db_print_indent(indent);
 2593         db_printf("iss: 0x%08x   irs: 0x%08x   rcv_nxt: 0x%08x\n",
 2594             tp->iss, tp->irs, tp->rcv_nxt);
 2595 
 2596         db_print_indent(indent);
 2597         db_printf("rcv_adv: 0x%08x   rcv_wnd: %u   rcv_up: 0x%08x\n",
 2598             tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
 2599 
 2600         db_print_indent(indent);
 2601         db_printf("snd_wnd: %u   snd_cwnd: %u\n",
 2602             tp->snd_wnd, tp->snd_cwnd);
 2603 
 2604         db_print_indent(indent);
 2605         db_printf("snd_ssthresh: %u   snd_recover: "
 2606             "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
 2607 
 2608         db_print_indent(indent);
 2609         db_printf("t_rcvtime: %u   t_starttime: %u\n",
 2610             tp->t_rcvtime, tp->t_starttime);
 2611 
 2612         db_print_indent(indent);
 2613         db_printf("t_rtttime: %u   t_rtseq: 0x%08x\n",
 2614             tp->t_rtttime, tp->t_rtseq);
 2615 
 2616         db_print_indent(indent);
 2617         db_printf("t_rxtcur: %d   t_maxseg: %u   t_srtt: %d\n",
 2618             tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
 2619 
 2620         db_print_indent(indent);
 2621         db_printf("t_rttvar: %d   t_rxtshift: %d   t_rttmin: %u   "
 2622             "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
 2623             tp->t_rttbest);
 2624 
 2625         db_print_indent(indent);
 2626         db_printf("t_rttupdated: %lu   max_sndwnd: %u   t_softerror: %d\n",
 2627             tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
 2628 
 2629         db_print_indent(indent);
 2630         db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
 2631         db_print_toobflags(tp->t_oobflags);
 2632         db_printf(")   t_iobc: 0x%02x\n", tp->t_iobc);
 2633 
 2634         db_print_indent(indent);
 2635         db_printf("snd_scale: %u   rcv_scale: %u   request_r_scale: %u\n",
 2636             tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
 2637 
 2638         db_print_indent(indent);
 2639         db_printf("ts_recent: %u   ts_recent_age: %u\n",
 2640             tp->ts_recent, tp->ts_recent_age);
 2641 
 2642         db_print_indent(indent);
 2643         db_printf("ts_offset: %u   last_ack_sent: 0x%08x   snd_cwnd_prev: "
 2644             "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
 2645 
 2646         db_print_indent(indent);
 2647         db_printf("snd_ssthresh_prev: %u   snd_recover_prev: 0x%08x   "
 2648             "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
 2649             tp->snd_recover_prev, tp->t_badrxtwin);
 2650 
 2651         db_print_indent(indent);
 2652         db_printf("snd_numholes: %d  snd_holes first: %p\n",
 2653             tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
 2654 
 2655         db_print_indent(indent);
 2656         db_printf("snd_fack: 0x%08x   rcv_numsacks: %d   sack_newdata: "
 2657             "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata);
 2658 
 2659         /* Skip sackblks, sackhint. */
 2660 
 2661         db_print_indent(indent);
 2662         db_printf("t_rttlow: %d   rfbuf_ts: %u   rfbuf_cnt: %d\n",
 2663             tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
 2664 }
 2665 
 2666 /*
 2667  * DDB "show tcpcb <addr>" command: treat 'addr' as a struct tcpcb pointer
 2668  * and dump it; when no address was given, print the usage line instead.
 2669  */
 2670 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
 2671 {
 2672         if (have_addr) {
 2673                 db_print_tcpcb((struct tcpcb *)addr, "tcpcb", 0);
 2674                 return;
 2675         }
 2676         db_printf("usage: show tcpcb <addr>\n");
 2677 }
 2678 #endif

Cache object: f65798cc4d32c5e6f35dab296c2d11b3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.