The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1993
    3  *      The Regents of the University of California.
    4  * Copyright (c) 2006-2007 Robert N. M. Watson
    5  * Copyright (c) 2010-2011 Juniper Networks, Inc.
    6  * All rights reserved.
    7  *
    8  * Portions of this software were developed by Robert N. M. Watson under
    9  * contract to Juniper Networks, Inc.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 4. Neither the name of the University nor the names of its contributors
   20  *    may be used to endorse or promote products derived from this software
   21  *    without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  *
   35  *      From: @(#)tcp_usrreq.c  8.2 (Berkeley) 1/3/94
   36  */
   37 
   38 #include <sys/cdefs.h>
   39 __FBSDID("$FreeBSD: releng/10.3/sys/netinet/tcp_usrreq.c 292823 2015-12-28 02:43:12Z pkelsey $");
   40 
   41 #include "opt_ddb.h"
   42 #include "opt_inet.h"
   43 #include "opt_inet6.h"
   44 #include "opt_tcpdebug.h"
   45 
   46 #include <sys/param.h>
   47 #include <sys/systm.h>
   48 #include <sys/limits.h>
   49 #include <sys/malloc.h>
   50 #include <sys/kernel.h>
   51 #include <sys/sysctl.h>
   52 #include <sys/mbuf.h>
   53 #ifdef INET6
   54 #include <sys/domain.h>
   55 #endif /* INET6 */
   56 #include <sys/socket.h>
   57 #include <sys/socketvar.h>
   58 #include <sys/protosw.h>
   59 #include <sys/proc.h>
   60 #include <sys/jail.h>
   61 
   62 #ifdef DDB
   63 #include <ddb/ddb.h>
   64 #endif
   65 
   66 #include <net/if.h>
   67 #include <net/route.h>
   68 #include <net/vnet.h>
   69 
   70 #include <netinet/cc.h>
   71 #include <netinet/in.h>
   72 #include <netinet/in_pcb.h>
   73 #include <netinet/in_systm.h>
   74 #include <netinet/in_var.h>
   75 #include <netinet/ip_var.h>
   76 #ifdef INET6
   77 #include <netinet/ip6.h>
   78 #include <netinet6/in6_pcb.h>
   79 #include <netinet6/ip6_var.h>
   80 #include <netinet6/scope6_var.h>
   81 #endif
   82 #ifdef TCP_RFC7413
   83 #include <netinet/tcp_fastopen.h>
   84 #endif
   85 #include <netinet/tcp_fsm.h>
   86 #include <netinet/tcp_seq.h>
   87 #include <netinet/tcp_timer.h>
   88 #include <netinet/tcp_var.h>
   89 #include <netinet/tcpip.h>
   90 #ifdef TCPDEBUG
   91 #include <netinet/tcp_debug.h>
   92 #endif
   93 #ifdef TCP_OFFLOAD
   94 #include <netinet/tcp_offload.h>
   95 #endif
   96 
   97 /*
   98  * TCP protocol interface to socket abstraction.
       *
       * Forward declarations of the file-local (static) helpers.  The IPv4 and
       * IPv6 connect helpers are declared only when the corresponding address
       * family (INET / INET6) is compiled in.
   99  */
  100 static int      tcp_attach(struct socket *);
  101 #ifdef INET
  102 static int      tcp_connect(struct tcpcb *, struct sockaddr *,
  103                     struct thread *td);
  104 #endif /* INET */
  105 #ifdef INET6
  106 static int      tcp6_connect(struct tcpcb *, struct sockaddr *,
  107                     struct thread *td);
  108 #endif /* INET6 */
  109 static void     tcp_disconnect(struct tcpcb *);
  110 static void     tcp_usrclosed(struct tcpcb *);
  111 static void     tcp_fill_info(struct tcpcb *, struct tcp_info *);
  112 
      /*
       * Tracing hooks used by the user-request handlers in this file.
       *
       * With TCPDEBUG defined, TCPDEBUG0 declares a local `ostate',
       * TCPDEBUG1() stores tp->t_state in it (0 when tp is NULL), and
       * TCPDEBUG2(req) calls tcp_trace() when tp is non-NULL and the socket
       * has SO_DEBUG set.  Without TCPDEBUG all three expand to nothing.
       *
       * The macros refer to locals named `tp' and `so', so both must be in
       * scope wherever they are used.
       */
  113 #ifdef TCPDEBUG
  114 #define TCPDEBUG0       int ostate = 0
  115 #define TCPDEBUG1()     ostate = tp ? tp->t_state : 0
  116 #define TCPDEBUG2(req)  if (tp && (so->so_options & SO_DEBUG)) \
  117                                 tcp_trace(TA_USER, ostate, tp, 0, 0, req)
  118 #else
  119 #define TCPDEBUG0
  120 #define TCPDEBUG1()
  121 #define TCPDEBUG2(req)
  122 #endif
  123 
  124 /*
  125  * TCP attaches to socket via pru_attach(), reserving space,
  126  * and an internet control block.
       *
       * The socket must not already have an inpcb (asserted).  The real work
       * is delegated to tcp_attach(); on success, a socket that has SO_LINGER
       * set with a zero linger time is given the default TCP_LINGERTIME.
       * Returns 0 or the error from tcp_attach().  The proto and td arguments
       * are not used here.
  127  */
  128 static int
  129 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
  130 {
  131         struct inpcb *inp;
  132         struct tcpcb *tp = NULL;
  133         int error;
  134         TCPDEBUG0;
  135 
  136         inp = sotoinpcb(so);
  137         KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
  138         TCPDEBUG1();
  139 
  140         error = tcp_attach(so);
  141         if (error)
  142                 goto out;
  143 
              /* Linger was requested without a timeout: apply the TCP default. */
  144         if ((so->so_options & SO_LINGER) && so->so_linger == 0)
  145                 so->so_linger = TCP_LINGERTIME;
  146 
              /* tp is looked up only so that TCPDEBUG2() below can trace it. */
  147         inp = sotoinpcb(so);
  148         tp = intotcpcb(inp);
  149 out:
  150         TCPDEBUG2(PRU_ATTACH);
  151         return error;
  152 }
  153 
  154 /*
  155  * tcp_detach is called when the socket layer loses its final reference
  156  * to the socket, be it a file descriptor reference, a reference from TCP,
  157  * etc.  At this point, there is only one case in which we will keep around
  158  * inpcb state: time wait.
  159  *
  160  * This function can probably be re-absorbed back into tcp_usr_detach() now
  161  * that there is a single detach path.
       *
       * The caller must hold the tcbinfo write lock and the inpcb write lock
       * (both asserted).  On every path the inpcb is first detached from the
       * socket with in_pcbdetach(); it is then either handed to in_pcbfree()
       * or left allocated and unlocked with INP_WUNLOCK().
  162  */
  163 static void
  164 tcp_detach(struct socket *so, struct inpcb *inp)
  165 {
  166         struct tcpcb *tp;
  167 
  168         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
  169         INP_WLOCK_ASSERT(inp);
  170 
  171         KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
  172         KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
  173 
  174         tp = intotcpcb(inp);
  175 
  176         if (inp->inp_flags & INP_TIMEWAIT) {
  177                 /*
  178                  * There are two cases to handle: one in which the time wait
  179                  * state is being discarded (INP_DROPPED), and one in which
  180                  * this connection will remain in timewait.  In the former,
  181                  * it is time to discard all state (except tcptw, which has
  182                  * already been discarded by the timewait close code, which
  183                  * should be further up the call stack somewhere).  In the
  184                  * latter case, we detach from the socket, but leave the pcb
  185                  * present until timewait ends.
  186                  *
  187                  * XXXRW: Would it be cleaner to free the tcptw here?
  188                  *
  189                  * Astute question indeed, from twtcp perspective there are
  190                  * three cases to consider:
  191                  *
  192                  * #1 tcp_detach is called at tcptw creation time by
  193                  *  tcp_twstart, then do not discard the newly created tcptw
  194                  *  and leave inpcb present until timewait ends
  195                  * #2 tcp_detach is called at timewait end (or reuse) by
  196                  *  tcp_twclose, then the tcptw has already been discarded
  197                  *  and inpcb is freed here
  198                  * #3 tcp_detach is called after timewait ends (or reuse)
  199                  *  (e.g. by soclose), then tcptw has already been discarded
  200                  *  and inpcb is freed here
  201                  *
  202                  *  In all three cases the tcptw should not be freed here.
  203                  */
  204                 if (inp->inp_flags & INP_DROPPED) {
  205                         KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
  206                             "INP_DROPPED && tp != NULL"));
  207                         in_pcbdetach(inp);
  208                         in_pcbfree(inp);
  209                 } else {
  210                         in_pcbdetach(inp);
  211                         INP_WUNLOCK(inp);
  212                 }
  213         } else {
  214                 /*
  215                  * If the connection is not in timewait, we consider two
  216                  * conditions: one in which no further processing is
  217                  * necessary (dropped || embryonic), and one in which TCP is
  218                  * not yet done, but no longer requires the socket, so the
  219                  * pcb will persist for the time being.
  220                  *
  221                  * XXXRW: Does the second case still occur?
  222                  */
  223                 if (inp->inp_flags & INP_DROPPED ||
  224                     tp->t_state < TCPS_SYN_SENT) {
  225                         tcp_discardcb(tp);
  226                         in_pcbdetach(inp);
  227                         in_pcbfree(inp);
  228                 } else {
  229                         in_pcbdetach(inp);
  230                         INP_WUNLOCK(inp);
  231                 }
  232         }
  233 }
  234 
  235 /*
  236  * pru_detach() detaches the TCP protocol from the socket.
  237  * If the protocol state is non-embryonic, then can't
  238  * do this directly: have to initiate a pru_disconnect(),
  239  * which may finish later; embryonic TCB's can just
  240  * be discarded here.
       *
       * This wrapper takes the tcbinfo write lock and the inpcb write lock and
       * hands both to tcp_detach().  Only the tcbinfo lock is released here;
       * the inpcb lock is not dropped in this function (tcp_detach() either
       * unlocks the inpcb or passes it to in_pcbfree()).
  241  */
  242 static void
  243 tcp_usr_detach(struct socket *so)
  244 {
  245         struct inpcb *inp;
  246 
  247         inp = sotoinpcb(so);
  248         KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
  249         INP_INFO_WLOCK(&V_tcbinfo);
  250         INP_WLOCK(inp);
  251         KASSERT(inp->inp_socket != NULL,
  252             ("tcp_usr_detach: inp_socket == NULL"));
  253         tcp_detach(so, inp);
  254         INP_INFO_WUNLOCK(&V_tcbinfo);
  255 }
  256 
  257 #ifdef INET
  258 /*
  259  * Give the socket an address.
       *
       * Returns EINVAL when nam is not sized as a sockaddr_in or when the inpcb
       * is in timewait or has been dropped, EAFNOSUPPORT for an AF_INET
       * multicast address, and otherwise the result of in_pcbbind(), which is
       * called with both the inpcb write lock and the tcbinfo hash lock held.
  260  */
  261 static int
  262 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  263 {
  264         int error = 0;
  265         struct inpcb *inp;
  266         struct tcpcb *tp = NULL;
  267         struct sockaddr_in *sinp;
  268 
  269         sinp = (struct sockaddr_in *)nam;
  270         if (nam->sa_len != sizeof (*sinp))
  271                 return (EINVAL);
  272         /*
  273          * Must check for multicast addresses and disallow binding
  274          * to them.
  275          */
  276         if (sinp->sin_family == AF_INET &&
  277             IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
  278                 return (EAFNOSUPPORT);
  279 
  280         TCPDEBUG0;
  281         inp = sotoinpcb(so);
  282         KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
  283         INP_WLOCK(inp);
  284         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  285                 error = EINVAL;
  286                 goto out;
  287         }
              /* tp is needed only by the TCPDEBUG tracing macros. */
  288         tp = intotcpcb(inp);
  289         TCPDEBUG1();
  290         INP_HASH_WLOCK(&V_tcbinfo);
  291         error = in_pcbbind(inp, nam, td->td_ucred);
  292         INP_HASH_WUNLOCK(&V_tcbinfo);
  293 out:
  294         TCPDEBUG2(PRU_BIND);
  295         INP_WUNLOCK(inp);
  296 
  297         return (error);
  298 }
  299 #endif /* INET */
  300 
  301 #ifdef INET6
      /*
       * Give an IPv6 TCP socket an address.
       *
       * Returns EINVAL when nam is not sized as a sockaddr_in6 or when the
       * inpcb is in timewait or has been dropped, and EAFNOSUPPORT for an
       * AF_INET6 multicast address.
       *
       * The inpcb starts out marked IPv6-only.  When INET is compiled in and
       * the socket does not have IN6P_IPV6_V6ONLY set, an unspecified address
       * additionally enables INP_IPV4, while a v4-mapped address is converted
       * to a sockaddr_in and bound as IPv4 only through in_pcbbind().  All
       * other addresses are bound with in6_pcbbind().  Binding is done with
       * the tcbinfo hash lock held.
       */
  302 static int
  303 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  304 {
  305         int error = 0;
  306         struct inpcb *inp;
  307         struct tcpcb *tp = NULL;
  308         struct sockaddr_in6 *sin6p;
  309 
  310         sin6p = (struct sockaddr_in6 *)nam;
  311         if (nam->sa_len != sizeof (*sin6p))
  312                 return (EINVAL);
  313         /*
  314          * Must check for multicast addresses and disallow binding
  315          * to them.
  316          */
  317         if (sin6p->sin6_family == AF_INET6 &&
  318             IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
  319                 return (EAFNOSUPPORT);
  320 
  321         TCPDEBUG0;
  322         inp = sotoinpcb(so);
  323         KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
  324         INP_WLOCK(inp);
  325         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  326                 error = EINVAL;
  327                 goto out;
  328         }
  329         tp = intotcpcb(inp);
  330         TCPDEBUG1();
  331         INP_HASH_WLOCK(&V_tcbinfo);
  332         inp->inp_vflag &= ~INP_IPV4;
  333         inp->inp_vflag |= INP_IPV6;
  334 #ifdef INET
  335         if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
  336                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
  337                         inp->inp_vflag |= INP_IPV4;
  338                 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
  339                         struct sockaddr_in sin;
  340 
  341                         in6_sin6_2_sin(&sin, sin6p);
  342                         inp->inp_vflag |= INP_IPV4;
  343                         inp->inp_vflag &= ~INP_IPV6;
  344                         error = in_pcbbind(inp, (struct sockaddr *)&sin,
  345                             td->td_ucred);
                              /* This early exit drops the hash lock itself. */
  346                         INP_HASH_WUNLOCK(&V_tcbinfo);
  347                         goto out;
  348                 }
  349         }
  350 #endif
  351         error = in6_pcbbind(inp, nam, td->td_ucred);
  352         INP_HASH_WUNLOCK(&V_tcbinfo);
  353 out:
  354         TCPDEBUG2(PRU_BIND);
  355         INP_WUNLOCK(inp);
  356         return (error);
  357 }
  358 #endif /* INET6 */
  359 
  360 #ifdef INET
  361 /*
  362  * Prepare to accept connections.
       *
       * Returns EINVAL when the inpcb is in timewait or has been dropped.
       * Otherwise, with the socket lock held, the request is validated with
       * solisten_proto_check(); if no local port is set yet (inp_lport == 0)
       * in_pcbbind() is called with a null address.  On success the tcpcb
       * moves to TCPS_LISTEN and solisten_proto() applies the backlog.
  363  */
  364 static int
  365 tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
  366 {
  367         int error = 0;
  368         struct inpcb *inp;
  369         struct tcpcb *tp = NULL;
  370 
  371         TCPDEBUG0;
  372         inp = sotoinpcb(so);
  373         KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
  374         INP_WLOCK(inp);
  375         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  376                 error = EINVAL;
  377                 goto out;
  378         }
  379         tp = intotcpcb(inp);
  380         TCPDEBUG1();
  381         SOCK_LOCK(so);
  382         error = solisten_proto_check(so);
  383         INP_HASH_WLOCK(&V_tcbinfo);
  384         if (error == 0 && inp->inp_lport == 0)
  385                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
  386         INP_HASH_WUNLOCK(&V_tcbinfo);
  387         if (error == 0) {
  388                 tcp_state_change(tp, TCPS_LISTEN);
  389                 solisten_proto(so, backlog);
  390 #ifdef TCP_OFFLOAD
  391                 if ((so->so_options & SO_NO_OFFLOAD) == 0)
  392                         tcp_offload_listen_start(tp);
  393 #endif
  394         }
  395         SOCK_UNLOCK(so);
  396 
  397 #ifdef TCP_RFC7413
              /*
               * NOTE(review): this is reached on the failure path too (error != 0
               * does not skip it), and any value already stored in t_tfo_pending
               * is overwritten without being released here -- confirm that a
               * failed or repeated listen cannot leak a counter.
               */
  398         if (tp->t_flags & TF_FASTOPEN)
  399                 tp->t_tfo_pending = tcp_fastopen_alloc_counter();
  400 #endif
  401 out:
  402         TCPDEBUG2(PRU_LISTEN);
  403         INP_WUNLOCK(inp);
  404         return (error);
  405 }
  406 #endif /* INET */
  407 
  408 #ifdef INET6
      /*
       * Prepare an IPv6 TCP socket to accept connections.
       *
       * Returns EINVAL when the inpcb is in timewait or has been dropped.
       * Otherwise, with the socket lock held, the request is validated with
       * solisten_proto_check().  If no local port is set yet (inp_lport == 0)
       * INP_IPV4 is cleared, set again unless the socket is IN6P_IPV6_V6ONLY,
       * and in6_pcbbind() is called with a null address.  On success the tcpcb
       * moves to TCPS_LISTEN and solisten_proto() applies the backlog.
       */
  409 static int
  410 tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
  411 {
  412         int error = 0;
  413         struct inpcb *inp;
  414         struct tcpcb *tp = NULL;
  415 
  416         TCPDEBUG0;
  417         inp = sotoinpcb(so);
  418         KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
  419         INP_WLOCK(inp);
  420         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  421                 error = EINVAL;
  422                 goto out;
  423         }
  424         tp = intotcpcb(inp);
  425         TCPDEBUG1();
  426         SOCK_LOCK(so);
  427         error = solisten_proto_check(so);
  428         INP_HASH_WLOCK(&V_tcbinfo);
  429         if (error == 0 && inp->inp_lport == 0) {
  430                 inp->inp_vflag &= ~INP_IPV4;
  431                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
  432                         inp->inp_vflag |= INP_IPV4;
  433                 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
  434         }
  435         INP_HASH_WUNLOCK(&V_tcbinfo);
  436         if (error == 0) {
  437                 tcp_state_change(tp, TCPS_LISTEN);
  438                 solisten_proto(so, backlog);
  439 #ifdef TCP_OFFLOAD
  440                 if ((so->so_options & SO_NO_OFFLOAD) == 0)
  441                         tcp_offload_listen_start(tp);
  442 #endif
  443         }
  444         SOCK_UNLOCK(so);
  445 
  446 #ifdef TCP_RFC7413
              /*
               * NOTE(review): this is reached on the failure path too (error != 0
               * does not skip it), and any value already stored in t_tfo_pending
               * is overwritten without being released here -- confirm that a
               * failed or repeated listen cannot leak a counter.
               */
  447         if (tp->t_flags & TF_FASTOPEN)
  448                 tp->t_tfo_pending = tcp_fastopen_alloc_counter();
  449 #endif
  450 out:
  451         TCPDEBUG2(PRU_LISTEN);
  452         INP_WUNLOCK(inp);
  453         return (error);
  454 }
  455 #endif /* INET6 */
  456 
  457 #ifdef INET
  458 /*
  459  * Initiate connection to peer.
  460  * Create a template for use in transmissions on this connection.
  461  * Enter SYN_SENT state, and mark socket as connecting.
  462  * Start keep-alive timer, and seed output sequence space.
  463  * Send initial segment on connection.
       *
       * Returns EINVAL when nam is not sized as a sockaddr_in, EAFNOSUPPORT
       * for an AF_INET multicast destination, the error from
       * prison_remote_ip4() if that check fails, EADDRINUSE when the inpcb is
       * in timewait and ECONNREFUSED when it has been dropped.  Otherwise the
       * result comes from tcp_connect(), the offload connect, or tcp_output().
  464  */
  465 static int
  466 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  467 {
  468         int error = 0;
  469         struct inpcb *inp;
  470         struct tcpcb *tp = NULL;
  471         struct sockaddr_in *sinp;
  472 
  473         sinp = (struct sockaddr_in *)nam;
  474         if (nam->sa_len != sizeof (*sinp))
  475                 return (EINVAL);
  476         /*
  477          * Must disallow TCP ``connections'' to multicast addresses.
  478          */
  479         if (sinp->sin_family == AF_INET
  480             && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
  481                 return (EAFNOSUPPORT);
  482         if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
  483                 return (error);
  484 
  485         TCPDEBUG0;
  486         inp = sotoinpcb(so);
  487         KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
  488         INP_WLOCK(inp);
  489         if (inp->inp_flags & INP_TIMEWAIT) {
  490                 error = EADDRINUSE;
  491                 goto out;
  492         }
  493         if (inp->inp_flags & INP_DROPPED) {
  494                 error = ECONNREFUSED;
  495                 goto out;
  496         }
  497         tp = intotcpcb(inp);
  498         TCPDEBUG1();
  499         if ((error = tcp_connect(tp, nam, td)) != 0)
  500                 goto out;
  501 #ifdef TCP_OFFLOAD
              /*
               * If an offload device accepts the connection (returns 0), skip
               * the software timer and tcp_output().  Otherwise fall through;
               * the offload error is then replaced by tcp_output()'s result.
               */
  502         if (registered_toedevs > 0 &&
  503             (so->so_options & SO_NO_OFFLOAD) == 0 &&
  504             (error = tcp_offload_connect(so, nam)) == 0)
  505                 goto out;
  506 #endif
  507         tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
  508         error = tcp_output(tp);
  509 out:
  510         TCPDEBUG2(PRU_CONNECT);
  511         INP_WUNLOCK(inp);
  512         return (error);
  513 }
  514 #endif /* INET */
  515 
  516 #ifdef INET6
      /*
       * Initiate a connection to a peer given as a sockaddr_in6.
       *
       * Returns EINVAL when nam is not sized as a sockaddr_in6, EAFNOSUPPORT
       * for an AF_INET6 multicast destination, EADDRINUSE when the inpcb is in
       * timewait and ECONNREFUSED when it has been dropped.
       *
       * When INET is compiled in, a v4-mapped destination is converted to a
       * sockaddr_in and connected as IPv4 via tcp_connect(); this is refused
       * with EINVAL on an IN6P_IPV6_V6ONLY socket.  Any other destination is
       * connected as IPv6 via tcp6_connect().  On both paths the keep timer
       * is started with the TP_KEEPINIT() interval before the first segment
       * is sent with tcp_output(), unless an offload device took over the
       * connection.
       */
  517 static int
  518 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  519 {
  520         int error = 0;
  521         struct inpcb *inp;
  522         struct tcpcb *tp = NULL;
  523         struct sockaddr_in6 *sin6p;
  524 
  525         TCPDEBUG0;
  526 
  527         sin6p = (struct sockaddr_in6 *)nam;
  528         if (nam->sa_len != sizeof (*sin6p))
  529                 return (EINVAL);
  530         /*
  531          * Must disallow TCP ``connections'' to multicast addresses.
  532          */
  533         if (sin6p->sin6_family == AF_INET6
  534             && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
  535                 return (EAFNOSUPPORT);
  536 
  537         inp = sotoinpcb(so);
  538         KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
  539         INP_WLOCK(inp);
  540         if (inp->inp_flags & INP_TIMEWAIT) {
  541                 error = EADDRINUSE;
  542                 goto out;
  543         }
  544         if (inp->inp_flags & INP_DROPPED) {
  545                 error = ECONNREFUSED;
  546                 goto out;
  547         }
  548         tp = intotcpcb(inp);
  549         TCPDEBUG1();
  550 #ifdef INET
  551         /*
  552          * XXXRW: Some confusion: V4/V6 flags relate to binding, and
  553          * therefore probably require the hash lock, which isn't held here.
  554          * Is this a significant problem?
  555          */
  556         if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
  557                 struct sockaddr_in sin;
  558 
  559                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
  560                         error = EINVAL;
  561                         goto out;
  562                 }
  563 
  564                 in6_sin6_2_sin(&sin, sin6p);
  565                 inp->inp_vflag |= INP_IPV4;
  566                 inp->inp_vflag &= ~INP_IPV6;
  567                 if ((error = prison_remote_ip4(td->td_ucred,
  568                     &sin.sin_addr)) != 0)
  569                         goto out;
  570                 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
  571                         goto out;
  572 #ifdef TCP_OFFLOAD
  573                 if (registered_toedevs > 0 &&
  574                     (so->so_options & SO_NO_OFFLOAD) == 0 &&
  575                     (error = tcp_offload_connect(so, nam)) == 0)
  576                         goto out;
  577 #endif
                      /*
                       * Start the keep timer with the TP_KEEPINIT() interval,
                       * matching tcp_usr_connect() and the native IPv6 path
                       * below; a v4-mapped connect previously skipped it.
                       */
                      tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
  578                 error = tcp_output(tp);
  579                 goto out;
  580         }
  581 #endif
  582         inp->inp_vflag &= ~INP_IPV4;
  583         inp->inp_vflag |= INP_IPV6;
  584         inp->inp_inc.inc_flags |= INC_ISIPV6;
  585         if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
  586                 goto out;
  587         if ((error = tcp6_connect(tp, nam, td)) != 0)
  588                 goto out;
  589 #ifdef TCP_OFFLOAD
  590         if (registered_toedevs > 0 &&
  591             (so->so_options & SO_NO_OFFLOAD) == 0 &&
  592             (error = tcp_offload_connect(so, nam)) == 0)
  593                 goto out;
  594 #endif
  595         tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
  596         error = tcp_output(tp);
  597 
  598 out:
  599         TCPDEBUG2(PRU_CONNECT);
  600         INP_WUNLOCK(inp);
  601         return (error);
  602 }
  603 #endif /* INET6 */
  604 
  605 /*
  606  * Initiate disconnect from peer.
  607  * If connection never passed embryonic stage, just drop;
  608  * else if don't need to let data drain, then can just drop anyways,
  609  * else have to begin TCP shutdown process: mark socket disconnecting,
  610  * drain unread data, state switch to reflect user close, and
  611  * send segment (e.g. FIN) to peer.  Socket will be really disconnected
  612  * when peer sends FIN and acks ours.
  613  *
  614  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
       *
       * Runs with the tcbinfo write lock and the inpcb write lock held.
       * Returns ECONNRESET when the inpcb is in timewait or has been dropped;
       * otherwise calls tcp_disconnect() and returns 0.
  615  */
  616 static int
  617 tcp_usr_disconnect(struct socket *so)
  618 {
  619         struct inpcb *inp;
  620         struct tcpcb *tp = NULL;
  621         int error = 0;
  622 
  623         TCPDEBUG0;
  624         INP_INFO_WLOCK(&V_tcbinfo);
  625         inp = sotoinpcb(so);
  626         KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
  627         INP_WLOCK(inp);
  628         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  629                 error = ECONNRESET;
  630                 goto out;
  631         }
  632         tp = intotcpcb(inp);
  633         TCPDEBUG1();
  634         tcp_disconnect(tp);
  635 out:
  636         TCPDEBUG2(PRU_DISCONNECT);
  637         INP_WUNLOCK(inp);
  638         INP_INFO_WUNLOCK(&V_tcbinfo);
  639         return (error);
  640 }
  641 
  642 #ifdef INET
  643 /*
  644  * Accept a connection.  Essentially all the work is done at higher levels;
  645  * just return the address of the peer, storing through addr.
  646  *
  647  * The rationale for acquiring the tcbinfo lock here is somewhat complicated,
  648  * and is described in detail in the commit log entry for r175612.  Acquiring
  649  * it delays an accept(2) racing with sonewconn(), which inserts the socket
  650  * before the inpcb address/port fields are initialized.  A better fix would
  651  * prevent the socket from being placed in the listen queue until all fields
  652  * are fully initialized.
       *
       * Returns ECONNABORTED when the socket is already disconnected or the
       * inpcb is in timewait or has been dropped.  On success *nam is set to
       * a sockaddr built by in_sockaddr() from the foreign port and address;
       * on error *nam is left untouched.
  653  */
  654 static int
  655 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
  656 {
  657         int error = 0;
  658         struct inpcb *inp = NULL;
  659         struct tcpcb *tp = NULL;
  660         struct in_addr addr;
  661         in_port_t port = 0;
  662         TCPDEBUG0;
  663 
  664         if (so->so_state & SS_ISDISCONNECTED)
  665                 return (ECONNABORTED);
  666 
  667         inp = sotoinpcb(so);
  668         KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
  669         INP_INFO_RLOCK(&V_tcbinfo);
  670         INP_WLOCK(inp);
  671         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  672                 error = ECONNABORTED;
  673                 goto out;
  674         }
  675         tp = intotcpcb(inp);
  676         TCPDEBUG1();
  677 
  678         /*
  679          * We inline in_getpeeraddr and COMMON_END here, so that we can
  680          * copy the data of interest and defer the malloc until after we
  681          * release the lock.
  682          */
  683         port = inp->inp_fport;
  684         addr = inp->inp_faddr;
  685 
  686 out:
  687         TCPDEBUG2(PRU_ACCEPT);
  688         INP_WUNLOCK(inp);
  689         INP_INFO_RUNLOCK(&V_tcbinfo);
              /* addr is only read here when it was assigned above (error == 0). */
  690         if (error == 0)
  691                 *nam = in_sockaddr(port, &addr);
  692         return error;
  693 }
  694 #endif /* INET */
  695 
  696 #ifdef INET6
      /*
       * Accept a connection on an IPv6 TCP socket by returning the peer's
       * address through nam.
       *
       * Returns ECONNABORTED when the socket is already disconnected or the
       * inpcb is in timewait or has been dropped; *nam is then left
       * untouched.  On success the peer address is copied while the tcbinfo
       * read lock and inpcb write lock are held, and the sockaddr is built
       * after both are released: a v4-mapped sockaddr when the inpcb has
       * INP_IPV4 set, a plain IPv6 sockaddr otherwise.
       */
  697 static int
  698 tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
  699 {
  700         struct inpcb *inp = NULL;
  701         int error = 0;
  702         struct tcpcb *tp = NULL;
  703         struct in_addr addr;
  704         struct in6_addr addr6;
  705         in_port_t port = 0;
  706         int v4 = 0;
  707         TCPDEBUG0;
  708 
  709         if (so->so_state & SS_ISDISCONNECTED)
  710                 return (ECONNABORTED);
  711 
  712         inp = sotoinpcb(so);
  713         KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
  714         INP_INFO_RLOCK(&V_tcbinfo);
  715         INP_WLOCK(inp);
  716         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  717                 error = ECONNABORTED;
  718                 goto out;
  719         }
  720         tp = intotcpcb(inp);
  721         TCPDEBUG1();
  722 
  723         /*
  724          * We inline in6_mapped_peeraddr and COMMON_END here, so that we can
  725          * copy the data of interest and defer the malloc until after we
  726          * release the lock.
  727          */
  728         if (inp->inp_vflag & INP_IPV4) {
  729                 v4 = 1;
  730                 port = inp->inp_fport;
  731                 addr = inp->inp_faddr;
  732         } else {
  733                 port = inp->inp_fport;
  734                 addr6 = inp->in6p_faddr;
  735         }
  736 
  737 out:
  738         TCPDEBUG2(PRU_ACCEPT);
  739         INP_WUNLOCK(inp);
  740         INP_INFO_RUNLOCK(&V_tcbinfo);
  741         if (error == 0) {
                      /* Exactly one of addr / addr6 was filled in above. */
  742                 if (v4)
  743                         *nam = in6_v4mapsin6_sockaddr(port, &addr);
  744                 else
  745                         *nam = in6_sockaddr(port, &addr6);
  746         }
  747         return error;
  748 }
  749 #endif /* INET6 */
  750 
  751 /*
  752  * Mark the connection as being incapable of further output.
       *
       * Runs with the tcbinfo write lock and the inpcb write lock held.
       * Returns ECONNRESET when the inpcb is in timewait or has been dropped.
       * Otherwise the socket is marked with socantsendmore(), tcp_usrclosed()
       * is called, and the result of tcp_output() is returned.
  753  */
  754 static int
  755 tcp_usr_shutdown(struct socket *so)
  756 {
  757         int error = 0;
  758         struct inpcb *inp;
  759         struct tcpcb *tp = NULL;
  760 
  761         TCPDEBUG0;
  762         INP_INFO_WLOCK(&V_tcbinfo);
  763         inp = sotoinpcb(so);
  764         KASSERT(inp != NULL, ("inp == NULL"));
  765         INP_WLOCK(inp);
  766         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  767                 error = ECONNRESET;
  768                 goto out;
  769         }
  770         tp = intotcpcb(inp);
  771         TCPDEBUG1();
  772         socantsendmore(so);
  773         tcp_usrclosed(tp);
              /*
               * INP_DROPPED is tested again after tcp_usrclosed(); presumably that
               * call can drop the connection, in which case no output is attempted
               * -- TODO confirm against tcp_usrclosed().
               */
  774         if (!(inp->inp_flags & INP_DROPPED))
  775                 error = tcp_output(tp);
  776 
  777 out:
  778         TCPDEBUG2(PRU_SHUTDOWN);
  779         INP_WUNLOCK(inp);
  780         INP_INFO_WUNLOCK(&V_tcbinfo);
  781 
  782         return (error);
  783 }
  784 
  785 /*
  786  * After a receive, possibly send window update to peer.
       *
       * Returns ECONNRESET when the inpcb is in timewait or has been dropped
       * and 0 otherwise; the return value of tcp_output() is not propagated.
       * The flags argument is not used here.
  787  */
  788 static int
  789 tcp_usr_rcvd(struct socket *so, int flags)
  790 {
  791         struct inpcb *inp;
  792         struct tcpcb *tp = NULL;
  793         int error = 0;
  794 
  795         TCPDEBUG0;
  796         inp = sotoinpcb(so);
  797         KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
  798         INP_WLOCK(inp);
  799         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  800                 error = ECONNRESET;
  801                 goto out;
  802         }
  803         tp = intotcpcb(inp);
  804         TCPDEBUG1();
  805 #ifdef TCP_RFC7413
  806         /*
  807          * For passively-created TFO connections, don't attempt a window
  808          * update while still in SYN_RECEIVED as this may trigger an early
  809          * SYN|ACK.  It is preferable to have the SYN|ACK be sent along with
  810          * application response data, or failing that, when the DELACK timer
  811          * expires.
  812          */
  813         if ((tp->t_flags & TF_FASTOPEN) &&
  814             (tp->t_state == TCPS_SYN_RECEIVED))
  815                 goto out;
  816 #endif
              /*
               * With TCP_OFFLOAD the tcp_output() call below is the else-branch of
               * the TF_TOE test; without it, tcp_output() is called
               * unconditionally.
               */
  817 #ifdef TCP_OFFLOAD
  818         if (tp->t_flags & TF_TOE)
  819                 tcp_offload_rcvd(tp);
  820         else
  821 #endif
  822         tcp_output(tp);
  823 
  824 out:
  825         TCPDEBUG2(PRU_RCVD);
  826         INP_WUNLOCK(inp);
  827         return (error);
  828 }
  829 
  830 /*
  831  * Do a send by putting data in output queue and updating urgent
  832  * marker if URG set.  Possibly send more data.  Unlike the other
  833  * pru_*() routines, the mbuf chains are our responsibility.  We
  834  * must either enqueue them or free them.  The other pru_* routines
  835  * generally are caller-frees.
  836  */
  837 static int
  838 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
  839     struct sockaddr *nam, struct mbuf *control, struct thread *td)
  840 {
  841         int error = 0;
  842         struct inpcb *inp;
  843         struct tcpcb *tp = NULL;
  844 #ifdef INET6
  845         int isipv6;
  846 #endif
  847         TCPDEBUG0;
  848 
  849         /*
  850          * We require the pcbinfo lock if we will close the socket as part of
  851          * this call.
  852          */
  853         if (flags & PRUS_EOF)
  854                 INP_INFO_WLOCK(&V_tcbinfo);
  855         inp = sotoinpcb(so);
  856         KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
  857         INP_WLOCK(inp);
  858         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
  859                 if (control)
  860                         m_freem(control);
  861                 if (m)
  862                         m_freem(m);
  863                 error = ECONNRESET;
  864                 goto out;
  865         }
  866 #ifdef INET6
  867         isipv6 = nam && nam->sa_family == AF_INET6;
  868 #endif /* INET6 */
  869         tp = intotcpcb(inp);
  870         TCPDEBUG1();
  871         if (control) {
  872                 /* TCP doesn't do control messages (rights, creds, etc) */
  873                 if (control->m_len) {
  874                         m_freem(control);
  875                         if (m)
  876                                 m_freem(m);
  877                         error = EINVAL;
  878                         goto out;
  879                 }
  880                 m_freem(control);       /* empty control, just free it */
  881         }
  882         if (!(flags & PRUS_OOB)) {
  883                 sbappendstream(&so->so_snd, m);
  884                 if (nam && tp->t_state < TCPS_SYN_SENT) {
  885                         /*
  886                          * Do implied connect if not yet connected,
  887                          * initialize window to default value, and
  888                          * initialize maxseg/maxopd using peer's cached
  889                          * MSS.
  890                          */
  891 #ifdef INET6
  892                         if (isipv6)
  893                                 error = tcp6_connect(tp, nam, td);
  894 #endif /* INET6 */
  895 #if defined(INET6) && defined(INET)
  896                         else
  897 #endif
  898 #ifdef INET
  899                                 error = tcp_connect(tp, nam, td);
  900 #endif
  901                         if (error)
  902                                 goto out;
  903                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
  904                         tcp_mss(tp, -1);
  905                 }
  906                 if (flags & PRUS_EOF) {
  907                         /*
  908                          * Close the send side of the connection after
  909                          * the data is sent.
  910                          */
  911                         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
  912                         socantsendmore(so);
  913                         tcp_usrclosed(tp);
  914                 }
  915                 if (!(inp->inp_flags & INP_DROPPED)) {
  916                         if (flags & PRUS_MORETOCOME)
  917                                 tp->t_flags |= TF_MORETOCOME;
  918                         error = tcp_output(tp);
  919                         if (flags & PRUS_MORETOCOME)
  920                                 tp->t_flags &= ~TF_MORETOCOME;
  921                 }
  922         } else {
  923                 /*
  924                  * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
  925                  */
  926                 SOCKBUF_LOCK(&so->so_snd);
  927                 if (sbspace(&so->so_snd) < -512) {
  928                         SOCKBUF_UNLOCK(&so->so_snd);
  929                         m_freem(m);
  930                         error = ENOBUFS;
  931                         goto out;
  932                 }
  933                 /*
  934                  * According to RFC961 (Assigned Protocols),
  935                  * the urgent pointer points to the last octet
  936                  * of urgent data.  We continue, however,
  937                  * to consider it to indicate the first octet
  938                  * of data past the urgent section.
  939                  * Otherwise, snd_up should be one lower.
  940                  */
  941                 sbappendstream_locked(&so->so_snd, m);
  942                 SOCKBUF_UNLOCK(&so->so_snd);
  943                 if (nam && tp->t_state < TCPS_SYN_SENT) {
  944                         /*
  945                          * Do implied connect if not yet connected,
  946                          * initialize window to default value, and
  947                          * initialize maxseg/maxopd using peer's cached
  948                          * MSS.
  949                          */
  950 #ifdef INET6
  951                         if (isipv6)
  952                                 error = tcp6_connect(tp, nam, td);
  953 #endif /* INET6 */
  954 #if defined(INET6) && defined(INET)
  955                         else
  956 #endif
  957 #ifdef INET
  958                                 error = tcp_connect(tp, nam, td);
  959 #endif
  960                         if (error)
  961                                 goto out;
  962                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
  963                         tcp_mss(tp, -1);
  964                 }
  965                 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
  966                 tp->t_flags |= TF_FORCEDATA;
  967                 error = tcp_output(tp);
  968                 tp->t_flags &= ~TF_FORCEDATA;
  969         }
  970 out:
  971         TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
  972                   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
  973         INP_WUNLOCK(inp);
  974         if (flags & PRUS_EOF)
  975                 INP_INFO_WUNLOCK(&V_tcbinfo);
  976         return (error);
  977 }
  978 
  979 /*
  980  * Abort the TCP.  Drop the connection abruptly.
  981  */
  982 static void
  983 tcp_usr_abort(struct socket *so)
  984 {
  985         struct inpcb *inp;
  986         struct tcpcb *tp = NULL;
  987         TCPDEBUG0;
  988 
  989         inp = sotoinpcb(so);
  990         KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
  991 
  992         INP_INFO_WLOCK(&V_tcbinfo);
  993         INP_WLOCK(inp);
  994         KASSERT(inp->inp_socket != NULL,
  995             ("tcp_usr_abort: inp_socket == NULL"));
  996 
  997         /*
  998          * If we still have full TCP state, and we're not dropped, drop.
  999          */
 1000         if (!(inp->inp_flags & INP_TIMEWAIT) &&
 1001             !(inp->inp_flags & INP_DROPPED)) {
 1002                 tp = intotcpcb(inp);
 1003                 TCPDEBUG1();
 1004                 tcp_drop(tp, ECONNABORTED);
 1005                 TCPDEBUG2(PRU_ABORT);
 1006         }
 1007         if (!(inp->inp_flags & INP_DROPPED)) {
 1008                 SOCK_LOCK(so);
 1009                 so->so_state |= SS_PROTOREF;
 1010                 SOCK_UNLOCK(so);
 1011                 inp->inp_flags |= INP_SOCKREF;
 1012         }
 1013         INP_WUNLOCK(inp);
 1014         INP_INFO_WUNLOCK(&V_tcbinfo);
 1015 }
 1016 
 1017 /*
 1018  * TCP socket is closed.  Start friendly disconnect.
 1019  */
 1020 static void
 1021 tcp_usr_close(struct socket *so)
 1022 {
 1023         struct inpcb *inp;
 1024         struct tcpcb *tp = NULL;
 1025         TCPDEBUG0;
 1026 
 1027         inp = sotoinpcb(so);
 1028         KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
 1029 
 1030         INP_INFO_WLOCK(&V_tcbinfo);
 1031         INP_WLOCK(inp);
 1032         KASSERT(inp->inp_socket != NULL,
 1033             ("tcp_usr_close: inp_socket == NULL"));
 1034 
 1035         /*
 1036          * If we still have full TCP state, and we're not dropped, initiate
 1037          * a disconnect.
 1038          */
 1039         if (!(inp->inp_flags & INP_TIMEWAIT) &&
 1040             !(inp->inp_flags & INP_DROPPED)) {
 1041                 tp = intotcpcb(inp);
 1042                 TCPDEBUG1();
 1043                 tcp_disconnect(tp);
 1044                 TCPDEBUG2(PRU_CLOSE);
 1045         }
 1046         if (!(inp->inp_flags & INP_DROPPED)) {
 1047                 SOCK_LOCK(so);
 1048                 so->so_state |= SS_PROTOREF;
 1049                 SOCK_UNLOCK(so);
 1050                 inp->inp_flags |= INP_SOCKREF;
 1051         }
 1052         INP_WUNLOCK(inp);
 1053         INP_INFO_WUNLOCK(&V_tcbinfo);
 1054 }
 1055 
 1056 /*
 1057  * Receive out-of-band data.
 1058  */
 1059 static int
 1060 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
 1061 {
 1062         int error = 0;
 1063         struct inpcb *inp;
 1064         struct tcpcb *tp = NULL;
 1065 
 1066         TCPDEBUG0;
 1067         inp = sotoinpcb(so);
 1068         KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
 1069         INP_WLOCK(inp);
 1070         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 1071                 error = ECONNRESET;
 1072                 goto out;
 1073         }
 1074         tp = intotcpcb(inp);
 1075         TCPDEBUG1();
 1076         if ((so->so_oobmark == 0 &&
 1077              (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
 1078             so->so_options & SO_OOBINLINE ||
 1079             tp->t_oobflags & TCPOOB_HADDATA) {
 1080                 error = EINVAL;
 1081                 goto out;
 1082         }
 1083         if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
 1084                 error = EWOULDBLOCK;
 1085                 goto out;
 1086         }
 1087         m->m_len = 1;
 1088         *mtod(m, caddr_t) = tp->t_iobc;
 1089         if ((flags & MSG_PEEK) == 0)
 1090                 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 1091 
 1092 out:
 1093         TCPDEBUG2(PRU_RCVOOB);
 1094         INP_WUNLOCK(inp);
 1095         return (error);
 1096 }
 1097 
 1098 #ifdef INET
 1099 struct pr_usrreqs tcp_usrreqs = {
 1100         .pru_abort =            tcp_usr_abort,
 1101         .pru_accept =           tcp_usr_accept,
 1102         .pru_attach =           tcp_usr_attach,
 1103         .pru_bind =             tcp_usr_bind,
 1104         .pru_connect =          tcp_usr_connect,
 1105         .pru_control =          in_control,
 1106         .pru_detach =           tcp_usr_detach,
 1107         .pru_disconnect =       tcp_usr_disconnect,
 1108         .pru_listen =           tcp_usr_listen,
 1109         .pru_peeraddr =         in_getpeeraddr,
 1110         .pru_rcvd =             tcp_usr_rcvd,
 1111         .pru_rcvoob =           tcp_usr_rcvoob,
 1112         .pru_send =             tcp_usr_send,
 1113         .pru_shutdown =         tcp_usr_shutdown,
 1114         .pru_sockaddr =         in_getsockaddr,
 1115         .pru_sosetlabel =       in_pcbsosetlabel,
 1116         .pru_close =            tcp_usr_close,
 1117 };
 1118 #endif /* INET */
 1119 
 1120 #ifdef INET6
 1121 struct pr_usrreqs tcp6_usrreqs = {
 1122         .pru_abort =            tcp_usr_abort,
 1123         .pru_accept =           tcp6_usr_accept,
 1124         .pru_attach =           tcp_usr_attach,
 1125         .pru_bind =             tcp6_usr_bind,
 1126         .pru_connect =          tcp6_usr_connect,
 1127         .pru_control =          in6_control,
 1128         .pru_detach =           tcp_usr_detach,
 1129         .pru_disconnect =       tcp_usr_disconnect,
 1130         .pru_listen =           tcp6_usr_listen,
 1131         .pru_peeraddr =         in6_mapped_peeraddr,
 1132         .pru_rcvd =             tcp_usr_rcvd,
 1133         .pru_rcvoob =           tcp_usr_rcvoob,
 1134         .pru_send =             tcp_usr_send,
 1135         .pru_shutdown =         tcp_usr_shutdown,
 1136         .pru_sockaddr =         in6_mapped_sockaddr,
 1137         .pru_sosetlabel =       in_pcbsosetlabel,
 1138         .pru_close =            tcp_usr_close,
 1139 };
 1140 #endif /* INET6 */
 1141 
 1142 #ifdef INET
 1143 /*
 1144  * Common subroutine to open a TCP connection to remote host specified
 1145  * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
 1146  * port number if needed.  Call in_pcbconnect_setup to do the routing and
 1147  * to choose a local host address (interface).  If there is an existing
 1148  * incarnation of the same connection in TIME-WAIT state and if the remote
 1149  * host was sending CC options and if the connection duration was < MSL, then
 1150  * truncate the previous TIME-WAIT state and proceed.
 1151  * Initialize connection parameters and enter SYN-SENT state.
 1152  */
 1153 static int
 1154 tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
 1155 {
 1156         struct inpcb *inp = tp->t_inpcb, *oinp;
 1157         struct socket *so = inp->inp_socket;
 1158         struct in_addr laddr;
 1159         u_short lport;
 1160         int error;
 1161 
 1162         INP_WLOCK_ASSERT(inp);
 1163         INP_HASH_WLOCK(&V_tcbinfo);
 1164 
 1165         if (inp->inp_lport == 0) {
 1166                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 1167                 if (error)
 1168                         goto out;
 1169         }
 1170 
 1171         /*
 1172          * Cannot simply call in_pcbconnect, because there might be an
 1173          * earlier incarnation of this same connection still in
 1174          * TIME_WAIT state, creating an ADDRINUSE error.
 1175          */
 1176         laddr = inp->inp_laddr;
 1177         lport = inp->inp_lport;
 1178         error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
 1179             &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
 1180         if (error && oinp == NULL)
 1181                 goto out;
 1182         if (oinp) {
 1183                 error = EADDRINUSE;
 1184                 goto out;
 1185         }
 1186         inp->inp_laddr = laddr;
 1187         in_pcbrehash(inp);
 1188         INP_HASH_WUNLOCK(&V_tcbinfo);
 1189 
 1190         /*
 1191          * Compute window scaling to request:
 1192          * Scale to fit into sweet spot.  See tcp_syncache.c.
 1193          * XXX: This should move to tcp_output().
 1194          */
 1195         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
 1196             (TCP_MAXWIN << tp->request_r_scale) < sb_max)
 1197                 tp->request_r_scale++;
 1198 
 1199         soisconnecting(so);
 1200         TCPSTAT_INC(tcps_connattempt);
 1201         tcp_state_change(tp, TCPS_SYN_SENT);
 1202         tp->iss = tcp_new_isn(tp);
 1203         tcp_sendseqinit(tp);
 1204 
 1205         return 0;
 1206 
 1207 out:
 1208         INP_HASH_WUNLOCK(&V_tcbinfo);
 1209         return (error);
 1210 }
 1211 #endif /* INET */
 1212 
 1213 #ifdef INET6
 1214 static int
 1215 tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
 1216 {
 1217         struct inpcb *inp = tp->t_inpcb, *oinp;
 1218         struct socket *so = inp->inp_socket;
 1219         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
 1220         struct in6_addr addr6;
 1221         int error;
 1222 
 1223         INP_WLOCK_ASSERT(inp);
 1224         INP_HASH_WLOCK(&V_tcbinfo);
 1225 
 1226         if (inp->inp_lport == 0) {
 1227                 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 1228                 if (error)
 1229                         goto out;
 1230         }
 1231 
 1232         /*
 1233          * Cannot simply call in_pcbconnect, because there might be an
 1234          * earlier incarnation of this same connection still in
 1235          * TIME_WAIT state, creating an ADDRINUSE error.
 1236          * in6_pcbladdr() also handles scope zone IDs.
 1237          *
 1238          * XXXRW: We wouldn't need to expose in6_pcblookup_hash_locked()
 1239          * outside of in6_pcb.c if there were an in6_pcbconnect_setup().
 1240          */
 1241         error = in6_pcbladdr(inp, nam, &addr6);
 1242         if (error)
 1243                 goto out;
 1244         oinp = in6_pcblookup_hash_locked(inp->inp_pcbinfo,
 1245                                   &sin6->sin6_addr, sin6->sin6_port,
 1246                                   IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
 1247                                   ? &addr6
 1248                                   : &inp->in6p_laddr,
 1249                                   inp->inp_lport,  0, NULL);
 1250         if (oinp) {
 1251                 error = EADDRINUSE;
 1252                 goto out;
 1253         }
 1254         if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 1255                 inp->in6p_laddr = addr6;
 1256         inp->in6p_faddr = sin6->sin6_addr;
 1257         inp->inp_fport = sin6->sin6_port;
 1258         /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
 1259         inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
 1260         if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
 1261                 inp->inp_flow |=
 1262                     (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
 1263         in_pcbrehash(inp);
 1264         INP_HASH_WUNLOCK(&V_tcbinfo);
 1265 
 1266         /* Compute window scaling to request.  */
 1267         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
 1268             (TCP_MAXWIN << tp->request_r_scale) < sb_max)
 1269                 tp->request_r_scale++;
 1270 
 1271         soisconnecting(so);
 1272         TCPSTAT_INC(tcps_connattempt);
 1273         tcp_state_change(tp, TCPS_SYN_SENT);
 1274         tp->iss = tcp_new_isn(tp);
 1275         tcp_sendseqinit(tp);
 1276 
 1277         return 0;
 1278 
 1279 out:
 1280         INP_HASH_WUNLOCK(&V_tcbinfo);
 1281         return error;
 1282 }
 1283 #endif /* INET6 */
 1284 
 1285 /*
 1286  * Export TCP internal state information via a struct tcp_info, based on the
 1287  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
 1288  * (TCP state machine, etc).  We export all information using FreeBSD-native
 1289  * constants -- for example, the numeric values for tcpi_state will differ
 1290  * from Linux.
 1291  */
 1292 static void
 1293 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
 1294 {
 1295 
 1296         INP_WLOCK_ASSERT(tp->t_inpcb);
 1297         bzero(ti, sizeof(*ti));
 1298 
 1299         ti->tcpi_state = tp->t_state;
 1300         if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
 1301                 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
 1302         if (tp->t_flags & TF_SACK_PERMIT)
 1303                 ti->tcpi_options |= TCPI_OPT_SACK;
 1304         if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
 1305                 ti->tcpi_options |= TCPI_OPT_WSCALE;
 1306                 ti->tcpi_snd_wscale = tp->snd_scale;
 1307                 ti->tcpi_rcv_wscale = tp->rcv_scale;
 1308         }
 1309 
 1310         ti->tcpi_rto = tp->t_rxtcur * tick;
 1311         ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick;
 1312         ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
 1313         ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
 1314 
 1315         ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
 1316         ti->tcpi_snd_cwnd = tp->snd_cwnd;
 1317 
 1318         /*
 1319          * FreeBSD-specific extension fields for tcp_info.
 1320          */
 1321         ti->tcpi_rcv_space = tp->rcv_wnd;
 1322         ti->tcpi_rcv_nxt = tp->rcv_nxt;
 1323         ti->tcpi_snd_wnd = tp->snd_wnd;
 1324         ti->tcpi_snd_bwnd = 0;          /* Unused, kept for compat. */
 1325         ti->tcpi_snd_nxt = tp->snd_nxt;
 1326         ti->tcpi_snd_mss = tp->t_maxseg;
 1327         ti->tcpi_rcv_mss = tp->t_maxseg;
 1328         if (tp->t_flags & TF_TOE)
 1329                 ti->tcpi_options |= TCPI_OPT_TOE;
 1330         ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
 1331         ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
 1332         ti->tcpi_snd_zerowin = tp->t_sndzerowin;
 1333 }
 1334 
 1335 /*
 1336  * tcp_ctloutput() must drop the inpcb lock before performing copyin on
 1337  * socket option arguments.  When it re-acquires the lock after the copy, it
 1338  * has to revalidate that the connection is still valid for the socket
 1339  * option.
 1340  */
 1341 #define INP_WLOCK_RECHECK(inp) do {                                     \
 1342         INP_WLOCK(inp);                                                 \
 1343         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {            \
 1344                 INP_WUNLOCK(inp);                                       \
 1345                 return (ECONNRESET);                                    \
 1346         }                                                               \
 1347         tp = intotcpcb(inp);                                            \
 1348 } while(0)
 1349 
 1350 int
 1351 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 1352 {
 1353         int     error, opt, optval;
 1354         u_int   ui;
 1355         struct  inpcb *inp;
 1356         struct  tcpcb *tp;
 1357         struct  tcp_info ti;
 1358         char buf[TCP_CA_NAME_MAX];
 1359         struct cc_algo *algo;
 1360 
 1361         error = 0;
 1362         inp = sotoinpcb(so);
 1363         KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
 1364         INP_WLOCK(inp);
 1365         if (sopt->sopt_level != IPPROTO_TCP) {
 1366 #ifdef INET6
 1367                 if (inp->inp_vflag & INP_IPV6PROTO) {
 1368                         INP_WUNLOCK(inp);
 1369                         error = ip6_ctloutput(so, sopt);
 1370                 }
 1371 #endif /* INET6 */
 1372 #if defined(INET6) && defined(INET)
 1373                 else
 1374 #endif
 1375 #ifdef INET
 1376                 {
 1377                         INP_WUNLOCK(inp);
 1378                         error = ip_ctloutput(so, sopt);
 1379                 }
 1380 #endif
 1381                 return (error);
 1382         }
 1383         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 1384                 INP_WUNLOCK(inp);
 1385                 return (ECONNRESET);
 1386         }
 1387 
 1388         switch (sopt->sopt_dir) {
 1389         case SOPT_SET:
 1390                 switch (sopt->sopt_name) {
 1391 #ifdef TCP_SIGNATURE
 1392                 case TCP_MD5SIG:
 1393                         INP_WUNLOCK(inp);
 1394                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1395                             sizeof optval);
 1396                         if (error)
 1397                                 return (error);
 1398 
 1399                         INP_WLOCK_RECHECK(inp);
 1400                         if (optval > 0)
 1401                                 tp->t_flags |= TF_SIGNATURE;
 1402                         else
 1403                                 tp->t_flags &= ~TF_SIGNATURE;
 1404                         goto unlock_and_done;
 1405 #endif /* TCP_SIGNATURE */
 1406 
 1407                 case TCP_NODELAY:
 1408                 case TCP_NOOPT:
 1409                         INP_WUNLOCK(inp);
 1410                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1411                             sizeof optval);
 1412                         if (error)
 1413                                 return (error);
 1414 
 1415                         INP_WLOCK_RECHECK(inp);
 1416                         switch (sopt->sopt_name) {
 1417                         case TCP_NODELAY:
 1418                                 opt = TF_NODELAY;
 1419                                 break;
 1420                         case TCP_NOOPT:
 1421                                 opt = TF_NOOPT;
 1422                                 break;
 1423                         default:
 1424                                 opt = 0; /* dead code to fool gcc */
 1425                                 break;
 1426                         }
 1427 
 1428                         if (optval)
 1429                                 tp->t_flags |= opt;
 1430                         else
 1431                                 tp->t_flags &= ~opt;
 1432 unlock_and_done:
 1433 #ifdef TCP_OFFLOAD
 1434                         if (tp->t_flags & TF_TOE) {
 1435                                 tcp_offload_ctloutput(tp, sopt->sopt_dir,
 1436                                     sopt->sopt_name);
 1437                         }
 1438 #endif
 1439                         INP_WUNLOCK(inp);
 1440                         break;
 1441 
 1442                 case TCP_NOPUSH:
 1443                         INP_WUNLOCK(inp);
 1444                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1445                             sizeof optval);
 1446                         if (error)
 1447                                 return (error);
 1448 
 1449                         INP_WLOCK_RECHECK(inp);
 1450                         if (optval)
 1451                                 tp->t_flags |= TF_NOPUSH;
 1452                         else if (tp->t_flags & TF_NOPUSH) {
 1453                                 tp->t_flags &= ~TF_NOPUSH;
 1454                                 if (TCPS_HAVEESTABLISHED(tp->t_state))
 1455                                         error = tcp_output(tp);
 1456                         }
 1457                         goto unlock_and_done;
 1458 
 1459                 case TCP_MAXSEG:
 1460                         INP_WUNLOCK(inp);
 1461                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1462                             sizeof optval);
 1463                         if (error)
 1464                                 return (error);
 1465 
 1466                         INP_WLOCK_RECHECK(inp);
 1467                         if (optval > 0 && optval <= tp->t_maxseg &&
 1468                             optval + 40 >= V_tcp_minmss)
 1469                                 tp->t_maxseg = optval;
 1470                         else
 1471                                 error = EINVAL;
 1472                         goto unlock_and_done;
 1473 
 1474                 case TCP_INFO:
 1475                         INP_WUNLOCK(inp);
 1476                         error = EINVAL;
 1477                         break;
 1478 
 1479                 case TCP_CONGESTION:
 1480                         INP_WUNLOCK(inp);
 1481                         bzero(buf, sizeof(buf));
 1482                         error = sooptcopyin(sopt, &buf, sizeof(buf), 1);
 1483                         if (error)
 1484                                 break;
 1485                         INP_WLOCK_RECHECK(inp);
 1486                         /*
 1487                          * Return EINVAL if we can't find the requested cc algo.
 1488                          */
 1489                         error = EINVAL;
 1490                         CC_LIST_RLOCK();
 1491                         STAILQ_FOREACH(algo, &cc_list, entries) {
 1492                                 if (strncmp(buf, algo->name, TCP_CA_NAME_MAX)
 1493                                     == 0) {
 1494                                         /* We've found the requested algo. */
 1495                                         error = 0;
 1496                                         /*
 1497                                          * We hold a write lock over the tcb
 1498                                          * so it's safe to do these things
 1499                                          * without ordering concerns.
 1500                                          */
 1501                                         if (CC_ALGO(tp)->cb_destroy != NULL)
 1502                                                 CC_ALGO(tp)->cb_destroy(tp->ccv);
 1503                                         CC_ALGO(tp) = algo;
 1504                                         /*
 1505                                          * If something goes pear shaped
 1506                                          * initialising the new algo,
 1507                                          * fall back to newreno (which
 1508                                          * does not require initialisation).
 1509                                          */
 1510                                         if (algo->cb_init != NULL)
 1511                                                 if (algo->cb_init(tp->ccv) > 0) {
 1512                                                         CC_ALGO(tp) = &newreno_cc_algo;
 1513                                                         /*
 1514                                                          * The only reason init
 1515                                                          * should fail is
 1516                                                          * because of malloc.
 1517                                                          */
 1518                                                         error = ENOMEM;
 1519                                                 }
 1520                                         break; /* Break the STAILQ_FOREACH. */
 1521                                 }
 1522                         }
 1523                         CC_LIST_RUNLOCK();
 1524                         goto unlock_and_done;
 1525 
 1526                 case TCP_KEEPIDLE:
 1527                 case TCP_KEEPINTVL:
 1528                 case TCP_KEEPINIT:
 1529                         INP_WUNLOCK(inp);
 1530                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 1531                         if (error)
 1532                                 return (error);
 1533 
 1534                         if (ui > (UINT_MAX / hz)) {
 1535                                 error = EINVAL;
 1536                                 break;
 1537                         }
 1538                         ui *= hz;
 1539 
 1540                         INP_WLOCK_RECHECK(inp);
 1541                         switch (sopt->sopt_name) {
 1542                         case TCP_KEEPIDLE:
 1543                                 tp->t_keepidle = ui;
 1544                                 /*
 1545                                  * XXX: better check current remaining
 1546                                  * timeout and "merge" it with new value.
 1547                                  */
 1548                                 if ((tp->t_state > TCPS_LISTEN) &&
 1549                                     (tp->t_state <= TCPS_CLOSING))
 1550                                         tcp_timer_activate(tp, TT_KEEP,
 1551                                             TP_KEEPIDLE(tp));
 1552                                 break;
 1553                         case TCP_KEEPINTVL:
 1554                                 tp->t_keepintvl = ui;
 1555                                 if ((tp->t_state == TCPS_FIN_WAIT_2) &&
 1556                                     (TP_MAXIDLE(tp) > 0))
 1557                                         tcp_timer_activate(tp, TT_2MSL,
 1558                                             TP_MAXIDLE(tp));
 1559                                 break;
 1560                         case TCP_KEEPINIT:
 1561                                 tp->t_keepinit = ui;
 1562                                 if (tp->t_state == TCPS_SYN_RECEIVED ||
 1563                                     tp->t_state == TCPS_SYN_SENT)
 1564                                         tcp_timer_activate(tp, TT_KEEP,
 1565                                             TP_KEEPINIT(tp));
 1566                                 break;
 1567                         }
 1568                         goto unlock_and_done;
 1569 
 1570                 case TCP_KEEPCNT:
 1571                         INP_WUNLOCK(inp);
 1572                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 1573                         if (error)
 1574                                 return (error);
 1575 
 1576                         INP_WLOCK_RECHECK(inp);
 1577                         tp->t_keepcnt = ui;
 1578                         if ((tp->t_state == TCPS_FIN_WAIT_2) &&
 1579                             (TP_MAXIDLE(tp) > 0))
 1580                                 tcp_timer_activate(tp, TT_2MSL,
 1581                                     TP_MAXIDLE(tp));
 1582                         goto unlock_and_done;
 1583 
 1584 #ifdef TCP_RFC7413
 1585                 case TCP_FASTOPEN:
 1586                         INP_WUNLOCK(inp);
 1587                         if (!V_tcp_fastopen_enabled)
 1588                                 return (EPERM);
 1589 
 1590                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1591                             sizeof optval);
 1592                         if (error)
 1593                                 return (error);
 1594 
 1595                         INP_WLOCK_RECHECK(inp);
 1596                         if (optval) {
 1597                                 tp->t_flags |= TF_FASTOPEN;
 1598                                 if ((tp->t_state == TCPS_LISTEN) &&
 1599                                     (tp->t_tfo_pending == NULL))
 1600                                         tp->t_tfo_pending =
 1601                                             tcp_fastopen_alloc_counter();
 1602                         } else
 1603                                 tp->t_flags &= ~TF_FASTOPEN;
 1604                         goto unlock_and_done;
 1605 #endif
 1606 
 1607                 default:
 1608                         INP_WUNLOCK(inp);
 1609                         error = ENOPROTOOPT;
 1610                         break;
 1611                 }
 1612                 break;
 1613 
 1614         case SOPT_GET:
 1615                 tp = intotcpcb(inp);
 1616                 switch (sopt->sopt_name) {
 1617 #ifdef TCP_SIGNATURE
 1618                 case TCP_MD5SIG:
 1619                         optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
 1620                         INP_WUNLOCK(inp);
 1621                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1622                         break;
 1623 #endif
 1624 
 1625                 case TCP_NODELAY:
 1626                         optval = tp->t_flags & TF_NODELAY;
 1627                         INP_WUNLOCK(inp);
 1628                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1629                         break;
 1630                 case TCP_MAXSEG:
 1631                         optval = tp->t_maxseg;
 1632                         INP_WUNLOCK(inp);
 1633                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1634                         break;
 1635                 case TCP_NOOPT:
 1636                         optval = tp->t_flags & TF_NOOPT;
 1637                         INP_WUNLOCK(inp);
 1638                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1639                         break;
 1640                 case TCP_NOPUSH:
 1641                         optval = tp->t_flags & TF_NOPUSH;
 1642                         INP_WUNLOCK(inp);
 1643                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1644                         break;
 1645                 case TCP_INFO:
 1646                         tcp_fill_info(tp, &ti);
 1647                         INP_WUNLOCK(inp);
 1648                         error = sooptcopyout(sopt, &ti, sizeof ti);
 1649                         break;
 1650                 case TCP_CONGESTION:
 1651                         bzero(buf, sizeof(buf));
 1652                         strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
 1653                         INP_WUNLOCK(inp);
 1654                         error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX);
 1655                         break;
 1656                 case TCP_KEEPIDLE:
 1657                 case TCP_KEEPINTVL:
 1658                 case TCP_KEEPINIT:
 1659                 case TCP_KEEPCNT:
 1660                         switch (sopt->sopt_name) {
 1661                         case TCP_KEEPIDLE:
 1662                                 ui = tp->t_keepidle / hz;
 1663                                 break;
 1664                         case TCP_KEEPINTVL:
 1665                                 ui = tp->t_keepintvl / hz;
 1666                                 break;
 1667                         case TCP_KEEPINIT:
 1668                                 ui = tp->t_keepinit / hz;
 1669                                 break;
 1670                         case TCP_KEEPCNT:
 1671                                 ui = tp->t_keepcnt;
 1672                                 break;
 1673                         }
 1674                         INP_WUNLOCK(inp);
 1675                         error = sooptcopyout(sopt, &ui, sizeof(ui));
 1676                         break;
 1677 #ifdef TCP_RFC7413
 1678                 case TCP_FASTOPEN:
 1679                         optval = tp->t_flags & TF_FASTOPEN;
 1680                         INP_WUNLOCK(inp);
 1681                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1682                         break;
 1683 #endif
 1684                 default:
 1685                         INP_WUNLOCK(inp);
 1686                         error = ENOPROTOOPT;
 1687                         break;
 1688                 }
 1689                 break;
 1690         }
 1691         return (error);
 1692 }
 1693 #undef INP_WLOCK_RECHECK
 1694 
 1695 /*
 1696  * Attach TCP protocol to socket, allocating
 1697  * internet protocol control block, tcp control block,
 1698  * bufer space, and entering LISTEN state if to accept connections.
 1699  */
 1700 static int
 1701 tcp_attach(struct socket *so)
 1702 {
 1703         struct tcpcb *tp;
 1704         struct inpcb *inp;
 1705         int error;
 1706 
 1707         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 1708                 error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
 1709                 if (error)
 1710                         return (error);
 1711         }
 1712         so->so_rcv.sb_flags |= SB_AUTOSIZE;
 1713         so->so_snd.sb_flags |= SB_AUTOSIZE;
 1714         INP_INFO_WLOCK(&V_tcbinfo);
 1715         error = in_pcballoc(so, &V_tcbinfo);
 1716         if (error) {
 1717                 INP_INFO_WUNLOCK(&V_tcbinfo);
 1718                 return (error);
 1719         }
 1720         inp = sotoinpcb(so);
 1721 #ifdef INET6
 1722         if (inp->inp_vflag & INP_IPV6PROTO) {
 1723                 inp->inp_vflag |= INP_IPV6;
 1724                 inp->in6p_hops = -1;    /* use kernel default */
 1725         }
 1726         else
 1727 #endif
 1728         inp->inp_vflag |= INP_IPV4;
 1729         tp = tcp_newtcpcb(inp);
 1730         if (tp == NULL) {
 1731                 in_pcbdetach(inp);
 1732                 in_pcbfree(inp);
 1733                 INP_INFO_WUNLOCK(&V_tcbinfo);
 1734                 return (ENOBUFS);
 1735         }
 1736         tp->t_state = TCPS_CLOSED;
 1737         INP_WUNLOCK(inp);
 1738         INP_INFO_WUNLOCK(&V_tcbinfo);
 1739         return (0);
 1740 }
 1741 
 1742 /*
 1743  * Initiate (or continue) disconnect.
 1744  * If embryonic state, just send reset (once).
 1745  * If in ``let data drain'' option and linger null, just drop.
 1746  * Otherwise (hard), mark socket disconnecting and drop
 1747  * current input data; switch states based on user close, and
 1748  * send segment to peer (with FIN).
 1749  */
 1750 static void
 1751 tcp_disconnect(struct tcpcb *tp)
 1752 {
 1753         struct inpcb *inp = tp->t_inpcb;
 1754         struct socket *so = inp->inp_socket;
 1755 
 1756         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 1757         INP_WLOCK_ASSERT(inp);
 1758 
 1759         /*
 1760          * Neither tcp_close() nor tcp_drop() should return NULL, as the
 1761          * socket is still open.
 1762          */
 1763         if (tp->t_state < TCPS_ESTABLISHED) {
 1764                 tp = tcp_close(tp);
 1765                 KASSERT(tp != NULL,
 1766                     ("tcp_disconnect: tcp_close() returned NULL"));
 1767         } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
 1768                 tp = tcp_drop(tp, 0);
 1769                 KASSERT(tp != NULL,
 1770                     ("tcp_disconnect: tcp_drop() returned NULL"));
 1771         } else {
 1772                 soisdisconnecting(so);
 1773                 sbflush(&so->so_rcv);
 1774                 tcp_usrclosed(tp);
 1775                 if (!(inp->inp_flags & INP_DROPPED))
 1776                         tcp_output(tp);
 1777         }
 1778 }
 1779 
 1780 /*
 1781  * User issued close, and wish to trail through shutdown states:
 1782  * if never received SYN, just forget it.  If got a SYN from peer,
 1783  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
 1784  * If already got a FIN from peer, then almost done; go to LAST_ACK
 1785  * state.  In all other cases, have already sent FIN to peer (e.g.
 1786  * after PRU_SHUTDOWN), and just have to play tedious game waiting
 1787  * for peer to send FIN or not respond to keep-alives, etc.
 1788  * We can let the user exit from the close as soon as the FIN is acked.
 1789  */
 1790 static void
 1791 tcp_usrclosed(struct tcpcb *tp)
 1792 {
 1793 
 1794         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
 1795         INP_WLOCK_ASSERT(tp->t_inpcb);
 1796 
 1797         switch (tp->t_state) {
 1798         case TCPS_LISTEN:
 1799 #ifdef TCP_OFFLOAD
 1800                 tcp_offload_listen_stop(tp);
 1801 #endif
 1802                 tcp_state_change(tp, TCPS_CLOSED);
 1803                 /* FALLTHROUGH */
 1804         case TCPS_CLOSED:
 1805                 tp = tcp_close(tp);
 1806                 /*
 1807                  * tcp_close() should never return NULL here as the socket is
 1808                  * still open.
 1809                  */
 1810                 KASSERT(tp != NULL,
 1811                     ("tcp_usrclosed: tcp_close() returned NULL"));
 1812                 break;
 1813 
 1814         case TCPS_SYN_SENT:
 1815         case TCPS_SYN_RECEIVED:
 1816                 tp->t_flags |= TF_NEEDFIN;
 1817                 break;
 1818 
 1819         case TCPS_ESTABLISHED:
 1820                 tcp_state_change(tp, TCPS_FIN_WAIT_1);
 1821                 break;
 1822 
 1823         case TCPS_CLOSE_WAIT:
 1824                 tcp_state_change(tp, TCPS_LAST_ACK);
 1825                 break;
 1826         }
 1827         if (tp->t_state >= TCPS_FIN_WAIT_2) {
 1828                 soisdisconnected(tp->t_inpcb->inp_socket);
 1829                 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
 1830                 if (tp->t_state == TCPS_FIN_WAIT_2) {
 1831                         int timeout;
 1832 
 1833                         timeout = (tcp_fast_finwait2_recycle) ? 
 1834                             tcp_finwait2_timeout : TP_MAXIDLE(tp);
 1835                         tcp_timer_activate(tp, TT_2MSL, timeout);
 1836                 }
 1837         }
 1838 }
 1839 
 1840 #ifdef DDB
 1841 static void
 1842 db_print_indent(int indent)
 1843 {
 1844         int i;
 1845 
 1846         for (i = 0; i < indent; i++)
 1847                 db_printf(" ");
 1848 }
 1849 
 1850 static void
 1851 db_print_tstate(int t_state)
 1852 {
 1853 
 1854         switch (t_state) {
 1855         case TCPS_CLOSED:
 1856                 db_printf("TCPS_CLOSED");
 1857                 return;
 1858 
 1859         case TCPS_LISTEN:
 1860                 db_printf("TCPS_LISTEN");
 1861                 return;
 1862 
 1863         case TCPS_SYN_SENT:
 1864                 db_printf("TCPS_SYN_SENT");
 1865                 return;
 1866 
 1867         case TCPS_SYN_RECEIVED:
 1868                 db_printf("TCPS_SYN_RECEIVED");
 1869                 return;
 1870 
 1871         case TCPS_ESTABLISHED:
 1872                 db_printf("TCPS_ESTABLISHED");
 1873                 return;
 1874 
 1875         case TCPS_CLOSE_WAIT:
 1876                 db_printf("TCPS_CLOSE_WAIT");
 1877                 return;
 1878 
 1879         case TCPS_FIN_WAIT_1:
 1880                 db_printf("TCPS_FIN_WAIT_1");
 1881                 return;
 1882 
 1883         case TCPS_CLOSING:
 1884                 db_printf("TCPS_CLOSING");
 1885                 return;
 1886 
 1887         case TCPS_LAST_ACK:
 1888                 db_printf("TCPS_LAST_ACK");
 1889                 return;
 1890 
 1891         case TCPS_FIN_WAIT_2:
 1892                 db_printf("TCPS_FIN_WAIT_2");
 1893                 return;
 1894 
 1895         case TCPS_TIME_WAIT:
 1896                 db_printf("TCPS_TIME_WAIT");
 1897                 return;
 1898 
 1899         default:
 1900                 db_printf("unknown");
 1901                 return;
 1902         }
 1903 }
 1904 
 1905 static void
 1906 db_print_tflags(u_int t_flags)
 1907 {
 1908         int comma;
 1909 
 1910         comma = 0;
 1911         if (t_flags & TF_ACKNOW) {
 1912                 db_printf("%sTF_ACKNOW", comma ? ", " : "");
 1913                 comma = 1;
 1914         }
 1915         if (t_flags & TF_DELACK) {
 1916                 db_printf("%sTF_DELACK", comma ? ", " : "");
 1917                 comma = 1;
 1918         }
 1919         if (t_flags & TF_NODELAY) {
 1920                 db_printf("%sTF_NODELAY", comma ? ", " : "");
 1921                 comma = 1;
 1922         }
 1923         if (t_flags & TF_NOOPT) {
 1924                 db_printf("%sTF_NOOPT", comma ? ", " : "");
 1925                 comma = 1;
 1926         }
 1927         if (t_flags & TF_SENTFIN) {
 1928                 db_printf("%sTF_SENTFIN", comma ? ", " : "");
 1929                 comma = 1;
 1930         }
 1931         if (t_flags & TF_REQ_SCALE) {
 1932                 db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
 1933                 comma = 1;
 1934         }
 1935         if (t_flags & TF_RCVD_SCALE) {
 1936                 db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");
 1937                 comma = 1;
 1938         }
 1939         if (t_flags & TF_REQ_TSTMP) {
 1940                 db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
 1941                 comma = 1;
 1942         }
 1943         if (t_flags & TF_RCVD_TSTMP) {
 1944                 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
 1945                 comma = 1;
 1946         }
 1947         if (t_flags & TF_SACK_PERMIT) {
 1948                 db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
 1949                 comma = 1;
 1950         }
 1951         if (t_flags & TF_NEEDSYN) {
 1952                 db_printf("%sTF_NEEDSYN", comma ? ", " : "");
 1953                 comma = 1;
 1954         }
 1955         if (t_flags & TF_NEEDFIN) {
 1956                 db_printf("%sTF_NEEDFIN", comma ? ", " : "");
 1957                 comma = 1;
 1958         }
 1959         if (t_flags & TF_NOPUSH) {
 1960                 db_printf("%sTF_NOPUSH", comma ? ", " : "");
 1961                 comma = 1;
 1962         }
 1963         if (t_flags & TF_MORETOCOME) {
 1964                 db_printf("%sTF_MORETOCOME", comma ? ", " : "");
 1965                 comma = 1;
 1966         }
 1967         if (t_flags & TF_LQ_OVERFLOW) {
 1968                 db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
 1969                 comma = 1;
 1970         }
 1971         if (t_flags & TF_LASTIDLE) {
 1972                 db_printf("%sTF_LASTIDLE", comma ? ", " : "");
 1973                 comma = 1;
 1974         }
 1975         if (t_flags & TF_RXWIN0SENT) {
 1976                 db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
 1977                 comma = 1;
 1978         }
 1979         if (t_flags & TF_FASTRECOVERY) {
 1980                 db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
 1981                 comma = 1;
 1982         }
 1983         if (t_flags & TF_CONGRECOVERY) {
 1984                 db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
 1985                 comma = 1;
 1986         }
 1987         if (t_flags & TF_WASFRECOVERY) {
 1988                 db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
 1989                 comma = 1;
 1990         }
 1991         if (t_flags & TF_SIGNATURE) {
 1992                 db_printf("%sTF_SIGNATURE", comma ? ", " : "");
 1993                 comma = 1;
 1994         }
 1995         if (t_flags & TF_FORCEDATA) {
 1996                 db_printf("%sTF_FORCEDATA", comma ? ", " : "");
 1997                 comma = 1;
 1998         }
 1999         if (t_flags & TF_TSO) {
 2000                 db_printf("%sTF_TSO", comma ? ", " : "");
 2001                 comma = 1;
 2002         }
 2003         if (t_flags & TF_ECN_PERMIT) {
 2004                 db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
 2005                 comma = 1;
 2006         }
 2007         if (t_flags & TF_FASTOPEN) {
 2008                 db_printf("%sTF_FASTOPEN", comma ? ", " : "");
 2009                 comma = 1;
 2010         }
 2011 }
 2012 
 2013 static void
 2014 db_print_toobflags(char t_oobflags)
 2015 {
 2016         int comma;
 2017 
 2018         comma = 0;
 2019         if (t_oobflags & TCPOOB_HAVEDATA) {
 2020                 db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
 2021                 comma = 1;
 2022         }
 2023         if (t_oobflags & TCPOOB_HADDATA) {
 2024                 db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
 2025                 comma = 1;
 2026         }
 2027 }
 2028 
 2029 static void
 2030 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
 2031 {
 2032 
 2033         db_print_indent(indent);
 2034         db_printf("%s at %p\n", name, tp);
 2035 
 2036         indent += 2;
 2037 
 2038         db_print_indent(indent);
 2039         db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
 2040            LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
 2041 
 2042         db_print_indent(indent);
 2043         db_printf("tt_rexmt: %p   tt_persist: %p   tt_keep: %p\n",
 2044             &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
 2045 
 2046         db_print_indent(indent);
 2047         db_printf("tt_2msl: %p   tt_delack: %p   t_inpcb: %p\n", &tp->t_timers->tt_2msl,
 2048             &tp->t_timers->tt_delack, tp->t_inpcb);
 2049 
 2050         db_print_indent(indent);
 2051         db_printf("t_state: %d (", tp->t_state);
 2052         db_print_tstate(tp->t_state);
 2053         db_printf(")\n");
 2054 
 2055         db_print_indent(indent);
 2056         db_printf("t_flags: 0x%x (", tp->t_flags);
 2057         db_print_tflags(tp->t_flags);
 2058         db_printf(")\n");
 2059 
 2060         db_print_indent(indent);
 2061         db_printf("snd_una: 0x%08x   snd_max: 0x%08x   snd_nxt: x0%08x\n",
 2062             tp->snd_una, tp->snd_max, tp->snd_nxt);
 2063 
 2064         db_print_indent(indent);
 2065         db_printf("snd_up: 0x%08x   snd_wl1: 0x%08x   snd_wl2: 0x%08x\n",
 2066            tp->snd_up, tp->snd_wl1, tp->snd_wl2);
 2067 
 2068         db_print_indent(indent);
 2069         db_printf("iss: 0x%08x   irs: 0x%08x   rcv_nxt: 0x%08x\n",
 2070             tp->iss, tp->irs, tp->rcv_nxt);
 2071 
 2072         db_print_indent(indent);
 2073         db_printf("rcv_adv: 0x%08x   rcv_wnd: %lu   rcv_up: 0x%08x\n",
 2074             tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
 2075 
 2076         db_print_indent(indent);
 2077         db_printf("snd_wnd: %lu   snd_cwnd: %lu\n",
 2078            tp->snd_wnd, tp->snd_cwnd);
 2079 
 2080         db_print_indent(indent);
 2081         db_printf("snd_ssthresh: %lu   snd_recover: "
 2082             "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
 2083 
 2084         db_print_indent(indent);
 2085         db_printf("t_maxopd: %u   t_rcvtime: %u   t_startime: %u\n",
 2086             tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
 2087 
 2088         db_print_indent(indent);
 2089         db_printf("t_rttime: %u   t_rtsq: 0x%08x\n",
 2090             tp->t_rtttime, tp->t_rtseq);
 2091 
 2092         db_print_indent(indent);
 2093         db_printf("t_rxtcur: %d   t_maxseg: %u   t_srtt: %d\n",
 2094             tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
 2095 
 2096         db_print_indent(indent);
 2097         db_printf("t_rttvar: %d   t_rxtshift: %d   t_rttmin: %u   "
 2098             "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
 2099             tp->t_rttbest);
 2100 
 2101         db_print_indent(indent);
 2102         db_printf("t_rttupdated: %lu   max_sndwnd: %lu   t_softerror: %d\n",
 2103             tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
 2104 
 2105         db_print_indent(indent);
 2106         db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
 2107         db_print_toobflags(tp->t_oobflags);
 2108         db_printf(")   t_iobc: 0x%02x\n", tp->t_iobc);
 2109 
 2110         db_print_indent(indent);
 2111         db_printf("snd_scale: %u   rcv_scale: %u   request_r_scale: %u\n",
 2112             tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
 2113 
 2114         db_print_indent(indent);
 2115         db_printf("ts_recent: %u   ts_recent_age: %u\n",
 2116             tp->ts_recent, tp->ts_recent_age);
 2117 
 2118         db_print_indent(indent);
 2119         db_printf("ts_offset: %u   last_ack_sent: 0x%08x   snd_cwnd_prev: "
 2120             "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
 2121 
 2122         db_print_indent(indent);
 2123         db_printf("snd_ssthresh_prev: %lu   snd_recover_prev: 0x%08x   "
 2124             "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
 2125             tp->snd_recover_prev, tp->t_badrxtwin);
 2126 
 2127         db_print_indent(indent);
 2128         db_printf("snd_numholes: %d  snd_holes first: %p\n",
 2129             tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
 2130 
 2131         db_print_indent(indent);
 2132         db_printf("snd_fack: 0x%08x   rcv_numsacks: %d   sack_newdata: "
 2133             "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata);
 2134 
 2135         /* Skip sackblks, sackhint. */
 2136 
 2137         db_print_indent(indent);
 2138         db_printf("t_rttlow: %d   rfbuf_ts: %u   rfbuf_cnt: %d\n",
 2139             tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
 2140 }
 2141 
 2142 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
 2143 {
 2144         struct tcpcb *tp;
 2145 
 2146         if (!have_addr) {
 2147                 db_printf("usage: show tcpcb <addr>\n");
 2148                 return;
 2149         }
 2150         tp = (struct tcpcb *)addr;
 2151 
 2152         db_print_tcpcb(tp, "tcpcb", 0);
 2153 }
 2154 #endif

Cache object: c84a2b4eb67eb931aef26a29df78ba10


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.