The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      From: @(#)tcp_usrreq.c  8.2 (Berkeley) 1/3/94
   30  * $FreeBSD: releng/6.3/sys/netinet/tcp_usrreq.c 165892 2007-01-08 18:10:12Z sam $
   31  */
   32 
   33 #include "opt_inet.h"
   34 #include "opt_inet6.h"
   35 #include "opt_tcpdebug.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/malloc.h>
   40 #include <sys/kernel.h>
   41 #include <sys/sysctl.h>
   42 #include <sys/mbuf.h>
   43 #ifdef INET6
   44 #include <sys/domain.h>
   45 #endif /* INET6 */
   46 #include <sys/socket.h>
   47 #include <sys/socketvar.h>
   48 #include <sys/protosw.h>
   49 #include <sys/proc.h>
   50 #include <sys/jail.h>
   51 
   52 #include <net/if.h>
   53 #include <net/route.h>
   54 
   55 #include <netinet/in.h>
   56 #include <netinet/in_systm.h>
   57 #ifdef INET6
   58 #include <netinet/ip6.h>
   59 #endif
   60 #include <netinet/in_pcb.h>
   61 #ifdef INET6
   62 #include <netinet6/in6_pcb.h>
   63 #endif
   64 #include <netinet/in_var.h>
   65 #include <netinet/ip_var.h>
   66 #ifdef INET6
   67 #include <netinet6/ip6_var.h>
   68 #include <netinet6/scope6_var.h>
   69 #endif
   70 #include <netinet/tcp.h>
   71 #include <netinet/tcp_fsm.h>
   72 #include <netinet/tcp_seq.h>
   73 #include <netinet/tcp_timer.h>
   74 #include <netinet/tcp_var.h>
   75 #include <netinet/tcpip.h>
   76 #ifdef TCPDEBUG
   77 #include <netinet/tcp_debug.h>
   78 #endif
   79 
   /*
    * TCP protocol interface to socket abstraction.
    *
    * Forward declarations for the file-local routines that the pru_*()
    * handlers in this file call.
    */
   extern char             *tcpstates[];   /* XXX ??? */

   static int              tcp_attach(struct socket *so);
   static int              tcp_connect(struct tcpcb *tp, struct sockaddr *nam,
                               struct thread *td);
   #ifdef INET6
   static int              tcp6_connect(struct tcpcb *tp, struct sockaddr *nam,
                               struct thread *td);
   #endif /* INET6 */
   static struct tcpcb     *tcp_disconnect(struct tcpcb *tp);
   static struct tcpcb     *tcp_usrclosed(struct tcpcb *tp);
   static void             tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti);
   97 
   /*
    * User-request tracing hooks.
    *
    * TCPDEBUG0   declares the local "ostate" used to remember the TCP state
    *             seen on entry; it must be placed among the declarations.
    * TCPDEBUG1() records tp->t_state (or 0 when tp is NULL) into ostate.
    * TCPDEBUG2() calls tcp_trace() for request "req" when tp is non-NULL
    *             and the socket has SO_DEBUG set.
    *
    * The statement macros expect locals named "tp", "so" and "ostate" in
    * the calling function.  They are wrapped in do { } while (0) so that
    * they behave as a single statement even in an unbraced if/else.
    * Without TCPDEBUG all three expand to nothing.
    */
   #ifdef TCPDEBUG
   #define TCPDEBUG0       int ostate = 0
   #define TCPDEBUG1()     do {                                            \
           ostate = tp ? tp->t_state : 0;                                  \
   } while (0)
   #define TCPDEBUG2(req)  do {                                            \
           if (tp && (so->so_options & SO_DEBUG))                          \
                   tcp_trace(TA_USER, ostate, tp, 0, 0, (req));            \
   } while (0)
   #else
   #define TCPDEBUG0
   #define TCPDEBUG1()
   #define TCPDEBUG2(req)
   #endif
  108 
  109 /*
  110  * TCP attaches to socket via pru_attach(), reserving space,
  111  * and an internet control block.
  112  */
  113 static int
  114 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
  115 {
  116         int error;
  117         struct inpcb *inp;
  118         struct tcpcb *tp = 0;
  119         TCPDEBUG0;
  120 
  121         INP_INFO_WLOCK(&tcbinfo);
  122         TCPDEBUG1();
  123         inp = sotoinpcb(so);
  124         if (inp) {
  125                 error = EISCONN;
  126                 goto out;
  127         }
  128 
  129         error = tcp_attach(so);
  130         if (error)
  131                 goto out;
  132 
  133         if ((so->so_options & SO_LINGER) && so->so_linger == 0)
  134                 so->so_linger = TCP_LINGERTIME;
  135 
  136         inp = sotoinpcb(so);
  137         tp = intotcpcb(inp);
  138 out:
  139         TCPDEBUG2(PRU_ATTACH);
  140         INP_INFO_WUNLOCK(&tcbinfo);
  141         return error;
  142 }
  143 
  144 /*
  145  * pru_detach() detaches the TCP protocol from the socket.
  146  * If the protocol state is non-embryonic, then can't
  147  * do this directly: have to initiate a pru_disconnect(),
  148  * which may finish later; embryonic TCB's can just
  149  * be discarded here.
  150  */
  151 static int
  152 tcp_usr_detach(struct socket *so)
  153 {
  154         int error = 0;
  155         struct inpcb *inp;
  156         struct tcpcb *tp;
  157         TCPDEBUG0;
  158 
  159         INP_INFO_WLOCK(&tcbinfo);
  160         inp = sotoinpcb(so);
  161         if (inp == NULL) {
  162                 INP_INFO_WUNLOCK(&tcbinfo);
  163                 return error;
  164         }
  165         INP_LOCK(inp);
  166         tp = intotcpcb(inp);
  167         TCPDEBUG1();
  168         tp = tcp_disconnect(tp);
  169 
  170         TCPDEBUG2(PRU_DETACH);
  171         if (tp)
  172                 INP_UNLOCK(inp);
  173         INP_INFO_WUNLOCK(&tcbinfo);
  174         return error;
  175 }
  176 
  /*
   * Values for the local "inirw" that each pru_*() handler declares to
   * tell COMMON_START()/COMMON_END() how to treat the global tcbinfo lock:
   * INI_READ takes it as a read lock, INI_WRITE as a write lock, and
   * INI_NOLOCK matches neither test in the macros, so no tcbinfo lock is
   * taken.
   */
  177 #define INI_NOLOCK      0
  178 #define INI_READ        1
  179 #define INI_WRITE       2
  180 
      /*
       * Shared prologue for the pru_*() handlers.
       *
       * Expects the enclosing function to provide "so", "inp", "tp" and
       * "inirw".  It declares the trace state (TCPDEBUG0), takes the tcbinfo
       * lock selected by inirw and looks up the socket's inpcb.  If there is
       * no inpcb it drops the tcbinfo lock again and returns _errno from the
       * enclosing function.  Otherwise it locks the inpcb, releases the
       * tcbinfo lock right away in the INI_READ case (a write lock is kept
       * and released by COMMON_END()), sets tp and records the entry state
       * for tracing.
       */
  181 #define COMMON_START(_errno)                                    \
  182         TCPDEBUG0;                                              \
  183         do {                                                    \
  184                 if (inirw == INI_READ)                          \
  185                         INP_INFO_RLOCK(&tcbinfo);               \
  186                 else if (inirw == INI_WRITE)                    \
  187                         INP_INFO_WLOCK(&tcbinfo);               \
  188                 inp = sotoinpcb(so);                            \
  189                 if (inp == 0) {                                 \
  190                         if (inirw == INI_READ)                  \
  191                                 INP_INFO_RUNLOCK(&tcbinfo);     \
  192                         else if (inirw == INI_WRITE)            \
  193                                 INP_INFO_WUNLOCK(&tcbinfo);     \
  194                         return _errno;                          \
  195                 }                                               \
  196                 INP_LOCK(inp);                                  \
  197                 if (inirw == INI_READ)                          \
  198                         INP_INFO_RUNLOCK(&tcbinfo);             \
  199                 tp = intotcpcb(inp);                            \
  200                 TCPDEBUG1();                                    \
  201 } while(0)
  202 
      /*
       * Shared epilogue for the pru_*() handlers.
       *
       * Defines the "out" label, emits the trace record for "req", unlocks
       * the inpcb only when tp is still non-NULL, releases the tcbinfo write
       * lock when inirw is INI_WRITE, and returns the enclosing function's
       * "error".  The "goto out" after the return is never executed;
       * presumably it exists so the label counts as used in handlers that
       * never jump to it.
       */
  203 #define COMMON_END(req)                                         \
  204 out:    TCPDEBUG2(req);                                         \
  205         do {                                                    \
  206                 if (tp)                                         \
  207                         INP_UNLOCK(inp);                        \
  208                 if (inirw == INI_WRITE)                         \
  209                         INP_INFO_WUNLOCK(&tcbinfo);             \
  210                 return error;                                   \
  211                 goto out;                                       \
  212 } while(0)
  213 
  214 /*
  215  * Give the socket an address.
  216  */
  217 static int
  218 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  219 {
  220         int error = 0;
  221         struct inpcb *inp;
  222         struct tcpcb *tp;
  223         struct sockaddr_in *sinp;
  224         const int inirw = INI_WRITE;
  225 
  226         sinp = (struct sockaddr_in *)nam;
  227         if (nam->sa_len != sizeof (*sinp))
  228                 return (EINVAL);
  229         /*
  230          * Must check for multicast addresses and disallow binding
  231          * to them.
  232          */
  233         if (sinp->sin_family == AF_INET &&
  234             IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
  235                 return (EAFNOSUPPORT);
  236 
  237         COMMON_START(EINVAL);
  238         error = in_pcbbind(inp, nam, td->td_ucred);
  239         if (error)
  240                 goto out;
  241         COMMON_END(PRU_BIND);
  242 }
  243 
  #ifdef INET6
  /*
   * pru_bind() handler for IPv6 sockets.
   *
   * Returns EINVAL when the sockaddr length is not that of a sockaddr_in6
   * or when the socket has no pcb, and EAFNOSUPPORT for an AF_INET6
   * multicast address.  The pcb is marked IPv6-only to start with; unless
   * IN6P_IPV6_V6ONLY is set, an unspecified address additionally enables
   * IPv4, and a v4-mapped address is converted and bound through
   * in_pcbbind() as a pure IPv4 pcb.  Everything else goes to
   * in6_pcbbind().
   */
  static int
  tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  {
          const int inirw = INI_WRITE;
          struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)nam;
          struct sockaddr_in sin4;
          struct tcpcb *tp;
          struct inpcb *inp;
          int dualstack;
          int error = 0;

          if (nam->sa_len != sizeof(*sin6p))
                  return (EINVAL);

          /* Binding a TCP socket to a multicast address is not allowed. */
          if (sin6p->sin6_family == AF_INET6) {
                  if (IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
                          return (EAFNOSUPPORT);
          }

          COMMON_START(EINVAL);
          inp->inp_vflag |= INP_IPV6;
          inp->inp_vflag &= ~INP_IPV4;

          dualstack = (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0;
          if (dualstack && IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) {
                  /* Wildcard bind: accept IPv4 as well. */
                  inp->inp_vflag |= INP_IPV4;
          } else if (dualstack && IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
                  /* v4-mapped address: bind as a plain IPv4 pcb. */
                  in6_sin6_2_sin(&sin4, sin6p);
                  inp->inp_vflag |= INP_IPV4;
                  inp->inp_vflag &= ~INP_IPV6;
                  error = in_pcbbind(inp, (struct sockaddr *)&sin4,
                      td->td_ucred);
                  goto out;
          }
          error = in6_pcbbind(inp, nam, td->td_ucred);
          COMMON_END(PRU_BIND);
  }
  #endif /* INET6 */
  288 
  289 /*
  290  * Prepare to accept connections.
  291  */
  292 static int
  293 tcp_usr_listen(struct socket *so, struct thread *td)
  294 {
  295         int error = 0;
  296         struct inpcb *inp;
  297         struct tcpcb *tp;
  298         const int inirw = INI_WRITE;
  299 
  300         COMMON_START(EINVAL);
  301         SOCK_LOCK(so);
  302         error = solisten_proto_check(so);
  303         if (error == 0 && inp->inp_lport == 0)
  304                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
  305         if (error == 0) {
  306                 tp->t_state = TCPS_LISTEN;
  307                 solisten_proto(so);
  308         }
  309         SOCK_UNLOCK(so);
  310         COMMON_END(PRU_LISTEN);
  311 }
  312 
  #ifdef INET6
  /*
   * pru_listen() handler for IPv6 sockets.
   *
   * Same sequence as the IPv4 handler, except that a pcb without a local
   * port is bound through in6_pcbbind(), after INP_IPV4 in inp_vflag has
   * been set (IN6P_IPV6_V6ONLY clear) or cleared (IN6P_IPV6_V6ONLY set).
   * Returns EINVAL if the socket has no pcb.
   */
  static int
  tcp6_usr_listen(struct socket *so, struct thread *td)
  {
          const int inirw = INI_WRITE;
          struct tcpcb *tp;
          struct inpcb *inp;
          int error = 0;

          COMMON_START(EINVAL);
          SOCK_LOCK(so);
          if ((error = solisten_proto_check(so)) == 0) {
                  if (inp->inp_lport == 0) {
                          if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
                                  inp->inp_vflag &= ~INP_IPV4;
                          else
                                  inp->inp_vflag |= INP_IPV4;
                          error = in6_pcbbind(inp, (struct sockaddr *)0,
                              td->td_ucred);
                  }
                  if (error == 0) {
                          tp->t_state = TCPS_LISTEN;
                          solisten_proto(so);
                  }
          }
          SOCK_UNLOCK(so);
          COMMON_END(PRU_LISTEN);
  }
  #endif /* INET6 */
  339 
  340 /*
  341  * Initiate connection to peer.
  342  * Create a template for use in transmissions on this connection.
  343  * Enter SYN_SENT state, and mark socket as connecting.
  344  * Start keep-alive timer, and seed output sequence space.
  345  * Send initial segment on connection.
  346  */
  347 static int
  348 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  349 {
  350         int error = 0;
  351         struct inpcb *inp;
  352         struct tcpcb *tp;
  353         struct sockaddr_in *sinp;
  354         const int inirw = INI_WRITE;
  355 
  356         sinp = (struct sockaddr_in *)nam;
  357         if (nam->sa_len != sizeof (*sinp))
  358                 return (EINVAL);
  359         /*
  360          * Must disallow TCP ``connections'' to multicast addresses.
  361          */
  362         if (sinp->sin_family == AF_INET
  363             && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
  364                 return (EAFNOSUPPORT);
  365         if (jailed(td->td_ucred))
  366                 prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr);
  367 
  368         COMMON_START(EINVAL);
  369         if ((error = tcp_connect(tp, nam, td)) != 0)
  370                 goto out;
  371         error = tcp_output(tp);
  372         COMMON_END(PRU_CONNECT);
  373 }
  374 
  #ifdef INET6
  /*
   * pru_connect() handler for IPv6 sockets.
   *
   * Returns EINVAL when the sockaddr length is not that of a sockaddr_in6
   * or when the socket has no pcb, and EAFNOSUPPORT for an AF_INET6
   * multicast destination.  A v4-mapped destination is refused with
   * EINVAL on an IN6P_IPV6_V6ONLY pcb; otherwise it is converted and
   * connected through tcp_connect() as IPv4.  Any other destination is
   * connected through tcp6_connect().  A successful connect is followed by
   * tcp_output(), whose result is returned.
   */
  static int
  tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  {
          const int inirw = INI_WRITE;
          struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)nam;
          struct sockaddr_in sin4;
          struct tcpcb *tp;
          struct inpcb *inp;
          int error = 0;

          if (nam->sa_len != sizeof(*sin6p))
                  return (EINVAL);

          /* TCP ``connections'' to multicast addresses are not allowed. */
          if (sin6p->sin6_family == AF_INET6) {
                  if (IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
                          return (EAFNOSUPPORT);
          }

          COMMON_START(EINVAL);
          if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
                  if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
                          error = EINVAL;
                          goto out;
                  }

                  /* Connect as a plain IPv4 pcb. */
                  in6_sin6_2_sin(&sin4, sin6p);
                  inp->inp_vflag |= INP_IPV4;
                  inp->inp_vflag &= ~INP_IPV6;
                  error = tcp_connect(tp, (struct sockaddr *)&sin4, td);
          } else {
                  inp->inp_vflag &= ~INP_IPV4;
                  inp->inp_vflag |= INP_IPV6;
                  inp->inp_inc.inc_isipv6 = 1;
                  error = tcp6_connect(tp, nam, td);
          }
          if (error == 0)
                  error = tcp_output(tp);
          COMMON_END(PRU_CONNECT);
  }
  #endif /* INET6 */
  421 
  422 /*
  423  * Initiate disconnect from peer.
  424  * If connection never passed embryonic stage, just drop;
  425  * else if don't need to let data drain, then can just drop anyways,
  426  * else have to begin TCP shutdown process: mark socket disconnecting,
  427  * drain unread data, state switch to reflect user close, and
  428  * send segment (e.g. FIN) to peer.  Socket will be really disconnected
  429  * when peer sends FIN and acks ours.
  430  *
  431  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
  432  */
  433 static int
  434 tcp_usr_disconnect(struct socket *so)
  435 {
  436         int error = 0;
  437         struct inpcb *inp;
  438         struct tcpcb *tp;
  439         const int inirw = INI_WRITE;
  440 
  441         COMMON_START(ECONNRESET);
  442         tp = tcp_disconnect(tp);
  443         COMMON_END(PRU_DISCONNECT);
  444 }
  445 
  446 /*
  447  * Accept a connection.  Essentially all the work is
  448  * done at higher levels; just return the address
  449  * of the peer, storing through addr.
  450  */
  451 static int
  452 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
  453 {
  454         int error = 0;
  455         struct inpcb *inp = NULL;
  456         struct tcpcb *tp = NULL;
  457         struct in_addr addr;
  458         in_port_t port = 0;
  459         TCPDEBUG0;
  460 
  461         if (so->so_state & SS_ISDISCONNECTED) {
  462                 error = ECONNABORTED;
  463                 goto out;
  464         }
  465 
  466         INP_INFO_RLOCK(&tcbinfo);
  467         inp = sotoinpcb(so);
  468         if (!inp) {
  469                 INP_INFO_RUNLOCK(&tcbinfo);
  470                 return (ECONNABORTED);
  471         }
  472         INP_LOCK(inp);
  473         INP_INFO_RUNLOCK(&tcbinfo);
  474         tp = intotcpcb(inp);
  475         TCPDEBUG1();
  476 
  477         /*
  478          * We inline in_setpeeraddr and COMMON_END here, so that we can
  479          * copy the data of interest and defer the malloc until after we
  480          * release the lock.
  481          */
  482         port = inp->inp_fport;
  483         addr = inp->inp_faddr;
  484 
  485 out:    TCPDEBUG2(PRU_ACCEPT);
  486         if (tp)
  487                 INP_UNLOCK(inp);
  488         if (error == 0)
  489                 *nam = in_sockaddr(port, &addr);
  490         return error;
  491 }
  492 
  #ifdef INET6
  /*
   * pru_accept() handler for IPv6 sockets.
   *
   * Returns ECONNABORTED if the socket is already marked disconnected or
   * has no pcb.  Otherwise stores the peer address through nam: built by
   * in6_v4mapsin6_sockaddr() when the pcb has INP_IPV4 set, and by
   * in6_sockaddr() when it does not.
   */
  static int
  tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
  {
          struct in6_addr addr6;
          struct in_addr addr;
          struct tcpcb *tp = NULL;
          struct inpcb *inp = NULL;
          in_port_t port = 0;
          int v4 = 0;
          TCPDEBUG0;

          if ((so->so_state & SS_ISDISCONNECTED) != 0)
                  return (ECONNABORTED);

          INP_INFO_RLOCK(&tcbinfo);
          inp = sotoinpcb(so);
          if (inp == NULL) {
                  INP_INFO_RUNLOCK(&tcbinfo);
                  return (ECONNABORTED);
          }
          INP_LOCK(inp);
          INP_INFO_RUNLOCK(&tcbinfo);
          tp = intotcpcb(inp);
          TCPDEBUG1();

          /*
           * in6_mapped_peeraddr and COMMON_END are inlined here: the values
           * of interest are copied while the pcb is locked, and the sockaddr
           * is built only after the lock is released.
           */
          port = inp->inp_fport;
          v4 = (inp->inp_vflag & INP_IPV4) != 0;
          if (v4)
                  addr = inp->inp_faddr;
          else
                  addr6 = inp->in6p_faddr;

          TCPDEBUG2(PRU_ACCEPT);
          if (tp != NULL)
                  INP_UNLOCK(inp);
          *nam = v4 ? in6_v4mapsin6_sockaddr(port, &addr) :
              in6_sockaddr(port, &addr6);
          return (0);
  }
  #endif /* INET6 */
  547 
  548 /*
  549  * This is the wrapper function for in_setsockaddr. We just pass down
  550  * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking
  551  * here because in_setsockaddr will call malloc and can block.
  552  */
  553 static int
  554 tcp_sockaddr(struct socket *so, struct sockaddr **nam)
  555 {
  556         return (in_setsockaddr(so, nam, &tcbinfo));
  557 }
  558 
  559 /*
  560  * This is the wrapper function for in_setpeeraddr. We just pass down
  561  * the pcbinfo for in_setpeeraddr to lock.
  562  */
  563 static int
  564 tcp_peeraddr(struct socket *so, struct sockaddr **nam)
  565 {
  566         return (in_setpeeraddr(so, nam, &tcbinfo));
  567 }
  568 
  569 /*
  570  * Mark the connection as being incapable of further output.
  571  */
  572 static int
  573 tcp_usr_shutdown(struct socket *so)
  574 {
  575         int error = 0;
  576         struct inpcb *inp;
  577         struct tcpcb *tp;
  578         const int inirw = INI_WRITE;
  579 
  580         COMMON_START(ECONNRESET);
  581         socantsendmore(so);
  582         tp = tcp_usrclosed(tp);
  583         if (tp)
  584                 error = tcp_output(tp);
  585         COMMON_END(PRU_SHUTDOWN);
  586 }
  587 
  588 /*
  589  * After a receive, possibly send window update to peer.
  590  */
  591 static int
  592 tcp_usr_rcvd(struct socket *so, int flags)
  593 {
  594         int error = 0;
  595         struct inpcb *inp;
  596         struct tcpcb *tp;
  597         const int inirw = INI_READ;
  598 
  599         COMMON_START(ECONNRESET);
  600         tcp_output(tp);
  601         COMMON_END(PRU_RCVD);
  602 }
  603 
  604 /*
  605  * Do a send by putting data in output queue and updating urgent
  606  * marker if URG set.  Possibly send more data.  Unlike the other
  607  * pru_*() routines, the mbuf chains are our responsibility.  We
  608  * must either enqueue them or free them.  The other pru_* routines
  609  * generally are caller-frees.
  610  */
  611 static int
  612 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
  613              struct sockaddr *nam, struct mbuf *control, struct thread *td)
  614 {
  615         int error = 0;
  616         struct inpcb *inp;
  617         struct tcpcb *tp;
  618         int unlocked = 0;
  619 #ifdef INET6
  620         int isipv6;
  621 #endif
  622         TCPDEBUG0;
  623 
  624         /*
  625          * Need write lock here because this function might call
  626          * tcp_connect or tcp_usrclosed.
  627          * We really want to have to this function upgrade from read lock
  628          * to write lock.  XXX
  629          */
  630         INP_INFO_WLOCK(&tcbinfo);
  631         inp = sotoinpcb(so);
  632         if (inp == NULL) {
  633                 /*
  634                  * OOPS! we lost a race, the TCP session got reset after
  635                  * we checked SBS_CANTSENDMORE, eg: while doing uiomove or a
  636                  * network interrupt in the non-splnet() section of sosend().
  637                  */
  638                 if (m)
  639                         m_freem(m);
  640                 if (control)
  641                         m_freem(control);
  642                 error = ECONNRESET;     /* XXX EPIPE? */
  643                 tp = NULL;
  644                 TCPDEBUG1();
  645                 goto out;
  646         }
  647         INP_LOCK(inp);
  648 #ifdef INET6
  649         isipv6 = nam && nam->sa_family == AF_INET6;
  650 #endif /* INET6 */
  651         tp = intotcpcb(inp);
  652         TCPDEBUG1();
  653         if (control) {
  654                 /* TCP doesn't do control messages (rights, creds, etc) */
  655                 if (control->m_len) {
  656                         m_freem(control);
  657                         if (m)
  658                                 m_freem(m);
  659                         error = EINVAL;
  660                         goto out;
  661                 }
  662                 m_freem(control);       /* empty control, just free it */
  663         }
  664         if (!(flags & PRUS_OOB)) {
  665                 sbappendstream(&so->so_snd, m);
  666                 if (nam && tp->t_state < TCPS_SYN_SENT) {
  667                         /*
  668                          * Do implied connect if not yet connected,
  669                          * initialize window to default value, and
  670                          * initialize maxseg/maxopd using peer's cached
  671                          * MSS.
  672                          */
  673 #ifdef INET6
  674                         if (isipv6)
  675                                 error = tcp6_connect(tp, nam, td);
  676                         else
  677 #endif /* INET6 */
  678                         error = tcp_connect(tp, nam, td);
  679                         if (error)
  680                                 goto out;
  681                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
  682                         tcp_mss(tp, -1);
  683                 }
  684 
  685                 if (flags & PRUS_EOF) {
  686                         /*
  687                          * Close the send side of the connection after
  688                          * the data is sent.
  689                          */
  690                         socantsendmore(so);
  691                         tp = tcp_usrclosed(tp);
  692                 }
  693                 INP_INFO_WUNLOCK(&tcbinfo);
  694                 unlocked = 1;
  695                 if (tp != NULL) {
  696                         if (flags & PRUS_MORETOCOME)
  697                                 tp->t_flags |= TF_MORETOCOME;
  698                         error = tcp_output(tp);
  699                         if (flags & PRUS_MORETOCOME)
  700                                 tp->t_flags &= ~TF_MORETOCOME;
  701                 }
  702         } else {
  703                 SOCKBUF_LOCK(&so->so_snd);
  704                 if (sbspace(&so->so_snd) < -512) {
  705                         SOCKBUF_UNLOCK(&so->so_snd);
  706                         m_freem(m);
  707                         error = ENOBUFS;
  708                         goto out;
  709                 }
  710                 /*
  711                  * According to RFC961 (Assigned Protocols),
  712                  * the urgent pointer points to the last octet
  713                  * of urgent data.  We continue, however,
  714                  * to consider it to indicate the first octet
  715                  * of data past the urgent section.
  716                  * Otherwise, snd_up should be one lower.
  717                  */
  718                 sbappendstream_locked(&so->so_snd, m);
  719                 SOCKBUF_UNLOCK(&so->so_snd);
  720                 if (nam && tp->t_state < TCPS_SYN_SENT) {
  721                         /*
  722                          * Do implied connect if not yet connected,
  723                          * initialize window to default value, and
  724                          * initialize maxseg/maxopd using peer's cached
  725                          * MSS.
  726                          */
  727 #ifdef INET6
  728                         if (isipv6)
  729                                 error = tcp6_connect(tp, nam, td);
  730                         else
  731 #endif /* INET6 */
  732                         error = tcp_connect(tp, nam, td);
  733                         if (error)
  734                                 goto out;
  735                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
  736                         tcp_mss(tp, -1);
  737                 }
  738                 INP_INFO_WUNLOCK(&tcbinfo);
  739                 unlocked = 1;
  740                 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
  741                 tp->t_flags |= TF_FORCEDATA;
  742                 error = tcp_output(tp);
  743                 tp->t_flags &= ~TF_FORCEDATA;
  744         }
  745 out:
  746         TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
  747                   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
  748         if (tp)
  749                 INP_UNLOCK(inp);
  750         if (!unlocked)
  751                 INP_INFO_WUNLOCK(&tcbinfo);
  752         return (error);
  753 }
  754 
  755 /*
  756  * Abort the TCP.
  757  */
  758 static int
  759 tcp_usr_abort(struct socket *so)
  760 {
  761         int error = 0;
  762         struct inpcb *inp;
  763         struct tcpcb *tp;
  764         const int inirw = INI_WRITE;
  765 
  766         COMMON_START(EINVAL);
  767         tp = tcp_drop(tp, ECONNABORTED);
  768         COMMON_END(PRU_ABORT);
  769 }
  770 
  771 /*
  772  * Receive out-of-band data.
  773  */
  774 static int
  775 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
  776 {
  777         int error = 0;
  778         struct inpcb *inp;
  779         struct tcpcb *tp;
  780         const int inirw = INI_READ;
  781 
  782         COMMON_START(ECONNRESET);
  783         if ((so->so_oobmark == 0 &&
  784              (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
  785             so->so_options & SO_OOBINLINE ||
  786             tp->t_oobflags & TCPOOB_HADDATA) {
  787                 error = EINVAL;
  788                 goto out;
  789         }
  790         if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
  791                 error = EWOULDBLOCK;
  792                 goto out;
  793         }
  794         m->m_len = 1;
  795         *mtod(m, caddr_t) = tp->t_iobc;
  796         if ((flags & MSG_PEEK) == 0)
  797                 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
  798         COMMON_END(PRU_RCVOOB);
  799 }
  800 
  801 struct pr_usrreqs tcp_usrreqs = {
  802         .pru_abort =            tcp_usr_abort,
  803         .pru_accept =           tcp_usr_accept,
  804         .pru_attach =           tcp_usr_attach,
  805         .pru_bind =             tcp_usr_bind,
  806         .pru_connect =          tcp_usr_connect,
  807         .pru_control =          in_control,
  808         .pru_detach =           tcp_usr_detach,
  809         .pru_disconnect =       tcp_usr_disconnect,
  810         .pru_listen =           tcp_usr_listen,
  811         .pru_peeraddr =         tcp_peeraddr,
  812         .pru_rcvd =             tcp_usr_rcvd,
  813         .pru_rcvoob =           tcp_usr_rcvoob,
  814         .pru_send =             tcp_usr_send,
  815         .pru_shutdown =         tcp_usr_shutdown,
  816         .pru_sockaddr =         tcp_sockaddr,
  817         .pru_sosetlabel =       in_pcbsosetlabel
  818 };
  819 
  #ifdef INET6
  /*
   * User-request switch for TCP over IPv6.  It uses the tcp6_* variants of
   * accept, bind, connect and listen, and the in6_* routines for control
   * and address lookup; every other entry is the same handler that the
   * IPv4 switch uses.
   */
  struct pr_usrreqs tcp6_usrreqs = {
          /* Socket lifecycle. */
          .pru_attach =           tcp_usr_attach,
          .pru_detach =           tcp_usr_detach,
          .pru_abort =            tcp_usr_abort,

          /* Connection setup and teardown. */
          .pru_bind =             tcp6_usr_bind,
          .pru_listen =           tcp6_usr_listen,
          .pru_connect =          tcp6_usr_connect,
          .pru_accept =           tcp6_usr_accept,
          .pru_disconnect =       tcp_usr_disconnect,
          .pru_shutdown =         tcp_usr_shutdown,

          /* Data transfer. */
          .pru_send =             tcp_usr_send,
          .pru_rcvd =             tcp_usr_rcvd,
          .pru_rcvoob =           tcp_usr_rcvoob,

          /* Addresses, control and labelling. */
          .pru_sockaddr =         in6_mapped_sockaddr,
          .pru_peeraddr =         in6_mapped_peeraddr,
          .pru_control =          in6_control,
          .pru_sosetlabel =       in_pcbsosetlabel,
  };
  #endif /* INET6 */
  840 
  841 /*
  842  * Common subroutine to open a TCP connection to remote host specified
  843  * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
  844  * port number if needed.  Call in_pcbconnect_setup to do the routing and
  845  * to choose a local host address (interface).  If there is an existing
  846  * incarnation of the same connection in TIME-WAIT state and if the remote
  847  * host was sending CC options and if the connection duration was < MSL, then
  848  * truncate the previous TIME-WAIT state and proceed.
  849  * Initialize connection parameters and enter SYN-SENT state.
  850  */
  851 static int
  852 tcp_connect(tp, nam, td)
  853         register struct tcpcb *tp;
  854         struct sockaddr *nam;
  855         struct thread *td;
  856 {
  857         struct inpcb *inp = tp->t_inpcb, *oinp;
  858         struct socket *so = inp->inp_socket;
  859         struct in_addr laddr;
  860         u_short lport;
  861         int error;
  862 
  863         if (inp->inp_lport == 0) {
  864                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
  865                 if (error)
  866                         return error;
  867         }
  868 
  869         /*
  870          * Cannot simply call in_pcbconnect, because there might be an
  871          * earlier incarnation of this same connection still in
  872          * TIME_WAIT state, creating an ADDRINUSE error.
  873          */
  874         laddr = inp->inp_laddr;
  875         lport = inp->inp_lport;
  876         error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
  877             &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
  878         if (error && oinp == NULL)
  879                 return error;
  880         if (oinp)
  881                 return EADDRINUSE;
  882         inp->inp_laddr = laddr;
  883         in_pcbrehash(inp);
  884 
  885         /* Compute window scaling to request.  */
  886         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
  887             (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
  888                 tp->request_r_scale++;
  889 
  890         soisconnecting(so);
  891         tcpstat.tcps_connattempt++;
  892         tp->t_state = TCPS_SYN_SENT;
  893         callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
  894         tp->iss = tcp_new_isn(tp);
  895         tp->t_bw_rtseq = tp->iss;
  896         tcp_sendseqinit(tp);
  897 
  898         return 0;
  899 }
  900 
  #ifdef INET6
  /*
   * IPv6 counterpart of tcp_connect(): open a TCP connection to the remote
   * host described by 'nam', which is interpreted as a struct sockaddr_in6.
   * Binds a local port with in6_pcbbind() if the PCB has none, selects a
   * local address with in6_pcbladdr(), refuses with EADDRINUSE if a PCB for
   * the same address/port pair already exists, and otherwise records the
   * foreign address and enters SYN-SENT state.
   *
   * Returns 0 on success or an errno value on failure.
   */
  static int
  tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
  {
          struct inpcb *inp = tp->t_inpcb, *oinp;
          struct socket *so = inp->inp_socket;
          struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
          struct in6_addr *addr6;
          int error;

          /* Pick an ephemeral local port if none has been bound yet. */
          if (inp->inp_lport == 0) {
                  error = in6_pcbbind(inp, NULL, td->td_ucred);
                  if (error)
                          return (error);
          }

          /*
           * Cannot simply call in_pcbconnect, because there might be an
           * earlier incarnation of this same connection still in
           * TIME_WAIT state, creating an ADDRINUSE error.
           * in6_pcbladdr() also handles scope zone IDs.
           */
          error = in6_pcbladdr(inp, nam, &addr6);
          if (error)
                  return (error);

          /*
           * Look for an existing PCB using the local address this
           * connection would end up with: the bound one if set, otherwise
           * the one in6_pcbladdr() selected.
           */
          oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
              &sin6->sin6_addr, sin6->sin6_port,
              IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ?
              addr6 : &inp->in6p_laddr,
              inp->inp_lport, 0, NULL);
          if (oinp != NULL)
                  return (EADDRINUSE);

          if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
                  inp->in6p_laddr = *addr6;
          inp->in6p_faddr = sin6->sin6_addr;
          inp->inp_fport = sin6->sin6_port;

          /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
          inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
          if (inp->in6p_flags & IN6P_AUTOFLOWLABEL)
                  inp->in6p_flowinfo |=
                      (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
          in_pcbrehash(inp);

          /*
           * Compute window scaling to request: the smallest shift for
           * which the scaled maximum window covers the receive buffer.
           */
          while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
              (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
                  tp->request_r_scale++;

          soisconnecting(so);
          tcpstat.tcps_connattempt++;
          tp->t_state = TCPS_SYN_SENT;
          callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
          tp->iss = tcp_new_isn(tp);
          tp->t_bw_rtseq = tp->iss;
          tcp_sendseqinit(tp);

          return (0);
  }
  #endif /* INET6 */
  964 
  965 /*
  966  * Export TCP internal state information via a struct tcp_info, based on the
  967  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
  968  * (TCP state machine, etc).  We export all information using FreeBSD-native
  969  * constants -- for example, the numeric values for tcpi_state will differ
  970  * from Linux.
  971  */
  972 static void
  973 tcp_fill_info(tp, ti)
  974         struct tcpcb *tp;
  975         struct tcp_info *ti;
  976 {
  977 
  978         INP_LOCK_ASSERT(tp->t_inpcb);
  979         bzero(ti, sizeof(*ti));
  980 
  981         ti->tcpi_state = tp->t_state;
  982         if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
  983                 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
  984         if (tp->sack_enable)
  985                 ti->tcpi_options |= TCPI_OPT_SACK;
  986         if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
  987                 ti->tcpi_options |= TCPI_OPT_WSCALE;
  988                 ti->tcpi_snd_wscale = tp->snd_scale;
  989                 ti->tcpi_rcv_wscale = tp->rcv_scale;
  990         }
  991         ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
  992         ti->tcpi_snd_cwnd = tp->snd_cwnd;
  993 
  994         /*
  995          * FreeBSD-specific extension fields for tcp_info.
  996          */
  997         ti->tcpi_rcv_space = tp->rcv_wnd;
  998         ti->tcpi_snd_wnd = tp->snd_wnd;
  999         ti->tcpi_snd_bwnd = tp->snd_bwnd;
 1000 }
 1001 
 1002 /*
 1003  * The new sockopt interface makes it possible for us to block in the
 1004  * copyin/out step (if we take a page fault).  Taking a page fault at
 1005  * splnet() is probably a Bad Thing.  (Since sockets and pcbs both now
 1006  * use TSM, there probably isn't any need for this function to run at
 1007  * splnet() any more.  This needs more examination.)
 1008  *
 1009  * XXXRW: The locking here is wrong; we may take a page fault while holding
 1010  * the inpcb lock.
 1011  */
 1012 int
 1013 tcp_ctloutput(so, sopt)
 1014         struct socket *so;
 1015         struct sockopt *sopt;
 1016 {
 1017         int     error, opt, optval;
 1018         struct  inpcb *inp;
 1019         struct  tcpcb *tp;
 1020         struct  tcp_info ti;
 1021 
 1022         error = 0;
 1023         INP_INFO_RLOCK(&tcbinfo);
 1024         inp = sotoinpcb(so);
 1025         if (inp == NULL) {
 1026                 INP_INFO_RUNLOCK(&tcbinfo);
 1027                 return (ECONNRESET);
 1028         }
 1029         INP_LOCK(inp);
 1030         INP_INFO_RUNLOCK(&tcbinfo);
 1031         if (sopt->sopt_level != IPPROTO_TCP) {
 1032                 INP_UNLOCK(inp);
 1033 #ifdef INET6
 1034                 if (INP_CHECK_SOCKAF(so, AF_INET6))
 1035                         error = ip6_ctloutput(so, sopt);
 1036                 else
 1037 #endif /* INET6 */
 1038                 error = ip_ctloutput_pcbinfo(so, sopt, &tcbinfo);
 1039                 return (error);
 1040         }
 1041         tp = intotcpcb(inp);
 1042 
 1043         switch (sopt->sopt_dir) {
 1044         case SOPT_SET:
 1045                 switch (sopt->sopt_name) {
 1046 #ifdef TCP_SIGNATURE
 1047                 case TCP_MD5SIG:
 1048                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1049                                             sizeof optval);
 1050                         if (error)
 1051                                 break;
 1052 
 1053                         if (optval > 0)
 1054                                 tp->t_flags |= TF_SIGNATURE;
 1055                         else
 1056                                 tp->t_flags &= ~TF_SIGNATURE;
 1057                         break;
 1058 #endif /* TCP_SIGNATURE */
 1059                 case TCP_NODELAY:
 1060                 case TCP_NOOPT:
 1061                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1062                                             sizeof optval);
 1063                         if (error)
 1064                                 break;
 1065 
 1066                         switch (sopt->sopt_name) {
 1067                         case TCP_NODELAY:
 1068                                 opt = TF_NODELAY;
 1069                                 break;
 1070                         case TCP_NOOPT:
 1071                                 opt = TF_NOOPT;
 1072                                 break;
 1073                         default:
 1074                                 opt = 0; /* dead code to fool gcc */
 1075                                 break;
 1076                         }
 1077 
 1078                         if (optval)
 1079                                 tp->t_flags |= opt;
 1080                         else
 1081                                 tp->t_flags &= ~opt;
 1082                         break;
 1083 
 1084                 case TCP_NOPUSH:
 1085                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1086                                             sizeof optval);
 1087                         if (error)
 1088                                 break;
 1089 
 1090                         if (optval)
 1091                                 tp->t_flags |= TF_NOPUSH;
 1092                         else {
 1093                                 tp->t_flags &= ~TF_NOPUSH;
 1094                                 error = tcp_output(tp);
 1095                         }
 1096                         break;
 1097 
 1098                 case TCP_MAXSEG:
 1099                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1100                                             sizeof optval);
 1101                         if (error)
 1102                                 break;
 1103 
 1104                         if (optval > 0 && optval <= tp->t_maxseg &&
 1105                             optval + 40 >= tcp_minmss)
 1106                                 tp->t_maxseg = optval;
 1107                         else
 1108                                 error = EINVAL;
 1109                         break;
 1110 
 1111                 case TCP_INFO:
 1112                         error = EINVAL;
 1113                         break;
 1114 
 1115                 default:
 1116                         error = ENOPROTOOPT;
 1117                         break;
 1118                 }
 1119                 break;
 1120 
 1121         case SOPT_GET:
 1122                 switch (sopt->sopt_name) {
 1123 #ifdef TCP_SIGNATURE
 1124                 case TCP_MD5SIG:
 1125                         optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
 1126                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1127                         break;
 1128 #endif
 1129                 case TCP_NODELAY:
 1130                         optval = tp->t_flags & TF_NODELAY;
 1131                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1132                         break;
 1133                 case TCP_MAXSEG:
 1134                         optval = tp->t_maxseg;
 1135                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1136                         break;
 1137                 case TCP_NOOPT:
 1138                         optval = tp->t_flags & TF_NOOPT;
 1139                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1140                         break;
 1141                 case TCP_NOPUSH:
 1142                         optval = tp->t_flags & TF_NOPUSH;
 1143                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1144                         break;
 1145                 case TCP_INFO:
 1146                         tcp_fill_info(tp, &ti);
 1147                         error = sooptcopyout(sopt, &ti, sizeof ti);
 1148                         break;
 1149                 default:
 1150                         error = ENOPROTOOPT;
 1151                         break;
 1152                 }
 1153                 break;
 1154         }
 1155         INP_UNLOCK(inp);
 1156         return (error);
 1157 }
 1158 
 1159 /*
 1160  * tcp_sendspace and tcp_recvspace are the default send and receive window
 1161  * sizes, respectively.  These are obsolescent (this information should
 1162  * be set by the route).
 1163  */
 1164 u_long  tcp_sendspace = 1024*32;
 1165 SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
 1166     &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
 1167 u_long  tcp_recvspace = 1024*64;
 1168 SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
 1169     &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
 1170 
 1171 /*
 1172  * Attach TCP protocol to socket, allocating
 1173  * internet protocol control block, tcp control block,
 1174  * bufer space, and entering LISTEN state if to accept connections.
 1175  */
 1176 static int
 1177 tcp_attach(so)
 1178         struct socket *so;
 1179 {
 1180         register struct tcpcb *tp;
 1181         struct inpcb *inp;
 1182         int error;
 1183 #ifdef INET6
 1184         int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0;
 1185 #endif
 1186 
 1187         INP_INFO_WLOCK_ASSERT(&tcbinfo);
 1188 
 1189         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 1190                 error = soreserve(so, tcp_sendspace, tcp_recvspace);
 1191                 if (error)
 1192                         return (error);
 1193         }
 1194         error = in_pcballoc(so, &tcbinfo);
 1195         if (error)
 1196                 return (error);
 1197         inp = sotoinpcb(so);
 1198 #ifdef INET6
 1199         if (isipv6) {
 1200                 inp->inp_vflag |= INP_IPV6;
 1201                 inp->in6p_hops = -1;    /* use kernel default */
 1202         }
 1203         else
 1204 #endif
 1205         inp->inp_vflag |= INP_IPV4;
 1206         tp = tcp_newtcpcb(inp);
 1207         if (tp == 0) {
 1208                 int nofd = so->so_state & SS_NOFDREF;   /* XXX */
 1209 
 1210                 so->so_state &= ~SS_NOFDREF;    /* don't free the socket yet */
 1211 
 1212 #ifdef INET6
 1213                 if (isipv6)
 1214                         in6_pcbdetach(inp);
 1215                 else
 1216 #endif
 1217                 in_pcbdetach(inp);
 1218                 so->so_state |= nofd;
 1219                 return (ENOBUFS);
 1220         }
 1221         tp->t_state = TCPS_CLOSED;
 1222         INP_UNLOCK(inp);
 1223         return (0);
 1224 }
 1225 
 1226 /*
 1227  * Initiate (or continue) disconnect.
 1228  * If embryonic state, just send reset (once).
 1229  * If in ``let data drain'' option and linger null, just drop.
 1230  * Otherwise (hard), mark socket disconnecting and drop
 1231  * current input data; switch states based on user close, and
 1232  * send segment to peer (with FIN).
 1233  */
 1234 static struct tcpcb *
 1235 tcp_disconnect(tp)
 1236         register struct tcpcb *tp;
 1237 {
 1238         struct inpcb *inp = tp->t_inpcb;
 1239         struct socket *so = inp->inp_socket;
 1240 
 1241         INP_INFO_WLOCK_ASSERT(&tcbinfo);
 1242         INP_LOCK_ASSERT(inp);
 1243 
 1244         if (tp->t_state < TCPS_ESTABLISHED)
 1245                 tp = tcp_close(tp);
 1246         else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
 1247                 tp = tcp_drop(tp, 0);
 1248         else {
 1249                 soisdisconnecting(so);
 1250                 sbflush(&so->so_rcv);
 1251                 tp = tcp_usrclosed(tp);
 1252                 if (tp)
 1253                         (void) tcp_output(tp);
 1254         }
 1255         return (tp);
 1256 }
 1257 
 1258 /*
 1259  * User issued close, and wish to trail through shutdown states:
 1260  * if never received SYN, just forget it.  If got a SYN from peer,
 1261  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
 1262  * If already got a FIN from peer, then almost done; go to LAST_ACK
 1263  * state.  In all other cases, have already sent FIN to peer (e.g.
 1264  * after PRU_SHUTDOWN), and just have to play tedious game waiting
 1265  * for peer to send FIN or not respond to keep-alives, etc.
 1266  * We can let the user exit from the close as soon as the FIN is acked.
 1267  */
 1268 static struct tcpcb *
 1269 tcp_usrclosed(tp)
 1270         register struct tcpcb *tp;
 1271 {
 1272 
 1273         INP_INFO_WLOCK_ASSERT(&tcbinfo);
 1274         INP_LOCK_ASSERT(tp->t_inpcb);
 1275 
 1276         switch (tp->t_state) {
 1277 
 1278         case TCPS_CLOSED:
 1279         case TCPS_LISTEN:
 1280                 tp->t_state = TCPS_CLOSED;
 1281                 tp = tcp_close(tp);
 1282                 break;
 1283 
 1284         case TCPS_SYN_SENT:
 1285         case TCPS_SYN_RECEIVED:
 1286                 tp->t_flags |= TF_NEEDFIN;
 1287                 break;
 1288 
 1289         case TCPS_ESTABLISHED:
 1290                 tp->t_state = TCPS_FIN_WAIT_1;
 1291                 break;
 1292 
 1293         case TCPS_CLOSE_WAIT:
 1294                 tp->t_state = TCPS_LAST_ACK;
 1295                 break;
 1296         }
 1297         if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
 1298                 soisdisconnected(tp->t_inpcb->inp_socket);
 1299                 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
 1300                 if (tp->t_state == TCPS_FIN_WAIT_2)
 1301                         callout_reset(tp->tt_2msl, tcp_maxidle,
 1302                                       tcp_timer_2msl, tp);
 1303         }
 1304         return (tp);
 1305 }

Cache object: efc37a6050dd207d50c3b836ba62fceb


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.