The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_usrreq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      From: @(#)tcp_usrreq.c  8.2 (Berkeley) 1/3/94
   30  * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.107.2.6 2005/06/14 12:01:03 rwatson Exp $
   31  */
   32 
   33 #include "opt_ipsec.h"
   34 #include "opt_inet.h"
   35 #include "opt_inet6.h"
   36 #include "opt_tcpdebug.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/malloc.h>
   41 #include <sys/kernel.h>
   42 #include <sys/sysctl.h>
   43 #include <sys/mbuf.h>
   44 #ifdef INET6
   45 #include <sys/domain.h>
   46 #endif /* INET6 */
   47 #include <sys/socket.h>
   48 #include <sys/socketvar.h>
   49 #include <sys/protosw.h>
   50 #include <sys/proc.h>
   51 #include <sys/jail.h>
   52 
   53 #include <net/if.h>
   54 #include <net/route.h>
   55 
   56 #include <netinet/in.h>
   57 #include <netinet/in_systm.h>
   58 #ifdef INET6
   59 #include <netinet/ip6.h>
   60 #endif
   61 #include <netinet/in_pcb.h>
   62 #ifdef INET6
   63 #include <netinet6/in6_pcb.h>
   64 #endif
   65 #include <netinet/in_var.h>
   66 #include <netinet/ip_var.h>
   67 #ifdef INET6
   68 #include <netinet6/ip6_var.h>
   69 #endif
   70 #include <netinet/tcp.h>
   71 #include <netinet/tcp_fsm.h>
   72 #include <netinet/tcp_seq.h>
   73 #include <netinet/tcp_timer.h>
   74 #include <netinet/tcp_var.h>
   75 #include <netinet/tcpip.h>
   76 #ifdef TCPDEBUG
   77 #include <netinet/tcp_debug.h>
   78 #endif
   79 
   80 #ifdef IPSEC
   81 #include <netinet6/ipsec.h>
   82 #endif /*IPSEC*/
   83 
   84 /*
   85  * TCP protocol interface to socket abstraction.
   86  */
/*
 * State-name table; it is only declared here (extern), so its definition
 * lives outside this section of the file.
 */
extern  char *tcpstates[];      /* XXX ??? */

/*
 * Forward declarations of the file-local helpers called by the pru_*()
 * handlers below.  tcp_disconnect() and tcp_usrclosed() hand back a tcpcb
 * pointer; the handlers test that result against NULL before touching the
 * tcpcb again or unlocking its inpcb.
 */
static int      tcp_attach(struct socket *);
static int      tcp_connect(struct tcpcb *, struct sockaddr *,
                    struct thread *td);
#ifdef INET6
static int      tcp6_connect(struct tcpcb *, struct sockaddr *,
                    struct thread *td);
#endif /* INET6 */
static struct tcpcb *
                tcp_disconnect(struct tcpcb *);
static struct tcpcb *
                tcp_usrclosed(struct tcpcb *);
  100 
/*
 * Tracing hooks for the user-request handlers.  They rely on locals named
 * `tp' and `so' being in scope where they are expanded.
 *
 * With TCPDEBUG defined:
 *   TCPDEBUG0      declares the local `ostate', initialised to 0;
 *   TCPDEBUG1()    stores tp->t_state in ostate, or 0 when tp is NULL;
 *   TCPDEBUG2(req) calls tcp_trace(TA_USER, ...) when tp is non-NULL and
 *                  the socket has SO_DEBUG set.
 * Without TCPDEBUG all three expand to nothing.
 */
#ifdef TCPDEBUG
#define TCPDEBUG0       int ostate = 0
#define TCPDEBUG1()     ostate = tp ? tp->t_state : 0
#define TCPDEBUG2(req)  if (tp && (so->so_options & SO_DEBUG)) \
                                tcp_trace(TA_USER, ostate, tp, 0, 0, req)
#else
#define TCPDEBUG0
#define TCPDEBUG1()
#define TCPDEBUG2(req)
#endif
  111 
  112 /*
  113  * TCP attaches to socket via pru_attach(), reserving space,
  114  * and an internet control block.
  115  */
  116 static int
  117 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
  118 {
  119         int error;
  120         struct inpcb *inp;
  121         struct tcpcb *tp = 0;
  122         TCPDEBUG0;
  123 
  124         INP_INFO_WLOCK(&tcbinfo);
  125         TCPDEBUG1();
  126         inp = sotoinpcb(so);
  127         if (inp) {
  128                 error = EISCONN;
  129                 goto out;
  130         }
  131 
  132         error = tcp_attach(so);
  133         if (error)
  134                 goto out;
  135 
  136         if ((so->so_options & SO_LINGER) && so->so_linger == 0)
  137                 so->so_linger = TCP_LINGERTIME;
  138 
  139         inp = sotoinpcb(so);
  140         tp = intotcpcb(inp);
  141 out:
  142         TCPDEBUG2(PRU_ATTACH);
  143         INP_INFO_WUNLOCK(&tcbinfo);
  144         return error;
  145 }
  146 
  147 /*
  148  * pru_detach() detaches the TCP protocol from the socket.
  149  * If the protocol state is non-embryonic, then can't
  150  * do this directly: have to initiate a pru_disconnect(),
  151  * which may finish later; embryonic TCB's can just
  152  * be discarded here.
  153  */
  154 static int
  155 tcp_usr_detach(struct socket *so)
  156 {
  157         int error = 0;
  158         struct inpcb *inp;
  159         struct tcpcb *tp;
  160         TCPDEBUG0;
  161 
  162         INP_INFO_WLOCK(&tcbinfo);
  163         inp = sotoinpcb(so);
  164         if (inp == NULL) {
  165                 INP_INFO_WUNLOCK(&tcbinfo);
  166                 return error;
  167         }
  168         INP_LOCK(inp);
  169         tp = intotcpcb(inp);
  170         TCPDEBUG1();
  171         tp = tcp_disconnect(tp);
  172 
  173         TCPDEBUG2(PRU_DETACH);
  174         if (tp)
  175                 INP_UNLOCK(inp);
  176         INP_INFO_WUNLOCK(&tcbinfo);
  177         return error;
  178 }
  179 
/*
 * Values for the per-handler local `inirw', which tells COMMON_START and
 * COMMON_END how to lock tcbinfo: not at all, for reading, or for writing.
 */
#define INI_NOLOCK      0
#define INI_READ        1
#define INI_WRITE       2
  183 
/*
 * Shared prologue for the pru_*() handlers.  The enclosing function must
 * have locals named so, inp, tp and inirw in scope.
 *
 * TCPDEBUG0 is expanded first, so this also declares the trace-state local
 * when TCPDEBUG is on.  Then, depending on inirw, tcbinfo is locked for
 * reading (INI_READ), for writing (INI_WRITE), or left alone.  If the socket
 * has no inpcb the tcbinfo lock is dropped again and the enclosing function
 * returns EINVAL.  Otherwise the inpcb is locked and tp is loaded from it.
 * A tcbinfo read lock is released here as soon as the inpcb lock is held; a
 * write lock stays held and is released by COMMON_END.
 */
#define COMMON_START()                                          \
        TCPDEBUG0;                                              \
        do {                                                    \
                if (inirw == INI_READ)                          \
                        INP_INFO_RLOCK(&tcbinfo);               \
                else if (inirw == INI_WRITE)                    \
                        INP_INFO_WLOCK(&tcbinfo);               \
                inp = sotoinpcb(so);                            \
                if (inp == 0) {                                 \
                        if (inirw == INI_READ)                  \
                                INP_INFO_RUNLOCK(&tcbinfo);     \
                        else if (inirw == INI_WRITE)            \
                                INP_INFO_WUNLOCK(&tcbinfo);     \
                        return EINVAL;                          \
                }                                               \
                INP_LOCK(inp);                                  \
                if (inirw == INI_READ)                          \
                        INP_INFO_RUNLOCK(&tcbinfo);             \
                tp = intotcpcb(inp);                            \
                TCPDEBUG1();                                    \
} while(0)
  205 
/*
 * Shared epilogue for the pru_*() handlers.  It defines the `out' label that
 * handlers jump to, emits the trace record for `req', unlocks the inpcb only
 * while tp is still non-NULL, drops the tcbinfo write lock when inirw is
 * INI_WRITE, and returns `error' from the enclosing function.
 *
 * The `goto out' after the return can never execute.  NOTE(review): it
 * presumably exists so the label is always referenced, even in handlers
 * that never jump to it themselves -- confirm before removing.
 */
#define COMMON_END(req)                                         \
out:    TCPDEBUG2(req);                                         \
        do {                                                    \
                if (tp)                                         \
                        INP_UNLOCK(inp);                        \
                if (inirw == INI_WRITE)                         \
                        INP_INFO_WUNLOCK(&tcbinfo);             \
                return error;                                   \
                goto out;                                       \
} while(0)
  216 
  217 /*
  218  * Give the socket an address.
  219  */
  220 static int
  221 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  222 {
  223         int error = 0;
  224         struct inpcb *inp;
  225         struct tcpcb *tp;
  226         struct sockaddr_in *sinp;
  227         const int inirw = INI_WRITE;
  228 
  229         sinp = (struct sockaddr_in *)nam;
  230         if (nam->sa_len != sizeof (*sinp))
  231                 return (EINVAL);
  232         /*
  233          * Must check for multicast addresses and disallow binding
  234          * to them.
  235          */
  236         if (sinp->sin_family == AF_INET &&
  237             IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
  238                 return (EAFNOSUPPORT);
  239 
  240         COMMON_START();
  241         error = in_pcbbind(inp, nam, td->td_ucred);
  242         if (error)
  243                 goto out;
  244         COMMON_END(PRU_BIND);
  245 }
  246 
  247 #ifdef INET6
  248 static int
  249 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
  250 {
  251         int error = 0;
  252         struct inpcb *inp;
  253         struct tcpcb *tp;
  254         struct sockaddr_in6 *sin6p;
  255         const int inirw = INI_WRITE;
  256 
  257         sin6p = (struct sockaddr_in6 *)nam;
  258         if (nam->sa_len != sizeof (*sin6p))
  259                 return (EINVAL);
  260         /*
  261          * Must check for multicast addresses and disallow binding
  262          * to them.
  263          */
  264         if (sin6p->sin6_family == AF_INET6 &&
  265             IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
  266                 return (EAFNOSUPPORT);
  267 
  268         COMMON_START();
  269         inp->inp_vflag &= ~INP_IPV4;
  270         inp->inp_vflag |= INP_IPV6;
  271         if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
  272                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
  273                         inp->inp_vflag |= INP_IPV4;
  274                 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
  275                         struct sockaddr_in sin;
  276 
  277                         in6_sin6_2_sin(&sin, sin6p);
  278                         inp->inp_vflag |= INP_IPV4;
  279                         inp->inp_vflag &= ~INP_IPV6;
  280                         error = in_pcbbind(inp, (struct sockaddr *)&sin,
  281                             td->td_ucred);
  282                         goto out;
  283                 }
  284         }
  285         error = in6_pcbbind(inp, nam, td->td_ucred);
  286         if (error)
  287                 goto out;
  288         COMMON_END(PRU_BIND);
  289 }
  290 #endif /* INET6 */
  291 
  292 /*
  293  * Prepare to accept connections.
  294  */
  295 static int
  296 tcp_usr_listen(struct socket *so, struct thread *td)
  297 {
  298         int error = 0;
  299         struct inpcb *inp;
  300         struct tcpcb *tp;
  301         const int inirw = INI_WRITE;
  302 
  303         COMMON_START();
  304         SOCK_LOCK(so);
  305         error = solisten_proto_check(so);
  306         if (error == 0 && inp->inp_lport == 0)
  307                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
  308         if (error == 0) {
  309                 tp->t_state = TCPS_LISTEN;
  310                 solisten_proto(so);
  311         }
  312         SOCK_UNLOCK(so);
  313         COMMON_END(PRU_LISTEN);
  314 }
  315 
  316 #ifdef INET6
  317 static int
  318 tcp6_usr_listen(struct socket *so, struct thread *td)
  319 {
  320         int error = 0;
  321         struct inpcb *inp;
  322         struct tcpcb *tp;
  323         const int inirw = INI_WRITE;
  324 
  325         COMMON_START();
  326         SOCK_LOCK(so);
  327         error = solisten_proto_check(so);
  328         if (error == 0 && inp->inp_lport == 0) {
  329                 inp->inp_vflag &= ~INP_IPV4;
  330                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
  331                         inp->inp_vflag |= INP_IPV4;
  332                 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
  333         }
  334         if (error == 0) {
  335                 tp->t_state = TCPS_LISTEN;
  336                 solisten_proto(so);
  337         }
  338         SOCK_UNLOCK(so);
  339         COMMON_END(PRU_LISTEN);
  340 }
  341 #endif /* INET6 */
  342 
  343 /*
  344  * Initiate connection to peer.
  345  * Create a template for use in transmissions on this connection.
  346  * Enter SYN_SENT state, and mark socket as connecting.
  347  * Start keep-alive timer, and seed output sequence space.
  348  * Send initial segment on connection.
  349  */
  350 static int
  351 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  352 {
  353         int error = 0;
  354         struct inpcb *inp;
  355         struct tcpcb *tp;
  356         struct sockaddr_in *sinp;
  357         const int inirw = INI_WRITE;
  358 
  359         sinp = (struct sockaddr_in *)nam;
  360         if (nam->sa_len != sizeof (*sinp))
  361                 return (EINVAL);
  362         /*
  363          * Must disallow TCP ``connections'' to multicast addresses.
  364          */
  365         if (sinp->sin_family == AF_INET
  366             && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
  367                 return (EAFNOSUPPORT);
  368         if (td && jailed(td->td_ucred))
  369                 prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr);
  370 
  371         COMMON_START();
  372         if ((error = tcp_connect(tp, nam, td)) != 0)
  373                 goto out;
  374         error = tcp_output(tp);
  375         COMMON_END(PRU_CONNECT);
  376 }
  377 
  378 #ifdef INET6
  379 static int
  380 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
  381 {
  382         int error = 0;
  383         struct inpcb *inp;
  384         struct tcpcb *tp;
  385         struct sockaddr_in6 *sin6p;
  386         const int inirw = INI_WRITE;
  387 
  388         sin6p = (struct sockaddr_in6 *)nam;
  389         if (nam->sa_len != sizeof (*sin6p))
  390                 return (EINVAL);
  391         /*
  392          * Must disallow TCP ``connections'' to multicast addresses.
  393          */
  394         if (sin6p->sin6_family == AF_INET6
  395             && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
  396                 return (EAFNOSUPPORT);
  397 
  398         COMMON_START();
  399         if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
  400                 struct sockaddr_in sin;
  401 
  402                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
  403                         error = EINVAL;
  404                         goto out;
  405                 }
  406 
  407                 in6_sin6_2_sin(&sin, sin6p);
  408                 inp->inp_vflag |= INP_IPV4;
  409                 inp->inp_vflag &= ~INP_IPV6;
  410                 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
  411                         goto out;
  412                 error = tcp_output(tp);
  413                 goto out;
  414         }
  415         inp->inp_vflag &= ~INP_IPV4;
  416         inp->inp_vflag |= INP_IPV6;
  417         inp->inp_inc.inc_isipv6 = 1;
  418         if ((error = tcp6_connect(tp, nam, td)) != 0)
  419                 goto out;
  420         error = tcp_output(tp);
  421         COMMON_END(PRU_CONNECT);
  422 }
  423 #endif /* INET6 */
  424 
  425 /*
  426  * Initiate disconnect from peer.
  427  * If connection never passed embryonic stage, just drop;
  428  * else if don't need to let data drain, then can just drop anyways,
  429  * else have to begin TCP shutdown process: mark socket disconnecting,
  430  * drain unread data, state switch to reflect user close, and
  431  * send segment (e.g. FIN) to peer.  Socket will be really disconnected
  432  * when peer sends FIN and acks ours.
  433  *
  434  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
  435  */
static int
tcp_usr_disconnect(struct socket *so)
{
        int error = 0;
        struct inpcb *inp;
        struct tcpcb *tp;
        /* Selects the tcbinfo write lock in COMMON_START/COMMON_END. */
        const int inirw = INI_WRITE;

        COMMON_START();
        /*
         * tp is overwritten with tcp_disconnect()'s result; COMMON_END only
         * unlocks the inpcb when that result is non-NULL.
         */
        tp = tcp_disconnect(tp);
        COMMON_END(PRU_DISCONNECT);
}
  448 
  449 /*
  450  * Accept a connection.  Essentially all the work is
  451  * done at higher levels; just return the address
  452  * of the peer, storing through addr.
  453  */
  454 static int
  455 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
  456 {
  457         int error = 0;
  458         struct inpcb *inp = NULL;
  459         struct tcpcb *tp = NULL;
  460         struct in_addr addr;
  461         in_port_t port = 0;
  462         TCPDEBUG0;
  463 
  464         if (so->so_state & SS_ISDISCONNECTED) {
  465                 error = ECONNABORTED;
  466                 goto out;
  467         }
  468 
  469         INP_INFO_RLOCK(&tcbinfo);
  470         inp = sotoinpcb(so);
  471         if (!inp) {
  472                 INP_INFO_RUNLOCK(&tcbinfo);
  473                 return (EINVAL);
  474         }
  475         INP_LOCK(inp);
  476         INP_INFO_RUNLOCK(&tcbinfo);
  477         tp = intotcpcb(inp);
  478         TCPDEBUG1();
  479 
  480         /*
  481          * We inline in_setpeeraddr and COMMON_END here, so that we can
  482          * copy the data of interest and defer the malloc until after we
  483          * release the lock.
  484          */
  485         port = inp->inp_fport;
  486         addr = inp->inp_faddr;
  487 
  488 out:    TCPDEBUG2(PRU_ACCEPT);
  489         if (tp)
  490                 INP_UNLOCK(inp);
  491         if (error == 0)
  492                 *nam = in_sockaddr(port, &addr);
  493         return error;
  494 }
  495 
  496 #ifdef INET6
  497 static int
  498 tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
  499 {
  500         struct inpcb *inp = NULL;
  501         int error = 0;
  502         struct tcpcb *tp = NULL;
  503         struct in_addr addr;
  504         struct in6_addr addr6;
  505         in_port_t port = 0;
  506         int v4 = 0;
  507         TCPDEBUG0;
  508 
  509         if (so->so_state & SS_ISDISCONNECTED) {
  510                 error = ECONNABORTED;
  511                 goto out;
  512         }
  513 
  514         INP_INFO_RLOCK(&tcbinfo);
  515         inp = sotoinpcb(so);
  516         if (inp == 0) {
  517                 INP_INFO_RUNLOCK(&tcbinfo);
  518                 return (EINVAL);
  519         }
  520         INP_LOCK(inp);
  521         INP_INFO_RUNLOCK(&tcbinfo);
  522         tp = intotcpcb(inp);
  523         TCPDEBUG1();
  524         /*
  525          * We inline in6_mapped_peeraddr and COMMON_END here, so that we can
  526          * copy the data of interest and defer the malloc until after we
  527          * release the lock.
  528          */
  529         if (inp->inp_vflag & INP_IPV4) {
  530                 v4 = 1;
  531                 port = inp->inp_fport;
  532                 addr = inp->inp_faddr;
  533         } else {
  534                 port = inp->inp_fport;
  535                 addr6 = inp->in6p_faddr;
  536         }
  537 
  538 out:    TCPDEBUG2(PRU_ACCEPT);
  539         if (tp)
  540                 INP_UNLOCK(inp);
  541         if (error == 0) {
  542                 if (v4)
  543                         *nam = in6_v4mapsin6_sockaddr(port, &addr);
  544                 else
  545                         *nam = in6_sockaddr(port, &addr6);
  546         }
  547         return error;
  548 }
  549 #endif /* INET6 */
  550 
  551 /*
  552  * This is the wrapper function for in_setsockaddr. We just pass down
  553  * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking
  554  * here because in_setsockaddr will call malloc and can block.
  555  */
static int
tcp_sockaddr(struct socket *so, struct sockaddr **nam)
{
        /* No lock is taken here; tcbinfo is handed to in_setsockaddr(). */
        return (in_setsockaddr(so, nam, &tcbinfo));
}
  561 
  562 /*
  563  * This is the wrapper function for in_setpeeraddr. We just pass down
  564  * the pcbinfo for in_setpeeraddr to lock.
  565  */
static int
tcp_peeraddr(struct socket *so, struct sockaddr **nam)
{
        /* No lock is taken here; tcbinfo is handed to in_setpeeraddr(). */
        return (in_setpeeraddr(so, nam, &tcbinfo));
}
  571 
  572 /*
  573  * Mark the connection as being incapable of further output.
  574  */
  575 static int
  576 tcp_usr_shutdown(struct socket *so)
  577 {
  578         int error = 0;
  579         struct inpcb *inp;
  580         struct tcpcb *tp;
  581         const int inirw = INI_WRITE;
  582 
  583         COMMON_START();
  584         socantsendmore(so);
  585         tp = tcp_usrclosed(tp);
  586         if (tp)
  587                 error = tcp_output(tp);
  588         COMMON_END(PRU_SHUTDOWN);
  589 }
  590 
  591 /*
  592  * After a receive, possibly send window update to peer.
  593  */
  594 static int
  595 tcp_usr_rcvd(struct socket *so, int flags)
  596 {
  597         int error = 0;
  598         struct inpcb *inp;
  599         struct tcpcb *tp;
  600         const int inirw = INI_READ;
  601 
  602         COMMON_START();
  603         tcp_output(tp);
  604         COMMON_END(PRU_RCVD);
  605 }
  606 
  607 /*
  608  * Do a send by putting data in output queue and updating urgent
  609  * marker if URG set.  Possibly send more data.  Unlike the other
  610  * pru_*() routines, the mbuf chains are our responsibility.  We
  611  * must either enqueue them or free them.  The other pru_* routines
  612  * generally are caller-frees.
  613  */
  614 static int
  615 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
  616              struct sockaddr *nam, struct mbuf *control, struct thread *td)
  617 {
  618         int error = 0;
  619         struct inpcb *inp;
  620         struct tcpcb *tp;
  621         const int inirw = INI_WRITE;
  622 #ifdef INET6
  623         int isipv6;
  624 #endif
  625         TCPDEBUG0;
  626 
  627         /*
  628          * Need write lock here because this function might call
  629          * tcp_connect or tcp_usrclosed.
  630          * We really want to have to this function upgrade from read lock
  631          * to write lock.  XXX
  632          */
  633         INP_INFO_WLOCK(&tcbinfo);
  634         inp = sotoinpcb(so);
  635         if (inp == NULL) {
  636                 /*
  637                  * OOPS! we lost a race, the TCP session got reset after
  638                  * we checked SBS_CANTSENDMORE, eg: while doing uiomove or a
  639                  * network interrupt in the non-splnet() section of sosend().
  640                  */
  641                 if (m)
  642                         m_freem(m);
  643                 if (control)
  644                         m_freem(control);
  645                 error = ECONNRESET;     /* XXX EPIPE? */
  646                 tp = NULL;
  647                 TCPDEBUG1();
  648                 goto out;
  649         }
  650         INP_LOCK(inp);
  651 #ifdef INET6
  652         isipv6 = nam && nam->sa_family == AF_INET6;
  653 #endif /* INET6 */
  654         tp = intotcpcb(inp);
  655         TCPDEBUG1();
  656         if (control) {
  657                 /* TCP doesn't do control messages (rights, creds, etc) */
  658                 if (control->m_len) {
  659                         m_freem(control);
  660                         if (m)
  661                                 m_freem(m);
  662                         error = EINVAL;
  663                         goto out;
  664                 }
  665                 m_freem(control);       /* empty control, just free it */
  666         }
  667         if (!(flags & PRUS_OOB)) {
  668                 sbappendstream(&so->so_snd, m);
  669                 if (nam && tp->t_state < TCPS_SYN_SENT) {
  670                         /*
  671                          * Do implied connect if not yet connected,
  672                          * initialize window to default value, and
  673                          * initialize maxseg/maxopd using peer's cached
  674                          * MSS.
  675                          */
  676 #ifdef INET6
  677                         if (isipv6)
  678                                 error = tcp6_connect(tp, nam, td);
  679                         else
  680 #endif /* INET6 */
  681                         error = tcp_connect(tp, nam, td);
  682                         if (error)
  683                                 goto out;
  684                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
  685                         tcp_mss(tp, -1);
  686                 }
  687 
  688                 if (flags & PRUS_EOF) {
  689                         /*
  690                          * Close the send side of the connection after
  691                          * the data is sent.
  692                          */
  693                         socantsendmore(so);
  694                         tp = tcp_usrclosed(tp);
  695                 }
  696                 if (tp != NULL) {
  697                         if (flags & PRUS_MORETOCOME)
  698                                 tp->t_flags |= TF_MORETOCOME;
  699                         error = tcp_output(tp);
  700                         if (flags & PRUS_MORETOCOME)
  701                                 tp->t_flags &= ~TF_MORETOCOME;
  702                 }
  703         } else {
  704                 SOCKBUF_LOCK(&so->so_snd);
  705                 if (sbspace(&so->so_snd) < -512) {
  706                         SOCKBUF_UNLOCK(&so->so_snd);
  707                         m_freem(m);
  708                         error = ENOBUFS;
  709                         goto out;
  710                 }
  711                 /*
  712                  * According to RFC961 (Assigned Protocols),
  713                  * the urgent pointer points to the last octet
  714                  * of urgent data.  We continue, however,
  715                  * to consider it to indicate the first octet
  716                  * of data past the urgent section.
  717                  * Otherwise, snd_up should be one lower.
  718                  */
  719                 sbappendstream_locked(&so->so_snd, m);
  720                 SOCKBUF_UNLOCK(&so->so_snd);
  721                 if (nam && tp->t_state < TCPS_SYN_SENT) {
  722                         /*
  723                          * Do implied connect if not yet connected,
  724                          * initialize window to default value, and
  725                          * initialize maxseg/maxopd using peer's cached
  726                          * MSS.
  727                          */
  728 #ifdef INET6
  729                         if (isipv6)
  730                                 error = tcp6_connect(tp, nam, td);
  731                         else
  732 #endif /* INET6 */
  733                         error = tcp_connect(tp, nam, td);
  734                         if (error)
  735                                 goto out;
  736                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
  737                         tcp_mss(tp, -1);
  738                 }
  739                 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
  740                 tp->t_force = 1;
  741                 error = tcp_output(tp);
  742                 tp->t_force = 0;
  743         }
  744         COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
  745                    ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
  746 }
  747 
  748 /*
  749  * Abort the TCP.
  750  */
static int
tcp_usr_abort(struct socket *so)
{
        int error = 0;
        struct inpcb *inp;
        struct tcpcb *tp;
        /* Selects the tcbinfo write lock in COMMON_START/COMMON_END. */
        const int inirw = INI_WRITE;

        COMMON_START();
        /*
         * tp is overwritten with tcp_drop()'s result; COMMON_END only
         * unlocks the inpcb when that result is non-NULL.
         */
        tp = tcp_drop(tp, ECONNABORTED);
        COMMON_END(PRU_ABORT);
}
  763 
  764 /*
  765  * Receive out-of-band data.
  766  */
  767 static int
  768 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
  769 {
  770         int error = 0;
  771         struct inpcb *inp;
  772         struct tcpcb *tp;
  773         const int inirw = INI_READ;
  774 
  775         COMMON_START();
  776         if ((so->so_oobmark == 0 &&
  777              (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
  778             so->so_options & SO_OOBINLINE ||
  779             tp->t_oobflags & TCPOOB_HADDATA) {
  780                 error = EINVAL;
  781                 goto out;
  782         }
  783         if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
  784                 error = EWOULDBLOCK;
  785                 goto out;
  786         }
  787         m->m_len = 1;
  788         *mtod(m, caddr_t) = tp->t_iobc;
  789         if ((flags & MSG_PEEK) == 0)
  790                 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
  791         COMMON_END(PRU_RCVOOB);
  792 }
  793 
  794 /* xxx - should be const */
/*
 * User-request switch for TCP over IPv4.  The initializer is positional, so
 * every entry has to stay in the slot that matches the member order of
 * struct pr_usrreqs, which is declared outside this section of the file.
 * NOTE(review): check that declaration before adding or reordering entries.
 */
struct pr_usrreqs tcp_usrreqs = {
        tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
        tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach,
        tcp_usr_disconnect, tcp_usr_listen, tcp_peeraddr, tcp_usr_rcvd,
        tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
        tcp_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel
};
  802 
  803 #ifdef INET6
/*
 * User-request switch for TCP over IPv6.  It has the same positional layout
 * as tcp_usrreqs, with the tcp6_* and in6_* variants substituted for accept,
 * bind, connect, control, listen, peeraddr and sockaddr; the remaining
 * entries are shared with the IPv4 table.
 */
struct pr_usrreqs tcp6_usrreqs = {
        tcp_usr_abort, tcp6_usr_accept, tcp_usr_attach, tcp6_usr_bind,
        tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach,
        tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd,
        tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown,
        in6_mapped_sockaddr, sosend, soreceive, sopoll, in_pcbsosetlabel
};
  811 #endif /* INET6 */
  812 
  813 /*
  814  * Common subroutine to open a TCP connection to remote host specified
  815  * by the struct sockaddr_in that nam points to.  Call in_pcbbind to assign a local
  816  * port number if needed.  Call in_pcbconnect_setup to do the routing and
  817  * to choose a local host address (interface).  If there is an existing
  818  * incarnation of the same connection in TIME-WAIT state and if the remote
  819  * host was sending CC options and if the connection duration was < MSL, then
  820  * truncate the previous TIME-WAIT state and proceed.
  821  * Initialize connection parameters and enter SYN-SENT state.
  822  */
  823 static int
  824 tcp_connect(tp, nam, td)
  825         register struct tcpcb *tp;
  826         struct sockaddr *nam;
  827         struct thread *td;
  828 {
  829         struct inpcb *inp = tp->t_inpcb, *oinp;
  830         struct socket *so = inp->inp_socket;
  831         struct tcptw *otw;
  832         struct rmxp_tao tao;
  833         struct in_addr laddr;
  834         u_short lport;
  835         int error;
  836 
  837         bzero(&tao, sizeof(tao));
  838 
  839         if (inp->inp_lport == 0) {
  840                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
  841                 if (error)
  842                         return error;
  843         }
  844 
  845         /*
  846          * Cannot simply call in_pcbconnect, because there might be an
  847          * earlier incarnation of this same connection still in
  848          * TIME_WAIT state, creating an ADDRINUSE error.
  849          */
  850         laddr = inp->inp_laddr;
  851         lport = inp->inp_lport;
  852         error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
  853             &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
  854         if (error && oinp == NULL)
  855                 return error;
  856         if (oinp) {
  857                 if (oinp != inp &&
  858                     (oinp->inp_vflag & INP_TIMEWAIT) &&
  859                     (ticks - (otw = intotw(oinp))->t_starttime) < tcp_msl &&
  860                     otw->cc_recv != 0) {
  861                         inp->inp_faddr = oinp->inp_faddr;
  862                         inp->inp_fport = oinp->inp_fport;
  863                         (void) tcp_twclose(otw, 0);
  864                 } else
  865                         return EADDRINUSE;
  866         }
  867         inp->inp_laddr = laddr;
  868         in_pcbrehash(inp);
  869 
  870         /* Compute window scaling to request.  */
  871         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
  872             (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
  873                 tp->request_r_scale++;
  874 
  875         soisconnecting(so);
  876         tcpstat.tcps_connattempt++;
  877         tp->t_state = TCPS_SYN_SENT;
  878         callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
  879         tp->iss = tcp_new_isn(tp);
  880         tp->t_bw_rtseq = tp->iss;
  881         tcp_sendseqinit(tp);
  882 
  883         /*
  884          * Generate a CC value for this connection and
  885          * check whether CC or CCnew should be used.
  886          */
  887         if (tcp_do_rfc1644)
  888                 tcp_hc_gettao(&inp->inp_inc, &tao);
  889 
  890         tp->cc_send = CC_INC(tcp_ccgen);
  891         if (tao.tao_ccsent != 0 &&
  892             CC_GEQ(tp->cc_send, tao.tao_ccsent)) {
  893                 tao.tao_ccsent = tp->cc_send;
  894         } else {
  895                 tao.tao_ccsent = 0;
  896                 tp->t_flags |= TF_SENDCCNEW;
  897         }
  898 
  899         if (tcp_do_rfc1644)
  900                 tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT,
  901                                  tao.tao_ccsent, 0);
  902 
  903         return 0;
  904 }
  905 /* IPv6 counterpart of tcp_connect(), built on in6_pcbbind, in6_pcbladdr and in6_pcblookup_hash. */
  906 #ifdef INET6
  907 static int
  908 tcp6_connect(tp, nam, td)
  909         register struct tcpcb *tp;
  910         struct sockaddr *nam;
  911         struct thread *td;
  912 {
  913         struct inpcb *inp = tp->t_inpcb, *oinp;
  914         struct socket *so = inp->inp_socket;
  915         struct tcptw *otw;
  916         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
  917         struct in6_addr *addr6;
  918         struct rmxp_tao tao;
  919         int error;
  920 
  921         bzero(&tao, sizeof(tao));       /* tao_ccsent reads as 0 unless tcp_hc_gettao fills it */
  922         /* No local port yet: bind one, passing a null address. */
  923         if (inp->inp_lport == 0) {
  924                 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
  925                 if (error)
  926                         return error;
  927         }
  928 
  929         /*
  930          * Cannot simply call in_pcbconnect, because there might be an
  931          * earlier incarnation of this same connection still in
  932          * TIME_WAIT state, creating an ADDRINUSE error.
  933          */
  934         error = in6_pcbladdr(inp, nam, &addr6);  /* addr6 is used below as the local address if none is set */
  935         if (error)
  936                 return error;
  937         oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
  938                                   &sin6->sin6_addr, sin6->sin6_port,
  939                                   IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
  940                                   ? addr6
  941                                   : &inp->in6p_laddr,
  942                                   inp->inp_lport,  0, NULL);
  943         if (oinp) {     /* reuse is allowed only for a TIME_WAIT pcb younger than tcp_msl with cc_recv set */
  944                 if (oinp != inp &&
  945                     (oinp->inp_vflag & INP_TIMEWAIT) &&
  946                     (ticks - (otw = intotw(oinp))->t_starttime) < tcp_msl &&
  947                     otw->cc_recv != 0) {
  948                         inp->inp_faddr = oinp->inp_faddr;
  949                         inp->inp_fport = oinp->inp_fport;
  950                         (void) tcp_twclose(otw, 0);
  951                 } else
  952                         return EADDRINUSE;
  953         }
  954         if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
  955                 inp->in6p_laddr = *addr6;
  956         inp->in6p_faddr = sin6->sin6_addr;
  957         inp->inp_fport = sin6->sin6_port;
  958         /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
  959         inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
  960         if (inp->in6p_flags & IN6P_AUTOFLOWLABEL)
  961                 inp->in6p_flowinfo |=
  962                     (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
  963         in_pcbrehash(inp);
  964 
  965         /* Compute window scaling to request.  */
  966         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
  967             (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
  968                 tp->request_r_scale++;
  969         /* Mark the socket connecting, enter SYN_SENT, and start tt_keep with tcp_keepinit. */
  970         soisconnecting(so);
  971         tcpstat.tcps_connattempt++;
  972         tp->t_state = TCPS_SYN_SENT;
  973         callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
  974         tp->iss = tcp_new_isn(tp);
  975         tp->t_bw_rtseq = tp->iss;
  976         tcp_sendseqinit(tp);
  977 
  978         /*
  979          * Generate a CC value for this connection and
  980          * check whether CC or CCnew should be used.
  981          */
  982         if (tcp_do_rfc1644)
  983                 tcp_hc_gettao(&inp->inp_inc, &tao);
  984 
  985         tp->cc_send = CC_INC(tcp_ccgen);
  986         if (tao.tao_ccsent != 0 &&
  987             CC_GEQ(tp->cc_send, tao.tao_ccsent)) {
  988                 tao.tao_ccsent = tp->cc_send;
  989         } else {
  990                 tao.tao_ccsent = 0;
  991                 tp->t_flags |= TF_SENDCCNEW;
  992         }
  993         if (tcp_do_rfc1644)     /* hand the resulting ccsent (0 when CCnew was chosen) to the updater */
  994                 tcp_hc_updatetao(&inp->inp_inc, TCP_HC_TAO_CCSENT,
  995                                  tao.tao_ccsent, 0);
  996 
  997         return 0;
  998 }
  999 #endif /* INET6 */
 1000 
 1001 /*
 1002  * Get or set a TCP socket option.  Requests whose level is not IPPROTO_TCP
 1003  * are passed on to ip6_ctloutput() or ip_ctloutput().  Returns 0 or an errno.
 1004  * The new sockopt interface makes it possible for us to block in the
 1005  * copyin/out step (if we take a page fault), and that step runs here with
 1006  * the inpcb lock held (formerly at splnet()).  This needs more examination.
 1007  */
 1008 int
 1009 tcp_ctloutput(so, sopt)
 1010         struct socket *so;
 1011         struct sockopt *sopt;
 1012 {
 1013         int     error, opt, optval;
 1014         struct  inpcb *inp;
 1015         struct  tcpcb *tp;
 1016 
 1017         error = 0;
 1018         INP_INFO_RLOCK(&tcbinfo);       /* dropped again as soon as the inpcb itself is locked */
 1019         inp = sotoinpcb(so);
 1020         if (inp == NULL) {              /* socket has no pcb attached */
 1021                 INP_INFO_RUNLOCK(&tcbinfo);
 1022                 return (ECONNRESET);
 1023         }
 1024         INP_LOCK(inp);
 1025         INP_INFO_RUNLOCK(&tcbinfo);
 1026         if (sopt->sopt_level != IPPROTO_TCP) {
 1027                 INP_UNLOCK(inp);        /* release our lock before delegating */
 1028 #ifdef INET6
 1029                 if (INP_CHECK_SOCKAF(so, AF_INET6))
 1030                         error = ip6_ctloutput(so, sopt);
 1031                 else
 1032 #endif /* INET6 */
 1033                 error = ip_ctloutput(so, sopt);
 1034                 return (error);
 1035         }
 1036         tp = intotcpcb(inp);
 1037 
 1038         switch (sopt->sopt_dir) {
 1039         case SOPT_SET:
 1040                 switch (sopt->sopt_name) {
 1041 #ifdef TCP_SIGNATURE
 1042                 case TCP_MD5SIG:
 1043                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1044                                             sizeof optval);
 1045                         if (error)
 1046                                 break;
 1047 
 1048                         if (optval > 0)
 1049                                 tp->t_flags |= TF_SIGNATURE;
 1050                         else
 1051                                 tp->t_flags &= ~TF_SIGNATURE;
 1052                         break;
 1053 #endif /* TCP_SIGNATURE */
 1054                 case TCP_NODELAY:
 1055                 case TCP_NOOPT:
 1056                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1057                                             sizeof optval);
 1058                         if (error)
 1059                                 break;
 1060                         /* Map the option name to its t_flags bit. */
 1061                         switch (sopt->sopt_name) {
 1062                         case TCP_NODELAY:
 1063                                 opt = TF_NODELAY;
 1064                                 break;
 1065                         case TCP_NOOPT:
 1066                                 opt = TF_NOOPT;
 1067                                 break;
 1068                         default:
 1069                                 opt = 0; /* dead code to fool gcc */
 1070                                 break;
 1071                         }
 1072 
 1073                         if (optval)
 1074                                 tp->t_flags |= opt;
 1075                         else
 1076                                 tp->t_flags &= ~opt;
 1077                         break;
 1078 
 1079                 case TCP_NOPUSH:
 1080                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1081                                             sizeof optval);
 1082                         if (error)
 1083                                 break;
 1084 
 1085                         if (optval)
 1086                                 tp->t_flags |= TF_NOPUSH;
 1087                         else {
 1088                                 tp->t_flags &= ~TF_NOPUSH;
 1089                                 error = tcp_output(tp); /* presumably sends data NOPUSH was holding back */
 1090                         }
 1091                         break;
 1092 
 1093                 case TCP_MAXSEG:
 1094                         error = sooptcopyin(sopt, &optval, sizeof optval,
 1095                                             sizeof optval);
 1096                         if (error)
 1097                                 break;
 1098                         /* t_maxseg may only be lowered; 40 is presumably the IP + TCP header size. */
 1099                         if (optval > 0 && optval <= tp->t_maxseg &&
 1100                             optval + 40 >= tcp_minmss)
 1101                                 tp->t_maxseg = optval;
 1102                         else
 1103                                 error = EINVAL;
 1104                         break;
 1105 
 1106                 default:
 1107                         error = ENOPROTOOPT;
 1108                         break;
 1109                 }
 1110                 break;
 1111 
 1112         case SOPT_GET:
 1113                 switch (sopt->sopt_name) {
 1114 #ifdef TCP_SIGNATURE
 1115                 case TCP_MD5SIG:
 1116                         optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
 1117                         break;
 1118 #endif
 1119                 case TCP_NODELAY:
 1120                         optval = tp->t_flags & TF_NODELAY;      /* raw flag bit, not normalized to 0/1 */
 1121                         break;
 1122                 case TCP_MAXSEG:
 1123                         optval = tp->t_maxseg;
 1124                         break;
 1125                 case TCP_NOOPT:
 1126                         optval = tp->t_flags & TF_NOOPT;        /* raw flag bit */
 1127                         break;
 1128                 case TCP_NOPUSH:
 1129                         optval = tp->t_flags & TF_NOPUSH;       /* raw flag bit */
 1130                         break;
 1131                 default:
 1132                         error = ENOPROTOOPT;
 1133                         break;
 1134                 }
 1135                 if (error == 0)         /* optval is only set, and only copied out, for a known name */
 1136                         error = sooptcopyout(sopt, &optval, sizeof optval);
 1137                 break;
 1138         }
 1139         INP_UNLOCK(inp);
 1140         return (error);
 1141 }
 1142 
 1143 /*
 1144  * tcp_sendspace and tcp_recvspace are the default send and receive window
 1145  * sizes, respectively (obsolescent: this should be set by the route).  They
 1146  * are u_long, so export them with SYSCTL_ULONG rather than as ints.
 1147  */
 1148 u_long  tcp_sendspace = 1024*32;
 1149 SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
 1150     &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
 1151 u_long  tcp_recvspace = 1024*64;
 1152 SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
 1153     &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
 1154 
 1155 /*
 1156  * Attach TCP protocol to socket: reserve socket buffer space if none is
 1157  * set, allocate the internet protocol control block and the tcp control
 1158  * block, and leave the connection in CLOSED state.  Returns 0 or an errno.
 1159  */
 1160 static int
 1161 tcp_attach(so)
 1162         struct socket *so;
 1163 {
 1164         register struct tcpcb *tp;
 1165         struct inpcb *inp;
 1166         int error;
 1167 #ifdef INET6
 1168         int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0;
 1169 #endif
 1170 
 1171         INP_INFO_WLOCK_ASSERT(&tcbinfo);
 1172         /* Use the tcp_sendspace/tcp_recvspace defaults only if a high-water mark is still zero. */
 1173         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 1174                 error = soreserve(so, tcp_sendspace, tcp_recvspace);
 1175                 if (error)
 1176                         return (error);
 1177         }
 1178         error = in_pcballoc(so, &tcbinfo, "tcpinp");
 1179         if (error)
 1180                 return (error);
 1181         inp = sotoinpcb(so);
 1182 #ifdef INET6
 1183         if (isipv6) {
 1184                 inp->inp_vflag |= INP_IPV6;
 1185                 inp->in6p_hops = -1;    /* use kernel default */
 1186         }
 1187         else
 1188 #endif
 1189         inp->inp_vflag |= INP_IPV4;
 1190         tp = tcp_newtcpcb(inp);
 1191         if (tp == 0) {
 1192                 int nofd = so->so_state & SS_NOFDREF;   /* XXX */
 1193                 /* Clear SS_NOFDREF around the pcb detach, then restore the saved bit. */
 1194                 so->so_state &= ~SS_NOFDREF;    /* don't free the socket yet */
 1195 
 1196                 INP_LOCK(inp);          /* the detach routines are called with the inpcb locked */
 1197 #ifdef INET6
 1198                 if (isipv6)
 1199                         in6_pcbdetach(inp);
 1200                 else
 1201 #endif
 1202                 in_pcbdetach(inp);
 1203                 so->so_state |= nofd;
 1204                 return (ENOBUFS);
 1205         }
 1206         tp->t_state = TCPS_CLOSED;
 1207         return (0);
 1208 }
 1209 
 1210 /*
 1211  * Initiate (or continue) disconnect.
 1212  * If embryonic state (before ESTABLISHED), just tcp_close().
 1213  * If SO_LINGER is set with a zero linger time, just drop.
 1214  * Otherwise (hard), mark socket disconnecting and drop
 1215  * current input data; switch states based on user close, and
 1216  * send segment to peer (with FIN).  Returns the resulting tcpcb or NULL.
 1217  */
 1218 static struct tcpcb *
 1219 tcp_disconnect(tp)
 1220         register struct tcpcb *tp;
 1221 {
 1222         struct inpcb *inp = tp->t_inpcb;
 1223         struct socket *so = inp->inp_socket;
 1224 
 1225         INP_INFO_WLOCK_ASSERT(&tcbinfo);
 1226         INP_LOCK_ASSERT(inp);
 1227 
 1228         if (tp->t_state < TCPS_ESTABLISHED)
 1229                 tp = tcp_close(tp);
 1230         else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
 1231                 tp = tcp_drop(tp, 0);
 1232         else {
 1233                 soisdisconnecting(so);
 1234                 sbflush(&so->so_rcv);   /* discard unread receive data */
 1235                 tp = tcp_usrclosed(tp);
 1236                 if (tp)                 /* tcp_usrclosed() may hand back a null tcpcb */
 1237                         (void) tcp_output(tp);
 1238         }
 1239         return (tp);
 1240 }
 1241 
 1242 /*
 1243  * User issued close, and wish to trail through shutdown states:
 1244  * if never received SYN, just forget it.  If got a SYN from peer,
 1245  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
 1246  * If already got a FIN from peer, then almost done; go to LAST_ACK
 1247  * state.  In all other cases, have already sent FIN to peer (e.g.
 1248  * after PRU_SHUTDOWN), and just have to play tedious game waiting
 1249  * for peer to send FIN or not respond to keep-alives, etc.  We can let the
 1250  * user exit from the close as soon as the FIN is acked.  Returns tp, or the result of tcp_close().
 1251  */
 1252 static struct tcpcb *
 1253 tcp_usrclosed(tp)
 1254         register struct tcpcb *tp;
 1255 {
 1256 
 1257         INP_INFO_WLOCK_ASSERT(&tcbinfo);
 1258         INP_LOCK_ASSERT(tp->t_inpcb);
 1259 
 1260         switch (tp->t_state) {
 1261 
 1262         case TCPS_CLOSED:
 1263         case TCPS_LISTEN:
 1264                 tp->t_state = TCPS_CLOSED;
 1265                 tp = tcp_close(tp);
 1266                 break;
 1267 
 1268         case TCPS_SYN_SENT:
 1269         case TCPS_SYN_RECEIVED:
 1270                 tp->t_flags |= TF_NEEDFIN;      /* only flag the FIN; t_state is left unchanged here */
 1271                 break;
 1272 
 1273         case TCPS_ESTABLISHED:
 1274                 tp->t_state = TCPS_FIN_WAIT_1;
 1275                 break;
 1276 
 1277         case TCPS_CLOSE_WAIT:
 1278                 tp->t_state = TCPS_LAST_ACK;
 1279                 break;
 1280         }
 1281         if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {     /* tp may have been replaced by tcp_close() above */
 1282                 soisdisconnected(tp->t_inpcb->inp_socket);
 1283                 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
 1284                 if (tp->t_state == TCPS_FIN_WAIT_2)
 1285                         callout_reset(tp->tt_2msl, tcp_maxidle,
 1286                                       tcp_timer_2msl, tp);
 1287         }
 1288         return (tp);
 1289 }
 1290 

Cache object: e7bf585d5f1c7de37177b290dc6524aa


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.