The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_subr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $OpenBSD: tcp_subr.c,v 1.190 2022/11/07 11:22:55 yasuoka Exp $  */
    2 /*      $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $   */
    3 
    4 /*
    5  * Copyright (c) 1982, 1986, 1988, 1990, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      @(#)COPYRIGHT   1.1 (NRL) 17 January 1995
   33  *
   34  * NRL grants permission for redistribution and use in source and binary
   35  * forms, with or without modification, of the software and documentation
   36  * created at NRL provided that the following conditions are met:
   37  *
   38  * 1. Redistributions of source code must retain the above copyright
   39  *    notice, this list of conditions and the following disclaimer.
   40  * 2. Redistributions in binary form must reproduce the above copyright
   41  *    notice, this list of conditions and the following disclaimer in the
   42  *    documentation and/or other materials provided with the distribution.
   43  * 3. All advertising materials mentioning features or use of this software
   44  *    must display the following acknowledgements:
   45  *      This product includes software developed by the University of
   46  *      California, Berkeley and its contributors.
   47  *      This product includes software developed at the Information
   48  *      Technology Division, US Naval Research Laboratory.
   49  * 4. Neither the name of the NRL nor the names of its contributors
   50  *    may be used to endorse or promote products derived from this software
   51  *    without specific prior written permission.
   52  *
   53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
   54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
   56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
   57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   64  *
   65  * The views and conclusions contained in the software and documentation
   66  * are those of the authors and should not be interpreted as representing
   67  * official policies, either expressed or implied, of the US Naval
   68  * Research Laboratory (NRL).
   69  */
   70 
   71 #include <sys/param.h>
   72 #include <sys/systm.h>
   73 #include <sys/mbuf.h>
   74 #include <sys/mutex.h>
   75 #include <sys/socket.h>
   76 #include <sys/socketvar.h>
   77 #include <sys/timeout.h>
   78 #include <sys/protosw.h>
   79 #include <sys/kernel.h>
   80 #include <sys/pool.h>
   81 
   82 #include <net/route.h>
   83 
   84 #include <netinet/in.h>
   85 #include <netinet/ip.h>
   86 #include <netinet/in_pcb.h>
   87 #include <netinet/ip_var.h>
   88 #include <netinet/ip_icmp.h>
   89 #include <netinet/tcp.h>
   90 #include <netinet/tcp_fsm.h>
   91 #include <netinet/tcp_seq.h>
   92 #include <netinet/tcp_timer.h>
   93 #include <netinet/tcp_var.h>
   94 
   95 #ifdef INET6
   96 #include <netinet6/ip6protosw.h>
   97 #endif /* INET6 */
   98 
   99 #include <crypto/md5.h>
  100 #include <crypto/sha2.h>
  101 
  102 /*
  103  * Locks used to protect struct members in this file:
  104  *      I       immutable after creation
  105  *      T       tcp_timer_mtx           global tcp timer data structures
  106  */
  107 
  108 struct mutex tcp_timer_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
  109 
  110 /* patchable/settable parameters for tcp */
  111 int     tcp_mssdflt = TCP_MSS;
  112 int     tcp_rttdflt = TCPTV_SRTTDFLT;
  113 
  114 /* values controllable via sysctl */
  115 int     tcp_do_rfc1323 = 1;
  116 int     tcp_do_sack = 1;        /* RFC 2018 selective ACKs */
  117 int     tcp_ack_on_push = 0;    /* set to enable immediate ACK-on-PUSH */
  118 #ifdef TCP_ECN
  119 int     tcp_do_ecn = 0;         /* RFC3168 ECN enabled/disabled? */
  120 #endif
  121 int     tcp_do_rfc3390 = 2;     /* Increase TCP's Initial Window to 10*mss */
  122 
  123 #ifndef TCB_INITIAL_HASH_SIZE
  124 #define TCB_INITIAL_HASH_SIZE   128
  125 #endif
  126 
  127 int tcp_reass_limit = NMBCLUSTERS / 8; /* hardlimit for tcpqe_pool */
  128 int tcp_sackhole_limit = 32*1024; /* hardlimit for sackhl_pool */
  129 
  130 struct pool tcpcb_pool;
  131 struct pool tcpqe_pool;
  132 struct pool sackhl_pool;
  133 
  134 struct cpumem *tcpcounters;             /* tcp statistics */
  135 
  136 u_char          tcp_secret[16]; /* [I] */
  137 SHA2_CTX        tcp_secret_ctx; /* [I] */
  138 tcp_seq         tcp_iss;        /* [T] updated by timer and connection */
  139 
  140 /*
  141  * Tcp initialization
  142  */
  143 void
  144 tcp_init(void)
  145 {
  146         tcp_iss = 1;            /* wrong */
  147         pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, IPL_SOFTNET, 0,
  148             "tcpcb", NULL);
  149         pool_init(&tcpqe_pool, sizeof(struct tcpqent), 0, IPL_SOFTNET, 0,
  150             "tcpqe", NULL);
  151         pool_sethardlimit(&tcpqe_pool, tcp_reass_limit, NULL, 0);
  152         pool_init(&sackhl_pool, sizeof(struct sackhole), 0, IPL_SOFTNET, 0,
  153             "sackhl", NULL);
  154         pool_sethardlimit(&sackhl_pool, tcp_sackhole_limit, NULL, 0);
  155         in_pcbinit(&tcbtable, TCB_INITIAL_HASH_SIZE);
  156         tcpcounters = counters_alloc(tcps_ncounters);
  157 
  158         arc4random_buf(tcp_secret, sizeof(tcp_secret));
  159         SHA512Init(&tcp_secret_ctx);
  160         SHA512Update(&tcp_secret_ctx, tcp_secret, sizeof(tcp_secret));
  161 
  162 #ifdef INET6
  163         /*
  164          * Since sizeof(struct ip6_hdr) > sizeof(struct ip), we
  165          * do max length checks/computations only on the former.
  166          */
  167         if (max_protohdr < (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)))
  168                 max_protohdr = (sizeof(struct ip6_hdr) + sizeof(struct tcphdr));
  169         if ((max_linkhdr + sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) >
  170             MHLEN)
  171                 panic("tcp_init");
  172 
  173         icmp6_mtudisc_callback_register(tcp6_mtudisc_callback);
  174 #endif /* INET6 */
  175 
  176         /* Initialize the compressed state engine. */
  177         syn_cache_init();
  178 
  179         /* Initialize timer state. */
  180         tcp_timer_init();
  181 }
  182 
  183 /*
  184  * Create template to be used to send tcp packets on a connection.
  185  * Call after host entry created, allocates an mbuf and fills
  186  * in a skeletal tcp/ip header, minimizing the amount of work
  187  * necessary when the connection is used.
  188  *
  189  * To support IPv6 in addition to IPv4 and considering that the sizes of
  190  * the IPv4 and IPv6 headers are not the same, we now use a separate pointer
  191  * for the TCP header.  Also, we made the former tcpiphdr header pointer
  192  * into just an IP overlay pointer, with casting as appropriate for v6. rja
  193  */
  194 struct mbuf *
  195 tcp_template(struct tcpcb *tp)
  196 {
  197         struct inpcb *inp = tp->t_inpcb;
  198         struct mbuf *m;
  199         struct tcphdr *th;
  200 
  201         CTASSERT(sizeof(struct ip) + sizeof(struct tcphdr) <= MHLEN);
  202         CTASSERT(sizeof(struct ip6_hdr) + sizeof(struct tcphdr) <= MHLEN);
  203 
  204         if ((m = tp->t_template) == 0) {
  205                 m = m_get(M_DONTWAIT, MT_HEADER);
  206                 if (m == NULL)
  207                         return (0);
  208 
  209                 switch (tp->pf) {
  210                 case 0: /*default to PF_INET*/
  211                 case AF_INET:
  212                         m->m_len = sizeof(struct ip);
  213                         break;
  214 #ifdef INET6
  215                 case AF_INET6:
  216                         m->m_len = sizeof(struct ip6_hdr);
  217                         break;
  218 #endif /* INET6 */
  219                 }
  220                 m->m_len += sizeof (struct tcphdr);
  221         }
  222 
  223         switch(tp->pf) {
  224         case AF_INET:
  225                 {
  226                         struct ipovly *ipovly;
  227 
  228                         ipovly = mtod(m, struct ipovly *);
  229 
  230                         bzero(ipovly->ih_x1, sizeof ipovly->ih_x1);
  231                         ipovly->ih_pr = IPPROTO_TCP;
  232                         ipovly->ih_len = htons(sizeof (struct tcphdr));
  233                         ipovly->ih_src = inp->inp_laddr;
  234                         ipovly->ih_dst = inp->inp_faddr;
  235 
  236                         th = (struct tcphdr *)(mtod(m, caddr_t) +
  237                                 sizeof(struct ip));
  238                 }
  239                 break;
  240 #ifdef INET6
  241         case AF_INET6:
  242                 {
  243                         struct ip6_hdr *ip6;
  244 
  245                         ip6 = mtod(m, struct ip6_hdr *);
  246 
  247                         ip6->ip6_src = inp->inp_laddr6;
  248                         ip6->ip6_dst = inp->inp_faddr6;
  249                         ip6->ip6_flow = htonl(0x60000000) |
  250                             (inp->inp_flowinfo & IPV6_FLOWLABEL_MASK);
  251 
  252                         ip6->ip6_nxt = IPPROTO_TCP;
  253                         ip6->ip6_plen = htons(sizeof(struct tcphdr)); /*XXX*/
  254                         ip6->ip6_hlim = in6_selecthlim(inp);    /*XXX*/
  255 
  256                         th = (struct tcphdr *)(mtod(m, caddr_t) +
  257                                 sizeof(struct ip6_hdr));
  258                 }
  259                 break;
  260 #endif /* INET6 */
  261         }
  262 
  263         th->th_sport = inp->inp_lport;
  264         th->th_dport = inp->inp_fport;
  265         th->th_seq = 0;
  266         th->th_ack = 0;
  267         th->th_x2  = 0;
  268         th->th_off = 5;
  269         th->th_flags = 0;
  270         th->th_win = 0;
  271         th->th_urp = 0;
  272         th->th_sum = 0;
  273         return (m);
  274 }
  275 
  276 /*
  277  * Send a single message to the TCP at address specified by
  278  * the given TCP/IP header.  If m == 0, then we make a copy
  279  * of the tcpiphdr at ti and send directly to the addressed host.
  280  * This is used to force keep alive messages out using the TCP
  281  * template for a connection tp->t_template.  If flags are given
  282  * then we send a message back to the TCP which originated the
  283  * segment ti, and discard the mbuf containing it and any other
  284  * attached mbufs.
  285  *
  286  * In any case the ack and sequence number of the transmitted
  287  * segment are as specified by the parameters.
  288  */
  289 void
  290 tcp_respond(struct tcpcb *tp, caddr_t template, struct tcphdr *th0,
  291     tcp_seq ack, tcp_seq seq, int flags, u_int rtableid, uint32_t now)
  292 {
  293         int tlen;
  294         int win = 0;
  295         struct mbuf *m = NULL;
  296         struct tcphdr *th;
  297         struct ip *ip;
  298 #ifdef INET6
  299         struct ip6_hdr *ip6;
  300 #endif
  301         int af;         /* af on wire */
  302 
  303         if (tp) {
  304                 struct socket *so = tp->t_inpcb->inp_socket;
  305                 win = sbspace(so, &so->so_rcv);
  306                 /*
  307                  * If this is called with an unconnected
  308                  * socket/tp/pcb (tp->pf is 0), we lose.
  309                  */
  310                 af = tp->pf;
  311         } else
  312                 af = (((struct ip *)template)->ip_v == 6) ? AF_INET6 : AF_INET;
  313 
  314         m = m_gethdr(M_DONTWAIT, MT_HEADER);
  315         if (m == NULL)
  316                 return;
  317         m->m_data += max_linkhdr;
  318         tlen = 0;
  319 
  320 #define xchg(a,b,type) do { type t; t=a; a=b; b=t; } while (0)
  321         switch (af) {
  322 #ifdef INET6
  323         case AF_INET6:
  324                 ip6 = mtod(m, struct ip6_hdr *);
  325                 th = (struct tcphdr *)(ip6 + 1);
  326                 tlen = sizeof(*ip6) + sizeof(*th);
  327                 if (th0) {
  328                         bcopy(template, ip6, sizeof(*ip6));
  329                         bcopy(th0, th, sizeof(*th));
  330                         xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
  331                 } else {
  332                         bcopy(template, ip6, tlen);
  333                 }
  334                 break;
  335 #endif /* INET6 */
  336         case AF_INET:
  337                 ip = mtod(m, struct ip *);
  338                 th = (struct tcphdr *)(ip + 1);
  339                 tlen = sizeof(*ip) + sizeof(*th);
  340                 if (th0) {
  341                         bcopy(template, ip, sizeof(*ip));
  342                         bcopy(th0, th, sizeof(*th));
  343                         xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, u_int32_t);
  344                 } else {
  345                         bcopy(template, ip, tlen);
  346                 }
  347                 break;
  348         }
  349         if (th0)
  350                 xchg(th->th_dport, th->th_sport, u_int16_t);
  351         else
  352                 flags = TH_ACK;
  353 #undef xchg
  354 
  355         th->th_seq = htonl(seq);
  356         th->th_ack = htonl(ack);
  357         th->th_x2 = 0;
  358         th->th_off = sizeof (struct tcphdr) >> 2;
  359         th->th_flags = flags;
  360         if (tp)
  361                 win >>= tp->rcv_scale;
  362         if (win > TCP_MAXWIN)
  363                 win = TCP_MAXWIN;
  364         th->th_win = htons((u_int16_t)win);
  365         th->th_urp = 0;
  366 
  367         if (tp && (tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
  368             (flags & TH_RST) == 0 && (tp->t_flags & TF_RCVD_TSTMP)) {
  369                 u_int32_t *lp = (u_int32_t *)(th + 1);
  370                 /* Form timestamp option as shown in appendix A of RFC 1323. */
  371                 *lp++ = htonl(TCPOPT_TSTAMP_HDR);
  372                 *lp++ = htonl(now + tp->ts_modulate);
  373                 *lp   = htonl(tp->ts_recent);
  374                 tlen += TCPOLEN_TSTAMP_APPA;
  375                 th->th_off = (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_APPA) >> 2;
  376         }
  377 
  378         m->m_len = tlen;
  379         m->m_pkthdr.len = tlen;
  380         m->m_pkthdr.ph_ifidx = 0;
  381         m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
  382 
  383         /* force routing table */
  384         if (tp)
  385                 m->m_pkthdr.ph_rtableid = tp->t_inpcb->inp_rtableid;
  386         else
  387                 m->m_pkthdr.ph_rtableid = rtableid;
  388 
  389         switch (af) {
  390 #ifdef INET6
  391         case AF_INET6:
  392                 ip6->ip6_flow = htonl(0x60000000);
  393                 ip6->ip6_nxt  = IPPROTO_TCP;
  394                 ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL);        /*XXX*/
  395                 ip6->ip6_plen = tlen - sizeof(struct ip6_hdr);
  396                 ip6->ip6_plen = htons(ip6->ip6_plen);
  397                 ip6_output(m, tp ? tp->t_inpcb->inp_outputopts6 : NULL,
  398                     tp ? &tp->t_inpcb->inp_route6 : NULL,
  399                     0, NULL,
  400                     tp ? tp->t_inpcb : NULL);
  401                 break;
  402 #endif /* INET6 */
  403         case AF_INET:
  404                 ip->ip_len = htons(tlen);
  405                 ip->ip_ttl = ip_defttl;
  406                 ip->ip_tos = 0;
  407                 ip_output(m, NULL,
  408                     tp ? &tp->t_inpcb->inp_route : NULL,
  409                     ip_mtudisc ? IP_MTUDISC : 0, NULL,
  410                     tp ? tp->t_inpcb : NULL, 0);
  411                 break;
  412         }
  413 }
  414 
  415 /*
  416  * Create a new TCP control block, making an
  417  * empty reassembly queue and hooking it to the argument
  418  * protocol control block.
  419  */
  420 struct tcpcb *
  421 tcp_newtcpcb(struct inpcb *inp, int wait)
  422 {
  423         struct tcpcb *tp;
  424         int i;
  425 
  426         tp = pool_get(&tcpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
  427             PR_ZERO);
  428         if (tp == NULL)
  429                 return (NULL);
  430         TAILQ_INIT(&tp->t_segq);
  431         tp->t_maxseg = tcp_mssdflt;
  432         tp->t_maxopd = 0;
  433 
  434         for (i = 0; i < TCPT_NTIMERS; i++)
  435                 TCP_TIMER_INIT(tp, i);
  436 
  437         tp->sack_enable = tcp_do_sack;
  438         tp->t_flags = tcp_do_rfc1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0;
  439         tp->t_inpcb = inp;
  440         /*
  441          * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
  442          * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
  443          * reasonable initial retransmit time.
  444          */
  445         tp->t_srtt = TCPTV_SRTTBASE;
  446         tp->t_rttvar = tcp_rttdflt <<
  447             (TCP_RTTVAR_SHIFT + TCP_RTT_BASE_SHIFT - 1);
  448         tp->t_rttmin = TCPTV_MIN;
  449         TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
  450             TCPTV_MIN, TCPTV_REXMTMAX);
  451         tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
  452         tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
  453         
  454         tp->t_pmtud_mtu_sent = 0;
  455         tp->t_pmtud_mss_acked = 0;
  456         
  457 #ifdef INET6
  458         /* we disallow IPv4 mapped address completely. */
  459         if ((inp->inp_flags & INP_IPV6) == 0)
  460                 tp->pf = PF_INET;
  461         else
  462                 tp->pf = PF_INET6;
  463 #else
  464         tp->pf = PF_INET;
  465 #endif
  466 
  467 #ifdef INET6
  468         if (inp->inp_flags & INP_IPV6)
  469                 inp->inp_ipv6.ip6_hlim = ip6_defhlim;
  470         else
  471 #endif /* INET6 */
  472                 inp->inp_ip.ip_ttl = ip_defttl;
  473 
  474         inp->inp_ppcb = (caddr_t)tp;
  475         return (tp);
  476 }
  477 
  478 /*
  479  * Drop a TCP connection, reporting
  480  * the specified error.  If connection is synchronized,
  481  * then send a RST to peer.
  482  */
  483 struct tcpcb *
  484 tcp_drop(struct tcpcb *tp, int errno)
  485 {
  486         struct socket *so = tp->t_inpcb->inp_socket;
  487 
  488         if (TCPS_HAVERCVDSYN(tp->t_state)) {
  489                 tp->t_state = TCPS_CLOSED;
  490                 (void) tcp_output(tp);
  491                 tcpstat_inc(tcps_drops);
  492         } else
  493                 tcpstat_inc(tcps_conndrops);
  494         if (errno == ETIMEDOUT && tp->t_softerror)
  495                 errno = tp->t_softerror;
  496         so->so_error = errno;
  497         return (tcp_close(tp));
  498 }
  499 
  500 /*
  501  * Close a TCP control block:
  502  *      discard all space held by the tcp
  503  *      discard internet protocol block
  504  *      wake up any sleepers
  505  */
  506 struct tcpcb *
  507 tcp_close(struct tcpcb *tp)
  508 {
  509         struct inpcb *inp = tp->t_inpcb;
  510         struct socket *so = inp->inp_socket;
  511         struct sackhole *p, *q;
  512 
  513         /* free the reassembly queue, if any */
  514         tcp_freeq(tp);
  515 
  516         tcp_canceltimers(tp);
  517         syn_cache_cleanup(tp);
  518 
  519         /* Free SACK holes. */
  520         q = p = tp->snd_holes;
  521         while (p != 0) {
  522                 q = p->next;
  523                 pool_put(&sackhl_pool, p);
  524                 p = q;
  525         }
  526 
  527         m_free(tp->t_template);
  528         /* Free tcpcb after all pending timers have been run. */
  529         TCP_TIMER_ARM(tp, TCPT_REAPER, 1);
  530 
  531         inp->inp_ppcb = NULL;
  532         soisdisconnected(so);
  533         in_pcbdetach(inp);
  534         return (NULL);
  535 }
  536 
  537 int
  538 tcp_freeq(struct tcpcb *tp)
  539 {
  540         struct tcpqent *qe;
  541         int rv = 0;
  542 
  543         while ((qe = TAILQ_FIRST(&tp->t_segq)) != NULL) {
  544                 TAILQ_REMOVE(&tp->t_segq, qe, tcpqe_q);
  545                 m_freem(qe->tcpqe_m);
  546                 pool_put(&tcpqe_pool, qe);
  547                 rv = 1;
  548         }
  549         return (rv);
  550 }
  551 
  552 /*
  553  * Compute proper scaling value for receiver window from buffer space
  554  */
  555 
  556 void
  557 tcp_rscale(struct tcpcb *tp, u_long hiwat)
  558 {
  559         tp->request_r_scale = 0;
  560         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
  561                TCP_MAXWIN << tp->request_r_scale < hiwat)
  562                 tp->request_r_scale++;
  563 }
  564 
  565 /*
  566  * Notify a tcp user of an asynchronous error;
  567  * store error as soft error, but wake up user
  568  * (for now, won't do anything until can select for soft error).
  569  */
  570 void
  571 tcp_notify(struct inpcb *inp, int error)
  572 {
  573         struct tcpcb *tp = intotcpcb(inp);
  574         struct socket *so = inp->inp_socket;
  575 
  576         /*
  577          * Ignore some errors if we are hooked up.
  578          * If connection hasn't completed, has retransmitted several times,
  579          * and receives a second error, give up now.  This is better
  580          * than waiting a long time to establish a connection that
  581          * can never complete.
  582          */
  583         if (tp->t_state == TCPS_ESTABLISHED &&
  584              (error == EHOSTUNREACH || error == ENETUNREACH ||
  585               error == EHOSTDOWN)) {
  586                 return;
  587         } else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
  588             tp->t_rxtshift > 3 && tp->t_softerror)
  589                 so->so_error = error;
  590         else
  591                 tp->t_softerror = error;
  592         wakeup((caddr_t) &so->so_timeo);
  593         sorwakeup(so);
  594         sowwakeup(so);
  595 }
  596 
  597 #ifdef INET6
  598 void
  599 tcp6_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *d)
  600 {
  601         struct tcphdr th;
  602         struct tcpcb *tp;
  603         void (*notify)(struct inpcb *, int) = tcp_notify;
  604         struct ip6_hdr *ip6;
  605         const struct sockaddr_in6 *sa6_src = NULL;
  606         struct sockaddr_in6 *sa6 = satosin6(sa);
  607         struct inpcb *inp;
  608         struct mbuf *m;
  609         tcp_seq seq;
  610         int off;
  611         struct {
  612                 u_int16_t th_sport;
  613                 u_int16_t th_dport;
  614                 u_int32_t th_seq;
  615         } *thp;
  616 
  617         CTASSERT(sizeof(*thp) <= sizeof(th));
  618         if (sa->sa_family != AF_INET6 ||
  619             sa->sa_len != sizeof(struct sockaddr_in6) ||
  620             IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
  621             IN6_IS_ADDR_V4MAPPED(&sa6->sin6_addr))
  622                 return;
  623         if ((unsigned)cmd >= PRC_NCMDS)
  624                 return;
  625         else if (cmd == PRC_QUENCH) {
  626                 /* 
  627                  * Don't honor ICMP Source Quench messages meant for
  628                  * TCP connections.
  629                  */
  630                 /* XXX there's no PRC_QUENCH in IPv6 */
  631                 return;
  632         } else if (PRC_IS_REDIRECT(cmd))
  633                 notify = in_rtchange, d = NULL;
  634         else if (cmd == PRC_MSGSIZE)
  635                 ; /* special code is present, see below */
  636         else if (cmd == PRC_HOSTDEAD)
  637                 d = NULL;
  638         else if (inet6ctlerrmap[cmd] == 0)
  639                 return;
  640 
  641         /* if the parameter is from icmp6, decode it. */
  642         if (d != NULL) {
  643                 struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;
  644                 m = ip6cp->ip6c_m;
  645                 ip6 = ip6cp->ip6c_ip6;
  646                 off = ip6cp->ip6c_off;
  647                 sa6_src = ip6cp->ip6c_src;
  648         } else {
  649                 m = NULL;
  650                 ip6 = NULL;
  651                 sa6_src = &sa6_any;
  652         }
  653 
  654         if (ip6) {
  655                 /*
  656                  * XXX: We assume that when ip6 is non NULL,
  657                  * M and OFF are valid.
  658                  */
  659 
  660                 /* check if we can safely examine src and dst ports */
  661                 if (m->m_pkthdr.len < off + sizeof(*thp))
  662                         return;
  663 
  664                 bzero(&th, sizeof(th));
  665                 m_copydata(m, off, sizeof(*thp), &th);
  666 
  667                 /*
  668                  * Check to see if we have a valid TCP connection
  669                  * corresponding to the address in the ICMPv6 message
  670                  * payload.
  671                  */
  672                 inp = in6_pcblookup(&tcbtable, &sa6->sin6_addr,
  673                     th.th_dport, &sa6_src->sin6_addr, th.th_sport, rdomain);
  674                 if (cmd == PRC_MSGSIZE) {
  675                         /*
  676                          * Depending on the value of "valid" and routing table
  677                          * size (mtudisc_{hi,lo}wat), we will:
  678                          * - recalculate the new MTU and create the
  679                          *   corresponding routing entry, or
  680                          * - ignore the MTU change notification.
  681                          */
  682                         icmp6_mtudisc_update((struct ip6ctlparam *)d,
  683                             inp != NULL);
  684                         in_pcbunref(inp);
  685                         return;
  686                 }
  687                 if (inp) {
  688                         seq = ntohl(th.th_seq);
  689                         if (inp->inp_socket &&
  690                             (tp = intotcpcb(inp)) &&
  691                             SEQ_GEQ(seq, tp->snd_una) &&
  692                             SEQ_LT(seq, tp->snd_max))
  693                                 notify(inp, inet6ctlerrmap[cmd]);
  694                 } else if (inet6ctlerrmap[cmd] == EHOSTUNREACH ||
  695                     inet6ctlerrmap[cmd] == ENETUNREACH ||
  696                     inet6ctlerrmap[cmd] == EHOSTDOWN)
  697                         syn_cache_unreach((struct sockaddr *)sa6_src,
  698                             sa, &th, rdomain);
  699                 in_pcbunref(inp);
  700         } else {
  701                 in6_pcbnotify(&tcbtable, sa6, 0,
  702                     sa6_src, 0, rdomain, cmd, NULL, notify);
  703         }
  704 }
  705 #endif
  706 
  707 void
  708 tcp_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
  709 {
  710         struct ip *ip = v;
  711         struct tcphdr *th;
  712         struct tcpcb *tp;
  713         struct inpcb *inp;
  714         struct in_addr faddr;
  715         tcp_seq seq;
  716         u_int mtu;
  717         void (*notify)(struct inpcb *, int) = tcp_notify;
  718         int errno;
  719 
  720         if (sa->sa_family != AF_INET)
  721                 return;
  722         faddr = satosin(sa)->sin_addr;
  723         if (faddr.s_addr == INADDR_ANY)
  724                 return;
  725 
  726         if ((unsigned)cmd >= PRC_NCMDS)
  727                 return;
  728         errno = inetctlerrmap[cmd];
  729         if (cmd == PRC_QUENCH)
  730                 /* 
  731                  * Don't honor ICMP Source Quench messages meant for
  732                  * TCP connections.
  733                  */
  734                 return;
  735         else if (PRC_IS_REDIRECT(cmd))
  736                 notify = in_rtchange, ip = 0;
  737         else if (cmd == PRC_MSGSIZE && ip_mtudisc && ip) {
  738                 /*
  739                  * Verify that the packet in the icmp payload refers
  740                  * to an existing TCP connection.
  741                  */
  742                 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
  743                 seq = ntohl(th->th_seq);
  744                 inp = in_pcblookup(&tcbtable,
  745                     ip->ip_dst, th->th_dport, ip->ip_src, th->th_sport,
  746                     rdomain);
  747                 if (inp && (tp = intotcpcb(inp)) &&
  748                     SEQ_GEQ(seq, tp->snd_una) &&
  749                     SEQ_LT(seq, tp->snd_max)) {
  750                         struct icmp *icp;
  751                         icp = (struct icmp *)((caddr_t)ip -
  752                                               offsetof(struct icmp, icmp_ip));
  753 
  754                         /* 
  755                          * If the ICMP message advertises a Next-Hop MTU
  756                          * equal or larger than the maximum packet size we have
  757                          * ever sent, drop the message.
  758                          */
  759                         mtu = (u_int)ntohs(icp->icmp_nextmtu);
  760                         if (mtu >= tp->t_pmtud_mtu_sent) {
  761                                 in_pcbunref(inp);
  762                                 return;
  763                         }
  764                         if (mtu >= tcp_hdrsz(tp) + tp->t_pmtud_mss_acked) {
  765                                 /* 
  766                                  * Calculate new MTU, and create corresponding
  767                                  * route (traditional PMTUD).
  768                                  */
  769                                 tp->t_flags &= ~TF_PMTUD_PEND;
  770                                 icmp_mtudisc(icp, inp->inp_rtableid);
  771                         } else {
  772                                 /*
  773                                  * Record the information got in the ICMP
  774                                  * message; act on it later.
  775                                  * If we had already recorded an ICMP message,
  776                                  * replace the old one only if the new message
  777                                  * refers to an older TCP segment
  778                                  */
  779                                 if (tp->t_flags & TF_PMTUD_PEND) {
  780                                         if (SEQ_LT(tp->t_pmtud_th_seq, seq)) {
  781                                                 in_pcbunref(inp);
  782                                                 return;
  783                                         }
  784                                 } else
  785                                         tp->t_flags |= TF_PMTUD_PEND;
  786                                 tp->t_pmtud_th_seq = seq;
  787                                 tp->t_pmtud_nextmtu = icp->icmp_nextmtu;
  788                                 tp->t_pmtud_ip_len = icp->icmp_ip.ip_len;
  789                                 tp->t_pmtud_ip_hl = icp->icmp_ip.ip_hl;
  790                                 in_pcbunref(inp);
  791                                 return;
  792                         }
  793                 } else {
  794                         /* ignore if we don't have a matching connection */
  795                         in_pcbunref(inp);
  796                         return;
  797                 }
  798                 in_pcbunref(inp);
  799                 notify = tcp_mtudisc, ip = 0;
  800         } else if (cmd == PRC_MTUINC)
  801                 notify = tcp_mtudisc_increase, ip = 0;
  802         else if (cmd == PRC_HOSTDEAD)
  803                 ip = 0;
  804         else if (errno == 0)
  805                 return;
  806 
  807         if (ip) {
  808                 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
  809                 inp = in_pcblookup(&tcbtable,
  810                     ip->ip_dst, th->th_dport, ip->ip_src, th->th_sport,
  811                     rdomain);
  812                 if (inp) {
  813                         seq = ntohl(th->th_seq);
  814                         if (inp->inp_socket &&
  815                             (tp = intotcpcb(inp)) &&
  816                             SEQ_GEQ(seq, tp->snd_una) &&
  817                             SEQ_LT(seq, tp->snd_max))
  818                                 notify(inp, errno);
  819                 } else if (inetctlerrmap[cmd] == EHOSTUNREACH ||
  820                     inetctlerrmap[cmd] == ENETUNREACH ||
  821                     inetctlerrmap[cmd] == EHOSTDOWN) {
  822                         struct sockaddr_in sin;
  823 
  824                         bzero(&sin, sizeof(sin));
  825                         sin.sin_len = sizeof(sin);
  826                         sin.sin_family = AF_INET;
  827                         sin.sin_port = th->th_sport;
  828                         sin.sin_addr = ip->ip_src;
  829                         syn_cache_unreach(sintosa(&sin), sa, th, rdomain);
  830                 }
  831                 in_pcbunref(inp);
  832         } else
  833                 in_pcbnotifyall(&tcbtable, sa, rdomain, errno, notify);
  834 }
  835 
  836 
  837 #ifdef INET6
  838 /*
  839  * Path MTU Discovery handlers.
  840  */
  841 void
  842 tcp6_mtudisc_callback(struct sockaddr_in6 *sin6, u_int rdomain)
  843 {
  844         in6_pcbnotify(&tcbtable, sin6, 0,
  845             &sa6_any, 0, rdomain, PRC_MSGSIZE, NULL, tcp_mtudisc);
  846 }
  847 #endif /* INET6 */
  848 
  849 /*
  850  * On receipt of path MTU corrections, flush old route and replace it
  851  * with the new one.  Retransmit all unacknowledged packets, to ensure
  852  * that all packets will be received.
  853  */
  854 void
  855 tcp_mtudisc(struct inpcb *inp, int errno)
  856 {
  857         struct tcpcb *tp = intotcpcb(inp);
  858         struct rtentry *rt;
  859         int orig_maxseg, change = 0;
  860 
  861         if (tp == NULL)
  862                 return;
  863         orig_maxseg = tp->t_maxseg;
  864 
  865         rt = in_pcbrtentry(inp);
  866         if (rt != NULL) {
  867                 unsigned int orig_mtulock = (rt->rt_locks & RTV_MTU);
  868 
  869                 /*
  870                  * If this was not a host route, remove and realloc.
  871                  */
  872                 if ((rt->rt_flags & RTF_HOST) == 0) {
  873                         in_rtchange(inp, errno);
  874                         if ((rt = in_pcbrtentry(inp)) == NULL)
  875                                 return;
  876                 }
  877                 if (orig_mtulock < (rt->rt_locks & RTV_MTU))
  878                         change = 1;
  879         }
  880         tcp_mss(tp, -1);
  881         if (orig_maxseg > tp->t_maxseg)
  882                 change = 1;
  883 
  884         /*
  885          * Resend unacknowledged packets
  886          */
  887         tp->snd_nxt = tp->snd_una;
  888         if (change || errno > 0)
  889                 tcp_output(tp);
  890 }
  891 
  892 void
  893 tcp_mtudisc_increase(struct inpcb *inp, int errno)
  894 {
  895         struct tcpcb *tp = intotcpcb(inp);
  896         struct rtentry *rt = in_pcbrtentry(inp);
  897 
  898         if (tp != 0 && rt != 0) {
  899                 /*
  900                  * If this was a host route, remove and realloc.
  901                  */
  902                 if (rt->rt_flags & RTF_HOST)
  903                         in_rtchange(inp, errno);
  904 
  905                 /* also takes care of congestion window */
  906                 tcp_mss(tp, -1);
  907         }
  908 }
  909 
  910 /*
  911  * Generate new ISNs with a method based on RFC1948
  912  */
  913 #define TCP_ISS_CONN_INC 4096
  914 
  915 void
  916 tcp_set_iss_tsm(struct tcpcb *tp)
  917 {
  918         SHA2_CTX ctx;
  919         union {
  920                 uint8_t bytes[SHA512_DIGEST_LENGTH];
  921                 uint32_t words[2];
  922         } digest;
  923         u_int rdomain = rtable_l2(tp->t_inpcb->inp_rtableid);
  924         tcp_seq iss;
  925 
  926         mtx_enter(&tcp_timer_mtx);
  927         tcp_iss += TCP_ISS_CONN_INC;
  928         iss = tcp_iss;
  929         mtx_leave(&tcp_timer_mtx);
  930 
  931         ctx = tcp_secret_ctx;
  932         SHA512Update(&ctx, &rdomain, sizeof(rdomain));
  933         SHA512Update(&ctx, &tp->t_inpcb->inp_lport, sizeof(u_short));
  934         SHA512Update(&ctx, &tp->t_inpcb->inp_fport, sizeof(u_short));
  935         if (tp->pf == AF_INET6) {
  936                 SHA512Update(&ctx, &tp->t_inpcb->inp_laddr6,
  937                     sizeof(struct in6_addr));
  938                 SHA512Update(&ctx, &tp->t_inpcb->inp_faddr6,
  939                     sizeof(struct in6_addr));
  940         } else {
  941                 SHA512Update(&ctx, &tp->t_inpcb->inp_laddr,
  942                     sizeof(struct in_addr));
  943                 SHA512Update(&ctx, &tp->t_inpcb->inp_faddr,
  944                     sizeof(struct in_addr));
  945         }
  946         SHA512Final(digest.bytes, &ctx);
  947         tp->iss = digest.words[0] + iss;
  948         tp->ts_modulate = digest.words[1];
  949 }
  950 
  951 #ifdef TCP_SIGNATURE
  952 int
  953 tcp_signature_tdb_attach(void)
  954 {
  955         return (0);
  956 }
  957 
  958 int
  959 tcp_signature_tdb_init(struct tdb *tdbp, const struct xformsw *xsp,
  960     struct ipsecinit *ii)
  961 {
  962         if ((ii->ii_authkeylen < 1) || (ii->ii_authkeylen > 80))
  963                 return (EINVAL);
  964 
  965         tdbp->tdb_amxkey = malloc(ii->ii_authkeylen, M_XDATA, M_NOWAIT);
  966         if (tdbp->tdb_amxkey == NULL)
  967                 return (ENOMEM);
  968         memcpy(tdbp->tdb_amxkey, ii->ii_authkey, ii->ii_authkeylen);
  969         tdbp->tdb_amxkeylen = ii->ii_authkeylen;
  970 
  971         return (0);
  972 }
  973 
  974 int
  975 tcp_signature_tdb_zeroize(struct tdb *tdbp)
  976 {
  977         if (tdbp->tdb_amxkey) {
  978                 explicit_bzero(tdbp->tdb_amxkey, tdbp->tdb_amxkeylen);
  979                 free(tdbp->tdb_amxkey, M_XDATA, tdbp->tdb_amxkeylen);
  980                 tdbp->tdb_amxkey = NULL;
  981         }
  982 
  983         return (0);
  984 }
  985 
  986 int
  987 tcp_signature_tdb_input(struct mbuf **mp, struct tdb *tdbp, int skip,
  988     int protoff)
  989 {
  990         m_freemp(mp);
  991         return (IPPROTO_DONE);
  992 }
  993 
  994 int
  995 tcp_signature_tdb_output(struct mbuf *m, struct tdb *tdbp, int skip,
  996     int protoff)
  997 {
  998         m_freem(m);
  999         return (EINVAL);
 1000 }
 1001 
 1002 int
 1003 tcp_signature_apply(caddr_t fstate, caddr_t data, unsigned int len)
 1004 {
 1005         MD5Update((MD5_CTX *)fstate, (char *)data, len);
 1006         return 0;
 1007 }
 1008 
 1009 int
 1010 tcp_signature(struct tdb *tdb, int af, struct mbuf *m, struct tcphdr *th,
 1011     int iphlen, int doswap, char *sig)
 1012 {
 1013         MD5_CTX ctx;
 1014         int len;
 1015         struct tcphdr th0;
 1016 
 1017         MD5Init(&ctx);
 1018 
 1019         switch(af) {
 1020         case 0:
 1021         case AF_INET: {
 1022                 struct ippseudo ippseudo;
 1023                 struct ip *ip;
 1024 
 1025                 ip = mtod(m, struct ip *);
 1026 
 1027                 ippseudo.ippseudo_src = ip->ip_src;
 1028                 ippseudo.ippseudo_dst = ip->ip_dst;
 1029                 ippseudo.ippseudo_pad = 0;
 1030                 ippseudo.ippseudo_p = IPPROTO_TCP;
 1031                 ippseudo.ippseudo_len = htons(m->m_pkthdr.len - iphlen);
 1032 
 1033                 MD5Update(&ctx, (char *)&ippseudo,
 1034                     sizeof(struct ippseudo));
 1035                 break;
 1036                 }
 1037 #ifdef INET6
 1038         case AF_INET6: {
 1039                 struct ip6_hdr_pseudo ip6pseudo;
 1040                 struct ip6_hdr *ip6;
 1041 
 1042                 ip6 = mtod(m, struct ip6_hdr *);
 1043                 bzero(&ip6pseudo, sizeof(ip6pseudo));
 1044                 ip6pseudo.ip6ph_src = ip6->ip6_src;
 1045                 ip6pseudo.ip6ph_dst = ip6->ip6_dst;
 1046                 in6_clearscope(&ip6pseudo.ip6ph_src);
 1047                 in6_clearscope(&ip6pseudo.ip6ph_dst);
 1048                 ip6pseudo.ip6ph_nxt = IPPROTO_TCP;
 1049                 ip6pseudo.ip6ph_len = htonl(m->m_pkthdr.len - iphlen);
 1050 
 1051                 MD5Update(&ctx, (char *)&ip6pseudo,
 1052                     sizeof(ip6pseudo));
 1053                 break;
 1054                 }
 1055 #endif
 1056         }
 1057 
 1058         th0 = *th;
 1059         th0.th_sum = 0;
 1060 
 1061         if (doswap) {
 1062                 th0.th_seq = htonl(th0.th_seq);
 1063                 th0.th_ack = htonl(th0.th_ack);
 1064                 th0.th_win = htons(th0.th_win);
 1065                 th0.th_urp = htons(th0.th_urp);
 1066         }
 1067         MD5Update(&ctx, (char *)&th0, sizeof(th0));
 1068 
 1069         len = m->m_pkthdr.len - iphlen - th->th_off * sizeof(uint32_t);
 1070 
 1071         if (len > 0 &&
 1072             m_apply(m, iphlen + th->th_off * sizeof(uint32_t), len,
 1073             tcp_signature_apply, (caddr_t)&ctx))
 1074                 return (-1); 
 1075 
 1076         MD5Update(&ctx, tdb->tdb_amxkey, tdb->tdb_amxkeylen);
 1077         MD5Final(sig, &ctx);
 1078 
 1079         return (0);
 1080 }
 1081 #endif /* TCP_SIGNATURE */

Cache object: 563c43c8f90016feae56390341c1010c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.