The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_subr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: tcp_subr.c,v 1.160.2.5 2004/09/19 15:38:01 he Exp $    */
    2 
    3 /*
    4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the project nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  */
   31 
   32 /*-
   33  * Copyright (c) 1997, 1998, 2000, 2001 The NetBSD Foundation, Inc.
   34  * All rights reserved.
   35  *
   36  * This code is derived from software contributed to The NetBSD Foundation
   37  * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
   38  * Facility, NASA Ames Research Center.
   39  *
   40  * Redistribution and use in source and binary forms, with or without
   41  * modification, are permitted provided that the following conditions
   42  * are met:
   43  * 1. Redistributions of source code must retain the above copyright
   44  *    notice, this list of conditions and the following disclaimer.
   45  * 2. Redistributions in binary form must reproduce the above copyright
   46  *    notice, this list of conditions and the following disclaimer in the
   47  *    documentation and/or other materials provided with the distribution.
   48  * 3. All advertising materials mentioning features or use of this software
   49  *    must display the following acknowledgement:
   50  *      This product includes software developed by the NetBSD
   51  *      Foundation, Inc. and its contributors.
   52  * 4. Neither the name of The NetBSD Foundation nor the names of its
   53  *    contributors may be used to endorse or promote products derived
   54  *    from this software without specific prior written permission.
   55  *
   56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   66  * POSSIBILITY OF SUCH DAMAGE.
   67  */
   68 
   69 /*
   70  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
   71  *      The Regents of the University of California.  All rights reserved.
   72  *
   73  * Redistribution and use in source and binary forms, with or without
   74  * modification, are permitted provided that the following conditions
   75  * are met:
   76  * 1. Redistributions of source code must retain the above copyright
   77  *    notice, this list of conditions and the following disclaimer.
   78  * 2. Redistributions in binary form must reproduce the above copyright
   79  *    notice, this list of conditions and the following disclaimer in the
   80  *    documentation and/or other materials provided with the distribution.
   81  * 3. Neither the name of the University nor the names of its contributors
   82  *    may be used to endorse or promote products derived from this software
   83  *    without specific prior written permission.
   84  *
   85  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   86  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   87  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   88  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   89  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   90  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   91  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   92  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   93  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   94  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   95  * SUCH DAMAGE.
   96  *
   97  *      @(#)tcp_subr.c  8.2 (Berkeley) 5/24/95
   98  */
   99 
  100 #include <sys/cdefs.h>
  101 __KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.160.2.5 2004/09/19 15:38:01 he Exp $");
  102 
  103 #include "opt_inet.h"
  104 #include "opt_ipsec.h"
  105 #include "opt_tcp_compat_42.h"
  106 #include "opt_inet_csum.h"
  107 #include "opt_mbuftrace.h"
  108 #include "rnd.h"
  109 
  110 #include <sys/param.h>
  111 #include <sys/proc.h>
  112 #include <sys/systm.h>
  113 #include <sys/malloc.h>
  114 #include <sys/mbuf.h>
  115 #include <sys/socket.h>
  116 #include <sys/socketvar.h>
  117 #include <sys/protosw.h>
  118 #include <sys/errno.h>
  119 #include <sys/kernel.h>
  120 #include <sys/pool.h>
  121 #if NRND > 0
  122 #include <sys/md5.h>
  123 #include <sys/rnd.h>
  124 #endif
  125 
  126 #include <net/route.h>
  127 #include <net/if.h>
  128 
  129 #include <netinet/in.h>
  130 #include <netinet/in_systm.h>
  131 #include <netinet/ip.h>
  132 #include <netinet/in_pcb.h>
  133 #include <netinet/ip_var.h>
  134 #include <netinet/ip_icmp.h>
  135 
  136 #ifdef INET6
  137 #ifndef INET
  138 #include <netinet/in.h>
  139 #endif
  140 #include <netinet/ip6.h>
  141 #include <netinet6/in6_pcb.h>
  142 #include <netinet6/ip6_var.h>
  143 #include <netinet6/in6_var.h>
  144 #include <netinet6/ip6protosw.h>
  145 #include <netinet/icmp6.h>
  146 #include <netinet6/nd6.h>
  147 #endif
  148 
  149 #include <netinet/tcp.h>
  150 #include <netinet/tcp_fsm.h>
  151 #include <netinet/tcp_seq.h>
  152 #include <netinet/tcp_timer.h>
  153 #include <netinet/tcp_var.h>
  154 #include <netinet/tcpip.h>
  155 
  156 #ifdef IPSEC
  157 #include <netinet6/ipsec.h>
  158 #endif /*IPSEC*/
  159 
  160 #ifdef FAST_IPSEC
  161 #include <netipsec/ipsec.h>
  162 #ifdef INET6
  163 #include <netipsec/ipsec6.h>
  164 #endif
  165 #endif  /* FAST_IPSEC*/
  166 
  167 
  168 struct  inpcbtable tcbtable;    /* head of queue of active tcpcb's */
  169 struct  tcpstat tcpstat;        /* tcp statistics */
  170 u_int32_t tcp_now;              /* for RFC 1323 timestamps */
  171 
  172 /* patchable/settable parameters for tcp (plain globals with compile-time initial values) */
  173 int     tcp_mssdflt = TCP_MSS;  /* starts out as the compile-time TCP_MSS */
  174 int     tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;       /* TCPTV_SRTTDFLT divided by PR_SLOWHZ */
  175 int     tcp_do_rfc1323 = 1;     /* window scaling / timestamps (obsolete) */
  176 #if NRND > 0
  177 int     tcp_do_rfc1948 = 0;     /* ISS by cryptographic hash; only built when NRND > 0 */
  178 #endif
  179 int     tcp_do_sack = 1;        /* selective acknowledgement */
  180 int     tcp_do_win_scale = 1;   /* RFC1323 window scaling */
  181 int     tcp_do_timestamps = 1;  /* RFC1323 timestamps */
  182 int     tcp_do_newreno = 1;     /* Use the New Reno algorithms */
  183 int     tcp_ack_on_push = 0;    /* set to enable immediate ACK-on-PUSH */
  184 #ifndef TCP_INIT_WIN    /* may be overridden at build time */
  185 #define TCP_INIT_WIN    0       /* initial slow start window */
  186 #endif
  187 #ifndef TCP_INIT_WIN_LOCAL      /* may be overridden at build time */
  188 #define TCP_INIT_WIN_LOCAL 4    /* initial slow start window for local nets */
  189 #endif
  190 int     tcp_init_win = TCP_INIT_WIN;    /* run-time copy of TCP_INIT_WIN */
  191 int     tcp_init_win_local = TCP_INIT_WIN_LOCAL;        /* run-time copy of TCP_INIT_WIN_LOCAL */
  192 int     tcp_mss_ifmtu = 0;      /* NOTE(review): not used in the visible part of this file; presumably ties the MSS to the interface MTU -- confirm */
  193 #ifdef TCP_COMPAT_42
  194 int     tcp_compat_42 = 1;      /* TCP_COMPAT_42 option defined: on by default */
  195 #else
  196 int     tcp_compat_42 = 0;      /* off by default; tcp_respond() picks tlen 1 vs 0 from this on its template path */
  197 #endif
  198 int     tcp_rst_ppslim = 100;   /* 100pps */
  199 int     tcp_ackdrop_ppslim = 100;       /* 100pps */
  200 
  201 /* tcb hash */
  202 #ifndef TCBHASHSIZE     /* may be overridden at build time */
  203 #define TCBHASHSIZE     128
  204 #endif
  205 int     tcbhashsize = TCBHASHSIZE;      /* tcp_init() passes this to in_pcbinit() for both size arguments */
  206 
  207 /* syn hash parameters */
  208 #define TCP_SYN_HASH_SIZE       293
  209 #define TCP_SYN_BUCKET_SIZE     35
  210 int     tcp_syn_cache_size = TCP_SYN_HASH_SIZE;
  211 int     tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;    /* hash size times bucket size */
  212 int     tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;   /* three times the bucket size */
  213 struct  syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE];        /* TCP_SYN_HASH_SIZE heads */
  214 
  215 int     tcp_freeq __P((struct tcpcb *));
  216 
  217 #ifdef INET
  218 void    tcp_mtudisc_callback __P((struct in_addr));
  219 #endif
  220 #ifdef INET6
  221 void    tcp6_mtudisc_callback __P((struct in6_addr *));
  222 #endif
  223 
  224 void    tcp_mtudisc __P((struct inpcb *, int));
  225 #ifdef INET6
  226 void    tcp6_mtudisc __P((struct in6pcb *, int));
  227 #endif
  228 
  229 struct pool tcpcb_pool;
  230 
  231 #ifdef TCP_CSUM_COUNTERS
  232 #include <sys/device.h>
  233 
  234 struct evcnt tcp_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  235     NULL, "tcp", "hwcsum bad");
  236 struct evcnt tcp_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  237     NULL, "tcp", "hwcsum ok");
  238 struct evcnt tcp_hwcsum_data = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  239     NULL, "tcp", "hwcsum data");
  240 struct evcnt tcp_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  241     NULL, "tcp", "swcsum");
  242 #endif /* TCP_CSUM_COUNTERS */
  243 
  244 #ifdef TCP_OUTPUT_COUNTERS
  245 #include <sys/device.h>
  246 
  247 struct evcnt tcp_output_bigheader = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  248     NULL, "tcp", "output big header");
  249 struct evcnt tcp_output_predict_hit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  250     NULL, "tcp", "output predict hit");
  251 struct evcnt tcp_output_predict_miss = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  252     NULL, "tcp", "output predict miss");
  253 struct evcnt tcp_output_copysmall = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  254     NULL, "tcp", "output copy small");
  255 struct evcnt tcp_output_copybig = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  256     NULL, "tcp", "output copy big");
  257 struct evcnt tcp_output_refbig = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  258     NULL, "tcp", "output reference big");
  259 #endif /* TCP_OUTPUT_COUNTERS */
  260 
  261 #ifdef TCP_REASS_COUNTERS
  262 #include <sys/device.h>
  263 
  264 struct evcnt tcp_reass_ = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  265     NULL, "tcp_reass", "calls");
  266 struct evcnt tcp_reass_empty = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  267     &tcp_reass_, "tcp_reass", "insert into empty queue");
  268 struct evcnt tcp_reass_iteration[8] = {
  269     EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", ">7 iterations"),
  270     EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "1 iteration"),
  271     EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "2 iterations"),
  272     EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "3 iterations"),
  273     EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "4 iterations"),
  274     EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "5 iterations"),
  275     EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "6 iterations"),
  276     EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "7 iterations"),
  277 };
  278 struct evcnt tcp_reass_prependfirst = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  279     &tcp_reass_, "tcp_reass", "prepend to first");
  280 struct evcnt tcp_reass_prepend = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  281     &tcp_reass_, "tcp_reass", "prepend");
  282 struct evcnt tcp_reass_insert = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  283     &tcp_reass_, "tcp_reass", "insert");
  284 struct evcnt tcp_reass_inserttail = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  285     &tcp_reass_, "tcp_reass", "insert at tail");
  286 struct evcnt tcp_reass_append = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  287     &tcp_reass_, "tcp_reass", "append");
  288 struct evcnt tcp_reass_appendtail = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  289     &tcp_reass_, "tcp_reass", "append to tail fragment");
  290 struct evcnt tcp_reass_overlaptail = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  291     &tcp_reass_, "tcp_reass", "overlap at end");
  292 struct evcnt tcp_reass_overlapfront = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  293     &tcp_reass_, "tcp_reass", "overlap at start");
  294 struct evcnt tcp_reass_segdup = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  295     &tcp_reass_, "tcp_reass", "duplicate segment");
  296 struct evcnt tcp_reass_fragdup = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
  297     &tcp_reass_, "tcp_reass", "duplicate fragment");
  298 
  299 #endif /* TCP_REASS_COUNTERS */
  300 
  301 #ifdef MBUFTRACE
  302 struct mowner tcp_mowner = { "tcp" };
  303 struct mowner tcp_rx_mowner = { "tcp", "rx" };
  304 struct mowner tcp_tx_mowner = { "tcp", "tx" };
  305 #endif
  306 
  307 /*
  308  * Tcp initialization
  309  */
  310 void
  311 tcp_init()
  312 {
  313         int hlen;
  314 
  315         /* Initialize the TCPCB template. */
  316         tcp_tcpcb_template();
  317 
  318         pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcbpl",
  319             NULL);
  320         in_pcbinit(&tcbtable, tcbhashsize, tcbhashsize);
  321 
  322         pool_init(&tcpipqent_pool, sizeof(struct ipqent), 0, 0, 0, "tcpipqepl",
  323             NULL);
  324 
  325         hlen = sizeof(struct ip) + sizeof(struct tcphdr);
  326 #ifdef INET6
  327         if (sizeof(struct ip) < sizeof(struct ip6_hdr))
  328                 hlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
  329 #endif
  330         if (max_protohdr < hlen)
  331                 max_protohdr = hlen;
  332         if (max_linkhdr + hlen > MHLEN)
  333                 panic("tcp_init");
  334 
  335 #ifdef INET
  336         icmp_mtudisc_callback_register(tcp_mtudisc_callback);
  337 #endif
  338 #ifdef INET6
  339         icmp6_mtudisc_callback_register(tcp6_mtudisc_callback);
  340 #endif
  341 
  342         /* Initialize timer state. */
  343         tcp_timer_init();
  344 
  345         /* Initialize the compressed state engine. */
  346         syn_cache_init();
  347 
  348 #ifdef TCP_CSUM_COUNTERS
  349         evcnt_attach_static(&tcp_hwcsum_bad);
  350         evcnt_attach_static(&tcp_hwcsum_ok);
  351         evcnt_attach_static(&tcp_hwcsum_data);
  352         evcnt_attach_static(&tcp_swcsum);
  353 #endif /* TCP_CSUM_COUNTERS */
  354 
  355 #ifdef TCP_OUTPUT_COUNTERS
  356         evcnt_attach_static(&tcp_output_bigheader);
  357         evcnt_attach_static(&tcp_output_predict_hit);
  358         evcnt_attach_static(&tcp_output_predict_miss);
  359         evcnt_attach_static(&tcp_output_copysmall);
  360         evcnt_attach_static(&tcp_output_copybig);
  361         evcnt_attach_static(&tcp_output_refbig);
  362 #endif /* TCP_OUTPUT_COUNTERS */
  363 
  364 #ifdef TCP_REASS_COUNTERS
  365         evcnt_attach_static(&tcp_reass_);
  366         evcnt_attach_static(&tcp_reass_empty);
  367         evcnt_attach_static(&tcp_reass_iteration[0]);
  368         evcnt_attach_static(&tcp_reass_iteration[1]);
  369         evcnt_attach_static(&tcp_reass_iteration[2]);
  370         evcnt_attach_static(&tcp_reass_iteration[3]);
  371         evcnt_attach_static(&tcp_reass_iteration[4]);
  372         evcnt_attach_static(&tcp_reass_iteration[5]);
  373         evcnt_attach_static(&tcp_reass_iteration[6]);
  374         evcnt_attach_static(&tcp_reass_iteration[7]);
  375         evcnt_attach_static(&tcp_reass_prependfirst);
  376         evcnt_attach_static(&tcp_reass_prepend);
  377         evcnt_attach_static(&tcp_reass_insert);
  378         evcnt_attach_static(&tcp_reass_inserttail);
  379         evcnt_attach_static(&tcp_reass_append);
  380         evcnt_attach_static(&tcp_reass_appendtail);
  381         evcnt_attach_static(&tcp_reass_overlaptail);
  382         evcnt_attach_static(&tcp_reass_overlapfront);
  383         evcnt_attach_static(&tcp_reass_segdup);
  384         evcnt_attach_static(&tcp_reass_fragdup);
  385 #endif /* TCP_REASS_COUNTERS */
  386 
  387         MOWNER_ATTACH(&tcp_tx_mowner);
  388         MOWNER_ATTACH(&tcp_rx_mowner);
  389         MOWNER_ATTACH(&tcp_mowner);
  390 }
  391 
  392 /*
  393  * Create template to be used to send tcp packets on a connection.
  394  * Call after host entry created, allocates an mbuf and fills
  395  * in a skeletal tcp/ip header, minimizing the amount of work
  396  * necessary when the connection is used.
  397  */
  398 struct mbuf *
  399 tcp_template(tp)
  400         struct tcpcb *tp;
  401 {
  402         struct inpcb *inp = tp->t_inpcb;
  403 #ifdef INET6
  404         struct in6pcb *in6p = tp->t_in6pcb;
  405 #endif
  406         struct tcphdr *n;
  407         struct mbuf *m;
  408         int hlen;
  409 
  410         switch (tp->t_family) {
  411         case AF_INET:
  412                 hlen = sizeof(struct ip);
  413                 if (inp)
  414                         break;
  415 #ifdef INET6
  416                 if (in6p) {
  417                         /* mapped addr case */
  418                         if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)
  419                          && IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr))
  420                                 break;
  421                 }
  422 #endif
  423                 return NULL;    /*EINVAL*/
  424 #ifdef INET6
  425         case AF_INET6:
  426                 hlen = sizeof(struct ip6_hdr);
  427                 if (in6p) {
  428                         /* more sainty check? */
  429                         break;
  430                 }
  431                 return NULL;    /*EINVAL*/
  432 #endif
  433         default:
  434                 hlen = 0;       /*pacify gcc*/
  435                 return NULL;    /*EAFNOSUPPORT*/
  436         }
  437 #ifdef DIAGNOSTIC
  438         if (hlen + sizeof(struct tcphdr) > MCLBYTES)
  439                 panic("mclbytes too small for t_template");
  440 #endif
  441         m = tp->t_template;
  442         if (m && m->m_len == hlen + sizeof(struct tcphdr))
  443                 ;
  444         else {
  445                 if (m)
  446                         m_freem(m);
  447                 m = tp->t_template = NULL;
  448                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
  449                 if (m && hlen + sizeof(struct tcphdr) > MHLEN) {
  450                         MCLGET(m, M_DONTWAIT);
  451                         if ((m->m_flags & M_EXT) == 0) {
  452                                 m_free(m);
  453                                 m = NULL;
  454                         }
  455                 }
  456                 if (m == NULL)
  457                         return NULL;
  458                 MCLAIM(m, &tcp_mowner);
  459                 m->m_pkthdr.len = m->m_len = hlen + sizeof(struct tcphdr);
  460         }
  461 
  462         bzero(mtod(m, caddr_t), m->m_len);
  463 
  464         n = (struct tcphdr *)(mtod(m, caddr_t) + hlen);
  465 
  466         switch (tp->t_family) {
  467         case AF_INET:
  468             {
  469                 struct ipovly *ipov;
  470                 mtod(m, struct ip *)->ip_v = 4;
  471                 mtod(m, struct ip *)->ip_hl = hlen >> 2;
  472                 ipov = mtod(m, struct ipovly *);
  473                 ipov->ih_pr = IPPROTO_TCP;
  474                 ipov->ih_len = htons(sizeof(struct tcphdr));
  475                 if (inp) {
  476                         ipov->ih_src = inp->inp_laddr;
  477                         ipov->ih_dst = inp->inp_faddr;
  478                 }
  479 #ifdef INET6
  480                 else if (in6p) {
  481                         /* mapped addr case */
  482                         bcopy(&in6p->in6p_laddr.s6_addr32[3], &ipov->ih_src,
  483                                 sizeof(ipov->ih_src));
  484                         bcopy(&in6p->in6p_faddr.s6_addr32[3], &ipov->ih_dst,
  485                                 sizeof(ipov->ih_dst));
  486                 }
  487 #endif
  488                 /*
  489                  * Compute the pseudo-header portion of the checksum
  490                  * now.  We incrementally add in the TCP option and
  491                  * payload lengths later, and then compute the TCP
  492                  * checksum right before the packet is sent off onto
  493                  * the wire.
  494                  */
  495                 n->th_sum = in_cksum_phdr(ipov->ih_src.s_addr,
  496                     ipov->ih_dst.s_addr,
  497                     htons(sizeof(struct tcphdr) + IPPROTO_TCP));
  498                 break;
  499             }
  500 #ifdef INET6
  501         case AF_INET6:
  502             {
  503                 struct ip6_hdr *ip6;
  504                 mtod(m, struct ip *)->ip_v = 6;
  505                 ip6 = mtod(m, struct ip6_hdr *);
  506                 ip6->ip6_nxt = IPPROTO_TCP;
  507                 ip6->ip6_plen = htons(sizeof(struct tcphdr));
  508                 ip6->ip6_src = in6p->in6p_laddr;
  509                 ip6->ip6_dst = in6p->in6p_faddr;
  510                 ip6->ip6_flow = in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK;
  511                 if (ip6_auto_flowlabel) {
  512                         ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK;
  513                         ip6->ip6_flow |=
  514                             (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
  515                 }
  516                 ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
  517                 ip6->ip6_vfc |= IPV6_VERSION;
  518 
  519                 /*
  520                  * Compute the pseudo-header portion of the checksum
  521                  * now.  We incrementally add in the TCP option and
  522                  * payload lengths later, and then compute the TCP
  523                  * checksum right before the packet is sent off onto
  524                  * the wire.
  525                  */
  526                 n->th_sum = in6_cksum_phdr(&in6p->in6p_laddr,
  527                     &in6p->in6p_faddr, htonl(sizeof(struct tcphdr)),
  528                     htonl(IPPROTO_TCP));
  529                 break;
  530             }
  531 #endif
  532         }
  533         if (inp) {
  534                 n->th_sport = inp->inp_lport;
  535                 n->th_dport = inp->inp_fport;
  536         }
  537 #ifdef INET6
  538         else if (in6p) {
  539                 n->th_sport = in6p->in6p_lport;
  540                 n->th_dport = in6p->in6p_fport;
  541         }
  542 #endif
  543         n->th_seq = 0;
  544         n->th_ack = 0;
  545         n->th_x2 = 0;
  546         n->th_off = 5;
  547         n->th_flags = 0;
  548         n->th_win = 0;
  549         n->th_urp = 0;
  550         return (m);
  551 }
  552 
  553 /*
  554  * Send a single message to the TCP at address specified by
  555  * the given TCP/IP header.  If m == 0, then we make a copy
  556  * of the header held in `template' and send directly to the addressed
  557  * host.  This is used to force keep alive messages out using the TCP
  558  * template for a connection tp->t_template.  If flags are given
  559  * then we send a message back to the TCP which originated the
  560  * segment whose TCP header is th0, and discard the mbuf containing
  561  * it and any other attached mbufs.
  562  *
  563  * In any case the ack and sequence number of the transmitted
  564  * segment are as specified by the parameters.
  565  */
  566 int
  567 tcp_respond(tp, template, m, th0, ack, seq, flags)
  568         struct tcpcb *tp;
  569         struct mbuf *template;
  570         struct mbuf *m;
  571         struct tcphdr *th0;
  572         tcp_seq ack, seq;
  573         int flags;
  574 {
  575         struct route *ro;
  576         int error, tlen, win = 0;
  577         int hlen;
  578         struct ip *ip;
  579 #ifdef INET6
  580         struct ip6_hdr *ip6;
  581 #endif
  582         int family;     /* family on packet, not inpcb/in6pcb! */
  583         struct tcphdr *th;
  584         struct socket *so;
  585 
  586         if (tp != NULL && (flags & TH_RST) == 0) {
  587 #ifdef DIAGNOSTIC
  588                 if (tp->t_inpcb && tp->t_in6pcb)
  589                         panic("tcp_respond: both t_inpcb and t_in6pcb are set");
  590 #endif
  591 #ifdef INET
  592                 if (tp->t_inpcb)
  593                         win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
  594 #endif
  595 #ifdef INET6
  596                 if (tp->t_in6pcb)
  597                         win = sbspace(&tp->t_in6pcb->in6p_socket->so_rcv);
  598 #endif
  599         }
  600 
  601         th = NULL;      /* Quell uninitialized warning */
  602         ip = NULL;
  603 #ifdef INET6
  604         ip6 = NULL;
  605 #endif
  606         if (m == 0) {
  607                 if (!template)
  608                         return EINVAL;
  609 
  610                 /* get family information from template */
  611                 switch (mtod(template, struct ip *)->ip_v) {
  612                 case 4:
  613                         family = AF_INET;
  614                         hlen = sizeof(struct ip);
  615                         break;
  616 #ifdef INET6
  617                 case 6:
  618                         family = AF_INET6;
  619                         hlen = sizeof(struct ip6_hdr);
  620                         break;
  621 #endif
  622                 default:
  623                         return EAFNOSUPPORT;
  624                 }
  625 
  626                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
  627                 if (m) {
  628                         MCLAIM(m, &tcp_tx_mowner);
  629                         MCLGET(m, M_DONTWAIT);
  630                         if ((m->m_flags & M_EXT) == 0) {
  631                                 m_free(m);
  632                                 m = NULL;
  633                         }
  634                 }
  635                 if (m == NULL)
  636                         return (ENOBUFS);
  637 
  638                 if (tcp_compat_42)
  639                         tlen = 1;
  640                 else
  641                         tlen = 0;
  642 
  643                 m->m_data += max_linkhdr;
  644                 bcopy(mtod(template, caddr_t), mtod(m, caddr_t),
  645                         template->m_len);
  646                 switch (family) {
  647                 case AF_INET:
  648                         ip = mtod(m, struct ip *);
  649                         th = (struct tcphdr *)(ip + 1);
  650                         break;
  651 #ifdef INET6
  652                 case AF_INET6:
  653                         ip6 = mtod(m, struct ip6_hdr *);
  654                         th = (struct tcphdr *)(ip6 + 1);
  655                         break;
  656 #endif
  657 #if 0
  658                 default:
  659                         /* no one will visit here */
  660                         m_freem(m);
  661                         return EAFNOSUPPORT;
  662 #endif
  663                 }
  664                 flags = TH_ACK;
  665         } else {
  666 
  667                 if ((m->m_flags & M_PKTHDR) == 0) {
  668 #if 0
  669                         printf("non PKTHDR to tcp_respond\n");
  670 #endif
  671                         m_freem(m);
  672                         return EINVAL;
  673                 }
  674 #ifdef DIAGNOSTIC
  675                 if (!th0)
  676                         panic("th0 == NULL in tcp_respond");
  677 #endif
  678 
  679                 /* get family information from m */
  680                 switch (mtod(m, struct ip *)->ip_v) {
  681                 case 4:
  682                         family = AF_INET;
  683                         hlen = sizeof(struct ip);
  684                         ip = mtod(m, struct ip *);
  685                         break;
  686 #ifdef INET6
  687                 case 6:
  688                         family = AF_INET6;
  689                         hlen = sizeof(struct ip6_hdr);
  690                         ip6 = mtod(m, struct ip6_hdr *);
  691                         break;
  692 #endif
  693                 default:
  694                         m_freem(m);
  695                         return EAFNOSUPPORT;
  696                 }
  697                 if ((flags & TH_SYN) == 0 || sizeof(*th0) > (th0->th_off << 2))
  698                         tlen = sizeof(*th0);
  699                 else
  700                         tlen = th0->th_off << 2;
  701 
  702                 if (m->m_len > hlen + tlen && (m->m_flags & M_EXT) == 0 &&
  703                     mtod(m, caddr_t) + hlen == (caddr_t)th0) {
  704                         m->m_len = hlen + tlen;
  705                         m_freem(m->m_next);
  706                         m->m_next = NULL;
  707                 } else {
  708                         struct mbuf *n;
  709 
  710 #ifdef DIAGNOSTIC
  711                         if (max_linkhdr + hlen + tlen > MCLBYTES) {
  712                                 m_freem(m);
  713                                 return EMSGSIZE;
  714                         }
  715 #endif
  716                         MGETHDR(n, M_DONTWAIT, MT_HEADER);
  717                         if (n && max_linkhdr + hlen + tlen > MHLEN) {
  718                                 MCLGET(n, M_DONTWAIT);
  719                                 if ((n->m_flags & M_EXT) == 0) {
  720                                         m_freem(n);
  721                                         n = NULL;
  722                                 }
  723                         }
  724                         if (!n) {
  725                                 m_freem(m);
  726                                 return ENOBUFS;
  727                         }
  728 
  729                         MCLAIM(n, &tcp_tx_mowner);
  730                         n->m_data += max_linkhdr;
  731                         n->m_len = hlen + tlen;
  732                         m_copyback(n, 0, hlen, mtod(m, caddr_t));
  733                         m_copyback(n, hlen, tlen, (caddr_t)th0);
  734 
  735                         m_freem(m);
  736                         m = n;
  737                         n = NULL;
  738                 }
  739 
  740 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
  741                 switch (family) {
  742                 case AF_INET:
  743                         ip = mtod(m, struct ip *);
  744                         th = (struct tcphdr *)(ip + 1);
  745                         ip->ip_p = IPPROTO_TCP;
  746                         xchg(ip->ip_dst, ip->ip_src, struct in_addr);
  747                         ip->ip_p = IPPROTO_TCP;
  748                         break;
  749 #ifdef INET6
  750                 case AF_INET6:
  751                         ip6 = mtod(m, struct ip6_hdr *);
  752                         th = (struct tcphdr *)(ip6 + 1);
  753                         ip6->ip6_nxt = IPPROTO_TCP;
  754                         xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
  755                         ip6->ip6_nxt = IPPROTO_TCP;
  756                         break;
  757 #endif
  758 #if 0
  759                 default:
  760                         /* noone will visit here */
  761                         m_freem(m);
  762                         return EAFNOSUPPORT;
  763 #endif
  764                 }
  765                 xchg(th->th_dport, th->th_sport, u_int16_t);
  766 #undef xchg
  767                 tlen = 0;       /*be friendly with the following code*/
  768         }
  769         th->th_seq = htonl(seq);
  770         th->th_ack = htonl(ack);
  771         th->th_x2 = 0;
  772         if ((flags & TH_SYN) == 0) {
  773                 if (tp)
  774                         win >>= tp->rcv_scale;
  775                 if (win > TCP_MAXWIN)
  776                         win = TCP_MAXWIN;
  777                 th->th_win = htons((u_int16_t)win);
  778                 th->th_off = sizeof (struct tcphdr) >> 2;
  779                 tlen += sizeof(*th);
  780         } else
  781                 tlen += th->th_off << 2;
  782         m->m_len = hlen + tlen;
  783         m->m_pkthdr.len = hlen + tlen;
  784         m->m_pkthdr.rcvif = (struct ifnet *) 0;
  785         th->th_flags = flags;
  786         th->th_urp = 0;
  787 
  788         switch (family) {
  789 #ifdef INET
  790         case AF_INET:
  791             {
  792                 struct ipovly *ipov = (struct ipovly *)ip;
  793                 bzero(ipov->ih_x1, sizeof ipov->ih_x1);
  794                 ipov->ih_len = htons((u_int16_t)tlen);
  795 
  796                 th->th_sum = 0;
  797                 th->th_sum = in_cksum(m, hlen + tlen);
  798                 ip->ip_len = htons(hlen + tlen);
  799                 ip->ip_ttl = ip_defttl;
  800                 break;
  801             }
  802 #endif
  803 #ifdef INET6
  804         case AF_INET6:
  805             {
  806                 th->th_sum = 0;
  807                 th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
  808                                 tlen);
  809                 ip6->ip6_plen = ntohs(tlen);
  810                 if (tp && tp->t_in6pcb) {
  811                         struct ifnet *oifp;
  812                         ro = (struct route *)&tp->t_in6pcb->in6p_route;
  813                         oifp = ro->ro_rt ? ro->ro_rt->rt_ifp : NULL;
  814                         ip6->ip6_hlim = in6_selecthlim(tp->t_in6pcb, oifp);
  815                 } else
  816                         ip6->ip6_hlim = ip6_defhlim;
  817                 ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK;
  818                 if (ip6_auto_flowlabel) {
  819                         ip6->ip6_flow |=
  820                             (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
  821                 }
  822                 break;
  823             }
  824 #endif
  825         }
  826 
  827         if (tp && tp->t_inpcb)
  828                 so = tp->t_inpcb->inp_socket;
  829 #ifdef INET6
  830         else if (tp && tp->t_in6pcb)
  831                 so = tp->t_in6pcb->in6p_socket;
  832 #endif
  833         else
  834                 so = NULL;
  835 
  836         if (tp != NULL && tp->t_inpcb != NULL) {
  837                 ro = &tp->t_inpcb->inp_route;
  838 #ifdef DIAGNOSTIC
  839                 if (family != AF_INET)
  840                         panic("tcp_respond: address family mismatch");
  841                 if (!in_hosteq(ip->ip_dst, tp->t_inpcb->inp_faddr)) {
  842                         panic("tcp_respond: ip_dst %x != inp_faddr %x",
  843                             ntohl(ip->ip_dst.s_addr),
  844                             ntohl(tp->t_inpcb->inp_faddr.s_addr));
  845                 }
  846 #endif
  847         }
  848 #ifdef INET6
  849         else if (tp != NULL && tp->t_in6pcb != NULL) {
  850                 ro = (struct route *)&tp->t_in6pcb->in6p_route;
  851 #ifdef DIAGNOSTIC
  852                 if (family == AF_INET) {
  853                         if (!IN6_IS_ADDR_V4MAPPED(&tp->t_in6pcb->in6p_faddr))
  854                                 panic("tcp_respond: not mapped addr");
  855                         if (bcmp(&ip->ip_dst,
  856                             &tp->t_in6pcb->in6p_faddr.s6_addr32[3],
  857                             sizeof(ip->ip_dst)) != 0) {
  858                                 panic("tcp_respond: ip_dst != in6p_faddr");
  859                         }
  860                 } else if (family == AF_INET6) {
  861                         if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
  862                             &tp->t_in6pcb->in6p_faddr))
  863                                 panic("tcp_respond: ip6_dst != in6p_faddr");
  864                 } else
  865                         panic("tcp_respond: address family mismatch");
  866 #endif
  867         }
  868 #endif
  869         else
  870                 ro = NULL;
  871 
  872         switch (family) {
  873 #ifdef INET
  874         case AF_INET:
  875                 error = ip_output(m, NULL, ro,
  876                     (tp && tp->t_mtudisc ? IP_MTUDISC : 0),
  877                     (struct ip_moptions *)0, so);
  878                 break;
  879 #endif
  880 #ifdef INET6
  881         case AF_INET6:
  882                 error = ip6_output(m, NULL, (struct route_in6 *)ro, 0,
  883                     (struct ip6_moptions *)0, so, NULL);
  884                 break;
  885 #endif
  886         default:
  887                 error = EAFNOSUPPORT;
  888                 break;
  889         }
  890 
  891         return (error);
  892 }
  893 
  894 /*
  895  * Template TCPCB.  Rather than zeroing a new TCPCB and initializing
  896  * a bunch of members individually, we maintain this template for the
  897  * static and mostly-static components of the TCPCB, and copy it into
  898  * the new TCPCB instead.
  899  */
  900 static struct tcpcb tcpcb_template = {
  901         /*
  902          * If TCP_NTIMERS ever changes, we'll need to update this
  903          * initializer.
  904          */
  905         .t_timer = {
  906                 CALLOUT_INITIALIZER,
  907                 CALLOUT_INITIALIZER,
  908                 CALLOUT_INITIALIZER,
  909                 CALLOUT_INITIALIZER,
  910         },
  911         .t_delack_ch = CALLOUT_INITIALIZER,
  912 
  913         .t_srtt = TCPTV_SRTTBASE,
  914         .t_rttmin = TCPTV_MIN,
  915 
  916         .snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT,
  917         .snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT,
  918 };
  919 
  920 /*
  921  * Updates the TCPCB template whenever a parameter that would affect
  922  * the template is changed.
  923  */
  924 void
  925 tcp_tcpcb_template(void)
  926 {
  927         struct tcpcb *tp = &tcpcb_template;
  928         int flags;
  929 
  930         tp->t_peermss = tcp_mssdflt;
  931         tp->t_ourmss = tcp_mssdflt;
  932         tp->t_segsz = tcp_mssdflt;
  933 
  934         flags = 0;
  935         if (tcp_do_rfc1323 && tcp_do_win_scale)
  936                 flags |= TF_REQ_SCALE;
  937         if (tcp_do_rfc1323 && tcp_do_timestamps)
  938                 flags |= TF_REQ_TSTMP;
  939         if (tcp_do_sack == 2)
  940                 flags |= TF_WILL_SACK;
  941         else if (tcp_do_sack == 1)
  942                 flags |= TF_WILL_SACK|TF_IGNR_RXSACK;
  943         flags |= TF_CANT_TXSACK;
  944         tp->t_flags = flags;
  945 
  946         /*
  947          * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
  948          * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
  949          * reasonable initial retransmit time.
  950          */
  951         tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << (TCP_RTTVAR_SHIFT + 2 - 1);
  952         TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
  953             TCPTV_MIN, TCPTV_REXMTMAX);
  954 }
  955 
  956 /*
  957  * Create a new TCP control block, making an
  958  * empty reassembly queue and hooking it to the argument
  959  * protocol control block.
  960  */
  961 struct tcpcb *
  962 tcp_newtcpcb(family, aux)
  963         int family;     /* selects inpcb, or in6pcb */
  964         void *aux;
  965 {
  966         struct tcpcb *tp;
  967         int i;
  968 
  969         /* XXX Consider using a pool_cache for speed. */
  970         tp = pool_get(&tcpcb_pool, PR_NOWAIT);
  971         if (tp == NULL)
  972                 return (NULL);
  973         memcpy(tp, &tcpcb_template, sizeof(*tp));
  974         TAILQ_INIT(&tp->segq);
  975         TAILQ_INIT(&tp->timeq);
  976         tp->t_family = family;          /* may be overridden later on */
  977         LIST_INIT(&tp->t_sc);           /* XXX can template this */
  978 
  979         /* Don't sweat this loop; hopefully the compiler will unroll it. */
  980         for (i = 0; i < TCPT_NTIMERS; i++)
  981                 TCP_TIMER_INIT(tp, i);
  982 
  983         switch (family) {
  984         case AF_INET:
  985             {
  986                 struct inpcb *inp = (struct inpcb *)aux;
  987 
  988                 inp->inp_ip.ip_ttl = ip_defttl;
  989                 inp->inp_ppcb = (caddr_t)tp;
  990 
  991                 tp->t_inpcb = inp;
  992                 tp->t_mtudisc = ip_mtudisc;
  993                 break;
  994             }
  995 #ifdef INET6
  996         case AF_INET6:
  997             {
  998                 struct in6pcb *in6p = (struct in6pcb *)aux;
  999 
 1000                 in6p->in6p_ip6.ip6_hlim = in6_selecthlim(in6p,
 1001                         in6p->in6p_route.ro_rt ? in6p->in6p_route.ro_rt->rt_ifp
 1002                                                : NULL);
 1003                 in6p->in6p_ppcb = (caddr_t)tp;
 1004 
 1005                 tp->t_in6pcb = in6p;
 1006                 /* for IPv6, always try to run path MTU discovery */
 1007                 tp->t_mtudisc = 1;
 1008                 break;
 1009             }
 1010 #endif /* INET6 */
 1011         default:
 1012                 pool_put(&tcpcb_pool, tp);
 1013                 return (NULL);
 1014         }
 1015 
 1016         /*
 1017          * Initialize our timebase.  When we send timestamps, we take
 1018          * the delta from tcp_now -- this means each connection always
 1019          * gets a timebase of 0, which makes it, among other things,
 1020          * more difficult to determine how long a system has been up,
 1021          * and thus how many TCP sequence increments have occurred.
 1022          */
 1023         tp->ts_timebase = tcp_now;
 1024 
 1025         return (tp);
 1026 }
 1027 
 1028 /*
 1029  * Drop a TCP connection, reporting
 1030  * the specified error.  If connection is synchronized,
 1031  * then send a RST to peer.
 1032  */
 1033 struct tcpcb *
 1034 tcp_drop(tp, errno)
 1035         struct tcpcb *tp;
 1036         int errno;
 1037 {
 1038         struct socket *so = NULL;
 1039 
 1040 #ifdef DIAGNOSTIC
 1041         if (tp->t_inpcb && tp->t_in6pcb)
 1042                 panic("tcp_drop: both t_inpcb and t_in6pcb are set");
 1043 #endif
 1044 #ifdef INET
 1045         if (tp->t_inpcb)
 1046                 so = tp->t_inpcb->inp_socket;
 1047 #endif
 1048 #ifdef INET6
 1049         if (tp->t_in6pcb)
 1050                 so = tp->t_in6pcb->in6p_socket;
 1051 #endif
 1052         if (!so)
 1053                 return NULL;
 1054 
 1055         if (TCPS_HAVERCVDSYN(tp->t_state)) {
 1056                 tp->t_state = TCPS_CLOSED;
 1057                 (void) tcp_output(tp);
 1058                 tcpstat.tcps_drops++;
 1059         } else
 1060                 tcpstat.tcps_conndrops++;
 1061         if (errno == ETIMEDOUT && tp->t_softerror)
 1062                 errno = tp->t_softerror;
 1063         so->so_error = errno;
 1064         return (tcp_close(tp));
 1065 }
 1066 
 1067 /*
 1068  * Return whether this tcpcb is marked as dead, indicating
 1069  * to the calling timer function that no further action should
 1070  * be taken, as we are about to release this tcpcb.  The release
 1071  * of the storage will be done if this is the last timer running.
 1072  *
 1073  * This should be called from the callout handler function after
 1074  * callout_ack() is done, so that the number of invoking timer
 1075  * functions is 0.
 1076  */
 1077 int
 1078 tcp_isdead(tp)
 1079         struct tcpcb *tp;
 1080 {
 1081         int dead = (tp->t_flags & TF_DEAD);
 1082 
 1083         if (__predict_false(dead)) {
 1084                 if (tcp_timers_invoking(tp) > 0)
 1085                                 /* not quite there yet -- count separately? */
 1086                         return dead;
 1087                 tcpstat.tcps_delayed_free++;
 1088                 pool_put(&tcpcb_pool, tp);
 1089         }
 1090         return dead;
 1091 }
 1092 
 1093 /*
 1094  * Close a TCP control block:
 1095  *      discard all space held by the tcp
 1096  *      discard internet protocol block
 1097  *      wake up any sleepers
 1098  */
 1099 struct tcpcb *
 1100 tcp_close(tp)
 1101         struct tcpcb *tp;
 1102 {
 1103         struct inpcb *inp;
 1104 #ifdef INET6
 1105         struct in6pcb *in6p;
 1106 #endif
 1107         struct socket *so;
 1108 #ifdef RTV_RTT
 1109         struct rtentry *rt;
 1110 #endif
 1111         struct route *ro;
 1112 
 1113         inp = tp->t_inpcb;
 1114 #ifdef INET6
 1115         in6p = tp->t_in6pcb;
 1116 #endif
 1117         so = NULL;
 1118         ro = NULL;
 1119         if (inp) {
 1120                 so = inp->inp_socket;
 1121                 ro = &inp->inp_route;
 1122         }
 1123 #ifdef INET6
 1124         else if (in6p) {
 1125                 so = in6p->in6p_socket;
 1126                 ro = (struct route *)&in6p->in6p_route;
 1127         }
 1128 #endif
 1129 
 1130 #ifdef RTV_RTT
 1131         /*
 1132          * If we sent enough data to get some meaningful characteristics,
 1133          * save them in the routing entry.  'Enough' is arbitrarily
 1134          * defined as the sendpipesize (default 4K) * 16.  This would
 1135          * give us 16 rtt samples assuming we only get one sample per
 1136          * window (the usual case on a long haul net).  16 samples is
 1137          * enough for the srtt filter to converge to within 5% of the correct
 1138          * value; fewer samples and we could save a very bogus rtt.
 1139          *
 1140          * Don't update the default route's characteristics and don't
 1141          * update anything that the user "locked".
 1142          */
 1143         if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
 1144             ro && (rt = ro->ro_rt) &&
 1145             !in_nullhost(satosin(rt_key(rt))->sin_addr)) {
 1146                 u_long i = 0;
 1147 
 1148                 if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
 1149                         i = tp->t_srtt *
 1150                             ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));
 1151                         if (rt->rt_rmx.rmx_rtt && i)
 1152                                 /*
 1153                                  * filter this update to half the old & half
 1154                                  * the new values, converting scale.
 1155                                  * See route.h and tcp_var.h for a
 1156                                  * description of the scaling constants.
 1157                                  */
 1158                                 rt->rt_rmx.rmx_rtt =
 1159                                     (rt->rt_rmx.rmx_rtt + i) / 2;
 1160                         else
 1161                                 rt->rt_rmx.rmx_rtt = i;
 1162                 }
 1163                 if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
 1164                         i = tp->t_rttvar *
 1165                             ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTTVAR_SHIFT + 2));
 1166                         if (rt->rt_rmx.rmx_rttvar && i)
 1167                                 rt->rt_rmx.rmx_rttvar =
 1168                                     (rt->rt_rmx.rmx_rttvar + i) / 2;
 1169                         else
 1170                                 rt->rt_rmx.rmx_rttvar = i;
 1171                 }
 1172                 /*
 1173                  * update the pipelimit (ssthresh) if it has been updated
 1174                  * already or if a pipesize was specified & the threshhold
 1175                  * got below half the pipesize.  I.e., wait for bad news
 1176                  * before we start updating, then update on both good
 1177                  * and bad news.
 1178                  */
 1179                 if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
 1180                     (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh) ||
 1181                     i < (rt->rt_rmx.rmx_sendpipe / 2)) {
 1182                         /*
 1183                          * convert the limit from user data bytes to
 1184                          * packets then to packet data bytes.
 1185                          */
 1186                         i = (i + tp->t_segsz / 2) / tp->t_segsz;
 1187                         if (i < 2)
 1188                                 i = 2;
 1189                         i *= (u_long)(tp->t_segsz + sizeof (struct tcpiphdr));
 1190                         if (rt->rt_rmx.rmx_ssthresh)
 1191                                 rt->rt_rmx.rmx_ssthresh =
 1192                                     (rt->rt_rmx.rmx_ssthresh + i) / 2;
 1193                         else
 1194                                 rt->rt_rmx.rmx_ssthresh = i;
 1195                 }
 1196         }
 1197 #endif /* RTV_RTT */
 1198         /* free the reassembly queue, if any */
 1199         TCP_REASS_LOCK(tp);
 1200         (void) tcp_freeq(tp);
 1201         TCP_REASS_UNLOCK(tp);
 1202 
 1203         tcp_canceltimers(tp);
 1204         TCP_CLEAR_DELACK(tp);
 1205         syn_cache_cleanup(tp);
 1206 
 1207         if (tp->t_template) {
 1208                 m_free(tp->t_template);
 1209                 tp->t_template = NULL;
 1210         }
 1211         if (tcp_timers_invoking(tp))
 1212                 tp->t_flags |= TF_DEAD;
 1213         else
 1214                 pool_put(&tcpcb_pool, tp);
 1215 
 1216         if (inp) {
 1217                 inp->inp_ppcb = 0;
 1218                 soisdisconnected(so);
 1219                 in_pcbdetach(inp);
 1220         }
 1221 #ifdef INET6
 1222         else if (in6p) {
 1223                 in6p->in6p_ppcb = 0;
 1224                 soisdisconnected(so);
 1225                 in6_pcbdetach(in6p);
 1226         }
 1227 #endif
 1228         tcpstat.tcps_closed++;
 1229         return ((struct tcpcb *)0);
 1230 }
 1231 
 1232 int
 1233 tcp_freeq(tp)
 1234         struct tcpcb *tp;
 1235 {
 1236         struct ipqent *qe;
 1237         int rv = 0;
 1238 #ifdef TCPREASS_DEBUG
 1239         int i = 0;
 1240 #endif
 1241 
 1242         TCP_REASS_LOCK_CHECK(tp);
 1243 
 1244         while ((qe = TAILQ_FIRST(&tp->segq)) != NULL) {
 1245 #ifdef TCPREASS_DEBUG
 1246                 printf("tcp_freeq[%p,%d]: %u:%u(%u) 0x%02x\n",
 1247                         tp, i++, qe->ipqe_seq, qe->ipqe_seq + qe->ipqe_len,
 1248                         qe->ipqe_len, qe->ipqe_flags & (TH_SYN|TH_FIN|TH_RST));
 1249 #endif
 1250                 TAILQ_REMOVE(&tp->segq, qe, ipqe_q);
 1251                 TAILQ_REMOVE(&tp->timeq, qe, ipqe_timeq);
 1252                 m_freem(qe->ipqe_m);
 1253                 pool_put(&tcpipqent_pool, qe);
 1254                 rv = 1;
 1255         }
 1256         return (rv);
 1257 }
 1258 
 1259 /*
 1260  * Protocol drain routine.  Called when memory is in short supply.
 1261  */
 1262 void
 1263 tcp_drain()
 1264 {
 1265         struct inpcb_hdr *inph;
 1266         struct tcpcb *tp;
 1267 
 1268         /*
 1269          * Free the sequence queue of all TCP connections.
 1270          */
 1271         CIRCLEQ_FOREACH(inph, &tcbtable.inpt_queue, inph_queue) {
 1272                 switch (inph->inph_af) {
 1273                 case AF_INET:
 1274                         tp = intotcpcb((struct inpcb *)inph);
 1275                         break;
 1276 #ifdef INET6
 1277                 case AF_INET6:
 1278                         tp = in6totcpcb((struct in6pcb *)inph);
 1279                         break;
 1280 #endif
 1281                 default:
 1282                         tp = NULL;
 1283                         break;
 1284                 }
 1285                 if (tp != NULL) {
 1286                         /*
 1287                          * We may be called from a device's interrupt
 1288                          * context.  If the tcpcb is already busy,
 1289                          * just bail out now.
 1290                          */
 1291                         if (tcp_reass_lock_try(tp) == 0)
 1292                                 continue;
 1293                         if (tcp_freeq(tp))
 1294                                 tcpstat.tcps_connsdrained++;
 1295                         TCP_REASS_UNLOCK(tp);
 1296                 }
 1297         }
 1298 }
 1299 
 1300 /*
 1301  * Notify a tcp user of an asynchronous error;
 1302  * store error as soft error, but wake up user
 1303  * (for now, won't do anything until can select for soft error).
 1304  */
 1305 void
 1306 tcp_notify(inp, error)
 1307         struct inpcb *inp;
 1308         int error;
 1309 {
 1310         struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
 1311         struct socket *so = inp->inp_socket;
 1312 
 1313         /*
 1314          * Ignore some errors if we are hooked up.
 1315          * If connection hasn't completed, has retransmitted several times,
 1316          * and receives a second error, give up now.  This is better
 1317          * than waiting a long time to establish a connection that
 1318          * can never complete.
 1319          */
 1320         if (tp->t_state == TCPS_ESTABLISHED &&
 1321              (error == EHOSTUNREACH || error == ENETUNREACH ||
 1322               error == EHOSTDOWN)) {
 1323                 return;
 1324         } else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
 1325             tp->t_rxtshift > 3 && tp->t_softerror)
 1326                 so->so_error = error;
 1327         else
 1328                 tp->t_softerror = error;
 1329         wakeup((caddr_t) &so->so_timeo);
 1330         sorwakeup(so);
 1331         sowwakeup(so);
 1332 }
 1333 
 #ifdef INET6
 /*
  * IPv6 counterpart of tcp_notify(): deliver an asynchronous error to
  * the user of a TCP connection attached to an in6pcb.  Normally the
  * error is only remembered as a soft error and the user is woken up.
  */
 void
 tcp6_notify(in6p, error)
         struct in6pcb *in6p;
         int error;
 {
         struct socket *so = in6p->in6p_socket;
         struct tcpcb *tp = (struct tcpcb *)in6p->in6p_ppcb;
         const int unreachable = (error == EHOSTUNREACH ||
             error == ENETUNREACH || error == EHOSTDOWN);

         /* An established connection ignores reachability errors. */
         if (tp->t_state == TCPS_ESTABLISHED && unreachable)
                 return;

         /*
          * If connection hasn't completed, has retransmitted several
          * times, and receives a second error, give up now.  This is
          * better than waiting a long time to establish a connection
          * that can never complete.  Otherwise just record the error.
          */
         if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
             tp->t_rxtshift > 3 && tp->t_softerror)
                 so->so_error = error;
         else
                 tp->t_softerror = error;

         wakeup((caddr_t) &so->so_timeo);
         sorwakeup(so);
         sowwakeup(so);
 }
 #endif
 1364 
 #ifdef INET6
 /*
  * Control-input entry point for TCP over IPv6.
  *
  *      cmd     PRC_* request code
  *      sa      address the request refers to; must be a sockaddr_in6
  *      d       optional struct ip6ctlparam describing the packet that
  *              triggered the request, or NULL
  *
  * Picks the per-PCB notify routine for the command and hands the event
  * to in6_pcbnotify().  PRC_MSGSIZE is special: it only feeds
  * icmp6_mtudisc_update() and never notifies PCBs from here.
  */
 void
 tcp6_ctlinput(cmd, sa, d)
         int cmd;
         struct sockaddr *sa;
         void *d;
 {
         void (*notify) __P((struct in6pcb *, int)) = tcp6_notify;
         struct sockaddr_in6 *dst_sa6 = (struct sockaddr_in6 *)sa;
         const struct sockaddr_in6 *src_sa6 = NULL;
         struct ip6ctlparam *ctl;
         struct ip6_hdr *ip6;
         struct tcphdr th;
         struct mbuf *m;
         int matched;
         int off;

         if (sa->sa_family != AF_INET6)
                 return;
         if (sa->sa_len != sizeof(struct sockaddr_in6))
                 return;
         if ((unsigned)cmd >= PRC_NCMDS)
                 return;

         if (cmd == PRC_QUENCH) {
                 /* XXX there's no PRC_QUENCH in IPv6 */
                 notify = tcp6_quench;
         } else if (PRC_IS_REDIRECT(cmd)) {
                 notify = in6_rtchange;
                 d = NULL;
         } else if (cmd == PRC_MSGSIZE) {
                 /* nothing to select here; handled specially below */
         } else if (cmd == PRC_HOSTDEAD) {
                 d = NULL;
         } else if (inet6ctlerrmap[cmd] == 0) {
                 return;
         }

         /* if the parameter is from icmp6, decode it. */
         ctl = (struct ip6ctlparam *)d;
         if (ctl == NULL) {
                 m = NULL;
                 ip6 = NULL;
                 off = 0;
                 src_sa6 = &sa6_any;
         } else {
                 m = ctl->ip6c_m;
                 ip6 = ctl->ip6c_ip6;
                 off = ctl->ip6c_off;
                 src_sa6 = ctl->ip6c_src;
         }

         /* No offending packet: notify by address only, ports wildcarded. */
         if (ip6 == NULL) {
                 (void) in6_pcbnotify(&tcbtable, sa, 0,
                     (struct sockaddr *)src_sa6, 0, cmd, NULL, notify);
                 return;
         }

         /*
          * XXX: We assume that when ip6 is non NULL,
          * M and OFF are valid.
          */

         /* check if we can safely examine src and dst ports */
         if (m->m_pkthdr.len < off + sizeof(th)) {
                 if (cmd == PRC_MSGSIZE)
                         icmp6_mtudisc_update(ctl, 0);
                 return;
         }

         bzero(&th, sizeof(th));
         m_copydata(m, off, sizeof(th), (caddr_t)&th);

         if (cmd == PRC_MSGSIZE) {
                 int valid = 0;

                 /*
                  * Check to see if we have a valid TCP connection
                  * corresponding to the address in the ICMPv6 message
                  * payload.
                  */
                 if (in6_pcblookup_connect(&tcbtable, &dst_sa6->sin6_addr,
                     th.th_dport, (struct in6_addr *)&src_sa6->sin6_addr,
                     th.th_sport, 0))
                         valid = 1;

                 /*
                  * Depending on the value of "valid" and routing table
                  * size (mtudisc_{hi,lo}wat), we will:
                  * - recalculate the new MTU and create the
                  *   corresponding routing entry, or
                  * - ignore the MTU change notification.
                  */
                 icmp6_mtudisc_update(ctl, valid);

                 /*
                  * no need to call in6_pcbnotify, it should have been
                  * called via callback if necessary
                  */
                 return;
         }

         matched = in6_pcbnotify(&tcbtable, sa, th.th_dport,
             (struct sockaddr *)src_sa6, th.th_sport, cmd, NULL, notify);

         /* No PCB matched: the error may concern a pending syn cache entry. */
         if (matched == 0 && syn_cache_count &&
             (inet6ctlerrmap[cmd] == EHOSTUNREACH ||
              inet6ctlerrmap[cmd] == ENETUNREACH ||
              inet6ctlerrmap[cmd] == EHOSTDOWN))
                 syn_cache_unreach((struct sockaddr *)src_sa6, sa, &th);
 }
 #endif
 1471 
 1472 #ifdef INET
 1473 /* assumes that ip header and tcp header are contiguous on mbuf */
 1474 void *
 1475 tcp_ctlinput(cmd, sa, v)
 1476         int cmd;
 1477         struct sockaddr *sa;
 1478         void *v;
 1479 {
 1480         struct ip *ip = v;
 1481         struct tcphdr *th;
 1482         struct icmp *icp;
 1483         extern const int inetctlerrmap[];
 1484         void (*notify) __P((struct inpcb *, int)) = tcp_notify;
 1485         int errno;
 1486         int nmatch;
 1487 #ifdef INET6
 1488         struct in6_addr src6, dst6;
 1489 #endif
 1490 
 1491         if (sa->sa_family != AF_INET ||
 1492             sa->sa_len != sizeof(struct sockaddr_in))
 1493                 return NULL;
 1494         if ((unsigned)cmd >= PRC_NCMDS)
 1495                 return NULL;
 1496         errno = inetctlerrmap[cmd];
 1497         if (cmd == PRC_QUENCH)
 1498                 notify = tcp_quench;
 1499         else if (PRC_IS_REDIRECT(cmd))
 1500                 notify = in_rtchange, ip = 0;
 1501         else if (cmd == PRC_MSGSIZE && ip && ip->ip_v == 4) {
 1502                 /*
 1503                  * Check to see if we have a valid TCP connection
 1504                  * corresponding to the address in the ICMP message
 1505                  * payload.
 1506                  *
 1507                  * Boundary check is made in icmp_input(), with ICMP_ADVLENMIN.
 1508                  */
 1509                 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 1510 #ifdef INET6
 1511                 memset(&src6, 0, sizeof(src6));
 1512                 memset(&dst6, 0, sizeof(dst6));
 1513                 src6.s6_addr16[5] = dst6.s6_addr16[5] = 0xffff;
 1514                 memcpy(&src6.s6_addr32[3], &ip->ip_src, sizeof(struct in_addr));
 1515                 memcpy(&dst6.s6_addr32[3], &ip->ip_dst, sizeof(struct in_addr));
 1516 #endif
 1517                 if (in_pcblookup_connect(&tcbtable, ip->ip_dst, th->th_dport,
 1518                     ip->ip_src, th->th_sport) != NULL)
 1519                         ;
 1520 #ifdef INET6
 1521                 else if (in6_pcblookup_connect(&tcbtable, &dst6,
 1522                     th->th_dport, &src6, th->th_sport, 0) != NULL)
 1523                         ;
 1524 #endif
 1525                 else
 1526                         return NULL;
 1527 
 1528                 /*
 1529                  * Now that we've validated that we are actually communicating
 1530                  * with the host indicated in the ICMP message, locate the
 1531                  * ICMP header, recalculate the new MTU, and create the
 1532                  * corresponding routing entry.
 1533                  */
 1534                 icp = (struct icmp *)((caddr_t)ip -
 1535                     offsetof(struct icmp, icmp_ip));
 1536                 icmp_mtudisc(icp, ip->ip_dst);
 1537 
 1538                 return NULL;
 1539         } else if (cmd == PRC_HOSTDEAD)
 1540                 ip = 0;
 1541         else if (errno == 0)
 1542                 return NULL;
 1543         if (ip && ip->ip_v == 4 && sa->sa_family == AF_INET) {
 1544                 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 1545                 nmatch = in_pcbnotify(&tcbtable, satosin(sa)->sin_addr,
 1546                     th->th_dport, ip->ip_src, th->th_sport, errno, notify);
 1547                 if (nmatch == 0 && syn_cache_count &&
 1548                     (inetctlerrmap[cmd] == EHOSTUNREACH ||
 1549                     inetctlerrmap[cmd] == ENETUNREACH ||
 1550                     inetctlerrmap[cmd] == EHOSTDOWN)) {
 1551                         struct sockaddr_in sin;
 1552                         bzero(&sin, sizeof(sin));
 1553                         sin.sin_len = sizeof(sin);
 1554                         sin.sin_family = AF_INET;
 1555                         sin.sin_port = th->th_sport;
 1556                         sin.sin_addr = ip->ip_src;
 1557                         syn_cache_unreach((struct sockaddr *)&sin, sa, th);
 1558                 }
 1559 
 1560                 /* XXX mapped address case */
 1561         } else
 1562                 in_pcbnotifyall(&tcbtable, satosin(sa)->sin_addr, errno,
 1563                     notify);
 1564         return NULL;
 1565 }
 1566 
 1567 /*
 1568  * When a source quence is received, we are being notifed of congestion.
 1569  * Close the congestion window down to the Loss Window (one segment).
 1570  * We will gradually open it again as we proceed.
 1571  */
 1572 void
 1573 tcp_quench(inp, errno)
 1574         struct inpcb *inp;
 1575         int errno;
 1576 {
 1577         struct tcpcb *tp = intotcpcb(inp);
 1578 
 1579         if (tp)
 1580                 tp->snd_cwnd = tp->t_segsz;
 1581 }
 1582 #endif
 1583 
 1584 #ifdef INET6
 1585 void
 1586 tcp6_quench(in6p, errno)
 1587         struct in6pcb *in6p;
 1588         int errno;
 1589 {
 1590         struct tcpcb *tp = in6totcpcb(in6p);
 1591 
 1592         if (tp)
 1593                 tp->snd_cwnd = tp->t_segsz;
 1594 }
 1595 #endif
 1596 
 1597 #ifdef INET
 1598 /*
 1599  * Path MTU Discovery handlers.
 1600  */
 1601 void
 1602 tcp_mtudisc_callback(faddr)
 1603         struct in_addr faddr;
 1604 {
 1605 #ifdef INET6
 1606         struct in6_addr in6;
 1607 #endif
 1608 
 1609         in_pcbnotifyall(&tcbtable, faddr, EMSGSIZE, tcp_mtudisc);
 1610 #ifdef INET6
 1611         memset(&in6, 0, sizeof(in6));
 1612         in6.s6_addr16[5] = 0xffff;
 1613         memcpy(&in6.s6_addr32[3], &faddr, sizeof(struct in_addr));
 1614         tcp6_mtudisc_callback(&in6);
 1615 #endif
 1616 }
 1617 
 1618 /*
 1619  * On receipt of path MTU corrections, flush old route and replace it
 1620  * with the new one.  Retransmit all unacknowledged packets, to ensure
 1621  * that all packets will be received.
 1622  */
 1623 void
 1624 tcp_mtudisc(inp, errno)
 1625         struct inpcb *inp;
 1626         int errno;
 1627 {
 1628         struct tcpcb *tp = intotcpcb(inp);
 1629         struct rtentry *rt = in_pcbrtentry(inp);
 1630 
 1631         if (tp != 0) {
 1632                 if (rt != 0) {
 1633                         /*
 1634                          * If this was not a host route, remove and realloc.
 1635                          */
 1636                         if ((rt->rt_flags & RTF_HOST) == 0) {
 1637                                 in_rtchange(inp, errno);
 1638                                 if ((rt = in_pcbrtentry(inp)) == 0)
 1639                                         return;
 1640                         }
 1641 
 1642                         /*
 1643                          * Slow start out of the error condition.  We
 1644                          * use the MTU because we know it's smaller
 1645                          * than the previously transmitted segment.
 1646                          *
 1647                          * Note: This is more conservative than the
 1648                          * suggestion in draft-floyd-incr-init-win-03.
 1649                          */
 1650                         if (rt->rt_rmx.rmx_mtu != 0)
 1651                                 tp->snd_cwnd =
 1652                                     TCP_INITIAL_WINDOW(tcp_init_win,
 1653                                     rt->rt_rmx.rmx_mtu);
 1654                 }
 1655 
 1656                 /*
 1657                  * Resend unacknowledged packets.
 1658                  */
 1659                 tp->snd_nxt = tp->snd_una;
 1660                 tcp_output(tp);
 1661         }
 1662 }
 1663 #endif
 1664 
 1665 #ifdef INET6
 1666 /*
 1667  * Path MTU Discovery handlers.
 1668  */
 1669 void
 1670 tcp6_mtudisc_callback(faddr)
 1671         struct in6_addr *faddr;
 1672 {
 1673         struct sockaddr_in6 sin6;
 1674 
 1675         bzero(&sin6, sizeof(sin6));
 1676         sin6.sin6_family = AF_INET6;
 1677         sin6.sin6_len = sizeof(struct sockaddr_in6);
 1678         sin6.sin6_addr = *faddr;
 1679         (void) in6_pcbnotify(&tcbtable, (struct sockaddr *)&sin6, 0,
 1680             (struct sockaddr *)&sa6_any, 0, PRC_MSGSIZE, NULL, tcp6_mtudisc);
 1681 }
 1682 
 1683 void
 1684 tcp6_mtudisc(in6p, errno)
 1685         struct in6pcb *in6p;
 1686         int errno;
 1687 {
 1688         struct tcpcb *tp = in6totcpcb(in6p);
 1689         struct rtentry *rt = in6_pcbrtentry(in6p);
 1690 
 1691         if (tp != 0) {
 1692                 if (rt != 0) {
 1693                         /*
 1694                          * If this was not a host route, remove and realloc.
 1695                          */
 1696                         if ((rt->rt_flags & RTF_HOST) == 0) {
 1697                                 in6_rtchange(in6p, errno);
 1698                                 if ((rt = in6_pcbrtentry(in6p)) == 0)
 1699                                         return;
 1700                         }
 1701 
 1702                         /*
 1703                          * Slow start out of the error condition.  We
 1704                          * use the MTU because we know it's smaller
 1705                          * than the previously transmitted segment.
 1706                          *
 1707                          * Note: This is more conservative than the
 1708                          * suggestion in draft-floyd-incr-init-win-03.
 1709                          */
 1710                         if (rt->rt_rmx.rmx_mtu != 0)
 1711                                 tp->snd_cwnd =
 1712                                     TCP_INITIAL_WINDOW(tcp_init_win,
 1713                                     rt->rt_rmx.rmx_mtu);
 1714                 }
 1715 
 1716                 /*
 1717                  * Resend unacknowledged packets.
 1718                  */
 1719                 tp->snd_nxt = tp->snd_una;
 1720                 tcp_output(tp);
 1721         }
 1722 }
 1723 #endif /* INET6 */
 1724 
 1725 /*
 1726  * Compute the MSS to advertise to the peer.  Called only during
 1727  * the 3-way handshake.  If we are the server (peer initiated
 1728  * connection), we are called with a pointer to the interface
 1729  * on which the SYN packet arrived.  If we are the client (we
 1730  * initiated connection), we are called with a pointer to the
 1731  * interface out which this connection should go.
 1732  *
 1733  * NOTE: Do not subtract IP option/extension header size nor IPsec
 1734  * header size from MSS advertisement.  MSS option must hold the maximum
 1735  * segment size we can accept, so it must always be:
 1736  *       max(if mtu) - ip header - tcp header
 1737  */
 1738 u_long
 1739 tcp_mss_to_advertise(ifp, af)
 1740         const struct ifnet *ifp;
 1741         int af;
 1742 {
 1743         extern u_long in_maxmtu;
 1744         u_long mss = 0;
 1745         u_long hdrsiz;
 1746 
 1747         /*
 1748          * In order to avoid defeating path MTU discovery on the peer,
 1749          * we advertise the max MTU of all attached networks as our MSS,
 1750          * per RFC 1191, section 3.1.
 1751          *
 1752          * We provide the option to advertise just the MTU of
 1753          * the interface on which we hope this connection will
 1754          * be receiving.  If we are responding to a SYN, we
 1755          * will have a pretty good idea about this, but when
 1756          * initiating a connection there is a bit more doubt.
 1757          *
 1758          * We also need to ensure that loopback has a large enough
 1759          * MSS, as the loopback MTU is never included in in_maxmtu.
 1760          */
 1761 
 1762         if (ifp != NULL)
 1763                 switch (af) {
 1764                 case AF_INET:
 1765                         mss = ifp->if_mtu;
 1766                         break;
 1767 #ifdef INET6
 1768                 case AF_INET6:
 1769                         mss = IN6_LINKMTU(ifp);
 1770                         break;
 1771 #endif
 1772                 }
 1773 
 1774         if (tcp_mss_ifmtu == 0)
 1775                 switch (af) {
 1776                 case AF_INET:
 1777                         mss = max(in_maxmtu, mss);
 1778                         break;
 1779 #ifdef INET6
 1780                 case AF_INET6:
 1781                         mss = max(in6_maxmtu, mss);
 1782                         break;
 1783 #endif
 1784                 }
 1785 
 1786         switch (af) {
 1787         case AF_INET:
 1788                 hdrsiz = sizeof(struct ip);
 1789                 break;
 1790 #ifdef INET6
 1791         case AF_INET6:
 1792                 hdrsiz = sizeof(struct ip6_hdr);
 1793                 break;
 1794 #endif
 1795         default:
 1796                 hdrsiz = 0;
 1797                 break;
 1798         }
 1799         hdrsiz += sizeof(struct tcphdr);
 1800         if (mss > hdrsiz)
 1801                 mss -= hdrsiz;
 1802 
 1803         mss = max(tcp_mssdflt, mss);
 1804         return (mss);
 1805 }
 1806 
 1807 /*
 1808  * Set connection variables based on the peer's advertised MSS.
 1809  * We are passed the TCPCB for the actual connection.  If we
 1810  * are the server, we are called by the compressed state engine
 1811  * when the 3-way handshake is complete.  If we are the client,
 1812  * we are called when we receive the SYN,ACK from the server.
 1813  *
 1814  * NOTE: Our advertised MSS value must be initialized in the TCPCB
 1815  * before this routine is called!
 1816  */
 1817 void
 1818 tcp_mss_from_peer(tp, offer)
 1819         struct tcpcb *tp;
 1820         int offer;
 1821 {
 1822         struct socket *so;
 1823 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
 1824         struct rtentry *rt;
 1825 #endif
 1826         u_long bufsize;
 1827         int mss;
 1828 
 1829 #ifdef DIAGNOSTIC
 1830         if (tp->t_inpcb && tp->t_in6pcb)
 1831                 panic("tcp_mss_from_peer: both t_inpcb and t_in6pcb are set");
 1832 #endif
 1833         so = NULL;
 1834         rt = NULL;
 1835 #ifdef INET
 1836         if (tp->t_inpcb) {
 1837                 so = tp->t_inpcb->inp_socket;
 1838 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
 1839                 rt = in_pcbrtentry(tp->t_inpcb);
 1840 #endif
 1841         }
 1842 #endif
 1843 #ifdef INET6
 1844         if (tp->t_in6pcb) {
 1845                 so = tp->t_in6pcb->in6p_socket;
 1846 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
 1847                 rt = in6_pcbrtentry(tp->t_in6pcb);
 1848 #endif
 1849         }
 1850 #endif
 1851 
 1852         /*
 1853          * As per RFC1122, use the default MSS value, unless they
 1854          * sent us an offer.  Do not accept offers less than 256 bytes.
 1855          */
 1856         mss = tcp_mssdflt;
 1857         if (offer)
 1858                 mss = offer;
 1859         mss = max(mss, 256);            /* sanity */
 1860         tp->t_peermss = mss;
 1861         mss -= tcp_optlen(tp);
 1862 #ifdef INET
 1863         if (tp->t_inpcb)
 1864                 mss -= ip_optlen(tp->t_inpcb);
 1865 #endif
 1866 #ifdef INET6
 1867         if (tp->t_in6pcb)
 1868                 mss -= ip6_optlen(tp->t_in6pcb);
 1869 #endif
 1870 
 1871         /*
 1872          * If there's a pipesize, change the socket buffer to that size.
 1873          * Make the socket buffer an integral number of MSS units.  If
 1874          * the MSS is larger than the socket buffer, artificially decrease
 1875          * the MSS.
 1876          */
 1877 #ifdef RTV_SPIPE
 1878         if (rt != NULL && rt->rt_rmx.rmx_sendpipe != 0)
 1879                 bufsize = rt->rt_rmx.rmx_sendpipe;
 1880         else
 1881 #endif
 1882                 bufsize = so->so_snd.sb_hiwat;
 1883         if (bufsize < mss)
 1884                 mss = bufsize;
 1885         else {
 1886                 bufsize = roundup(bufsize, mss);
 1887                 if (bufsize > sb_max)
 1888                         bufsize = sb_max;
 1889                 (void) sbreserve(&so->so_snd, bufsize);
 1890         }
 1891         tp->t_segsz = mss;
 1892 
 1893 #ifdef RTV_SSTHRESH
 1894         if (rt != NULL && rt->rt_rmx.rmx_ssthresh) {
 1895                 /*
 1896                  * There's some sort of gateway or interface buffer
 1897                  * limit on the path.  Use this to set the slow
 1898                  * start threshold, but set the threshold to no less
 1899                  * than 2 * MSS.
 1900                  */
 1901                 tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
 1902         }
 1903 #endif
 1904 }
 1905 
 1906 /*
 1907  * Processing necessary when a TCP connection is established.
 1908  */
 1909 void
 1910 tcp_established(tp)
 1911         struct tcpcb *tp;
 1912 {
 1913         struct socket *so;
 1914 #ifdef RTV_RPIPE
 1915         struct rtentry *rt;
 1916 #endif
 1917         u_long bufsize;
 1918 
 1919 #ifdef DIAGNOSTIC
 1920         if (tp->t_inpcb && tp->t_in6pcb)
 1921                 panic("tcp_established: both t_inpcb and t_in6pcb are set");
 1922 #endif
 1923         so = NULL;
 1924         rt = NULL;
 1925 #ifdef INET
 1926         if (tp->t_inpcb) {
 1927                 so = tp->t_inpcb->inp_socket;
 1928 #if defined(RTV_RPIPE)
 1929                 rt = in_pcbrtentry(tp->t_inpcb);
 1930 #endif
 1931         }
 1932 #endif
 1933 #ifdef INET6
 1934         if (tp->t_in6pcb) {
 1935                 so = tp->t_in6pcb->in6p_socket;
 1936 #if defined(RTV_RPIPE)
 1937                 rt = in6_pcbrtentry(tp->t_in6pcb);
 1938 #endif
 1939         }
 1940 #endif
 1941 
 1942         tp->t_state = TCPS_ESTABLISHED;
 1943         TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
 1944 
 1945 #ifdef RTV_RPIPE
 1946         if (rt != NULL && rt->rt_rmx.rmx_recvpipe != 0)
 1947                 bufsize = rt->rt_rmx.rmx_recvpipe;
 1948         else
 1949 #endif
 1950                 bufsize = so->so_rcv.sb_hiwat;
 1951         if (bufsize > tp->t_ourmss) {
 1952                 bufsize = roundup(bufsize, tp->t_ourmss);
 1953                 if (bufsize > sb_max)
 1954                         bufsize = sb_max;
 1955                 (void) sbreserve(&so->so_rcv, bufsize);
 1956         }
 1957 }
 1958 
 1959 /*
 1960  * Check if there's an initial rtt or rttvar.  Convert from the
 1961  * route-table units to scaled multiples of the slow timeout timer.
 1962  * Called only during the 3-way handshake.
 1963  */
 1964 void
 1965 tcp_rmx_rtt(tp)
 1966         struct tcpcb *tp;
 1967 {
 1968 #ifdef RTV_RTT
 1969         struct rtentry *rt = NULL;
 1970         int rtt;
 1971 
 1972 #ifdef DIAGNOSTIC
 1973         if (tp->t_inpcb && tp->t_in6pcb)
 1974                 panic("tcp_rmx_rtt: both t_inpcb and t_in6pcb are set");
 1975 #endif
 1976 #ifdef INET
 1977         if (tp->t_inpcb)
 1978                 rt = in_pcbrtentry(tp->t_inpcb);
 1979 #endif
 1980 #ifdef INET6
 1981         if (tp->t_in6pcb)
 1982                 rt = in6_pcbrtentry(tp->t_in6pcb);
 1983 #endif
 1984         if (rt == NULL)
 1985                 return;
 1986 
 1987         if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
 1988                 /*
 1989                  * XXX The lock bit for MTU indicates that the value
 1990                  * is also a minimum value; this is subject to time.
 1991                  */
 1992                 if (rt->rt_rmx.rmx_locks & RTV_RTT)
 1993                         TCPT_RANGESET(tp->t_rttmin,
 1994                             rtt / (RTM_RTTUNIT / PR_SLOWHZ),
 1995                             TCPTV_MIN, TCPTV_REXMTMAX);
 1996                 tp->t_srtt = rtt /
 1997                     ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));
 1998                 if (rt->rt_rmx.rmx_rttvar) {
 1999                         tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
 2000                             ((RTM_RTTUNIT / PR_SLOWHZ) >>
 2001                                 (TCP_RTTVAR_SHIFT + 2));
 2002                 } else {
 2003                         /* Default variation is +- 1 rtt */
 2004                         tp->t_rttvar =
 2005                             tp->t_srtt >> (TCP_RTT_SHIFT - TCP_RTTVAR_SHIFT);
 2006                 }
 2007                 TCPT_RANGESET(tp->t_rxtcur,
 2008                     ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2),
 2009                     tp->t_rttmin, TCPTV_REXMTMAX);
 2010         }
 2011 #endif
 2012 }
 2013 
 2014 tcp_seq  tcp_iss_seq = 0;       /* running offset mixed into new TCP ISS values; advanced by TCP_ISSINCR */
 2015 #if NRND > 0
 2016 u_int8_t tcp_iss_secret[16];    /* 128 bits; secret fed into the tcp_do_rfc1948 ISS hash */
 2017 #endif
 2018 
 2019 /*
 2020  * Get a new sequence value given a tcp control block
 2021  */
 2022 tcp_seq
 2023 tcp_new_iss(struct tcpcb *tp, tcp_seq addin)
 2024 {
 2025 
 2026 #ifdef INET
 2027         if (tp->t_inpcb != NULL) {
 2028                 return (tcp_new_iss1(&tp->t_inpcb->inp_laddr,
 2029                     &tp->t_inpcb->inp_faddr, tp->t_inpcb->inp_lport,
 2030                     tp->t_inpcb->inp_fport, sizeof(tp->t_inpcb->inp_laddr),
 2031                     addin));
 2032         }
 2033 #endif
 2034 #ifdef INET6
 2035         if (tp->t_in6pcb != NULL) {
 2036                 return (tcp_new_iss1(&tp->t_in6pcb->in6p_laddr,
 2037                     &tp->t_in6pcb->in6p_faddr, tp->t_in6pcb->in6p_lport,
 2038                     tp->t_in6pcb->in6p_fport, sizeof(tp->t_in6pcb->in6p_laddr),
 2039                     addin));
 2040         }
 2041 #endif
 2042         /* Not possible. */
 2043         panic("tcp_new_iss");
 2044 }
 2045 
 2046 /*
 2047  * This routine actually generates a new TCP initial sequence number.
 2048  */
 2049 tcp_seq
 2050 tcp_new_iss1(void *laddr, void *faddr, u_int16_t lport, u_int16_t fport,
 2051     size_t addrsz, tcp_seq addin)
 2052 {
 2053         tcp_seq tcp_iss;
 2054 
 2055 #if NRND > 0
 2056         static int beenhere;
 2057 
 2058         /*
 2059          * If we haven't been here before, initialize our cryptographic
 2060          * hash secret.
 2061          */
 2062         if (beenhere == 0) {
 2063                 rnd_extract_data(tcp_iss_secret, sizeof(tcp_iss_secret),
 2064                     RND_EXTRACT_ANY);
 2065                 beenhere = 1;
 2066         }
 2067 
 2068         if (tcp_do_rfc1948) {
 2069                 MD5_CTX ctx;
 2070                 u_int8_t hash[16];      /* XXX MD5 knowledge */
 2071 
 2072                 /*
 2073                  * Compute the base value of the ISS.  It is a hash
 2074                  * of (saddr, sport, daddr, dport, secret).
 2075                  */
 2076                 MD5Init(&ctx);
 2077 
 2078                 MD5Update(&ctx, (u_char *) laddr, addrsz);
 2079                 MD5Update(&ctx, (u_char *) &lport, sizeof(lport));
 2080 
 2081                 MD5Update(&ctx, (u_char *) faddr, addrsz);
 2082                 MD5Update(&ctx, (u_char *) &fport, sizeof(fport));
 2083 
 2084                 MD5Update(&ctx, tcp_iss_secret, sizeof(tcp_iss_secret));
 2085 
 2086                 MD5Final(hash, &ctx);
 2087 
 2088                 memcpy(&tcp_iss, hash, sizeof(tcp_iss));
 2089 
 2090                 /*
 2091                  * Now increment our "timer", and add it in to
 2092                  * the computed value.
 2093                  *
 2094                  * XXX Use `addin'?
 2095                  * XXX TCP_ISSINCR too large to use?
 2096                  */
 2097                 tcp_iss_seq += TCP_ISSINCR;
 2098 #ifdef TCPISS_DEBUG
 2099                 printf("ISS hash 0x%08x, ", tcp_iss);
 2100 #endif
 2101                 tcp_iss += tcp_iss_seq + addin;
 2102 #ifdef TCPISS_DEBUG
 2103                 printf("new ISS 0x%08x\n", tcp_iss);
 2104 #endif
 2105         } else
 2106 #endif /* NRND > 0 */
 2107         {
 2108                 /*
 2109                  * Randomize.
 2110                  */
 2111 #if NRND > 0
 2112                 rnd_extract_data(&tcp_iss, sizeof(tcp_iss), RND_EXTRACT_ANY);
 2113 #else
 2114                 tcp_iss = arc4random();
 2115 #endif
 2116 
 2117                 /*
 2118                  * If we were asked to add some amount to a known value,
 2119                  * we will take a random value obtained above, mask off
 2120                  * the upper bits, and add in the known value.  We also
 2121                  * add in a constant to ensure that we are at least a
 2122                  * certain distance from the original value.
 2123                  *
 2124                  * This is used when an old connection is in timed wait
 2125                  * and we have a new one coming in, for instance.
 2126                  */
 2127                 if (addin != 0) {
 2128 #ifdef TCPISS_DEBUG
 2129                         printf("Random %08x, ", tcp_iss);
 2130 #endif
 2131                         tcp_iss &= TCP_ISS_RANDOM_MASK;
 2132                         tcp_iss += addin + TCP_ISSINCR;
 2133 #ifdef TCPISS_DEBUG
 2134                         printf("Old ISS %08x, ISS %08x\n", addin, tcp_iss);
 2135 #endif
 2136                 } else {
 2137                         tcp_iss &= TCP_ISS_RANDOM_MASK;
 2138                         tcp_iss += tcp_iss_seq;
 2139                         tcp_iss_seq += TCP_ISSINCR;
 2140 #ifdef TCPISS_DEBUG
 2141                         printf("ISS %08x\n", tcp_iss);
 2142 #endif
 2143                 }
 2144         }
 2145 
 2146         if (tcp_compat_42) {
 2147                 /*
 2148                  * Limit it to the positive range for really old TCP
 2149                  * implementations.
 2150                  * Just AND off the top bit instead of checking if
 2151                  * is set first - saves a branch 50% of the time.
 2152                  */
 2153                 tcp_iss &= 0x7fffffff;          /* XXX */
 2154         }
 2155 
 2156         return (tcp_iss);
 2157 }
 2158 
 2159 #if defined(IPSEC) || defined(FAST_IPSEC)
 2160 /* compute ESP/AH header size for TCP, including outer IP header. */
 2161 size_t
 2162 ipsec4_hdrsiz_tcp(tp)
 2163         struct tcpcb *tp;
 2164 {
 2165         struct inpcb *inp;
 2166         size_t hdrsiz;
 2167 
 2168         /* XXX mapped addr case (tp->t_in6pcb) */
 2169         if (!tp || !tp->t_template || !(inp = tp->t_inpcb))
 2170                 return 0;
 2171         switch (tp->t_family) {
 2172         case AF_INET:
 2173                 /* XXX: should use currect direction. */
 2174                 hdrsiz = ipsec4_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, inp);
 2175                 break;
 2176         default:
 2177                 hdrsiz = 0;
 2178                 break;
 2179         }
 2180 
 2181         return hdrsiz;
 2182 }
 2183 
 2184 #ifdef INET6
 2185 size_t
 2186 ipsec6_hdrsiz_tcp(tp)
 2187         struct tcpcb *tp;
 2188 {
 2189         struct in6pcb *in6p;
 2190         size_t hdrsiz;
 2191 
 2192         if (!tp || !tp->t_template || !(in6p = tp->t_in6pcb))
 2193                 return 0;
 2194         switch (tp->t_family) {
 2195         case AF_INET6:
 2196                 /* XXX: should use currect direction. */
 2197                 hdrsiz = ipsec6_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, in6p);
 2198                 break;
 2199         case AF_INET:
 2200                 /* mapped address case - tricky */
 2201         default:
 2202                 hdrsiz = 0;
 2203                 break;
 2204         }
 2205 
 2206         return hdrsiz;
 2207 }
 2208 #endif
 2209 #endif /*IPSEC*/
 2210 
 2211 /*
 2212  * Determine the length of the TCP options for this connection.
 2213  *
 2214  * XXX:  What do we do for SACK, when we add that?  Just reserve
 2215  *       all of the space?  Otherwise we can't exactly be incrementing
 2216  *       cwnd by an amount that varies depending on the amount we last
 2217  *       had to SACK!
 2218  */
 2219 
 2220 u_int
 2221 tcp_optlen(tp)
 2222         struct tcpcb *tp;
 2223 {
 2224         if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
 2225             (TF_REQ_TSTMP | TF_RCVD_TSTMP))
 2226                 return TCPOLEN_TSTAMP_APPA;
 2227         else
 2228                 return 0;
 2229 }

Cache object: 19eb8554419c878dd56f46aecf04d689


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.