The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_subr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: tcp_subr.c,v 1.233 2008/10/13 19:44:21 pooka Exp $     */
    2 
    3 /*
    4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the project nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  */
   31 
   32 /*-
   33  * Copyright (c) 1997, 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc.
   34  * All rights reserved.
   35  *
   36  * This code is derived from software contributed to The NetBSD Foundation
   37  * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
   38  * Facility, NASA Ames Research Center.
   39  *
   40  * Redistribution and use in source and binary forms, with or without
   41  * modification, are permitted provided that the following conditions
   42  * are met:
   43  * 1. Redistributions of source code must retain the above copyright
   44  *    notice, this list of conditions and the following disclaimer.
   45  * 2. Redistributions in binary form must reproduce the above copyright
   46  *    notice, this list of conditions and the following disclaimer in the
   47  *    documentation and/or other materials provided with the distribution.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   50  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   51  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   52  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   53  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   59  * POSSIBILITY OF SUCH DAMAGE.
   60  */
   61 
   62 /*
   63  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
   64  *      The Regents of the University of California.  All rights reserved.
   65  *
   66  * Redistribution and use in source and binary forms, with or without
   67  * modification, are permitted provided that the following conditions
   68  * are met:
   69  * 1. Redistributions of source code must retain the above copyright
   70  *    notice, this list of conditions and the following disclaimer.
   71  * 2. Redistributions in binary form must reproduce the above copyright
   72  *    notice, this list of conditions and the following disclaimer in the
   73  *    documentation and/or other materials provided with the distribution.
   74  * 3. Neither the name of the University nor the names of its contributors
   75  *    may be used to endorse or promote products derived from this software
   76  *    without specific prior written permission.
   77  *
   78  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   79  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   80  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   81  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   82  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   83  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   84  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   86  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   87  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   88  * SUCH DAMAGE.
   89  *
   90  *      @(#)tcp_subr.c  8.2 (Berkeley) 5/24/95
   91  */
   92 
   93 #include <sys/cdefs.h>
   94 __KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.233 2008/10/13 19:44:21 pooka Exp $");
   95 
   96 #include "opt_inet.h"
   97 #include "opt_ipsec.h"
   98 #include "opt_tcp_compat_42.h"
   99 #include "opt_inet_csum.h"
  100 #include "opt_mbuftrace.h"
  101 #include "rnd.h"
  102 
  103 #include <sys/param.h>
  104 #include <sys/proc.h>
  105 #include <sys/systm.h>
  106 #include <sys/malloc.h>
  107 #include <sys/mbuf.h>
  108 #include <sys/socket.h>
  109 #include <sys/socketvar.h>
  110 #include <sys/protosw.h>
  111 #include <sys/errno.h>
  112 #include <sys/kernel.h>
  113 #include <sys/pool.h>
  114 #if NRND > 0
  115 #include <sys/md5.h>
  116 #include <sys/rnd.h>
  117 #endif
  118 
  119 #include <net/route.h>
  120 #include <net/if.h>
  121 
  122 #include <netinet/in.h>
  123 #include <netinet/in_systm.h>
  124 #include <netinet/ip.h>
  125 #include <netinet/in_pcb.h>
  126 #include <netinet/ip_var.h>
  127 #include <netinet/ip_icmp.h>
  128 
  129 #ifdef INET6
  130 #ifndef INET
  131 #include <netinet/in.h>
  132 #endif
  133 #include <netinet/ip6.h>
  134 #include <netinet6/in6_pcb.h>
  135 #include <netinet6/ip6_var.h>
  136 #include <netinet6/in6_var.h>
  137 #include <netinet6/ip6protosw.h>
  138 #include <netinet/icmp6.h>
  139 #include <netinet6/nd6.h>
  140 #endif
  141 
  142 #include <netinet/tcp.h>
  143 #include <netinet/tcp_fsm.h>
  144 #include <netinet/tcp_seq.h>
  145 #include <netinet/tcp_timer.h>
  146 #include <netinet/tcp_var.h>
  147 #include <netinet/tcp_private.h>
  148 #include <netinet/tcp_congctl.h>
  149 #include <netinet/tcpip.h>
  150 
  151 #ifdef IPSEC
  152 #include <netinet6/ipsec.h>
  153 #include <netkey/key.h>
  154 #endif /*IPSEC*/
  155 
  156 #ifdef FAST_IPSEC
  157 #include <netipsec/ipsec.h>
  158 #include <netipsec/xform.h>
  159 #ifdef INET6
  160 #include <netipsec/ipsec6.h>
  161 #endif
  162  #include <netipsec/key.h>
  163 #endif  /* FAST_IPSEC*/
  164 
  165 
  166 struct  inpcbtable tcbtable;    /* head of queue of active tcpcb's */
  167 u_int32_t tcp_now;              /* for RFC 1323 timestamps */
  168 
  169 percpu_t *tcpstat_percpu;
  170 
  171 /* patchable/settable parameters for tcp */
  172 int     tcp_mssdflt = TCP_MSS;
  173 int     tcp_minmss = TCP_MINMSS;
  174 int     tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
  175 int     tcp_do_rfc1323 = 1;     /* window scaling / timestamps (obsolete) */
  176 #if NRND > 0
  177 int     tcp_do_rfc1948 = 0;     /* ISS by cryptographic hash */
  178 #endif
  179 int     tcp_do_sack = 1;        /* selective acknowledgement */
  180 int     tcp_do_win_scale = 1;   /* RFC1323 window scaling */
  181 int     tcp_do_timestamps = 1;  /* RFC1323 timestamps */
  182 int     tcp_ack_on_push = 0;    /* set to enable immediate ACK-on-PUSH */
  183 int     tcp_do_ecn = 0;         /* Explicit Congestion Notification */
  184 #ifndef TCP_INIT_WIN
  185 #define TCP_INIT_WIN    0       /* initial slow start window */
  186 #endif
  187 #ifndef TCP_INIT_WIN_LOCAL
  188 #define TCP_INIT_WIN_LOCAL 4    /* initial slow start window for local nets */
  189 #endif
  190 int     tcp_init_win = TCP_INIT_WIN;
  191 int     tcp_init_win_local = TCP_INIT_WIN_LOCAL;
  192 int     tcp_mss_ifmtu = 0;
  193 #ifdef TCP_COMPAT_42
  194 int     tcp_compat_42 = 1;
  195 #else
  196 int     tcp_compat_42 = 0;
  197 #endif
  198 int     tcp_rst_ppslim = 100;   /* 100pps */
  199 int     tcp_ackdrop_ppslim = 100;       /* 100pps */
  200 int     tcp_do_loopback_cksum = 0;
  201 int     tcp_do_abc = 1;         /* RFC3465 Appropriate byte counting. */
  202 int     tcp_abc_aggressive = 1; /* 1: L=2*SMSS  0: L=1*SMSS */
  203 int     tcp_sack_tp_maxholes = 32;
  204 int     tcp_sack_globalmaxholes = 1024;
  205 int     tcp_sack_globalholes = 0;
  206 int     tcp_ecn_maxretries = 1;
  207 
  208 /* tcb hash */
  209 #ifndef TCBHASHSIZE
  210 #define TCBHASHSIZE     128
  211 #endif
  212 int     tcbhashsize = TCBHASHSIZE;
  213 
  214 /* syn hash parameters */
  215 #define TCP_SYN_HASH_SIZE       293
  216 #define TCP_SYN_BUCKET_SIZE     35
  217 int     tcp_syn_cache_size = TCP_SYN_HASH_SIZE;
  218 int     tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
  219 int     tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
  220 struct  syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE];
  221 
  222 int     tcp_freeq(struct tcpcb *);
  223 
  224 #ifdef INET
  225 void    tcp_mtudisc_callback(struct in_addr);
  226 #endif
  227 #ifdef INET6
  228 void    tcp6_mtudisc_callback(struct in6_addr *);
  229 #endif
  230 
  231 #ifdef INET6
  232 void    tcp6_mtudisc(struct in6pcb *, int);
  233 #endif
  234 
  235 static struct pool tcpcb_pool;
  236 
  #ifdef TCP_CSUM_COUNTERS
  #include <sys/device.h>

  /*
   * Checksum event counters, one set per address family.  Each counter is
   * statically initialized and then statically attached.
   */
  #ifdef INET
  struct evcnt tcp_hwcsum_bad =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "hwcsum bad");
  struct evcnt tcp_hwcsum_ok =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "hwcsum ok");
  struct evcnt tcp_hwcsum_data =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "hwcsum data");
  struct evcnt tcp_swcsum =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "swcsum");

  EVCNT_ATTACH_STATIC(tcp_hwcsum_bad);
  EVCNT_ATTACH_STATIC(tcp_hwcsum_ok);
  EVCNT_ATTACH_STATIC(tcp_hwcsum_data);
  EVCNT_ATTACH_STATIC(tcp_swcsum);
  #endif /* INET */

  #ifdef INET6
  struct evcnt tcp6_hwcsum_bad =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "hwcsum bad");
  struct evcnt tcp6_hwcsum_ok =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "hwcsum ok");
  struct evcnt tcp6_hwcsum_data =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "hwcsum data");
  struct evcnt tcp6_swcsum =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp6", "swcsum");

  EVCNT_ATTACH_STATIC(tcp6_hwcsum_bad);
  EVCNT_ATTACH_STATIC(tcp6_hwcsum_ok);
  EVCNT_ATTACH_STATIC(tcp6_hwcsum_data);
  EVCNT_ATTACH_STATIC(tcp6_swcsum);
  #endif /* INET6 */
  #endif /* TCP_CSUM_COUNTERS */
  272 
  273 
  #ifdef TCP_OUTPUT_COUNTERS
  #include <sys/device.h>

  /*
   * Output-path event counters, all in the "tcp" group with no parent.
   */
  struct evcnt tcp_output_bigheader =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output big header");
  struct evcnt tcp_output_predict_hit =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output predict hit");
  struct evcnt tcp_output_predict_miss =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output predict miss");
  struct evcnt tcp_output_copysmall =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output copy small");
  struct evcnt tcp_output_copybig =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output copy big");
  struct evcnt tcp_output_refbig =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp", "output reference big");

  EVCNT_ATTACH_STATIC(tcp_output_bigheader);
  EVCNT_ATTACH_STATIC(tcp_output_predict_hit);
  EVCNT_ATTACH_STATIC(tcp_output_predict_miss);
  EVCNT_ATTACH_STATIC(tcp_output_copysmall);
  EVCNT_ATTACH_STATIC(tcp_output_copybig);
  EVCNT_ATTACH_STATIC(tcp_output_refbig);

  #endif /* TCP_OUTPUT_COUNTERS */
  298 
  #ifdef TCP_REASS_COUNTERS
  #include <sys/device.h>

  /*
   * Reassembly event counters.  tcp_reass_ counts calls and is the parent
   * of every other counter in the "tcp_reass" group.
   */
  struct evcnt tcp_reass_ =
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "tcp_reass", "calls");

  /* Shorthand for a child counter of tcp_reass_; local to this section. */
  #define TCP_REASS_EVCNT(desc) \
      EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", desc)

  struct evcnt tcp_reass_empty = TCP_REASS_EVCNT("insert into empty queue");

  /* Slot 0 holds the ">7 iterations" counter; slots 1-7 hold exact counts. */
  struct evcnt tcp_reass_iteration[8] = {
          TCP_REASS_EVCNT(">7 iterations"),       /* [0] */
          TCP_REASS_EVCNT("1 iteration"),         /* [1] */
          TCP_REASS_EVCNT("2 iterations"),        /* [2] */
          TCP_REASS_EVCNT("3 iterations"),        /* [3] */
          TCP_REASS_EVCNT("4 iterations"),        /* [4] */
          TCP_REASS_EVCNT("5 iterations"),        /* [5] */
          TCP_REASS_EVCNT("6 iterations"),        /* [6] */
          TCP_REASS_EVCNT("7 iterations"),        /* [7] */
  };

  struct evcnt tcp_reass_prependfirst = TCP_REASS_EVCNT("prepend to first");
  struct evcnt tcp_reass_prepend = TCP_REASS_EVCNT("prepend");
  struct evcnt tcp_reass_insert = TCP_REASS_EVCNT("insert");
  struct evcnt tcp_reass_inserttail = TCP_REASS_EVCNT("insert at tail");
  struct evcnt tcp_reass_append = TCP_REASS_EVCNT("append");
  struct evcnt tcp_reass_appendtail =
      TCP_REASS_EVCNT("append to tail fragment");
  struct evcnt tcp_reass_overlaptail = TCP_REASS_EVCNT("overlap at end");
  struct evcnt tcp_reass_overlapfront = TCP_REASS_EVCNT("overlap at start");
  struct evcnt tcp_reass_segdup = TCP_REASS_EVCNT("duplicate segment");
  struct evcnt tcp_reass_fragdup = TCP_REASS_EVCNT("duplicate fragment");

  #undef TCP_REASS_EVCNT

  EVCNT_ATTACH_STATIC(tcp_reass_);
  EVCNT_ATTACH_STATIC(tcp_reass_empty);
  EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 0);
  EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 1);
  EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 2);
  EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 3);
  EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 4);
  EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 5);
  EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 6);
  EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 7);
  EVCNT_ATTACH_STATIC(tcp_reass_prependfirst);
  EVCNT_ATTACH_STATIC(tcp_reass_prepend);
  EVCNT_ATTACH_STATIC(tcp_reass_insert);
  EVCNT_ATTACH_STATIC(tcp_reass_inserttail);
  EVCNT_ATTACH_STATIC(tcp_reass_append);
  EVCNT_ATTACH_STATIC(tcp_reass_appendtail);
  EVCNT_ATTACH_STATIC(tcp_reass_overlaptail);
  EVCNT_ATTACH_STATIC(tcp_reass_overlapfront);
  EVCNT_ATTACH_STATIC(tcp_reass_segdup);
  EVCNT_ATTACH_STATIC(tcp_reass_fragdup);

  #endif /* TCP_REASS_COUNTERS */
  359 
  #ifdef MBUFTRACE
  /* mbuf owner records for TCP; attached in tcp_init(). */
  struct mowner tcp_mowner         = MOWNER_INIT("tcp", "");
  struct mowner tcp_rx_mowner      = MOWNER_INIT("tcp", "rx");
  struct mowner tcp_tx_mowner      = MOWNER_INIT("tcp", "tx");
  struct mowner tcp_sock_mowner    = MOWNER_INIT("tcp", "sock");
  struct mowner tcp_sock_rx_mowner = MOWNER_INIT("tcp", "sock rx");
  struct mowner tcp_sock_tx_mowner = MOWNER_INIT("tcp", "sock tx");
  #endif /* MBUFTRACE */
  368 
  369 /*
  370  * Tcp initialization
  371  */
  372 void
  373 tcp_init(void)
  374 {
  375         int hlen;
  376 
  377         in_pcbinit(&tcbtable, tcbhashsize, tcbhashsize);
  378         pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcbpl",
  379             NULL, IPL_SOFTNET);
  380 
  381         hlen = sizeof(struct ip) + sizeof(struct tcphdr);
  382 #ifdef INET6
  383         if (sizeof(struct ip) < sizeof(struct ip6_hdr))
  384                 hlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
  385 #endif
  386         if (max_protohdr < hlen)
  387                 max_protohdr = hlen;
  388         if (max_linkhdr + hlen > MHLEN)
  389                 panic("tcp_init");
  390 
  391 #ifdef INET
  392         icmp_mtudisc_callback_register(tcp_mtudisc_callback);
  393 #endif
  394 #ifdef INET6
  395         icmp6_mtudisc_callback_register(tcp6_mtudisc_callback);
  396 #endif
  397 
  398         /* Initialize timer state. */
  399         tcp_timer_init();
  400 
  401         /* Initialize the compressed state engine. */
  402         syn_cache_init();
  403 
  404         /* Initialize the congestion control algorithms. */
  405         tcp_congctl_init();
  406 
  407         /* Initialize the TCPCB template. */
  408         tcp_tcpcb_template();
  409 
  410         MOWNER_ATTACH(&tcp_tx_mowner);
  411         MOWNER_ATTACH(&tcp_rx_mowner);
  412         MOWNER_ATTACH(&tcp_reass_mowner);
  413         MOWNER_ATTACH(&tcp_sock_mowner);
  414         MOWNER_ATTACH(&tcp_sock_tx_mowner);
  415         MOWNER_ATTACH(&tcp_sock_rx_mowner);
  416         MOWNER_ATTACH(&tcp_mowner);
  417 
  418         tcpstat_percpu = percpu_alloc(sizeof(uint64_t) * TCP_NSTATS);
  419 }
  420 
  421 /*
  422  * Create template to be used to send tcp packets on a connection.
  423  * Call after host entry created, allocates an mbuf and fills
  424  * in a skeletal tcp/ip header, minimizing the amount of work
  425  * necessary when the connection is used.
  426  */
  427 struct mbuf *
  428 tcp_template(struct tcpcb *tp)
  429 {
  430         struct inpcb *inp = tp->t_inpcb;
  431 #ifdef INET6
  432         struct in6pcb *in6p = tp->t_in6pcb;
  433 #endif
  434         struct tcphdr *n;
  435         struct mbuf *m;
  436         int hlen;
  437 
  438         switch (tp->t_family) {
  439         case AF_INET:
  440                 hlen = sizeof(struct ip);
  441                 if (inp)
  442                         break;
  443 #ifdef INET6
  444                 if (in6p) {
  445                         /* mapped addr case */
  446                         if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)
  447                          && IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr))
  448                                 break;
  449                 }
  450 #endif
  451                 return NULL;    /*EINVAL*/
  452 #ifdef INET6
  453         case AF_INET6:
  454                 hlen = sizeof(struct ip6_hdr);
  455                 if (in6p) {
  456                         /* more sainty check? */
  457                         break;
  458                 }
  459                 return NULL;    /*EINVAL*/
  460 #endif
  461         default:
  462                 hlen = 0;       /*pacify gcc*/
  463                 return NULL;    /*EAFNOSUPPORT*/
  464         }
  465 #ifdef DIAGNOSTIC
  466         if (hlen + sizeof(struct tcphdr) > MCLBYTES)
  467                 panic("mclbytes too small for t_template");
  468 #endif
  469         m = tp->t_template;
  470         if (m && m->m_len == hlen + sizeof(struct tcphdr))
  471                 ;
  472         else {
  473                 if (m)
  474                         m_freem(m);
  475                 m = tp->t_template = NULL;
  476                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
  477                 if (m && hlen + sizeof(struct tcphdr) > MHLEN) {
  478                         MCLGET(m, M_DONTWAIT);
  479                         if ((m->m_flags & M_EXT) == 0) {
  480                                 m_free(m);
  481                                 m = NULL;
  482                         }
  483                 }
  484                 if (m == NULL)
  485                         return NULL;
  486                 MCLAIM(m, &tcp_mowner);
  487                 m->m_pkthdr.len = m->m_len = hlen + sizeof(struct tcphdr);
  488         }
  489 
  490         bzero(mtod(m, void *), m->m_len);
  491 
  492         n = (struct tcphdr *)(mtod(m, char *) + hlen);
  493 
  494         switch (tp->t_family) {
  495         case AF_INET:
  496             {
  497                 struct ipovly *ipov;
  498                 mtod(m, struct ip *)->ip_v = 4;
  499                 mtod(m, struct ip *)->ip_hl = hlen >> 2;
  500                 ipov = mtod(m, struct ipovly *);
  501                 ipov->ih_pr = IPPROTO_TCP;
  502                 ipov->ih_len = htons(sizeof(struct tcphdr));
  503                 if (inp) {
  504                         ipov->ih_src = inp->inp_laddr;
  505                         ipov->ih_dst = inp->inp_faddr;
  506                 }
  507 #ifdef INET6
  508                 else if (in6p) {
  509                         /* mapped addr case */
  510                         bcopy(&in6p->in6p_laddr.s6_addr32[3], &ipov->ih_src,
  511                                 sizeof(ipov->ih_src));
  512                         bcopy(&in6p->in6p_faddr.s6_addr32[3], &ipov->ih_dst,
  513                                 sizeof(ipov->ih_dst));
  514                 }
  515 #endif
  516                 /*
  517                  * Compute the pseudo-header portion of the checksum
  518                  * now.  We incrementally add in the TCP option and
  519                  * payload lengths later, and then compute the TCP
  520                  * checksum right before the packet is sent off onto
  521                  * the wire.
  522                  */
  523                 n->th_sum = in_cksum_phdr(ipov->ih_src.s_addr,
  524                     ipov->ih_dst.s_addr,
  525                     htons(sizeof(struct tcphdr) + IPPROTO_TCP));
  526                 break;
  527             }
  528 #ifdef INET6
  529         case AF_INET6:
  530             {
  531                 struct ip6_hdr *ip6;
  532                 mtod(m, struct ip *)->ip_v = 6;
  533                 ip6 = mtod(m, struct ip6_hdr *);
  534                 ip6->ip6_nxt = IPPROTO_TCP;
  535                 ip6->ip6_plen = htons(sizeof(struct tcphdr));
  536                 ip6->ip6_src = in6p->in6p_laddr;
  537                 ip6->ip6_dst = in6p->in6p_faddr;
  538                 ip6->ip6_flow = in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK;
  539                 if (ip6_auto_flowlabel) {
  540                         ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK;
  541                         ip6->ip6_flow |=
  542                             (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
  543                 }
  544                 ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
  545                 ip6->ip6_vfc |= IPV6_VERSION;
  546 
  547                 /*
  548                  * Compute the pseudo-header portion of the checksum
  549                  * now.  We incrementally add in the TCP option and
  550                  * payload lengths later, and then compute the TCP
  551                  * checksum right before the packet is sent off onto
  552                  * the wire.
  553                  */
  554                 n->th_sum = in6_cksum_phdr(&in6p->in6p_laddr,
  555                     &in6p->in6p_faddr, htonl(sizeof(struct tcphdr)),
  556                     htonl(IPPROTO_TCP));
  557                 break;
  558             }
  559 #endif
  560         }
  561         if (inp) {
  562                 n->th_sport = inp->inp_lport;
  563                 n->th_dport = inp->inp_fport;
  564         }
  565 #ifdef INET6
  566         else if (in6p) {
  567                 n->th_sport = in6p->in6p_lport;
  568                 n->th_dport = in6p->in6p_fport;
  569         }
  570 #endif
  571         n->th_seq = 0;
  572         n->th_ack = 0;
  573         n->th_x2 = 0;
  574         n->th_off = 5;
  575         n->th_flags = 0;
  576         n->th_win = 0;
  577         n->th_urp = 0;
  578         return (m);
  579 }
  580 
  581 /*
  582  * Send a single message to the TCP at address specified by
  583  * the given TCP/IP header.  If m == 0, then we make a copy
  584  * of the tcpiphdr at ti and send directly to the addressed host.
  585  * This is used to force keep alive messages out using the TCP
  586  * template for a connection tp->t_template.  If flags are given
  587  * then we send a message back to the TCP which originated the
  588  * segment ti, and discard the mbuf containing it and any other
  589  * attached mbufs.
  590  *
  591  * In any case the ack and sequence number of the transmitted
  592  * segment are as specified by the parameters.
  593  */
  594 int
  595 tcp_respond(struct tcpcb *tp, struct mbuf *template, struct mbuf *m,
  596     struct tcphdr *th0, tcp_seq ack, tcp_seq seq, int flags)
  597 {
  598 #ifdef INET6
  599         struct rtentry *rt;
  600 #endif
  601         struct route *ro;
  602         int error, tlen, win = 0;
  603         int hlen;
  604         struct ip *ip;
  605 #ifdef INET6
  606         struct ip6_hdr *ip6;
  607 #endif
  608         int family;     /* family on packet, not inpcb/in6pcb! */
  609         struct tcphdr *th;
  610         struct socket *so;
  611 
  612         if (tp != NULL && (flags & TH_RST) == 0) {
  613 #ifdef DIAGNOSTIC
  614                 if (tp->t_inpcb && tp->t_in6pcb)
  615                         panic("tcp_respond: both t_inpcb and t_in6pcb are set");
  616 #endif
  617 #ifdef INET
  618                 if (tp->t_inpcb)
  619                         win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
  620 #endif
  621 #ifdef INET6
  622                 if (tp->t_in6pcb)
  623                         win = sbspace(&tp->t_in6pcb->in6p_socket->so_rcv);
  624 #endif
  625         }
  626 
  627         th = NULL;      /* Quell uninitialized warning */
  628         ip = NULL;
  629 #ifdef INET6
  630         ip6 = NULL;
  631 #endif
  632         if (m == 0) {
  633                 if (!template)
  634                         return EINVAL;
  635 
  636                 /* get family information from template */
  637                 switch (mtod(template, struct ip *)->ip_v) {
  638                 case 4:
  639                         family = AF_INET;
  640                         hlen = sizeof(struct ip);
  641                         break;
  642 #ifdef INET6
  643                 case 6:
  644                         family = AF_INET6;
  645                         hlen = sizeof(struct ip6_hdr);
  646                         break;
  647 #endif
  648                 default:
  649                         return EAFNOSUPPORT;
  650                 }
  651 
  652                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
  653                 if (m) {
  654                         MCLAIM(m, &tcp_tx_mowner);
  655                         MCLGET(m, M_DONTWAIT);
  656                         if ((m->m_flags & M_EXT) == 0) {
  657                                 m_free(m);
  658                                 m = NULL;
  659                         }
  660                 }
  661                 if (m == NULL)
  662                         return (ENOBUFS);
  663 
  664                 if (tcp_compat_42)
  665                         tlen = 1;
  666                 else
  667                         tlen = 0;
  668 
  669                 m->m_data += max_linkhdr;
  670                 bcopy(mtod(template, void *), mtod(m, void *),
  671                         template->m_len);
  672                 switch (family) {
  673                 case AF_INET:
  674                         ip = mtod(m, struct ip *);
  675                         th = (struct tcphdr *)(ip + 1);
  676                         break;
  677 #ifdef INET6
  678                 case AF_INET6:
  679                         ip6 = mtod(m, struct ip6_hdr *);
  680                         th = (struct tcphdr *)(ip6 + 1);
  681                         break;
  682 #endif
  683 #if 0
  684                 default:
  685                         /* noone will visit here */
  686                         m_freem(m);
  687                         return EAFNOSUPPORT;
  688 #endif
  689                 }
  690                 flags = TH_ACK;
  691         } else {
  692 
  693                 if ((m->m_flags & M_PKTHDR) == 0) {
  694 #if 0
  695                         printf("non PKTHDR to tcp_respond\n");
  696 #endif
  697                         m_freem(m);
  698                         return EINVAL;
  699                 }
  700 #ifdef DIAGNOSTIC
  701                 if (!th0)
  702                         panic("th0 == NULL in tcp_respond");
  703 #endif
  704 
  705                 /* get family information from m */
  706                 switch (mtod(m, struct ip *)->ip_v) {
  707                 case 4:
  708                         family = AF_INET;
  709                         hlen = sizeof(struct ip);
  710                         ip = mtod(m, struct ip *);
  711                         break;
  712 #ifdef INET6
  713                 case 6:
  714                         family = AF_INET6;
  715                         hlen = sizeof(struct ip6_hdr);
  716                         ip6 = mtod(m, struct ip6_hdr *);
  717                         break;
  718 #endif
  719                 default:
  720                         m_freem(m);
  721                         return EAFNOSUPPORT;
  722                 }
  723                 /* clear h/w csum flags inherited from rx packet */
  724                 m->m_pkthdr.csum_flags = 0;
  725 
  726                 if ((flags & TH_SYN) == 0 || sizeof(*th0) > (th0->th_off << 2))
  727                         tlen = sizeof(*th0);
  728                 else
  729                         tlen = th0->th_off << 2;
  730 
  731                 if (m->m_len > hlen + tlen && (m->m_flags & M_EXT) == 0 &&
  732                     mtod(m, char *) + hlen == (char *)th0) {
  733                         m->m_len = hlen + tlen;
  734                         m_freem(m->m_next);
  735                         m->m_next = NULL;
  736                 } else {
  737                         struct mbuf *n;
  738 
  739 #ifdef DIAGNOSTIC
  740                         if (max_linkhdr + hlen + tlen > MCLBYTES) {
  741                                 m_freem(m);
  742                                 return EMSGSIZE;
  743                         }
  744 #endif
  745                         MGETHDR(n, M_DONTWAIT, MT_HEADER);
  746                         if (n && max_linkhdr + hlen + tlen > MHLEN) {
  747                                 MCLGET(n, M_DONTWAIT);
  748                                 if ((n->m_flags & M_EXT) == 0) {
  749                                         m_freem(n);
  750                                         n = NULL;
  751                                 }
  752                         }
  753                         if (!n) {
  754                                 m_freem(m);
  755                                 return ENOBUFS;
  756                         }
  757 
  758                         MCLAIM(n, &tcp_tx_mowner);
  759                         n->m_data += max_linkhdr;
  760                         n->m_len = hlen + tlen;
  761                         m_copyback(n, 0, hlen, mtod(m, void *));
  762                         m_copyback(n, hlen, tlen, (void *)th0);
  763 
  764                         m_freem(m);
  765                         m = n;
  766                         n = NULL;
  767                 }
  768 
  769 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
  770                 switch (family) {
  771                 case AF_INET:
  772                         ip = mtod(m, struct ip *);
  773                         th = (struct tcphdr *)(ip + 1);
  774                         ip->ip_p = IPPROTO_TCP;
  775                         xchg(ip->ip_dst, ip->ip_src, struct in_addr);
  776                         ip->ip_p = IPPROTO_TCP;
  777                         break;
  778 #ifdef INET6
  779                 case AF_INET6:
  780                         ip6 = mtod(m, struct ip6_hdr *);
  781                         th = (struct tcphdr *)(ip6 + 1);
  782                         ip6->ip6_nxt = IPPROTO_TCP;
  783                         xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
  784                         ip6->ip6_nxt = IPPROTO_TCP;
  785                         break;
  786 #endif
  787 #if 0
  788                 default:
  789                         /* noone will visit here */
  790                         m_freem(m);
  791                         return EAFNOSUPPORT;
  792 #endif
  793                 }
  794                 xchg(th->th_dport, th->th_sport, u_int16_t);
  795 #undef xchg
  796                 tlen = 0;       /*be friendly with the following code*/
  797         }
  798         th->th_seq = htonl(seq);
  799         th->th_ack = htonl(ack);
  800         th->th_x2 = 0;
  801         if ((flags & TH_SYN) == 0) {
  802                 if (tp)
  803                         win >>= tp->rcv_scale;
  804                 if (win > TCP_MAXWIN)
  805                         win = TCP_MAXWIN;
  806                 th->th_win = htons((u_int16_t)win);
  807                 th->th_off = sizeof (struct tcphdr) >> 2;
  808                 tlen += sizeof(*th);
  809         } else
  810                 tlen += th->th_off << 2;
  811         m->m_len = hlen + tlen;
  812         m->m_pkthdr.len = hlen + tlen;
  813         m->m_pkthdr.rcvif = (struct ifnet *) 0;
  814         th->th_flags = flags;
  815         th->th_urp = 0;
  816 
  817         switch (family) {
  818 #ifdef INET
  819         case AF_INET:
  820             {
  821                 struct ipovly *ipov = (struct ipovly *)ip;
  822                 bzero(ipov->ih_x1, sizeof ipov->ih_x1);
  823                 ipov->ih_len = htons((u_int16_t)tlen);
  824 
  825                 th->th_sum = 0;
  826                 th->th_sum = in_cksum(m, hlen + tlen);
  827                 ip->ip_len = htons(hlen + tlen);
  828                 ip->ip_ttl = ip_defttl;
  829                 break;
  830             }
  831 #endif
  832 #ifdef INET6
  833         case AF_INET6:
  834             {
  835                 th->th_sum = 0;
  836                 th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
  837                                 tlen);
  838                 ip6->ip6_plen = htons(tlen);
  839                 if (tp && tp->t_in6pcb) {
  840                         struct ifnet *oifp;
  841                         ro = &tp->t_in6pcb->in6p_route;
  842                         oifp = (rt = rtcache_validate(ro)) != NULL ? rt->rt_ifp
  843                                                                    : NULL;
  844                         ip6->ip6_hlim = in6_selecthlim(tp->t_in6pcb, oifp);
  845                 } else
  846                         ip6->ip6_hlim = ip6_defhlim;
  847                 ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK;
  848                 if (ip6_auto_flowlabel) {
  849                         ip6->ip6_flow |=
  850                             (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
  851                 }
  852                 break;
  853             }
  854 #endif
  855         }
  856 
  857         if (tp && tp->t_inpcb)
  858                 so = tp->t_inpcb->inp_socket;
  859 #ifdef INET6
  860         else if (tp && tp->t_in6pcb)
  861                 so = tp->t_in6pcb->in6p_socket;
  862 #endif
  863         else
  864                 so = NULL;
  865 
  866         if (tp != NULL && tp->t_inpcb != NULL) {
  867                 ro = &tp->t_inpcb->inp_route;
  868 #ifdef DIAGNOSTIC
  869                 if (family != AF_INET)
  870                         panic("tcp_respond: address family mismatch");
  871                 if (!in_hosteq(ip->ip_dst, tp->t_inpcb->inp_faddr)) {
  872                         panic("tcp_respond: ip_dst %x != inp_faddr %x",
  873                             ntohl(ip->ip_dst.s_addr),
  874                             ntohl(tp->t_inpcb->inp_faddr.s_addr));
  875                 }
  876 #endif
  877         }
  878 #ifdef INET6
  879         else if (tp != NULL && tp->t_in6pcb != NULL) {
  880                 ro = (struct route *)&tp->t_in6pcb->in6p_route;
  881 #ifdef DIAGNOSTIC
  882                 if (family == AF_INET) {
  883                         if (!IN6_IS_ADDR_V4MAPPED(&tp->t_in6pcb->in6p_faddr))
  884                                 panic("tcp_respond: not mapped addr");
  885                         if (bcmp(&ip->ip_dst,
  886                             &tp->t_in6pcb->in6p_faddr.s6_addr32[3],
  887                             sizeof(ip->ip_dst)) != 0) {
  888                                 panic("tcp_respond: ip_dst != in6p_faddr");
  889                         }
  890                 } else if (family == AF_INET6) {
  891                         if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
  892                             &tp->t_in6pcb->in6p_faddr))
  893                                 panic("tcp_respond: ip6_dst != in6p_faddr");
  894                 } else
  895                         panic("tcp_respond: address family mismatch");
  896 #endif
  897         }
  898 #endif
  899         else
  900                 ro = NULL;
  901 
  902         switch (family) {
  903 #ifdef INET
  904         case AF_INET:
  905                 error = ip_output(m, NULL, ro,
  906                     (tp && tp->t_mtudisc ? IP_MTUDISC : 0),
  907                     (struct ip_moptions *)0, so);
  908                 break;
  909 #endif
  910 #ifdef INET6
  911         case AF_INET6:
  912                 error = ip6_output(m, NULL, ro, 0, NULL, so, NULL);
  913                 break;
  914 #endif
  915         default:
  916                 error = EAFNOSUPPORT;
  917                 break;
  918         }
  919 
  920         return (error);
  921 }
  922 
  923 /*
  924  * Template TCPCB.  Rather than zeroing a new TCPCB and initializing
  925  * a bunch of members individually, we maintain this template for the
  926  * static and mostly-static components of the TCPCB, and copy it into
  927  * the new TCPCB instead.
       *
       * Only compile-time constants are set in this initializer; members that
       * are not named here start out zero.  tcp_tcpcb_template() fills in the
       * members that depend on tunables, and tcp_newtcpcb() copies the whole
       * template into each TCPCB it allocates.
  928  */
  929 static struct tcpcb tcpcb_template = {
              /* RTT state: srtt starts at its base value, rttmin at TCPTV_MIN. */
  930         .t_srtt = TCPTV_SRTTBASE,
  931         .t_rttmin = TCPTV_MIN,
  932 
              /*
               * The congestion window and the slow-start threshold both start
               * at TCP_MAXWIN shifted by the maximum window shift.
               */
  933         .snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT,
  934         .snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT,
  935         .snd_numholes = 0,
  936 
  937         .t_partialacks = -1,
  938         .t_bytes_acked = 0,
  939 };
  940 
  941 /*
  942  * Updates the TCPCB template whenever a parameter that would affect
  943  * the template is changed.
       *
       * Takes no arguments and returns nothing.  It only rewrites members of
       * the file-static tcpcb_template from the current values of the tcp_*
       * variables read below; TCPCBs that already exist are not touched here.
  944  */
  945 void
  946 tcp_tcpcb_template(void)
  947 {
  948         struct tcpcb *tp = &tcpcb_template;
  949         int flags;
  950 
              /* All three segment-size members start from the default MSS. */
  951         tp->t_peermss = tcp_mssdflt;
  952         tp->t_ourmss = tcp_mssdflt;
  953         tp->t_segsz = tcp_mssdflt;
  954 
              /*
               * Request window scaling and/or timestamps only when RFC 1323
               * support is enabled together with the specific option.  The
               * template's t_flags is replaced outright, not OR-ed into.
               */
  955         flags = 0;
  956         if (tcp_do_rfc1323 && tcp_do_win_scale)
  957                 flags |= TF_REQ_SCALE;
  958         if (tcp_do_rfc1323 && tcp_do_timestamps)
  959                 flags |= TF_REQ_TSTMP;
  960         tp->t_flags = flags;
  961 
  962         /*
  963          * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
  964          * rtt estimate.  Set rttvar so that srtt + 2 * rttvar gives
  965          * reasonable initial retransmit time.
               *
               * The multiplication binds tighter than the shift, so the value
               * is (tcp_rttdflt * PR_SLOWHZ) shifted left.
               *
               * NOTE(review): TCPT_RANGESET presumably clamps t_rxtcur to the
               * range [TCPTV_MIN, TCPTV_REXMTMAX]; the macro is not visible
               * in this part of the file.
  966          */
  967         tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << (TCP_RTTVAR_SHIFT + 2 - 1);
  968         TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
  969             TCPTV_MIN, TCPTV_REXMTMAX);
  970 
  971         /* Keep Alive: copied from the tcp_keep* variables; t_maxidle is count * interval. */
  972         tp->t_keepinit = tcp_keepinit;
  973         tp->t_keepidle = tcp_keepidle;
  974         tp->t_keepintvl = tcp_keepintvl;
  975         tp->t_keepcnt = tcp_keepcnt;
  976         tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl;
  977 }
  978 
  979 /*
  980  * Create a new TCP control block, making an
  981  * empty reassembly queue and hooking it to the argument
  982  * protocol control block.
       *
       * family: AF_INET, or AF_INET6 when INET6 is configured; selects how
       *         aux is interpreted.
       * aux:    the protocol control block to attach to: a struct inpcb *
       *         for AF_INET, a struct in6pcb * for AF_INET6.
       *
       * Returns the new TCPCB, or NULL if pool_get() could not supply one
       * or family is not one of the values handled below.
  983  */
  984 /* family selects inpcb, or in6pcb */
  985 struct tcpcb *
  986 tcp_newtcpcb(int family, void *aux)
  987 {
  988 #ifdef INET6
  989         struct rtentry *rt;
  990 #endif
  991         struct tcpcb *tp;
  992         int i;
  993 
  994         /* XXX Consider using a pool_cache for speed. */
  995         tp = pool_get(&tcpcb_pool, PR_NOWAIT);  /* splsoftnet via tcp_usrreq */
  996         if (tp == NULL)
  997                 return (NULL);
              /* Start from the template, then set up the per-connection queues. */
  998         memcpy(tp, &tcpcb_template, sizeof(*tp));
  999         TAILQ_INIT(&tp->segq);
 1000         TAILQ_INIT(&tp->timeq);
 1001         tp->t_family = family;          /* may be overridden later on */
 1002         TAILQ_INIT(&tp->snd_holes);
 1003         LIST_INIT(&tp->t_sc);           /* XXX can template this */
 1004 
 1005         /* Don't sweat this loop; hopefully the compiler will unroll it. */
 1006         for (i = 0; i < TCPT_NTIMERS; i++) {
 1007                 callout_init(&tp->t_timer[i], CALLOUT_MPSAFE);
 1008                 TCP_TIMER_INIT(tp, i);
 1009         }
 1010         callout_init(&tp->t_delack_ch, CALLOUT_MPSAFE);
 1011 
              /* Link the PCB and the TCPCB to each other, per address family. */
 1012         switch (family) {
 1013         case AF_INET:
 1014             {
 1015                 struct inpcb *inp = (struct inpcb *)aux;
 1016 
                      /* TTL and MTU discovery follow ip_defttl and ip_mtudisc. */
 1017                 inp->inp_ip.ip_ttl = ip_defttl;
 1018                 inp->inp_ppcb = (void *)tp;
 1019 
 1020                 tp->t_inpcb = inp;
 1021                 tp->t_mtudisc = ip_mtudisc;
 1022                 break;
 1023             }
 1024 #ifdef INET6
 1025         case AF_INET6:
 1026             {
 1027                 struct in6pcb *in6p = (struct in6pcb *)aux;
 1028 
                      /*
                       * The hop limit comes from in6_selecthlim(), which is
                       * given the interface of the PCB's cached route when
                       * rtcache_validate() returns one, and NULL otherwise.
                       */
 1029                 in6p->in6p_ip6.ip6_hlim = in6_selecthlim(in6p,
 1030                         (rt = rtcache_validate(&in6p->in6p_route)) != NULL
 1031                             ? rt->rt_ifp
 1032                             : NULL);
 1033                 in6p->in6p_ppcb = (void *)tp;
 1034 
 1035                 tp->t_in6pcb = in6p;
 1036                 /* for IPv6, always try to run path MTU discovery */
 1037                 tp->t_mtudisc = 1;
 1038                 break;
 1039             }
 1040 #endif /* INET6 */
 1041         default:
                      /*
                       * Unhandled family: undo the callout initialization done
                       * above and give the TCPCB back to the pool.
                       */
 1042                 for (i = 0; i < TCPT_NTIMERS; i++)
 1043                         callout_destroy(&tp->t_timer[i]);
 1044                 callout_destroy(&tp->t_delack_ch);
 1045                 pool_put(&tcpcb_pool, tp);      /* splsoftnet via tcp_usrreq */
 1046                 return (NULL);
 1047         }
 1048 
 1049         /*
 1050          * Initialize our timebase.  When we send timestamps, we take
 1051          * the delta from tcp_now -- this means each connection always
 1052          * gets a timebase of 1, which makes it, among other things,
 1053          * more difficult to determine how long a system has been up,
 1054          * and thus how many TCP sequence increments have occurred.
 1055          *
 1056          * We start with 1, because 0 doesn't work with linux, which
 1057          * considers timestamp 0 in a SYN packet as a bug and disables
 1058          * timestamps.
 1059          */
 1060         tp->ts_timebase = tcp_now - 1;
 1061         
              /* Attach the congestion control algorithm named by the global setting. */
 1062         tcp_congctl_select(tp, tcp_congctl_global_name);
 1063 
 1064         return (tp);
 1065 }
 1066 
 1067 /*
 1068  * Drop a TCP connection, reporting
 1069  * the specified error.  If connection is synchronized,
 1070  * then send a RST to peer.
       *
       * tp:    connection to drop.
       * errno: error to record in the socket's so_error.  ETIMEDOUT is
       *        replaced by tp->t_softerror when that is non-zero.
       *
       * Returns NULL without changing anything if tp has no associated
       * socket; otherwise returns whatever tcp_close() returns.
 1071  */
 1072 struct tcpcb *
 1073 tcp_drop(struct tcpcb *tp, int errno)
 1074 {
 1075         struct socket *so = NULL;
 1076 
 1077 #ifdef DIAGNOSTIC
 1078         if (tp->t_inpcb && tp->t_in6pcb)
 1079                 panic("tcp_drop: both t_inpcb and t_in6pcb are set");
 1080 #endif
              /* Find the socket through whichever PCB is attached. */
 1081 #ifdef INET
 1082         if (tp->t_inpcb)
 1083                 so = tp->t_inpcb->inp_socket;
 1084 #endif
 1085 #ifdef INET6
 1086         if (tp->t_in6pcb)
 1087                 so = tp->t_in6pcb->in6p_socket;
 1088 #endif
 1089         if (!so)
 1090                 return NULL;
 1091 
              /*
               * Once a SYN has been received, move to CLOSED and run
               * tcp_output() one more time (per the header comment, this is
               * what sends the RST).  The two cases are counted separately.
               */
 1092         if (TCPS_HAVERCVDSYN(tp->t_state)) {
 1093                 tp->t_state = TCPS_CLOSED;
 1094                 (void) tcp_output(tp);
 1095                 TCP_STATINC(TCP_STAT_DROPS);
 1096         } else
 1097                 TCP_STATINC(TCP_STAT_CONNDROPS);
              /* Prefer a recorded soft error over a bare timeout. */
 1098         if (errno == ETIMEDOUT && tp->t_softerror)
 1099                 errno = tp->t_softerror;
 1100         so->so_error = errno;
 1101         return (tcp_close(tp));
 1102 }
 1103 
 1104 /*
 1105  * Close a TCP control block:
 1106  *      discard all space held by the tcp
 1107  *      discard internet protocol block
 1108  *      wake up any sleepers
       *
       * When RTV_RTT is defined and enough data was sent, the connection's
       * RTT, RTT variance and slow-start threshold are first folded into the
       * metrics of the cached route.  The reassembly queue, SACK holes,
       * congestion control state, syn cache entries and header template are
       * then released, the PCB is detached, the timers are halted and
       * destroyed, and tp goes back to tcpcb_pool.
       *
       * tp must not be used after this call.  Always returns NULL.
 1109  */
 1110 struct tcpcb *
 1111 tcp_close(struct tcpcb *tp)
 1112 {
 1113         struct inpcb *inp;
 1114 #ifdef INET6
 1115         struct in6pcb *in6p;
 1116 #endif
 1117         struct socket *so;
 1118 #ifdef RTV_RTT
 1119         struct rtentry *rt;
 1120 #endif
 1121         struct route *ro;
 1122         int j;
 1123 
              /* Pick up the socket and cached route from whichever PCB is attached. */
 1124         inp = tp->t_inpcb;
 1125 #ifdef INET6
 1126         in6p = tp->t_in6pcb;
 1127 #endif
 1128         so = NULL;
 1129         ro = NULL;
 1130         if (inp) {
 1131                 so = inp->inp_socket;
 1132                 ro = &inp->inp_route;
 1133         }
 1134 #ifdef INET6
 1135         else if (in6p) {
 1136                 so = in6p->in6p_socket;
 1137                 ro = (struct route *)&in6p->in6p_route;
 1138         }
 1139 #endif
 1140 
 1141 #ifdef RTV_RTT
 1142         /*
 1143          * If we sent enough data to get some meaningful characteristics,
 1144          * save them in the routing entry.  'Enough' is arbitrarily
 1145          * defined as the sendpipesize (default 4K) * 16.  This would
 1146          * give us 16 rtt samples assuming we only get one sample per
 1147          * window (the usual case on a long haul net).  16 samples is
 1148          * enough for the srtt filter to converge to within 5% of the correct
 1149          * value; fewer samples and we could save a very bogus rtt.
 1150          *
 1151          * Don't update the default route's characteristics and don't
 1152          * update anything that the user "locked".
               *
               * NOTE(review): so is dereferenced in the first operand below
               * before ro is tested; both are still NULL when neither inp nor
               * in6p is set -- confirm callers never pass such a tp.
 1153          */
 1154         if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
 1155             ro && (rt = rtcache_validate(ro)) != NULL &&
 1156             !in_nullhost(satocsin(rt_getkey(rt))->sin_addr)) {
 1157                 u_long i = 0;
 1158 
 1159                 if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
 1160                         i = tp->t_srtt *
 1161                             ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));
 1162                         if (rt->rt_rmx.rmx_rtt && i)
 1163                                 /*
 1164                                  * filter this update to half the old & half
 1165                                  * the new values, converting scale.
 1166                                  * See route.h and tcp_var.h for a
 1167                                  * description of the scaling constants.
 1168                                  */
 1169                                 rt->rt_rmx.rmx_rtt =
 1170                                     (rt->rt_rmx.rmx_rtt + i) / 2;
 1171                         else
 1172                                 rt->rt_rmx.rmx_rtt = i;
 1173                 }
                      /* Same half-old/half-new filter for the RTT variance. */
 1174                 if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
 1175                         i = tp->t_rttvar *
 1176                             ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTTVAR_SHIFT + 2));
 1177                         if (rt->rt_rmx.rmx_rttvar && i)
 1178                                 rt->rt_rmx.rmx_rttvar =
 1179                                     (rt->rt_rmx.rmx_rttvar + i) / 2;
 1180                         else
 1181                                 rt->rt_rmx.rmx_rttvar = i;
 1182                 }
 1183                 /*
 1184                  * update the pipelimit (ssthresh) if it has been updated
 1185                  * already or if a pipesize was specified & the threshold
 1186                  * got below half the pipesize.  I.e., wait for bad news
 1187                  * before we start updating, then update on both good
 1188                  * and bad news.
                       *
                       * NOTE(review): when RTV_SSTHRESH is locked, the first
                       * operand short-circuits before i is loaded from
                       * snd_ssthresh, so the right-hand comparison sees
                       * whatever i held from the RTT/RTTVAR steps above (or
                       * its initial 0) -- confirm this is intended.
 1189                  */
 1190                 if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
 1191                     (i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh) ||
 1192                     i < (rt->rt_rmx.rmx_sendpipe / 2)) {
 1193                         /*
 1194                          * convert the limit from user data bytes to
 1195                          * packets then to packet data bytes.
 1196                          */
 1197                         i = (i + tp->t_segsz / 2) / tp->t_segsz;
 1198                         if (i < 2)
 1199                                 i = 2;
 1200                         i *= (u_long)(tp->t_segsz + sizeof (struct tcpiphdr));
 1201                         if (rt->rt_rmx.rmx_ssthresh)
 1202                                 rt->rt_rmx.rmx_ssthresh =
 1203                                     (rt->rt_rmx.rmx_ssthresh + i) / 2;
 1204                         else
 1205                                 rt->rt_rmx.rmx_ssthresh = i;
 1206                 }
 1207         }
 1208 #endif /* RTV_RTT */
 1209         /* free the reassembly queue, if any */
 1210         TCP_REASS_LOCK(tp);
 1211         (void) tcp_freeq(tp);
 1212         TCP_REASS_UNLOCK(tp);
 1213 
 1214         /* free the SACK holes list. */
 1215         tcp_free_sackholes(tp); 
 1216         tcp_congctl_release(tp);
 1217         syn_cache_cleanup(tp);
 1218 
              /* Release the header template mbuf, if one was allocated. */
 1219         if (tp->t_template) {
 1220                 m_free(tp->t_template);
 1221                 tp->t_template = NULL;
 1222         }
 1223 
 1224         /*
 1225          * Detaching the pcb will unlock the socket/tcpcb, and stopping
 1226          * the timers can also drop the lock.  We need to prevent access
 1227          * to the tcpcb as it's half torn down.  Flag the pcb as dead
 1228          * (prevents access by timers) and only then detach it.
 1229          */
 1230         tp->t_flags |= TF_DEAD;
 1231         if (inp) {
 1232                 inp->inp_ppcb = 0;
 1233                 soisdisconnected(so);
 1234                 in_pcbdetach(inp);
 1235         }
 1236 #ifdef INET6
 1237         else if (in6p) {
 1238                 in6p->in6p_ppcb = 0;
 1239                 soisdisconnected(so);
 1240                 in6_pcbdetach(in6p);
 1241         }
 1242 #endif
 1243         /*
 1244          * pcb is no longer visible elsewhere, so we can safely release
 1245          * the lock in callout_halt() if needed.
 1246          */
 1247         TCP_STATINC(TCP_STAT_CLOSED);
              /* Halt each timer before destroying it, then do the same for delayed ACK. */
 1248         for (j = 0; j < TCPT_NTIMERS; j++) {
 1249                 callout_halt(&tp->t_timer[j], softnet_lock);
 1250                 callout_destroy(&tp->t_timer[j]);
 1251         }
 1252         callout_halt(&tp->t_delack_ch, softnet_lock);
 1253         callout_destroy(&tp->t_delack_ch);
 1254         pool_put(&tcpcb_pool, tp);
 1255 
 1256         return ((struct tcpcb *)0);
 1257 }
 1258 
      /*
       * Free every entry on tp's reassembly queue.
       *
       * Each entry is unlinked from both tp->segq and tp->timeq, its mbuf
       * chain is freed and the entry itself is released; t_segqlen is then
       * reset to 0.  The callers in this file take the reassembly lock
       * (TCP_REASS_LOCK() or tcp_reass_lock_try()) before calling.
       *
       * Returns 1 if at least one entry was freed, 0 if the queue was
       * already empty.
       */
 1259 int
 1260 tcp_freeq(struct tcpcb *tp)
 1261 {
 1262         struct ipqent *qe;
 1263         int rv = 0;
 1264 #ifdef TCPREASS_DEBUG
 1265         int i = 0;
 1266 #endif
 1267 
              /* NOTE(review): presumably asserts that the reassembly lock is held. */
 1268         TCP_REASS_LOCK_CHECK(tp);
 1269 
              /* Pop from the head until the segment queue is empty. */
 1270         while ((qe = TAILQ_FIRST(&tp->segq)) != NULL) {
 1271 #ifdef TCPREASS_DEBUG
 1272                 printf("tcp_freeq[%p,%d]: %u:%u(%u) 0x%02x\n",
 1273                         tp, i++, qe->ipqe_seq, qe->ipqe_seq + qe->ipqe_len,
 1274                         qe->ipqe_len, qe->ipqe_flags & (TH_SYN|TH_FIN|TH_RST));
 1275 #endif
 1276                 TAILQ_REMOVE(&tp->segq, qe, ipqe_q);
 1277                 TAILQ_REMOVE(&tp->timeq, qe, ipqe_timeq);
 1278                 m_freem(qe->ipqe_m);
 1279                 tcpipqent_free(qe);
 1280                 rv = 1;
 1281         }
 1282         tp->t_segqlen = 0;
              /* Every entry sits on both queues, so timeq must now be empty too. */
 1283         KASSERT(TAILQ_EMPTY(&tp->timeq));
 1284         return (rv);
 1285 }
 1286 
 1287 /*
 1288  * Protocol drain routine.  Called when memory is in short supply.
 1289  * Don't acquire softnet_lock, as this can be called from a hardware
 1290  * interrupt handler.
       *
       * Walks every PCB on tcbtable.inpt_queue between KERNEL_LOCK() and
       * KERNEL_UNLOCK_ONE() and frees the reassembly queue of each attached
       * TCPCB whose reassembly lock tcp_reass_lock_try() obtains.
       * TCP_STAT_CONNSDRAINED is bumped once per connection for which
       * tcp_freeq() reports that it freed something.
 1291  */
 1292 void
 1293 tcp_drain(void)
 1294 {
 1295         struct inpcb_hdr *inph;
 1296         struct tcpcb *tp;
 1297 
 1298         KERNEL_LOCK(1, NULL);
 1299 
 1300         /*
 1301          * Free the sequence queue of all TCP connections.
 1302          */
 1303         CIRCLEQ_FOREACH(inph, &tcbtable.inpt_queue, inph_queue) {
                      /* Map the generic PCB header to its TCPCB by address family. */
 1304                 switch (inph->inph_af) {
 1305                 case AF_INET:
 1306                         tp = intotcpcb((struct inpcb *)inph);
 1307                         break;
 1308 #ifdef INET6
 1309                 case AF_INET6:
 1310                         tp = in6totcpcb((struct in6pcb *)inph);
 1311                         break;
 1312 #endif
 1313                 default:
 1314                         tp = NULL;
 1315                         break;
 1316                 }
 1317                 if (tp != NULL) {
 1318                         /*
 1319                          * We may be called from a device's interrupt
 1320                          * context.  If the tcpcb is already busy,
 1321                          * just bail out now.
 1322                          */
 1323                         if (tcp_reass_lock_try(tp) == 0)
 1324                                 continue;
 1325                         if (tcp_freeq(tp))
 1326                                 TCP_STATINC(TCP_STAT_CONNSDRAINED);
 1327                         TCP_REASS_UNLOCK(tp);
 1328                 }
 1329         }
 1330 
 1331         KERNEL_UNLOCK_ONE(NULL);
 1332 }
 1333 
 1334 /*
 1335  * Notify a tcp user of an asynchronous error;
 1336  * store error as soft error, but wake up user
 1337  * (for now, won't do anything until can select for soft error).
       *
       * inp:   IPv4 PCB whose inp_ppcb is the affected TCPCB.
       * error: errno value describing the condition.
       *
       * NOTE(review): inp->inp_ppcb is dereferenced without a NULL check;
       * presumably callers only pass PCBs that still have a TCPCB attached.
 1338  */
 1339 void
 1340 tcp_notify(struct inpcb *inp, int error)
 1341 {
 1342         struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
 1343         struct socket *so = inp->inp_socket;
 1344 
 1345         /*
 1346          * Ignore some errors if we are hooked up.
 1347          * If connection hasn't completed, has retransmitted several times,
 1348          * and receives a second error, give up now.  This is better
 1349          * than waiting a long time to establish a connection that
 1350          * can never complete.
               *
               * Three outcomes:
               *  - ESTABLISHED and an unreachable/host-down error: return at
               *    once, with no state change and no wakeup;
               *  - not yet established, t_rxtshift > 3 and a soft error is
               *    already recorded: report error through so_error;
               *  - otherwise: remember error in t_softerror.
               * The last two fall through to wake up the socket's waiters.
 1351          */
 1352         if (tp->t_state == TCPS_ESTABLISHED &&
 1353              (error == EHOSTUNREACH || error == ENETUNREACH ||
 1354               error == EHOSTDOWN)) {
 1355                 return;
 1356         } else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
 1357             tp->t_rxtshift > 3 && tp->t_softerror)
 1358                 so->so_error = error;
 1359         else
 1360                 tp->t_softerror = error;
 1361         cv_broadcast(&so->so_cv);
 1362         sorwakeup(so);
 1363         sowwakeup(so);
 1364 }
 1365 
 1366 #ifdef INET6
      /*
       * IPv6 counterpart of tcp_notify(): the same decision logic, applied
       * to the TCPCB and socket reached through an in6pcb.
       *
       * in6p:  IPv6 PCB whose in6p_ppcb is the affected TCPCB.
       * error: errno value describing the condition.
       *
       * NOTE(review): in6p->in6p_ppcb is dereferenced without a NULL check;
       * presumably callers only pass PCBs that still have a TCPCB attached.
       */
 1367 void
 1368 tcp6_notify(struct in6pcb *in6p, int error)
 1369 {
 1370         struct tcpcb *tp = (struct tcpcb *)in6p->in6p_ppcb;
 1371         struct socket *so = in6p->in6p_socket;
 1372 
 1373         /*
 1374          * Ignore some errors if we are hooked up.
 1375          * If connection hasn't completed, has retransmitted several times,
 1376          * and receives a second error, give up now.  This is better
 1377          * than waiting a long time to establish a connection that
 1378          * can never complete.
               *
               * The ESTABLISHED case returns without waking anyone; the other
               * two record the error (in so_error or t_softerror) and then
               * fall through to the wakeups below.
 1379          */
 1380         if (tp->t_state == TCPS_ESTABLISHED &&
 1381              (error == EHOSTUNREACH || error == ENETUNREACH ||
 1382               error == EHOSTDOWN)) {
 1383                 return;
 1384         } else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
 1385             tp->t_rxtshift > 3 && tp->t_softerror)
 1386                 so->so_error = error;
 1387         else
 1388                 tp->t_softerror = error;
 1389         cv_broadcast(&so->so_cv);
 1390         sorwakeup(so);
 1391         sowwakeup(so);
 1392 }
 1393 #endif
 1394 
 1395 #ifdef INET6
      /*
       * Control-input handler for TCP over IPv6.
       *
       * cmd: PRC_* command code; values >= PRC_NCMDS are ignored.
       * sa:  address the notification refers to; must be an AF_INET6
       *      sockaddr of exactly sizeof(struct sockaddr_in6).
       * d:   NULL, or a struct ip6ctlparam * supplying the offending packet
       *      (mbuf, IPv6 header, offset of the TCP header, source address).
       *
       * Every path returns NULL.
       */
 1396 void *
 1397 tcp6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
 1398 {
 1399         struct tcphdr th;
 1400         void (*notify)(struct in6pcb *, int) = tcp6_notify;
 1401         int nmatch;
 1402         struct ip6_hdr *ip6;
 1403         const struct sockaddr_in6 *sa6_src = NULL;
 1404         const struct sockaddr_in6 *sa6 = (const struct sockaddr_in6 *)sa;
 1405         struct mbuf *m;
 1406         int off;
 1407 
 1408         if (sa->sa_family != AF_INET6 ||
 1409             sa->sa_len != sizeof(struct sockaddr_in6))
 1410                 return NULL;
              /*
               * Classify the command.  Redirects switch the callback to
               * in6_rtchange and, like PRC_HOSTDEAD, discard d so that the
               * zero-port notification at the bottom is used.  Commands with
               * no entry in inet6ctlerrmap[] are dropped.
               */
 1411         if ((unsigned)cmd >= PRC_NCMDS)
 1412                 return NULL;
 1413         else if (cmd == PRC_QUENCH) {
 1414                 /* 
 1415                  * Don't honor ICMP Source Quench messages meant for
 1416                  * TCP connections.
 1417                  */
 1418                 return NULL;
 1419         } else if (PRC_IS_REDIRECT(cmd))
 1420                 notify = in6_rtchange, d = NULL;
 1421         else if (cmd == PRC_MSGSIZE)
 1422                 ; /* special code is present, see below */
 1423         else if (cmd == PRC_HOSTDEAD)
 1424                 d = NULL;
 1425         else if (inet6ctlerrmap[cmd] == 0)
 1426                 return NULL;
 1427 
 1428         /* if the parameter is from icmp6, decode it. */
 1429         if (d != NULL) {
 1430                 struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;
 1431                 m = ip6cp->ip6c_m;
 1432                 ip6 = ip6cp->ip6c_ip6;
 1433                 off = ip6cp->ip6c_off;
 1434                 sa6_src = ip6cp->ip6c_src;
 1435         } else {
 1436                 m = NULL;
 1437                 ip6 = NULL;
 1438                 sa6_src = &sa6_any;
 1439                 off = 0;
 1440         }
 1441 
 1442         if (ip6) {
 1443                 /*
 1444                  * XXX: We assume that when ip6 is non NULL,
 1445                  * M and OFF are valid.
 1446                  */
 1447 
 1448                 /* check if we can safely examine src and dst ports */
 1449                 if (m->m_pkthdr.len < off + sizeof(th)) {
                              /*
                               * Too short to hold a TCP header: still hand a
                               * PRC_MSGSIZE to icmp6_mtudisc_update(), with
                               * its second argument 0.
                               */
 1450                         if (cmd == PRC_MSGSIZE)
 1451                                 icmp6_mtudisc_update((struct ip6ctlparam *)d, 0);
 1452                         return NULL;
 1453                 }
 1454 
                      /* Copy the TCP header out of the mbuf chain into th. */
 1455                 bzero(&th, sizeof(th));
 1456                 m_copydata(m, off, sizeof(th), (void *)&th);
 1457 
 1458                 if (cmd == PRC_MSGSIZE) {
 1459                         int valid = 0;
 1460 
 1461                         /*
 1462                          * Check to see if we have a valid TCP connection
 1463                          * corresponding to the address in the ICMPv6 message
 1464                          * payload.
 1465                          */
 1466                         if (in6_pcblookup_connect(&tcbtable, &sa6->sin6_addr,
 1467                             th.th_dport,
 1468                             (const struct in6_addr *)&sa6_src->sin6_addr,
 1469                             th.th_sport, 0))
 1470                                 valid++;
 1471 
 1472                         /*
 1473                          * Depending on the value of "valid" and routing table
 1474                          * size (mtudisc_{hi,lo}wat), we will:
 1475                          * - recalculate the new MTU and create the
 1476                          *   corresponding routing entry, or
 1477                          * - ignore the MTU change notification.
 1478                          */
 1479                         icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
 1480 
 1481                         /*
 1482                          * no need to call in6_pcbnotify, it should have been
 1483                          * called via callback if necessary
 1484                          */
 1485                         return NULL;
 1486                 }
 1487 
                      /*
                       * Notify the PCBs matching the addresses and the ports
                       * taken from the copied header.  If none matched and
                       * the syn cache is non-empty, unreachable-type errors
                       * are passed on to syn_cache_unreach().
                       */
 1488                 nmatch = in6_pcbnotify(&tcbtable, sa, th.th_dport,
 1489                     (const struct sockaddr *)sa6_src, th.th_sport, cmd, NULL, notify);
 1490                 if (nmatch == 0 && syn_cache_count &&
 1491                     (inet6ctlerrmap[cmd] == EHOSTUNREACH ||
 1492                      inet6ctlerrmap[cmd] == ENETUNREACH ||
 1493                      inet6ctlerrmap[cmd] == EHOSTDOWN))
 1494                         syn_cache_unreach((const struct sockaddr *)sa6_src,
 1495                                           sa, &th);
 1496         } else {
                      /* No packet to inspect: notify with both ports given as 0. */
 1497                 (void) in6_pcbnotify(&tcbtable, sa, 0,
 1498                     (const struct sockaddr *)sa6_src, 0, cmd, NULL, notify);
 1499         }
 1500 
 1501         return NULL;
 1502 }
 1503 #endif
 1504 
 1505 #ifdef INET
 1506 /* PRC_* control input for TCP over IPv4.  Assumes that ip header and tcp header are contiguous on mbuf.  Every path returns NULL. */
 1507 void *
 1508 tcp_ctlinput(int cmd, const struct sockaddr *sa, void *v)
 1509 {
 1510         struct ip *ip = v;      /* may be NULL; also cleared below for some cmds */
 1511         struct tcphdr *th;
 1512         struct icmp *icp;
 1513         extern const int inetctlerrmap[];
 1514         void (*notify)(struct inpcb *, int) = tcp_notify;
 1515         int errno;
 1516         int nmatch;
 1517         struct tcpcb *tp;
 1518         u_int mtu;
 1519         tcp_seq seq;
 1520         struct inpcb *inp;
 1521 #ifdef INET6
 1522         struct in6pcb *in6p;
 1523         struct in6_addr src6, dst6;
 1524 #endif
 1525 
 1526         if (sa->sa_family != AF_INET ||
 1527             sa->sa_len != sizeof(struct sockaddr_in))
 1528                 return NULL;    /* only a full-size AF_INET sockaddr is handled */
 1529         if ((unsigned)cmd >= PRC_NCMDS)
 1530                 return NULL;    /* cmd indexes inetctlerrmap[] just below */
 1531         errno = inetctlerrmap[cmd];
 1532         if (cmd == PRC_QUENCH)
 1533                 /*
 1534                  * Don't honor ICMP Source Quench messages meant for
 1535                  * TCP connections.
 1536                  */
 1537                 return NULL;
 1538         else if (PRC_IS_REDIRECT(cmd))
 1539                 notify = in_rtchange, ip = 0;   /* ip = 0 selects in_pcbnotifyall() at the end */
 1540         else if (cmd == PRC_MSGSIZE && ip && ip->ip_v == 4) {
 1541                 /*
 1542                  * Check to see if we have a valid TCP connection
 1543                  * corresponding to the address in the ICMP message
 1544                  * payload.
 1545                  *
 1546                  * Boundary check is made in icmp_input(), with ICMP_ADVLENMIN.
 1547                  */
 1548                 th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
 1549 #ifdef INET6
 1550                 memset(&src6, 0, sizeof(src6));
 1551                 memset(&dst6, 0, sizeof(dst6));
 1552                 src6.s6_addr16[5] = dst6.s6_addr16[5] = 0xffff;        /* ::ffff:a.b.c.d form of both addresses */
 1553                 memcpy(&src6.s6_addr32[3], &ip->ip_src, sizeof(struct in_addr));
 1554                 memcpy(&dst6.s6_addr32[3], &ip->ip_dst, sizeof(struct in_addr));
 1555 #endif
 1556                 if ((inp = in_pcblookup_connect(&tcbtable, ip->ip_dst,
 1557                     th->th_dport, ip->ip_src, th->th_sport)) != NULL)
 1558 #ifdef INET6
 1559                         in6p = NULL;    /* IPv4 PCB found; no IPv6 PCB in play */
 1560 #else
 1561                         ;
 1562 #endif
 1563 #ifdef INET6
 1564                 else if ((in6p = in6_pcblookup_connect(&tcbtable, &dst6,
 1565                     th->th_dport, &src6, th->th_sport, 0)) != NULL)
 1566                         ;       /* matched through the ::ffff: addresses built above */
 1567 #endif
 1568                 else
 1569                         return NULL;    /* no connection matches the quoted header */
 1570 
 1571                 /*
 1572                  * Now that we've validated that we are actually communicating
 1573                  * with the host indicated in the ICMP message, locate the
 1574                  * ICMP header, recalculate the new MTU, and create the
 1575                  * corresponding routing entry.
 1576                  */
 1577                 icp = (struct icmp *)((char *)ip -
 1578                     offsetof(struct icmp, icmp_ip));   /* step back from icmp_ip to the enclosing struct icmp */
 1579                 if (inp) {
 1580                         if ((tp = intotcpcb(inp)) == NULL)
 1581                                 return NULL;
 1582                 }
 1583 #ifdef INET6
 1584                 else if (in6p) {
 1585                         if ((tp = in6totcpcb(in6p)) == NULL)
 1586                                 return NULL;
 1587                 }
 1588 #endif
 1589                 else
 1590                         return NULL;
 1591                 seq = ntohl(th->th_seq);
 1592                 if (SEQ_LT(seq, tp->snd_una) || SEQ_GT(seq, tp->snd_max))
 1593                         return NULL;    /* quoted sequence number lies outside [snd_una, snd_max] */
 1594                 /*
 1595                  * If the ICMP message advertises a Next-Hop MTU
 1596                  * equal or larger than the maximum packet size we have
 1597                  * ever sent, drop the message.
 1598                  */
 1599                 mtu = (u_int)ntohs(icp->icmp_nextmtu);
 1600                 if (mtu >= tp->t_pmtud_mtu_sent)
 1601                         return NULL;
 1602                 if (mtu >= tcp_hdrsz(tp) + tp->t_pmtud_mss_acked) {
 1603                         /*
 1604                          * Calculate new MTU, and create corresponding
 1605                          * route (traditional PMTUD).
 1606                          */
 1607                         tp->t_flags &= ~TF_PMTUD_PEND;
 1608                         icmp_mtudisc(icp, ip->ip_dst);
 1609                 } else {
 1610                         /*
 1611                          * Record the information got in the ICMP
 1612                          * message; act on it later.
 1613                          * If we had already recorded an ICMP message,
 1614                          * replace the old one only if the new message
 1615                          * refers to an older TCP segment
 1616                          */
 1617                         if (tp->t_flags & TF_PMTUD_PEND) {
 1618                                 if (SEQ_LT(tp->t_pmtud_th_seq, seq))
 1619                                         return NULL;
 1620                         } else
 1621                                 tp->t_flags |= TF_PMTUD_PEND;
 1622                         tp->t_pmtud_th_seq = seq;
 1623                         tp->t_pmtud_nextmtu = icp->icmp_nextmtu;       /* stored as received, without ntohs() */
 1624                         tp->t_pmtud_ip_len = icp->icmp_ip.ip_len;
 1625                         tp->t_pmtud_ip_hl = icp->icmp_ip.ip_hl;
 1626                 }
 1627                 return NULL;
 1628         } else if (cmd == PRC_HOSTDEAD)
 1629                 ip = 0;         /* as for redirects: fall through to the notify-all path */
 1630         else if (errno == 0)
 1631                 return NULL;    /* inetctlerrmap[] has no error for this cmd */
 1632         if (ip && ip->ip_v == 4 && sa->sa_family == AF_INET) {
 1633                 th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
 1634                 nmatch = in_pcbnotify(&tcbtable, satocsin(sa)->sin_addr,
 1635                     th->th_dport, ip->ip_src, th->th_sport, errno, notify);
 1636                 if (nmatch == 0 && syn_cache_count &&
 1637                     (inetctlerrmap[cmd] == EHOSTUNREACH ||
 1638                     inetctlerrmap[cmd] == ENETUNREACH ||
 1639                     inetctlerrmap[cmd] == EHOSTDOWN)) {
 1640                         struct sockaddr_in sin;        /* source side of the quoted header, for the syn cache */
 1641                         bzero(&sin, sizeof(sin));
 1642                         sin.sin_len = sizeof(sin);
 1643                         sin.sin_family = AF_INET;
 1644                         sin.sin_port = th->th_sport;
 1645                         sin.sin_addr = ip->ip_src;
 1646                         syn_cache_unreach((struct sockaddr *)&sin, sa, th);
 1647                 }
 1648 
 1649                 /* XXX mapped address case */
 1650         } else
 1651                 in_pcbnotifyall(&tcbtable, satocsin(sa)->sin_addr, errno,
 1652                     notify);
 1653         return NULL;
 1654 }
 1655 
 1656 /*
 1657  * Source-quench notification handler: we are being told of congestion.
 1658  * Shrink the congestion window to the Loss Window (one segment, t_segsz)
 1659  * and restart the acked-byte count.  A PCB without a tcpcb is left alone.
 1660  */
 1661 void
 1662 tcp_quench(struct inpcb *inp, int errno)
 1663 {
 1664         struct tcpcb *tcb = intotcpcb(inp);
 1665 
 1666         if (tcb == NULL)
 1667                 return;
 1668         tcb->t_bytes_acked = 0;
 1669         tcb->snd_cwnd = tcb->t_segsz;
 1670 }
 1671 #endif
 1672 
 1673 #ifdef INET6
 1674 void
 1675 tcp6_quench(struct in6pcb *in6p, int errno)
 1676 {
 1677         struct tcpcb *tcb = in6totcpcb(in6p);
 1678         /* Quench: collapse cwnd to one segment and reset the acked bytes. */
 1679         if (tcb == NULL)
 1680                 return;
 1681         tcb->t_bytes_acked = 0;
 1682         tcb->snd_cwnd = tcb->t_segsz;
 1683 }
 1684 #endif
 1685 
 1686 #ifdef INET
 1687 /*
 1688  * Path MTU Discovery handlers: run tcp_mtudisc for faddr, then (INET6) hand ::ffff:faddr to the v6 callback.
 1689  */
 1690 void
 1691 tcp_mtudisc_callback(struct in_addr faddr)
 1692 {
 1693         in_pcbnotifyall(&tcbtable, faddr, EMSGSIZE, tcp_mtudisc);
 1694 #ifdef INET6
 1695         {
 1696                 struct in6_addr mapped;
 1697 
 1698                 memset(&mapped, 0, sizeof(mapped));
 1699                 memcpy(&mapped.s6_addr32[3], &faddr, sizeof(faddr));
 1700                 mapped.s6_addr16[5] = 0xffff;
 1701                 tcp6_mtudisc_callback(&mapped);
 1702         }
 1703 #endif
 1704 }
 1705 
 1706 /*
 1707  * On receipt of path MTU corrections, flush old route and replace it
 1708  * with the new one, restart the congestion window from the route MTU,
 1709  * and retransmit all unacknowledged packets, to ensure that all
 1710  * packets will be received.  A PCB without a tcpcb is left alone.
 1711  */
 1712 void
 1713 tcp_mtudisc(struct inpcb *inp, int errno)
 1714 {
 1715         struct tcpcb *tcb = intotcpcb(inp);
 1716         struct rtentry *route = in_pcbrtentry(inp);
 1717 
 1718         if (tcb == NULL)
 1719                 return;
 1720 
 1721         /*
 1722          * If this was not a host route, remove and realloc.  Give up
 1723          * entirely when no route can be found afterwards.
 1724          */
 1725         if (route != NULL && (route->rt_flags & RTF_HOST) == 0) {
 1726                 in_rtchange(inp, errno);
 1727                 route = in_pcbrtentry(inp);
 1728                 if (route == NULL)
 1729                         return;
 1730         }
 1731 
 1732         /*
 1733          * Slow start out of the error condition.  We use the MTU
 1734          * because we know it's smaller than the previously
 1735          * transmitted segment.
 1736          *
 1737          * Note: This is more conservative than the suggestion in
 1738          * draft-floyd-incr-init-win-03.
 1739          */
 1740         if (route != NULL && route->rt_rmx.rmx_mtu != 0)
 1741                 tcb->snd_cwnd = TCP_INITIAL_WINDOW(tcp_init_win,
 1742                     route->rt_rmx.rmx_mtu);
 1743 
 1744         /* Resend unacknowledged packets. */
 1745         tcb->sack_newdata = tcb->snd_una;
 1746         tcb->snd_nxt = tcb->sack_newdata;
 1747         tcp_output(tcb);
 1748 }
 1749 #endif
 1750 
 1751 #ifdef INET6
 1752 /*
 1753  * Path MTU Discovery handlers: pass PRC_MSGSIZE for faddr to in6_pcbnotify with tcp6_mtudisc as the callback.
 1754  */
 1755 void
 1756 tcp6_mtudisc_callback(struct in6_addr *faddr)
 1757 {
 1758         struct sockaddr_in6 dst;
 1759 
 1760         memset(&dst, 0, sizeof(dst));
 1761         dst.sin6_len = sizeof(dst);
 1762         dst.sin6_family = AF_INET6;
 1763         dst.sin6_addr = *faddr;
 1764         (void) in6_pcbnotify(&tcbtable, (struct sockaddr *)&dst, 0,
 1765             (const struct sockaddr *)&sa6_any, 0, PRC_MSGSIZE, NULL, tcp6_mtudisc);
 1766 }
 1767 
 1768 void
 1769 tcp6_mtudisc(struct in6pcb *in6p, int errno)
 1770 {
 1771         struct tcpcb *tcb = in6totcpcb(in6p);
 1772         struct rtentry *route = in6_pcbrtentry(in6p);
 1773 
 1774         /* Nothing to adjust for a PCB without a tcpcb. */
 1775         if (tcb == NULL)
 1776                 return;
 1777 
 1778         /*
 1779          * If this was not a host route, remove and realloc.  Give up
 1780          * entirely when no route can be found afterwards.
 1781          */
 1782         if (route != NULL && (route->rt_flags & RTF_HOST) == 0) {
 1783                 in6_rtchange(in6p, errno);
 1784                 route = in6_pcbrtentry(in6p);
 1785                 if (route == NULL)
 1786                         return;
 1787         }
 1788 
 1789         /*
 1790          * Slow start out of the error condition.  We use the MTU
 1791          * because we know it's smaller than the previously
 1792          * transmitted segment.
 1793          *
 1794          * Note: This is more conservative than the suggestion in
 1795          * draft-floyd-incr-init-win-03.
 1796          */
 1797         if (route != NULL && route->rt_rmx.rmx_mtu != 0)
 1798                 tcb->snd_cwnd = TCP_INITIAL_WINDOW(tcp_init_win,
 1799                     route->rt_rmx.rmx_mtu);
 1800 
 1801         /* Resend unacknowledged packets. */
 1802         tcb->sack_newdata = tcb->snd_una;
 1803         tcb->snd_nxt = tcb->sack_newdata;
 1804         tcp_output(tcb);
 1805 }
 1806 #endif /* INET6 */
 1807 
 1808 /*
 1809  * Compute the MSS to advertise to the peer.  Called only during
 1810  * the 3-way handshake.  If we are the server (peer initiated
 1811  * connection), ifp is the interface on which the SYN packet
 1812  * arrived.  If we are the client (we initiated connection), ifp
 1813  * is the interface out which this connection should go.  A NULL
 1814  * ifp is accepted; af is the address family (AF_INET, AF_INET6).
 1815  *
 1816  * NOTE: Do not subtract IP option/extension header size nor IPsec
 1817  * header size from MSS advertisement.  MSS option must hold the maximum
 1818  * segment size we can accept, so it must always be:
 1819  *       max(if mtu) - ip header - tcp header
 1820  */
 1821 u_long
 1822 tcp_mss_to_advertise(const struct ifnet *ifp, int af)
 1823 {
 1824         extern u_long in_maxmtu;
 1825         u_long hdrsiz;
 1826         u_long mss = 0;
 1827 
 1828         /*
 1829          * In order to avoid defeating path MTU discovery on the peer,
 1830          * we advertise the max MTU of all attached networks as our MSS,
 1831          * per RFC 1191, section 3.1.
 1832          *
 1833          * We provide the option (tcp_mss_ifmtu != 0) to advertise just
 1834          * the MTU of the interface on which we hope this connection will
 1835          * be receiving.  If we are responding to a SYN, we will have a
 1836          * pretty good idea about this, but when initiating a connection
 1837          * there is a bit more doubt.
 1838          *
 1839          * We also need to ensure that loopback has a large enough
 1840          * MSS, as the loopback MTU is never included in in_maxmtu.
 1841          *
 1842          * Per family: start from the interface MTU (when an interface
 1843          * was supplied), raise it to the per-family maximum MTU unless
 1844          * tcp_mss_ifmtu is set, and pick the fixed IP header size.
 1845          */
 1846         switch (af) {
 1847         case AF_INET:
 1848                 if (ifp != NULL)
 1849                         mss = ifp->if_mtu;
 1850                 if (tcp_mss_ifmtu == 0)
 1851                         mss = max(in_maxmtu, mss);
 1852                 hdrsiz = sizeof(struct ip);
 1853                 break;
 1854 #ifdef INET6
 1855         case AF_INET6:
 1856                 if (ifp != NULL)
 1857                         mss = IN6_LINKMTU(ifp);
 1858                 if (tcp_mss_ifmtu == 0)
 1859                         mss = max(in6_maxmtu, mss);
 1860                 hdrsiz = sizeof(struct ip6_hdr);
 1861                 break;
 1862 #endif
 1863         default:
 1864                 /*
 1865                  * Any other family: mss stays 0 and no IP header
 1866                  * is accounted for.
 1867                  */
 1868                 hdrsiz = 0;
 1869                 break;
 1870         }
 1871         hdrsiz += sizeof(struct tcphdr);
 1872 
 1873         /*
 1874          * Strip the IP and TCP headers, but only when the MTU is
 1875          * strictly larger than them.
 1876          */
 1877         if (mss > hdrsiz)
 1878                 mss -= hdrsiz;
 1879 
 1880         /*
 1881          * tcp_mssdflt is the floor: the result is never below it.
 1882          */
 1883         mss = max(tcp_mssdflt, mss);
 1884 
 1885         return (mss);
 1886 }
 1887 
 1888 /*
 1889  * Set connection variables based on the peer's advertised MSS.
 1890  * tp is the TCPCB of the actual connection; offer is the MSS the peer
 1891  * sent, or 0 for none (tcp_mssdflt is used then).  Records t_peermss,
 1892  * sizes the send buffer, sets t_segsz and possibly snd_ssthresh.
 1893  * Servers get here from the compressed state engine once the 3-way
 1894  * handshake completes, clients on receipt of the SYN,ACK.
 1895  * NOTE: Our advertised MSS value must be initialized in the TCPCB
 1896  * before this routine is called!
 1897  */
 1898 void
 1899 tcp_mss_from_peer(struct tcpcb *tp, int offer)
 1900 {
 1901         struct socket *so;
 1902 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
 1903         struct rtentry *rt = NULL;      /* cleared here so no use exists outside the guard */
 1904 #endif
 1905         u_long bufsize;
 1906         int mss;
 1907 
 1908 #ifdef DIAGNOSTIC
 1909         if (tp->t_inpcb && tp->t_in6pcb)
 1910                 panic("tcp_mss_from_peer: both t_inpcb and t_in6pcb are set");
 1911 #endif
 1912         so = NULL;
 1913         /* Pick up the socket (and route, if metrics are compiled in) from whichever PCB exists. */
 1914 #ifdef INET
 1915         if (tp->t_inpcb) {
 1916                 so = tp->t_inpcb->inp_socket;
 1917 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
 1918                 rt = in_pcbrtentry(tp->t_inpcb);
 1919 #endif
 1920         }
 1921 #endif
 1922 #ifdef INET6
 1923         if (tp->t_in6pcb) {
 1924                 so = tp->t_in6pcb->in6p_socket;
 1925 #if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
 1926                 rt = in6_pcbrtentry(tp->t_in6pcb);
 1927 #endif
 1928         }
 1929 #endif
 1930 
 1931         /*
 1932          * As per RFC1122, use the default MSS value, unless they
 1933          * sent us an offer.  Do not accept offers less than 256 bytes.
 1934          */
 1935         mss = tcp_mssdflt;
 1936         if (offer)
 1937                 mss = offer;
 1938         mss = max(mss, 256);            /* sanity */
 1939         tp->t_peermss = mss;            /* remembered before options are subtracted */
 1940         mss -= tcp_optlen(tp);
 1941 #ifdef INET
 1942         if (tp->t_inpcb)
 1943                 mss -= ip_optlen(tp->t_inpcb);
 1944 #endif
 1945 #ifdef INET6
 1946         if (tp->t_in6pcb)
 1947                 mss -= ip6_optlen(tp->t_in6pcb);
 1948 #endif
 1949 
 1950         /*
 1951          * If there's a pipesize, change the socket buffer to that size.
 1952          * Make the socket buffer an integral number of MSS units.  If
 1953          * the MSS is larger than the socket buffer, artificially decrease
 1954          * the MSS.
 1955          */
 1956 #ifdef RTV_SPIPE
 1957         if (rt != NULL && rt->rt_rmx.rmx_sendpipe != 0)
 1958                 bufsize = rt->rt_rmx.rmx_sendpipe;
 1959         else
 1960 #endif
 1961         {
 1962                 KASSERT(so != NULL);
 1963                 bufsize = so->so_snd.sb_hiwat;
 1964         }
 1965         if (bufsize < mss)
 1966                 mss = bufsize;
 1967         else {
 1968                 bufsize = roundup(bufsize, mss);
 1969                 if (bufsize > sb_max)
 1970                         bufsize = sb_max;       /* never reserve more than sb_max */
 1971                 (void) sbreserve(&so->so_snd, bufsize, so);
 1972         }
 1973         tp->t_segsz = mss;
 1974 
 1975 #ifdef RTV_SSTHRESH
 1976         if (rt != NULL && rt->rt_rmx.rmx_ssthresh) {
 1977                 /*
 1978                  * There's some sort of gateway or interface buffer
 1979                  * limit on the path.  Use this to set the slow
 1980                  * start threshold, but set the threshold to no less
 1981                  * than 2 * MSS.
 1982                  */
 1983                 tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
 1984         }
 1985 #endif
 1986 }
 1987 
 1988 /*
 1989  * Processing necessary when a TCP connection is established: enter TCPS_ESTABLISHED, arm the keepalive timer, size the receive buffer.
 1990  */
 1991 void
 1992 tcp_established(struct tcpcb *tp)
 1993 {
 1994         struct socket *so;
 1995 #ifdef RTV_RPIPE
 1996         struct rtentry *rt = NULL;      /* cleared here so no use exists outside the guard */
 1997 #endif
 1998         u_long bufsize;
 1999 
 2000 #ifdef DIAGNOSTIC
 2001         if (tp->t_inpcb && tp->t_in6pcb)
 2002                 panic("tcp_established: both t_inpcb and t_in6pcb are set");
 2003 #endif
 2004         so = NULL;
 2005         /* Pick up the socket (and route, with RTV_RPIPE) from whichever PCB exists. */
 2006 #ifdef INET
 2007         if (tp->t_inpcb) {
 2008                 so = tp->t_inpcb->inp_socket;
 2009 #if defined(RTV_RPIPE)
 2010                 rt = in_pcbrtentry(tp->t_inpcb);
 2011 #endif
 2012         }
 2013 #endif
 2014 #ifdef INET6
 2015         if (tp->t_in6pcb) {
 2016                 so = tp->t_in6pcb->in6p_socket;
 2017 #if defined(RTV_RPIPE)
 2018                 rt = in6_pcbrtentry(tp->t_in6pcb);
 2019 #endif
 2020         }
 2021 #endif
 2022 
 2023         tp->t_state = TCPS_ESTABLISHED;
 2024         TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);
 2025 
 2026 #ifdef RTV_RPIPE
 2027         if (rt != NULL && rt->rt_rmx.rmx_recvpipe != 0)
 2028                 bufsize = rt->rt_rmx.rmx_recvpipe;      /* route's receive pipe size wins */
 2029         else
 2030 #endif
 2031         {
 2032                 KASSERT(so != NULL);
 2033                 bufsize = so->so_rcv.sb_hiwat;
 2034         }
 2035         if (bufsize > tp->t_ourmss) {
 2036                 bufsize = roundup(bufsize, tp->t_ourmss);       /* whole number of our MSS units */
 2037                 if (bufsize > sb_max)
 2038                         bufsize = sb_max;       /* never reserve more than sb_max */
 2039                 (void) sbreserve(&so->so_rcv, bufsize, so);
 2040         }
 2041 }
 2042 
 2043 /*
 2044  * Seed tp's RTT estimators (t_srtt, t_rttvar, t_rxtcur) from the route's
 2045  * initial rtt/rttvar, converting route-table units to scaled multiples of
 2046  * the slow timeout timer.  Called only during the 3-way handshake.
 2047  */
 2048 void
 2049 tcp_rmx_rtt(struct tcpcb *tp)
 2050 {
 2051 #ifdef RTV_RTT
 2052         struct rtentry *rt = NULL;
 2053         int rtt;
 2054 
 2055 #ifdef DIAGNOSTIC
 2056         if (tp->t_inpcb && tp->t_in6pcb)
 2057                 panic("tcp_rmx_rtt: both t_inpcb and t_in6pcb are set");
 2058 #endif
 2059 #ifdef INET
 2060         if (tp->t_inpcb)
 2061                 rt = in_pcbrtentry(tp->t_inpcb);
 2062 #endif
 2063 #ifdef INET6
 2064         if (tp->t_in6pcb)
 2065                 rt = in6_pcbrtentry(tp->t_in6pcb);
 2066 #endif
 2067         if (rt == NULL)
 2068                 return;         /* no route, nothing to seed from */
 2069 
 2070         if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {   /* only when no srtt yet and the route has one */
 2071                 /*
 2072                  * XXX The lock bit for RTT (RTV_RTT in rmx_locks) indicates
 2073                  * that the value is also a minimum value; this is subject to time.
 2074                  */
 2075                 if (rt->rt_rmx.rmx_locks & RTV_RTT)
 2076                         TCPT_RANGESET(tp->t_rttmin,
 2077                             rtt / (RTM_RTTUNIT / PR_SLOWHZ),
 2078                             TCPTV_MIN, TCPTV_REXMTMAX);
 2079                 tp->t_srtt = rtt /
 2080                     ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));
 2081                 if (rt->rt_rmx.rmx_rttvar) {
 2082                         tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
 2083                             ((RTM_RTTUNIT / PR_SLOWHZ) >>
 2084                                 (TCP_RTTVAR_SHIFT + 2));
 2085                 } else {
 2086                         /* Default variation is +- 1 rtt */
 2087                         tp->t_rttvar =
 2088                             tp->t_srtt >> (TCP_RTT_SHIFT - TCP_RTTVAR_SHIFT);
 2089                 }
 2090                 TCPT_RANGESET(tp->t_rxtcur,
 2091                     ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2),
 2092                     tp->t_rttmin, TCPTV_REXMTMAX);
 2093         }
 2094 #endif
 2095 }
 2096 
 2097 tcp_seq  tcp_iss_seq = 0;       /* tcp initial seq #; advanced by TCP_ISSINCR in tcp_new_iss1() */
 2098 #if NRND > 0
 2099 u_int8_t tcp_iss_secret[16];    /* 128 bits; should be plenty.  Hash secret, filled on first tcp_new_iss1() call */
 2100 #endif
 2101 
 2102 /*
 2103  * Get a new initial sequence value for tp, keyed on the addresses and
 2104  * ports of whichever PCB (IPv4 or IPv6) it has; panics with neither.
 2105  */
 2106 tcp_seq
 2107 tcp_new_iss(struct tcpcb *tp, tcp_seq addin)
 2108 {
 2109 #ifdef INET
 2110         if (tp->t_inpcb != NULL) {
 2111                 struct inpcb *v4 = tp->t_inpcb;
 2112 
 2113                 return tcp_new_iss1(&v4->inp_laddr, &v4->inp_faddr,
 2114                     v4->inp_lport, v4->inp_fport, sizeof(v4->inp_laddr), addin);
 2115         }
 2116 #endif
 2117 #ifdef INET6
 2118         if (tp->t_in6pcb != NULL) {
 2119                 struct in6pcb *v6 = tp->t_in6pcb;
 2120 
 2121                 return tcp_new_iss1(&v6->in6p_laddr, &v6->in6p_faddr,
 2122                     v6->in6p_lport, v6->in6p_fport, sizeof(v6->in6p_laddr), addin);
 2123         }
 2124 #endif
 2125         /* Not possible. */
 2126         panic("tcp_new_iss");
 2127 }
 2128 
 2129 /* Generate a new TCP initial sequence number.  laddr/faddr point at addrsz-byte
 2130  * local/foreign addresses, lport/fport are the ports, addin is an offset folded
 2131  * into the result.  Advances tcp_iss_seq except on the random path with addin != 0. */
 2132 tcp_seq
 2133 tcp_new_iss1(void *laddr, void *faddr, u_int16_t lport, u_int16_t fport,
 2134     size_t addrsz, tcp_seq addin)
 2135 {
 2136         tcp_seq tcp_iss;
 2137 
 2138 #if NRND > 0
 2139         static bool tcp_iss_gotten_secret;
 2140 
 2141         /*
 2142          * If we haven't been here before, initialize our cryptographic
 2143          * hash secret.
 2144          */
 2145         if (tcp_iss_gotten_secret == false) {
 2146                 rnd_extract_data(tcp_iss_secret, sizeof(tcp_iss_secret),
 2147                     RND_EXTRACT_ANY);
 2148                 tcp_iss_gotten_secret = true;
 2149         }
 2150 
 2151         if (tcp_do_rfc1948) {
 2152                 MD5_CTX ctx;
 2153                 u_int8_t hash[16];      /* XXX MD5 knowledge */
 2154 
 2155                 /*
 2156                  * Compute the base value of the ISS.  It is an MD5 hash
 2157                  * of (laddr, lport, faddr, fport, secret), in that order.
 2158                  */
 2159                 MD5Init(&ctx);
 2160 
 2161                 MD5Update(&ctx, (u_char *) laddr, addrsz);
 2162                 MD5Update(&ctx, (u_char *) &lport, sizeof(lport));
 2163 
 2164                 MD5Update(&ctx, (u_char *) faddr, addrsz);
 2165                 MD5Update(&ctx, (u_char *) &fport, sizeof(fport));
 2166 
 2167                 MD5Update(&ctx, tcp_iss_secret, sizeof(tcp_iss_secret));
 2168 
 2169                 MD5Final(hash, &ctx);
 2170 
 2171                 memcpy(&tcp_iss, hash, sizeof(tcp_iss));       /* keep only the leading sizeof(tcp_iss) hash bytes */
 2172 
 2173                 /*
 2174                  * Now increment our "timer", and add it in to
 2175                  * the computed value.
 2176                  *
 2177                  * XXX Use `addin'?
 2178                  * XXX TCP_ISSINCR too large to use?
 2179                  */
 2180                 tcp_iss_seq += TCP_ISSINCR;
 2181 #ifdef TCPISS_DEBUG
 2182                 printf("ISS hash 0x%08x, ", tcp_iss);
 2183 #endif
 2184                 tcp_iss += tcp_iss_seq + addin;
 2185 #ifdef TCPISS_DEBUG
 2186                 printf("new ISS 0x%08x\n", tcp_iss);
 2187 #endif
 2188         } else
 2189 #endif /* NRND > 0 */
 2190         {
 2191                 /*
 2192                  * Randomize.  (Without NRND this block is the whole body.)
 2193                  */
 2194 #if NRND > 0
 2195                 rnd_extract_data(&tcp_iss, sizeof(tcp_iss), RND_EXTRACT_ANY);
 2196 #else
 2197                 tcp_iss = arc4random();
 2198 #endif
 2199 
 2200                 /*
 2201                  * If we were asked to add some amount to a known value,
 2202                  * we will take a random value obtained above, mask off
 2203                  * the upper bits, and add in the known value.  We also
 2204                  * add in a constant to ensure that we are at least a
 2205                  * certain distance from the original value.
 2206                  *
 2207                  * This is used when an old connection is in timed wait
 2208                  * and we have a new one coming in, for instance.
 2209                  */
 2210                 if (addin != 0) {
 2211 #ifdef TCPISS_DEBUG
 2212                         printf("Random %08x, ", tcp_iss);
 2213 #endif
 2214                         tcp_iss &= TCP_ISS_RANDOM_MASK;
 2215                         tcp_iss += addin + TCP_ISSINCR;
 2216 #ifdef TCPISS_DEBUG
 2217                         printf("Old ISS %08x, ISS %08x\n", addin, tcp_iss);
 2218 #endif
 2219                 } else {
 2220                         tcp_iss &= TCP_ISS_RANDOM_MASK;
 2221                         tcp_iss += tcp_iss_seq;         /* uses the counter value from before the bump below */
 2222                         tcp_iss_seq += TCP_ISSINCR;
 2223 #ifdef TCPISS_DEBUG
 2224                         printf("ISS %08x\n", tcp_iss);
 2225 #endif
 2226                 }
 2227         }
 2228 
 2229         if (tcp_compat_42) {
 2230                 /*
 2231                  * Limit it to the positive range for really old TCP
 2232                  * implementations.
 2233                  * Just AND off the top bit instead of checking if it
 2234                  * is set first - saves a branch 50% of the time.
 2235                  */
 2236                 tcp_iss &= 0x7fffffff;          /* XXX */
 2237         }
 2238 
 2239         return (tcp_iss);
 2240 }
 2241 
 2242 #if defined(IPSEC) || defined(FAST_IPSEC)
 2243 /* ESP/AH header size for tp, including outer IP header; 0 if not usable. */
 2244 size_t
 2245 ipsec4_hdrsiz_tcp(struct tcpcb *tp)
 2246 {
 2247         struct inpcb *pcb;
 2248 
 2249         /* XXX mapped addr case (tp->t_in6pcb) */
 2250         if (!tp || !tp->t_template)
 2251                 return 0;
 2252         pcb = tp->t_inpcb;
 2253         if (pcb == NULL)
 2254                 return 0;
 2255 
 2256         /* Only AF_INET connections are sized here. */
 2257         if (tp->t_family != AF_INET)
 2258                 return 0;
 2259 
 2260         /*
 2261          * XXX: should use correct direction.
 2262          */
 2263         return ipsec4_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, pcb);
 2264 }
 2265 
 2266 #ifdef INET6
 2267 size_t
 2268 ipsec6_hdrsiz_tcp(struct tcpcb *tp)
 2269 {
 2270         struct in6pcb *pcb6;
 2271 
 2272         /* No connection, no header template or no IPv6 PCB: size is 0. */
 2273         if (!tp || !tp->t_template)
 2274                 return 0;
 2275         pcb6 = tp->t_in6pcb;
 2276         if (pcb6 == NULL)
 2277                 return 0;
 2278 
 2279         /*
 2280          * AF_INET here would be the mapped address case - tricky;
 2281          * like any other non-AF_INET6 family it yields 0.
 2282          */
 2283         if (tp->t_family != AF_INET6)
 2284                 return 0;
 2285 
 2286         /* XXX: should use correct direction. */
 2287         return ipsec6_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, pcb6);
 2288 }
 2289 #endif
 2290 #endif /*IPSEC*/
 2291 
 2292 /*
 2293  * Return the number of TCP option bytes accounted for on this connection.
 2294  *
 2295  * XXX:  What do we do for SACK, when we add that?  Just reserve
 2296  *       all of the space?  Otherwise we can't exactly be incrementing
 2297  *       cwnd by an amount that varies depending on the amount we last
 2298  *       had to SACK!
 2299  */
 2300 
 2301 u_int
 2302 tcp_optlen(struct tcpcb *tp)
 2303 {
 2304         u_int len = 0;
 2305 
 2306         /* Timestamps: requested, received, and TF_NOOPT not set. */
 2307         if ((tp->t_flags & (TF_NOOPT | TF_RCVD_TSTMP | TF_REQ_TSTMP)) ==
 2308             (TF_RCVD_TSTMP | TF_REQ_TSTMP))
 2309                 len = TCPOLEN_TSTAMP_APPA;
 2310 
 2311 #ifdef TCP_SIGNATURE
 2312         if ((tp->t_flags & TF_SIGNATURE) != 0)
 2313                 len += 2 + TCPOLEN_SIGNATURE;
 2314 #endif /* TCP_SIGNATURE */
 2315 
 2316         return len;
 2317 }
 2318 
 2319 u_int
 2320 tcp_hdrsz(struct tcpcb *tp)
 2321 {
 2322         u_int total;
 2323 
 2324         /* Fixed network-layer header for the connection's family. */
 2325         if (tp->t_family == AF_INET)
 2326                 total = sizeof(struct ip);
 2327 #ifdef INET6
 2328         else if (tp->t_family == AF_INET6)
 2329                 total = sizeof(struct ip6_hdr);
 2330 #endif
 2331         else
 2332                 total = 0;
 2333 
 2334         /* Fixed TCP header. */
 2335         total += sizeof(struct tcphdr);
 2336 
 2337         /* Timestamp option: requested, received, and TF_NOOPT not set. */
 2338         if ((tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP &&
 2339             (tp->t_flags & (TF_REQ_TSTMP | TF_NOOPT)) == TF_REQ_TSTMP)
 2340                 total += TCPOLEN_TSTAMP_APPA;
 2341 
 2342 #ifdef TCP_SIGNATURE
 2343         if ((tp->t_flags & TF_SIGNATURE) != 0)
 2344                 total += TCPOLEN_SIGLEN;
 2345 #endif
 2346         return total;
 2347 }
 2348 
 2349 void
 2350 tcp_statinc(u_int stat)
 2351 {
 2352         /* Function wrapper around the TCP_STATINC macro; stat is asserted to be below TCP_NSTATS. */
 2353         KASSERT(stat < TCP_NSTATS);
 2354         TCP_STATINC(stat);
 2355 }
 2356 
 2357 void
 2358 tcp_statadd(u_int stat, uint64_t val)
 2359 {
 2360         /* Function wrapper around the TCP_STATADD macro; stat is asserted to be below TCP_NSTATS. */
 2361         KASSERT(stat < TCP_NSTATS);
 2362         TCP_STATADD(stat, val);
 2363 }

Cache object: b384a2c8724e5d32c7d95406215b09d2


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.