FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_timer.c

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD$");
   34 
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 #include "opt_tcpdebug.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/kernel.h>
   41 #include <sys/lock.h>
   42 #include <sys/mbuf.h>
   43 #include <sys/mutex.h>
   44 #include <sys/protosw.h>
   45 #include <sys/smp.h>
   46 #include <sys/socket.h>
   47 #include <sys/socketvar.h>
   48 #include <sys/sysctl.h>
   49 #include <sys/systm.h>
   50 
   51 #include <net/if.h>
   52 #include <net/route.h>
   53 #include <net/vnet.h>
   54 
   55 #include <netinet/cc.h>
   56 #include <netinet/in.h>
   57 #include <netinet/in_pcb.h>
   58 #include <netinet/in_systm.h>
   59 #ifdef INET6
   60 #include <netinet6/in6_pcb.h>
   61 #endif
   62 #include <netinet/ip_var.h>
   63 #include <netinet/tcp_fsm.h>
   64 #include <netinet/tcp_timer.h>
   65 #include <netinet/tcp_var.h>
   66 #ifdef INET6
   67 #include <netinet6/tcp6_var.h>
   68 #endif
   69 #include <netinet/tcpip.h>
   70 #ifdef TCPDEBUG
   71 #include <netinet/tcp_debug.h>
   72 #endif
   73 
   74 int    tcp_persmin;
   75 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT|CTLFLAG_RW,
   76     &tcp_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval");
   77 
   78 int    tcp_persmax;
   79 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT|CTLFLAG_RW,
   80     &tcp_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval");
   81 
   82 int     tcp_keepinit;
   83 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
   84     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
   85 
   86 int     tcp_keepidle;
   87 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
   88     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
   89 
   90 int     tcp_keepintvl;
   91 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
   92     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
   93 
   94 int     tcp_delacktime;
   95 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
   96     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
   97     "Time before a delayed ACK is sent");
   98 
   99 int     tcp_msl;
  100 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
  101     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
  102 
  103 int     tcp_rexmit_min;
  104 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
  105     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
  106     "Minimum Retransmission Timeout");
  107 
  108 int     tcp_rexmit_slop;
  109 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
  110     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
  111     "Retransmission Timer Slop");
  112 
  113 int     tcp_always_keepalive = 1;
  114 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
  115     &tcp_always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
  116 __strong_reference(tcp_always_keepalive, always_keepalive);
  117 
  118 int    tcp_fast_finwait2_recycle = 0;
  119 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 
  120     &tcp_fast_finwait2_recycle, 0,
  121     "Recycle closed FIN_WAIT_2 connections faster");
  122 
  123 int    tcp_finwait2_timeout;
  124 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
  125     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
  126 
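/*
 * Editor's illustration (not part of tcp_timer.c): the SYSCTL_PROC entries
 * above export tick-valued kernel variables through the sysctl_msec_to_ticks
 * handler, so userland reads and writes them in milliseconds while the
 * kernel stores ticks.  This is only a sketch of the conversion, assuming
 * hz = 1000 and ignoring rounding and bounds checks done by the real handler.
 */
#include <stdio.h>

static int
msec_to_ticks(int msec, int hz)
{
        return (msec * hz / 1000);      /* what the kernel stores */
}

static int
ticks_to_msec(int t, int hz)
{
        return (1000 * t / hz);         /* what userland sees */
}

int
main(void)
{
        int hz = 1000;                  /* assumed kern.hz; 1 tick == 1 ms */

        printf("100 ms    -> %d ticks\n", msec_to_ticks(100, hz));
        printf("200 ticks -> %d ms\n", ticks_to_msec(200, hz));
        return (0);
}
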
  127 int     tcp_keepcnt = TCPTV_KEEPCNT;
  128 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
  129     "Number of keepalive probes to send");
  130 
  131         /* max idle probes */
  132 int     tcp_maxpersistidle;
  133 
  134 static int      tcp_rexmit_drop_options = 0;
  135 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
  136     &tcp_rexmit_drop_options, 0,
  137     "Drop TCP options from 3rd and later retransmitted SYN");
  138 
  139 static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
  140 #define V_tcp_pmtud_blackhole_detect    VNET(tcp_pmtud_blackhole_detect)
  141 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
  142     CTLFLAG_RW,
  143     &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
  144     "Path MTU Discovery Black Hole Detection Enabled");
  145 
  146 static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
  147 #define V_tcp_pmtud_blackhole_activated \
  148     VNET(tcp_pmtud_blackhole_activated)
  149 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
  150     CTLFLAG_RD,
  151     &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
  152     "Path MTU Discovery Black Hole Detection, Activation Count");
  153 
  154 static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
  155 #define V_tcp_pmtud_blackhole_activated_min_mss \
  156     VNET(tcp_pmtud_blackhole_activated_min_mss)
  157 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
  158     CTLFLAG_RD,
  159     &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
  160     "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
  161 
  162 static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
  163 #define V_tcp_pmtud_blackhole_failed    VNET(tcp_pmtud_blackhole_failed)
  164 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
  165     CTLFLAG_RD,
  166     &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
  167     "Path MTU Discovery Black Hole Detection, Failure Count");
  168 
  169 #ifdef INET
  170 static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
  171 #define V_tcp_pmtud_blackhole_mss       VNET(tcp_pmtud_blackhole_mss)
  172 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
  173     CTLFLAG_RW,
  174     &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
  175     "Path MTU Discovery Black Hole Detection lowered MSS");
  176 #endif
  177 
  178 #ifdef INET6
  179 static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
  180 #define V_tcp_v6pmtud_blackhole_mss     VNET(tcp_v6pmtud_blackhole_mss)
  181 SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
  182     CTLFLAG_RW,
  183     &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
  184     "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
  185 #endif
  186 
  187 static int      per_cpu_timers = 0;
  188 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
  189     &per_cpu_timers , 0, "run tcp timers on all cpus");
  190 
  191 #define INP_CPU(inp)    (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
  192                 ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
  193 
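/*
 * Editor's illustration (not part of tcp_timer.c): the INP_CPU() macro above
 * pins a connection's timer callouts to a CPU derived from its flow ID when
 * net.inet.tcp.per_cpu_timers is enabled, falling back to the current CPU if
 * that CPU is absent, and to CPU 0 when the feature is off.  This userland
 * sketch mimics the selection; cpu_absent() is invented here and stands in
 * for the kernel's CPU_ABSENT(), while mp_maxid and curcpu are passed as
 * plain parameters.
 */
#include <stdbool.h>
#include <stdio.h>

static bool
cpu_absent(int cpu)
{
        (void)cpu;
        return (false);         /* assume all CPUs are present for the example */
}

static int
timer_cpu(bool per_cpu_timers, unsigned int flowid, int mp_maxid, int curcpu)
{
        if (!per_cpu_timers)
                return (0);     /* all timers run on CPU 0 */
        if (!cpu_absent(flowid % (mp_maxid + 1)))
                return (flowid % (mp_maxid + 1));
        return (curcpu);        /* fall back to the calling CPU */
}

int
main(void)
{
        printf("disabled: cpu %d\n", timer_cpu(false, 0x1234, 7, 3));
        printf("enabled:  cpu %d\n", timer_cpu(true, 0x1234, 7, 3));
        return (0);
}
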
  194 /*
  195  * Tcp protocol timeout routine called every 500 ms.
  196  * Updates timestamps used for TCP
  197  * causes finite state machine actions if timers expire.
  198  */
  199 void
  200 tcp_slowtimo(void)
  201 {
  202         VNET_ITERATOR_DECL(vnet_iter);
  203 
  204         VNET_LIST_RLOCK_NOSLEEP();
  205         VNET_FOREACH(vnet_iter) {
  206                 CURVNET_SET(vnet_iter);
  207                 (void) tcp_tw_2msl_scan(0);
  208                 CURVNET_RESTORE();
  209         }
  210         VNET_LIST_RUNLOCK_NOSLEEP();
  211 }
  212 
  213 int     tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
  214     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
  215 
  216 int     tcp_backoff[TCP_MAXRXTSHIFT + 1] =
  217     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
  218 
  219 static int tcp_totbackoff = 2559;       /* sum of tcp_backoff[] */
  220 
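/*
 * Editor's illustration (not part of tcp_timer.c): a minimal userland sketch
 * of how tcp_timer_rexmt() below scales the retransmission timeout.  It
 * re-derives tcp_totbackoff as the sum of tcp_backoff[] and shows the
 * exponential growth of the RTO, assuming a 200 ms base RTO and a 64 s
 * ceiling (the customary TCPTV_REXMTMAX); the real code uses
 * TCP_REXMTVAL(tp) and TCPT_RANGESET() with values measured in ticks.
 */
#include <stdio.h>

#define MAXRXTSHIFT 12                  /* mirrors TCP_MAXRXTSHIFT */

int
main(void)
{
        static const int backoff[MAXRXTSHIFT + 1] =
            { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
        int base_ms = 200;              /* assumed smoothed RTO */
        int max_ms = 64000;             /* assumed retransmit ceiling */
        int shift, total = 0;

        for (shift = 0; shift <= MAXRXTSHIFT; shift++) {
                int rexmt = base_ms * backoff[shift];

                if (rexmt > max_ms)     /* upper clamp of TCPT_RANGESET() */
                        rexmt = max_ms;
                total += backoff[shift];
                printf("shift %2d: rto %6d ms\n", shift, rexmt);
        }
        printf("sum of backoff[] = %d (tcp_totbackoff)\n", total);
        return (0);
}
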
  221 /*
  222  * TCP timer processing.
  223  */
  224 
  225 void
  226 tcp_timer_delack(void *xtp)
  227 {
  228         struct tcpcb *tp = xtp;
  229         struct inpcb *inp;
  230         CURVNET_SET(tp->t_vnet);
  231 
  232         inp = tp->t_inpcb;
  233         KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
  234         INP_WLOCK(inp);
  235         if (callout_pending(&tp->t_timers->tt_delack) ||
  236             !callout_active(&tp->t_timers->tt_delack)) {
  237                 INP_WUNLOCK(inp);
  238                 CURVNET_RESTORE();
  239                 return;
  240         }
  241         callout_deactivate(&tp->t_timers->tt_delack);
  242         if ((inp->inp_flags & INP_DROPPED) != 0) {
  243                 INP_WUNLOCK(inp);
  244                 CURVNET_RESTORE();
  245                 return;
  246         }
  247         KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
  248                 ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
  249         KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0,
  250                 ("%s: tp %p delack callout should be running", __func__, tp));
  251 
  252         tp->t_flags |= TF_ACKNOW;
  253         TCPSTAT_INC(tcps_delack);
  254         (void) tcp_output(tp);
  255         INP_WUNLOCK(inp);
  256         CURVNET_RESTORE();
  257 }
  258 
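/*
 * Editor's illustration (not part of tcp_timer.c): every timer handler in
 * this file opens with the same callout race check seen in
 * tcp_timer_delack() above.  This userland sketch models the two flags to
 * show why both tests are needed: "pending" means the callout was
 * rescheduled after this invocation was already queued, and "!active" means
 * it was stopped; in either case the handler must bail out.  The struct and
 * function names here are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_callout {
        bool pending;           /* rescheduled; a newer invocation will fire */
        bool active;            /* cleared by callout_stop()/deactivate */
};

static bool
should_run(const struct fake_callout *c)
{
        /* Mirrors: callout_pending() || !callout_active() -> return */
        if (c->pending || !c->active)
                return (false);
        return (true);
}

int
main(void)
{
        struct fake_callout normal = { false, true };
        struct fake_callout rescheduled = { true, true };
        struct fake_callout stopped = { false, false };

        printf("normal fire: %s\n", should_run(&normal) ? "run" : "skip");
        printf("rescheduled: %s\n", should_run(&rescheduled) ? "run" : "skip");
        printf("stopped:     %s\n", should_run(&stopped) ? "run" : "skip");
        return (0);
}
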
  259 void
  260 tcp_timer_2msl(void *xtp)
  261 {
  262         struct tcpcb *tp = xtp;
  263         struct inpcb *inp;
  264         CURVNET_SET(tp->t_vnet);
  265 #ifdef TCPDEBUG
  266         int ostate;
  267 
  268         ostate = tp->t_state;
  269 #endif
  270         INP_INFO_RLOCK(&V_tcbinfo);
  271         inp = tp->t_inpcb;
  272         KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
  273         INP_WLOCK(inp);
  274         tcp_free_sackholes(tp);
  275         if (callout_pending(&tp->t_timers->tt_2msl) ||
  276             !callout_active(&tp->t_timers->tt_2msl)) {
  277                 INP_WUNLOCK(tp->t_inpcb);
  278                 INP_INFO_RUNLOCK(&V_tcbinfo);
  279                 CURVNET_RESTORE();
  280                 return;
  281         }
  282         callout_deactivate(&tp->t_timers->tt_2msl);
  283         if ((inp->inp_flags & INP_DROPPED) != 0) {
  284                 INP_WUNLOCK(inp);
  285                 INP_INFO_RUNLOCK(&V_tcbinfo);
  286                 CURVNET_RESTORE();
  287                 return;
  288         }
  289         KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
  290                 ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
  291         KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0,
  292                 ("%s: tp %p 2msl callout should be running", __func__, tp));
  293         /*
  294          * 2 MSL timeout in shutdown went off.  If we're closed but
  295          * still waiting for peer to close and connection has been idle
  296          * too long delete connection control block.  Otherwise, check
  297          * again in a bit.
  298          *
  299          * If in TIME_WAIT state just ignore as this timeout is handled in
  300          * tcp_tw_2msl_scan().
  301          *
  302          * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 
  303          * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 
  304          * Ignore fact that there were recent incoming segments.
  305          */
  306         if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
  307                 INP_WUNLOCK(inp);
  308                 INP_INFO_RUNLOCK(&V_tcbinfo);
  309                 CURVNET_RESTORE();
  310                 return;
  311         }
  312         if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
  313             tp->t_inpcb && tp->t_inpcb->inp_socket && 
  314             (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
  315                 TCPSTAT_INC(tcps_finwait2_drops);
  316                 tp = tcp_close(tp);             
  317         } else {
  318                 if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
  319                         if (!callout_reset(&tp->t_timers->tt_2msl,
  320                            TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
  321                                 tp->t_timers->tt_flags &= ~TT_2MSL_RST;
  322                         }
  323                 } else
  324                        tp = tcp_close(tp);
  325        }
  326 
  327 #ifdef TCPDEBUG
  328         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  329                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  330                           PRU_SLOWTIMO);
  331 #endif
  332         if (tp != NULL)
  333                 INP_WUNLOCK(inp);
  334         INP_INFO_RUNLOCK(&V_tcbinfo);
  335         CURVNET_RESTORE();
  336 }
  337 
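/*
 * Editor's illustration (not part of tcp_timer.c): the decision made at the
 * end of tcp_timer_2msl() above, reduced to a pure function.  A FIN_WAIT_2
 * connection whose receive side has been shut down is closed immediately
 * when net.inet.tcp.fast_finwait2_recycle is set; otherwise the connection
 * is rescheduled only while its idle time is within TP_MAXIDLE().  All names
 * below are invented for the sketch, and the TIME_WAIT early return is
 * omitted.
 */
#include <stdbool.h>
#include <stdio.h>

enum action { CLOSE_NOW, RESCHEDULE };

static enum action
twomsl_action(bool fast_recycle, bool fin_wait_2, bool recv_shutdown,
    int idle_ticks, int maxidle_ticks)
{
        if (fast_recycle && fin_wait_2 && recv_shutdown)
                return (CLOSE_NOW);             /* tcps_finwait2_drops */
        if (idle_ticks <= maxidle_ticks)
                return (RESCHEDULE);            /* re-arm for TP_KEEPINTVL */
        return (CLOSE_NOW);                     /* idle too long: tcp_close() */
}

int
main(void)
{
        printf("fast recycle path:    %s\n",
            twomsl_action(true, true, true, 10, 600) == CLOSE_NOW ?
            "close" : "reschedule");
        printf("still within maxidle: %s\n",
            twomsl_action(false, true, true, 10, 600) == CLOSE_NOW ?
            "close" : "reschedule");
        printf("idle too long:        %s\n",
            twomsl_action(false, false, false, 900, 600) == CLOSE_NOW ?
            "close" : "reschedule");
        return (0);
}
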
  338 void
  339 tcp_timer_keep(void *xtp)
  340 {
  341         struct tcpcb *tp = xtp;
  342         struct tcptemp *t_template;
  343         struct inpcb *inp;
  344         CURVNET_SET(tp->t_vnet);
  345 #ifdef TCPDEBUG
  346         int ostate;
  347 
  348         ostate = tp->t_state;
  349 #endif
  350         INP_INFO_RLOCK(&V_tcbinfo);
  351         inp = tp->t_inpcb;
  352         KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
  353         INP_WLOCK(inp);
  354         if (callout_pending(&tp->t_timers->tt_keep) ||
  355             !callout_active(&tp->t_timers->tt_keep)) {
  356                 INP_WUNLOCK(inp);
  357                 INP_INFO_RUNLOCK(&V_tcbinfo);
  358                 CURVNET_RESTORE();
  359                 return;
  360         }
  361         callout_deactivate(&tp->t_timers->tt_keep);
  362         if ((inp->inp_flags & INP_DROPPED) != 0) {
  363                 INP_WUNLOCK(inp);
  364                 INP_INFO_RUNLOCK(&V_tcbinfo);
  365                 CURVNET_RESTORE();
  366                 return;
  367         }
  368         KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
  369                 ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
  370         KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0,
  371                 ("%s: tp %p keep callout should be running", __func__, tp));
  372         /*
  373          * Keep-alive timer went off; send something
  374          * or drop connection if idle for too long.
  375          */
  376         TCPSTAT_INC(tcps_keeptimeo);
  377         if (tp->t_state < TCPS_ESTABLISHED)
  378                 goto dropit;
  379         if ((tcp_always_keepalive ||
  380             inp->inp_socket->so_options & SO_KEEPALIVE) &&
  381             tp->t_state <= TCPS_CLOSING) {
  382                 if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
  383                         goto dropit;
  384                 /*
  385                  * Send a packet designed to force a response
  386                  * if the peer is up and reachable:
  387                  * either an ACK if the connection is still alive,
  388                  * or an RST if the peer has closed the connection
  389                  * due to timeout or reboot.
  390                  * Using sequence number tp->snd_una-1
  391                  * causes the transmitted zero-length segment
  392                  * to lie outside the receive window;
  393                  * by the protocol spec, this requires the
  394                  * correspondent TCP to respond.
  395                  */
  396                 TCPSTAT_INC(tcps_keepprobe);
  397                 t_template = tcpip_maketemplate(inp);
  398                 if (t_template) {
  399                         tcp_respond(tp, t_template->tt_ipgen,
  400                                     &t_template->tt_t, (struct mbuf *)NULL,
  401                                     tp->rcv_nxt, tp->snd_una - 1, 0);
  402                         free(t_template, M_TEMP);
  403                 }
  404                 if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
  405                     tcp_timer_keep, tp)) {
  406                         tp->t_timers->tt_flags &= ~TT_KEEP_RST;
  407                 }
  408         } else if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
  409                     tcp_timer_keep, tp)) {
  410                         tp->t_timers->tt_flags &= ~TT_KEEP_RST;
  411                 }
  412 
  413 #ifdef TCPDEBUG
  414         if (inp->inp_socket->so_options & SO_DEBUG)
  415                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  416                           PRU_SLOWTIMO);
  417 #endif
  418         INP_WUNLOCK(inp);
  419         INP_INFO_RUNLOCK(&V_tcbinfo);
  420         CURVNET_RESTORE();
  421         return;
  422 
  423 dropit:
  424         TCPSTAT_INC(tcps_keepdrops);
  425         tp = tcp_drop(tp, ETIMEDOUT);
  426 
  427 #ifdef TCPDEBUG
  428         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  429                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  430                           PRU_SLOWTIMO);
  431 #endif
  432         if (tp != NULL)
  433                 INP_WUNLOCK(tp->t_inpcb);
  434         INP_INFO_RUNLOCK(&V_tcbinfo);
  435         CURVNET_RESTORE();
  436 }
  437 
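/*
 * Editor's illustration (not part of tcp_timer.c): how long an unreachable
 * peer survives under the keepalive logic in tcp_timer_keep() above.  The
 * connection is dropped once it has been idle for TP_KEEPIDLE + TP_MAXIDLE,
 * where TP_MAXIDLE is roughly keepintvl * keepcnt.  The defaults below
 * (2 h before the first probe, 75 s between probes, 8 probes) match the
 * traditional BSD values but are stated here as assumptions; tcp_timer.h
 * holds the authoritative TCPTV_* constants.
 */
#include <stdio.h>

int
main(void)
{
        int keepidle = 7200;    /* seconds before the first probe (assumed) */
        int keepintvl = 75;     /* seconds between probes (assumed) */
        int keepcnt = 8;        /* probes before giving up (assumed) */
        int maxidle = keepintvl * keepcnt;

        printf("first probe after  %d s of idle time\n", keepidle);
        printf("connection dropped %d s after the first probe\n", maxidle);
        printf("total idle to drop %d s (%.1f h)\n",
            keepidle + maxidle, (keepidle + maxidle) / 3600.0);
        return (0);
}
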
  438 void
  439 tcp_timer_persist(void *xtp)
  440 {
  441         struct tcpcb *tp = xtp;
  442         struct inpcb *inp;
  443         CURVNET_SET(tp->t_vnet);
  444 #ifdef TCPDEBUG
  445         int ostate;
  446 
  447         ostate = tp->t_state;
  448 #endif
  449         INP_INFO_RLOCK(&V_tcbinfo);
  450         inp = tp->t_inpcb;
  451         KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
  452         INP_WLOCK(inp);
  453         if (callout_pending(&tp->t_timers->tt_persist) ||
  454             !callout_active(&tp->t_timers->tt_persist)) {
  455                 INP_WUNLOCK(inp);
  456                 INP_INFO_RUNLOCK(&V_tcbinfo);
  457                 CURVNET_RESTORE();
  458                 return;
  459         }
  460         callout_deactivate(&tp->t_timers->tt_persist);
  461         if ((inp->inp_flags & INP_DROPPED) != 0) {
  462                 INP_WUNLOCK(inp);
  463                 INP_INFO_RUNLOCK(&V_tcbinfo);
  464                 CURVNET_RESTORE();
  465                 return;
  466         }
  467         KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
  468                 ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
  469         KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0,
  470                 ("%s: tp %p persist callout should be running", __func__, tp));
  471         /*
  472          * Persistance timer into zero window.
  473          * Force a byte to be output, if possible.
  474          */
  475         TCPSTAT_INC(tcps_persisttimeo);
  476         /*
  477          * Hack: if the peer is dead/unreachable, we do not
  478          * time out if the window is closed.  After a full
  479          * backoff, drop the connection if the idle time
  480          * (no responses to probes) reaches the maximum
  481          * backoff that we would use if retransmitting.
  482          */
  483         if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
  484             (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
  485              ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
  486                 TCPSTAT_INC(tcps_persistdrop);
  487                 tp = tcp_drop(tp, ETIMEDOUT);
  488                 goto out;
  489         }
  490         /*
  491          * If the user has closed the socket then drop a persisting
  492          * connection after a much reduced timeout.
  493          */
  494         if (tp->t_state > TCPS_CLOSE_WAIT &&
  495             (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
  496                 TCPSTAT_INC(tcps_persistdrop);
  497                 tp = tcp_drop(tp, ETIMEDOUT);
  498                 goto out;
  499         }
  500         tcp_setpersist(tp);
  501         tp->t_flags |= TF_FORCEDATA;
  502         (void) tcp_output(tp);
  503         tp->t_flags &= ~TF_FORCEDATA;
  504 
  505 out:
  506 #ifdef TCPDEBUG
  507         if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
  508                 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
  509 #endif
  510         if (tp != NULL)
  511                 INP_WUNLOCK(inp);
  512         INP_INFO_RUNLOCK(&V_tcbinfo);
  513         CURVNET_RESTORE();
  514 }
  515 
  516 void
  517 tcp_timer_rexmt(void * xtp)
  518 {
  519         struct tcpcb *tp = xtp;
  520         CURVNET_SET(tp->t_vnet);
  521         int rexmt;
  522         int headlocked;
  523         struct inpcb *inp;
  524 #ifdef TCPDEBUG
  525         int ostate;
  526 
  527         ostate = tp->t_state;
  528 #endif
  529 
  530         INP_INFO_RLOCK(&V_tcbinfo);
  531         inp = tp->t_inpcb;
  532         KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
  533         INP_WLOCK(inp);
  534         if (callout_pending(&tp->t_timers->tt_rexmt) ||
  535             !callout_active(&tp->t_timers->tt_rexmt)) {
  536                 INP_WUNLOCK(inp);
  537                 INP_INFO_RUNLOCK(&V_tcbinfo);
  538                 CURVNET_RESTORE();
  539                 return;
  540         }
  541         callout_deactivate(&tp->t_timers->tt_rexmt);
  542         if ((inp->inp_flags & INP_DROPPED) != 0) {
  543                 INP_WUNLOCK(inp);
  544                 INP_INFO_RUNLOCK(&V_tcbinfo);
  545                 CURVNET_RESTORE();
  546                 return;
  547         }
  548         KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
  549                 ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
  550         KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0,
  551                 ("%s: tp %p rexmt callout should be running", __func__, tp));
  552         tcp_free_sackholes(tp);
  553         /*
  554          * Retransmission timer went off.  Message has not
  555          * been acked within retransmit interval.  Back off
  556          * to a longer retransmit interval and retransmit one segment.
  557          */
  558         if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
  559                 tp->t_rxtshift = TCP_MAXRXTSHIFT;
  560                 TCPSTAT_INC(tcps_timeoutdrop);
  561 
  562                 tp = tcp_drop(tp, tp->t_softerror ?
  563                               tp->t_softerror : ETIMEDOUT);
  564                 headlocked = 1;
  565                 goto out;
  566         }
  567         INP_INFO_RUNLOCK(&V_tcbinfo);
  568         headlocked = 0;
  569         if (tp->t_state == TCPS_SYN_SENT) {
  570                 /*
  571                  * If the SYN was retransmitted, indicate CWND to be
  572                  * limited to 1 segment in cc_conn_init().
  573                  */
  574                 tp->snd_cwnd = 1;
  575         } else if (tp->t_rxtshift == 1) {
  576                 /*
  577                  * first retransmit; record ssthresh and cwnd so they can
  578                  * be recovered if this turns out to be a "bad" retransmit.
  579                  * A retransmit is considered "bad" if an ACK for this
  580                  * segment is received within RTT/2 interval; the assumption
  581                  * here is that the ACK was already in flight.  See
  582                  * "On Estimating End-to-End Network Path Properties" by
  583                  * Allman and Paxson for more details.
  584                  */
  585                 tp->snd_cwnd_prev = tp->snd_cwnd;
  586                 tp->snd_ssthresh_prev = tp->snd_ssthresh;
  587                 tp->snd_recover_prev = tp->snd_recover;
  588                 if (IN_FASTRECOVERY(tp->t_flags))
  589                         tp->t_flags |= TF_WASFRECOVERY;
  590                 else
  591                         tp->t_flags &= ~TF_WASFRECOVERY;
  592                 if (IN_CONGRECOVERY(tp->t_flags))
  593                         tp->t_flags |= TF_WASCRECOVERY;
  594                 else
  595                         tp->t_flags &= ~TF_WASCRECOVERY;
  596                 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
  597                 tp->t_flags |= TF_PREVVALID;
  598         } else
  599                 tp->t_flags &= ~TF_PREVVALID;
  600         TCPSTAT_INC(tcps_rexmttimeo);
  601         if ((tp->t_state == TCPS_SYN_SENT) ||
  602             (tp->t_state == TCPS_SYN_RECEIVED))
  603                 rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
  604         else
  605                 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
  606         TCPT_RANGESET(tp->t_rxtcur, rexmt,
  607                       tp->t_rttmin, TCPTV_REXMTMAX);
  608 
  609         /*
  610          * We enter the path for PLMTUD if connection is established or, if
  611          * connection is FIN_WAIT_1 status, reason for the last is that if
  612          * amount of data we send is very small, we could send it in couple of
  613          * packets and process straight to FIN. In that case we won't catch
  614          * ESTABLISHED state.
  615          */
  616         if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
  617             || (tp->t_state == TCPS_FIN_WAIT_1))) {
  618                 int optlen;
  619 #ifdef INET6
  620                 int isipv6;
  621 #endif
  622 
  623                 /*
  624                  * Idea here is that at each stage of mtu probe (usually, 1448
  625                  * -> 1188 -> 524) should be given 2 chances to recover before
  626                  *  further clamping down. 'tp->t_rxtshift % 2 == 0' should
  627                  *  take care of that.
  628                  */
  629                 if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
  630                     (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
  631                     (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) {
  632                         /*
  633                          * Enter Path MTU Black-hole Detection mechanism:
  634                          * - Disable Path MTU Discovery (IP "DF" bit).
  635                          * - Reduce MTU to lower value than what we
  636                          *   negotiated with peer.
  637                          */
  638                         /* Record that we may have found a black hole. */
  639                         tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
  640 
  641                         /* Keep track of previous MSS. */
  642                         optlen = tp->t_maxopd - tp->t_maxseg;
  643                         tp->t_pmtud_saved_maxopd = tp->t_maxopd;
  644 
  645                         /* 
  646                          * Reduce the MSS to blackhole value or to the default
  647                          * in an attempt to retransmit.
  648                          */
  649 #ifdef INET6
  650                         isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
  651                         if (isipv6 &&
  652                             tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
  653                                 /* Use the sysctl tuneable blackhole MSS. */
  654                                 tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
  655                                 V_tcp_pmtud_blackhole_activated++;
  656                         } else if (isipv6) {
  657                                 /* Use the default MSS. */
  658                                 tp->t_maxopd = V_tcp_v6mssdflt;
  659                                 /*
  660                                  * Disable Path MTU Discovery when we switch to
  661                                  * minmss.
  662                                  */
  663                                 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
  664                                 V_tcp_pmtud_blackhole_activated_min_mss++;
  665                         }
  666 #endif
  667 #if defined(INET6) && defined(INET)
  668                         else
  669 #endif
  670 #ifdef INET
  671                         if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
  672                                 /* Use the sysctl tuneable blackhole MSS. */
  673                                 tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
  674                                 V_tcp_pmtud_blackhole_activated++;
  675                         } else {
  676                                 /* Use the default MSS. */
  677                                 tp->t_maxopd = V_tcp_mssdflt;
  678                                 /*
  679                                  * Disable Path MTU Discovery when we switch to
  680                                  * minmss.
  681                                  */
  682                                 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
  683                                 V_tcp_pmtud_blackhole_activated_min_mss++;
  684                         }
  685 #endif
  686                         tp->t_maxseg = tp->t_maxopd - optlen;
  687                         /*
  688                          * Reset the slow-start flight size
  689                          * as it may depend on the new MSS.
  690                          */
  691                         if (CC_ALGO(tp)->conn_init != NULL)
  692                                 CC_ALGO(tp)->conn_init(tp->ccv);
  693                 } else {
  694                         /*
  695                          * If further retransmissions are still unsuccessful
  696                          * with a lowered MTU, maybe this isn't a blackhole and
  697                          * we restore the previous MSS and blackhole detection
  698                          * flags.
  699                          * The limit '6' is determined by giving each probe
  700                          * stage (1448, 1188, 524) 2 chances to recover.
  701                          */
  702                         if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
  703                             (tp->t_rxtshift > 6)) {
  704                                 tp->t_flags2 |= TF2_PLPMTU_PMTUD;
  705                                 tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
  706                                 optlen = tp->t_maxopd - tp->t_maxseg;
  707                                 tp->t_maxopd = tp->t_pmtud_saved_maxopd;
  708                                 tp->t_maxseg = tp->t_maxopd - optlen;
  709                                 V_tcp_pmtud_blackhole_failed++;
  710                                 /*
  711                                  * Reset the slow-start flight size as it
  712                                  * may depend on the new MSS.
  713                                  */
  714                                 if (CC_ALGO(tp)->conn_init != NULL)
  715                                         CC_ALGO(tp)->conn_init(tp->ccv);
  716                         }
  717                 }
  718         }
  719 
  720         /*
  721          * Disable RFC1323 and SACK if we haven't got any response to
  722          * our third SYN to work-around some broken terminal servers
  723          * (most of which have hopefully been retired) that have bad VJ
  724          * header compression code which trashes TCP segments containing
  725          * unknown-to-them TCP options.
  726          */
  727         if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
  728             (tp->t_rxtshift == 3))
  729                 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
  730         /*
  731          * If we backed off this far, our srtt estimate is probably bogus.
  732          * Clobber it so we'll take the next rtt measurement as our srtt;
  733          * move the current srtt into rttvar to keep the current
  734          * retransmit times until then.
  735          */
  736         if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
  737 #ifdef INET6
  738                 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
  739                         in6_losing(tp->t_inpcb);
  740 #endif
  741                 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
  742                 tp->t_srtt = 0;
  743         }
  744         tp->snd_nxt = tp->snd_una;
  745         tp->snd_recover = tp->snd_max;
  746         /*
  747          * Force a segment to be sent.
  748          */
  749         tp->t_flags |= TF_ACKNOW;
  750         /*
  751          * If timing a segment in this window, stop the timer.
  752          */
  753         tp->t_rtttime = 0;
  754 
  755         cc_cong_signal(tp, NULL, CC_RTO);
  756 
  757         (void) tcp_output(tp);
  758 
  759 out:
  760 #ifdef TCPDEBUG
  761         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  762                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  763                           PRU_SLOWTIMO);
  764 #endif
  765         if (tp != NULL)
  766                 INP_WUNLOCK(inp);
  767         if (headlocked)
  768                 INP_INFO_RUNLOCK(&V_tcbinfo);
  769         CURVNET_RESTORE();
  770 }
  771 
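/*
 * Editor's illustration (not part of tcp_timer.c): the staged MSS clamping
 * performed by the PMTU black-hole logic in tcp_timer_rexmt() above, with
 * the TF2_* flag preconditions omitted for brevity.  Each even retransmit
 * shift >= 2 lowers the effective MSS one step (negotiated -> blackhole MSS
 * -> minimum/default MSS); if retransmissions keep failing past shift 6, the
 * previously saved MSS is restored.  The 1200-byte blackhole MSS mirrors the
 * IPv4 default in this file; the 1460-byte negotiated MSS and 536-byte
 * default MSS are assumptions for the example.
 */
#include <stdio.h>

int
main(void)
{
        int negotiated = 1460;  /* assumed MSS agreed at connection setup */
        int blackhole = 1200;   /* net.inet.tcp.pmtud_blackhole_mss default */
        int mssdflt = 536;      /* assumed V_tcp_mssdflt */
        int mss = negotiated, saved = negotiated;
        int shift;

        for (shift = 1; shift <= 7; shift++) {
                if (shift >= 2 && shift % 2 == 0 && mss > mssdflt) {
                        saved = mss;    /* like t_pmtud_saved_maxopd */
                        mss = (mss > blackhole) ? blackhole : mssdflt;
                } else if (shift > 6) {
                        mss = saved;    /* give up: restore the previous MSS */
                }
                printf("rxtshift %d: mss %d\n", shift, mss);
        }
        return (0);
}
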
  772 void
  773 tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
  774 {
  775         struct callout *t_callout;
  776         timeout_t *f_callout;
  777         struct inpcb *inp = tp->t_inpcb;
  778         int cpu = INP_CPU(inp);
  779         uint32_t f_reset;
  780 
  781 #ifdef TCP_OFFLOAD
  782         if (tp->t_flags & TF_TOE)
  783                 return;
  784 #endif
  785 
  786         if (tp->t_timers->tt_flags & TT_STOPPED)
  787                 return;
  788 
  789         switch (timer_type) {
  790                 case TT_DELACK:
  791                         t_callout = &tp->t_timers->tt_delack;
  792                         f_callout = tcp_timer_delack;
  793                         f_reset = TT_DELACK_RST;
  794                         break;
  795                 case TT_REXMT:
  796                         t_callout = &tp->t_timers->tt_rexmt;
  797                         f_callout = tcp_timer_rexmt;
  798                         f_reset = TT_REXMT_RST;
  799                         break;
  800                 case TT_PERSIST:
  801                         t_callout = &tp->t_timers->tt_persist;
  802                         f_callout = tcp_timer_persist;
  803                         f_reset = TT_PERSIST_RST;
  804                         break;
  805                 case TT_KEEP:
  806                         t_callout = &tp->t_timers->tt_keep;
  807                         f_callout = tcp_timer_keep;
  808                         f_reset = TT_KEEP_RST;
  809                         break;
  810                 case TT_2MSL:
  811                         t_callout = &tp->t_timers->tt_2msl;
  812                         f_callout = tcp_timer_2msl;
  813                         f_reset = TT_2MSL_RST;
  814                         break;
  815                 default:
  816                         panic("tp %p bad timer_type %#x", tp, timer_type);
  817                 }
  818         if (delta == 0) {
  819                 if ((tp->t_timers->tt_flags & timer_type) &&
  820                     callout_stop(t_callout) &&
  821                     (tp->t_timers->tt_flags & f_reset)) {
  822                         tp->t_timers->tt_flags &= ~(timer_type | f_reset);
  823                 }
  824         } else {
  825                 if ((tp->t_timers->tt_flags & timer_type) == 0) {
  826                         tp->t_timers->tt_flags |= (timer_type | f_reset);
  827                         callout_reset_on(t_callout, delta, f_callout, tp, cpu);
  828                 } else {
  829                         /* Reset already running callout on the same CPU. */
  830                         if (!callout_reset(t_callout, delta, f_callout, tp)) {
  831                                 /*
  832                                  * Callout not cancelled, consider it as not
  833                                  * properly restarted. */
  834                                 tp->t_timers->tt_flags &= ~f_reset;
  835                         }
  836                 }
  837         }
  838 }
  839 
  840 int
  841 tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
  842 {
  843         struct callout *t_callout;
  844 
  845         switch (timer_type) {
  846                 case TT_DELACK:
  847                         t_callout = &tp->t_timers->tt_delack;
  848                         break;
  849                 case TT_REXMT:
  850                         t_callout = &tp->t_timers->tt_rexmt;
  851                         break;
  852                 case TT_PERSIST:
  853                         t_callout = &tp->t_timers->tt_persist;
  854                         break;
  855                 case TT_KEEP:
  856                         t_callout = &tp->t_timers->tt_keep;
  857                         break;
  858                 case TT_2MSL:
  859                         t_callout = &tp->t_timers->tt_2msl;
  860                         break;
  861                 default:
  862                         panic("tp %p bad timer_type %#x", tp, timer_type);
  863                 }
  864         return callout_active(t_callout);
  865 }
  866 
  867 void
  868 tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
  869 {
  870         struct callout *t_callout;
  871         timeout_t *f_callout;
  872         uint32_t f_reset;
  873 
  874         tp->t_timers->tt_flags |= TT_STOPPED;
  875 
  876         switch (timer_type) {
  877                 case TT_DELACK:
  878                         t_callout = &tp->t_timers->tt_delack;
  879                         f_callout = tcp_timer_delack_discard;
  880                         f_reset = TT_DELACK_RST;
  881                         break;
  882                 case TT_REXMT:
  883                         t_callout = &tp->t_timers->tt_rexmt;
  884                         f_callout = tcp_timer_rexmt_discard;
  885                         f_reset = TT_REXMT_RST;
  886                         break;
  887                 case TT_PERSIST:
  888                         t_callout = &tp->t_timers->tt_persist;
  889                         f_callout = tcp_timer_persist_discard;
  890                         f_reset = TT_PERSIST_RST;
  891                         break;
  892                 case TT_KEEP:
  893                         t_callout = &tp->t_timers->tt_keep;
  894                         f_callout = tcp_timer_keep_discard;
  895                         f_reset = TT_KEEP_RST;
  896                         break;
  897                 case TT_2MSL:
  898                         t_callout = &tp->t_timers->tt_2msl;
  899                         f_callout = tcp_timer_2msl_discard;
  900                         f_reset = TT_2MSL_RST;
  901                         break;
  902                 default:
  903                         panic("tp %p bad timer_type %#x", tp, timer_type);
  904                 }
  905 
  906         if (tp->t_timers->tt_flags & timer_type) {
  907                 if (callout_stop(t_callout) &&
  908                     (tp->t_timers->tt_flags & f_reset)) {
  909                         tp->t_timers->tt_flags &= ~(timer_type | f_reset);
  910                 } else {
  911                         /*
  912                          * Can't stop the callout, defer tcpcb actual deletion
  913                          * to the last tcp timer discard callout.
  914                          * The TT_STOPPED flag will ensure that no tcp timer
  915                          * callouts can be restarted on our behalf, and
  916                          * past this point currently running callouts waiting
  917                          * on inp lock will return right away after the
  918                          * classical check for callout reset/stop events:
  919                          * callout_pending() || !callout_active()
  920                          */
  921                         callout_reset(t_callout, 1, f_callout, tp);
  922                 }
  923         }
  924 }
  925 
  926 #define ticks_to_msecs(t)       (1000*(t) / hz)
  927 
  928 void
  929 tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
  930     struct xtcp_timer *xtimer)
  931 {
  932         sbintime_t now;
  933 
  934         bzero(xtimer, sizeof(*xtimer));
  935         if (timer == NULL)
  936                 return;
  937         now = getsbinuptime();
  938         if (callout_active(&timer->tt_delack))
  939                 xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
  940         if (callout_active(&timer->tt_rexmt))
  941                 xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
  942         if (callout_active(&timer->tt_persist))
  943                 xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
  944         if (callout_active(&timer->tt_keep))
  945                 xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
  946         if (callout_active(&timer->tt_2msl))
  947                 xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
  948         xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
  949 }

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.