The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_timer.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
   34  * $FreeBSD$
   35  */
   36 
   37 #include "opt_compat.h"
   38 #include "opt_inet6.h"
   39 #include "opt_tcpdebug.h"
   40 
   41 #include <sys/param.h>
   42 #include <sys/systm.h>
   43 #include <sys/kernel.h>
   44 #include <sys/mbuf.h>
   45 #include <sys/sysctl.h>
   46 #include <sys/socket.h>
   47 #include <sys/socketvar.h>
   48 #include <sys/protosw.h>
   49 
   50 #include <machine/cpu.h>        /* before tcp_seq.h, for tcp_random18() */
   51 
   52 #include <net/route.h>
   53 
   54 #include <netinet/in.h>
   55 #include <netinet/in_systm.h>
   56 #include <netinet/in_pcb.h>
   57 #ifdef INET6
   58 #include <netinet6/in6_pcb.h>
   59 #endif
   60 #include <netinet/ip_var.h>
   61 #include <netinet/tcp.h>
   62 #include <netinet/tcp_fsm.h>
   63 #include <netinet/tcp_seq.h>
   64 #include <netinet/tcp_timer.h>
   65 #include <netinet/tcp_var.h>
   66 #include <netinet/tcpip.h>
   67 #ifdef TCPDEBUG
   68 #include <netinet/tcp_debug.h>
   69 #endif
   70 
   71 static int
   72 sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
   73 {
   74         int error, s, tt;
   75 
   76         tt = *(int *)oidp->oid_arg1;
   77         s = (int)((int64_t)tt * 1000 / hz);
   78 
   79         error = sysctl_handle_int(oidp, &s, 0, req);
   80         if (error || !req->newptr)
   81                 return (error);
   82 
   83         tt = (int)((int64_t)s * hz / 1000);
   84         if (tt < 1)
   85                 return (EINVAL);
   86 
   87         *(int *)oidp->oid_arg1 = tt;
   88         return (0);
   89 }
   90 
   91 int     tcp_keepinit;
   92 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
   93     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");
   94 
   95 int     tcp_keepidle;
   96 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
   97     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");
   98 
   99 int     tcp_keepintvl;
  100 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
  101     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");
  102 
  103 int     tcp_delacktime;
  104 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
  105     CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
  106     "Time before a delayed ACK is sent");
  107  
  108 int     tcp_msl;
  109 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
  110     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
  111 
  112 int     tcp_rexmit_min;
  113 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
  114     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout");
  115 
  116 int     tcp_rexmit_slop;
  117 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
  118     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", "Retransmission Timer Slop");
  119 
  120 static int      always_keepalive = 0;
  121 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 
  122     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
  123 
  124 static int      tcp_keepcnt = TCPTV_KEEPCNT;
  125         /* max idle probes */
  126 int     tcp_maxpersistidle;
  127         /* max idle time in persist */
  128 int     tcp_maxidle;
  129 
  130 /*
  131  * Tcp protocol timeout routine called every 500 ms.
  132  * Updates timestamps used for TCP
  133  * causes finite state machine actions if timers expire.
  134  */
  135 void
  136 tcp_slowtimo()
  137 {
  138         int s;
  139 
  140         s = splnet();
  141 
  142         tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
  143 
  144         splx(s);
  145 }
  146 
  147 /*
  148  * Cancel all timers for TCP tp.
  149  */
  150 void
  151 tcp_canceltimers(tp)
  152         struct tcpcb *tp;
  153 {
  154         callout_stop(tp->tt_2msl);
  155         callout_stop(tp->tt_persist);
  156         callout_stop(tp->tt_keep);
  157         callout_stop(tp->tt_rexmt);
  158 }
  159 
  160 int     tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
  161     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
  162 
  163 int     tcp_backoff[TCP_MAXRXTSHIFT + 1] =
  164     { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
  165 
  166 static int tcp_totbackoff = 511;        /* sum of tcp_backoff[] */
  167 
  168 /*
  169  * TCP timer processing.
  170  */
  171 void
  172 tcp_timer_delack(xtp)
  173         void *xtp;
  174 {
  175         struct tcpcb *tp = xtp;
  176         int s;
  177 
  178         s = splnet();
  179         if (callout_pending(tp->tt_delack) || !callout_active(tp->tt_delack)) {
  180                 splx(s);
  181                 return;
  182         }
  183         callout_deactivate(tp->tt_delack);
  184 
  185         tp->t_flags |= TF_ACKNOW;
  186         tcpstat.tcps_delack++;
  187         (void) tcp_output(tp);
  188         splx(s);
  189 }
  190 
  191 void
  192 tcp_timer_2msl(xtp)
  193         void *xtp;
  194 {
  195         struct tcpcb *tp = xtp;
  196         int s;
  197 #ifdef TCPDEBUG
  198         int ostate;
  199 
  200         ostate = tp->t_state;
  201 #endif
  202         s = splnet();
  203         if (callout_pending(tp->tt_2msl) || !callout_active(tp->tt_2msl)) {
  204                 splx(s);
  205                 return;
  206         }
  207         callout_deactivate(tp->tt_2msl);
  208         /*
  209          * 2 MSL timeout in shutdown went off.  If we're closed but
  210          * still waiting for peer to close and connection has been idle
  211          * too long, or if 2MSL time is up from TIME_WAIT, delete connection
  212          * control block.  Otherwise, check again in a bit.
  213          */
  214         if (tp->t_state != TCPS_TIME_WAIT &&
  215             (ticks - tp->t_rcvtime) <= tcp_maxidle)
  216                 callout_reset(tp->tt_2msl, tcp_keepintvl,
  217                               tcp_timer_2msl, tp);
  218         else
  219                 tp = tcp_close(tp);
  220 
  221 #ifdef TCPDEBUG
  222         if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  223                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  224                           PRU_SLOWTIMO);
  225 #endif
  226         splx(s);
  227 }
  228 
  229 void
  230 tcp_timer_keep(xtp)
  231         void *xtp;
  232 {
  233         struct tcpcb *tp = xtp;
  234         struct tcptemp *t_template;
  235         int s;
  236 #ifdef TCPDEBUG
  237         int ostate;
  238 
  239         ostate = tp->t_state;
  240 #endif
  241         s = splnet();
  242         if (callout_pending(tp->tt_keep) || !callout_active(tp->tt_keep)) {
  243                 splx(s);
  244                 return;
  245         }
  246         callout_deactivate(tp->tt_keep);
  247         /*
  248          * Keep-alive timer went off; send something
  249          * or drop connection if idle for too long.
  250          */
  251         tcpstat.tcps_keeptimeo++;
  252         if (tp->t_state < TCPS_ESTABLISHED)
  253                 goto dropit;
  254         if ((always_keepalive ||
  255              tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) &&
  256             tp->t_state <= TCPS_CLOSING) {
  257                 if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
  258                         goto dropit;
  259                 /*
  260                  * Send a packet designed to force a response
  261                  * if the peer is up and reachable:
  262                  * either an ACK if the connection is still alive,
  263                  * or an RST if the peer has closed the connection
  264                  * due to timeout or reboot.
  265                  * Using sequence number tp->snd_una-1
  266                  * causes the transmitted zero-length segment
  267                  * to lie outside the receive window;
  268                  * by the protocol spec, this requires the
  269                  * correspondent TCP to respond.
  270                  */
  271                 tcpstat.tcps_keepprobe++;
  272                 t_template = tcp_maketemplate(tp);
  273                 if (t_template) {
  274                         tcp_respond(tp, t_template->tt_ipgen,
  275                                     &t_template->tt_t, (struct mbuf *)NULL,
  276                                     tp->rcv_nxt, tp->snd_una - 1, 0);
  277                         (void) m_free(dtom(t_template));
  278                 }
  279                 callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
  280         } else
  281                 callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
  282 
  283 #ifdef TCPDEBUG
  284         if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
  285                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  286                           PRU_SLOWTIMO);
  287 #endif
  288         splx(s);
  289         return;
  290 
  291 dropit:
  292         tcpstat.tcps_keepdrops++;
  293         tp = tcp_drop(tp, ETIMEDOUT);
  294 
  295 #ifdef TCPDEBUG
  296         if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  297                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  298                           PRU_SLOWTIMO);
  299 #endif
  300         splx(s);
  301 }
  302 
  303 void
  304 tcp_timer_persist(xtp)
  305         void *xtp;
  306 {
  307         struct tcpcb *tp = xtp;
  308         int s;
  309 #ifdef TCPDEBUG
  310         int ostate;
  311 
  312         ostate = tp->t_state;
  313 #endif
  314         s = splnet();
  315         if (callout_pending(tp->tt_persist) || !callout_active(tp->tt_persist)){
  316                 splx(s);
  317                 return;
  318         }
  319         callout_deactivate(tp->tt_persist);
  320         /*
  321          * Persistance timer into zero window.
  322          * Force a byte to be output, if possible.
  323          */
  324         tcpstat.tcps_persisttimeo++;
  325         /*
  326          * Hack: if the peer is dead/unreachable, we do not
  327          * time out if the window is closed.  After a full
  328          * backoff, drop the connection if the idle time
  329          * (no responses to probes) reaches the maximum
  330          * backoff that we would use if retransmitting.
  331          */
  332         if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
  333             ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
  334              (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
  335                 tcpstat.tcps_persistdrop++;
  336                 tp = tcp_drop(tp, ETIMEDOUT);
  337                 goto out;
  338         }
  339         tcp_setpersist(tp);
  340         tp->t_force = 1;
  341         (void) tcp_output(tp);
  342         tp->t_force = 0;
  343 
  344 out:
  345 #ifdef TCPDEBUG
  346         if (tp && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
  347                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  348                           PRU_SLOWTIMO);
  349 #endif
  350         splx(s);
  351 }
  352 
  353 void
  354 tcp_timer_rexmt(xtp)
  355         void *xtp;
  356 {
  357         struct tcpcb *tp = xtp;
  358         int s;
  359         int rexmt;
  360 #ifdef TCPDEBUG
  361         int ostate;
  362 
  363         ostate = tp->t_state;
  364 #endif
  365         s = splnet();
  366         if (callout_pending(tp->tt_rexmt) || !callout_active(tp->tt_rexmt)) {
  367                 splx(s);
  368                 return;
  369         }
  370         callout_deactivate(tp->tt_rexmt);
  371         /*
  372          * Retransmission timer went off.  Message has not
  373          * been acked within retransmit interval.  Back off
  374          * to a longer retransmit interval and retransmit one segment.
  375          */
  376         if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
  377                 tp->t_rxtshift = TCP_MAXRXTSHIFT;
  378                 tcpstat.tcps_timeoutdrop++;
  379                 tp = tcp_drop(tp, tp->t_softerror ?
  380                               tp->t_softerror : ETIMEDOUT);
  381                 goto out;
  382         }
  383         if (tp->t_rxtshift == 1) {
  384                 /*
  385                  * first retransmit; record ssthresh and cwnd so they can
  386                  * be recovered if this turns out to be a "bad" retransmit.
  387                  * A retransmit is considered "bad" if an ACK for this 
  388                  * segment is received within RTT/2 interval; the assumption
  389                  * here is that the ACK was already in flight.  See 
  390                  * "On Estimating End-to-End Network Path Properties" by
  391                  * Allman and Paxson for more details.
  392                  */
  393                 tp->snd_cwnd_prev = tp->snd_cwnd;
  394                 tp->snd_ssthresh_prev = tp->snd_ssthresh;
  395                 tp->snd_high_prev = tp->snd_high;
  396                 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
  397         }
  398         tcpstat.tcps_rexmttimeo++;
  399         if (tp->t_state == TCPS_SYN_SENT)
  400                 rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
  401         else
  402                 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
  403         TCPT_RANGESET(tp->t_rxtcur, rexmt,
  404                       tp->t_rttmin, TCPTV_REXMTMAX);
  405         /*
  406          * Disable rfc1323 and rfc1644 if we havn't got any response to
  407          * our third SYN to work-around some broken terminal servers 
  408          * (most of which have hopefully been retired) that have bad VJ 
  409          * header compression code which trashes TCP segments containing 
  410          * unknown-to-them TCP options.
  411          */
  412         if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
  413                 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC);
  414         /*
  415          * If losing, let the lower level know and try for
  416          * a better route.  Also, if we backed off this far,
  417          * our srtt estimate is probably bogus.  Clobber it
  418          * so we'll take the next rtt measurement as our srtt;
  419          * move the current srtt into rttvar to keep the current
  420          * retransmit times until then.
  421          */
  422         if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
  423 #ifdef INET6
  424                 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
  425                         in6_losing(tp->t_inpcb);
  426                 else
  427 #endif
  428                 in_losing(tp->t_inpcb);
  429                 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
  430                 tp->t_srtt = 0;
  431         }
  432         tp->snd_nxt = tp->snd_una;
  433         tp->snd_high = tp->snd_max;
  434         /*
  435          * Force a segment to be sent.
  436          */
  437         tp->t_flags |= TF_ACKNOW;
  438         /*
  439          * If timing a segment in this window, stop the timer.
  440          */
  441         tp->t_rtttime = 0;
  442         /*
  443          * Close the congestion window down to one segment
  444          * (we'll open it by one segment for each ack we get).
  445          * Since we probably have a window's worth of unacked
  446          * data accumulated, this "slow start" keeps us from
  447          * dumping all that data as back-to-back packets (which
  448          * might overwhelm an intermediate gateway).
  449          *
  450          * There are two phases to the opening: Initially we
  451          * open by one mss on each ack.  This makes the window
  452          * size increase exponentially with time.  If the
  453          * window is larger than the path can handle, this
  454          * exponential growth results in dropped packet(s)
  455          * almost immediately.  To get more time between
  456          * drops but still "push" the network to take advantage
  457          * of improving conditions, we switch from exponential
  458          * to linear window opening at some threshhold size.
  459          * For a threshhold, we use half the current window
  460          * size, truncated to a multiple of the mss.
  461          *
  462          * (the minimum cwnd that will give us exponential
  463          * growth is 2 mss.  We don't allow the threshhold
  464          * to go below this.)
  465          */
  466         {
  467                 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
  468                 if (win < 2)
  469                         win = 2;
  470                 tp->snd_cwnd = tp->t_maxseg;
  471                 tp->snd_ssthresh = win * tp->t_maxseg;
  472                 tp->t_dupacks = 0;
  473         }
  474         (void) tcp_output(tp);
  475 
  476 out:
  477 #ifdef TCPDEBUG
  478         if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  479                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  480                           PRU_SLOWTIMO);
  481 #endif
  482         splx(s);
  483 }

Cache object: 0330e3c9f7f07b43afba20278df2139d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.