The Design and Implementation of the FreeBSD Operating System, Second Edition
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_timer.c

Version: -  FREEBSD  -  FREEBSD10  -  FREEBSD9  -  FREEBSD92  -  FREEBSD91  -  FREEBSD90  -  FREEBSD8  -  FREEBSD82  -  FREEBSD81  -  FREEBSD80  -  FREEBSD7  -  FREEBSD74  -  FREEBSD73  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  cheribsd  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1  -  FREEBSD-LIBC  -  FREEBSD8-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: head/sys/netinet/tcp_timer.c 247777 2013-03-04 11:09:56Z davide $");
   34 
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 #include "opt_tcpdebug.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/kernel.h>
   41 #include <sys/lock.h>
   42 #include <sys/mbuf.h>
   43 #include <sys/mutex.h>
   44 #include <sys/protosw.h>
   45 #include <sys/smp.h>
   46 #include <sys/socket.h>
   47 #include <sys/socketvar.h>
   48 #include <sys/sysctl.h>
   49 #include <sys/systm.h>
   50 
   51 #include <net/if.h>
   52 #include <net/route.h>
   53 #include <net/vnet.h>
   54 
   55 #include <netinet/cc.h>
   56 #include <netinet/in.h>
   57 #include <netinet/in_pcb.h>
   58 #include <netinet/in_systm.h>
   59 #ifdef INET6
   60 #include <netinet6/in6_pcb.h>
   61 #endif
   62 #include <netinet/ip_var.h>
   63 #include <netinet/tcp_fsm.h>
   64 #include <netinet/tcp_timer.h>
   65 #include <netinet/tcp_var.h>
   66 #include <netinet/tcpip.h>
   67 #ifdef TCPDEBUG
   68 #include <netinet/tcp_debug.h>
   69 #endif
   70 
   71 int     tcp_keepinit;
   72 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
   73     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
   74 
   75 int     tcp_keepidle;
   76 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
   77     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
   78 
   79 int     tcp_keepintvl;
   80 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
   81     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
   82 
   83 int     tcp_delacktime;
   84 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
   85     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
   86     "Time before a delayed ACK is sent");
   87 
   88 int     tcp_msl;
   89 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
   90     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
   91 
   92 int     tcp_rexmit_min;
   93 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
   94     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
   95     "Minimum Retransmission Timeout");
   96 
   97 int     tcp_rexmit_slop;
   98 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
   99     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
  100     "Retransmission Timer Slop");
  101 
  102 static int      always_keepalive = 1;
  103 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
  104     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
  105 
  106 int    tcp_fast_finwait2_recycle = 0;
  107 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 
  108     &tcp_fast_finwait2_recycle, 0,
  109     "Recycle closed FIN_WAIT_2 connections faster");
  110 
  111 int    tcp_finwait2_timeout;
  112 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
  113     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
  114 
  115 int     tcp_keepcnt = TCPTV_KEEPCNT;
  116 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
  117     "Number of keepalive probes to send");
  118 
  119         /* max idle probes */
  120 int     tcp_maxpersistidle;
  121 
  122 static int      tcp_rexmit_drop_options = 0;
  123 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
  124     &tcp_rexmit_drop_options, 0,
  125     "Drop TCP options from 3rd and later retransmitted SYN");
  126 
  127 static int      per_cpu_timers = 0;
  128 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
  129     &per_cpu_timers , 0, "run tcp timers on all cpus");
  130 
  131 #define INP_CPU(inp)    (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
  132                 ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
  133 
  134 /*
  135  * Tcp protocol timeout routine called every 500 ms.
  136  * Updates timestamps used for TCP
  137  * causes finite state machine actions if timers expire.
  138  */
  139 void
  140 tcp_slowtimo(void)
  141 {
  142         VNET_ITERATOR_DECL(vnet_iter);
  143 
  144         VNET_LIST_RLOCK_NOSLEEP();
  145         VNET_FOREACH(vnet_iter) {
  146                 CURVNET_SET(vnet_iter);
  147                 INP_INFO_WLOCK(&V_tcbinfo);
  148                 (void) tcp_tw_2msl_scan(0);
  149                 INP_INFO_WUNLOCK(&V_tcbinfo);
  150                 CURVNET_RESTORE();
  151         }
  152         VNET_LIST_RUNLOCK_NOSLEEP();
  153 }
  154 
  155 int     tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
  156     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
  157 
  158 int     tcp_backoff[TCP_MAXRXTSHIFT + 1] =
  159     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
  160 
  161 static int tcp_totbackoff = 2559;       /* sum of tcp_backoff[] */
  162 
  163 static int tcp_timer_race;
  164 SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
  165     0, "Count of t_inpcb races on tcp_discardcb");
  166 
  167 /*
  168  * TCP timer processing.
  169  */
  170 
  171 void
  172 tcp_timer_delack(void *xtp)
  173 {
  174         struct tcpcb *tp = xtp;
  175         struct inpcb *inp;
  176         CURVNET_SET(tp->t_vnet);
  177 
  178         inp = tp->t_inpcb;
  179         /*
  180          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
  181          * tear-down mean we need it as a work-around for races between
  182          * timers and tcp_discardcb().
  183          *
  184          * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
  185          */
  186         if (inp == NULL) {
  187                 tcp_timer_race++;
  188                 CURVNET_RESTORE();
  189                 return;
  190         }
  191         INP_WLOCK(inp);
  192         if (callout_pending(&tp->t_timers->tt_delack) ||
  193             !callout_active(&tp->t_timers->tt_delack)) {
  194                 INP_WUNLOCK(inp);
  195                 CURVNET_RESTORE();
  196                 return;
  197         }
  198         callout_deactivate(&tp->t_timers->tt_delack);
  199         if ((inp->inp_flags & INP_DROPPED) != 0) {
  200                 INP_WUNLOCK(inp);
  201                 CURVNET_RESTORE();
  202                 return;
  203         }
  204 
  205         tp->t_flags |= TF_ACKNOW;
  206         TCPSTAT_INC(tcps_delack);
  207         (void) tcp_output(tp);
  208         INP_WUNLOCK(inp);
  209         CURVNET_RESTORE();
  210 }
  211 
  212 void
  213 tcp_timer_2msl(void *xtp)
  214 {
  215         struct tcpcb *tp = xtp;
  216         struct inpcb *inp;
  217         CURVNET_SET(tp->t_vnet);
  218 #ifdef TCPDEBUG
  219         int ostate;
  220 
  221         ostate = tp->t_state;
  222 #endif
  223         /*
  224          * XXXRW: Does this actually happen?
  225          */
  226         INP_INFO_WLOCK(&V_tcbinfo);
  227         inp = tp->t_inpcb;
  228         /*
  229          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
  230          * tear-down mean we need it as a work-around for races between
  231          * timers and tcp_discardcb().
  232          *
  233          * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
  234          */
  235         if (inp == NULL) {
  236                 tcp_timer_race++;
  237                 INP_INFO_WUNLOCK(&V_tcbinfo);
  238                 CURVNET_RESTORE();
  239                 return;
  240         }
  241         INP_WLOCK(inp);
  242         tcp_free_sackholes(tp);
  243         if (callout_pending(&tp->t_timers->tt_2msl) ||
  244             !callout_active(&tp->t_timers->tt_2msl)) {
  245                 INP_WUNLOCK(tp->t_inpcb);
  246                 INP_INFO_WUNLOCK(&V_tcbinfo);
  247                 CURVNET_RESTORE();
  248                 return;
  249         }
  250         callout_deactivate(&tp->t_timers->tt_2msl);
  251         if ((inp->inp_flags & INP_DROPPED) != 0) {
  252                 INP_WUNLOCK(inp);
  253                 INP_INFO_WUNLOCK(&V_tcbinfo);
  254                 CURVNET_RESTORE();
  255                 return;
  256         }
  257         /*
  258          * 2 MSL timeout in shutdown went off.  If we're closed but
  259          * still waiting for peer to close and connection has been idle
  260          * too long, or if 2MSL time is up from TIME_WAIT, delete connection
  261          * control block.  Otherwise, check again in a bit.
  262          *
  263          * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 
  264          * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 
  265          * Ignore fact that there were recent incoming segments.
  266          */
  267         if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
  268             tp->t_inpcb && tp->t_inpcb->inp_socket && 
  269             (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
  270                 TCPSTAT_INC(tcps_finwait2_drops);
  271                 tp = tcp_close(tp);             
  272         } else {
  273                 if (tp->t_state != TCPS_TIME_WAIT &&
  274                    ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
  275                        callout_reset_on(&tp->t_timers->tt_2msl,
  276                            TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
  277                else
  278                        tp = tcp_close(tp);
  279        }
  280 
  281 #ifdef TCPDEBUG
  282         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  283                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  284                           PRU_SLOWTIMO);
  285 #endif
  286         if (tp != NULL)
  287                 INP_WUNLOCK(inp);
  288         INP_INFO_WUNLOCK(&V_tcbinfo);
  289         CURVNET_RESTORE();
  290 }
  291 
  292 void
  293 tcp_timer_keep(void *xtp)
  294 {
  295         struct tcpcb *tp = xtp;
  296         struct tcptemp *t_template;
  297         struct inpcb *inp;
  298         CURVNET_SET(tp->t_vnet);
  299 #ifdef TCPDEBUG
  300         int ostate;
  301 
  302         ostate = tp->t_state;
  303 #endif
  304         INP_INFO_WLOCK(&V_tcbinfo);
  305         inp = tp->t_inpcb;
  306         /*
  307          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
  308          * tear-down mean we need it as a work-around for races between
  309          * timers and tcp_discardcb().
  310          *
  311          * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
  312          */
  313         if (inp == NULL) {
  314                 tcp_timer_race++;
  315                 INP_INFO_WUNLOCK(&V_tcbinfo);
  316                 CURVNET_RESTORE();
  317                 return;
  318         }
  319         INP_WLOCK(inp);
  320         if (callout_pending(&tp->t_timers->tt_keep) ||
  321             !callout_active(&tp->t_timers->tt_keep)) {
  322                 INP_WUNLOCK(inp);
  323                 INP_INFO_WUNLOCK(&V_tcbinfo);
  324                 CURVNET_RESTORE();
  325                 return;
  326         }
  327         callout_deactivate(&tp->t_timers->tt_keep);
  328         if ((inp->inp_flags & INP_DROPPED) != 0) {
  329                 INP_WUNLOCK(inp);
  330                 INP_INFO_WUNLOCK(&V_tcbinfo);
  331                 CURVNET_RESTORE();
  332                 return;
  333         }
  334         /*
  335          * Keep-alive timer went off; send something
  336          * or drop connection if idle for too long.
  337          */
  338         TCPSTAT_INC(tcps_keeptimeo);
  339         if (tp->t_state < TCPS_ESTABLISHED)
  340                 goto dropit;
  341         if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
  342             tp->t_state <= TCPS_CLOSING) {
  343                 if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
  344                         goto dropit;
  345                 /*
  346                  * Send a packet designed to force a response
  347                  * if the peer is up and reachable:
  348                  * either an ACK if the connection is still alive,
  349                  * or an RST if the peer has closed the connection
  350                  * due to timeout or reboot.
  351                  * Using sequence number tp->snd_una-1
  352                  * causes the transmitted zero-length segment
  353                  * to lie outside the receive window;
  354                  * by the protocol spec, this requires the
  355                  * correspondent TCP to respond.
  356                  */
  357                 TCPSTAT_INC(tcps_keepprobe);
  358                 t_template = tcpip_maketemplate(inp);
  359                 if (t_template) {
  360                         tcp_respond(tp, t_template->tt_ipgen,
  361                                     &t_template->tt_t, (struct mbuf *)NULL,
  362                                     tp->rcv_nxt, tp->snd_una - 1, 0);
  363                         free(t_template, M_TEMP);
  364                 }
  365                 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
  366                     tcp_timer_keep, tp, INP_CPU(inp));
  367         } else
  368                 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
  369                     tcp_timer_keep, tp, INP_CPU(inp));
  370 
  371 #ifdef TCPDEBUG
  372         if (inp->inp_socket->so_options & SO_DEBUG)
  373                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  374                           PRU_SLOWTIMO);
  375 #endif
  376         INP_WUNLOCK(inp);
  377         INP_INFO_WUNLOCK(&V_tcbinfo);
  378         CURVNET_RESTORE();
  379         return;
  380 
  381 dropit:
  382         TCPSTAT_INC(tcps_keepdrops);
  383         tp = tcp_drop(tp, ETIMEDOUT);
  384 
  385 #ifdef TCPDEBUG
  386         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  387                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  388                           PRU_SLOWTIMO);
  389 #endif
  390         if (tp != NULL)
  391                 INP_WUNLOCK(tp->t_inpcb);
  392         INP_INFO_WUNLOCK(&V_tcbinfo);
  393         CURVNET_RESTORE();
  394 }
  395 
  396 void
  397 tcp_timer_persist(void *xtp)
  398 {
  399         struct tcpcb *tp = xtp;
  400         struct inpcb *inp;
  401         CURVNET_SET(tp->t_vnet);
  402 #ifdef TCPDEBUG
  403         int ostate;
  404 
  405         ostate = tp->t_state;
  406 #endif
  407         INP_INFO_WLOCK(&V_tcbinfo);
  408         inp = tp->t_inpcb;
  409         /*
  410          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
  411          * tear-down mean we need it as a work-around for races between
  412          * timers and tcp_discardcb().
  413          *
  414          * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
  415          */
  416         if (inp == NULL) {
  417                 tcp_timer_race++;
  418                 INP_INFO_WUNLOCK(&V_tcbinfo);
  419                 CURVNET_RESTORE();
  420                 return;
  421         }
  422         INP_WLOCK(inp);
  423         if (callout_pending(&tp->t_timers->tt_persist) ||
  424             !callout_active(&tp->t_timers->tt_persist)) {
  425                 INP_WUNLOCK(inp);
  426                 INP_INFO_WUNLOCK(&V_tcbinfo);
  427                 CURVNET_RESTORE();
  428                 return;
  429         }
  430         callout_deactivate(&tp->t_timers->tt_persist);
  431         if ((inp->inp_flags & INP_DROPPED) != 0) {
  432                 INP_WUNLOCK(inp);
  433                 INP_INFO_WUNLOCK(&V_tcbinfo);
  434                 CURVNET_RESTORE();
  435                 return;
  436         }
  437         /*
  438          * Persistance timer into zero window.
  439          * Force a byte to be output, if possible.
  440          */
  441         TCPSTAT_INC(tcps_persisttimeo);
  442         /*
  443          * Hack: if the peer is dead/unreachable, we do not
  444          * time out if the window is closed.  After a full
  445          * backoff, drop the connection if the idle time
  446          * (no responses to probes) reaches the maximum
  447          * backoff that we would use if retransmitting.
  448          */
  449         if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
  450             (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
  451              ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
  452                 TCPSTAT_INC(tcps_persistdrop);
  453                 tp = tcp_drop(tp, ETIMEDOUT);
  454                 goto out;
  455         }
  456         /*
  457          * If the user has closed the socket then drop a persisting
  458          * connection after a much reduced timeout.
  459          */
  460         if (tp->t_state > TCPS_CLOSE_WAIT &&
  461             (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
  462                 TCPSTAT_INC(tcps_persistdrop);
  463                 tp = tcp_drop(tp, ETIMEDOUT);
  464                 goto out;
  465         }
  466         tcp_setpersist(tp);
  467         tp->t_flags |= TF_FORCEDATA;
  468         (void) tcp_output(tp);
  469         tp->t_flags &= ~TF_FORCEDATA;
  470 
  471 out:
  472 #ifdef TCPDEBUG
  473         if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
  474                 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
  475 #endif
  476         if (tp != NULL)
  477                 INP_WUNLOCK(inp);
  478         INP_INFO_WUNLOCK(&V_tcbinfo);
  479         CURVNET_RESTORE();
  480 }
  481 
  482 void
  483 tcp_timer_rexmt(void * xtp)
  484 {
  485         struct tcpcb *tp = xtp;
  486         CURVNET_SET(tp->t_vnet);
  487         int rexmt;
  488         int headlocked;
  489         struct inpcb *inp;
  490 #ifdef TCPDEBUG
  491         int ostate;
  492 
  493         ostate = tp->t_state;
  494 #endif
  495         INP_INFO_RLOCK(&V_tcbinfo);
  496         inp = tp->t_inpcb;
  497         /*
  498          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
  499          * tear-down mean we need it as a work-around for races between
  500          * timers and tcp_discardcb().
  501          *
  502          * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
  503          */
  504         if (inp == NULL) {
  505                 tcp_timer_race++;
  506                 INP_INFO_RUNLOCK(&V_tcbinfo);
  507                 CURVNET_RESTORE();
  508                 return;
  509         }
  510         INP_WLOCK(inp);
  511         if (callout_pending(&tp->t_timers->tt_rexmt) ||
  512             !callout_active(&tp->t_timers->tt_rexmt)) {
  513                 INP_WUNLOCK(inp);
  514                 INP_INFO_RUNLOCK(&V_tcbinfo);
  515                 CURVNET_RESTORE();
  516                 return;
  517         }
  518         callout_deactivate(&tp->t_timers->tt_rexmt);
  519         if ((inp->inp_flags & INP_DROPPED) != 0) {
  520                 INP_WUNLOCK(inp);
  521                 INP_INFO_RUNLOCK(&V_tcbinfo);
  522                 CURVNET_RESTORE();
  523                 return;
  524         }
  525         tcp_free_sackholes(tp);
  526         /*
  527          * Retransmission timer went off.  Message has not
  528          * been acked within retransmit interval.  Back off
  529          * to a longer retransmit interval and retransmit one segment.
  530          */
  531         if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
  532                 tp->t_rxtshift = TCP_MAXRXTSHIFT;
  533                 TCPSTAT_INC(tcps_timeoutdrop);
  534                 in_pcbref(inp);
  535                 INP_INFO_RUNLOCK(&V_tcbinfo);
  536                 INP_WUNLOCK(inp);
  537                 INP_INFO_WLOCK(&V_tcbinfo);
  538                 INP_WLOCK(inp);
  539                 if (in_pcbrele_wlocked(inp)) {
  540                         INP_INFO_WUNLOCK(&V_tcbinfo);
  541                         CURVNET_RESTORE();
  542                         return;
  543                 }
  544                 if (inp->inp_flags & INP_DROPPED) {
  545                         INP_WUNLOCK(inp);
  546                         INP_INFO_WUNLOCK(&V_tcbinfo);
  547                         CURVNET_RESTORE();
  548                         return;
  549                 }
  550 
  551                 tp = tcp_drop(tp, tp->t_softerror ?
  552                               tp->t_softerror : ETIMEDOUT);
  553                 headlocked = 1;
  554                 goto out;
  555         }
  556         INP_INFO_RUNLOCK(&V_tcbinfo);
  557         headlocked = 0;
  558         if (tp->t_state == TCPS_SYN_SENT) {
  559                 /*
  560                  * If the SYN was retransmitted, indicate CWND to be
  561                  * limited to 1 segment in cc_conn_init().
  562                  */
  563                 tp->snd_cwnd = 1;
  564         } else if (tp->t_rxtshift == 1) {
  565                 /*
  566                  * first retransmit; record ssthresh and cwnd so they can
  567                  * be recovered if this turns out to be a "bad" retransmit.
  568                  * A retransmit is considered "bad" if an ACK for this
  569                  * segment is received within RTT/2 interval; the assumption
  570                  * here is that the ACK was already in flight.  See
  571                  * "On Estimating End-to-End Network Path Properties" by
  572                  * Allman and Paxson for more details.
  573                  */
  574                 tp->snd_cwnd_prev = tp->snd_cwnd;
  575                 tp->snd_ssthresh_prev = tp->snd_ssthresh;
  576                 tp->snd_recover_prev = tp->snd_recover;
  577                 if (IN_FASTRECOVERY(tp->t_flags))
  578                         tp->t_flags |= TF_WASFRECOVERY;
  579                 else
  580                         tp->t_flags &= ~TF_WASFRECOVERY;
  581                 if (IN_CONGRECOVERY(tp->t_flags))
  582                         tp->t_flags |= TF_WASCRECOVERY;
  583                 else
  584                         tp->t_flags &= ~TF_WASCRECOVERY;
  585                 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
  586                 tp->t_flags |= TF_PREVVALID;
  587         } else
  588                 tp->t_flags &= ~TF_PREVVALID;
  589         TCPSTAT_INC(tcps_rexmttimeo);
  590         if (tp->t_state == TCPS_SYN_SENT)
  591                 rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
  592         else
  593                 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
  594         TCPT_RANGESET(tp->t_rxtcur, rexmt,
  595                       tp->t_rttmin, TCPTV_REXMTMAX);
  596         /*
  597          * Disable RFC1323 and SACK if we haven't got any response to
  598          * our third SYN to work-around some broken terminal servers
  599          * (most of which have hopefully been retired) that have bad VJ
  600          * header compression code which trashes TCP segments containing
  601          * unknown-to-them TCP options.
  602          */
  603         if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
  604             (tp->t_rxtshift == 3))
  605                 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
  606         /*
  607          * If we backed off this far, our srtt estimate is probably bogus.
  608          * Clobber it so we'll take the next rtt measurement as our srtt;
  609          * move the current srtt into rttvar to keep the current
  610          * retransmit times until then.
  611          */
  612         if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
  613 #ifdef INET6
  614                 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
  615                         in6_losing(tp->t_inpcb);
  616 #endif
  617                 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
  618                 tp->t_srtt = 0;
  619         }
  620         tp->snd_nxt = tp->snd_una;
  621         tp->snd_recover = tp->snd_max;
  622         /*
  623          * Force a segment to be sent.
  624          */
  625         tp->t_flags |= TF_ACKNOW;
  626         /*
  627          * If timing a segment in this window, stop the timer.
  628          */
  629         tp->t_rtttime = 0;
  630 
  631         cc_cong_signal(tp, NULL, CC_RTO);
  632 
  633         (void) tcp_output(tp);
  634 
  635 out:
  636 #ifdef TCPDEBUG
  637         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  638                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  639                           PRU_SLOWTIMO);
  640 #endif
  641         if (tp != NULL)
  642                 INP_WUNLOCK(inp);
  643         if (headlocked)
  644                 INP_INFO_WUNLOCK(&V_tcbinfo);
  645         CURVNET_RESTORE();
  646 }
  647 
  648 void
  649 tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
  650 {
  651         struct callout *t_callout;
  652         void *f_callout;
  653         struct inpcb *inp = tp->t_inpcb;
  654         int cpu = INP_CPU(inp);
  655 
  656 #ifdef TCP_OFFLOAD
  657         if (tp->t_flags & TF_TOE)
  658                 return;
  659 #endif
  660 
  661         switch (timer_type) {
  662                 case TT_DELACK:
  663                         t_callout = &tp->t_timers->tt_delack;
  664                         f_callout = tcp_timer_delack;
  665                         break;
  666                 case TT_REXMT:
  667                         t_callout = &tp->t_timers->tt_rexmt;
  668                         f_callout = tcp_timer_rexmt;
  669                         break;
  670                 case TT_PERSIST:
  671                         t_callout = &tp->t_timers->tt_persist;
  672                         f_callout = tcp_timer_persist;
  673                         break;
  674                 case TT_KEEP:
  675                         t_callout = &tp->t_timers->tt_keep;
  676                         f_callout = tcp_timer_keep;
  677                         break;
  678                 case TT_2MSL:
  679                         t_callout = &tp->t_timers->tt_2msl;
  680                         f_callout = tcp_timer_2msl;
  681                         break;
  682                 default:
  683                         panic("bad timer_type");
  684                 }
  685         if (delta == 0) {
  686                 callout_stop(t_callout);
  687         } else {
  688                 callout_reset_on(t_callout, delta, f_callout, tp, cpu);
  689         }
  690 }
  691 
  692 int
  693 tcp_timer_active(struct tcpcb *tp, int timer_type)
  694 {
  695         struct callout *t_callout;
  696 
  697         switch (timer_type) {
  698                 case TT_DELACK:
  699                         t_callout = &tp->t_timers->tt_delack;
  700                         break;
  701                 case TT_REXMT:
  702                         t_callout = &tp->t_timers->tt_rexmt;
  703                         break;
  704                 case TT_PERSIST:
  705                         t_callout = &tp->t_timers->tt_persist;
  706                         break;
  707                 case TT_KEEP:
  708                         t_callout = &tp->t_timers->tt_keep;
  709                         break;
  710                 case TT_2MSL:
  711                         t_callout = &tp->t_timers->tt_2msl;
  712                         break;
  713                 default:
  714                         panic("bad timer_type");
  715                 }
  716         return callout_active(t_callout);
  717 }
  718 
  719 #define ticks_to_msecs(t)       (1000*(t) / hz)
  720 
  721 void
  722 tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
  723     struct xtcp_timer *xtimer)
  724 {
  725         sbintime_t now;
  726 
  727         bzero(xtimer, sizeof(*xtimer));
  728         if (timer == NULL)
  729                 return;
  730         now = getsbinuptime();
  731         if (callout_active(&timer->tt_delack))
  732                 xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
  733         if (callout_active(&timer->tt_rexmt))
  734                 xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
  735         if (callout_active(&timer->tt_persist))
  736                 xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
  737         if (callout_active(&timer->tt_keep))
  738                 xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
  739         if (callout_active(&timer->tt_2msl))
  740                 xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
  741         xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
  742 }

Cache object: e4d014ddac5215416e54c3e00c1e37b9


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.