FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_timer.c

    1 /*-
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 4. Neither the name of the University nor the names of its contributors
   14  *    may be used to endorse or promote products derived from this software
   15  *    without specific prior written permission.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  *      @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD: head/sys/netinet/tcp_timer.c 273850 2014-10-30 08:53:56Z jch $");
   34 
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 #include "opt_tcpdebug.h"
   38 #include "opt_rss.h"
   39 
   40 #include <sys/param.h>
   41 #include <sys/kernel.h>
   42 #include <sys/lock.h>
   43 #include <sys/mbuf.h>
   44 #include <sys/mutex.h>
   45 #include <sys/protosw.h>
   46 #include <sys/smp.h>
   47 #include <sys/socket.h>
   48 #include <sys/socketvar.h>
   49 #include <sys/sysctl.h>
   50 #include <sys/systm.h>
   51 
   52 #include <net/if.h>
   53 #include <net/route.h>
   54 #include <net/vnet.h>
   55 #include <net/netisr.h>
   56 
   57 #include <netinet/cc.h>
   58 #include <netinet/in.h>
   59 #include <netinet/in_pcb.h>
   60 #include <netinet/in_rss.h>
   61 #include <netinet/in_systm.h>
   62 #ifdef INET6
   63 #include <netinet6/in6_pcb.h>
   64 #endif
   65 #include <netinet/ip_var.h>
   66 #include <netinet/tcp_fsm.h>
   67 #include <netinet/tcp_timer.h>
   68 #include <netinet/tcp_var.h>
   69 #ifdef INET6
   70 #include <netinet6/tcp6_var.h>
   71 #endif
   72 #include <netinet/tcpip.h>
   73 #ifdef TCPDEBUG
   74 #include <netinet/tcp_debug.h>
   75 #endif
   76 
   77 int     tcp_keepinit;
   78 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
   79     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
   80 
   81 int     tcp_keepidle;
   82 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
   83     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
   84 
   85 int     tcp_keepintvl;
   86 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
   87     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
   88 
   89 int     tcp_delacktime;
   90 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
   91     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
   92     "Time before a delayed ACK is sent");
   93 
   94 int     tcp_msl;
   95 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
   96     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
   97 
   98 int     tcp_rexmit_min;
   99 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
  100     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
  101     "Minimum Retransmission Timeout");
  102 
  103 int     tcp_rexmit_slop;
  104 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
  105     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
  106     "Retransmission Timer Slop");
  107 
  108 static int      always_keepalive = 1;
  109 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
  110     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
  111 
  112 int    tcp_fast_finwait2_recycle = 0;
  113 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 
  114     &tcp_fast_finwait2_recycle, 0,
  115     "Recycle closed FIN_WAIT_2 connections faster");
  116 
  117 int    tcp_finwait2_timeout;
  118 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
  119     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
  120 
  121 int     tcp_keepcnt = TCPTV_KEEPCNT;
  122 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
  123     "Number of keepalive probes to send");
  124 
   125         /* max idle time in persist before dropping the connection */
  126 int     tcp_maxpersistidle;
  127 
  128 static int      tcp_rexmit_drop_options = 0;
  129 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
  130     &tcp_rexmit_drop_options, 0,
  131     "Drop TCP options from 3rd and later retransmitted SYN");
  132 
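       /*
        * The following blackhole-detection state is virtualized per network
        * stack: VNET_DEFINE() declares a per-vnet instance and the V_ macros
        * resolve it for the current vnet, so each vnet keeps its own settings
        * and counters (the corresponding sysctls are marked CTLFLAG_VNET).
        */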
  133 static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
  134 #define V_tcp_pmtud_blackhole_detect    VNET(tcp_pmtud_blackhole_detect)
  135 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
  136     CTLFLAG_RW|CTLFLAG_VNET,
  137     &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
  138     "Path MTU Discovery Black Hole Detection Enabled");
  139 
  140 static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
  141 #define V_tcp_pmtud_blackhole_activated \
  142     VNET(tcp_pmtud_blackhole_activated)
  143 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
  144     CTLFLAG_RD|CTLFLAG_VNET,
  145     &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
  146     "Path MTU Discovery Black Hole Detection, Activation Count");
  147 
  148 static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
  149 #define V_tcp_pmtud_blackhole_activated_min_mss \
  150     VNET(tcp_pmtud_blackhole_activated_min_mss)
  151 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
  152     CTLFLAG_RD|CTLFLAG_VNET,
  153     &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
  154     "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
  155 
  156 static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
  157 #define V_tcp_pmtud_blackhole_failed    VNET(tcp_pmtud_blackhole_failed)
  158 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
  159     CTLFLAG_RD|CTLFLAG_VNET,
  160     &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
  161     "Path MTU Discovery Black Hole Detection, Failure Count");
  162 
  163 #ifdef INET
  164 static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
  165 #define V_tcp_pmtud_blackhole_mss       VNET(tcp_pmtud_blackhole_mss)
  166 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
  167     CTLFLAG_RW|CTLFLAG_VNET,
  168     &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
  169     "Path MTU Discovery Black Hole Detection lowered MSS");
  170 #endif
  171 
  172 #ifdef INET6
  173 static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
  174 #define V_tcp_v6pmtud_blackhole_mss     VNET(tcp_v6pmtud_blackhole_mss)
  175 SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
  176     CTLFLAG_RW|CTLFLAG_VNET,
  177     &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
  178     "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
  179 #endif
  180 
  181 #ifdef  RSS
  182 static int      per_cpu_timers = 1;
  183 #else
  184 static int      per_cpu_timers = 0;
  185 #endif
  186 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
  187     &per_cpu_timers , 0, "run tcp timers on all cpus");
  188 
  189 #if 0
  190 #define INP_CPU(inp)    (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
  191                 ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
  192 #endif
  193 
  194 /*
  195  * Map the given inp to a CPU id.
  196  *
  197  * This queries RSS if it's compiled in, else it defaults to the current
  198  * CPU ID.
  199  */
  200 static inline int
  201 inp_to_cpuid(struct inpcb *inp)
  202 {
  203         u_int cpuid;
  204 
  205 #ifdef  RSS
  206         if (per_cpu_timers) {
  207                 cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
  208                 if (cpuid == NETISR_CPUID_NONE)
  209                         return (curcpu);        /* XXX */
  210                 else
  211                         return (cpuid);
  212         }
  213 #else
  214         /* Legacy, pre-RSS behaviour */
  215         if (per_cpu_timers) {
  216                 /*
  217                  * We don't have a flowid -> cpuid mapping, so cheat and
  218                  * just map unknown cpuids to curcpu.  Not the best, but
  219                  * apparently better than defaulting to swi 0.
  220                  */
  221                 cpuid = inp->inp_flowid % (mp_maxid + 1);
  222                 if (! CPU_ABSENT(cpuid))
  223                         return (cpuid);
  224                 return (curcpu);
  225         }
  226 #endif
  227         /* Default for RSS and non-RSS - cpuid 0 */
  228         else {
  229                 return (0);
  230         }
  231 }
  232 
  233 /*
   234  * TCP protocol timeout routine, called every 500 ms.
   235  * Updates timestamps used for TCP and causes finite-state-machine
   236  * actions if timers expire.
  237  */
  238 void
  239 tcp_slowtimo(void)
  240 {
  241         VNET_ITERATOR_DECL(vnet_iter);
  242 
  243         VNET_LIST_RLOCK_NOSLEEP();
  244         VNET_FOREACH(vnet_iter) {
  245                 CURVNET_SET(vnet_iter);
  246                 (void) tcp_tw_2msl_scan(0);
  247                 CURVNET_RESTORE();
  248         }
  249         VNET_LIST_RUNLOCK_NOSLEEP();
  250 }
  251 
  252 int     tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
  253     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
  254 
  255 int     tcp_backoff[TCP_MAXRXTSHIFT + 1] =
  256     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
  257 
  258 static int tcp_totbackoff = 2559;       /* sum of tcp_backoff[] */
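       /*
        * The two tables above are indexed by t_rxtshift: tcp_timer_rexmt()
        * scales the base RTO by tcp_backoff[] (or by tcp_syn_backoff[] while
        * still in SYN_SENT), and tcp_timer_persist() uses tcp_totbackoff, the
        * sum of tcp_backoff[], to bound how long a connection may persist
        * against a closed window before being dropped.
        */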
  259 
  260 static int tcp_timer_race;
  261 SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
  262     0, "Count of t_inpcb races on tcp_discardcb");
  263 
  264 /*
  265  * TCP timer processing.
  266  */
  267 
  268 void
  269 tcp_timer_delack(void *xtp)
  270 {
  271         struct tcpcb *tp = xtp;
  272         struct inpcb *inp;
  273         CURVNET_SET(tp->t_vnet);
  274 
  275         inp = tp->t_inpcb;
  276         /*
  277          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
  278          * tear-down mean we need it as a work-around for races between
  279          * timers and tcp_discardcb().
  280          *
  281          * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
  282          */
  283         if (inp == NULL) {
  284                 tcp_timer_race++;
  285                 CURVNET_RESTORE();
  286                 return;
  287         }
  288         INP_WLOCK(inp);
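               /*
                * A pending callout means the timer was rescheduled after this
                * invocation was already queued; an inactive one means it was
                * stopped.  Either way this run is stale, so bail out.  The
                * same check guards every timer handler in this file.
                */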
  289         if (callout_pending(&tp->t_timers->tt_delack) ||
  290             !callout_active(&tp->t_timers->tt_delack)) {
  291                 INP_WUNLOCK(inp);
  292                 CURVNET_RESTORE();
  293                 return;
  294         }
  295         callout_deactivate(&tp->t_timers->tt_delack);
  296         if ((inp->inp_flags & INP_DROPPED) != 0) {
  297                 INP_WUNLOCK(inp);
  298                 CURVNET_RESTORE();
  299                 return;
  300         }
  301 
  302         tp->t_flags |= TF_ACKNOW;
  303         TCPSTAT_INC(tcps_delack);
  304         (void) tcp_output(tp);
  305         INP_WUNLOCK(inp);
  306         CURVNET_RESTORE();
  307 }
  308 
  309 void
  310 tcp_timer_2msl(void *xtp)
  311 {
  312         struct tcpcb *tp = xtp;
  313         struct inpcb *inp;
  314         CURVNET_SET(tp->t_vnet);
  315 #ifdef TCPDEBUG
  316         int ostate;
  317 
  318         ostate = tp->t_state;
  319 #endif
  320         /*
  321          * XXXRW: Does this actually happen?
  322          */
  323         INP_INFO_WLOCK(&V_tcbinfo);
  324         inp = tp->t_inpcb;
  325         /*
  326          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
  327          * tear-down mean we need it as a work-around for races between
  328          * timers and tcp_discardcb().
  329          *
  330          * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
  331          */
  332         if (inp == NULL) {
  333                 tcp_timer_race++;
  334                 INP_INFO_WUNLOCK(&V_tcbinfo);
  335                 CURVNET_RESTORE();
  336                 return;
  337         }
  338         INP_WLOCK(inp);
  339         tcp_free_sackholes(tp);
  340         if (callout_pending(&tp->t_timers->tt_2msl) ||
  341             !callout_active(&tp->t_timers->tt_2msl)) {
  342                 INP_WUNLOCK(tp->t_inpcb);
  343                 INP_INFO_WUNLOCK(&V_tcbinfo);
  344                 CURVNET_RESTORE();
  345                 return;
  346         }
  347         callout_deactivate(&tp->t_timers->tt_2msl);
  348         if ((inp->inp_flags & INP_DROPPED) != 0) {
  349                 INP_WUNLOCK(inp);
  350                 INP_INFO_WUNLOCK(&V_tcbinfo);
  351                 CURVNET_RESTORE();
  352                 return;
  353         }
  354         /*
  355          * 2 MSL timeout in shutdown went off.  If we're closed but
  356          * still waiting for peer to close and connection has been idle
  357          * too long, or if 2MSL time is up from TIME_WAIT, delete connection
  358          * control block.  Otherwise, check again in a bit.
  359          *
   360  * If fast recycling of FIN_WAIT_2 is enabled, we are in FIN_WAIT_2, and
   361  * the receiver has closed, there is no point in keeping the socket
   362  * around: just close it, ignoring any recent incoming segments.
  363          */
  364         if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
  365             tp->t_inpcb && tp->t_inpcb->inp_socket && 
  366             (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
  367                 TCPSTAT_INC(tcps_finwait2_drops);
  368                 tp = tcp_close(tp);             
  369         } else {
  370                 if (tp->t_state != TCPS_TIME_WAIT &&
  371                    ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
  372                        callout_reset_on(&tp->t_timers->tt_2msl,
  373                            TP_KEEPINTVL(tp), tcp_timer_2msl, tp,
  374                            inp_to_cpuid(inp));
  375                else
  376                        tp = tcp_close(tp);
  377        }
  378 
  379 #ifdef TCPDEBUG
  380         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  381                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  382                           PRU_SLOWTIMO);
  383 #endif
  384         if (tp != NULL)
  385                 INP_WUNLOCK(inp);
  386         INP_INFO_WUNLOCK(&V_tcbinfo);
  387         CURVNET_RESTORE();
  388 }
  389 
  390 void
  391 tcp_timer_keep(void *xtp)
  392 {
  393         struct tcpcb *tp = xtp;
  394         struct tcptemp *t_template;
  395         struct inpcb *inp;
  396         CURVNET_SET(tp->t_vnet);
  397 #ifdef TCPDEBUG
  398         int ostate;
  399 
  400         ostate = tp->t_state;
  401 #endif
  402         INP_INFO_WLOCK(&V_tcbinfo);
  403         inp = tp->t_inpcb;
  404         /*
  405          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
  406          * tear-down mean we need it as a work-around for races between
  407          * timers and tcp_discardcb().
  408          *
  409          * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
  410          */
  411         if (inp == NULL) {
  412                 tcp_timer_race++;
  413                 INP_INFO_WUNLOCK(&V_tcbinfo);
  414                 CURVNET_RESTORE();
  415                 return;
  416         }
  417         INP_WLOCK(inp);
  418         if (callout_pending(&tp->t_timers->tt_keep) ||
  419             !callout_active(&tp->t_timers->tt_keep)) {
  420                 INP_WUNLOCK(inp);
  421                 INP_INFO_WUNLOCK(&V_tcbinfo);
  422                 CURVNET_RESTORE();
  423                 return;
  424         }
  425         callout_deactivate(&tp->t_timers->tt_keep);
  426         if ((inp->inp_flags & INP_DROPPED) != 0) {
  427                 INP_WUNLOCK(inp);
  428                 INP_INFO_WUNLOCK(&V_tcbinfo);
  429                 CURVNET_RESTORE();
  430                 return;
  431         }
  432         /*
  433          * Keep-alive timer went off; send something
  434          * or drop connection if idle for too long.
  435          */
  436         TCPSTAT_INC(tcps_keeptimeo);
  437         if (tp->t_state < TCPS_ESTABLISHED)
  438                 goto dropit;
  439         if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
  440             tp->t_state <= TCPS_CLOSING) {
  441                 if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
  442                         goto dropit;
  443                 /*
  444                  * Send a packet designed to force a response
  445                  * if the peer is up and reachable:
  446                  * either an ACK if the connection is still alive,
  447                  * or an RST if the peer has closed the connection
  448                  * due to timeout or reboot.
  449                  * Using sequence number tp->snd_una-1
  450                  * causes the transmitted zero-length segment
  451                  * to lie outside the receive window;
  452                  * by the protocol spec, this requires the
  453                  * correspondent TCP to respond.
  454                  */
  455                 TCPSTAT_INC(tcps_keepprobe);
  456                 t_template = tcpip_maketemplate(inp);
  457                 if (t_template) {
  458                         tcp_respond(tp, t_template->tt_ipgen,
  459                                     &t_template->tt_t, (struct mbuf *)NULL,
  460                                     tp->rcv_nxt, tp->snd_una - 1, 0);
  461                         free(t_template, M_TEMP);
  462                 }
  463                 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
  464                     tcp_timer_keep, tp, inp_to_cpuid(inp));
  465         } else
  466                 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
  467                     tcp_timer_keep, tp, inp_to_cpuid(inp));
  468 
  469 #ifdef TCPDEBUG
  470         if (inp->inp_socket->so_options & SO_DEBUG)
  471                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  472                           PRU_SLOWTIMO);
  473 #endif
  474         INP_WUNLOCK(inp);
  475         INP_INFO_WUNLOCK(&V_tcbinfo);
  476         CURVNET_RESTORE();
  477         return;
  478 
  479 dropit:
  480         TCPSTAT_INC(tcps_keepdrops);
  481         tp = tcp_drop(tp, ETIMEDOUT);
  482 
  483 #ifdef TCPDEBUG
  484         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  485                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  486                           PRU_SLOWTIMO);
  487 #endif
  488         if (tp != NULL)
  489                 INP_WUNLOCK(tp->t_inpcb);
  490         INP_INFO_WUNLOCK(&V_tcbinfo);
  491         CURVNET_RESTORE();
  492 }
  493 
  494 void
  495 tcp_timer_persist(void *xtp)
  496 {
  497         struct tcpcb *tp = xtp;
  498         struct inpcb *inp;
  499         CURVNET_SET(tp->t_vnet);
  500 #ifdef TCPDEBUG
  501         int ostate;
  502 
  503         ostate = tp->t_state;
  504 #endif
  505         INP_INFO_WLOCK(&V_tcbinfo);
  506         inp = tp->t_inpcb;
  507         /*
  508          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
  509          * tear-down mean we need it as a work-around for races between
  510          * timers and tcp_discardcb().
  511          *
  512          * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
  513          */
  514         if (inp == NULL) {
  515                 tcp_timer_race++;
  516                 INP_INFO_WUNLOCK(&V_tcbinfo);
  517                 CURVNET_RESTORE();
  518                 return;
  519         }
  520         INP_WLOCK(inp);
  521         if (callout_pending(&tp->t_timers->tt_persist) ||
  522             !callout_active(&tp->t_timers->tt_persist)) {
  523                 INP_WUNLOCK(inp);
  524                 INP_INFO_WUNLOCK(&V_tcbinfo);
  525                 CURVNET_RESTORE();
  526                 return;
  527         }
  528         callout_deactivate(&tp->t_timers->tt_persist);
  529         if ((inp->inp_flags & INP_DROPPED) != 0) {
  530                 INP_WUNLOCK(inp);
  531                 INP_INFO_WUNLOCK(&V_tcbinfo);
  532                 CURVNET_RESTORE();
  533                 return;
  534         }
  535         /*
   536          * Persistence timer went off while the send window is zero.
  537          * Force a byte to be output, if possible.
  538          */
  539         TCPSTAT_INC(tcps_persisttimeo);
  540         /*
  541          * Hack: if the peer is dead/unreachable, we do not
  542          * time out if the window is closed.  After a full
  543          * backoff, drop the connection if the idle time
  544          * (no responses to probes) reaches the maximum
  545          * backoff that we would use if retransmitting.
  546          */
  547         if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
  548             (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
  549              ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
  550                 TCPSTAT_INC(tcps_persistdrop);
  551                 tp = tcp_drop(tp, ETIMEDOUT);
  552                 goto out;
  553         }
  554         /*
  555          * If the user has closed the socket then drop a persisting
  556          * connection after a much reduced timeout.
  557          */
  558         if (tp->t_state > TCPS_CLOSE_WAIT &&
  559             (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
  560                 TCPSTAT_INC(tcps_persistdrop);
  561                 tp = tcp_drop(tp, ETIMEDOUT);
  562                 goto out;
  563         }
  564         tcp_setpersist(tp);
  565         tp->t_flags |= TF_FORCEDATA;
  566         (void) tcp_output(tp);
  567         tp->t_flags &= ~TF_FORCEDATA;
  568 
  569 out:
  570 #ifdef TCPDEBUG
  571         if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
  572                 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
  573 #endif
  574         if (tp != NULL)
  575                 INP_WUNLOCK(inp);
  576         INP_INFO_WUNLOCK(&V_tcbinfo);
  577         CURVNET_RESTORE();
  578 }
  579 
  580 void
  581 tcp_timer_rexmt(void * xtp)
  582 {
  583         struct tcpcb *tp = xtp;
  584         CURVNET_SET(tp->t_vnet);
  585         int rexmt;
  586         int headlocked;
  587         struct inpcb *inp;
  588 #ifdef TCPDEBUG
  589         int ostate;
  590 
  591         ostate = tp->t_state;
  592 #endif
  593 
  594         INP_INFO_RLOCK(&V_tcbinfo);
  595         inp = tp->t_inpcb;
  596         /*
  597          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
  598          * tear-down mean we need it as a work-around for races between
  599          * timers and tcp_discardcb().
  600          *
  601          * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
  602          */
  603         if (inp == NULL) {
  604                 tcp_timer_race++;
  605                 INP_INFO_RUNLOCK(&V_tcbinfo);
  606                 CURVNET_RESTORE();
  607                 return;
  608         }
  609         INP_WLOCK(inp);
  610         if (callout_pending(&tp->t_timers->tt_rexmt) ||
  611             !callout_active(&tp->t_timers->tt_rexmt)) {
  612                 INP_WUNLOCK(inp);
  613                 INP_INFO_RUNLOCK(&V_tcbinfo);
  614                 CURVNET_RESTORE();
  615                 return;
  616         }
  617         callout_deactivate(&tp->t_timers->tt_rexmt);
  618         if ((inp->inp_flags & INP_DROPPED) != 0) {
  619                 INP_WUNLOCK(inp);
  620                 INP_INFO_RUNLOCK(&V_tcbinfo);
  621                 CURVNET_RESTORE();
  622                 return;
  623         }
  624         tcp_free_sackholes(tp);
  625         /*
  626          * Retransmission timer went off.  Message has not
  627          * been acked within retransmit interval.  Back off
  628          * to a longer retransmit interval and retransmit one segment.
  629          */
  630         if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
  631                 tp->t_rxtshift = TCP_MAXRXTSHIFT;
  632                 TCPSTAT_INC(tcps_timeoutdrop);
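                       /*
                        * Dropping the connection needs the tcbinfo write lock,
                        * but only the read lock is held here.  Take a
                        * reference on the inpcb so it cannot go away while the
                        * locks are released and reacquired in the required
                        * order, then recheck that the connection still exists
                        * before calling tcp_drop().
                        */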
  633                 in_pcbref(inp);
  634                 INP_INFO_RUNLOCK(&V_tcbinfo);
  635                 INP_WUNLOCK(inp);
  636                 INP_INFO_WLOCK(&V_tcbinfo);
  637                 INP_WLOCK(inp);
  638                 if (in_pcbrele_wlocked(inp)) {
  639                         INP_INFO_WUNLOCK(&V_tcbinfo);
  640                         CURVNET_RESTORE();
  641                         return;
  642                 }
  643                 if (inp->inp_flags & INP_DROPPED) {
  644                         INP_WUNLOCK(inp);
  645                         INP_INFO_WUNLOCK(&V_tcbinfo);
  646                         CURVNET_RESTORE();
  647                         return;
  648                 }
  649 
  650                 tp = tcp_drop(tp, tp->t_softerror ?
  651                               tp->t_softerror : ETIMEDOUT);
  652                 headlocked = 1;
  653                 goto out;
  654         }
  655         INP_INFO_RUNLOCK(&V_tcbinfo);
  656         headlocked = 0;
  657         if (tp->t_state == TCPS_SYN_SENT) {
  658                 /*
  659                  * If the SYN was retransmitted, indicate CWND to be
  660                  * limited to 1 segment in cc_conn_init().
  661                  */
  662                 tp->snd_cwnd = 1;
  663         } else if (tp->t_rxtshift == 1) {
  664                 /*
  665                  * first retransmit; record ssthresh and cwnd so they can
  666                  * be recovered if this turns out to be a "bad" retransmit.
  667                  * A retransmit is considered "bad" if an ACK for this
  668                  * segment is received within RTT/2 interval; the assumption
  669                  * here is that the ACK was already in flight.  See
  670                  * "On Estimating End-to-End Network Path Properties" by
  671                  * Allman and Paxson for more details.
  672                  */
  673                 tp->snd_cwnd_prev = tp->snd_cwnd;
  674                 tp->snd_ssthresh_prev = tp->snd_ssthresh;
  675                 tp->snd_recover_prev = tp->snd_recover;
  676                 if (IN_FASTRECOVERY(tp->t_flags))
  677                         tp->t_flags |= TF_WASFRECOVERY;
  678                 else
  679                         tp->t_flags &= ~TF_WASFRECOVERY;
  680                 if (IN_CONGRECOVERY(tp->t_flags))
  681                         tp->t_flags |= TF_WASCRECOVERY;
  682                 else
  683                         tp->t_flags &= ~TF_WASCRECOVERY;
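                       /*
                        * t_srtt is kept scaled up by TCP_RTT_SHIFT bits, so
                        * shifting right by one additional bit gives roughly
                        * RTT/2 in ticks for the "bad retransmit" window.
                        */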
  684                 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
  685                 tp->t_flags |= TF_PREVVALID;
  686         } else
  687                 tp->t_flags &= ~TF_PREVVALID;
  688         TCPSTAT_INC(tcps_rexmttimeo);
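               /*
                * Compute the next RTO: the base value (TCPTV_RTOBASE for an
                * un-acked SYN, otherwise the smoothed RTT estimate) multiplied
                * by the backoff table entry for this retransmit, then clamped
                * to the [t_rttmin, TCPTV_REXMTMAX] range.
                */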
  689         if (tp->t_state == TCPS_SYN_SENT)
  690                 rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
  691         else
  692                 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
  693         TCPT_RANGESET(tp->t_rxtcur, rexmt,
  694                       tp->t_rttmin, TCPTV_REXMTMAX);
  695 
  696         /*
   697          * Enter the PLPMTUD (Packetization Layer Path MTU Discovery)
   698          * path if the connection is ESTABLISHED or in FIN_WAIT_1.  The
   699          * latter matters because, if the amount of data we send is very
   700          * small, it may go out in a couple of packets and proceed straight
   701          * to FIN, so this timer would never see the ESTABLISHED state.
  702          */
  703         if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
  704             || (tp->t_state == TCPS_FIN_WAIT_1))) {
  705                 int optlen;
  706 #ifdef INET6
  707                 int isipv6;
  708 #endif
  709 
  710                 if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
  711                     (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
  712                     (tp->t_rxtshift <= 2)) {
  713                         /*
  714                          * Enter Path MTU Black-hole Detection mechanism:
  715                          * - Disable Path MTU Discovery (IP "DF" bit).
  716                          * - Reduce MTU to lower value than what we
  717                          *   negotiated with peer.
  718                          */
  719                         /* Record that we may have found a black hole. */
  720                         tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
  721 
  722                         /* Keep track of previous MSS. */
  723                         optlen = tp->t_maxopd - tp->t_maxseg;
  724                         tp->t_pmtud_saved_maxopd = tp->t_maxopd;
  725 
  726                         /* 
  727                          * Reduce the MSS to blackhole value or to the default
  728                          * in an attempt to retransmit.
  729                          */
  730 #ifdef INET6
  731                         isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
  732                         if (isipv6 &&
  733                             tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
  734                                 /* Use the sysctl tuneable blackhole MSS. */
  735                                 tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
  736                                 V_tcp_pmtud_blackhole_activated++;
  737                         } else if (isipv6) {
  738                                 /* Use the default MSS. */
  739                                 tp->t_maxopd = V_tcp_v6mssdflt;
  740                                 /*
  741                                  * Disable Path MTU Discovery when we switch to
  742                                  * minmss.
  743                                  */
  744                                 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
  745                                 V_tcp_pmtud_blackhole_activated_min_mss++;
  746                         }
  747 #endif
  748 #if defined(INET6) && defined(INET)
  749                         else
  750 #endif
  751 #ifdef INET
  752                         if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
  753                                 /* Use the sysctl tuneable blackhole MSS. */
  754                                 tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
  755                                 V_tcp_pmtud_blackhole_activated++;
  756                         } else {
  757                                 /* Use the default MSS. */
  758                                 tp->t_maxopd = V_tcp_mssdflt;
  759                                 /*
  760                                  * Disable Path MTU Discovery when we switch to
  761                                  * minmss.
  762                                  */
  763                                 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
  764                                 V_tcp_pmtud_blackhole_activated_min_mss++;
  765                         }
  766 #endif
  767                         tp->t_maxseg = tp->t_maxopd - optlen;
  768                         /*
  769                          * Reset the slow-start flight size
  770                          * as it may depend on the new MSS.
  771                          */
  772                         if (CC_ALGO(tp)->conn_init != NULL)
  773                                 CC_ALGO(tp)->conn_init(tp->ccv);
  774                 } else {
  775                         /*
  776                          * If further retransmissions are still unsuccessful
  777                          * with a lowered MTU, maybe this isn't a blackhole and
  778                          * we restore the previous MSS and blackhole detection
  779                          * flags.
  780                          */
  781                         if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
  782                             (tp->t_rxtshift > 4)) {
  783                                 tp->t_flags2 |= TF2_PLPMTU_PMTUD;
  784                                 tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
  785                                 optlen = tp->t_maxopd - tp->t_maxseg;
  786                                 tp->t_maxopd = tp->t_pmtud_saved_maxopd;
  787                                 tp->t_maxseg = tp->t_maxopd - optlen;
  788                                 V_tcp_pmtud_blackhole_failed++;
  789                                 /*
  790                                  * Reset the slow-start flight size as it
  791                                  * may depend on the new MSS.
  792                                  */
  793                                 if (CC_ALGO(tp)->conn_init != NULL)
  794                                         CC_ALGO(tp)->conn_init(tp->ccv);
  795                         }
  796                 }
  797         }
  798 
  799         /*
  800          * Disable RFC1323 and SACK if we haven't got any response to
   801          * our third SYN, to work around some broken terminal servers
  802          * (most of which have hopefully been retired) that have bad VJ
  803          * header compression code which trashes TCP segments containing
  804          * unknown-to-them TCP options.
  805          */
  806         if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
  807             (tp->t_rxtshift == 3))
  808                 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
  809         /*
  810          * If we backed off this far, our srtt estimate is probably bogus.
  811          * Clobber it so we'll take the next rtt measurement as our srtt;
  812          * move the current srtt into rttvar to keep the current
  813          * retransmit times until then.
  814          */
  815         if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
  816 #ifdef INET6
  817                 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
  818                         in6_losing(tp->t_inpcb);
  819 #endif
  820                 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
  821                 tp->t_srtt = 0;
  822         }
  823         tp->snd_nxt = tp->snd_una;
  824         tp->snd_recover = tp->snd_max;
  825         /*
  826          * Force a segment to be sent.
  827          */
  828         tp->t_flags |= TF_ACKNOW;
  829         /*
  830          * If timing a segment in this window, stop the timer.
  831          */
  832         tp->t_rtttime = 0;
  833 
  834         cc_cong_signal(tp, NULL, CC_RTO);
  835 
  836         (void) tcp_output(tp);
  837 
  838 out:
  839 #ifdef TCPDEBUG
  840         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  841                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
  842                           PRU_SLOWTIMO);
  843 #endif
  844         if (tp != NULL)
  845                 INP_WUNLOCK(inp);
  846         if (headlocked)
  847                 INP_INFO_WUNLOCK(&V_tcbinfo);
  848         CURVNET_RESTORE();
  849 }
  850 
  851 void
  852 tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
  853 {
  854         struct callout *t_callout;
  855         void *f_callout;
  856         struct inpcb *inp = tp->t_inpcb;
  857         int cpu = inp_to_cpuid(inp);
  858 
  859 #ifdef TCP_OFFLOAD
  860         if (tp->t_flags & TF_TOE)
  861                 return;
  862 #endif
  863 
  864         switch (timer_type) {
  865                 case TT_DELACK:
  866                         t_callout = &tp->t_timers->tt_delack;
  867                         f_callout = tcp_timer_delack;
  868                         break;
  869                 case TT_REXMT:
  870                         t_callout = &tp->t_timers->tt_rexmt;
  871                         f_callout = tcp_timer_rexmt;
  872                         break;
  873                 case TT_PERSIST:
  874                         t_callout = &tp->t_timers->tt_persist;
  875                         f_callout = tcp_timer_persist;
  876                         break;
  877                 case TT_KEEP:
  878                         t_callout = &tp->t_timers->tt_keep;
  879                         f_callout = tcp_timer_keep;
  880                         break;
  881                 case TT_2MSL:
  882                         t_callout = &tp->t_timers->tt_2msl;
  883                         f_callout = tcp_timer_2msl;
  884                         break;
  885                 default:
  886                         panic("bad timer_type");
  887                 }
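               /*
                * A delta of 0 stops the selected timer; any other value
                * (re)arms it for delta ticks, running the handler on the CPU
                * chosen by inp_to_cpuid().
                */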
  888         if (delta == 0) {
  889                 callout_stop(t_callout);
  890         } else {
  891                 callout_reset_on(t_callout, delta, f_callout, tp, cpu);
  892         }
  893 }
  894 
  895 int
  896 tcp_timer_active(struct tcpcb *tp, int timer_type)
  897 {
  898         struct callout *t_callout;
  899 
  900         switch (timer_type) {
  901                 case TT_DELACK:
  902                         t_callout = &tp->t_timers->tt_delack;
  903                         break;
  904                 case TT_REXMT:
  905                         t_callout = &tp->t_timers->tt_rexmt;
  906                         break;
  907                 case TT_PERSIST:
  908                         t_callout = &tp->t_timers->tt_persist;
  909                         break;
  910                 case TT_KEEP:
  911                         t_callout = &tp->t_timers->tt_keep;
  912                         break;
  913                 case TT_2MSL:
  914                         t_callout = &tp->t_timers->tt_2msl;
  915                         break;
  916                 default:
  917                         panic("bad timer_type");
  918                 }
  919         return callout_active(t_callout);
  920 }
  921 
  922 #define ticks_to_msecs(t)       (1000*(t) / hz)
  923 
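       /*
        * Export the remaining time of each armed timer, in milliseconds, into
        * the xtcp_timer structure, together with the time elapsed since the
        * last segment was received on the connection.
        */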
  924 void
  925 tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
  926     struct xtcp_timer *xtimer)
  927 {
  928         sbintime_t now;
  929 
  930         bzero(xtimer, sizeof(*xtimer));
  931         if (timer == NULL)
  932                 return;
  933         now = getsbinuptime();
  934         if (callout_active(&timer->tt_delack))
  935                 xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
  936         if (callout_active(&timer->tt_rexmt))
  937                 xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
  938         if (callout_active(&timer->tt_persist))
  939                 xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
  940         if (callout_active(&timer->tt_keep))
  941                 xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
  942         if (callout_active(&timer->tt_2msl))
  943                 xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
  944         xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
  945 }
