FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_sack.c


/*
 * Copyright (c) 2003, 2004 Jeffrey M. Hsu.  All rights reserved.
 * Copyright (c) 2003, 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/netinet/tcp_sack.c,v 1.8 2008/08/15 21:37:16 nth Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/thread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#include <net/if.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>

/*
 * Implemented:
 *
 * RFC 2018 (TCP selective acknowledgment options)
 * RFC 2883 (D-SACK: an extension to the SACK option)
 * RFC 3517 (conservative SACK-based loss recovery)
 * RFC 6675 (revised conservative SACK-based loss recovery)
 */

struct sackblock {
        tcp_seq                 sblk_start;
        tcp_seq                 sblk_end;
        TAILQ_ENTRY(sackblock)  sblk_list;
};

#define MAXSAVEDBLOCKS  8                       /* per connection limit */
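
/*
 * The scoreboard keeps its sackblocks on a TAILQ sorted by ascending
 * sequence number, with adjacent blocks kept disjoint.  For
 * illustration (hypothetical numbers): with snd_una = 500, SACKed
 * ranges [1000,1500) and [2000,2500) are held as two separate blocks:
 *
 *      500 (snd_una) ... [1000,1500) ... [2000,2500) ... snd_max
 */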

static int insert_block(struct scoreboard *scb,
                        const struct raw_sackblock *raw_sb, boolean_t *update);

static MALLOC_DEFINE(M_SACKBLOCK, "sblk", "sackblock struct");

/*
 * Per-tcpcb initialization.
 */
void
tcp_sack_tcpcb_init(struct tcpcb *tp)
{
        struct scoreboard *scb = &tp->scb;

        scb->nblocks = 0;
        TAILQ_INIT(&scb->sackblocks);
        scb->lastfound = NULL;
}

/*
 * Find the SACK block containing or immediately preceding "seq".
 * The boolean result indicates whether the sequence is actually
 * contained in the SACK block.
 */
static boolean_t
sack_block_lookup(struct scoreboard *scb, tcp_seq seq, struct sackblock **sb)
{
        struct sackblock *hint = scb->lastfound;
        struct sackblock *cur, *last, *prev;

        if (TAILQ_EMPTY(&scb->sackblocks)) {
                *sb = NULL;
                return FALSE;
        }

        if (hint == NULL) {
                /* No hint.  Search from start to end. */
                cur = TAILQ_FIRST(&scb->sackblocks);
                last = NULL;
                prev = TAILQ_LAST(&scb->sackblocks, sackblock_list);
        } else {
                if (SEQ_GEQ(seq, hint->sblk_start)) {
                        /* Search from hint to end of list. */
                        cur = hint;
                        last = NULL;
                        prev = TAILQ_LAST(&scb->sackblocks, sackblock_list);
                } else {
                        /* Search from front of list to hint. */
                        cur = TAILQ_FIRST(&scb->sackblocks);
                        last = hint;
                        prev = TAILQ_PREV(hint, sackblock_list, sblk_list);
                }
        }

        do {
                if (SEQ_GT(cur->sblk_end, seq)) {
                        if (SEQ_GEQ(seq, cur->sblk_start)) {
                                *sb = scb->lastfound = cur;
                                return TRUE;
                        } else {
                                *sb = scb->lastfound =
                                    TAILQ_PREV(cur, sackblock_list, sblk_list);
                                return FALSE;
                        }
                }
                cur = TAILQ_NEXT(cur, sblk_list);
        } while (cur != last);

        *sb = scb->lastfound = prev;
        return FALSE;
}
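
/*
 * Lookup behavior, with hypothetical blocks [1000,1500) and [2000,2500):
 *
 *      seq 1200 -> TRUE,  *sb = [1000,1500)    (contained)
 *      seq 1700 -> FALSE, *sb = [1000,1500)    (immediately preceding)
 *      seq  800 -> FALSE, *sb = NULL           (nothing precedes seq)
 */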

/*
 * Allocate a SACK block.
 */
static __inline struct sackblock *
alloc_sackblock(struct scoreboard *scb, const struct raw_sackblock *raw_sb)
{
        struct sackblock *sb;

        if (scb->freecache != NULL) {
                sb = scb->freecache;
                scb->freecache = NULL;
                tcpstat.tcps_sacksbfast++;
        } else {
                sb = kmalloc(sizeof(struct sackblock), M_SACKBLOCK, M_NOWAIT);
                if (sb == NULL) {
                        tcpstat.tcps_sacksbfailed++;
                        return NULL;
                }
        }
        sb->sblk_start = raw_sb->rblk_start;
        sb->sblk_end = raw_sb->rblk_end;
        return sb;
}

static __inline struct sackblock *
alloc_sackblock_limit(struct scoreboard *scb,
    const struct raw_sackblock *raw_sb)
{
        if (scb->nblocks == MAXSAVEDBLOCKS) {
                /*
                 * Should try to kick out older blocks XXX JH
                 * May be able to coalesce with existing block.
                 * Or, go other way and free all blocks if we hit
                 * this limit.
                 */
                tcpstat.tcps_sacksboverflow++;
                return NULL;
        }
        return alloc_sackblock(scb, raw_sb);
}

/*
 * Free a SACK block.
 */
static __inline void
free_sackblock(struct scoreboard *scb, struct sackblock *s)
{
        if (scb->freecache == NULL) {
                /* YYY Maybe use the latest freed block? */
                scb->freecache = s;
                return;
        }
        kfree(s, M_SACKBLOCK);
}

/*
 * Free up SACK blocks for data that's been acked.
 */
static void
tcp_sack_ack_blocks(struct tcpcb *tp, tcp_seq th_ack)
{
        struct scoreboard *scb = &tp->scb;
        struct sackblock *sb, *nb;

        sb = TAILQ_FIRST(&scb->sackblocks);
        while (sb && SEQ_LEQ(sb->sblk_end, th_ack)) {
                nb = TAILQ_NEXT(sb, sblk_list);
                if (scb->lastfound == sb)
                        scb->lastfound = NULL;
                TAILQ_REMOVE(&scb->sackblocks, sb, sblk_list);
                free_sackblock(scb, sb);
                --scb->nblocks;
                KASSERT(scb->nblocks >= 0,
                    ("SACK block count underflow: %d < 0", scb->nblocks));
                sb = nb;
        }
        if (sb && SEQ_GEQ(th_ack, sb->sblk_start)) {
                /* Other side reneged? XXX */
                tcpstat.tcps_sackrenege++;
                tcp_sack_discard(tp);
        }
}
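
/*
 * For example, with blocks [1000,1500) and [2000,2500) (hypothetical
 * numbers), a cumulative ACK of 1500 frees the first block, while an
 * ACK of 1200, which lands inside still-held SACKed data, is treated
 * as the peer reneging and discards the whole scoreboard.
 */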

/*
 * Delete and free SACK blocks saved in scoreboard.
 */
static void
tcp_sack_cleanup(struct scoreboard *scb)
{
        struct sackblock *sb, *nb;

        TAILQ_FOREACH_MUTABLE(sb, &scb->sackblocks, sblk_list, nb) {
                free_sackblock(scb, sb);
                --scb->nblocks;
        }
        KASSERT(scb->nblocks == 0,
            ("SACK block count not zero: %d", scb->nblocks));
        TAILQ_INIT(&scb->sackblocks);
        scb->lastfound = NULL;
}

/*
 * Discard the SACK scoreboard, HighRxt, RescueRxt and LostSeq.
 */
void
tcp_sack_discard(struct tcpcb *tp)
{
        tcp_sack_cleanup(&tp->scb);
        tp->rexmt_high = tp->snd_una;
        tp->sack_flags &= ~TSACK_F_SACKRESCUED;
        tp->scb.lostseq = tp->snd_una;
}

/*
 * Delete and free SACK blocks saved in scoreboard.
 * Delete the one-slot block cache.
 */
void
tcp_sack_destroy(struct scoreboard *scb)
{
        tcp_sack_cleanup(scb);
        if (scb->freecache != NULL) {
                kfree(scb->freecache, M_SACKBLOCK);
                scb->freecache = NULL;
        }
}

/*
 * Clean up the reported SACK block information.
 */
void
tcp_sack_report_cleanup(struct tcpcb *tp)
{
        tp->sack_flags &=
            ~(TSACK_F_DUPSEG | TSACK_F_ENCLOSESEG | TSACK_F_SACKLEFT);
        tp->reportblk.rblk_start = tp->reportblk.rblk_end;
}

/*
 * Return whether a SACK report is needed.
 */
boolean_t
tcp_sack_report_needed(const struct tcpcb *tp)
{
        if ((tp->sack_flags &
             (TSACK_F_DUPSEG | TSACK_F_ENCLOSESEG | TSACK_F_SACKLEFT)) ||
            tp->reportblk.rblk_start != tp->reportblk.rblk_end)
                return TRUE;
        else
                return FALSE;
}

/*
 * Returns      0 if not a D-SACK block,
 *              1 if D-SACK,
 *              2 if duplicate of out-of-order D-SACK block.
 */
int
tcp_sack_ndsack_blocks(const struct raw_sackblock *blocks, const int numblocks,
    tcp_seq snd_una)
{
        if (numblocks == 0)
                return 0;

        if (SEQ_LT(blocks[0].rblk_start, snd_una))
                return 1;

        /* block 0 inside block 1 */
        if (numblocks > 1 &&
            SEQ_GEQ(blocks[0].rblk_start, blocks[1].rblk_start) &&
            SEQ_LEQ(blocks[0].rblk_end, blocks[1].rblk_end))
                return 2;

        return 0;
}
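
/*
 * Examples with snd_una = 3000 (hypothetical numbers):
 *
 *      { [1000,1500) }                 -> 1  (starts below snd_una: D-SACK)
 *      { [4000,4500), [3500,5000) }    -> 2  (block 0 enclosed by block 1)
 *      { [4000,4500) }                 -> 0  (ordinary SACK block)
 */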

/*
 * Update scoreboard on new incoming ACK.
 */
static void
tcp_sack_add_blocks(struct tcpcb *tp, struct tcpopt *to)
{
        const int numblocks = to->to_nsackblocks;
        struct raw_sackblock *blocks = to->to_sackblocks;
        struct scoreboard *scb = &tp->scb;
        int startblock, i;

        if (tcp_sack_ndsack_blocks(blocks, numblocks, tp->snd_una) > 0)
                startblock = 1;
        else
                startblock = 0;

        to->to_flags |= TOF_SACK_REDUNDANT;
        for (i = startblock; i < numblocks; i++) {
                struct raw_sackblock *newsackblock = &blocks[i];
                boolean_t update;
                int error;

                /* Guard against ACK reordering */
                if (SEQ_LEQ(newsackblock->rblk_start, tp->snd_una))
                        continue;

                /* Don't accept bad SACK blocks */
                if (SEQ_GT(newsackblock->rblk_end, tp->snd_max)) {
                        tcpstat.tcps_rcvbadsackopt++;
                        break;          /* skip all other blocks */
                }
                tcpstat.tcps_sacksbupdate++;

                error = insert_block(scb, newsackblock, &update);
                if (update)
                        to->to_flags &= ~TOF_SACK_REDUNDANT;
                if (error)
                        break;
        }
}

void
tcp_sack_update_scoreboard(struct tcpcb *tp, struct tcpopt *to)
{
        struct scoreboard *scb = &tp->scb;
        int rexmt_high_update = 0;

        tcp_sack_ack_blocks(tp, tp->snd_una);
        tcp_sack_add_blocks(tp, to);
        tcp_sack_update_lostseq(scb, tp->snd_una, tp->t_maxseg,
            tp->t_rxtthresh);
        if (SEQ_LT(tp->rexmt_high, tp->snd_una)) {
                tp->rexmt_high = tp->snd_una;
                rexmt_high_update = 1;
        }
        if (tp->sack_flags & TSACK_F_SACKRESCUED) {
                if (SEQ_LEQ(tp->rexmt_rescue, tp->snd_una)) {
                        tp->sack_flags &= ~TSACK_F_SACKRESCUED;
                } else if (tcp_aggressive_rescuesack && rexmt_high_update &&
                    SEQ_LT(tp->rexmt_rescue, tp->rexmt_high)) {
                        /* Drag RescueRxt along with HighRxt */
                        tp->rexmt_rescue = tp->rexmt_high;
                }
        }
}

/*
 * Insert SACK block into sender's scoreboard.
 */
static int
insert_block(struct scoreboard *scb, const struct raw_sackblock *raw_sb,
    boolean_t *update)
{
        struct sackblock *sb, *workingblock;
        boolean_t overlap_front;

        *update = TRUE;
        if (TAILQ_EMPTY(&scb->sackblocks)) {
                struct sackblock *newblock;

                KASSERT(scb->nblocks == 0, ("empty scb w/ blocks"));

                newblock = alloc_sackblock(scb, raw_sb);
                if (newblock == NULL)
                        return ENOMEM;
                TAILQ_INSERT_HEAD(&scb->sackblocks, newblock, sblk_list);
                scb->nblocks = 1;
                return 0;
        }

        KASSERT(scb->nblocks > 0, ("insert_block() called w/ no blocks"));
        KASSERT(scb->nblocks <= MAXSAVEDBLOCKS,
            ("too many SACK blocks %d", scb->nblocks));

        overlap_front = sack_block_lookup(scb, raw_sb->rblk_start, &sb);

        if (sb == NULL) {
                workingblock = alloc_sackblock_limit(scb, raw_sb);
                if (workingblock == NULL)
                        return ENOMEM;
                TAILQ_INSERT_HEAD(&scb->sackblocks, workingblock, sblk_list);
                ++scb->nblocks;
        } else {
                if (overlap_front || sb->sblk_end == raw_sb->rblk_start) {
                        tcpstat.tcps_sacksbreused++;

                        /* Extend old block */
                        workingblock = sb;
                        if (SEQ_GT(raw_sb->rblk_end, sb->sblk_end)) {
                                sb->sblk_end = raw_sb->rblk_end;
                        } else {
                                /* Exact match, nothing to consolidate */
                                *update = FALSE;
                                return 0;
                        }
                } else {
                        workingblock = alloc_sackblock_limit(scb, raw_sb);
                        if (workingblock == NULL)
                                return ENOMEM;
                        TAILQ_INSERT_AFTER(&scb->sackblocks, sb, workingblock,
                            sblk_list);
                        ++scb->nblocks;
                }
        }

        /* Consolidate right-hand side. */
        sb = TAILQ_NEXT(workingblock, sblk_list);
        while (sb != NULL &&
            SEQ_GEQ(workingblock->sblk_end, sb->sblk_end)) {
                struct sackblock *nextblock;

                nextblock = TAILQ_NEXT(sb, sblk_list);
                if (scb->lastfound == sb)
                        scb->lastfound = NULL;
                /* Remove completely overlapped block */
                TAILQ_REMOVE(&scb->sackblocks, sb, sblk_list);
                free_sackblock(scb, sb);
                --scb->nblocks;
                KASSERT(scb->nblocks > 0,
                    ("removed overlapped block: %d blocks left", scb->nblocks));
                sb = nextblock;
        }
        if (sb != NULL &&
            SEQ_GEQ(workingblock->sblk_end, sb->sblk_start)) {
                /* Extend new block to cover partially overlapped old block. */
                workingblock->sblk_end = sb->sblk_end;
                if (scb->lastfound == sb)
                        scb->lastfound = NULL;
                TAILQ_REMOVE(&scb->sackblocks, sb, sblk_list);
                free_sackblock(scb, sb);
                --scb->nblocks;
                KASSERT(scb->nblocks > 0,
                    ("removed partial right: %d blocks left", scb->nblocks));
        }
        return 0;
}
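
/*
 * Consolidation example (hypothetical numbers): with blocks
 * [1000,1500) and [2000,2500) on the scoreboard, inserting the raw
 * block [1400,2200) first extends [1000,1500) to [1000,2200), then
 * absorbs the partially overlapped right neighbor, leaving the single
 * block [1000,2500).
 */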

#ifdef DEBUG_SACK_BLOCKS
static void
tcp_sack_dump_blocks(const struct scoreboard *scb)
{
        const struct sackblock *sb;

        kprintf("%d blocks:", scb->nblocks);
        TAILQ_FOREACH(sb, &scb->sackblocks, sblk_list)
                kprintf(" [%u, %u)", sb->sblk_start, sb->sblk_end);
        kprintf("\n");
}
#else
static __inline void
tcp_sack_dump_blocks(const struct scoreboard *scb)
{
}
#endif

/*
 * Optimization to quickly determine which packets are lost.
 */
void
tcp_sack_update_lostseq(struct scoreboard *scb, tcp_seq snd_una, u_int maxseg,
    int rxtthresh)
{
        struct sackblock *sb;
        int nsackblocks = 0;
        int bytes_sacked = 0;
        int rxtthresh_bytes;

        if (tcp_do_rfc6675)
                rxtthresh_bytes = (rxtthresh - 1) * maxseg;
        else
                rxtthresh_bytes = rxtthresh * maxseg;

        sb = TAILQ_LAST(&scb->sackblocks, sackblock_list);
        while (sb != NULL) {
                ++nsackblocks;
                bytes_sacked += sb->sblk_end - sb->sblk_start;
                if (nsackblocks == rxtthresh ||
                    bytes_sacked >= rxtthresh_bytes) {
                        scb->lostseq = sb->sblk_start;
                        return;
                }
                sb = TAILQ_PREV(sb, sackblock_list, sblk_list);
        }
        scb->lostseq = snd_una;
}
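
/*
 * Example (hypothetical numbers): maxseg = 1000, rxtthresh = 3 and
 * tcp_do_rfc6675 set, so rxtthresh_bytes = 2000.  Scanning from the
 * right, blocks [9000,10000) and [5000,7000) accumulate 3000 SACKed
 * bytes, which crosses the byte threshold; lostseq becomes 5000 and
 * every un-SACKed sequence below it is considered lost.
 */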

/*
 * Return whether the given sequence number is considered lost.
 */
boolean_t
tcp_sack_islost(const struct scoreboard *scb, tcp_seq seqnum)
{
        return SEQ_LT(seqnum, scb->lostseq);
}

/*
 * True if at least "amount" has been SACKed.  Used by Early Retransmit.
 */
boolean_t
tcp_sack_has_sacked(const struct scoreboard *scb, u_int amount)
{
        const struct sackblock *sb;
        int bytes_sacked = 0;

        TAILQ_FOREACH(sb, &scb->sackblocks, sblk_list) {
                bytes_sacked += sb->sblk_end - sb->sblk_start;
                if (bytes_sacked >= amount)
                        return TRUE;
        }
        return FALSE;
}

/*
 * Number of bytes SACKed below seq.
 */
int
tcp_sack_bytes_below(const struct scoreboard *scb, tcp_seq seq)
{
        const struct sackblock *sb;
        int bytes_sacked = 0;

        sb = TAILQ_FIRST(&scb->sackblocks);
        while (sb && SEQ_GT(seq, sb->sblk_start)) {
                bytes_sacked += seq_min(seq, sb->sblk_end) - sb->sblk_start;
                sb = TAILQ_NEXT(sb, sblk_list);
        }
        return bytes_sacked;
}
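
/*
 * E.g. with blocks [1000,1500) and [2000,2500) (hypothetical numbers),
 * seq = 2200 counts 500 bytes from the first block plus 200 from the
 * second, returning 700.
 */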

/*
 * Return estimate of the number of bytes outstanding in the network.
 */
uint32_t
tcp_sack_compute_pipe(const struct tcpcb *tp)
{
        const struct scoreboard *scb = &tp->scb;
        const struct sackblock *sb;
        int nlost, nretransmitted;
        tcp_seq end;

        nlost = tp->snd_max - scb->lostseq;
        nretransmitted = tp->rexmt_high - tp->snd_una;

        TAILQ_FOREACH(sb, &scb->sackblocks, sblk_list) {
                if (SEQ_LT(sb->sblk_start, tp->rexmt_high)) {
                        end = seq_min(sb->sblk_end, tp->rexmt_high);
                        nretransmitted -= end - sb->sblk_start;
                }
                if (SEQ_GEQ(sb->sblk_start, scb->lostseq))
                        nlost -= sb->sblk_end - sb->sblk_start;
        }

        return (nlost + nretransmitted);
}
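
/*
 * Worked example (hypothetical numbers): snd_una = 1000,
 * snd_max = 10000, rexmt_high = 3000, lostseq = 5000 and one SACK
 * block [6000,8000).  Then nlost = (10000 - 5000) - 2000 = 3000 and
 * nretransmitted = (3000 - 1000) - 0 = 2000, for a pipe estimate of
 * 5000 bytes.
 */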

/*
 * Return the sequence number and length of the next segment to transmit
 * when in Fast Recovery.
 */
boolean_t
tcp_sack_nextseg(struct tcpcb *tp, tcp_seq *nextrexmt, uint32_t *plen,
    boolean_t *rescue)
{
        struct scoreboard *scb = &tp->scb;
        struct socket *so = tp->t_inpcb->inp_socket;
        struct sackblock *sb;
        const struct sackblock *lastblock =
            TAILQ_LAST(&scb->sackblocks, sackblock_list);
        tcp_seq torexmt;
        long len, off, sendwin;

        /* skip SACKed data */
        tcp_sack_skip_sacked(scb, &tp->rexmt_high);

        /* Look for lost data. */
        torexmt = tp->rexmt_high;
        *rescue = FALSE;
        if (lastblock != NULL) {
                if (SEQ_LT(torexmt, lastblock->sblk_end) &&
                    tcp_sack_islost(scb, torexmt)) {
sendunsacked:
                        *nextrexmt = torexmt;
                        /* If the left-hand edge has been SACKed, pull it in. */
                        if (sack_block_lookup(scb, torexmt + tp->t_maxseg, &sb))
                                *plen = sb->sblk_start - torexmt;
                        else
                                *plen = tp->t_maxseg;
                        return TRUE;
                }
        }

        /* See if unsent data available within send window. */
        off = tp->snd_max - tp->snd_una;
        sendwin = min(tp->snd_wnd, tp->snd_bwnd);
        len = (long) ulmin(so->so_snd.ssb_cc, sendwin) - off;
        if (len > 0) {
                *nextrexmt = tp->snd_max;       /* Send new data. */
                *plen = tp->t_maxseg;
                return TRUE;
        }

        /* We're less certain this data has been lost. */
        if (lastblock != NULL && SEQ_LT(torexmt, lastblock->sblk_end))
                goto sendunsacked;

        /* Rescue retransmission */
        if (tcp_do_rescuesack || tcp_do_rfc6675) {
                tcpstat.tcps_sackrescue_try++;
                if (tp->sack_flags & TSACK_F_SACKRESCUED) {
                        if (!tcp_aggressive_rescuesack)
                                return FALSE;

                        /*
                         * Aggressive variant of the rescue retransmission.
                         *
                         * The idea of the rescue retransmission is to sustain
                         * the ACK clock and thus avoid a timeout
                         * retransmission.
                         *
                         * In some situations the conservative approach
                         * suggested in the draft
                         * http://tools.ietf.org/html/draft-nishida-tcpm-rescue-retransmission-00
                         * cannot sustain the ACK clock, since it allows only
                         * one rescue retransmission before a cumulative ACK
                         * covers the segment sent by the rescue
                         * retransmission.
                         *
                         * We try to locate the next unSACKed segment which
                         * follows the previously sent rescue segment.  If
                         * there is no such segment, we loop back to the first
                         * unacknowledged segment.
                         */

                        /*
                         * Skip SACKed data, but here we follow
                         * the last transmitted rescue segment.
                         */
                        torexmt = tp->rexmt_rescue;
                        tcp_sack_skip_sacked(scb, &torexmt);
                }
                if (torexmt == tp->snd_max) {
                        /* Nothing left to retransmit; restart */
                        torexmt = tp->snd_una;
                }
                *rescue = TRUE;
                goto sendunsacked;
        } else if (tcp_do_smartsack && lastblock == NULL) {
                tcpstat.tcps_sackrescue_try++;
                *rescue = TRUE;
                goto sendunsacked;
        }

        return FALSE;
}
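
/*
 * Example (hypothetical numbers): rexmt_high = 1000, t_maxseg = 700,
 * blocks [1500,2000) and [3000,4000), lostseq = 3000.  Sequence 1000
 * is considered lost, and since 1000 + 700 falls inside the SACKed
 * block [1500,2000), the retransmission is trimmed to [1000,1500),
 * i.e. *nextrexmt = 1000 and *plen = 500.
 */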

/*
 * Return the next sequence number higher than "*prexmt" that has
 * not been SACKed.
 */
void
tcp_sack_skip_sacked(struct scoreboard *scb, tcp_seq *prexmt)
{
        struct sackblock *sb;

        /* skip SACKed data */
        if (sack_block_lookup(scb, *prexmt, &sb))
                *prexmt = sb->sblk_end;
}
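
/*
 * E.g. with block [1000,1500) (hypothetical numbers), *prexmt = 1200
 * is advanced to 1500, while *prexmt = 1700 is left unchanged.
 */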

/*
 * Return the length of the first stretch of unSACKed data.
 */
uint32_t
tcp_sack_first_unsacked_len(const struct tcpcb *tp)
{
        const struct sackblock *sb;

        sb = TAILQ_FIRST(&tp->scb.sackblocks);
        if (sb == NULL)
                return tp->t_maxseg;

        KASSERT(SEQ_LT(tp->snd_una, sb->sblk_start),
            ("invalid sb start %u, snd_una %u",
             sb->sblk_start, tp->snd_una));
        return (sb->sblk_start - tp->snd_una);
}
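
/* E.g. snd_una = 1000 with first block [1800,2200) returns 800. */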

#ifdef later
void
tcp_sack_save_scoreboard(struct scoreboard *scb)
{
        scb->sackblocks_prev = scb->sackblocks;
        TAILQ_INIT(&scb->sackblocks);
}

void
tcp_sack_revert_scoreboard(struct scoreboard *scb, tcp_seq snd_una,
                           u_int maxseg)
{
        struct sackblock *sb;

        scb->sackblocks = scb->sackblocks_prev;
        scb->nblocks = 0;
        TAILQ_FOREACH(sb, &scb->sackblocks, sblk_list)
                ++scb->nblocks;
        tcp_sack_ack_blocks(scb, snd_una);
        scb->lastfound = NULL;
}
#endif

#ifdef DEBUG_SACK_HISTORY
static void
tcp_sack_dump_history(const char *msg, const struct tcpcb *tp)
{
        int i;
        static int ndumped;

        /* only need a couple of these to debug most problems */
        if (++ndumped > 900)
                return;

        kprintf("%s:\tnsackhistory %d: ", msg, tp->nsackhistory);
        for (i = 0; i < tp->nsackhistory; ++i)
                kprintf("[%u, %u) ", tp->sackhistory[i].rblk_start,
                    tp->sackhistory[i].rblk_end);
        kprintf("\n");
}
#else
static __inline void
tcp_sack_dump_history(const char *msg, const struct tcpcb *tp)
{
}
#endif

/*
 * Remove old SACK blocks from the SACK history that have already been ACKed.
 */
static void
tcp_sack_ack_history(struct tcpcb *tp)
{
        int i, nblocks, openslot;

        tcp_sack_dump_history("before tcp_sack_ack_history", tp);
        nblocks = tp->nsackhistory;
        for (i = openslot = 0; i < nblocks; ++i) {
                if (SEQ_LEQ(tp->sackhistory[i].rblk_end, tp->rcv_nxt)) {
                        --tp->nsackhistory;
                        continue;
                }
                if (SEQ_LT(tp->sackhistory[i].rblk_start, tp->rcv_nxt))
                        tp->sackhistory[i].rblk_start = tp->rcv_nxt;
                if (i == openslot)
                        ++openslot;
                else
                        tp->sackhistory[openslot++] = tp->sackhistory[i];
        }
        tcp_sack_dump_history("after tcp_sack_ack_history", tp);
        KASSERT(openslot == tp->nsackhistory,
            ("tcp_sack_ack_history miscounted: %d != %d",
            openslot, tp->nsackhistory));
}

/*
 * Add or merge newblock into reported history.
 * Also remove or update SACK blocks that will be acked.
 */
static void
tcp_sack_update_reported_history(struct tcpcb *tp, tcp_seq start, tcp_seq end)
{
        struct raw_sackblock copy[MAX_SACK_REPORT_BLOCKS];
        int i, cindex;

        tcp_sack_dump_history("before tcp_sack_update_reported_history", tp);
        /*
         * Six cases:
         *      0) no overlap
         *      1) newblock == oldblock
         *      2) oldblock contains newblock
         *      3) newblock contains oldblock
         *      4) tail of oldblock overlaps or abuts start of newblock
         *      5) tail of newblock overlaps or abuts head of oldblock
         */
        for (i = cindex = 0; i < tp->nsackhistory; ++i) {
                struct raw_sackblock *oldblock = &tp->sackhistory[i];
                tcp_seq old_start = oldblock->rblk_start;
                tcp_seq old_end = oldblock->rblk_end;

                if (SEQ_LT(end, old_start) || SEQ_GT(start, old_end)) {
                        /* Case 0:  no overlap.  Copy old block. */
                        copy[cindex++] = *oldblock;
                        continue;
                }

                if (SEQ_GEQ(start, old_start) && SEQ_LEQ(end, old_end)) {
                        /* Cases 1 & 2.  Move block to front of history. */
                        int j;

                        start = old_start;
                        end = old_end;
                        /* no need to check rest of blocks */
                        for (j = i + 1; j < tp->nsackhistory; ++j)
                                copy[cindex++] = tp->sackhistory[j];
                        break;
                }

                if (SEQ_GEQ(old_end, start) && SEQ_LT(old_start, start)) {
                        /* Case 4:  extend start of new block. */
                        start = old_start;
                } else if (SEQ_GEQ(end, old_start) && SEQ_GT(old_end, end)) {
                        /* Case 5:  extend end of new block. */
                        end = old_end;
                } else {
                        /* Case 3:  delete old block by not copying it. */
                        KASSERT(SEQ_LEQ(start, old_start) &&
                                SEQ_GEQ(end, old_end),
                            ("bad logic: old [%u, %u), new [%u, %u)",
                             old_start, old_end, start, end));
                }
        }

        /* insert new block */
        tp->sackhistory[0].rblk_start = start;
        tp->sackhistory[0].rblk_end = end;
        cindex = min(cindex, MAX_SACK_REPORT_BLOCKS - 1);
        for (i = 0; i < cindex; ++i)
                tp->sackhistory[i + 1] = copy[i];
        tp->nsackhistory = cindex + 1;
        tcp_sack_dump_history("after tcp_sack_update_reported_history", tp);
}
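
/*
 * Merge example (hypothetical numbers): history { [1000,1500),
 * [3000,3500) } and a new block [1400,2000).  The new block overlaps
 * the tail of the first entry (case 4), so the history becomes
 * { [1000,2000), [3000,3500) } with the merged block at the front.
 */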

/*
 * Fill in SACK report to return to data sender.
 */
void
tcp_sack_fill_report(struct tcpcb *tp, u_char *opt, u_int *plen)
{
        u_int optlen = *plen;
        uint32_t *lp = (uint32_t *)(opt + optlen);
        uint32_t *olp;
        tcp_seq hstart = tp->rcv_nxt, hend;
        int nblocks;

        KASSERT(TCP_MAXOLEN - optlen >=
            TCPOLEN_SACK_ALIGNED + TCPOLEN_SACK_BLOCK,
            ("no room for SACK header and one block: optlen %d", optlen));

        if (tp->sack_flags & TSACK_F_DUPSEG)
                tcpstat.tcps_snddsackopt++;
        else
                tcpstat.tcps_sndsackopt++;

        olp = lp++;
        optlen += TCPOLEN_SACK_ALIGNED;

        tcp_sack_ack_history(tp);
        if (tp->reportblk.rblk_start != tp->reportblk.rblk_end) {
                *lp++ = htonl(tp->reportblk.rblk_start);
                *lp++ = htonl(tp->reportblk.rblk_end);
                optlen += TCPOLEN_SACK_BLOCK;
                hstart = tp->reportblk.rblk_start;
                hend = tp->reportblk.rblk_end;
                if (tp->sack_flags & TSACK_F_ENCLOSESEG) {
                        KASSERT(TCP_MAXOLEN - optlen >= TCPOLEN_SACK_BLOCK,
                            ("no room for enclosing SACK block: optlen %d",
                            optlen));
                        *lp++ = htonl(tp->encloseblk.rblk_start);
                        *lp++ = htonl(tp->encloseblk.rblk_end);
                        optlen += TCPOLEN_SACK_BLOCK;
                        hstart = tp->encloseblk.rblk_start;
                        hend = tp->encloseblk.rblk_end;
                }
                if (SEQ_GT(hstart, tp->rcv_nxt))
                        tcp_sack_update_reported_history(tp, hstart, hend);
        }
        if (tcp_do_smartsack && (tp->sack_flags & TSACK_F_SACKLEFT)) {
                /* Fill in from left!  Walk re-assembly queue. */
                struct tseg_qent *q;

                q = TAILQ_FIRST(&tp->t_segq);
                while (q != NULL &&
                    TCP_MAXOLEN - optlen >= TCPOLEN_SACK_BLOCK) {
                        *lp++ = htonl(q->tqe_th->th_seq);
                        *lp++ = htonl(TCP_SACK_BLKEND(
                            q->tqe_th->th_seq + q->tqe_len,
                            q->tqe_th->th_flags));
                        optlen += TCPOLEN_SACK_BLOCK;
                        q = TAILQ_NEXT(q, tqe_q);
                }
        } else {
                int n = 0;

                /* Fill in SACK blocks from right side. */
                while (n < tp->nsackhistory &&
                    TCP_MAXOLEN - optlen >= TCPOLEN_SACK_BLOCK) {
                        if (tp->sackhistory[n].rblk_start != hstart) {
                                *lp++ = htonl(tp->sackhistory[n].rblk_start);
                                *lp++ = htonl(tp->sackhistory[n].rblk_end);
                                optlen += TCPOLEN_SACK_BLOCK;
                        }
                        ++n;
                }
        }
        tp->reportblk.rblk_start = tp->reportblk.rblk_end;
        tp->sack_flags &=
            ~(TSACK_F_DUPSEG | TSACK_F_ENCLOSESEG | TSACK_F_SACKLEFT);
        nblocks = (lp - olp - 1) / 2;
        *olp = htonl(TCPOPT_SACK_ALIGNED |
                     (TCPOLEN_SACK + nblocks * TCPOLEN_SACK_BLOCK));
        *plen = optlen;
}
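
/*
 * The emitted option is 32-bit aligned: the word written at *olp
 * carries the SACK option kind and a length of TCPOLEN_SACK +
 * 8 * nblocks, and each block is a [start,end) pair of 32-bit
 * sequence numbers in network byte order.  Assuming the usual
 * encodings (TCPOLEN_SACK_ALIGNED = 4, TCPOLEN_SACK_BLOCK = 8), a
 * one-block report consumes 4 + 8 = 12 option bytes.
 */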
