1 /*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
30 * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.3.2.10 2006/01/24 01:16:18 mohans Exp $
31 */
32
33 /*-
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
62 *
63 * NRL grants permission for redistribution and use in source and binary
64 * forms, with or without modification, of the software and documentation
65 * created at NRL provided that the following conditions are met:
66 *
67 * 1. Redistributions of source code must retain the above copyright
68 * notice, this list of conditions and the following disclaimer.
69 * 2. Redistributions in binary form must reproduce the above copyright
70 * notice, this list of conditions and the following disclaimer in the
71 * documentation and/or other materials provided with the distribution.
72 * 3. All advertising materials mentioning features or use of this software
73 * must display the following acknowledgements:
74 * This product includes software developed by the University of
75 * California, Berkeley and its contributors.
76 * This product includes software developed at the Information
77 * Technology Division, US Naval Research Laboratory.
78 * 4. Neither the name of the NRL nor the names of its contributors
79 * may be used to endorse or promote products derived from this software
80 * without specific prior written permission.
81 *
82 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
83 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
84 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
85 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
86 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
87 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
88 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
89 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
90 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
91 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
92 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
93 *
94 * The views and conclusions contained in the software and documentation
95 * are those of the authors and should not be interpreted as representing
96 * official policies, either expressed or implied, of the US Naval
97 * Research Laboratory (NRL).
98 */
99 #include "opt_inet.h"
100 #include "opt_inet6.h"
101 #include "opt_ipsec.h"
102 #include "opt_tcpdebug.h"
103 #include "opt_tcp_input.h"
104 #include "opt_tcp_sack.h"
105
106 #include <sys/param.h>
107 #include <sys/systm.h>
108 #include <sys/kernel.h>
109 #include <sys/sysctl.h>
110 #include <sys/malloc.h>
111 #include <sys/mbuf.h>
112 #include <sys/proc.h> /* for proc0 declaration */
113 #include <sys/protosw.h>
114 #include <sys/socket.h>
115 #include <sys/socketvar.h>
116 #include <sys/syslog.h>
117 #include <sys/systm.h>
118
119 #include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
120
121 #include <vm/uma.h>
122
123 #include <net/if.h>
124 #include <net/route.h>
125
126 #include <netinet/in.h>
127 #include <netinet/in_systm.h>
128 #include <netinet/ip.h>
129 #include <netinet/ip_icmp.h> /* for ICMP_BANDLIM */
130 #include <netinet/in_var.h>
131 #include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
132 #include <netinet/in_pcb.h>
133 #include <netinet/ip_var.h>
134 #include <netinet/ip6.h>
135 #include <netinet/icmp6.h>
136 #include <netinet6/nd6.h>
137 #include <netinet6/ip6_var.h>
138 #include <netinet6/in6_pcb.h>
139 #include <netinet/tcp.h>
140 #include <netinet/tcp_fsm.h>
141 #include <netinet/tcp_seq.h>
142 #include <netinet/tcp_timer.h>
143 #include <netinet/tcp_var.h>
144 #include <netinet6/tcp6_var.h>
145 #include <netinet/tcpip.h>
146 #ifdef TCPDEBUG
147 #include <netinet/tcp_debug.h>
148 #endif /* TCPDEBUG */
149
150 #ifdef FAST_IPSEC
151 #include <netipsec/ipsec.h>
152 #include <netipsec/ipsec6.h>
153 #endif
154
155 #ifdef IPSEC
156 #include <netinet6/ipsec.h>
157 #include <netinet6/ipsec6.h>
158 #include <netkey/key.h>
159 #endif /*IPSEC*/
160 #include <machine/in_cksum.h>
161
162 extern struct uma_zone *sack_hole_zone;
163
164 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
165 int tcp_do_sack = 1;
166 SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
167 &tcp_do_sack, 0, "Enable/Disable TCP SACK support");
168 TUNABLE_INT("net.inet.tcp.sack.enable", &tcp_do_sack);
169
170 static int tcp_sack_maxholes = 128;
171 SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW,
172 &tcp_sack_maxholes, 0,
173 "Maximum number of TCP SACK holes allowed per connection");
174
175 static int tcp_sack_globalmaxholes = 65536;
176 SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW,
177 &tcp_sack_globalmaxholes, 0,
178 "Global maximum number of TCP SACK holes");
179
180 static int tcp_sack_globalholes = 0;
181 SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_RD,
182 &tcp_sack_globalholes, 0,
183 "Global number of TCP SACK holes currently allocated");
184 /*
185 * This function is called upon receipt of new valid data (while not in header
186 * prediction mode), and it updates the ordered list of sacks.
187 */
188 void
189 tcp_update_sack_list(tp, rcv_laststart, rcv_lastend)
190 struct tcpcb *tp;
191 tcp_seq rcv_laststart, rcv_lastend;
192 {
193 /*
194 * First reported block MUST be the most recent one. Subsequent
195 * blocks SHOULD be in the order in which they arrived at the
196 * receiver. These two conditions make the implementation fully
197 * compliant with RFC 2018.
198 */
199 int i, j = 0, count = 0, lastpos = -1;
200 struct sackblk sack, firstsack, temp[MAX_SACK_BLKS];
201
202 INP_LOCK_ASSERT(tp->t_inpcb);
203 /* First clean up current list of sacks */
204 for (i = 0; i < tp->rcv_numsacks; i++) {
205 sack = tp->sackblks[i];
206 if (sack.start == 0 && sack.end == 0) {
207 count++; /* count = number of blocks to be discarded */
208 continue;
209 }
210 if (SEQ_LEQ(sack.end, tp->rcv_nxt)) {
211 tp->sackblks[i].start = tp->sackblks[i].end = 0;
212 count++;
213 } else {
214 temp[j].start = tp->sackblks[i].start;
215 temp[j++].end = tp->sackblks[i].end;
216 }
217 }
218 tp->rcv_numsacks -= count;
219 if (tp->rcv_numsacks == 0) { /* no sack blocks currently (fast path) */
220 tcp_clean_sackreport(tp);
221 if (SEQ_LT(tp->rcv_nxt, rcv_laststart)) {
222 /* ==> need first sack block */
223 tp->sackblks[0].start = rcv_laststart;
224 tp->sackblks[0].end = rcv_lastend;
225 tp->rcv_numsacks = 1;
226 }
227 return;
228 }
229 /* Otherwise, sack blocks are already present. */
230 for (i = 0; i < tp->rcv_numsacks; i++)
231 tp->sackblks[i] = temp[i]; /* first copy back sack list */
232 if (SEQ_GEQ(tp->rcv_nxt, rcv_lastend))
233 return; /* sack list remains unchanged */
234 /*
235 * From here, segment just received should be (part of) the 1st sack.
236 * Go through list, possibly coalescing sack block entries.
237 */
238 firstsack.start = rcv_laststart;
239 firstsack.end = rcv_lastend;
240 for (i = 0; i < tp->rcv_numsacks; i++) {
241 sack = tp->sackblks[i];
242 if (SEQ_LT(sack.end, firstsack.start) ||
243 SEQ_GT(sack.start, firstsack.end))
244 continue; /* no overlap */
245 if (sack.start == firstsack.start && sack.end == firstsack.end){
246 /*
247 * identical block; delete it here since we will
248 * move it to the front of the list.
249 */
250 tp->sackblks[i].start = tp->sackblks[i].end = 0;
251 lastpos = i; /* last posn with a zero entry */
252 continue;
253 }
254 if (SEQ_LEQ(sack.start, firstsack.start))
255 firstsack.start = sack.start; /* merge blocks */
256 if (SEQ_GEQ(sack.end, firstsack.end))
257 firstsack.end = sack.end; /* merge blocks */
258 tp->sackblks[i].start = tp->sackblks[i].end = 0;
259 lastpos = i; /* last posn with a zero entry */
260 }
261 if (lastpos != -1) { /* at least one merge */
262 for (i = 0, j = 1; i < tp->rcv_numsacks; i++) {
263 sack = tp->sackblks[i];
264 if (sack.start == 0 && sack.end == 0)
265 continue;
266 temp[j++] = sack;
267 }
268 tp->rcv_numsacks = j; /* including first blk (added later) */
269 for (i = 1; i < tp->rcv_numsacks; i++) /* now copy back */
270 tp->sackblks[i] = temp[i];
271 } else { /* no merges -- shift sacks by 1 */
272 if (tp->rcv_numsacks < MAX_SACK_BLKS)
273 tp->rcv_numsacks++;
274 for (i = tp->rcv_numsacks-1; i > 0; i--)
275 tp->sackblks[i] = tp->sackblks[i-1];
276 }
277 tp->sackblks[0] = firstsack;
278 return;
279 }
280
281 /*
282 * Delete all receiver-side SACK information.
283 */
284 void
285 tcp_clean_sackreport(tp)
286 struct tcpcb *tp;
287 {
288 int i;
289
290 INP_LOCK_ASSERT(tp->t_inpcb);
291 tp->rcv_numsacks = 0;
292 for (i = 0; i < MAX_SACK_BLKS; i++)
293 tp->sackblks[i].start = tp->sackblks[i].end=0;
294 }
295
296 /*
297 * Process the TCP SACK option. Returns 1 if tcp_dooptions() should continue,
298 * and 0 otherwise, if the option was fine. tp->snd_holes is an ordered list
299 * of holes (oldest to newest, in terms of the sequence space).
300 */
301 int
302 tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
303 {
304 int tmp_olen;
305 u_char *tmp_cp;
306 struct sackhole *cur, *p, *temp;
307
308 INP_LOCK_ASSERT(tp->t_inpcb);
309 if (!tp->sack_enable)
310 return (1);
311 if ((th->th_flags & TH_ACK) == 0)
312 return (1);
313 /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
314 if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
315 return (1);
316 /* If ack is outside [snd_una, snd_max], ignore the SACK options */
317 if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))
318 return (1);
319 tmp_cp = cp + 2;
320 tmp_olen = optlen - 2;
321 tcpstat.tcps_sack_rcv_blocks++;
322 if (tp->snd_numholes < 0) /* XXX panic? */
323 tp->snd_numholes = 0;
324 if (tp->t_maxseg == 0)
325 panic("tcp_sack_option"); /* Should never happen */
326 next_block:
327 while (tmp_olen > 0) {
328 struct sackblk sack;
329
330 bcopy(tmp_cp, (char *) &(sack.start), sizeof(tcp_seq));
331 sack.start = ntohl(sack.start);
332 bcopy(tmp_cp + sizeof(tcp_seq),
333 (char *) &(sack.end), sizeof(tcp_seq));
334 sack.end = ntohl(sack.end);
335 tmp_olen -= TCPOLEN_SACK;
336 tmp_cp += TCPOLEN_SACK;
337 if (SEQ_LEQ(sack.end, sack.start))
338 continue; /* bad SACK fields */
339 if (SEQ_LEQ(sack.end, tp->snd_una))
340 continue; /* old block */
341 if (SEQ_GT(th->th_ack, tp->snd_una)) {
342 if (SEQ_LT(sack.start, th->th_ack))
343 continue;
344 }
345 if (SEQ_GT(sack.end, tp->snd_max))
346 continue;
347 if (tp->snd_holes == NULL) { /* first hole */
348 if (tcp_sack_globalholes >= tcp_sack_globalmaxholes ||
349 tcp_sack_maxholes == 0) {
350 tcpstat.tcps_sack_sboverflow++;
351 continue;
352 }
353 tp->snd_holes = (struct sackhole *)
354 uma_zalloc(sack_hole_zone,M_NOWAIT);
355 if (tp->snd_holes == NULL) {
356 /* ENOBUFS, so ignore SACKed block for now*/
357 continue;
358 }
359 cur = tp->snd_holes;
360 cur->start = th->th_ack;
361 cur->end = sack.start;
362 cur->rxmit = cur->start;
363 cur->next = NULL;
364 tp->snd_numholes = 1;
365 tcp_sack_globalholes++;
366 tp->rcv_lastsack = sack.end;
367 continue; /* with next sack block */
368 }
369 /* Go thru list of holes: p = previous, cur = current */
370 p = cur = tp->snd_holes;
371 while (cur) {
372 if (SEQ_LEQ(sack.end, cur->start))
373 /* SACKs data before the current hole */
374 break; /* no use going through more holes */
375 if (SEQ_GEQ(sack.start, cur->end)) {
376 /* SACKs data beyond the current hole */
377 p = cur;
378 cur = cur->next;
379 continue;
380 }
381 if (SEQ_LEQ(sack.start, cur->start)) {
382 /* Data acks at least the beginning of hole */
383 if (SEQ_GEQ(sack.end, cur->end)) {
384 /* Acks entire hole, so delete hole */
385 if (p != cur) {
386 p->next = cur->next;
387 uma_zfree(sack_hole_zone, cur);
388 cur = p->next;
389 } else {
390 cur = cur->next;
391 uma_zfree(sack_hole_zone, p);
392 p = cur;
393 tp->snd_holes = p;
394 }
395 tp->snd_numholes--;
396 tcp_sack_globalholes--;
397 continue;
398 }
399 /* otherwise, move start of hole forward */
400 cur->start = sack.end;
401 cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
402 p = cur;
403 cur = cur->next;
404 continue;
405 }
406 /* move end of hole backward */
407 if (SEQ_GEQ(sack.end, cur->end)) {
408 cur->end = sack.start;
409 cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
410 p = cur;
411 cur = cur->next;
412 continue;
413 }
414 if (SEQ_LT(cur->start, sack.start) &&
415 SEQ_GT(cur->end, sack.end)) {
416 /*
417 * ACKs some data in middle of a hole; need to
418 * split current hole
419 */
420 if (tp->snd_numholes >= tcp_sack_maxholes ||
421 tcp_sack_globalholes >=
422 tcp_sack_globalmaxholes) {
423 tcpstat.tcps_sack_sboverflow++;
424 goto next_block;
425 }
426 temp = (struct sackhole *)
427 uma_zalloc(sack_hole_zone,M_NOWAIT);
428 if (temp == NULL)
429 goto next_block; /* ENOBUFS */
430 temp->next = cur->next;
431 temp->start = sack.end;
432 temp->end = cur->end;
433 temp->rxmit = SEQ_MAX(cur->rxmit, temp->start);
434 cur->end = sack.start;
435 cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
436 cur->next = temp;
437 p = temp;
438 cur = p->next;
439 tp->snd_numholes++;
440 tcp_sack_globalholes++;
441 }
442 }
443 /* At this point, p points to the last hole on the list */
444 if (SEQ_LT(tp->rcv_lastsack, sack.start)) {
445 /*
446 * Need to append new hole at end.
447 * Last hole is p (and it's not NULL).
448 */
449 if (tp->snd_numholes >= tcp_sack_maxholes ||
450 tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
451 tcpstat.tcps_sack_sboverflow++;
452 continue;
453 }
454 temp = (struct sackhole *)
455 uma_zalloc(sack_hole_zone,M_NOWAIT);
456 if (temp == NULL)
457 continue; /* ENOBUFS */
458 temp->start = tp->rcv_lastsack;
459 temp->end = sack.start;
460 temp->rxmit = temp->start;
461 temp->next = 0;
462 p->next = temp;
463 tp->rcv_lastsack = sack.end;
464 tp->snd_numholes++;
465 tcp_sack_globalholes++;
466 }
467 if (SEQ_LT(tp->rcv_lastsack, sack.end))
468 tp->rcv_lastsack = sack.end;
469 }
470 return (0);
471 }
472
473 /*
474 * Delete stale (i.e, cumulatively ack'd) holes. Hole is deleted only if
475 * it is completely acked; otherwise, tcp_sack_option(), called from
476 * tcp_dooptions(), will fix up the hole.
477 */
478 void
479 tcp_del_sackholes(tp, th)
480 struct tcpcb *tp;
481 struct tcphdr *th;
482 {
483 INP_LOCK_ASSERT(tp->t_inpcb);
484 if (tp->sack_enable && tp->t_state != TCPS_LISTEN) {
485 /* max because this could be an older ack just arrived */
486 tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
487 th->th_ack : tp->snd_una;
488 struct sackhole *cur = tp->snd_holes;
489 struct sackhole *prev;
490 while (cur)
491 if (SEQ_LEQ(cur->end, lastack)) {
492 prev = cur;
493 cur = cur->next;
494 uma_zfree(sack_hole_zone, prev);
495 tp->snd_numholes--;
496 tcp_sack_globalholes--;
497 } else if (SEQ_LT(cur->start, lastack)) {
498 cur->start = lastack;
499 if (SEQ_LT(cur->rxmit, cur->start))
500 cur->rxmit = cur->start;
501 break;
502 } else
503 break;
504 tp->snd_holes = cur;
505 }
506 }
507
508 void
509 tcp_free_sackholes(struct tcpcb *tp)
510 {
511 struct sackhole *p, *q;
512
513 INP_LOCK_ASSERT(tp->t_inpcb);
514 q = tp->snd_holes;
515 while (q != NULL) {
516 p = q;
517 q = q->next;
518 uma_zfree(sack_hole_zone, p);
519 tcp_sack_globalholes--;
520 }
521 tp->snd_holes = 0;
522 tp->snd_numholes = 0;
523 }
524
525 /*
526 * Partial ack handling within a sack recovery episode.
527 * Keeping this very simple for now. When a partial ack
528 * is received, force snd_cwnd to a value that will allow
529 * the sender to transmit no more than 2 segments.
530 * If necessary, a better scheme can be adopted at a
531 * later point, but for now, the goal is to prevent the
532 * sender from bursting a large amount of data in the midst
533 * of sack recovery.
534 */
535 void
536 tcp_sack_partialack(tp, th)
537 struct tcpcb *tp;
538 struct tcphdr *th;
539 {
540 int num_segs = 1;
541 int sack_bytes_rxmt = 0;
542
543 INP_LOCK_ASSERT(tp->t_inpcb);
544 callout_stop(tp->tt_rexmt);
545 tp->t_rtttime = 0;
546 /* send one or 2 segments based on how much new data was acked */
547 if (((th->th_ack - tp->snd_una) / tp->t_maxseg) > 2)
548 num_segs = 2;
549 (void)tcp_sack_output(tp, &sack_bytes_rxmt);
550 tp->snd_cwnd = sack_bytes_rxmt + (tp->snd_nxt - tp->sack_newdata) +
551 num_segs * tp->t_maxseg;
552 if (tp->snd_cwnd > tp->snd_ssthresh)
553 tp->snd_cwnd = tp->snd_ssthresh;
554 tp->t_flags |= TF_ACKNOW;
555 (void) tcp_output(tp);
556 }
557
#ifdef TCP_SACK_DEBUG
/*
 * Debug aid: print every hole of the scoreboard (start, end, retransmit
 * pointer, all in hex).  Prints nothing when the scoreboard is empty.
 */
void
tcp_print_holes(struct tcpcb *tp)
{
	struct sackhole *hole;

	if (tp->snd_holes == NULL)
		return;
	printf("Hole report: start--end dups rxmit\n");
	for (hole = tp->snd_holes; hole != NULL; hole = hole->next)
		printf("%x--%x r %x\n", hole->start, hole->end, hole->rxmit);
	printf("\n");
}
#endif /* TCP_SACK_DEBUG */
573
574 /*
575 * Returns pointer to a sackhole if there are any pending retransmissions;
576 * NULL otherwise.
577 */
578 struct sackhole *
579 tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
580 {
581 struct sackhole *p = NULL;
582
583 INP_LOCK_ASSERT(tp->t_inpcb);
584 if (!tp->sack_enable)
585 return (NULL);
586 *sack_bytes_rexmt = 0;
587 for (p = tp->snd_holes; p ; p = p->next) {
588 if (SEQ_LT(p->rxmit, p->end)) {
589 if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */
590 continue;
591 }
592 #ifdef TCP_SACK_DEBUG
593 if (p)
594 tcp_print_holes(tp);
595 #endif
596 *sack_bytes_rexmt += (p->rxmit - p->start);
597 break;
598 }
599 *sack_bytes_rexmt += (p->rxmit - p->start);
600 }
601 return (p);
602 }
603
604 /*
605 * After a timeout, the SACK list may be rebuilt. This SACK information
606 * should be used to avoid retransmitting SACKed data. This function
607 * traverses the SACK list to see if snd_nxt should be moved forward.
608 */
609 void
610 tcp_sack_adjust(struct tcpcb *tp)
611 {
612 INP_LOCK_ASSERT(tp->t_inpcb);
613 struct sackhole *cur = tp->snd_holes;
614 if (cur == NULL)
615 return; /* No holes */
616 if (SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
617 return; /* We're already beyond any SACKed blocks */
618 /*
619 * Two cases for which we want to advance snd_nxt:
620 * i) snd_nxt lies between end of one hole and beginning of another
621 * ii) snd_nxt lies between end of last hole and rcv_lastsack
622 */
623 while (cur->next) {
624 if (SEQ_LT(tp->snd_nxt, cur->end))
625 return;
626 if (SEQ_GEQ(tp->snd_nxt, cur->next->start))
627 cur = cur->next;
628 else {
629 tp->snd_nxt = cur->next->start;
630 return;
631 }
632 }
633 if (SEQ_LT(tp->snd_nxt, cur->end))
634 return;
635 tp->snd_nxt = tp->rcv_lastsack;
636 return;
637 }
638
639 /*
640 * Calculate the number of SACKed bytes in the scoreboard by
641 * subtracting the amount of data accounted for in sackholes
642 * from the total span of the scoreboard. Also returns the
643 * amount of data that is "lost" and has not yet been retransmitted.
644 */
645 int
646 tcp_sacked_bytes(struct tcpcb *tp, int *lost_not_rexmitted)
647 {
648 INP_LOCK_ASSERT(tp->t_inpcb);
649 struct sackhole *cur = tp->snd_holes;
650 int sacked = 0;
651 u_long lost = 0;
652
653 if (cur == NULL) /* Scoreboard empty. */
654 goto out;
655 if (SEQ_GEQ(tp->snd_una, tp->rcv_lastsack)) /* Scoreboard is stale. */
656 goto out;
657 sacked = tp->rcv_lastsack - cur->start;
658 while (cur) {
659 lost += (cur->end - cur->rxmit);
660 sacked -= (cur->end - cur->start);
661 cur = cur->next;
662 }
663 out:
664 if (lost_not_rexmitted)
665 *lost_not_rexmitted = lost;
666 return (sacked);
667 }
Cache object: 6ef066dd350b10ffbdbadc3b331d9afd
|