FreeBSD/Linux Kernel Cross Reference
sys/netiso/tp_subr.c
1 /* $NetBSD: tp_subr.c,v 1.18 2003/08/11 15:17:31 itojun Exp $ */
2
3 /*-
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)tp_subr.c 8.1 (Berkeley) 6/10/93
32 */
33
34 /***********************************************************
35 Copyright IBM Corporation 1987
36
37 All Rights Reserved
38
39 Permission to use, copy, modify, and distribute this software and its
40 documentation for any purpose and without fee is hereby granted,
41 provided that the above copyright notice appear in all copies and that
42 both that copyright notice and this permission notice appear in
43 supporting documentation, and that the name of IBM not be
44 used in advertising or publicity pertaining to distribution of the
45 software without specific, written prior permission.
46
47 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
48 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
49 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
50 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
51 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
52 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
53 SOFTWARE.
54
55 ******************************************************************/
56
57 /*
58 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
59 */
60 /*
61 * The main work of data transfer is done here. These routines are called
62 * from tp.trans. They include the routines that check the validity of acks
63 * and Xacks, (tp_goodack() and tp_goodXack() ) take packets from socket
64 * buffers and send them (tp_send()), drop the data from the socket buffers
65 * (tp_sbdrop()), and put incoming packet data into socket buffers
66 * (tp_stash()).
67 */
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: tp_subr.c,v 1.18 2003/08/11 15:17:31 itojun Exp $");
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/mbuf.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/protosw.h>
78 #include <sys/errno.h>
79 #include <sys/time.h>
80 #include <sys/kernel.h>
81
82 #include <netiso/tp_ip.h>
83 #include <netiso/iso.h>
84 #include <netiso/argo_debug.h>
85 #include <netiso/tp_timer.h>
86 #include <netiso/tp_param.h>
87 #include <netiso/tp_stat.h>
88 #include <netiso/tp_pcb.h>
89 #include <netiso/tp_tpdu.h>
90 #include <netiso/tp_trace.h>
91 #include <netiso/tp_meas.h>
92 #include <netiso/tp_seq.h>
93 #include <netiso/tp_var.h>
94
95 int tprexmtthresh = 3; /* duplicate-AK count at which tp_goodack() resends from tp_snduna */
96
97 /*
98 * CALLED FROM:
99 * tp.trans, when an XAK arrives
100 * FUNCTION and ARGUMENTS:
101 * Determines if the sequence number (seq) from the XAK
102 * acks anything new. If so, drop the appropriate tpdu
103 * from the XPD send queue.
104 * RETURN VALUE:
105 * Returns 1 if it did this, 0 if the ack caused no action.
106 */
107 int
108 tp_goodXack(tpcb, seq)
109 struct tp_pcb *tpcb;
110 SeqNum seq;
111 {
112
113 #ifdef TPPT
114 if (tp_traceflags[D_XPD]) {
115 tptraceTPCB(TPPTgotXack,
116 seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
117 tpcb->tp_snduna);
118 }
119 #endif
120
121 if (seq == tpcb->tp_Xuna) {
122 tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
123
124 /*
125 * DROP 1 packet from the Xsnd socket buf - just so happens
126 * that only one packet can be there at any time so drop the
127 * whole thing. If you allow > 1 packet the socket buffer,
128 * then you'll have to keep track of how many characters went
129 * w/ each XPD tpdu, so this will get messier
130 */
131 #ifdef ARGO_DEBUG
132 if (argo_debug[D_XPD]) {
133 dump_mbuf(tpcb->tp_Xsnd.sb_mb,
134 "tp_goodXack Xsnd before sbdrop");
135 }
136 #endif
137
138 #ifdef TPPT
139 if (tp_traceflags[D_XPD]) {
140 tptraceTPCB(TPPTmisc,
141 "goodXack: dropping cc ",
142 (int) (tpcb->tp_Xsnd.sb_cc),
143 0, 0, 0);
144 }
145 #endif
146 sbdroprecord(&tpcb->tp_Xsnd);
147 return 1;
148 }
149 return 0;
150 }
151
152 /*
153 * CALLED FROM:
154 * tp_good_ack()
155 * FUNCTION and ARGUMENTS:
156 * updates
157 * smoothed average round trip time (*rtt)
158 * roundtrip time variance (*rtv) - actually deviation, not variance
159 * given the new value (diff)
160 * RETURN VALUE:
161 * void
162 */
163
164 void
165 tp_rtt_rtv(tpcb)
166 struct tp_pcb *tpcb;
167 {
168 int old = tpcb->tp_rtt;
169 int elapsed, delta = 0;
170
171 elapsed = hardclock_ticks - tpcb->tp_rttemit;
172
173 if (tpcb->tp_rtt != 0) {
174 /*
175 * rtt is the smoothed round trip time in machine clock
176 * ticks (hz). It is stored as a fixed point number,
177 * unscaled (unlike the tcp srtt). The rationale here
178 * is that it is only significant to the nearest unit of
179 * slowtimo, which is at least 8 machine clock ticks
180 * so there is no need to scale. The smoothing is done
181 * according to the same formula as TCP (rtt = rtt*7/8
182 * + measured_rtt/8).
183 */
184 delta = elapsed - tpcb->tp_rtt;
185 if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
186 tpcb->tp_rtt = 1;
187 /*
188 * rtv is a smoothed accumulated mean difference, unscaled
189 * for reasons expressed above.
190 * It is smoothed with an alpha of .75, and the round trip timer
191 * will be set to rtt + 4*rtv, also as TCP does.
192 */
193 if (delta < 0)
194 delta = -delta;
195 if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
196 tpcb->tp_rtv = 1;
197 } else {
198 /*
199 * No rtt measurement yet - use the unsmoothed rtt. Set the
200 * variance to half the rtt (so our first retransmit happens
201 * at 3*rtt)
202 */
203 tpcb->tp_rtt = elapsed;
204 tpcb->tp_rtv = elapsed >> 1;
205 }
206 tpcb->tp_rttemit = 0;
207 tpcb->tp_rxtshift = 0;
208 /*
209 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
210 * Because of the way we do the smoothing, srtt and rttvar
211 * will each average +1/2 tick of bias. When we compute
212 * the retransmit timer, we want 1/2 tick of rounding and
213 * 1 extra tick because of +-1/2 tick uncertainty in the
214 * firing of the timer. The bias will give us exactly the
215 * 1.5 tick we need. But, because the bias is
216 * statistical, we have to test that we don't drop below
217 * the minimum feasible timer (which is 2 ticks)."
218 */
219 TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
220 tpcb->tp_peer_acktime, 128 /* XXX */ );
221 #ifdef ARGO_DEBUG
222 if (argo_debug[D_RTT]) {
223 printf("%s tpcb %p, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
224 "tp_rtt_rtv:", tpcb, elapsed, delta, tpcb->tp_rtt, tpcb->tp_rtv, old);
225 }
226 #endif
227 tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
228 }
229
230 /*
231 * CALLED FROM:
232 * tp.trans when an AK arrives
233 * FUNCTION and ARGUMENTS:
234 * Given (cdt), the credit from the AK tpdu, and
235 * (seq), the sequence number from the AK tpdu,
236 * tp_goodack() determines if the AK acknowledges something in the send
237 * window, and if so, drops the appropriate packets from the retransmission
238 * list, computes the round trip time, and updates the retransmission timer
239 * based on the new smoothed round trip time.
240 * RETURN VALUE:
241 * Returns 1 if
242 * EITHER it actually acked something heretofore unacknowledged
243 * OR no news but the credit should be processed.
244 * If something heretofore unacked was acked with this sequence number,
245 * the appropriate tpdus are dropped from the retransmission control list,
246 * by calling tp_sbdrop().
247 * No need to see the tpdu itself.
248 */
249 int
250 tp_goodack(tpcb, cdt, seq, subseq)
251 struct tp_pcb *tpcb;
252 u_int cdt;
253 SeqNum seq;
254 u_int subseq;
255 {
256 int old_fcredit = 0;
257 int bang = 0; /* bang --> ack for something
258 * heretofore unacked */
259 u_int bytes_acked;
260
261 #ifdef ARGO_DEBUG
262 if (argo_debug[D_ACKRECV]) {
263 printf("goodack tpcb %p seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
264 tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
265 }
266 #endif
267
268 #ifdef TPPT
269 if (tp_traceflags[D_ACKRECV]) {
270 tptraceTPCB(TPPTgotack,
271 seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, subseq);
272 }
273 #endif
274
275 #ifdef TP_PERF_MEAS
276 if (DOPERF(tpcb)) {
277 tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *) 0, seq, 0, 0);
278 }
279 #endif
280
281 if (seq == tpcb->tp_snduna) {
282 if (subseq < tpcb->tp_r_subseq ||
283 (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
284 discard_the_ack:
285 #ifdef ARGO_DEBUG
286 if (argo_debug[D_ACKRECV]) {
287 printf("goodack discard : tpcb %p subseq %d r_subseq %d\n",
288 tpcb, subseq, tpcb->tp_r_subseq);
289 }
290 #endif
291 goto done;
292 }
293 if (cdt == tpcb->tp_fcredit /* && thus subseq >
294 tpcb->tp_r_subseq */ ) {
295 tpcb->tp_r_subseq = subseq;
296 if (tpcb->tp_timer[TM_data_retrans] == 0)
297 tpcb->tp_dupacks = 0;
298 else if (++tpcb->tp_dupacks == tprexmtthresh) {
299 /*
300 * partner went out of his way to signal with
301 * different subsequences that he has the
302 * same lack of an expected packet. This may
303 * be an early indiciation of a loss
304 */
305
306 SeqNum onxt = tpcb->tp_sndnxt;
307 struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
308 u_int win = min(tpcb->tp_fcredit,
309 tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
310 #ifdef ARGO_DEBUG
311 if (argo_debug[D_ACKRECV]) {
312 printf("%s tpcb %p seq 0x%x rttseq 0x%x onxt 0x%x\n",
313 "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
314 }
315 #endif
316 if (win < 2)
317 win = 2;
318 tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
319 tpcb->tp_timer[TM_data_retrans] = 0;
320 tpcb->tp_rttemit = 0;
321 tpcb->tp_sndnxt = tpcb->tp_snduna;
322 tpcb->tp_sndnxt_m = 0;
323 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
324 tp_send(tpcb);
325 tpcb->tp_cong_win = tpcb->tp_ssthresh +
326 tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
327 if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
328 tpcb->tp_sndnxt = onxt;
329 tpcb->tp_sndnxt_m = onxt_m;
330 }
331 } else if (tpcb->tp_dupacks > tprexmtthresh) {
332 tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
333 }
334 goto done;
335 }
336 } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
337 goto discard_the_ack;
338 /*
339 * If the congestion window was inflated to account
340 * for the other side's cached packets, retract it.
341 */
342 if (tpcb->tp_dupacks > tprexmtthresh &&
343 tpcb->tp_cong_win > tpcb->tp_ssthresh)
344 tpcb->tp_cong_win = tpcb->tp_ssthresh;
345 tpcb->tp_r_subseq = subseq;
346 old_fcredit = tpcb->tp_fcredit;
347 tpcb->tp_fcredit = cdt;
348 if (cdt > tpcb->tp_maxfcredit)
349 tpcb->tp_maxfcredit = cdt;
350 tpcb->tp_dupacks = 0;
351
352 if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
353
354 tpsbcheck(tpcb, 0);
355 bytes_acked = tp_sbdrop(tpcb, seq);
356 tpsbcheck(tpcb, 1);
357 /*
358 * If transmit timer is running and timed sequence
359 * number was acked, update smoothed round trip time.
360 * Since we now have an rtt measurement, cancel the
361 * timer backoff (cf., Phil Karn's retransmit alg.).
362 * Recompute the initial retransmit timer.
363 */
364 if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
365 tp_rtt_rtv(tpcb);
366 /*
367 * If all outstanding data is acked, stop retransmit timer.
368 * If there is more data to be acked, restart retransmit
369 * timer, using current (possibly backed-off) value.
370 * OSI combines the keepalive and persistance functions.
371 * So, there is no persistance timer per se, to restart.
372 */
373 if (tpcb->tp_class != TP_CLASS_0)
374 tpcb->tp_timer[TM_data_retrans] =
375 (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
376 /*
377 * When new data is acked, open the congestion window.
378 * If the window gives us less than ssthresh packets
379 * in flight, open exponentially (maxseg per packet).
380 * Otherwise open linearly: maxseg per window
381 * (maxseg^2 / cwnd per packet), plus a constant
382 * fraction of a packet (maxseg/8) to help larger windows
383 * open quickly enough.
384 */
385 {
386 u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
387
388 incr = min(incr, bytes_acked);
389 if (cw > tpcb->tp_ssthresh)
390 incr = incr * incr / cw + incr / 8;
391 tpcb->tp_cong_win =
392 min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
393 }
394 tpcb->tp_snduna = seq;
395 if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
396 tpcb->tp_sndnxt = seq;
397 tpcb->tp_sndnxt_m = 0;
398 }
399 bang++;
400 }
401 if (cdt != 0 && old_fcredit == 0) {
402 tpcb->tp_sendfcc = 1;
403 }
404 if (cdt == 0) {
405 if (old_fcredit != 0)
406 IncStat(ts_zfcdt);
407 /* The following might mean that the window shrunk */
408 if (tpcb->tp_timer[TM_data_retrans]) {
409 tpcb->tp_timer[TM_data_retrans] = 0;
410 tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
411 if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
412 tpcb->tp_sndnxt = tpcb->tp_snduna;
413 tpcb->tp_sndnxt_m = 0;
414 }
415 }
416 }
417 tpcb->tp_fcredit = cdt;
418 bang |= (old_fcredit < cdt);
419
420 done:
421 #ifdef ARGO_DEBUG
422 if (argo_debug[D_ACKRECV]) {
423 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%lx\n",
424 bang, cdt, old_fcredit, tpcb->tp_cong_win);
425 }
426 #endif
427 /*
428 * if (bang) XXXXX Very bad to remove this test, but somethings
429 * broken
430 */
431 tp_send(tpcb);
432 return (bang);
433 }
434
435 /*
436 * CALLED FROM:
437 * tp_goodack()
438 * FUNCTION and ARGUMENTS:
439 * drops everything up TO but not INCLUDING seq # (seq)
440 * from the retransmission queue.
441 */
442 int
443 tp_sbdrop(tpcb, seq)
444 struct tp_pcb *tpcb;
445 SeqNum seq;
446 {
447 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
448 int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
449 int oldcc = sb->sb_cc, oldi = i;
450
451 if (i >= tpcb->tp_seqhalf)
452 printf("tp_spdropping too much -- should panic");
453 while (i-- > 0)
454 sbdroprecord(sb);
455 #ifdef ARGO_DEBUG
456 if (argo_debug[D_ACKRECV]) {
457 printf("tp_sbdroping %d pkts %ld bytes on %p at 0x%x\n",
458 oldi, oldcc - sb->sb_cc, tpcb, seq);
459 }
460 #endif
461 if (sb_notify(sb))
462 sowwakeup(tpcb->tp_sock);
463 return (oldcc - sb->sb_cc);
464 }
465
466 /*
467 * CALLED FROM:
468 * tp.trans on user send request, arrival of AK and arrival of XAK
469 * FUNCTION and ARGUMENTS:
470 * Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
471 * Emits until a) runs out of data, or b) runs into an XPD mark, or
472 * c) it hits seq number (highseq) limited by cong or credit.
473 *
474 * If you want XPD to buffer > 1 du per socket buffer, you can
475 * modifiy this to issue XPD tpdus also, but then it'll have
476 * to take some argument(s) to distinguish between the type of DU to
477 * hand tp_emit.
478 *
479 * When something is sent for the first time, its time-of-send
480 * is stashed (in system clock ticks rather than pf_slowtimo ticks).
481 * When the ack arrives, the smoothed round-trip time is figured
482 * using this value.
483 */
484 void
485 tp_send(tpcb)
486 struct tp_pcb *tpcb;
487 {
488 int len;
489 struct mbuf *m;
490 struct mbuf *mb = 0;
491 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
492 unsigned int eotsdu = 0;
493 SeqNum highseq, checkseq;
494 int idle, idleticks, off, cong_win;
495 #ifdef TP_PERF_MEAS
496 int send_start_time = hardclock_ticks;
497 SeqNum oldnxt = tpcb->tp_sndnxt;
498 #endif /* TP_PERF_MEAS */
499
500 idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
501 if (idle) {
502 idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
503 if (idleticks > tpcb->tp_dt_ticks)
504 /*
505 * We have been idle for "a while" and no acks are
506 * expected to clock out any data we send --
507 * slow start to get ack "clock" running again.
508 */
509 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
510 }
511 cong_win = tpcb->tp_cong_win;
512 highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
513 if (tpcb->tp_Xsnd.sb_mb)
514 highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
515
516 #ifdef ARGO_DEBUG
517 if (argo_debug[D_DATA]) {
518 printf("tp_send enter tpcb %p nxt 0x%x win %d high 0x%x\n",
519 tpcb, tpcb->tp_sndnxt, cong_win, highseq);
520 }
521 #endif
522 #ifdef TPPT
523 if (tp_traceflags[D_DATA]) {
524 tptraceTPCB(TPPTmisc, "tp_send sndnew snduna",
525 tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
526 tptraceTPCB(TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
527 tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
528 }
529 #endif
530 #ifdef TPPT
531 if (tp_traceflags[D_DATA]) {
532 tptraceTPCB(TPPTmisc, "tp_send 2 nxt high fcredit congwin",
533 tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
534 }
535 #endif
536
537 if (tpcb->tp_sndnxt_m)
538 m = tpcb->tp_sndnxt_m;
539 else {
540 off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
541 for (m = sb->sb_mb; m && off > 0; m = m->m_next)
542 off--;
543 }
544 /*
545 * Avoid silly window syndrome here . . . figure out how!
546 */
547 checkseq = tpcb->tp_sndnum;
548 if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
549 checkseq = highseq; /* i.e. DON'T retain highest assigned
550 * packet */
551
552 while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
553
554 eotsdu = (m->m_flags & M_EOR) != 0;
555 len = m->m_pkthdr.len;
556 if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
557 len < (tpcb->tp_l_tpdusize / 2))
558 break; /* Nagle . . . . . */
559 cong_win -= len;
560 /*
561 * make a copy - mb goes into the retransmission list while m
562 * gets emitted. m_copy won't copy a zero-length mbuf.
563 */
564 mb = m;
565 m = m_copy(mb, 0, M_COPYALL);
566 if (m == NULL)
567 break;
568 #ifdef TPPT
569 if (tp_traceflags[D_STASH]) {
570 tptraceTPCB(TPPTmisc,
571 "tp_send mcopy nxt high eotsdu len",
572 tpcb->tp_sndnxt, highseq, eotsdu, len);
573 }
574 #endif
575
576 #ifdef ARGO_DEBUG
577 if (argo_debug[D_DATA]) {
578 printf("tp_sending tpcb %p nxt 0x%x\n",
579 tpcb, tpcb->tp_sndnxt);
580 }
581 #endif
582 /*
583 * when headers are precomputed, may need to fill in checksum
584 * here
585 */
586 tpcb->tp_sock->so_error =
587 tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m);
588 if (tpcb->tp_sock->so_error != 0)
589 /* error */
590 break;
591 m = mb->m_nextpkt;
592 tpcb->tp_sndnxt_m = m;
593 if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
594 SEQ_INC(tpcb, tpcb->tp_sndnew);
595 /*
596 * Time this transmission if not a retransmission and
597 * not currently timing anything.
598 */
599 if (tpcb->tp_rttemit == 0) {
600 tpcb->tp_rttemit = hardclock_ticks;
601 tpcb->tp_rttseq = tpcb->tp_sndnxt;
602 }
603 tpcb->tp_sndnxt = tpcb->tp_sndnew;
604 } else
605 SEQ_INC(tpcb, tpcb->tp_sndnxt);
606 /*
607 * Set retransmit timer if not currently set.
608 * Initial value for retransmit timer is smoothed
609 * round-trip time + 2 * round-trip time variance.
610 * Initialize shift counter which is used for backoff
611 * of retransmit time.
612 */
613 if (tpcb->tp_timer[TM_data_retrans] == 0 &&
614 tpcb->tp_class != TP_CLASS_0) {
615 tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
616 tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
617 tpcb->tp_rxtshift = 0;
618 }
619 }
620 if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
621 tpcb->tp_oktonagle = 0;
622 #ifdef TP_PERF_MEAS
623 if (DOPERF(tpcb)) {
624 int npkts;
625 int s, elapsed, *t;
626 struct timeval now;
627
628 elapsed = hardclock_ticks - send_start_time;
629
630 npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
631
632 if (npkts > 0)
633 tpcb->tp_Nwindow++;
634
635 if (npkts > TP_PM_MAX)
636 npkts = TP_PM_MAX;
637
638 t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
639 *t += (t - elapsed) >> TP_RTT_ALPHA;
640
641 if (mb == 0) {
642 IncPStat(tpcb, tps_win_lim_by_data[npkts]);
643 } else {
644 IncPStat(tpcb, tps_win_lim_by_cdt[npkts]);
645 /* not true with congestion-window being used */
646 }
647 now.tv_sec = elapsed / hz;
648 now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
649 tpmeas(tpcb->tp_lref,
650 TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
651 }
652 #endif /* TP_PERF_MEAS */
653
654
655 #ifdef TPPT
656 if (tp_traceflags[D_DATA]) {
657 tptraceTPCB(TPPTmisc,
658 "tp_send at end: new nxt eotsdu error",
659 tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu,
660 tpcb->tp_sock->so_error);
661
662 }
663 #endif
664 }
665
666 int TPNagleok;
667 int TPNagled;
668
669 int
670 tp_packetize(tpcb, m, eotsdu)
671 struct tp_pcb *tpcb;
672 struct mbuf *m;
673 int eotsdu;
674 {
675 struct mbuf *n = NULL;
676 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
677 int maxsize = tpcb->tp_l_tpdusize
678 - tp_headersize(DT_TPDU_type, tpcb)
679 - (tpcb->tp_use_checksum ? 4 : 0);
680 int totlen = m->m_pkthdr.len;
681
682 /*
683 * Pre-packetize the data in the sockbuf
684 * according to negotiated mtu. Do it here
685 * where we can safely wait for mbufs.
686 *
687 * This presumes knowledge of sockbuf conventions.
688 * TODO: allocate space for header and fill it in (once!).
689 */
690 #ifdef ARGO_DEBUG
691 if (argo_debug[D_DATA]) {
692 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
693 maxsize, totlen, eotsdu, tpcb->tp_sndnum);
694 }
695 #endif
696 if (tpcb->tp_oktonagle) {
697 if ((n = sb->sb_mb) == 0)
698 panic("tp_packetize");
699 while (n->m_nextpkt)
700 n = n->m_nextpkt;
701 if (n->m_flags & M_EOR)
702 panic("tp_packetize 2");
703 SEQ_INC(tpcb, tpcb->tp_sndnum);
704 if (totlen + n->m_pkthdr.len < maxsize) {
705 /*
706 * There is an unsent packet with space,
707 * combine data
708 */
709 struct mbuf *old_n = n;
710 tpsbcheck(tpcb, 3);
711 n->m_pkthdr.len += totlen;
712 while (n->m_next)
713 n = n->m_next;
714 sbcompress(sb, m, n);
715 tpsbcheck(tpcb, 4);
716 n = old_n;
717 TPNagled++;
718 goto out;
719 }
720 }
721
722 while (m) {
723 n = m;
724 if (totlen > maxsize) {
725 if ((m = m_split(n, maxsize, M_WAIT)) == 0)
726 panic("tp_packetize");
727 } else
728 m = 0;
729 totlen -= maxsize;
730 tpsbcheck(tpcb, 5);
731 sbappendrecord(sb, n);
732 tpsbcheck(tpcb, 6);
733 SEQ_INC(tpcb, tpcb->tp_sndnum);
734 }
735 out:
736 if (eotsdu) {
737 n->m_flags |= M_EOR; /* XXX belongs at end */
738 tpcb->tp_oktonagle = 0;
739 } else {
740 SEQ_DEC(tpcb, tpcb->tp_sndnum);
741 tpcb->tp_oktonagle = 1;
742 TPNagleok++;
743 }
744
745 #ifdef ARGO_DEBUG
746 if (argo_debug[D_DATA]) {
747 printf("SEND out: oktonagle %d sndnum 0x%x\n",
748 tpcb->tp_oktonagle, tpcb->tp_sndnum);
749 }
750 #endif
751 return 0;
752 }
753
754
755 /*
756 * NAME: tp_stash()
757 * CALLED FROM:
758 * tp.trans on arrival of a DT tpdu
759 * FUNCTION, ARGUMENTS, and RETURN VALUE:
760 * Returns 1 if
761 * a) something new arrived and it's got eotsdu_reached bit on,
762 * b) this arrival was caused other out-of-sequence things to be
763 * accepted, or
764 * c) this arrival is the highest seq # for which we last gave credit
765 * (sender just sent a whole window)
766 * In other words, returns 1 if tp should send an ack immediately, 0 if
767 * the ack can wait a while.
768 *
769 * Note: this implementation no longer renegs on credit, (except
770 * when debugging option D_RENEG is on, for the purpose of testing
771 * ack subsequencing), so we don't need to check for incoming tpdus
772 * being in a reneged portion of the window.
773 */
774
775 int
776 tp_stash(tpcb, e)
777 struct tp_pcb *tpcb;
778 struct tp_event *e;
779 {
780 int ack_reason = tpcb->tp_ack_strat & ACK_STRAT_EACH;
781 /* 0--> delay acks until full window */
782 /* 1--> ack each tpdu */
783 #define E e->TPDU_ATTR(DT)
784
785 if (E.e_eot) {
786 struct mbuf *n = E.e_data;
787 n->m_flags |= M_EOR;
788 n->m_nextpkt = 0;
789 }
790 #ifdef ARGO_DEBUG
791 if (argo_debug[D_STASH]) {
792 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
793 "stash: so_rcv before appending");
794 dump_mbuf(E.e_data,
795 "stash: e_data before appending");
796 }
797 #endif
798
799 #ifdef TP_PERF_MEAS
800 if (DOPERF(tpcb)) {
801 PStat(tpcb, Nb_from_ll) += E.e_datalen;
802 tpmeas(tpcb->tp_lref, TPtime_from_ll,
803 &e->e_time, E.e_seq,
804 (u_int) PStat(tpcb, Nb_from_ll),
805 (u_int) E.e_datalen);
806 }
807 #endif
808
809 if (E.e_seq == tpcb->tp_rcvnxt) {
810
811 #ifdef ARGO_DEBUG
812 if (argo_debug[D_STASH]) {
813 printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
814 E.e_seq, E.e_datalen, E.e_eot);
815 }
816 #endif
817
818 #ifdef TPPT
819 if (tp_traceflags[D_STASH]) {
820 tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
821 E.e_seq, E.e_datalen, E.e_eot, 0);
822 }
823 #endif
824
825 SET_DELACK(tpcb);
826
827 sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
828
829 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
830 /*
831 * move chains from the reassembly queue to the socket buffer
832 */
833 if (tpcb->tp_rsycnt) {
834 struct mbuf **mp;
835 struct mbuf **mplim;
836
837 mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt %
838 tpcb->tp_maxlcredit);
839 mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
840
841 while (tpcb->tp_rsycnt && *mp) {
842 sbappend(&tpcb->tp_sock->so_rcv, *mp);
843 tpcb->tp_rsycnt--;
844 *mp = 0;
845 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
846 ack_reason |= ACK_REORDER;
847 if (++mp == mplim)
848 mp = tpcb->tp_rsyq;
849 }
850 }
851 #ifdef ARGO_DEBUG
852 if (argo_debug[D_STASH]) {
853 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
854 "stash: so_rcv after appending");
855 }
856 #endif
857
858 } else {
859 struct mbuf **mp;
860 SeqNum uwe;
861
862 #ifdef TPPT
863 if (tp_traceflags[D_STASH]) {
864 tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
865 E.e_seq, tpcb->tp_rcvnxt,
866 tpcb->tp_lcredit, 0);
867 }
868 #endif
869
870 if (tpcb->tp_rsyq == 0)
871 tp_rsyset(tpcb);
872 uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
873 if (tpcb->tp_rsyq == 0 ||
874 !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
875 ack_reason = ACK_DONT;
876 m_freem(E.e_data);
877 } else if (*(mp = tpcb->tp_rsyq +
878 (E.e_seq % tpcb->tp_maxlcredit)) != NULL ) {
879 #ifdef ARGO_DEBUG
880 if (argo_debug[D_STASH]) {
881 printf("tp_stash - drop & ack\n");
882 }
883 #endif
884
885 /*
886 * retransmission - drop it and force
887 * an ack
888 */
889 IncStat(ts_dt_dup);
890 #ifdef TP_PERF_MEAS
891 if (DOPERF(tpcb)) {
892 IncPStat(tpcb, tps_n_ack_cuz_dup);
893 }
894 #endif
895
896 m_freem(E.e_data);
897 ack_reason |= ACK_DUP;
898 } else {
899 *mp = E.e_data;
900 tpcb->tp_rsycnt++;
901 ack_reason = ACK_DONT;
902 }
903 }
904 /*
905 * there were some comments of historical interest
906 * here.
907 */
908 {
909 LOCAL_CREDIT(tpcb);
910
911 if (E.e_seq == tpcb->tp_sent_uwe)
912 ack_reason |= ACK_STRAT_FULLWIN;
913
914 #ifdef TPPT
915 if (tp_traceflags[D_STASH]) {
916 tptraceTPCB(TPPTmisc,
917 "end of stash, eot, ack_reason, sent_uwe ",
918 E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
919 }
920 #endif
921
922 if (ack_reason == ACK_DONT) {
923 IncStat(ts_ackreason[ACK_DONT]);
924 return 0;
925 } else {
926 #ifdef TP_PERF_MEAS
927 if (DOPERF(tpcb)) {
928 if (ack_reason & ACK_STRAT_EACH) {
929 IncPStat(tpcb, tps_n_ack_cuz_strat);
930 } else if (ack_reason & ACK_STRAT_FULLWIN) {
931 IncPStat(tpcb, tps_n_ack_cuz_fullwin);
932 } else if (ack_reason & ACK_REORDER) {
933 IncPStat(tpcb, tps_n_ack_cuz_reorder);
934 }
935 tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
936 SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
937 }
938 #endif
939 {
940 int i;
941
942 /*
943 * keep track of all reasons
944 * that apply
945 */
946 for (i = 1; i < _ACK_NUM_REASONS_; i++) {
947 if (ack_reason & (1 << i))
948 IncStat(ts_ackreason[i]);
949 }
950 }
951 return 1;
952 }
953 }
954 }
955
956 /*
957 * tp_rsyflush - drop all the packets on the reassembly queue.
958 * Do this when closing the socket, or when somebody has changed
959 * the space avaible in the receive socket (XXX).
960 */
961 void
962 tp_rsyflush(tpcb)
963 struct tp_pcb *tpcb;
964 {
965 struct mbuf **mp;
966 if (tpcb->tp_rsycnt) {
967 for (mp = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
968 --mp >= tpcb->tp_rsyq;)
969 if (*mp) {
970 tpcb->tp_rsycnt--;
971 m_freem(*mp);
972 }
973 if (tpcb->tp_rsycnt) {
974 printf("tp_rsyflush %p\n", tpcb);
975 tpcb->tp_rsycnt = 0;
976 }
977 }
978 free((caddr_t) tpcb->tp_rsyq, M_PCB);
979 tpcb->tp_rsyq = 0;
980 }
981
982 void
983 tp_rsyset(tpcb)
984 struct tp_pcb *tpcb;
985 {
986 struct socket *so = tpcb->tp_sock;
987 int maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
988 int old_credit = tpcb->tp_maxlcredit;
989 caddr_t rsyq;
990
991 tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
992 (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize) / tpcb->tp_l_tpdusize);
993
994 if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
995 return;
996 maxcredit *= sizeof(struct mbuf *);
997 if (tpcb->tp_rsyq)
998 tp_rsyflush(tpcb);
999 if ((rsyq = (caddr_t) malloc(maxcredit, M_PCB, M_NOWAIT)) != NULL)
1000 bzero(rsyq, maxcredit);
1001 tpcb->tp_rsyq = (struct mbuf **) rsyq;
1002 }
1003
1004
1005 void
1006 tpsbcheck(tpcb, i)
1007 struct tp_pcb *tpcb;
1008 int i;
1009 {
1010 struct mbuf *n, *m;
1011 int len = 0, mbcnt = 0, pktlen;
1012 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
1013
1014 for (n = sb->sb_mb; n; n = n->m_nextpkt) {
1015 if ((n->m_flags & M_PKTHDR) == 0)
1016 panic("tpsbcheck nohdr");
1017 pktlen = len + n->m_pkthdr.len;
1018 for (m = n; m; m = m->m_next) {
1019 len += m->m_len;
1020 mbcnt += MSIZE;
1021 if (m->m_flags & M_EXT)
1022 mbcnt += m->m_ext.ext_size;
1023 }
1024 if (len != pktlen) {
1025 printf("test %d; len %d != pktlen %d on mbuf %p\n",
1026 i, len, pktlen, n);
1027 panic("tpsbcheck short");
1028 }
1029 }
1030 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
1031 printf("test %d: cc %d != %ld || mbcnt %d != %ld\n", i, len, sb->sb_cc,
1032 mbcnt, sb->sb_mbcnt);
1033 panic("tpsbcheck");
1034 }
1035 }
Cache object: e9dd234a2ea7b679988a22caf4c12c9a
|