The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netiso/tp_subr.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: tp_subr.c,v 1.19 2004/04/19 05:16:46 matt Exp $        */
    2 
    3 /*-
    4  * Copyright (c) 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)tp_subr.c   8.1 (Berkeley) 6/10/93
   32  */
   33 
   34 /***********************************************************
   35                 Copyright IBM Corporation 1987
   36 
   37                       All Rights Reserved
   38 
   39 Permission to use, copy, modify, and distribute this software and its
   40 documentation for any purpose and without fee is hereby granted,
   41 provided that the above copyright notice appear in all copies and that
   42 both that copyright notice and this permission notice appear in
   43 supporting documentation, and that the name of IBM not be
   44 used in advertising or publicity pertaining to distribution of the
   45 software without specific, written prior permission.
   46 
   47 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
   48 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
   49 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
   50 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
   51 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
   52 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
   53 SOFTWARE.
   54 
   55 ******************************************************************/
   56 
   57 /*
   58  * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
   59  */
   60 /*
   61  * The main work of data transfer is done here. These routines are called
   62  * from tp.trans. They include the routines that check the validity of acks
   63  * and Xacks (tp_goodack() and tp_goodXack()), take packets from socket
   64  * buffers and send them (tp_send()), drop the data from the socket buffers
   65  * (tp_sbdrop()), and put incoming packet data into socket buffers
   66  * (tp_stash()).
   67  */
   68 
   69 #include <sys/cdefs.h>
   70 __KERNEL_RCSID(0, "$NetBSD: tp_subr.c,v 1.19 2004/04/19 05:16:46 matt Exp $");
   71 
   72 #include <sys/param.h>
   73 #include <sys/systm.h>
   74 #include <sys/mbuf.h>
   75 #include <sys/socket.h>
   76 #include <sys/socketvar.h>
   77 #include <sys/protosw.h>
   78 #include <sys/errno.h>
   79 #include <sys/time.h>
   80 #include <sys/kernel.h>
   81 
   82 #include <netiso/tp_ip.h>
   83 #include <netiso/iso.h>
   84 #include <netiso/argo_debug.h>
   85 #include <netiso/tp_timer.h>
   86 #include <netiso/tp_param.h>
   87 #include <netiso/tp_stat.h>
   88 #include <netiso/tp_pcb.h>
   89 #include <netiso/tp_tpdu.h>
   90 #include <netiso/tp_trace.h>
   91 #include <netiso/tp_meas.h>
   92 #include <netiso/tp_seq.h>
   93 #include <netiso/tp_var.h>
   94 
   95 int             tprexmtthresh = 3;      /* tp_goodack(): value of tp_dupacks at which an early retransmit is forced */
   96 
   97 /*
   98  * CALLED FROM:
   99  *      tp.trans, when an XAK arrives
  100  * FUNCTION and ARGUMENTS:
  101  *      Determines if the sequence number (seq) from the XAK
  102  *      acks anything new.  If so, drop the appropriate tpdu
  103  *      from the XPD send queue.
  104  * RETURN VALUE:
  105  *      Returns 1 if it did this, 0 if the ack caused no action.
  106  */
  107 int
  108 tp_goodXack(struct tp_pcb  *tpcb, SeqNum seq)
  109 {
  110 
  111 #ifdef TPPT
  112         if (tp_traceflags[D_XPD]) {
  113                 tptraceTPCB(TPPTgotXack,
  114                       seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
  115                             tpcb->tp_snduna);
  116         }
  117 #endif
  118 
  119         if (seq == tpcb->tp_Xuna) {
  120                 tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
  121 
  122                 /*
  123                  * DROP 1 packet from the Xsnd socket buf - just so happens
  124                  * that only one packet can be there at any time so drop the
  125                  * whole thing.  If you allow > 1 packet the socket buffer,
  126                  * then you'll have to keep track of how many characters went
  127                  * w/ each XPD tpdu, so this will get messier
  128                  */
  129 #ifdef ARGO_DEBUG
  130                 if (argo_debug[D_XPD]) {
  131                         dump_mbuf(tpcb->tp_Xsnd.sb_mb,
  132                                   "tp_goodXack Xsnd before sbdrop");
  133                 }
  134 #endif
  135 
  136 #ifdef TPPT
  137                 if (tp_traceflags[D_XPD]) {
  138                         tptraceTPCB(TPPTmisc,
  139                                     "goodXack: dropping cc ",
  140                                     (int) (tpcb->tp_Xsnd.sb_cc),
  141                                     0, 0, 0);
  142                 }
  143 #endif
  144                 sbdroprecord(&tpcb->tp_Xsnd);
  145                 return 1;
  146         }
  147         return 0;
  148 }
  149 
  150 /*
  151  * CALLED FROM:
  152  *  tp_good_ack()
  153  * FUNCTION and ARGUMENTS:
  154  *  updates
  155  *  smoothed average round trip time (*rtt)
  156  *  roundtrip time variance (*rtv) - actually deviation, not variance
  157  *  given the new value (diff)
  158  * RETURN VALUE:
  159  * void
  160  */
  161 
  162 void
  163 tp_rtt_rtv(struct tp_pcb *tpcb)
  164 {
  165         int             old = tpcb->tp_rtt;
  166         int             elapsed, delta = 0;
  167 
  168         elapsed = hardclock_ticks - tpcb->tp_rttemit;
  169 
  170         if (tpcb->tp_rtt != 0) {
  171                 /*
  172                  * rtt is the smoothed round trip time in machine clock
  173                  * ticks (hz). It is stored as a fixed point number,
  174                  * unscaled (unlike the tcp srtt).  The rationale here
  175                  * is that it is only significant to the nearest unit of
  176                  * slowtimo, which is at least 8 machine clock ticks
  177                  * so there is no need to scale.  The smoothing is done
  178                  * according to the same formula as TCP (rtt = rtt*7/8
  179                  * + measured_rtt/8).
  180                  */
  181                 delta = elapsed - tpcb->tp_rtt;
  182                 if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
  183                         tpcb->tp_rtt = 1;
  184                 /*
  185                  * rtv is a smoothed accumulated mean difference, unscaled
  186                  * for reasons expressed above.
  187                  * It is smoothed with an alpha of .75, and the round trip timer
  188                  * will be set to rtt + 4*rtv, also as TCP does.
  189                  */
  190                 if (delta < 0)
  191                         delta = -delta;
  192                 if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
  193                         tpcb->tp_rtv = 1;
  194         } else {
  195                 /*
  196                  * No rtt measurement yet - use the unsmoothed rtt. Set the
  197                  * variance to half the rtt (so our first retransmit happens
  198                  * at 3*rtt)
  199                  */
  200                 tpcb->tp_rtt = elapsed;
  201                 tpcb->tp_rtv = elapsed >> 1;
  202         }
  203         tpcb->tp_rttemit = 0;
  204         tpcb->tp_rxtshift = 0;
  205         /*
  206          * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
  207          * Because of the way we do the smoothing, srtt and rttvar
  208          * will each average +1/2 tick of bias.  When we compute
  209          * the retransmit timer, we want 1/2 tick of rounding and
  210          * 1 extra tick because of +-1/2 tick uncertainty in the
  211          * firing of the timer.  The bias will give us exactly the
  212          * 1.5 tick we need.  But, because the bias is
  213          * statistical, we have to test that we don't drop below
  214          * the minimum feasible timer (which is 2 ticks)."
  215          */
  216         TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
  217                     tpcb->tp_peer_acktime, 128 /* XXX */ );
  218 #ifdef ARGO_DEBUG
  219         if (argo_debug[D_RTT]) {
  220                 printf("%s tpcb %p, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
  221                        "tp_rtt_rtv:", tpcb, elapsed, delta, tpcb->tp_rtt, tpcb->tp_rtv, old);
  222         }
  223 #endif
  224         tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
  225 }
  226 
  227 /*
  228  * CALLED FROM:
  229  *  tp.trans when an AK arrives
  230  * FUNCTION and ARGUMENTS:
  231  *      Given (cdt), the credit from the AK tpdu, and
  232  *      (seq), the sequence number from the AK tpdu,
  233  *  tp_goodack() determines if the AK acknowledges something in the send
  234  *      window, and if so, drops the appropriate packets from the retransmission
  235  *  list, computes the round trip time, and updates the retransmission timer
  236  *  based on the new smoothed round trip time.
  237  * RETURN VALUE:
  238  *      Returns 1 if
  239  *      EITHER it actually acked something heretofore unacknowledged
  240  *      OR no news but the credit should be processed.
  241  *      If something heretofore unacked was acked with this sequence number,
  242  *      the appropriate tpdus are dropped from the retransmission control list,
  243  *      by calling tp_sbdrop().
  244  *      No need to see the tpdu itself.
  245  */
  246 int
  247 tp_goodack(struct tp_pcb *tpcb, u_int cdt, SeqNum seq, u_int subseq)
  248 {
  249         int             old_fcredit = 0;
  250         int             bang = 0;       /* bang --> ack for something
  251                                          * heretofore unacked */
  252         u_int           bytes_acked;
  253 
  254 #ifdef ARGO_DEBUG
  255         if (argo_debug[D_ACKRECV]) {
  256                 printf("goodack tpcb %p seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
  257                        tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
  258         }
  259 #endif
  260 
  261 #ifdef TPPT
  262         if (tp_traceflags[D_ACKRECV]) {
  263                 tptraceTPCB(TPPTgotack,
  264                         seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, subseq);
  265         }
  266 #endif
  267 
  268 #ifdef TP_PERF_MEAS
  269                 if (DOPERF(tpcb)) {
  270                 tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *) 0, seq, 0, 0);
  271         }
  272 #endif
  273 
  274         if (seq == tpcb->tp_snduna) {
  275                 if (subseq < tpcb->tp_r_subseq ||
  276                  (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
  277         discard_the_ack:
  278 #ifdef ARGO_DEBUG
  279                         if (argo_debug[D_ACKRECV]) {
  280                                 printf("goodack discard : tpcb %p subseq %d r_subseq %d\n",
  281                                        tpcb, subseq, tpcb->tp_r_subseq);
  282                         }
  283 #endif
  284                         goto done;
  285                 }
  286                 if (cdt == tpcb->tp_fcredit     /* && thus subseq >
  287                         tpcb->tp_r_subseq */ ) {
  288                         tpcb->tp_r_subseq = subseq;
  289                         if (tpcb->tp_timer[TM_data_retrans] == 0)
  290                                 tpcb->tp_dupacks = 0;
  291                         else if (++tpcb->tp_dupacks == tprexmtthresh) {
  292                                 /*
  293                                  * partner went out of his way to signal with
  294                                  * different subsequences that he has the
  295                                  * same lack of an expected packet.  This may
  296                                  * be an early indiciation of a loss
  297                                  */
  298 
  299                                 SeqNum          onxt = tpcb->tp_sndnxt;
  300                                 struct mbuf    *onxt_m = tpcb->tp_sndnxt_m;
  301                                 u_int           win = min(tpcb->tp_fcredit,
  302                                 tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
  303 #ifdef ARGO_DEBUG
  304                                 if (argo_debug[D_ACKRECV]) {
  305                                         printf("%s tpcb %p seq 0x%x rttseq 0x%x onxt 0x%x\n",
  306                                                "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
  307                                 }
  308 #endif
  309                                 if (win < 2)
  310                                         win = 2;
  311                                 tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
  312                                 tpcb->tp_timer[TM_data_retrans] = 0;
  313                                 tpcb->tp_rttemit = 0;
  314                                 tpcb->tp_sndnxt = tpcb->tp_snduna;
  315                                 tpcb->tp_sndnxt_m = 0;
  316                                 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
  317                                 tp_send(tpcb);
  318                                 tpcb->tp_cong_win = tpcb->tp_ssthresh +
  319                                         tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
  320                                 if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
  321                                         tpcb->tp_sndnxt = onxt;
  322                                         tpcb->tp_sndnxt_m = onxt_m;
  323                                 }
  324                         } else if (tpcb->tp_dupacks > tprexmtthresh) {
  325                                 tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
  326                         }
  327                         goto done;
  328                 }
  329         } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
  330                 goto discard_the_ack;
  331         /*
  332          * If the congestion window was inflated to account
  333          * for the other side's cached packets, retract it.
  334          */
  335         if (tpcb->tp_dupacks > tprexmtthresh &&
  336             tpcb->tp_cong_win > tpcb->tp_ssthresh)
  337                 tpcb->tp_cong_win = tpcb->tp_ssthresh;
  338         tpcb->tp_r_subseq = subseq;
  339         old_fcredit = tpcb->tp_fcredit;
  340         tpcb->tp_fcredit = cdt;
  341         if (cdt > tpcb->tp_maxfcredit)
  342                 tpcb->tp_maxfcredit = cdt;
  343         tpcb->tp_dupacks = 0;
  344 
  345         if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
  346 
  347                 tpsbcheck(tpcb, 0);
  348                 bytes_acked = tp_sbdrop(tpcb, seq);
  349                 tpsbcheck(tpcb, 1);
  350                 /*
  351                  * If transmit timer is running and timed sequence
  352                  * number was acked, update smoothed round trip time.
  353                  * Since we now have an rtt measurement, cancel the
  354                  * timer backoff (cf., Phil Karn's retransmit alg.).
  355                  * Recompute the initial retransmit timer.
  356                  */
  357                 if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
  358                         tp_rtt_rtv(tpcb);
  359                 /*
  360                  * If all outstanding data is acked, stop retransmit timer.
  361                  * If there is more data to be acked, restart retransmit
  362                  * timer, using current (possibly backed-off) value.
  363                  * OSI combines the keepalive and persistance functions.
  364                  * So, there is no persistance timer per se, to restart.
  365                  */
  366                 if (tpcb->tp_class != TP_CLASS_0)
  367                         tpcb->tp_timer[TM_data_retrans] =
  368                                 (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
  369                 /*
  370                  * When new data is acked, open the congestion window.
  371                  * If the window gives us less than ssthresh packets
  372                  * in flight, open exponentially (maxseg per packet).
  373                  * Otherwise open linearly: maxseg per window
  374                  * (maxseg^2 / cwnd per packet), plus a constant
  375                  * fraction of a packet (maxseg/8) to help larger windows
  376                  * open quickly enough.
  377                  */
  378                 {
  379                         u_int           cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
  380 
  381                         incr = min(incr, bytes_acked);
  382                         if (cw > tpcb->tp_ssthresh)
  383                                 incr = incr * incr / cw + incr / 8;
  384                         tpcb->tp_cong_win =
  385                                 min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
  386                 }
  387                 tpcb->tp_snduna = seq;
  388                 if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
  389                         tpcb->tp_sndnxt = seq;
  390                         tpcb->tp_sndnxt_m = 0;
  391                 }
  392                 bang++;
  393         }
  394         if (cdt != 0 && old_fcredit == 0) {
  395                 tpcb->tp_sendfcc = 1;
  396         }
  397         if (cdt == 0) {
  398                 if (old_fcredit != 0)
  399                         IncStat(ts_zfcdt);
  400                 /* The following might mean that the window shrunk */
  401                 if (tpcb->tp_timer[TM_data_retrans]) {
  402                         tpcb->tp_timer[TM_data_retrans] = 0;
  403                         tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
  404                         if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
  405                                 tpcb->tp_sndnxt = tpcb->tp_snduna;
  406                                 tpcb->tp_sndnxt_m = 0;
  407                         }
  408                 }
  409         }
  410         tpcb->tp_fcredit = cdt;
  411         bang |= (old_fcredit < cdt);
  412 
  413 done:
  414 #ifdef ARGO_DEBUG
  415         if (argo_debug[D_ACKRECV]) {
  416                 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%lx\n",
  417                        bang, cdt, old_fcredit, tpcb->tp_cong_win);
  418         }
  419 #endif
  420         /*
  421          * if (bang) XXXXX Very bad to remove this test, but somethings
  422          * broken
  423          */
  424         tp_send(tpcb);
  425         return (bang);
  426 }
  427 
  428 /*
  429  * CALLED FROM:
  430  *  tp_goodack()
  431  * FUNCTION and ARGUMENTS:
  432  *  drops everything up TO but not INCLUDING seq # (seq)
  433  *  from the retransmission queue.
  434  */
  435 int
  436 tp_sbdrop(struct tp_pcb *tpcb, SeqNum seq)
  437 {
  438         struct sockbuf *sb = &tpcb->tp_sock->so_snd;
  439         int    i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
  440         int             oldcc = sb->sb_cc, oldi = i;
  441 
  442         if (i >= tpcb->tp_seqhalf)
  443                 printf("tp_spdropping too much -- should panic");
  444         while (i-- > 0)
  445                 sbdroprecord(sb);
  446 #ifdef ARGO_DEBUG
  447         if (argo_debug[D_ACKRECV]) {
  448                 printf("tp_sbdroping %d pkts %ld bytes on %p at 0x%x\n",
  449                        oldi, oldcc - sb->sb_cc, tpcb, seq);
  450         }
  451 #endif
  452         if (sb_notify(sb))
  453                 sowwakeup(tpcb->tp_sock);
  454         return (oldcc - sb->sb_cc);
  455 }
  456 
  457 /*
  458  * CALLED FROM:
  459  *      tp.trans on user send request, arrival of AK and arrival of XAK
  460  * FUNCTION and ARGUMENTS:
  461  *      Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
  462  *      Emits until a) runs out of data, or  b) runs into an XPD mark, or
  463  *                      c) it hits seq number (highseq) limited by cong or credit.
  464  *
  465  *      If you want XPD to buffer > 1 du per socket buffer, you can
  466  *      modifiy this to issue XPD tpdus also, but then it'll have
  467  *      to take some argument(s) to distinguish between the type of DU to
  468  *      hand tp_emit.
  469  *
  470  *      When something is sent for the first time, its time-of-send
  471  *      is stashed (in system clock ticks rather than pf_slowtimo ticks).
  472  *  When the ack arrives, the smoothed round-trip time is figured
  473  *  using this value.
  474  */
  475 void
  476 tp_send(struct tp_pcb *tpcb)
  477 {
  478         int    len;
  479         struct mbuf *m;
  480         struct mbuf    *mb = 0;
  481         struct sockbuf *sb = &tpcb->tp_sock->so_snd;
  482         unsigned int    eotsdu = 0;
  483         SeqNum          highseq, checkseq;
  484         int             idle, idleticks, off, cong_win;
  485 #ifdef TP_PERF_MEAS
  486         int             send_start_time = hardclock_ticks;
  487         SeqNum          oldnxt = tpcb->tp_sndnxt;
  488 #endif /* TP_PERF_MEAS */
  489 
  490         idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
  491         if (idle) {
  492                 idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
  493                 if (idleticks > tpcb->tp_dt_ticks)
  494                         /*
  495                          * We have been idle for "a while" and no acks are
  496                          * expected to clock out any data we send --
  497                          * slow start to get ack "clock" running again.
  498                          */
  499                         tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
  500         }
  501         cong_win = tpcb->tp_cong_win;
  502         highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
  503         if (tpcb->tp_Xsnd.sb_mb)
  504                 highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
  505 
  506 #ifdef ARGO_DEBUG
  507         if (argo_debug[D_DATA]) {
  508                 printf("tp_send enter tpcb %p nxt 0x%x win %d high 0x%x\n",
  509                        tpcb, tpcb->tp_sndnxt, cong_win, highseq);
  510         }
  511 #endif
  512 #ifdef TPPT
  513         if (tp_traceflags[D_DATA]) {
  514                 tptraceTPCB(TPPTmisc, "tp_send sndnew snduna",
  515                             tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
  516         tptraceTPCB(TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
  517             tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
  518         }
  519 #endif
  520 #ifdef TPPT
  521                 if (tp_traceflags[D_DATA]) {
  522                 tptraceTPCB(TPPTmisc, "tp_send 2 nxt high fcredit congwin",
  523                       tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
  524         }
  525 #endif
  526 
  527                 if (tpcb->tp_sndnxt_m)
  528                 m = tpcb->tp_sndnxt_m;
  529         else {
  530                 off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
  531                 for (m = sb->sb_mb; m && off > 0; m = m->m_next)
  532                         off--;
  533         }
  534         /*
  535          * Avoid silly window syndrome here . . . figure out how!
  536          */
  537         checkseq = tpcb->tp_sndnum;
  538         if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
  539                 checkseq = highseq;     /* i.e. DON'T retain highest assigned
  540                                          * packet */
  541 
  542         while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
  543 
  544                 eotsdu = (m->m_flags & M_EOR) != 0;
  545                 len = m->m_pkthdr.len;
  546                 if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
  547                     len < (tpcb->tp_l_tpdusize / 2))
  548                         break;  /* Nagle . . . . . */
  549                 cong_win -= len;
  550                 /*
  551                  * make a copy - mb goes into the retransmission list while m
  552                  * gets emitted.  m_copy won't copy a zero-length mbuf.
  553                  */
  554                 mb = m;
  555                 m = m_copy(mb, 0, M_COPYALL);
  556                 if (m == NULL)
  557                         break;
  558 #ifdef TPPT
  559                 if (tp_traceflags[D_STASH]) {
  560                         tptraceTPCB(TPPTmisc,
  561                                     "tp_send mcopy nxt high eotsdu len",
  562                                     tpcb->tp_sndnxt, highseq, eotsdu, len);
  563                 }
  564 #endif
  565 
  566 #ifdef ARGO_DEBUG
  567                         if (argo_debug[D_DATA]) {
  568                         printf("tp_sending tpcb %p nxt 0x%x\n",
  569                                tpcb, tpcb->tp_sndnxt);
  570                 }
  571 #endif
  572                 /*
  573                  * when headers are precomputed, may need to fill in checksum
  574                  * here
  575                  */
  576                 tpcb->tp_sock->so_error =
  577                       tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m);
  578                 if (tpcb->tp_sock->so_error != 0)
  579                         /* error */
  580                         break;
  581                 m = mb->m_nextpkt;
  582                 tpcb->tp_sndnxt_m = m;
  583                 if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
  584                         SEQ_INC(tpcb, tpcb->tp_sndnew);
  585                         /*
  586                          * Time this transmission if not a retransmission and
  587                          * not currently timing anything.
  588                          */
  589                         if (tpcb->tp_rttemit == 0) {
  590                                 tpcb->tp_rttemit = hardclock_ticks;
  591                                 tpcb->tp_rttseq = tpcb->tp_sndnxt;
  592                         }
  593                         tpcb->tp_sndnxt = tpcb->tp_sndnew;
  594                 } else
  595                         SEQ_INC(tpcb, tpcb->tp_sndnxt);
  596                 /*
  597                  * Set retransmit timer if not currently set.
  598                  * Initial value for retransmit timer is smoothed
  599                  * round-trip time + 2 * round-trip time variance.
  600                  * Initialize shift counter which is used for backoff
  601                  * of retransmit time.
  602                  */
  603                 if (tpcb->tp_timer[TM_data_retrans] == 0 &&
  604                     tpcb->tp_class != TP_CLASS_0) {
  605                         tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
  606                         tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
  607                         tpcb->tp_rxtshift = 0;
  608                 }
  609         }
  610         if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
  611                 tpcb->tp_oktonagle = 0;
  612 #ifdef TP_PERF_MEAS
  613         if (DOPERF(tpcb)) {
  614                 int    npkts;
  615                 int             s, elapsed, *t;
  616                 struct timeval  now;
  617 
  618                 elapsed = hardclock_ticks - send_start_time;
  619 
  620                 npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
  621 
  622                 if (npkts > 0)
  623                         tpcb->tp_Nwindow++;
  624 
  625                 if (npkts > TP_PM_MAX)
  626                         npkts = TP_PM_MAX;
  627 
  628                 t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
  629                 *t += (t - elapsed) >> TP_RTT_ALPHA;
  630 
  631                 if (mb == 0) {
  632                         IncPStat(tpcb, tps_win_lim_by_data[npkts]);
  633                 } else {
  634                         IncPStat(tpcb, tps_win_lim_by_cdt[npkts]);
  635                         /* not true with congestion-window being used */
  636                 }
  637                 now.tv_sec = elapsed / hz;
  638                 now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
  639                 tpmeas(tpcb->tp_lref,
  640                        TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
  641         }
  642 #endif                          /* TP_PERF_MEAS */
  643 
  644 
  645 #ifdef TPPT
  646         if (tp_traceflags[D_DATA]) {
  647                 tptraceTPCB(TPPTmisc,
  648                             "tp_send at end: new nxt eotsdu error",
  649                             tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu,
  650                             tpcb->tp_sock->so_error);
  651 
  652         }
  653 #endif
  654 }
  655 
  656 int             TPNagleok;      /* NOTE(review): presumably a Nagle statistics counter; not referenced in the visible part of this file -- confirm */
  657 int             TPNagled;       /* NOTE(review): presumably a Nagle statistics counter; not referenced in the visible part of this file -- confirm */
  658 
  659 int
  660 tp_packetize(struct tp_pcb *tpcb, struct mbuf *m, int eotsdu)
  661 {
  662         struct mbuf *n = NULL;
  663         struct sockbuf *sb = &tpcb->tp_sock->so_snd;
  664         int             maxsize = tpcb->tp_l_tpdusize
  665                             - tp_headersize(DT_TPDU_type, tpcb)
  666                             - (tpcb->tp_use_checksum ? 4 : 0);
  667         int             totlen = m->m_pkthdr.len;
  668 
  669         /*
  670          * Pre-packetize the data in the sockbuf
  671          * according to negotiated mtu.  Do it here
  672          * where we can safely wait for mbufs.
  673          *
  674          * This presumes knowledge of sockbuf conventions.
  675          * TODO: allocate space for header and fill it in (once!).
  676          */
  677 #ifdef ARGO_DEBUG
  678         if (argo_debug[D_DATA]) {
  679                 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
  680                        maxsize, totlen, eotsdu, tpcb->tp_sndnum);
  681         }
  682 #endif
  683         if (tpcb->tp_oktonagle) {
  684                 if ((n = sb->sb_mb) == 0)
  685                         panic("tp_packetize");
  686                 while (n->m_nextpkt)
  687                         n = n->m_nextpkt;
  688                 if (n->m_flags & M_EOR)
  689                         panic("tp_packetize 2");
  690                 SEQ_INC(tpcb, tpcb->tp_sndnum);
  691                 if (totlen + n->m_pkthdr.len < maxsize) {
  692                         /*
  693                          * There is an unsent packet with space,
  694                          * combine data
  695                          */
  696                         struct mbuf    *old_n = n;
  697                         tpsbcheck(tpcb, 3);
  698                         n->m_pkthdr.len += totlen;
  699                         while (n->m_next)
  700                                 n = n->m_next;
  701                         sbcompress(sb, m, n);
  702                         tpsbcheck(tpcb, 4);
  703                         n = old_n;
  704                         TPNagled++;
  705                         goto out;
  706                 }
  707         }
  708 
  709         while (m) {
  710                 n = m;
  711                 if (totlen > maxsize) {
  712                         if ((m = m_split(n, maxsize, M_WAIT)) == 0)
  713                                 panic("tp_packetize");
  714                 } else
  715                         m = 0;
  716                 totlen -= maxsize;
  717                 tpsbcheck(tpcb, 5);
  718                 sbappendrecord(sb, n);
  719                 tpsbcheck(tpcb, 6);
  720                 SEQ_INC(tpcb, tpcb->tp_sndnum);
  721         }
  722 out:
  723         if (eotsdu) {
  724                 n->m_flags |= M_EOR;    /* XXX belongs at end */
  725                 tpcb->tp_oktonagle = 0;
  726         } else {
  727                 SEQ_DEC(tpcb, tpcb->tp_sndnum);
  728                 tpcb->tp_oktonagle = 1;
  729                 TPNagleok++;
  730         }
  731 
  732 #ifdef ARGO_DEBUG
  733         if (argo_debug[D_DATA]) {
  734                 printf("SEND out: oktonagle %d sndnum 0x%x\n",
  735                        tpcb->tp_oktonagle, tpcb->tp_sndnum);
  736         }
  737 #endif
  738         return 0;
  739 }
  740 
  741 
  742 /*
  743  * NAME: tp_stash()
  744  * CALLED FROM:
  745  *      tp.trans on arrival of a DT tpdu
  746  * FUNCTION, ARGUMENTS, and RETURN VALUE:
  747  *      Returns 1 if
  748  *      a) something new arrived and it's got eotsdu_reached bit on,
  749  *      b) this arrival was caused other out-of-sequence things to be
  750  *      accepted, or
  751  *      c) this arrival is the highest seq # for which we last gave credit
  752  *      (sender just sent a whole window)
  753  *  In other words, returns 1 if tp should send an ack immediately, 0 if
  754  *  the ack can wait a while.
  755  *
  756  * Note: this implementation no longer renegs on credit, (except
  757  * when debugging option D_RENEG is on, for the purpose of testing
  758  * ack subsequencing), so we don't  need to check for incoming tpdus
  759  * being in a reneged portion of the window.
  760  */
  761 
  762 int
  763 tp_stash(struct tp_pcb *tpcb, struct tp_event *e)
  764 {
  765         int    ack_reason = tpcb->tp_ack_strat & ACK_STRAT_EACH;
  766         /* 0--> delay acks until full window */
  767         /* 1--> ack each tpdu */
  768 #define E e->TPDU_ATTR(DT)
  769 
  770         if (E.e_eot) {
  771                 struct mbuf *n = E.e_data;
  772                 n->m_flags |= M_EOR;
  773                 n->m_nextpkt = 0;
  774         }
  775 #ifdef ARGO_DEBUG
  776         if (argo_debug[D_STASH]) {
  777                 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
  778                           "stash: so_rcv before appending");
  779                 dump_mbuf(E.e_data,
  780                           "stash: e_data before appending");
  781         }
  782 #endif
  783 
  784 #ifdef TP_PERF_MEAS
  785         if (DOPERF(tpcb)) {
  786                 PStat(tpcb, Nb_from_ll) += E.e_datalen;
  787                 tpmeas(tpcb->tp_lref, TPtime_from_ll,
  788                        &e->e_time, E.e_seq,
  789                        (u_int) PStat(tpcb, Nb_from_ll),
  790                        (u_int) E.e_datalen);
  791         }
  792 #endif
  793 
  794         if (E.e_seq == tpcb->tp_rcvnxt) {
  795 
  796 #ifdef ARGO_DEBUG
  797                 if (argo_debug[D_STASH]) {
  798                         printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
  799                              E.e_seq, E.e_datalen, E.e_eot);
  800                 }
  801 #endif
  802 
  803 #ifdef TPPT
  804                 if (tp_traceflags[D_STASH]) {
  805                         tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
  806                           E.e_seq, E.e_datalen, E.e_eot, 0);
  807                 }
  808 #endif
  809 
  810                 SET_DELACK(tpcb);
  811 
  812                 sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
  813 
  814                 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
  815                 /*
  816                  * move chains from the reassembly queue to the socket buffer
  817                  */
  818                 if (tpcb->tp_rsycnt) {
  819                         struct mbuf **mp;
  820                         struct mbuf   **mplim;
  821 
  822                         mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt %
  823                                               tpcb->tp_maxlcredit);
  824                         mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
  825 
  826                         while (tpcb->tp_rsycnt && *mp) {
  827                                 sbappend(&tpcb->tp_sock->so_rcv, *mp);
  828                                 tpcb->tp_rsycnt--;
  829                                 *mp = 0;
  830                                 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
  831                                 ack_reason |= ACK_REORDER;
  832                                 if (++mp == mplim)
  833                                         mp = tpcb->tp_rsyq;
  834                         }
  835                 }
  836 #ifdef ARGO_DEBUG
  837                 if (argo_debug[D_STASH]) {
  838                         dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
  839                            "stash: so_rcv after appending");
  840                 }
  841 #endif
  842 
  843         } else {
  844                 struct mbuf **mp;
  845                 SeqNum          uwe;
  846 
  847 #ifdef TPPT
  848                 if (tp_traceflags[D_STASH]) {
  849                         tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
  850                                     E.e_seq, tpcb->tp_rcvnxt,
  851                                     tpcb->tp_lcredit, 0);
  852                 }
  853 #endif
  854 
  855                 if (tpcb->tp_rsyq == 0)
  856                         tp_rsyset(tpcb);
  857                 uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
  858                 if (tpcb->tp_rsyq == 0 ||
  859                     !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
  860                         ack_reason = ACK_DONT;
  861                         m_freem(E.e_data);
  862                 } else if (*(mp = tpcb->tp_rsyq +
  863                              (E.e_seq % tpcb->tp_maxlcredit)) != NULL ) {
  864 #ifdef ARGO_DEBUG
  865                         if (argo_debug[D_STASH]) {
  866                                 printf("tp_stash - drop & ack\n");
  867                         }
  868 #endif
  869 
  870                         /*
  871                          * retransmission - drop it and force
  872                          * an ack
  873                          */
  874                         IncStat(ts_dt_dup);
  875 #ifdef TP_PERF_MEAS
  876                         if (DOPERF(tpcb)) {
  877                                 IncPStat(tpcb, tps_n_ack_cuz_dup);
  878                         }
  879 #endif
  880 
  881                                 m_freem(E.e_data);
  882                         ack_reason |= ACK_DUP;
  883                 } else {
  884                         *mp = E.e_data;
  885                         tpcb->tp_rsycnt++;
  886                         ack_reason = ACK_DONT;
  887                 }
  888         }
  889         /*
  890          * there were some comments of historical interest
  891          * here.
  892          */
  893         {
  894                 LOCAL_CREDIT(tpcb);
  895 
  896                 if (E.e_seq == tpcb->tp_sent_uwe)
  897                         ack_reason |= ACK_STRAT_FULLWIN;
  898 
  899 #ifdef TPPT
  900                 if (tp_traceflags[D_STASH]) {
  901                         tptraceTPCB(TPPTmisc,
  902                  "end of stash, eot, ack_reason, sent_uwe ",
  903                  E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
  904                 }
  905 #endif
  906 
  907                 if (ack_reason == ACK_DONT) {
  908                         IncStat(ts_ackreason[ACK_DONT]);
  909                         return 0;
  910                 } else {
  911 #ifdef TP_PERF_MEAS
  912                         if (DOPERF(tpcb)) {
  913                                 if (ack_reason & ACK_STRAT_EACH) {
  914                                 IncPStat(tpcb, tps_n_ack_cuz_strat);
  915                         } else if (ack_reason & ACK_STRAT_FULLWIN) {
  916                                 IncPStat(tpcb, tps_n_ack_cuz_fullwin);
  917                         } else if (ack_reason & ACK_REORDER) {
  918                                 IncPStat(tpcb, tps_n_ack_cuz_reorder);
  919                         }
  920                         tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
  921                            SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
  922                         }
  923 #endif
  924                         {
  925                                 int    i;
  926 
  927                                 /*
  928                                  * keep track of all reasons
  929                                  * that apply
  930                                  */
  931                                 for (i = 1; i < _ACK_NUM_REASONS_; i++) {
  932                                         if (ack_reason & (1 << i))
  933                                                 IncStat(ts_ackreason[i]);
  934                                 }
  935                         }
  936                         return 1;
  937                 }
  938         }
  939 }
  940 
  941 /*
  942  * tp_rsyflush - drop all the packets on the reassembly queue.
  943  * Do this when closing the socket, or when somebody has changed
  944  * the space avaible in the receive socket (XXX).
  945  */
  946 void
  947 tp_rsyflush(struct tp_pcb *tpcb)
  948 {
  949         struct mbuf **mp;
  950         if (tpcb->tp_rsycnt) {
  951                 for (mp = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
  952                      --mp >= tpcb->tp_rsyq;)
  953                         if (*mp) {
  954                                 tpcb->tp_rsycnt--;
  955                                 m_freem(*mp);
  956                         }
  957                 if (tpcb->tp_rsycnt) {
  958                         printf("tp_rsyflush %p\n", tpcb);
  959                         tpcb->tp_rsycnt = 0;
  960                 }
  961         }
  962         free((caddr_t) tpcb->tp_rsyq, M_PCB);
  963         tpcb->tp_rsyq = 0;
  964 }
  965 
  966 void
  967 tp_rsyset(struct tp_pcb *tpcb)
  968 {
  969         struct socket *so = tpcb->tp_sock;
  970         int             maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
  971         int             old_credit = tpcb->tp_maxlcredit;
  972         caddr_t         rsyq;
  973 
  974         tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
  975                                               (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize) / tpcb->tp_l_tpdusize);
  976 
  977         if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
  978                 return;
  979         maxcredit *= sizeof(struct mbuf *);
  980         if (tpcb->tp_rsyq)
  981                 tp_rsyflush(tpcb);
  982         if ((rsyq = (caddr_t) malloc(maxcredit, M_PCB, M_NOWAIT)) != NULL)
  983                 bzero(rsyq, maxcredit);
  984         tpcb->tp_rsyq = (struct mbuf **) rsyq;
  985 }
  986 
  987 
  988 void
  989 tpsbcheck(struct tp_pcb *tpcb, int i)
  990 {
  991         struct mbuf *n, *m;
  992         int    len = 0, mbcnt = 0, pktlen;
  993         struct sockbuf *sb = &tpcb->tp_sock->so_snd;
  994 
  995         for (n = sb->sb_mb; n; n = n->m_nextpkt) {
  996                 if ((n->m_flags & M_PKTHDR) == 0)
  997                         panic("tpsbcheck nohdr");
  998                 pktlen = len + n->m_pkthdr.len;
  999                 for (m = n; m; m = m->m_next) {
 1000                         len += m->m_len;
 1001                         mbcnt += MSIZE;
 1002                         if (m->m_flags & M_EXT)
 1003                                 mbcnt += m->m_ext.ext_size;
 1004                 }
 1005                 if (len != pktlen) {
 1006                         printf("test %d; len %d != pktlen %d on mbuf %p\n",
 1007                                i, len, pktlen, n);
 1008                         panic("tpsbcheck short");
 1009                 }
 1010         }
 1011         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
 1012                 printf("test %d: cc %d != %ld || mbcnt %d != %ld\n", i, len, sb->sb_cc,
 1013                        mbcnt, sb->sb_mbcnt);
 1014                 panic("tpsbcheck");
 1015         }
 1016 }

Cache object: de71d67670f0ec35c56385ecfa58be1f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.