The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netiso/tp_subr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: tp_subr.c,v 1.18 2003/08/11 15:17:31 itojun Exp $      */
    2 
    3 /*-
    4  * Copyright (c) 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)tp_subr.c   8.1 (Berkeley) 6/10/93
   32  */
   33 
   34 /***********************************************************
   35                 Copyright IBM Corporation 1987
   36 
   37                       All Rights Reserved
   38 
   39 Permission to use, copy, modify, and distribute this software and its
   40 documentation for any purpose and without fee is hereby granted,
   41 provided that the above copyright notice appear in all copies and that
   42 both that copyright notice and this permission notice appear in
   43 supporting documentation, and that the name of IBM not be
   44 used in advertising or publicity pertaining to distribution of the
   45 software without specific, written prior permission.
   46 
   47 IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
   48 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
   49 IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
   50 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
   51 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
   52 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
   53 SOFTWARE.
   54 
   55 ******************************************************************/
   56 
   57 /*
   58  * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
   59  */
   60 /*
   61  * The main work of data transfer is done here. These routines are called
   62  * from tp.trans. They include the routines that check the validity of acks
   63  * and Xacks, (tp_goodack() and tp_goodXack() ) take packets from socket
   64  * buffers and send them (tp_send()), drop the data from the socket buffers
   65  * (tp_sbdrop()),  and put incoming packet data into socket buffers
   66  * (tp_stash()).
   67  */
   68 
   69 #include <sys/cdefs.h>
   70 __KERNEL_RCSID(0, "$NetBSD: tp_subr.c,v 1.18 2003/08/11 15:17:31 itojun Exp $");
   71 
   72 #include <sys/param.h>
   73 #include <sys/systm.h>
   74 #include <sys/mbuf.h>
   75 #include <sys/socket.h>
   76 #include <sys/socketvar.h>
   77 #include <sys/protosw.h>
   78 #include <sys/errno.h>
   79 #include <sys/time.h>
   80 #include <sys/kernel.h>
   81 
   82 #include <netiso/tp_ip.h>
   83 #include <netiso/iso.h>
   84 #include <netiso/argo_debug.h>
   85 #include <netiso/tp_timer.h>
   86 #include <netiso/tp_param.h>
   87 #include <netiso/tp_stat.h>
   88 #include <netiso/tp_pcb.h>
   89 #include <netiso/tp_tpdu.h>
   90 #include <netiso/tp_trace.h>
   91 #include <netiso/tp_meas.h>
   92 #include <netiso/tp_seq.h>
   93 #include <netiso/tp_var.h>
   94 
   /*
    * Duplicate-AK threshold used by tp_goodack(): when tp_dupacks reaches
    * this value while the data retransmit timer is running, tp_goodack()
    * retransmits starting at tp_snduna instead of waiting for the timer.
    */
   95 int             tprexmtthresh = 3;
   96 
   97 /*
   98  * CALLED FROM:
   99  *      tp.trans, when an XAK arrives
  100  * FUNCTION and ARGUMENTS:
  101  *      Determines if the sequence number (seq) from the XAK
  102  *      acks anything new.  If so, drop the appropriate tpdu
  103  *      from the XPD send queue.
  104  * RETURN VALUE:
  105  *      Returns 1 if it did this, 0 if the ack caused no action.
  106  */
  107 int
  108 tp_goodXack(tpcb, seq)
  109         struct tp_pcb  *tpcb;
  110         SeqNum          seq;
  111 {
  112 
  113 #ifdef TPPT
  114         if (tp_traceflags[D_XPD]) {
  115                 tptraceTPCB(TPPTgotXack,
  116                       seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
  117                             tpcb->tp_snduna);
  118         }
  119 #endif
  120 
  121         if (seq == tpcb->tp_Xuna) {
  122                 tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
  123 
  124                 /*
  125                  * DROP 1 packet from the Xsnd socket buf - just so happens
  126                  * that only one packet can be there at any time so drop the
  127                  * whole thing.  If you allow > 1 packet the socket buffer,
  128                  * then you'll have to keep track of how many characters went
  129                  * w/ each XPD tpdu, so this will get messier
  130                  */
  131 #ifdef ARGO_DEBUG
  132                 if (argo_debug[D_XPD]) {
  133                         dump_mbuf(tpcb->tp_Xsnd.sb_mb,
  134                                   "tp_goodXack Xsnd before sbdrop");
  135                 }
  136 #endif
  137 
  138 #ifdef TPPT
  139                 if (tp_traceflags[D_XPD]) {
  140                         tptraceTPCB(TPPTmisc,
  141                                     "goodXack: dropping cc ",
  142                                     (int) (tpcb->tp_Xsnd.sb_cc),
  143                                     0, 0, 0);
  144                 }
  145 #endif
  146                 sbdroprecord(&tpcb->tp_Xsnd);
  147                 return 1;
  148         }
  149         return 0;
  150 }
  151 
  152 /*
  153  * CALLED FROM:
  154  *  tp_good_ack()
  155  * FUNCTION and ARGUMENTS:
  156  *  updates
  157  *  smoothed average round trip time (*rtt)
  158  *  roundtrip time variance (*rtv) - actually deviation, not variance
  159  *  given the new value (diff)
  160  * RETURN VALUE:
  161  * void
  162  */
  163 
  164 void
  165 tp_rtt_rtv(tpcb)
  166         struct tp_pcb *tpcb;
  167 {
  168         int             old = tpcb->tp_rtt;
  169         int             elapsed, delta = 0;
  170 
  171         elapsed = hardclock_ticks - tpcb->tp_rttemit;
  172 
  173         if (tpcb->tp_rtt != 0) {
  174                 /*
  175                  * rtt is the smoothed round trip time in machine clock
  176                  * ticks (hz). It is stored as a fixed point number,
  177                  * unscaled (unlike the tcp srtt).  The rationale here
  178                  * is that it is only significant to the nearest unit of
  179                  * slowtimo, which is at least 8 machine clock ticks
  180                  * so there is no need to scale.  The smoothing is done
  181                  * according to the same formula as TCP (rtt = rtt*7/8
  182                  * + measured_rtt/8).
  183                  */
  184                 delta = elapsed - tpcb->tp_rtt;
  185                 if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
  186                         tpcb->tp_rtt = 1;
  187                 /*
  188                  * rtv is a smoothed accumulated mean difference, unscaled
  189                  * for reasons expressed above.
  190                  * It is smoothed with an alpha of .75, and the round trip timer
  191                  * will be set to rtt + 4*rtv, also as TCP does.
  192                  */
  193                 if (delta < 0)
  194                         delta = -delta;
  195                 if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
  196                         tpcb->tp_rtv = 1;
  197         } else {
  198                 /*
  199                  * No rtt measurement yet - use the unsmoothed rtt. Set the
  200                  * variance to half the rtt (so our first retransmit happens
  201                  * at 3*rtt)
  202                  */
  203                 tpcb->tp_rtt = elapsed;
  204                 tpcb->tp_rtv = elapsed >> 1;
  205         }
  206         tpcb->tp_rttemit = 0;
  207         tpcb->tp_rxtshift = 0;
  208         /*
  209          * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
  210          * Because of the way we do the smoothing, srtt and rttvar
  211          * will each average +1/2 tick of bias.  When we compute
  212          * the retransmit timer, we want 1/2 tick of rounding and
  213          * 1 extra tick because of +-1/2 tick uncertainty in the
  214          * firing of the timer.  The bias will give us exactly the
  215          * 1.5 tick we need.  But, because the bias is
  216          * statistical, we have to test that we don't drop below
  217          * the minimum feasible timer (which is 2 ticks)."
  218          */
  219         TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
  220                     tpcb->tp_peer_acktime, 128 /* XXX */ );
  221 #ifdef ARGO_DEBUG
  222         if (argo_debug[D_RTT]) {
  223                 printf("%s tpcb %p, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
  224                        "tp_rtt_rtv:", tpcb, elapsed, delta, tpcb->tp_rtt, tpcb->tp_rtv, old);
  225         }
  226 #endif
  227         tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
  228 }
  229 
  230 /*
  231  * CALLED FROM:
  232  *  tp.trans when an AK arrives
  233  * FUNCTION and ARGUMENTS:
  234  *      Given (cdt), the credit from the AK tpdu, and
  235  *      (seq), the sequence number from the AK tpdu,
  236  *  tp_goodack() determines if the AK acknowledges something in the send
  237  *      window, and if so, drops the appropriate packets from the retransmission
  238  *  list, computes the round trip time, and updates the retransmission timer
  239  *  based on the new smoothed round trip time.
  240  * RETURN VALUE:
  241  *      Returns 1 if
  242  *      EITHER it actually acked something heretofore unacknowledged
  243  *      OR no news but the credit should be processed.
  244  *      If something heretofore unacked was acked with this sequence number,
  245  *      the appropriate tpdus are dropped from the retransmission control list,
  246  *      by calling tp_sbdrop().
  247  *      No need to see the tpdu itself.
  248  */
  249 int
  250 tp_goodack(tpcb, cdt, seq, subseq)
  251         struct tp_pcb *tpcb;
  252         u_int           cdt;
  253         SeqNum seq;
  254         u_int           subseq;
  255 {
  256         int             old_fcredit = 0;
  257         int             bang = 0;       /* bang --> ack for something
  258                                          * heretofore unacked */
  259         u_int           bytes_acked;
  260 
  261 #ifdef ARGO_DEBUG
  262         if (argo_debug[D_ACKRECV]) {
  263                 printf("goodack tpcb %p seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
  264                        tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
  265         }
  266 #endif
  267 
  268 #ifdef TPPT
  269         if (tp_traceflags[D_ACKRECV]) {
  270                 tptraceTPCB(TPPTgotack,
  271                         seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, subseq);
  272         }
  273 #endif
  274 
  275 #ifdef TP_PERF_MEAS
  276                 if (DOPERF(tpcb)) {
  277                 tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *) 0, seq, 0, 0);
  278         }
  279 #endif
  280 
  281         if (seq == tpcb->tp_snduna) {
  282                 if (subseq < tpcb->tp_r_subseq ||
  283                  (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
  284         discard_the_ack:
  285 #ifdef ARGO_DEBUG
  286                         if (argo_debug[D_ACKRECV]) {
  287                                 printf("goodack discard : tpcb %p subseq %d r_subseq %d\n",
  288                                        tpcb, subseq, tpcb->tp_r_subseq);
  289                         }
  290 #endif
  291                         goto done;
  292                 }
  293                 if (cdt == tpcb->tp_fcredit     /* && thus subseq >
  294                         tpcb->tp_r_subseq */ ) {
  295                         tpcb->tp_r_subseq = subseq;
  296                         if (tpcb->tp_timer[TM_data_retrans] == 0)
  297                                 tpcb->tp_dupacks = 0;
  298                         else if (++tpcb->tp_dupacks == tprexmtthresh) {
  299                                 /*
  300                                  * partner went out of his way to signal with
  301                                  * different subsequences that he has the
  302                                  * same lack of an expected packet.  This may
  303                                  * be an early indiciation of a loss
  304                                  */
  305 
  306                                 SeqNum          onxt = tpcb->tp_sndnxt;
  307                                 struct mbuf    *onxt_m = tpcb->tp_sndnxt_m;
  308                                 u_int           win = min(tpcb->tp_fcredit,
  309                                 tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
  310 #ifdef ARGO_DEBUG
  311                                 if (argo_debug[D_ACKRECV]) {
  312                                         printf("%s tpcb %p seq 0x%x rttseq 0x%x onxt 0x%x\n",
  313                                                "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
  314                                 }
  315 #endif
  316                                 if (win < 2)
  317                                         win = 2;
  318                                 tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
  319                                 tpcb->tp_timer[TM_data_retrans] = 0;
  320                                 tpcb->tp_rttemit = 0;
  321                                 tpcb->tp_sndnxt = tpcb->tp_snduna;
  322                                 tpcb->tp_sndnxt_m = 0;
  323                                 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
  324                                 tp_send(tpcb);
  325                                 tpcb->tp_cong_win = tpcb->tp_ssthresh +
  326                                         tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
  327                                 if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
  328                                         tpcb->tp_sndnxt = onxt;
  329                                         tpcb->tp_sndnxt_m = onxt_m;
  330                                 }
  331                         } else if (tpcb->tp_dupacks > tprexmtthresh) {
  332                                 tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
  333                         }
  334                         goto done;
  335                 }
  336         } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
  337                 goto discard_the_ack;
  338         /*
  339          * If the congestion window was inflated to account
  340          * for the other side's cached packets, retract it.
  341          */
  342         if (tpcb->tp_dupacks > tprexmtthresh &&
  343             tpcb->tp_cong_win > tpcb->tp_ssthresh)
  344                 tpcb->tp_cong_win = tpcb->tp_ssthresh;
  345         tpcb->tp_r_subseq = subseq;
  346         old_fcredit = tpcb->tp_fcredit;
  347         tpcb->tp_fcredit = cdt;
  348         if (cdt > tpcb->tp_maxfcredit)
  349                 tpcb->tp_maxfcredit = cdt;
  350         tpcb->tp_dupacks = 0;
  351 
  352         if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
  353 
  354                 tpsbcheck(tpcb, 0);
  355                 bytes_acked = tp_sbdrop(tpcb, seq);
  356                 tpsbcheck(tpcb, 1);
  357                 /*
  358                  * If transmit timer is running and timed sequence
  359                  * number was acked, update smoothed round trip time.
  360                  * Since we now have an rtt measurement, cancel the
  361                  * timer backoff (cf., Phil Karn's retransmit alg.).
  362                  * Recompute the initial retransmit timer.
  363                  */
  364                 if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
  365                         tp_rtt_rtv(tpcb);
  366                 /*
  367                  * If all outstanding data is acked, stop retransmit timer.
  368                  * If there is more data to be acked, restart retransmit
  369                  * timer, using current (possibly backed-off) value.
  370                  * OSI combines the keepalive and persistance functions.
  371                  * So, there is no persistance timer per se, to restart.
  372                  */
  373                 if (tpcb->tp_class != TP_CLASS_0)
  374                         tpcb->tp_timer[TM_data_retrans] =
  375                                 (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
  376                 /*
  377                  * When new data is acked, open the congestion window.
  378                  * If the window gives us less than ssthresh packets
  379                  * in flight, open exponentially (maxseg per packet).
  380                  * Otherwise open linearly: maxseg per window
  381                  * (maxseg^2 / cwnd per packet), plus a constant
  382                  * fraction of a packet (maxseg/8) to help larger windows
  383                  * open quickly enough.
  384                  */
  385                 {
  386                         u_int           cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
  387 
  388                         incr = min(incr, bytes_acked);
  389                         if (cw > tpcb->tp_ssthresh)
  390                                 incr = incr * incr / cw + incr / 8;
  391                         tpcb->tp_cong_win =
  392                                 min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
  393                 }
  394                 tpcb->tp_snduna = seq;
  395                 if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
  396                         tpcb->tp_sndnxt = seq;
  397                         tpcb->tp_sndnxt_m = 0;
  398                 }
  399                 bang++;
  400         }
  401         if (cdt != 0 && old_fcredit == 0) {
  402                 tpcb->tp_sendfcc = 1;
  403         }
  404         if (cdt == 0) {
  405                 if (old_fcredit != 0)
  406                         IncStat(ts_zfcdt);
  407                 /* The following might mean that the window shrunk */
  408                 if (tpcb->tp_timer[TM_data_retrans]) {
  409                         tpcb->tp_timer[TM_data_retrans] = 0;
  410                         tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
  411                         if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
  412                                 tpcb->tp_sndnxt = tpcb->tp_snduna;
  413                                 tpcb->tp_sndnxt_m = 0;
  414                         }
  415                 }
  416         }
  417         tpcb->tp_fcredit = cdt;
  418         bang |= (old_fcredit < cdt);
  419 
  420 done:
  421 #ifdef ARGO_DEBUG
  422         if (argo_debug[D_ACKRECV]) {
  423                 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%lx\n",
  424                        bang, cdt, old_fcredit, tpcb->tp_cong_win);
  425         }
  426 #endif
  427         /*
  428          * if (bang) XXXXX Very bad to remove this test, but somethings
  429          * broken
  430          */
  431         tp_send(tpcb);
  432         return (bang);
  433 }
  434 
  435 /*
  436  * CALLED FROM:
  437  *  tp_goodack()
  438  * FUNCTION and ARGUMENTS:
  439  *  drops everything up TO but not INCLUDING seq # (seq)
  440  *  from the retransmission queue.
  441  */
  442 int
  443 tp_sbdrop(tpcb, seq)
  444         struct tp_pcb *tpcb;
  445         SeqNum          seq;
  446 {
  447         struct sockbuf *sb = &tpcb->tp_sock->so_snd;
  448         int    i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
  449         int             oldcc = sb->sb_cc, oldi = i;
  450 
  451         if (i >= tpcb->tp_seqhalf)
  452                 printf("tp_spdropping too much -- should panic");
  453         while (i-- > 0)
  454                 sbdroprecord(sb);
  455 #ifdef ARGO_DEBUG
  456         if (argo_debug[D_ACKRECV]) {
  457                 printf("tp_sbdroping %d pkts %ld bytes on %p at 0x%x\n",
  458                        oldi, oldcc - sb->sb_cc, tpcb, seq);
  459         }
  460 #endif
  461         if (sb_notify(sb))
  462                 sowwakeup(tpcb->tp_sock);
  463         return (oldcc - sb->sb_cc);
  464 }
  465 
  466 /*
  467  * CALLED FROM:
  468  *      tp.trans on user send request, arrival of AK and arrival of XAK
  469  * FUNCTION and ARGUMENTS:
  470  *      Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
  471  *      Emits until a) runs out of data, or  b) runs into an XPD mark, or
  472  *                      c) it hits seq number (highseq) limited by cong or credit.
  473  *
  474  *      If you want XPD to buffer > 1 du per socket buffer, you can
  475  *      modifiy this to issue XPD tpdus also, but then it'll have
  476  *      to take some argument(s) to distinguish between the type of DU to
  477  *      hand tp_emit.
  478  *
  479  *      When something is sent for the first time, its time-of-send
  480  *      is stashed (in system clock ticks rather than pf_slowtimo ticks).
  481  *  When the ack arrives, the smoothed round-trip time is figured
  482  *  using this value.
  483  */
  484 void
  485 tp_send(tpcb)
  486         struct tp_pcb *tpcb;
  487 {
  488         int    len;
  489         struct mbuf *m;
  490         struct mbuf    *mb = 0;
  491         struct sockbuf *sb = &tpcb->tp_sock->so_snd;
  492         unsigned int    eotsdu = 0;
  493         SeqNum          highseq, checkseq;
  494         int             idle, idleticks, off, cong_win;
  495 #ifdef TP_PERF_MEAS
  496         int             send_start_time = hardclock_ticks;
  497         SeqNum          oldnxt = tpcb->tp_sndnxt;
  498 #endif /* TP_PERF_MEAS */
  499 
  500         idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
  501         if (idle) {
  502                 idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
  503                 if (idleticks > tpcb->tp_dt_ticks)
  504                         /*
  505                          * We have been idle for "a while" and no acks are
  506                          * expected to clock out any data we send --
  507                          * slow start to get ack "clock" running again.
  508                          */
  509                         tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
  510         }
  511         cong_win = tpcb->tp_cong_win;
  512         highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
  513         if (tpcb->tp_Xsnd.sb_mb)
  514                 highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
  515 
  516 #ifdef ARGO_DEBUG
  517         if (argo_debug[D_DATA]) {
  518                 printf("tp_send enter tpcb %p nxt 0x%x win %d high 0x%x\n",
  519                        tpcb, tpcb->tp_sndnxt, cong_win, highseq);
  520         }
  521 #endif
  522 #ifdef TPPT
  523         if (tp_traceflags[D_DATA]) {
  524                 tptraceTPCB(TPPTmisc, "tp_send sndnew snduna",
  525                             tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
  526         tptraceTPCB(TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
  527             tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
  528         }
  529 #endif
  530 #ifdef TPPT
  531                 if (tp_traceflags[D_DATA]) {
  532                 tptraceTPCB(TPPTmisc, "tp_send 2 nxt high fcredit congwin",
  533                       tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
  534         }
  535 #endif
  536 
  537                 if (tpcb->tp_sndnxt_m)
  538                 m = tpcb->tp_sndnxt_m;
  539         else {
  540                 off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
  541                 for (m = sb->sb_mb; m && off > 0; m = m->m_next)
  542                         off--;
  543         }
  544         /*
  545          * Avoid silly window syndrome here . . . figure out how!
  546          */
  547         checkseq = tpcb->tp_sndnum;
  548         if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
  549                 checkseq = highseq;     /* i.e. DON'T retain highest assigned
  550                                          * packet */
  551 
  552         while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
  553 
  554                 eotsdu = (m->m_flags & M_EOR) != 0;
  555                 len = m->m_pkthdr.len;
  556                 if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
  557                     len < (tpcb->tp_l_tpdusize / 2))
  558                         break;  /* Nagle . . . . . */
  559                 cong_win -= len;
  560                 /*
  561                  * make a copy - mb goes into the retransmission list while m
  562                  * gets emitted.  m_copy won't copy a zero-length mbuf.
  563                  */
  564                 mb = m;
  565                 m = m_copy(mb, 0, M_COPYALL);
  566                 if (m == NULL)
  567                         break;
  568 #ifdef TPPT
  569                 if (tp_traceflags[D_STASH]) {
  570                         tptraceTPCB(TPPTmisc,
  571                                     "tp_send mcopy nxt high eotsdu len",
  572                                     tpcb->tp_sndnxt, highseq, eotsdu, len);
  573                 }
  574 #endif
  575 
  576 #ifdef ARGO_DEBUG
  577                         if (argo_debug[D_DATA]) {
  578                         printf("tp_sending tpcb %p nxt 0x%x\n",
  579                                tpcb, tpcb->tp_sndnxt);
  580                 }
  581 #endif
  582                 /*
  583                  * when headers are precomputed, may need to fill in checksum
  584                  * here
  585                  */
  586                 tpcb->tp_sock->so_error =
  587                       tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m);
  588                 if (tpcb->tp_sock->so_error != 0)
  589                         /* error */
  590                         break;
  591                 m = mb->m_nextpkt;
  592                 tpcb->tp_sndnxt_m = m;
  593                 if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
  594                         SEQ_INC(tpcb, tpcb->tp_sndnew);
  595                         /*
  596                          * Time this transmission if not a retransmission and
  597                          * not currently timing anything.
  598                          */
  599                         if (tpcb->tp_rttemit == 0) {
  600                                 tpcb->tp_rttemit = hardclock_ticks;
  601                                 tpcb->tp_rttseq = tpcb->tp_sndnxt;
  602                         }
  603                         tpcb->tp_sndnxt = tpcb->tp_sndnew;
  604                 } else
  605                         SEQ_INC(tpcb, tpcb->tp_sndnxt);
  606                 /*
  607                  * Set retransmit timer if not currently set.
  608                  * Initial value for retransmit timer is smoothed
  609                  * round-trip time + 2 * round-trip time variance.
  610                  * Initialize shift counter which is used for backoff
  611                  * of retransmit time.
  612                  */
  613                 if (tpcb->tp_timer[TM_data_retrans] == 0 &&
  614                     tpcb->tp_class != TP_CLASS_0) {
  615                         tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
  616                         tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
  617                         tpcb->tp_rxtshift = 0;
  618                 }
  619         }
  620         if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
  621                 tpcb->tp_oktonagle = 0;
  622 #ifdef TP_PERF_MEAS
  623         if (DOPERF(tpcb)) {
  624                 int    npkts;
  625                 int             s, elapsed, *t;
  626                 struct timeval  now;
  627 
  628                 elapsed = hardclock_ticks - send_start_time;
  629 
  630                 npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
  631 
  632                 if (npkts > 0)
  633                         tpcb->tp_Nwindow++;
  634 
  635                 if (npkts > TP_PM_MAX)
  636                         npkts = TP_PM_MAX;
  637 
  638                 t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
  639                 *t += (t - elapsed) >> TP_RTT_ALPHA;
  640 
  641                 if (mb == 0) {
  642                         IncPStat(tpcb, tps_win_lim_by_data[npkts]);
  643                 } else {
  644                         IncPStat(tpcb, tps_win_lim_by_cdt[npkts]);
  645                         /* not true with congestion-window being used */
  646                 }
  647                 now.tv_sec = elapsed / hz;
  648                 now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
  649                 tpmeas(tpcb->tp_lref,
  650                        TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
  651         }
  652 #endif                          /* TP_PERF_MEAS */
  653 
  654 
  655 #ifdef TPPT
  656         if (tp_traceflags[D_DATA]) {
  657                 tptraceTPCB(TPPTmisc,
  658                             "tp_send at end: new nxt eotsdu error",
  659                             tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu,
  660                             tpcb->tp_sock->so_error);
  661 
  662         }
  663 #endif
  664 }
  665 
  /*
   * NOTE(review): TPNagleok is not referenced in the visible part of this
   * file; presumably a companion statistic to TPNagled -- confirm.
   */
  666 int             TPNagleok;
  /*
   * Incremented by tp_packetize() each time new data is merged into an
   * existing unsent packet instead of starting a new one.
   */
  667 int             TPNagled;
  668 
  669 int
  670 tp_packetize(tpcb, m, eotsdu)
  671         struct tp_pcb *tpcb;
  672         struct mbuf *m;
  673         int             eotsdu;
  674 {
  675         struct mbuf *n = NULL;
  676         struct sockbuf *sb = &tpcb->tp_sock->so_snd;
  677         int             maxsize = tpcb->tp_l_tpdusize
  678                             - tp_headersize(DT_TPDU_type, tpcb)
  679                             - (tpcb->tp_use_checksum ? 4 : 0);
  680         int             totlen = m->m_pkthdr.len;
  681 
  682         /*
  683          * Pre-packetize the data in the sockbuf
  684          * according to negotiated mtu.  Do it here
  685          * where we can safely wait for mbufs.
  686          *
  687          * This presumes knowledge of sockbuf conventions.
  688          * TODO: allocate space for header and fill it in (once!).
  689          */
  690 #ifdef ARGO_DEBUG
  691         if (argo_debug[D_DATA]) {
  692                 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
  693                        maxsize, totlen, eotsdu, tpcb->tp_sndnum);
  694         }
  695 #endif
  696         if (tpcb->tp_oktonagle) {
  697                 if ((n = sb->sb_mb) == 0)
  698                         panic("tp_packetize");
  699                 while (n->m_nextpkt)
  700                         n = n->m_nextpkt;
  701                 if (n->m_flags & M_EOR)
  702                         panic("tp_packetize 2");
  703                 SEQ_INC(tpcb, tpcb->tp_sndnum);
  704                 if (totlen + n->m_pkthdr.len < maxsize) {
  705                         /*
  706                          * There is an unsent packet with space,
  707                          * combine data
  708                          */
  709                         struct mbuf    *old_n = n;
  710                         tpsbcheck(tpcb, 3);
  711                         n->m_pkthdr.len += totlen;
  712                         while (n->m_next)
  713                                 n = n->m_next;
  714                         sbcompress(sb, m, n);
  715                         tpsbcheck(tpcb, 4);
  716                         n = old_n;
  717                         TPNagled++;
  718                         goto out;
  719                 }
  720         }
  721 
  722         while (m) {
  723                 n = m;
  724                 if (totlen > maxsize) {
  725                         if ((m = m_split(n, maxsize, M_WAIT)) == 0)
  726                                 panic("tp_packetize");
  727                 } else
  728                         m = 0;
  729                 totlen -= maxsize;
  730                 tpsbcheck(tpcb, 5);
  731                 sbappendrecord(sb, n);
  732                 tpsbcheck(tpcb, 6);
  733                 SEQ_INC(tpcb, tpcb->tp_sndnum);
  734         }
  735 out:
  736         if (eotsdu) {
  737                 n->m_flags |= M_EOR;    /* XXX belongs at end */
  738                 tpcb->tp_oktonagle = 0;
  739         } else {
  740                 SEQ_DEC(tpcb, tpcb->tp_sndnum);
  741                 tpcb->tp_oktonagle = 1;
  742                 TPNagleok++;
  743         }
  744 
  745 #ifdef ARGO_DEBUG
  746         if (argo_debug[D_DATA]) {
  747                 printf("SEND out: oktonagle %d sndnum 0x%x\n",
  748                        tpcb->tp_oktonagle, tpcb->tp_sndnum);
  749         }
  750 #endif
  751         return 0;
  752 }
  753 
  754 
  755 /*
  756  * NAME: tp_stash()
  757  * CALLED FROM:
  758  *      tp.trans on arrival of a DT tpdu
  759  * FUNCTION, ARGUMENTS, and RETURN VALUE:
  760  *      Returns 1 if
  761  *      a) something new arrived and it's got eotsdu_reached bit on,
  762  *      b) this arrival was caused other out-of-sequence things to be
  763  *      accepted, or
  764  *      c) this arrival is the highest seq # for which we last gave credit
  765  *      (sender just sent a whole window)
  766  *  In other words, returns 1 if tp should send an ack immediately, 0 if
  767  *  the ack can wait a while.
  768  *
  769  * Note: this implementation no longer renegs on credit, (except
  770  * when debugging option D_RENEG is on, for the purpose of testing
  771  * ack subsequencing), so we don't  need to check for incoming tpdus
  772  * being in a reneged portion of the window.
  773  */
  774 
  775 int
  776 tp_stash(tpcb, e)
  777         struct tp_pcb *tpcb;
  778         struct tp_event *e;
  779 {
  780         int    ack_reason = tpcb->tp_ack_strat & ACK_STRAT_EACH;
  781         /* 0--> delay acks until full window */
  782         /* 1--> ack each tpdu */
  783 #define E e->TPDU_ATTR(DT)
  784 
  785         if (E.e_eot) {
  786                 struct mbuf *n = E.e_data;
  787                 n->m_flags |= M_EOR;
  788                 n->m_nextpkt = 0;
  789         }
  790 #ifdef ARGO_DEBUG
  791         if (argo_debug[D_STASH]) {
  792                 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
  793                           "stash: so_rcv before appending");
  794                 dump_mbuf(E.e_data,
  795                           "stash: e_data before appending");
  796         }
  797 #endif
  798 
  799 #ifdef TP_PERF_MEAS
  800         if (DOPERF(tpcb)) {
  801                 PStat(tpcb, Nb_from_ll) += E.e_datalen;
  802                 tpmeas(tpcb->tp_lref, TPtime_from_ll,
  803                        &e->e_time, E.e_seq,
  804                        (u_int) PStat(tpcb, Nb_from_ll),
  805                        (u_int) E.e_datalen);
  806         }
  807 #endif
  808 
  809         if (E.e_seq == tpcb->tp_rcvnxt) {
  810 
  811 #ifdef ARGO_DEBUG
  812                 if (argo_debug[D_STASH]) {
  813                         printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
  814                              E.e_seq, E.e_datalen, E.e_eot);
  815                 }
  816 #endif
  817 
  818 #ifdef TPPT
  819                 if (tp_traceflags[D_STASH]) {
  820                         tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
  821                           E.e_seq, E.e_datalen, E.e_eot, 0);
  822                 }
  823 #endif
  824 
  825                 SET_DELACK(tpcb);
  826 
  827                 sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
  828 
  829                 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
  830                 /*
  831                  * move chains from the reassembly queue to the socket buffer
  832                  */
  833                 if (tpcb->tp_rsycnt) {
  834                         struct mbuf **mp;
  835                         struct mbuf   **mplim;
  836 
  837                         mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt %
  838                                               tpcb->tp_maxlcredit);
  839                         mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
  840 
  841                         while (tpcb->tp_rsycnt && *mp) {
  842                                 sbappend(&tpcb->tp_sock->so_rcv, *mp);
  843                                 tpcb->tp_rsycnt--;
  844                                 *mp = 0;
  845                                 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
  846                                 ack_reason |= ACK_REORDER;
  847                                 if (++mp == mplim)
  848                                         mp = tpcb->tp_rsyq;
  849                         }
  850                 }
  851 #ifdef ARGO_DEBUG
  852                 if (argo_debug[D_STASH]) {
  853                         dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
  854                            "stash: so_rcv after appending");
  855                 }
  856 #endif
  857 
  858         } else {
  859                 struct mbuf **mp;
  860                 SeqNum          uwe;
  861 
  862 #ifdef TPPT
  863                 if (tp_traceflags[D_STASH]) {
  864                         tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
  865                                     E.e_seq, tpcb->tp_rcvnxt,
  866                                     tpcb->tp_lcredit, 0);
  867                 }
  868 #endif
  869 
  870                 if (tpcb->tp_rsyq == 0)
  871                         tp_rsyset(tpcb);
  872                 uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
  873                 if (tpcb->tp_rsyq == 0 ||
  874                     !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
  875                         ack_reason = ACK_DONT;
  876                         m_freem(E.e_data);
  877                 } else if (*(mp = tpcb->tp_rsyq +
  878                              (E.e_seq % tpcb->tp_maxlcredit)) != NULL ) {
  879 #ifdef ARGO_DEBUG
  880                         if (argo_debug[D_STASH]) {
  881                                 printf("tp_stash - drop & ack\n");
  882                         }
  883 #endif
  884 
  885                         /*
  886                          * retransmission - drop it and force
  887                          * an ack
  888                          */
  889                         IncStat(ts_dt_dup);
  890 #ifdef TP_PERF_MEAS
  891                         if (DOPERF(tpcb)) {
  892                                 IncPStat(tpcb, tps_n_ack_cuz_dup);
  893                         }
  894 #endif
  895 
  896                                 m_freem(E.e_data);
  897                         ack_reason |= ACK_DUP;
  898                 } else {
  899                         *mp = E.e_data;
  900                         tpcb->tp_rsycnt++;
  901                         ack_reason = ACK_DONT;
  902                 }
  903         }
  904         /*
  905          * there were some comments of historical interest
  906          * here.
  907          */
  908         {
  909                 LOCAL_CREDIT(tpcb);
  910 
  911                 if (E.e_seq == tpcb->tp_sent_uwe)
  912                         ack_reason |= ACK_STRAT_FULLWIN;
  913 
  914 #ifdef TPPT
  915                 if (tp_traceflags[D_STASH]) {
  916                         tptraceTPCB(TPPTmisc,
  917                  "end of stash, eot, ack_reason, sent_uwe ",
  918                  E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
  919                 }
  920 #endif
  921 
  922                 if (ack_reason == ACK_DONT) {
  923                         IncStat(ts_ackreason[ACK_DONT]);
  924                         return 0;
  925                 } else {
  926 #ifdef TP_PERF_MEAS
  927                         if (DOPERF(tpcb)) {
  928                                 if (ack_reason & ACK_STRAT_EACH) {
  929                                 IncPStat(tpcb, tps_n_ack_cuz_strat);
  930                         } else if (ack_reason & ACK_STRAT_FULLWIN) {
  931                                 IncPStat(tpcb, tps_n_ack_cuz_fullwin);
  932                         } else if (ack_reason & ACK_REORDER) {
  933                                 IncPStat(tpcb, tps_n_ack_cuz_reorder);
  934                         }
  935                         tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
  936                            SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
  937                         }
  938 #endif
  939                         {
  940                                 int    i;
  941 
  942                                 /*
  943                                  * keep track of all reasons
  944                                  * that apply
  945                                  */
  946                                 for (i = 1; i < _ACK_NUM_REASONS_; i++) {
  947                                         if (ack_reason & (1 << i))
  948                                                 IncStat(ts_ackreason[i]);
  949                                 }
  950                         }
  951                         return 1;
  952                 }
  953         }
  954 }
  955 
  956 /*
  957  * tp_rsyflush - drop all the packets on the reassembly queue.
  958  * Do this when closing the socket, or when somebody has changed
  959  * the space avaible in the receive socket (XXX).
  960  */
  961 void
  962 tp_rsyflush(tpcb)
  963         struct tp_pcb *tpcb;
  964 {
  965         struct mbuf **mp;
  966         if (tpcb->tp_rsycnt) {
  967                 for (mp = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
  968                      --mp >= tpcb->tp_rsyq;)
  969                         if (*mp) {
  970                                 tpcb->tp_rsycnt--;
  971                                 m_freem(*mp);
  972                         }
  973                 if (tpcb->tp_rsycnt) {
  974                         printf("tp_rsyflush %p\n", tpcb);
  975                         tpcb->tp_rsycnt = 0;
  976                 }
  977         }
  978         free((caddr_t) tpcb->tp_rsyq, M_PCB);
  979         tpcb->tp_rsyq = 0;
  980 }
  981 
  982 void
  983 tp_rsyset(tpcb)
  984         struct tp_pcb *tpcb;
  985 {
  986         struct socket *so = tpcb->tp_sock;
  987         int             maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
  988         int             old_credit = tpcb->tp_maxlcredit;
  989         caddr_t         rsyq;
  990 
  991         tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
  992                                               (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize) / tpcb->tp_l_tpdusize);
  993 
  994         if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
  995                 return;
  996         maxcredit *= sizeof(struct mbuf *);
  997         if (tpcb->tp_rsyq)
  998                 tp_rsyflush(tpcb);
  999         if ((rsyq = (caddr_t) malloc(maxcredit, M_PCB, M_NOWAIT)) != NULL)
 1000                 bzero(rsyq, maxcredit);
 1001         tpcb->tp_rsyq = (struct mbuf **) rsyq;
 1002 }
 1003 
 1004 
 1005 void
 1006 tpsbcheck(tpcb, i)
 1007         struct tp_pcb  *tpcb;
 1008         int i;
 1009 {
 1010         struct mbuf *n, *m;
 1011         int    len = 0, mbcnt = 0, pktlen;
 1012         struct sockbuf *sb = &tpcb->tp_sock->so_snd;
 1013 
 1014         for (n = sb->sb_mb; n; n = n->m_nextpkt) {
 1015                 if ((n->m_flags & M_PKTHDR) == 0)
 1016                         panic("tpsbcheck nohdr");
 1017                 pktlen = len + n->m_pkthdr.len;
 1018                 for (m = n; m; m = m->m_next) {
 1019                         len += m->m_len;
 1020                         mbcnt += MSIZE;
 1021                         if (m->m_flags & M_EXT)
 1022                                 mbcnt += m->m_ext.ext_size;
 1023                 }
 1024                 if (len != pktlen) {
 1025                         printf("test %d; len %d != pktlen %d on mbuf %p\n",
 1026                                i, len, pktlen, n);
 1027                         panic("tpsbcheck short");
 1028                 }
 1029         }
 1030         if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
 1031                 printf("test %d: cc %d != %ld || mbcnt %d != %ld\n", i, len, sb->sb_cc,
 1032                        mbcnt, sb->sb_mbcnt);
 1033                 panic("tpsbcheck");
 1034         }
 1035 }

Cache object: e9dd234a2ea7b679988a22caf4c12c9a


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.