The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_output.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-3-Clause
    3  *
    4  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  * 3. Neither the name of the University nor the names of its contributors
   16  *    may be used to endorse or promote products derived from this software
   17  *    without specific prior written permission.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29  * SUCH DAMAGE.
   30  *
   31  *      @(#)tcp_output.c        8.4 (Berkeley) 5/24/95
   32  */
   33 
   34 #include <sys/cdefs.h>
   35 __FBSDID("$FreeBSD$");
   36 
   37 #include "opt_inet.h"
   38 #include "opt_inet6.h"
   39 #include "opt_ipsec.h"
   40 #include "opt_kern_tls.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/arb.h>
   45 #include <sys/domain.h>
   46 #ifdef TCP_HHOOK
   47 #include <sys/hhook.h>
   48 #endif
   49 #include <sys/kernel.h>
   50 #ifdef KERN_TLS
   51 #include <sys/ktls.h>
   52 #endif
   53 #include <sys/lock.h>
   54 #include <sys/mbuf.h>
   55 #include <sys/mutex.h>
   56 #include <sys/protosw.h>
   57 #include <sys/qmath.h>
   58 #include <sys/sdt.h>
   59 #include <sys/socket.h>
   60 #include <sys/socketvar.h>
   61 #include <sys/sysctl.h>
   62 #include <sys/stats.h>
   63 
   64 #include <net/if.h>
   65 #include <net/route.h>
   66 #include <net/route/nhop.h>
   67 #include <net/vnet.h>
   68 
   69 #include <netinet/in.h>
   70 #include <netinet/in_kdtrace.h>
   71 #include <netinet/in_systm.h>
   72 #include <netinet/ip.h>
   73 #include <netinet/in_pcb.h>
   74 #include <netinet/ip_var.h>
   75 #include <netinet/ip_options.h>
   76 #ifdef INET6
   77 #include <netinet6/in6_pcb.h>
   78 #include <netinet/ip6.h>
   79 #include <netinet6/ip6_var.h>
   80 #endif
   81 #include <netinet/tcp.h>
   82 #define TCPOUTFLAGS
   83 #include <netinet/tcp_fsm.h>
   84 #include <netinet/tcp_log_buf.h>
   85 #include <netinet/tcp_seq.h>
   86 #include <netinet/tcp_var.h>
   87 #include <netinet/tcp_syncache.h>
   88 #include <netinet/tcp_timer.h>
   89 #include <netinet/tcpip.h>
   90 #include <netinet/cc/cc.h>
   91 #include <netinet/tcp_fastopen.h>
   92 #ifdef TCPPCAP
   93 #include <netinet/tcp_pcap.h>
   94 #endif
   95 #ifdef TCP_OFFLOAD
   96 #include <netinet/tcp_offload.h>
   97 #endif
   98 #include <netinet/tcp_ecn.h>
   99 
  100 #include <netipsec/ipsec_support.h>
  101 
  102 #include <netinet/udp.h>
  103 #include <netinet/udp_var.h>
  104 #include <machine/in_cksum.h>
  105 
  106 #include <security/mac/mac_framework.h>
  107 
  108 VNET_DEFINE(int, path_mtu_discovery) = 1;
      /*
       * Export path_mtu_discovery (initialised to 1 above) as a read-write
       * integer sysctl under the _net_inet_tcp node.
       * NOTE(review): the argument following the variable pointer is 1 here
       * but 0 in every sibling declaration below -- confirm whether the
       * difference is intentional.
       */
  109 SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_VNET | CTLFLAG_RW,
  110         &VNET_NAME(path_mtu_discovery), 1,
  111         "Enable Path MTU Discovery");
  112 
  113 VNET_DEFINE(int, tcp_do_tso) = 1;
      /*
       * Export tcp_do_tso (initialised to 1 above) as the read-write sysctl
       * "tso".  tcp_default_output() consults it as V_tcp_do_tso before
       * enabling TSO for a segment.
       */
  114 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_VNET | CTLFLAG_RW,
  115         &VNET_NAME(tcp_do_tso), 0,
  116         "Enable TCP Segmentation Offload");
  117 
  118 VNET_DEFINE(int, tcp_sendspace) = 1024*32;
      /*
       * tcp_sendspace is initialised to 1024*32 and exported as the read-write
       * sysctl "sendspace".  Unlike its neighbours it is registered with the
       * fixed identifier TCPCTL_SENDSPACE rather than OID_AUTO.
       * V_tcp_sendspace is the shorthand accessor for the variable.
       */
  119 #define V_tcp_sendspace VNET(tcp_sendspace)
  120 SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_VNET | CTLFLAG_RW,
  121         &VNET_NAME(tcp_sendspace), 0, "Initial send socket buffer size");
  122 
  123 VNET_DEFINE(int, tcp_do_autosndbuf) = 1;
      /*
       * Export tcp_do_autosndbuf (initialised to 1 above) as the read-write
       * sysctl "sendbuf_auto".
       */
  124 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
  125         &VNET_NAME(tcp_do_autosndbuf), 0,
  126         "Enable automatic send buffer sizing");
  127 
  128 VNET_DEFINE(int, tcp_autosndbuf_inc) = 8*1024;
      /*
       * Export tcp_autosndbuf_inc (initialised to 8*1024 above) as the
       * read-write sysctl "sendbuf_inc".
       */
  129 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
  130         &VNET_NAME(tcp_autosndbuf_inc), 0,
  131         "Incrementor step size of automatic send buffer");
  132 
  133 VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024;
      /*
       * Export tcp_autosndbuf_max (initialised to 2*1024*1024 above) as the
       * read-write sysctl "sendbuf_max".
       */
  134 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
  135         &VNET_NAME(tcp_autosndbuf_max), 0,
  136         "Max size of automatic send buffer");
  137 
  138 VNET_DEFINE(int, tcp_sendbuf_auto_lowat) = 0;
      /*
       * tcp_sendbuf_auto_lowat is initialised to 0 and exported as the
       * read-write sysctl "sendbuf_auto_lowat".  V_tcp_sendbuf_auto_lowat is
       * the shorthand accessor for the variable.
       */
  139 #define V_tcp_sendbuf_auto_lowat        VNET(tcp_sendbuf_auto_lowat)
  140 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat, CTLFLAG_VNET | CTLFLAG_RW,
  141         &VNET_NAME(tcp_sendbuf_auto_lowat), 0,
  142         "Modify threshold for auto send buffer growth to account for SO_SNDLOWAT");
  143 
  144 /*
  145  * Make sure that either the retransmit or the persist timer is set whenever
  146  * a segment carries data (len != 0) or has SYN or FIN set in th_flags.
       *
       * The KASSERT condition is satisfied outright when len == 0 and neither
       * TH_SYN nor TH_FIN is set.  For any other segment, tcp_timer_active()
       * must report TT_REXMT or TT_PERSIST as active on tp.
       *
       * tp appears twice in the asserted expression, so pass an argument
       * without side effects.
  147  */
  148 #define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags)                        \
  149         KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\
  150             tcp_timer_active((tp), TT_REXMT) ||                         \
  151             tcp_timer_active((tp), TT_PERSIST),                         \
  152             ("neither rexmt nor persist timer is set"))
  153 
  154 #ifdef TCP_HHOOK
  155 /*
  156  * Wrapper for the TCP established output helper hook.
  157  */
  158 void
  159 hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th,
  160     struct tcpopt *to, uint32_t len, int tso)
  161 {
  162         struct tcp_hhook_data hhook_data;
  163 
  164         if (V_tcp_hhh[HHOOK_TCP_EST_OUT]->hhh_nhooks > 0) {
  165                 hhook_data.tp = tp;
  166                 hhook_data.th = th;
  167                 hhook_data.to = to;
  168                 hhook_data.len = len;
  169                 hhook_data.tso = tso;
  170 
  171                 hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_OUT], &hhook_data,
  172                     &tp->t_osd);
  173         }
  174 }
  175 #endif
  176 
  177 /*
  178  * CC wrapper hook functions
  179  */
  180 void
  181 cc_after_idle(struct tcpcb *tp)
  182 {
  183         INP_WLOCK_ASSERT(tptoinpcb(tp));
  184 
  185         if (CC_ALGO(tp)->after_idle != NULL)
  186                 CC_ALGO(tp)->after_idle(&tp->t_ccv);
  187 }
  188 
  189 /*
  190  * Tcp output routine: figure out what should be sent and send it.
  191  */
  192 int
  193 tcp_default_output(struct tcpcb *tp)
  194 {
  195         struct socket *so = tptosocket(tp);
  196         struct inpcb *inp = tptoinpcb(tp);
  197         int32_t len;
  198         uint32_t recwin, sendwin;
  199         uint16_t flags;
  200         int off, error = 0;     /* Keep compiler happy */
  201         u_int if_hw_tsomaxsegcount = 0;
  202         u_int if_hw_tsomaxsegsize = 0;
  203         struct mbuf *m;
  204         struct ip *ip = NULL;
  205         struct tcphdr *th;
  206         u_char opt[TCP_MAXOLEN];
  207         unsigned ipoptlen, optlen, hdrlen, ulen;
  208 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  209         unsigned ipsec_optlen = 0;
  210 #endif
  211         int idle, sendalot, curticks;
  212         int sack_rxmit, sack_bytes_rxmt;
  213         struct sackhole *p;
  214         int tso, mtu;
  215         struct tcpopt to;
  216         struct udphdr *udp = NULL;
  217         struct tcp_log_buffer *lgb;
  218         unsigned int wanted_cookie = 0;
  219         unsigned int dont_sendalot = 0;
  220 #if 0
  221         int maxburst = TCP_MAXBURST;
  222 #endif
  223 #ifdef INET6
  224         struct ip6_hdr *ip6 = NULL;
  225         int isipv6;
  226 
  227         isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
  228 #endif
  229 #ifdef KERN_TLS
  230         const bool hw_tls = (so->so_snd.sb_flags & SB_TLS_IFNET) != 0;
  231 #else
  232         const bool hw_tls = false;
  233 #endif
  234 
  235         NET_EPOCH_ASSERT();
  236         INP_WLOCK_ASSERT(inp);
  237 
  238 #ifdef TCP_OFFLOAD
  239         if (tp->t_flags & TF_TOE)
  240                 return (tcp_offload_output(tp));
  241 #endif
  242 
  243         /*
  244          * For TFO connections in SYN_SENT or SYN_RECEIVED,
  245          * only allow the initial SYN or SYN|ACK and those sent
  246          * by the retransmit timer.
  247          */
  248         if (IS_FASTOPEN(tp->t_flags) &&
  249             ((tp->t_state == TCPS_SYN_SENT) ||
  250              (tp->t_state == TCPS_SYN_RECEIVED)) &&
  251             SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */
  252             (tp->snd_nxt != tp->snd_una))       /* not a retransmit */
  253                 return (0);
  254 
  255         /*
  256          * Determine length of data that should be transmitted,
  257          * and flags that will be used.
  258          * If there is some data or critical controls (SYN, RST)
  259          * to send, then transmit; otherwise, investigate further.
  260          */
  261         idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
  262         if (idle && (((ticks - tp->t_rcvtime) >= tp->t_rxtcur) ||
  263             (tp->t_sndtime && ((ticks - tp->t_sndtime) >= tp->t_rxtcur))))
  264                 cc_after_idle(tp);
  265         tp->t_flags &= ~TF_LASTIDLE;
  266         if (idle) {
  267                 if (tp->t_flags & TF_MORETOCOME) {
  268                         tp->t_flags |= TF_LASTIDLE;
  269                         idle = 0;
  270                 }
  271         }
  272 again:
  273         /*
  274          * If we've recently taken a timeout, snd_max will be greater than
  275          * snd_nxt.  There may be SACK information that allows us to avoid
  276          * resending already delivered data.  Adjust snd_nxt accordingly.
  277          */
  278         if ((tp->t_flags & TF_SACK_PERMIT) &&
  279             SEQ_LT(tp->snd_nxt, tp->snd_max))
  280                 tcp_sack_adjust(tp);
  281         sendalot = 0;
  282         tso = 0;
  283         mtu = 0;
  284         off = tp->snd_nxt - tp->snd_una;
  285         sendwin = min(tp->snd_wnd, tp->snd_cwnd);
  286 
  287         flags = tcp_outflags[tp->t_state];
  288         /*
  289          * Send any SACK-generated retransmissions.  If we're explicitly trying
  290          * to send out new data (when sendalot is 1), bypass this function.
  291          * If we retransmit in fast recovery mode, decrement snd_cwnd, since
  292          * we're replacing a (future) new transmission with a retransmission
  293          * now, and we previously incremented snd_cwnd in tcp_input().
  294          */
  295         /*
  296          * Still in sack recovery, reset rxmit flag to zero.
  297          */
  298         sack_rxmit = 0;
  299         sack_bytes_rxmt = 0;
  300         len = 0;
  301         p = NULL;
  302         if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) &&
  303             (p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
  304                 uint32_t cwin;
  305 
  306                 cwin =
  307                     imax(min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt, 0);
  308                 /* Do not retransmit SACK segments beyond snd_recover */
  309                 if (SEQ_GT(p->end, tp->snd_recover)) {
  310                         /*
  311                          * (At least) part of sack hole extends beyond
  312                          * snd_recover. Check to see if we can rexmit data
  313                          * for this hole.
  314                          */
  315                         if (SEQ_GEQ(p->rxmit, tp->snd_recover)) {
  316                                 /*
  317                                  * Can't rexmit any more data for this hole.
  318                                  * That data will be rexmitted in the next
  319                                  * sack recovery episode, when snd_recover
  320                                  * moves past p->rxmit.
  321                                  */
  322                                 p = NULL;
  323                                 goto after_sack_rexmit;
  324                         } else {
  325                                 /* Can rexmit part of the current hole */
  326                                 len = ((int32_t)ulmin(cwin,
  327                                     SEQ_SUB(tp->snd_recover, p->rxmit)));
  328                         }
  329                 } else {
  330                         len = ((int32_t)ulmin(cwin,
  331                             SEQ_SUB(p->end, p->rxmit)));
  332                 }
  333                 if (len > 0) {
  334                         off = SEQ_SUB(p->rxmit, tp->snd_una);
  335                         KASSERT(off >= 0,("%s: sack block to the left of una : %d",
  336                             __func__, off));
  337                         sack_rxmit = 1;
  338                         sendalot = 1;
  339                         TCPSTAT_INC(tcps_sack_rexmits);
  340                         TCPSTAT_ADD(tcps_sack_rexmit_bytes,
  341                             min(len, tcp_maxseg(tp)));
  342                 }
  343         }
  344 after_sack_rexmit:
  345         /*
  346          * Get standard flags, and add SYN or FIN if requested by 'hidden'
  347          * state flags.
  348          */
  349         if (tp->t_flags & TF_NEEDFIN)
  350                 flags |= TH_FIN;
  351         if (tp->t_flags & TF_NEEDSYN)
  352                 flags |= TH_SYN;
  353 
  354         SOCKBUF_LOCK(&so->so_snd);
  355         /*
  356          * If in persist timeout with window of 0, send 1 byte.
  357          * Otherwise, if window is small but nonzero
  358          * and timer expired, we will send what we can
  359          * and go to transmit state.
  360          */
  361         if (tp->t_flags & TF_FORCEDATA) {
  362                 if (sendwin == 0) {
  363                         /*
  364                          * If we still have some data to send, then
  365                          * clear the FIN bit.  Usually this would
  366                          * happen below when it realizes that we
  367                          * aren't sending all the data.  However,
  368                          * if we have exactly 1 byte of unsent data,
  369                          * then it won't clear the FIN bit below,
  370                          * and if we are in persist state, we wind
  371                          * up sending the packet without recording
  372                          * that we sent the FIN bit.
  373                          *
  374                          * We can't just blindly clear the FIN bit,
  375                          * because if we don't have any more data
  376                          * to send then the probe will be the FIN
  377                          * itself.
  378                          */
  379                         if (off < sbused(&so->so_snd))
  380                                 flags &= ~TH_FIN;
  381                         sendwin = 1;
  382                 } else {
  383                         tcp_timer_activate(tp, TT_PERSIST, 0);
  384                         tp->t_rxtshift = 0;
  385                 }
  386         }
  387 
  388         /*
  389          * If snd_nxt == snd_max and we have transmitted a FIN, the
  390          * offset will be > 0 even if so_snd.sb_cc is 0, resulting in
  391          * a negative length.  This can also occur when TCP opens up
  392          * its congestion window while receiving additional duplicate
  393          * acks after fast-retransmit because TCP will reset snd_nxt
  394          * to snd_max after the fast-retransmit.
  395          *
  396          * In the normal retransmit-FIN-only case, however, snd_nxt will
  397          * be set to snd_una, the offset will be 0, and the length may
  398          * wind up 0.
  399          *
  400          * If sack_rxmit is true we are retransmitting from the scoreboard
  401          * in which case len is already set.
  402          */
  403         if (sack_rxmit == 0) {
  404                 if (sack_bytes_rxmt == 0)
  405                         len = ((int32_t)min(sbavail(&so->so_snd), sendwin) -
  406                             off);
  407                 else {
  408                         int32_t cwin;
  409 
  410                         /*
  411                          * We are inside of a SACK recovery episode and are
  412                          * sending new data, having retransmitted all the
  413                          * data possible in the scoreboard.
  414                          */
  415                         len = ((int32_t)min(sbavail(&so->so_snd), tp->snd_wnd) -
  416                             off);
  417                         /*
  418                          * Don't remove this (len > 0) check !
  419                          * We explicitly check for len > 0 here (although it
  420                          * isn't really necessary), to work around a gcc
  421                          * optimization issue - to force gcc to compute
  422                          * len above. Without this check, the computation
  423                          * of len is bungled by the optimizer.
  424                          */
  425                         if (len > 0) {
  426                                 cwin = tp->snd_cwnd - imax(0, (int32_t)
  427                                         (tp->snd_nxt - tp->snd_recover)) -
  428                                         sack_bytes_rxmt;
  429                                 if (cwin < 0)
  430                                         cwin = 0;
  431                                 len = imin(len, cwin);
  432                         }
  433                 }
  434         }
  435 
  436         /*
  437          * Lop off SYN bit if it has already been sent.  However, if this
  438          * is SYN-SENT state and if segment contains data and if we don't
  439          * know that foreign host supports TAO, suppress sending segment.
  440          */
  441         if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
  442                 if (tp->t_state != TCPS_SYN_RECEIVED)
  443                         flags &= ~TH_SYN;
  444                 /*
  445                  * When sending additional segments following a TFO SYN|ACK,
  446                  * do not include the SYN bit.
  447                  */
  448                 if (IS_FASTOPEN(tp->t_flags) &&
  449                     (tp->t_state == TCPS_SYN_RECEIVED))
  450                         flags &= ~TH_SYN;
  451                 off--, len++;
  452         }
  453 
  454         /*
  455          * Be careful not to send data and/or FIN on SYN segments.
  456          * This measure is needed to prevent interoperability problems
  457          * with not fully conformant TCP implementations.
  458          */
  459         if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) {
  460                 len = 0;
  461                 flags &= ~TH_FIN;
  462         }
  463 
  464         /*
  465          * On TFO sockets, ensure no data is sent in the following cases:
  466          *
  467          *  - When retransmitting SYN|ACK on a passively-created socket
  468          *
  469          *  - When retransmitting SYN on an actively created socket
  470          *
  471          *  - When sending a zero-length cookie (cookie request) on an
  472          *    actively created socket
  473          *
  474          *  - When the socket is in the CLOSED state (RST is being sent)
  475          */
  476         if (IS_FASTOPEN(tp->t_flags) &&
  477             (((flags & TH_SYN) && (tp->t_rxtshift > 0)) ||
  478              ((tp->t_state == TCPS_SYN_SENT) &&
  479               (tp->t_tfo_client_cookie_len == 0)) ||
  480              (flags & TH_RST)))
  481                 len = 0;
  482         if (len <= 0) {
  483                 /*
  484                  * If FIN has been sent but not acked,
  485                  * but we haven't been called to retransmit,
  486                  * len will be < 0.  Otherwise, window shrank
  487                  * after we sent into it.  If window shrank to 0,
  488                  * cancel pending retransmit, pull snd_nxt back
  489                  * to (closed) window, and set the persist timer
  490                  * if it isn't already going.  If the window didn't
  491                  * close completely, just wait for an ACK.
  492                  *
  493                  * We also do a general check here to ensure that
  494                  * we will set the persist timer when we have data
  495                  * to send, but a 0-byte window. This makes sure
  496                  * the persist timer is set even if the packet
  497                  * hits one of the "goto send" lines below.
  498                  */
  499                 len = 0;
  500                 if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
  501                     (off < (int) sbavail(&so->so_snd)) &&
  502                     !tcp_timer_active(tp, TT_PERSIST)) {
  503                         tcp_timer_activate(tp, TT_REXMT, 0);
  504                         tp->t_rxtshift = 0;
  505                         tp->snd_nxt = tp->snd_una;
  506                         if (!tcp_timer_active(tp, TT_PERSIST))
  507                                 tcp_setpersist(tp);
  508                 }
  509         }
  510 
  511         /* len will be >= 0 after this point. */
  512         KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
  513 
  514         tcp_sndbuf_autoscale(tp, so, sendwin);
  515 
  516         /*
  517          * Decide if we can use TCP Segmentation Offloading (if supported by
  518          * hardware).
  519          *
  520          * TSO may only be used if we are in a pure bulk sending state.  The
  521          * presence of TCP-MD5, SACK retransmits, SACK advertizements and
  522          * IP options prevent using TSO.  With TSO the TCP header is the same
  523          * (except for the sequence number) for all generated packets.  This
  524          * makes it impossible to transmit any options which vary per generated
  525          * segment or packet.
  526          *
  527          * IPv4 handling has a clear separation of ip options and ip header
  528          * flags while IPv6 combines both in in6p_outputopts. ip6_optlen() does
  529          * the right thing below to provide length of just ip options and thus
  530          * checking for ipoptlen is enough to decide if ip options are present.
  531          */
  532 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  533         /*
  534          * Pre-calculate here as we save another lookup into the darknesses
  535          * of IPsec that way and can actually decide if TSO is ok.
  536          */
  537 #ifdef INET6
  538         if (isipv6 && IPSEC_ENABLED(ipv6))
  539                 ipsec_optlen = IPSEC_HDRSIZE(ipv6, inp);
  540 #ifdef INET
  541         else
  542 #endif
  543 #endif /* INET6 */
  544 #ifdef INET
  545         if (IPSEC_ENABLED(ipv4))
  546                 ipsec_optlen = IPSEC_HDRSIZE(ipv4, inp);
  547 #endif /* INET */
  548 #endif /* IPSEC */
  549 #ifdef INET6
  550         if (isipv6)
  551                 ipoptlen = ip6_optlen(inp);
  552         else
  553 #endif
  554         if (inp->inp_options)
  555                 ipoptlen = inp->inp_options->m_len -
  556                                 offsetof(struct ipoption, ipopt_list);
  557         else
  558                 ipoptlen = 0;
  559 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
  560         ipoptlen += ipsec_optlen;
  561 #endif
  562 
  563         if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
  564             (tp->t_port == 0) &&
  565             ((tp->t_flags & TF_SIGNATURE) == 0) &&
  566             tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
  567             ipoptlen == 0 && !(flags & TH_SYN))
  568                 tso = 1;
  569 
  570         if (sack_rxmit) {
  571                 if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd)))
  572                         flags &= ~TH_FIN;
  573         } else {
  574                 if (SEQ_LT(tp->snd_nxt + len, tp->snd_una +
  575                     sbused(&so->so_snd)))
  576                         flags &= ~TH_FIN;
  577         }
  578 
  579         recwin = lmin(lmax(sbspace(&so->so_rcv), 0),
  580             (long)TCP_MAXWIN << tp->rcv_scale);
  581 
  582         /*
  583          * Sender silly window avoidance.   We transmit under the following
  584          * conditions when len is non-zero:
  585          *
  586          *      - We have a full segment (or more with TSO)
  587          *      - This is the last buffer in a write()/send() and we are
  588          *        either idle or running NODELAY
  589          *      - we've timed out (e.g. persist timer)
  590          *      - we have more than 1/2 the maximum send window's worth of
  591          *        data (receiver may have limited the window size)
  592          *      - we need to retransmit
  593          */
  594         if (len) {
  595                 if (len >= tp->t_maxseg)
  596                         goto send;
  597                 /*
  598                  * As the TCP header options are now
  599                  * considered when setting up the initial
  600                  * window, we would not send the last segment
  601                  * if we skip considering the option length here.
  602                  * Note: this may not work when tcp headers change
  603                  * very dynamically in the future.
  604                  */
  605                 if ((((tp->t_flags & TF_SIGNATURE) ?
  606                         PADTCPOLEN(TCPOLEN_SIGNATURE) : 0) +
  607                     ((tp->t_flags & TF_RCVD_TSTMP) ?
  608                         PADTCPOLEN(TCPOLEN_TIMESTAMP) : 0) +
  609                     len) >= tp->t_maxseg)
  610                         goto send;
  611                 /*
  612                  * NOTE! on localhost connections an 'ack' from the remote
  613                  * end may occur synchronously with the output and cause
  614                  * us to flush a buffer queued with moretocome.  XXX
  615                  *
  616                  * note: the len + off check is almost certainly unnecessary.
  617                  */
  618                 if (!(tp->t_flags & TF_MORETOCOME) &&   /* normal case */
  619                     (idle || (tp->t_flags & TF_NODELAY)) &&
  620                     (uint32_t)len + (uint32_t)off >= sbavail(&so->so_snd) &&
  621                     (tp->t_flags & TF_NOPUSH) == 0) {
  622                         goto send;
  623                 }
  624                 if (tp->t_flags & TF_FORCEDATA)         /* typ. timeout case */
  625                         goto send;
  626                 if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
  627                         goto send;
  628                 if (SEQ_LT(tp->snd_nxt, tp->snd_max))   /* retransmit case */
  629                         goto send;
  630                 if (sack_rxmit)
  631                         goto send;
  632         }
  633 
  634         /*
  635          * Sending of standalone window updates.
  636          *
  637          * Window updates are important when we close our window due to a
  638          * full socket buffer and are opening it again after the application
  639          * reads data from it.  Once the window has opened again and the
  640          * remote end starts to send again the ACK clock takes over and
  641          * provides the most current window information.
  642          *
  643          * We must avoid the silly window syndrome whereby every read
  644          * from the receive buffer, no matter how small, causes a window
  645          * update to be sent.  We also should avoid sending a flurry of
  646          * window updates when the socket buffer had queued a lot of data
  647          * and the application is doing small reads.
  648          *
  649          * Prevent a flurry of pointless window updates by only sending
  650          * an update when we can increase the advertized window by more
  651          * than 1/4th of the socket buffer capacity.  When the buffer is
  652          * getting full or is very small be more aggressive and send an
  653          * update whenever we can increase by two mss sized segments.
  654          * In all other situations the ACK's to new incoming data will
  655          * carry further window increases.
  656          *
  657          * Don't send an independent window update if a delayed
  658          * ACK is pending (it will get piggy-backed on it) or the
  659          * remote side already has done a half-close and won't send
  660          * more data.  Skip this if the connection is in T/TCP
  661          * half-open state.
  662          */
  663         if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
  664             !(tp->t_flags & TF_DELACK) &&
  665             !TCPS_HAVERCVDFIN(tp->t_state)) {
  666                 /*
  667                  * "adv" is the amount we could increase the window,
  668                  * taking into account that we are limited by
  669                  * TCP_MAXWIN << tp->rcv_scale.
  670                  */
  671                 int32_t adv;
  672                 int oldwin;
  673 
  674                 adv = recwin;
  675                 if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) {
  676                         oldwin = (tp->rcv_adv - tp->rcv_nxt);
  677                         if (adv > oldwin)
  678                                 adv -= oldwin;
  679                         else
  680                                 adv = 0;
  681                 } else
  682                         oldwin = 0;
  683 
  684                 /*
  685                  * If the new window size ends up being the same as or less
  686                  * than the old size when it is scaled, then don't force
  687                  * a window update.
  688                  */
  689                 if (oldwin >> tp->rcv_scale >= (adv + oldwin) >> tp->rcv_scale)
  690                         goto dontupdate;
  691 
  692                 if (adv >= (int32_t)(2 * tp->t_maxseg) &&
  693                     (adv >= (int32_t)(so->so_rcv.sb_hiwat / 4) ||
  694                      recwin <= (so->so_rcv.sb_hiwat / 8) ||
  695                      so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg ||
  696                      adv >= TCP_MAXWIN << tp->rcv_scale))
  697                         goto send;
  698                 if (2 * adv >= (int32_t)so->so_rcv.sb_hiwat)
  699                         goto send;
  700         }
  701 dontupdate:
  702 
  703         /*
  704          * Send if we owe the peer an ACK, RST, SYN, or urgent data.  ACKNOW
  705          * is also a catch-all for the retransmit timer timeout case.
  706          */
  707         if (tp->t_flags & TF_ACKNOW)
  708                 goto send;
  709         if ((flags & TH_RST) ||
  710             ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
  711                 goto send;
  712         if (SEQ_GT(tp->snd_up, tp->snd_una))
  713                 goto send;
  714         /*
  715          * If our state indicates that FIN should be sent
  716          * and we have not yet done so, then we need to send.
  717          */
  718         if (flags & TH_FIN &&
  719             ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
  720                 goto send;
  721         /*
  722          * In SACK, it is possible for tcp_output to fail to send a segment
  723          * after the retransmission timer has been turned off.  Make sure
  724          * that the retransmission timer is set.
  725          */
  726         if ((tp->t_flags & TF_SACK_PERMIT) &&
  727             SEQ_GT(tp->snd_max, tp->snd_una) &&
  728             !tcp_timer_active(tp, TT_REXMT) &&
  729             !tcp_timer_active(tp, TT_PERSIST)) {
  730                 tcp_timer_activate(tp, TT_REXMT, TP_RXTCUR(tp));
  731                 goto just_return;
  732         }
  733         /*
  734          * TCP window updates are not reliable; rather, a polling protocol
  735          * using ``persist'' packets is used to ensure receipt of window
  736          * updates.  The three ``states'' for the output side are:
  737          *      idle                    not doing retransmits or persists
  738          *      persisting              to move a small or zero window
  739          *      (re)transmitting        and thereby not persisting
  740          *
  741          * tcp_timer_active(tp, TT_PERSIST)
  742          *      is true when we are in persist state.
  743          * (tp->t_flags & TF_FORCEDATA)
  744          *      is set when we are called to send a persist packet.
  745          * tcp_timer_active(tp, TT_REXMT)
  746          *      is true when we are retransmitting.
  747          * The output side is idle when neither timer is active.
  748          *
  749          * If send window is too small, there is data to transmit, and no
  750          * retransmit or persist is pending, then go to persist state.
  751          * If nothing happens soon, send when timer expires:
  752          * if window is nonzero, transmit what we can,
  753          * otherwise force out a byte.
  754          */
  755         if (sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) &&
  756             !tcp_timer_active(tp, TT_PERSIST)) {
  757                 tp->t_rxtshift = 0;
  758                 tcp_setpersist(tp);
  759         }
  760 
  761         /*
  762          * No reason to send a segment, just return.
  763          */
  764 just_return:
  765         SOCKBUF_UNLOCK(&so->so_snd);
  766         return (0);
  767 
  768 send:
  769         SOCKBUF_LOCK_ASSERT(&so->so_snd);
  770         if (len > 0) {
  771                 if (len >= tp->t_maxseg)
  772                         tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
  773                 else
  774                         tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
  775         }
  776         /*
  777          * Before ESTABLISHED, force sending of initial options
  778          * unless TCP set not to do any options.
  779          * NOTE: we assume that the IP/TCP header plus TCP options
  780          * always fit in a single mbuf, leaving room for a maximum
  781          * link header, i.e.
  782          *      max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES
  783          */
  784         optlen = 0;
  785 #ifdef INET6
  786         if (isipv6)
  787                 hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
  788         else
  789 #endif
  790                 hdrlen = sizeof (struct tcpiphdr);
  791 
  792         if (flags & TH_SYN) {
  793                 tp->snd_nxt = tp->iss;
  794         }
  795 
  796         /*
  797          * Compute options for segment.
  798          * We only have to care about SYN and established connection
  799          * segments.  Options for SYN-ACK segments are handled in TCP
  800          * syncache.
  801          */
  802         to.to_flags = 0;
  803         if ((tp->t_flags & TF_NOOPT) == 0) {
  804                 /* Maximum segment size. */
  805                 if (flags & TH_SYN) {
  806                         to.to_mss = tcp_mssopt(&inp->inp_inc);
  807                         if (tp->t_port)
  808                                 to.to_mss -= V_tcp_udp_tunneling_overhead;
  809                         to.to_flags |= TOF_MSS;
  810 
  811                         /*
  812                          * On SYN or SYN|ACK transmits on TFO connections,
  813                          * only include the TFO option if it is not a
  814                          * retransmit, as the presence of the TFO option may
  815                          * have caused the original SYN or SYN|ACK to have
  816                          * been dropped by a middlebox.
  817                          */
  818                         if (IS_FASTOPEN(tp->t_flags) &&
  819                             (tp->t_rxtshift == 0)) {
  820                                 if (tp->t_state == TCPS_SYN_RECEIVED) {
  821                                         to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
  822                                         to.to_tfo_cookie =
  823                                             (u_int8_t *)&tp->t_tfo_cookie.server;
  824                                         to.to_flags |= TOF_FASTOPEN;
  825                                         wanted_cookie = 1;
  826                                 } else if (tp->t_state == TCPS_SYN_SENT) {
  827                                         to.to_tfo_len =
  828                                             tp->t_tfo_client_cookie_len;
  829                                         to.to_tfo_cookie =
  830                                             tp->t_tfo_cookie.client;
  831                                         to.to_flags |= TOF_FASTOPEN;
  832                                         wanted_cookie = 1;
  833                                         /*
  834                                          * If we wind up having more data to
  835                                          * send with the SYN than can fit in
  836                                          * one segment, don't send any more
  837                                          * until the SYN|ACK comes back from
  838                                          * the other end.
  839                                          */
  840                                         dont_sendalot = 1;
  841                                 }
  842                         }
  843                 }
  844                 /* Window scaling. */
  845                 if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
  846                         to.to_wscale = tp->request_r_scale;
  847                         to.to_flags |= TOF_SCALE;
  848                 }
  849                 /* Timestamps. */
  850                 if ((tp->t_flags & TF_RCVD_TSTMP) ||
  851                     ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
  852                         curticks = tcp_ts_getticks();
  853                         to.to_tsval = curticks + tp->ts_offset;
  854                         to.to_tsecr = tp->ts_recent;
  855                         to.to_flags |= TOF_TS;
  856                         if (tp->t_rxtshift == 1)
  857                                 tp->t_badrxtwin = curticks;
  858                 }
  859 
  860                 /* Set receive buffer autosizing timestamp. */
  861                 if (tp->rfbuf_ts == 0 &&
  862                     (so->so_rcv.sb_flags & SB_AUTOSIZE))
  863                         tp->rfbuf_ts = tcp_ts_getticks();
  864 
  865                 /* Selective ACK's. */
  866                 if (tp->t_flags & TF_SACK_PERMIT) {
  867                         if (flags & TH_SYN)
  868                                 to.to_flags |= TOF_SACKPERM;
  869                         else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
  870                             tp->rcv_numsacks > 0) {
  871                                 to.to_flags |= TOF_SACK;
  872                                 to.to_nsacks = tp->rcv_numsacks;
  873                                 to.to_sacks = (u_char *)tp->sackblks;
  874                         }
  875                 }
  876 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
  877                 /* TCP-MD5 (RFC2385). */
  878                 /*
  879                  * Check that TCP_MD5SIG is enabled in tcpcb to
  880                  * account the size needed to set this TCP option.
  881                  */
  882                 if (tp->t_flags & TF_SIGNATURE)
  883                         to.to_flags |= TOF_SIGNATURE;
  884 #endif /* TCP_SIGNATURE */
  885 
  886                 /* Processing the options. */
  887                 hdrlen += optlen = tcp_addoptions(&to, opt);
  888                 /*
  889                  * If we wanted a TFO option to be added, but it was unable
  890                  * to fit, ensure no data is sent.
  891                  */
  892                 if (IS_FASTOPEN(tp->t_flags) && wanted_cookie &&
  893                     !(to.to_flags & TOF_FASTOPEN))
  894                         len = 0;
  895         }
  896         if (tp->t_port) {
  897                 if (V_tcp_udp_tunneling_port == 0) {
  898                         /* The port was removed?? */
  899                         SOCKBUF_UNLOCK(&so->so_snd);
  900                         return (EHOSTUNREACH);
  901                 }
  902                 hdrlen += sizeof(struct udphdr);
  903         }
  904         /*
  905          * Adjust data length if insertion of options will
  906          * bump the packet length beyond the t_maxseg length.
  907          * Clear the FIN bit because we cut off the tail of
  908          * the segment.
  909          */
  910         if (len + optlen + ipoptlen > tp->t_maxseg) {
  911                 flags &= ~TH_FIN;
  912 
  913                 if (tso) {
  914                         u_int if_hw_tsomax;
  915                         u_int moff;
  916                         int max_len;
  917 
  918                         /* extract TSO information */
  919                         if_hw_tsomax = tp->t_tsomax;
  920                         if_hw_tsomaxsegcount = tp->t_tsomaxsegcount;
  921                         if_hw_tsomaxsegsize = tp->t_tsomaxsegsize;
  922 
  923                         /*
  924                          * Limit a TSO burst to prevent it from
  925                          * overflowing or exceeding the maximum length
  926                          * allowed by the network interface:
  927                          */
  928                         KASSERT(ipoptlen == 0,
  929                             ("%s: TSO can't do IP options", __func__));
  930 
  931                         /*
  932                          * Check if we should limit by maximum payload
  933                          * length:
  934                          */
  935                         if (if_hw_tsomax != 0) {
  936                                 /* compute maximum TSO length */
  937                                 max_len = (if_hw_tsomax - hdrlen -
  938                                     max_linkhdr);
  939                                 if (max_len <= 0) {
  940                                         len = 0;
  941                                 } else if (len > max_len) {
  942                                         sendalot = 1;
  943                                         len = max_len;
  944                                 }
  945                         }
  946 
  947                         /*
  948                          * Prevent the last segment from being
  949                          * fractional unless the send sockbuf can be
  950                          * emptied:
  951                          */
  952                         max_len = (tp->t_maxseg - optlen);
  953                         if (((uint32_t)off + (uint32_t)len) <
  954                             sbavail(&so->so_snd)) {
  955                                 moff = len % max_len;
  956                                 if (moff != 0) {
  957                                         len -= moff;
  958                                         sendalot = 1;
  959                                 }
  960                         }
  961 
  962                         /*
  963                          * In case there are too many small fragments
  964                          * don't use TSO:
  965                          */
  966                         if (len <= max_len) {
  967                                 len = max_len;
  968                                 sendalot = 1;
  969                                 tso = 0;
  970                         }
  971 
  972                         /*
  973                          * Send the FIN in a separate segment
  974                          * after the bulk sending is done.
  975                          * We don't trust the TSO implementations
  976                          * to clear the FIN flag on all but the
  977                          * last segment.
  978                          */
  979                         if (tp->t_flags & TF_NEEDFIN)
  980                                 sendalot = 1;
  981                 } else {
  982                         if (optlen + ipoptlen >= tp->t_maxseg) {
  983                                 /*
  984                                  * Since we don't have enough space to put
  985                                  * the IP header chain and the TCP header in
  986                                  * one packet as required by RFC 7112, don't
  987                                  * send it. Also ensure that at least one
  988                                  * byte of the payload can be put into the
  989                                  * TCP segment.
  990                                  */
  991                                 SOCKBUF_UNLOCK(&so->so_snd);
  992                                 error = EMSGSIZE;
  993                                 sack_rxmit = 0;
  994                                 goto out;
  995                         }
  996                         len = tp->t_maxseg - optlen - ipoptlen;
  997                         sendalot = 1;
  998                         if (dont_sendalot)
  999                                 sendalot = 0;
 1000                 }
 1001         } else
 1002                 tso = 0;
 1003 
 1004         KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET,
 1005             ("%s: len > IP_MAXPACKET", __func__));
 1006 
 1007 /*#ifdef DIAGNOSTIC*/
 1008 #ifdef INET6
 1009         if (max_linkhdr + hdrlen > MCLBYTES)
 1010 #else
 1011         if (max_linkhdr + hdrlen > MHLEN)
 1012 #endif
 1013                 panic("tcphdr too big");
 1014 /*#endif*/
 1015 
 1016         /*
 1017          * This KASSERT is here to catch edge cases at a well defined place.
 1018          * Before, those had triggered (random) panic conditions further down.
 1019          */
 1020         KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
 1021 
 1022         /*
 1023          * Grab a header mbuf, attaching a copy of data to
 1024          * be transmitted, and initialize the header from
 1025          * the template for sends on this connection.
 1026          */
 1027         if (len) {
 1028                 struct mbuf *mb;
 1029                 struct sockbuf *msb;
 1030                 u_int moff;
 1031 
 1032                 if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
 1033                         TCPSTAT_INC(tcps_sndprobe);
 1034 #ifdef STATS
 1035                         if (SEQ_LT(tp->snd_nxt, tp->snd_max))
 1036                                 stats_voi_update_abs_u32(tp->t_stats,
 1037                                 VOI_TCP_RETXPB, len);
 1038                         else
 1039                                 stats_voi_update_abs_u64(tp->t_stats,
 1040                                     VOI_TCP_TXPB, len);
 1041 #endif /* STATS */
 1042                 } else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
 1043                         tp->t_sndrexmitpack++;
 1044                         TCPSTAT_INC(tcps_sndrexmitpack);
 1045                         TCPSTAT_ADD(tcps_sndrexmitbyte, len);
 1046 #ifdef STATS
 1047                         stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
 1048                             len);
 1049 #endif /* STATS */
 1050                 } else {
 1051                         TCPSTAT_INC(tcps_sndpack);
 1052                         TCPSTAT_ADD(tcps_sndbyte, len);
 1053 #ifdef STATS
 1054                         stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
 1055                             len);
 1056 #endif /* STATS */
 1057                 }
 1058 #ifdef INET6
 1059                 if (MHLEN < hdrlen + max_linkhdr)
 1060                         m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 1061                 else
 1062 #endif
 1063                         m = m_gethdr(M_NOWAIT, MT_DATA);
 1064 
 1065                 if (m == NULL) {
 1066                         SOCKBUF_UNLOCK(&so->so_snd);
 1067                         error = ENOBUFS;
 1068                         sack_rxmit = 0;
 1069                         goto out;
 1070                 }
 1071 
 1072                 m->m_data += max_linkhdr;
 1073                 m->m_len = hdrlen;
 1074 
 1075                 /*
 1076                  * Start the m_copy functions from the closest mbuf
 1077                  * to the offset in the socket buffer chain.
 1078                  */
 1079                 mb = sbsndptr_noadv(&so->so_snd, off, &moff);
 1080                 if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) {
 1081                         m_copydata(mb, moff, len,
 1082                             mtod(m, caddr_t) + hdrlen);
 1083                         if (SEQ_LT(tp->snd_nxt, tp->snd_max))
 1084                                 sbsndptr_adv(&so->so_snd, mb, len);
 1085                         m->m_len += len;
 1086                 } else {
 1087                         if (SEQ_LT(tp->snd_nxt, tp->snd_max))
 1088                                 msb = NULL;
 1089                         else
 1090                                 msb = &so->so_snd;
 1091                         m->m_next = tcp_m_copym(mb, moff,
 1092                             &len, if_hw_tsomaxsegcount,
 1093                             if_hw_tsomaxsegsize, msb, hw_tls);
 1094                         if (len <= (tp->t_maxseg - optlen)) {
 1095                                 /*
 1096                                  * Must have run out of mbufs for the copy;
 1097                                  * the shortened length no longer needs TSO.
 1098                                  * Let's not set sendalot since we are low on
 1099                                  * mbufs.
 1100                                  */
 1101                                 tso = 0;
 1102                         }
 1103                         if (m->m_next == NULL) {
 1104                                 SOCKBUF_UNLOCK(&so->so_snd);
 1105                                 (void) m_free(m);
 1106                                 error = ENOBUFS;
 1107                                 sack_rxmit = 0;
 1108                                 goto out;
 1109                         }
 1110                 }
 1111 
 1112                 /*
 1113                  * If we're sending everything we've got, set PUSH.
 1114                  * (This will keep happy those implementations which only
 1115                  * give data to the user when a buffer fills or
 1116                  * a PUSH comes in.)
 1117                  */
 1118                 if (((uint32_t)off + (uint32_t)len == sbused(&so->so_snd)) &&
 1119                     !(flags & TH_SYN))
 1120                         flags |= TH_PUSH;
 1121                 SOCKBUF_UNLOCK(&so->so_snd);
 1122         } else {
 1123                 SOCKBUF_UNLOCK(&so->so_snd);
 1124                 if (tp->t_flags & TF_ACKNOW)
 1125                         TCPSTAT_INC(tcps_sndacks);
 1126                 else if (flags & (TH_SYN|TH_FIN|TH_RST))
 1127                         TCPSTAT_INC(tcps_sndctrl);
 1128                 else if (SEQ_GT(tp->snd_up, tp->snd_una))
 1129                         TCPSTAT_INC(tcps_sndurg);
 1130                 else
 1131                         TCPSTAT_INC(tcps_sndwinup);
 1132 
 1133                 m = m_gethdr(M_NOWAIT, MT_DATA);
 1134                 if (m == NULL) {
 1135                         error = ENOBUFS;
 1136                         sack_rxmit = 0;
 1137                         goto out;
 1138                 }
 1139 #ifdef INET6
 1140                 if (isipv6 && (MHLEN < hdrlen + max_linkhdr) &&
 1141                     MHLEN >= hdrlen) {
 1142                         M_ALIGN(m, hdrlen);
 1143                 } else
 1144 #endif
 1145                 m->m_data += max_linkhdr;
 1146                 m->m_len = hdrlen;
 1147         }
 1148         SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
 1149         m->m_pkthdr.rcvif = (struct ifnet *)0;
 1150 #ifdef MAC
 1151         mac_inpcb_create_mbuf(inp, m);
 1152 #endif
 1153 #ifdef INET6
 1154         if (isipv6) {
 1155                 ip6 = mtod(m, struct ip6_hdr *);
 1156                 if (tp->t_port) {
 1157                         udp = (struct udphdr *)((caddr_t)ip6 + sizeof(struct ip6_hdr));
 1158                         udp->uh_sport = htons(V_tcp_udp_tunneling_port);
 1159                         udp->uh_dport = tp->t_port;
 1160                         ulen = hdrlen + len - sizeof(struct ip6_hdr);
 1161                         udp->uh_ulen = htons(ulen);
 1162                         th = (struct tcphdr *)(udp + 1);
 1163                 } else {
 1164                         th = (struct tcphdr *)(ip6 + 1);
 1165                 }
 1166                 tcpip_fillheaders(inp, tp->t_port, ip6, th);
 1167         } else
 1168 #endif /* INET6 */
 1169         {
 1170                 ip = mtod(m, struct ip *);
 1171                 if (tp->t_port) {
 1172                         udp = (struct udphdr *)((caddr_t)ip + sizeof(struct ip));
 1173                         udp->uh_sport = htons(V_tcp_udp_tunneling_port);
 1174                         udp->uh_dport = tp->t_port;
 1175                         ulen = hdrlen + len - sizeof(struct ip);
 1176                         udp->uh_ulen = htons(ulen);
 1177                         th = (struct tcphdr *)(udp + 1);
 1178                 } else
 1179                         th = (struct tcphdr *)(ip + 1);
 1180                 tcpip_fillheaders(inp, tp->t_port, ip, th);
 1181         }
 1182 
 1183         /*
 1184          * Fill in fields, remembering maximum advertised
 1185          * window for use in delaying messages about window sizes.
 1186          * If resending a FIN, be sure not to use a new sequence number.
 1187          */
 1188         if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
 1189             tp->snd_nxt == tp->snd_max)
 1190                 tp->snd_nxt--;
 1191         /*
 1192          * If we are starting a connection, send ECN setup
 1193          * SYN packet. If we are on a retransmit, we may
 1194          * resend those bits a number of times as per
 1195          * RFC 3168.
 1196          */
 1197         if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
 1198                 flags |= tcp_ecn_output_syn_sent(tp);
 1199         }
 1200         /* Also handle parallel SYN for ECN */
 1201         if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
 1202             (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
 1203                 int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
 1204                 if ((tp->t_state == TCPS_SYN_RECEIVED) &&
 1205                     (tp->t_flags2 & TF2_ECN_SND_ECE))
 1206                         tp->t_flags2 &= ~TF2_ECN_SND_ECE;
 1207 #ifdef INET6
 1208                 if (isipv6) {
 1209                         ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20);
 1210                         ip6->ip6_flow |= htonl(ect << 20);
 1211                 }
 1212                 else
 1213 #endif
 1214                 {
 1215                         ip->ip_tos &= ~IPTOS_ECN_MASK;
 1216                         ip->ip_tos |= ect;
 1217                 }
 1218         }
 1219 
 1220         /*
 1221          * If we are doing retransmissions, then snd_nxt will
 1222          * not reflect the first unsent octet.  For ACK only
 1223          * packets, we do not want the sequence number of the
 1224          * retransmitted packet, we want the sequence number
 1225          * of the next unsent octet.  So, if there is no data
 1226          * (and no SYN or FIN), use snd_max instead of snd_nxt
 1227          * when filling in ti_seq.  But if we are in persist
 1228          * state, snd_max might reflect one byte beyond the
 1229          * right edge of the window, so use snd_nxt in that
 1230          * case, since we know we aren't doing a retransmission.
 1231          * (retransmit and persist are mutually exclusive...)
 1232          */
 1233         if (sack_rxmit == 0) {
 1234                 if (len || (flags & (TH_SYN|TH_FIN)) ||
 1235                     tcp_timer_active(tp, TT_PERSIST))
 1236                         th->th_seq = htonl(tp->snd_nxt);
 1237                 else
 1238                         th->th_seq = htonl(tp->snd_max);
 1239         } else {
 1240                 th->th_seq = htonl(p->rxmit);
 1241                 p->rxmit += len;
 1242                 /*
 1243                  * Lost Retransmission Detection
 1244                  * trigger resending of a (then
 1245                  * still existing) hole, when
 1246                  * fack acks recoverypoint.
 1247                  */
 1248                 if ((tp->t_flags & TF_LRD) && SEQ_GEQ(p->rxmit, p->end))
 1249                         p->rxmit = tp->snd_recover;
 1250                 tp->sackhint.sack_bytes_rexmit += len;
 1251         }
 1252         if (IN_RECOVERY(tp->t_flags)) {
 1253                 /*
 1254                  * Account all bytes transmitted while
 1255                  * IN_RECOVERY, simplifying PRR and
 1256                  * Lost Retransmit Detection
 1257                  */
 1258                 tp->sackhint.prr_out += len;
 1259         }
 1260         th->th_ack = htonl(tp->rcv_nxt);
 1261         if (optlen) {
 1262                 bcopy(opt, th + 1, optlen);
 1263                 th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
 1264         }
 1265         tcp_set_flags(th, flags);
 1266         /*
 1267          * Calculate receive window.  Don't shrink window,
 1268          * but avoid silly window syndrome.
 1269          * If a RST segment is sent, advertise a window of zero.
 1270          */
 1271         if (flags & TH_RST) {
 1272                 recwin = 0;
 1273         } else {
 1274                 if (recwin < (so->so_rcv.sb_hiwat / 4) &&
 1275                     recwin < tp->t_maxseg)
 1276                         recwin = 0;
 1277                 if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
 1278                     recwin < (tp->rcv_adv - tp->rcv_nxt))
 1279                         recwin = (tp->rcv_adv - tp->rcv_nxt);
 1280         }
 1281         /*
 1282          * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 1283          * or <SYN,ACK>) segment itself is never scaled.  The <SYN,ACK>
 1284          * case is handled in syncache.
 1285          */
 1286         if (flags & TH_SYN)
 1287                 th->th_win = htons((u_short)
 1288                                 (min(sbspace(&so->so_rcv), TCP_MAXWIN)));
 1289         else {
 1290                 /* Avoid shrinking window with window scaling. */
 1291                 recwin = roundup2(recwin, 1 << tp->rcv_scale);
 1292                 th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
 1293         }
 1294 
 1295         /*
 1296          * Adjust the RXWIN0SENT flag - indicate that we have advertised
 1297          * a 0 window.  This may cause the remote transmitter to stall.  This
 1298          * flag tells soreceive() to disable delayed acknowledgements when
 1299          * draining the buffer.  This can occur if the receiver is attempting
 1300          * to read more data than can be buffered prior to transmitting on
 1301          * the connection.
 1302          */
 1303         if (th->th_win == 0) {
 1304                 tp->t_sndzerowin++;
 1305                 tp->t_flags |= TF_RXWIN0SENT;
 1306         } else
 1307                 tp->t_flags &= ~TF_RXWIN0SENT;
 1308         if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
 1309                 th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
 1310                 th->th_flags |= TH_URG;
 1311         } else
 1312                 /*
 1313                  * If no urgent pointer to send, then we pull
 1314                  * the urgent pointer to the left edge of the send window
 1315                  * so that it doesn't drift into the send window on sequence
 1316                  * number wraparound.
 1317                  */
 1318                 tp->snd_up = tp->snd_una;               /* drag it along */
 1319 
 1320         /*
 1321          * Put TCP length in extended header, and then
 1322          * checksum extended header and data.
 1323          */
 1324         m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
 1325 
 1326 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 1327         if (to.to_flags & TOF_SIGNATURE) {
 1328                 /*
 1329                  * Calculate MD5 signature and put it into the place
 1330                  * determined before.
 1331                  * NOTE: since TCP options buffer doesn't point into
 1332                  * mbuf's data, calculate offset and use it.
 1333                  */
 1334                 if (!TCPMD5_ENABLED() || (error = TCPMD5_OUTPUT(m, th,
 1335                     (u_char *)(th + 1) + (to.to_signature - opt))) != 0) {
 1336                         /*
 1337                          * Do not send segment if the calculation of MD5
 1338                          * digest has failed.
 1339                          */
 1340                         m_freem(m);
 1341                         goto out;
 1342                 }
 1343         }
 1344 #endif
 1345 #ifdef INET6
 1346         if (isipv6) {
 1347                 /*
 1348                  * There is no need to fill in ip6_plen right now.
 1349                  * It will be filled later by ip6_output.
 1350                  */
 1351                 if (tp->t_port) {
 1352                         m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
 1353                         m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 1354                         udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
 1355                         th->th_sum = htons(0);
 1356                         UDPSTAT_INC(udps_opackets);
 1357                 } else {
 1358                         m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 1359                         m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 1360                         th->th_sum = in6_cksum_pseudo(ip6,
 1361                             sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP,
 1362                             0);
 1363                 }
 1364         }
 1365 #endif
 1366 #if defined(INET6) && defined(INET)
 1367         else
 1368 #endif
 1369 #ifdef INET
 1370         {
 1371                 if (tp->t_port) {
 1372                         m->m_pkthdr.csum_flags = CSUM_UDP;
 1373                         m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 1374                         udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
 1375                            ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
 1376                         th->th_sum = htons(0);
 1377                         UDPSTAT_INC(udps_opackets);
 1378                 } else {
 1379                         m->m_pkthdr.csum_flags = CSUM_TCP;
 1380                         m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 1381                         th->th_sum = in_pseudo(ip->ip_src.s_addr,
 1382                             ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
 1383                             IPPROTO_TCP + len + optlen));
 1384                 }
 1385 
 1386                 /* IP version must be set here for ipv4/ipv6 checking later */
 1387                 KASSERT(ip->ip_v == IPVERSION,
 1388                     ("%s: IP version incorrect: %d", __func__, ip->ip_v));
 1389         }
 1390 #endif
 1391 
 1392         /*
 1393          * Enable TSO and specify the size of the segments.
 1394          * The TCP pseudo header checksum is always provided.
 1395          */
 1396         if (tso) {
 1397                 KASSERT(len > tp->t_maxseg - optlen,
 1398                     ("%s: len <= tso_segsz", __func__));
 1399                 m->m_pkthdr.csum_flags |= CSUM_TSO;
 1400                 m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen;
 1401         }
 1402 
 1403         KASSERT(len + hdrlen == m_length(m, NULL),
 1404             ("%s: mbuf chain shorter than expected: %d + %u != %u",
 1405             __func__, len, hdrlen, m_length(m, NULL)));
 1406 
 1407 #ifdef TCP_HHOOK
 1408         /* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */
 1409         hhook_run_tcp_est_out(tp, th, &to, len, tso);
 1410 #endif
 1411 
 1412         TCP_PROBE3(debug__output, tp, th, m);
 1413 
 1414         /* We're getting ready to send; log now. */
 1415         /* XXXMT: We are not honoring verbose logging. */
 1416         if (tp->t_logstate != TCP_LOG_STATE_OFF)
 1417                 lgb = tcp_log_event_(tp, th, &so->so_rcv, &so->so_snd,
 1418                     TCP_LOG_OUT, ERRNO_UNK, len, NULL, false, NULL, NULL, 0,
 1419                     NULL);
 1420         else
 1421                 lgb = NULL;
 1422 
 1423         /*
 1424          * Fill in IP length and desired time to live and
 1425          * send to IP level.  There should be a better way
 1426          * to handle ttl and tos; we could keep them in
 1427          * the template, but need a way to checksum without them.
 1428          */
 1429         /*
 1430          * m->m_pkthdr.len should have been set before checksum calculation,
 1431          * because in6_cksum() need it.
 1432          */
 1433 #ifdef INET6
 1434         if (isipv6) {
 1435                 /*
 1436                  * we separately set hoplimit for every segment, since the
 1437                  * user might want to change the value via setsockopt.
 1438                  * Also, desired default hop limit might be changed via
 1439                  * Neighbor Discovery.
 1440                  */
 1441                 ip6->ip6_hlim = in6_selecthlim(inp, NULL);
 1442 
 1443                 /*
 1444                  * Set the packet size here for the benefit of DTrace probes.
 1445                  * ip6_output() will set it properly; it's supposed to include
 1446                  * the option header lengths as well.
 1447                  */
 1448                 ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
 1449 
 1450                 if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss)
 1451                         tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 1452                 else
 1453                         tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 1454 
 1455                 if (tp->t_state == TCPS_SYN_SENT)
 1456                         TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
 1457 
 1458                 TCP_PROBE5(send, NULL, tp, ip6, tp, th);
 1459 
 1460 #ifdef TCPPCAP
 1461                 /* Save packet, if requested. */
 1462                 tcp_pcap_add(th, m, &(tp->t_outpkts));
 1463 #endif
 1464 
 1465                 /* TODO: IPv6 IP6TOS_ECT bit on */
 1466                 error = ip6_output(m, inp->in6p_outputopts, &inp->inp_route6,
 1467                     ((so->so_options & SO_DONTROUTE) ?  IP_ROUTETOIF : 0),
 1468                     NULL, NULL, inp);
 1469 
 1470                 if (error == EMSGSIZE && inp->inp_route6.ro_nh != NULL)
 1471                         mtu = inp->inp_route6.ro_nh->nh_mtu;
 1472         }
 1473 #endif /* INET6 */
 1474 #if defined(INET) && defined(INET6)
 1475         else
 1476 #endif
 1477 #ifdef INET
 1478     {
 1479         ip->ip_len = htons(m->m_pkthdr.len);
 1480 #ifdef INET6
 1481         if (inp->inp_vflag & INP_IPV6PROTO)
 1482                 ip->ip_ttl = in6_selecthlim(inp, NULL);
 1483 #endif /* INET6 */
 1484         /*
 1485          * If we do path MTU discovery, then we set DF on every packet.
 1486          * This might not be the best thing to do according to RFC3390
 1487          * Section 2. However the tcp hostcache mitigates the problem
 1488          * so it affects only the first tcp connection with a host.
 1489          *
 1490          * NB: Don't set DF on small MTU/MSS to have a safe fallback.
 1491          */
 1492         if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
 1493                 tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 1494                 if (tp->t_port == 0 || len < V_tcp_minmss) {
 1495                         ip->ip_off |= htons(IP_DF);
 1496                 }
 1497         } else {
 1498                 tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 1499         }
 1500 
 1501         if (tp->t_state == TCPS_SYN_SENT)
 1502                 TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
 1503 
 1504         TCP_PROBE5(send, NULL, tp, ip, tp, th);
 1505 
 1506 #ifdef TCPPCAP
 1507         /* Save packet, if requested. */
 1508         tcp_pcap_add(th, m, &(tp->t_outpkts));
 1509 #endif
 1510 
 1511         error = ip_output(m, inp->inp_options, &inp->inp_route,
 1512             ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, inp);
 1513 
 1514         if (error == EMSGSIZE && inp->inp_route.ro_nh != NULL)
 1515                 mtu = inp->inp_route.ro_nh->nh_mtu;
 1516     }
 1517 #endif /* INET */
 1518 
 1519         if (lgb != NULL) {
 1520                 lgb->tlb_errno = error;
 1521                 lgb = NULL;
 1522         }
 1523 out:
 1524         if (error == 0)
 1525                 tcp_account_for_send(tp, len, (tp->snd_nxt != tp->snd_max), 0, hw_tls);
 1526         /*
 1527          * In transmit state, time the transmission and arrange for
 1528          * the retransmit.  In persist state, just set snd_max.
 1529          */
 1530         if ((tp->t_flags & TF_FORCEDATA) == 0 ||
 1531             !tcp_timer_active(tp, TT_PERSIST)) {
 1532                 tcp_seq startseq = tp->snd_nxt;
 1533 
 1534                 /*
 1535                  * Advance snd_nxt over sequence space of this segment.
 1536                  */
 1537                 if (flags & (TH_SYN|TH_FIN)) {
 1538                         if (flags & TH_SYN)
 1539                                 tp->snd_nxt++;
 1540                         if (flags & TH_FIN) {
 1541                                 tp->snd_nxt++;
 1542                                 tp->t_flags |= TF_SENTFIN;
 1543                         }
 1544                 }
 1545                 if (sack_rxmit)
 1546                         goto timer;
 1547                 tp->snd_nxt += len;
 1548                 if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
 1549                         /*
 1550                          * Update "made progress" indication if we just
 1551                          * added new data to an empty socket buffer.
 1552                          */
 1553                         if (tp->snd_una == tp->snd_max)
 1554                                 tp->t_acktime = ticks;
 1555                         tp->snd_max = tp->snd_nxt;
 1556                         /*
 1557                          * Time this transmission if not a retransmission and
 1558                          * not currently timing anything.
 1559                          */
 1560                         tp->t_sndtime = ticks;
 1561                         if (tp->t_rtttime == 0) {
 1562                                 tp->t_rtttime = ticks;
 1563                                 tp->t_rtseq = startseq;
 1564                                 TCPSTAT_INC(tcps_segstimed);
 1565                         }
 1566 #ifdef STATS
 1567                         if (!(tp->t_flags & TF_GPUTINPROG) && len) {
 1568                                 tp->t_flags |= TF_GPUTINPROG;
 1569                                 tp->gput_seq = startseq;
 1570                                 tp->gput_ack = startseq +
 1571                                     ulmin(sbavail(&so->so_snd) - off, sendwin);
 1572                                 tp->gput_ts = tcp_ts_getticks();
 1573                         }
 1574 #endif /* STATS */
 1575                 }
 1576 
 1577                 /*
 1578                  * Set retransmit timer if not currently set,
 1579                  * and not doing a pure ack or a keep-alive probe.
 1580                  * Initial value for retransmit timer is smoothed
 1581                  * round-trip time + 2 * round-trip time variance.
 1582                  * Initialize shift counter which is used for backoff
 1583                  * of retransmit time.
 1584                  */
 1585 timer:
 1586                 if (!tcp_timer_active(tp, TT_REXMT) &&
 1587                     ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
 1588                      (tp->snd_nxt != tp->snd_una))) {
 1589                         if (tcp_timer_active(tp, TT_PERSIST)) {
 1590                                 tcp_timer_activate(tp, TT_PERSIST, 0);
 1591                                 tp->t_rxtshift = 0;
 1592                         }
 1593                         tcp_timer_activate(tp, TT_REXMT, TP_RXTCUR(tp));
 1594                 } else if (len == 0 && sbavail(&so->so_snd) &&
 1595                     !tcp_timer_active(tp, TT_REXMT) &&
 1596                     !tcp_timer_active(tp, TT_PERSIST)) {
 1597                         /*
 1598                          * Avoid a situation where we do not set persist timer
 1599                          * after a zero window condition. For example:
 1600                          * 1) A -> B: packet with enough data to fill the window
 1601                          * 2) B -> A: ACK for #1 + new data (0 window
 1602                          *    advertisement)
 1603                          * 3) A -> B: ACK for #2, 0 len packet
 1604                          *
 1605                          * In this case, A will not activate the persist timer,
 1606                          * because it chose to send a packet. Unless tcp_output
 1607                          * is called for some other reason (delayed ack timer,
 1608                          * another input packet from B, socket syscall), A will
 1609                          * not send zero window probes.
 1610                          *
 1611                          * So, if you send a 0-length packet, but there is data
 1612                          * in the socket buffer, and neither the rexmt or
 1613                          * persist timer is already set, then activate the
 1614                          * persist timer.
 1615                          */
 1616                         tp->t_rxtshift = 0;
 1617                         tcp_setpersist(tp);
 1618                 }
 1619         } else {
 1620                 /*
 1621                  * Persist case, update snd_max but since we are in
 1622                  * persist mode (no window) we do not update snd_nxt.
 1623                  */
 1624                 int xlen = len;
 1625                 if (flags & TH_SYN)
 1626                         ++xlen;
 1627                 if (flags & TH_FIN) {
 1628                         ++xlen;
 1629                         tp->t_flags |= TF_SENTFIN;
 1630                 }
 1631                 if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
 1632                         tp->snd_max = tp->snd_nxt + xlen;
 1633         }
 1634         if ((error == 0) &&
 1635             (TCPS_HAVEESTABLISHED(tp->t_state) &&
 1636              (tp->t_flags & TF_SACK_PERMIT) &&
 1637              tp->rcv_numsacks > 0)) {
 1638                     /* Clean up any DSACK's sent */
 1639                     tcp_clean_dsack_blocks(tp);
 1640         }
 1641         if (error) {
 1642                 /*
 1643                  * We know that the packet was lost, so back out the
 1644                  * sequence number advance, if any.
 1645                  *
 1646                  * If the error is EPERM the packet got blocked by the
 1647                  * local firewall.  Normally we should terminate the
 1648                  * connection but the blocking may have been spurious
 1649                  * due to a firewall reconfiguration cycle.  So we treat
 1650                  * it like a packet loss and let the retransmit timer and
 1651                  * timeouts do their work over time.
 1652                  * XXX: It is a POLA question whether calling tcp_drop right
 1653                  * away would be the really correct behavior instead.
 1654                  */
 1655                 if (((tp->t_flags & TF_FORCEDATA) == 0 ||
 1656                     !tcp_timer_active(tp, TT_PERSIST)) &&
 1657                     ((flags & TH_SYN) == 0) &&
 1658                     (error != EPERM)) {
 1659                         if (sack_rxmit) {
 1660                                 p->rxmit -= len;
 1661                                 tp->sackhint.sack_bytes_rexmit -= len;
 1662                                 KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
 1663                                     ("sackhint bytes rtx >= 0"));
 1664                                 KASSERT((flags & TH_FIN) == 0,
 1665                                     ("error while FIN with SACK rxmit"));
 1666                         } else {
 1667                                 tp->snd_nxt -= len;
 1668                                 if (flags & TH_FIN)
 1669                                         tp->snd_nxt--;
 1670                         }
 1671                 }
 1672                 SOCKBUF_UNLOCK_ASSERT(&so->so_snd);     /* Check gotos. */
 1673                 switch (error) {
 1674                 case EACCES:
 1675                 case EPERM:
 1676                         tp->t_softerror = error;
 1677                         return (error);
 1678                 case ENOBUFS:
 1679                         TCP_XMIT_TIMER_ASSERT(tp, len, flags);
 1680                         tp->snd_cwnd = tp->t_maxseg;
 1681                         return (0);
 1682                 case EMSGSIZE:
 1683                         /*
 1684                          * For some reason the interface we used initially
 1685                          * to send segments changed to another or lowered
 1686                          * its MTU.
 1687                          * If TSO was active we either got an interface
 1688                          * without TSO capabilities or TSO was turned off.
 1689                          * If we obtained mtu from ip_output() then update
 1690                          * it and try again.
 1691                          */
 1692                         if (tso)
 1693                                 tp->t_flags &= ~TF_TSO;
 1694                         if (mtu != 0) {
 1695                                 tcp_mss_update(tp, -1, mtu, NULL, NULL);
 1696                                 goto again;
 1697                         }
 1698                         return (error);
 1699                 case EHOSTDOWN:
 1700                 case EHOSTUNREACH:
 1701                 case ENETDOWN:
 1702                 case ENETUNREACH:
 1703                         if (TCPS_HAVERCVDSYN(tp->t_state)) {
 1704                                 tp->t_softerror = error;
 1705                                 return (0);
 1706                         }
 1707                         /* FALLTHROUGH */
 1708                 default:
 1709                         return (error);
 1710                 }
 1711         }
 1712         TCPSTAT_INC(tcps_sndtotal);
 1713 
 1714         /*
 1715          * Data sent (as far as we can tell).
 1716          * If this advertises a larger window than any other segment,
 1717          * then remember the size of the advertised window.
 1718          * Any pending ACK has now been sent.
 1719          */
 1720         if (SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
 1721                 tp->rcv_adv = tp->rcv_nxt + recwin;
 1722         tp->last_ack_sent = tp->rcv_nxt;
 1723         tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
 1724         if (tcp_timer_active(tp, TT_DELACK))
 1725                 tcp_timer_activate(tp, TT_DELACK, 0);
 1726 #if 0
 1727         /*
 1728          * This completely breaks TCP if newreno is turned on.  What happens
 1729          * is that if delayed-acks are turned on on the receiver, this code
 1730          * on the transmitter effectively destroys the TCP window, forcing
 1731          * it to four packets (1.5Kx4 = 6K window).
 1732          */
 1733         if (sendalot && --maxburst)
 1734                 goto again;
 1735 #endif
 1736         if (sendalot)
 1737                 goto again;
 1738         return (0);
 1739 }
 1740 
 1741 void
 1742 tcp_setpersist(struct tcpcb *tp)
 1743 {
 1744         int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
 1745         int tt;
 1746         int maxunacktime;
 1747 
 1748         tp->t_flags &= ~TF_PREVVALID;
 1749         if (tcp_timer_active(tp, TT_REXMT))
 1750                 panic("tcp_setpersist: retransmit pending");
 1751         /*
 1752          * If the state is already closed, don't bother.
 1753          */
 1754         if (tp->t_state == TCPS_CLOSED)
 1755                 return;
 1756 
 1757         /*
 1758          * Start/restart persistence timer.
 1759          */
 1760         TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
 1761                       tcp_persmin, tcp_persmax);
 1762         if (TP_MAXUNACKTIME(tp) && tp->t_acktime) {
 1763                 maxunacktime = tp->t_acktime + TP_MAXUNACKTIME(tp) - ticks;
 1764                 if (maxunacktime < 1)
 1765                         maxunacktime = 1;
 1766                 if (maxunacktime < tt)
 1767                         tt = maxunacktime;
 1768         }
 1769         tcp_timer_activate(tp, TT_PERSIST, tt);
 1770         if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
 1771                 tp->t_rxtshift++;
 1772 }
 1773 
 1774 /*
 1775  * Insert TCP options according to the supplied parameters to the place
 1776  * optp in a consistent way.  Can handle unaligned destinations.
 1777  *
 1778  * The order of the option processing is crucial for optimal packing and
 1779  * alignment for the scarce option space.
 1780  *
 1781  * The optimal order for a SYN/SYN-ACK segment is:
 1782  *   MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) +
 1783  *   Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40.
 1784  *
 1785  * The SACK options should be last.  SACK blocks consume 8*n+2 bytes.
 1786  * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks).
 1787  * At minimum we need 10 bytes (to generate 1 SACK block).  If both
 1788  * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present,
 1789  * we only have 10 bytes for SACK options (40 - (12 + 18)).
 1790  */
 1791 int
 1792 tcp_addoptions(struct tcpopt *to, u_char *optp)
 1793 {
 1794         u_int32_t mask, optlen = 0;
 1795 
 1796         for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
 1797                 if ((to->to_flags & mask) != mask)
 1798                         continue;
 1799                 if (optlen == TCP_MAXOLEN)
 1800                         break;
 1801                 switch (to->to_flags & mask) {
 1802                 case TOF_MSS:
 1803                         while (optlen % 4) {
 1804                                 optlen += TCPOLEN_NOP;
 1805                                 *optp++ = TCPOPT_NOP;
 1806                         }
 1807                         if (TCP_MAXOLEN - optlen < TCPOLEN_MAXSEG)
 1808                                 continue;
 1809                         optlen += TCPOLEN_MAXSEG;
 1810                         *optp++ = TCPOPT_MAXSEG;
 1811                         *optp++ = TCPOLEN_MAXSEG;
 1812                         to->to_mss = htons(to->to_mss);
 1813                         bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss));
 1814                         optp += sizeof(to->to_mss);
 1815                         break;
 1816                 case TOF_SCALE:
 1817                         while (!optlen || optlen % 2 != 1) {
 1818                                 optlen += TCPOLEN_NOP;
 1819                                 *optp++ = TCPOPT_NOP;
 1820                         }
 1821                         if (TCP_MAXOLEN - optlen < TCPOLEN_WINDOW)
 1822                                 continue;
 1823                         optlen += TCPOLEN_WINDOW;
 1824                         *optp++ = TCPOPT_WINDOW;
 1825                         *optp++ = TCPOLEN_WINDOW;
 1826                         *optp++ = to->to_wscale;
 1827                         break;
 1828                 case TOF_SACKPERM:
 1829                         while (optlen % 2) {
 1830                                 optlen += TCPOLEN_NOP;
 1831                                 *optp++ = TCPOPT_NOP;
 1832                         }
 1833                         if (TCP_MAXOLEN - optlen < TCPOLEN_SACK_PERMITTED)
 1834                                 continue;
 1835                         optlen += TCPOLEN_SACK_PERMITTED;
 1836                         *optp++ = TCPOPT_SACK_PERMITTED;
 1837                         *optp++ = TCPOLEN_SACK_PERMITTED;
 1838                         break;
 1839                 case TOF_TS:
 1840                         while (!optlen || optlen % 4 != 2) {
 1841                                 optlen += TCPOLEN_NOP;
 1842                                 *optp++ = TCPOPT_NOP;
 1843                         }
 1844                         if (TCP_MAXOLEN - optlen < TCPOLEN_TIMESTAMP)
 1845                                 continue;
 1846                         optlen += TCPOLEN_TIMESTAMP;
 1847                         *optp++ = TCPOPT_TIMESTAMP;
 1848                         *optp++ = TCPOLEN_TIMESTAMP;
 1849                         to->to_tsval = htonl(to->to_tsval);
 1850                         to->to_tsecr = htonl(to->to_tsecr);
 1851                         bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval));
 1852                         optp += sizeof(to->to_tsval);
 1853                         bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
 1854                         optp += sizeof(to->to_tsecr);
 1855                         break;
 1856                 case TOF_SIGNATURE:
 1857                         {
 1858                         int siglen = TCPOLEN_SIGNATURE - 2;
 1859 
 1860                         while (!optlen || optlen % 4 != 2) {
 1861                                 optlen += TCPOLEN_NOP;
 1862                                 *optp++ = TCPOPT_NOP;
 1863                         }
 1864                         if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) {
 1865                                 to->to_flags &= ~TOF_SIGNATURE;
 1866                                 continue;
 1867                         }
 1868                         optlen += TCPOLEN_SIGNATURE;
 1869                         *optp++ = TCPOPT_SIGNATURE;
 1870                         *optp++ = TCPOLEN_SIGNATURE;
 1871                         to->to_signature = optp;
 1872                         while (siglen--)
 1873                                  *optp++ = 0;
 1874                         break;
 1875                         }
 1876                 case TOF_SACK:
 1877                         {
 1878                         int sackblks = 0;
 1879                         struct sackblk *sack = (struct sackblk *)to->to_sacks;
 1880                         tcp_seq sack_seq;
 1881 
 1882                         while (!optlen || optlen % 4 != 2) {
 1883                                 optlen += TCPOLEN_NOP;
 1884                                 *optp++ = TCPOPT_NOP;
 1885                         }
 1886                         if (TCP_MAXOLEN - optlen < TCPOLEN_SACKHDR + TCPOLEN_SACK)
 1887                                 continue;
 1888                         optlen += TCPOLEN_SACKHDR;
 1889                         *optp++ = TCPOPT_SACK;
 1890                         sackblks = min(to->to_nsacks,
 1891                                         (TCP_MAXOLEN - optlen) / TCPOLEN_SACK);
 1892                         *optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK;
 1893                         while (sackblks--) {
 1894                                 sack_seq = htonl(sack->start);
 1895                                 bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
 1896                                 optp += sizeof(sack_seq);
 1897                                 sack_seq = htonl(sack->end);
 1898                                 bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
 1899                                 optp += sizeof(sack_seq);
 1900                                 optlen += TCPOLEN_SACK;
 1901                                 sack++;
 1902                         }
 1903                         TCPSTAT_INC(tcps_sack_send_blocks);
 1904                         break;
 1905                         }
 1906                 case TOF_FASTOPEN:
 1907                         {
 1908                         int total_len;
 1909 
 1910                         /* XXX is there any point to aligning this option? */
 1911                         total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
 1912                         if (TCP_MAXOLEN - optlen < total_len) {
 1913                                 to->to_flags &= ~TOF_FASTOPEN;
 1914                                 continue;
 1915                         }
 1916                         *optp++ = TCPOPT_FAST_OPEN;
 1917                         *optp++ = total_len;
 1918                         if (to->to_tfo_len > 0) {
 1919                                 bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
 1920                                 optp += to->to_tfo_len;
 1921                         }
 1922                         optlen += total_len;
 1923                         break;
 1924                         }
 1925                 default:
 1926                         panic("%s: unknown TCP option type", __func__);
 1927                         break;
 1928                 }
 1929         }
 1930 
 1931         /* Terminate and pad TCP options to a 4 byte boundary. */
 1932         if (optlen % 4) {
 1933                 optlen += TCPOLEN_EOL;
 1934                 *optp++ = TCPOPT_EOL;
 1935         }
 1936         /*
 1937          * According to RFC 793 (STD0007):
 1938          *   "The content of the header beyond the End-of-Option option
 1939          *    must be header padding (i.e., zero)."
 1940          *   and later: "The padding is composed of zeros."
 1941          */
 1942         while (optlen % 4) {
 1943                 optlen += TCPOLEN_PAD;
 1944                 *optp++ = TCPOPT_PAD;
 1945         }
 1946 
 1947         KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__));
 1948         return (optlen);
 1949 }
 1950 
 1951 /*
 1952  * This is a copy of m_copym(), taking the TSO segment size/limit
 1953  * constraints into account, and advancing the sndptr as it goes.
 1954  */
 1955 struct mbuf *
 1956 tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
 1957     int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls)
 1958 {
 1959 #ifdef KERN_TLS
 1960         struct ktls_session *tls, *ntls;
 1961         struct mbuf *start __diagused;
 1962 #endif
 1963         struct mbuf *n, **np;
 1964         struct mbuf *top;
 1965         int32_t off = off0;
 1966         int32_t len = *plen;
 1967         int32_t fragsize;
 1968         int32_t len_cp = 0;
 1969         int32_t *pkthdrlen;
 1970         uint32_t mlen, frags;
 1971         bool copyhdr;
 1972 
 1973         KASSERT(off >= 0, ("tcp_m_copym, negative off %d", off));
 1974         KASSERT(len >= 0, ("tcp_m_copym, negative len %d", len));
 1975         if (off == 0 && m->m_flags & M_PKTHDR)
 1976                 copyhdr = true;
 1977         else
 1978                 copyhdr = false;
 1979         while (off > 0) {
 1980                 KASSERT(m != NULL, ("tcp_m_copym, offset > size of mbuf chain"));
 1981                 if (off < m->m_len)
 1982                         break;
 1983                 off -= m->m_len;
 1984                 if ((sb) && (m == sb->sb_sndptr)) {
 1985                         sb->sb_sndptroff += m->m_len;
 1986                         sb->sb_sndptr = m->m_next;
 1987                 }
 1988                 m = m->m_next;
 1989         }
 1990         np = &top;
 1991         top = NULL;
 1992         pkthdrlen = NULL;
 1993 #ifdef KERN_TLS
 1994         if (hw_tls && (m->m_flags & M_EXTPG))
 1995                 tls = m->m_epg_tls;
 1996         else
 1997                 tls = NULL;
 1998         start = m;
 1999 #endif
 2000         while (len > 0) {
 2001                 if (m == NULL) {
 2002                         KASSERT(len == M_COPYALL,
 2003                             ("tcp_m_copym, length > size of mbuf chain"));
 2004                         *plen = len_cp;
 2005                         if (pkthdrlen != NULL)
 2006                                 *pkthdrlen = len_cp;
 2007                         break;
 2008                 }
 2009 #ifdef KERN_TLS
 2010                 if (hw_tls) {
 2011                         if (m->m_flags & M_EXTPG)
 2012                                 ntls = m->m_epg_tls;
 2013                         else
 2014                                 ntls = NULL;
 2015 
 2016                         /*
 2017                          * Avoid mixing TLS records with handshake
 2018                          * data or TLS records from different
 2019                          * sessions.
 2020                          */
 2021                         if (tls != ntls) {
 2022                                 MPASS(m != start);
 2023                                 *plen = len_cp;
 2024                                 if (pkthdrlen != NULL)
 2025                                         *pkthdrlen = len_cp;
 2026                                 break;
 2027                         }
 2028                 }
 2029 #endif
 2030                 mlen = min(len, m->m_len - off);
 2031                 if (seglimit) {
 2032                         /*
 2033                          * For M_EXTPG mbufs, add 3 segments
 2034                          * + 1 in case we are crossing page boundaries
 2035                          * + 2 in case the TLS hdr/trailer are used
 2036                          * It is cheaper to just add the segments
 2037                          * than it is to take the cache miss to look
 2038                          * at the mbuf ext_pgs state in detail.
 2039                          */
 2040                         if (m->m_flags & M_EXTPG) {
 2041                                 fragsize = min(segsize, PAGE_SIZE);
 2042                                 frags = 3;
 2043                         } else {
 2044                                 fragsize = segsize;
 2045                                 frags = 0;
 2046                         }
 2047 
 2048                         /* Break if we really can't fit anymore. */
 2049                         if ((frags + 1) >= seglimit) {
 2050                                 *plen = len_cp;
 2051                                 if (pkthdrlen != NULL)
 2052                                         *pkthdrlen = len_cp;
 2053                                 break;
 2054                         }
 2055 
 2056                         /*
 2057                          * Reduce size if you can't copy the whole
 2058                          * mbuf. If we can't copy the whole mbuf, also
 2059                          * adjust len so the loop will end after this
 2060                          * mbuf.
 2061                          */
 2062                         if ((frags + howmany(mlen, fragsize)) >= seglimit) {
 2063                                 mlen = (seglimit - frags - 1) * fragsize;
 2064                                 len = mlen;
 2065                                 *plen = len_cp + len;
 2066                                 if (pkthdrlen != NULL)
 2067                                         *pkthdrlen = *plen;
 2068                         }
 2069                         frags += howmany(mlen, fragsize);
 2070                         if (frags == 0)
 2071                                 frags++;
 2072                         seglimit -= frags;
 2073                         KASSERT(seglimit > 0,
 2074                             ("%s: seglimit went too low", __func__));
 2075                 }
 2076                 if (copyhdr)
 2077                         n = m_gethdr(M_NOWAIT, m->m_type);
 2078                 else
 2079                         n = m_get(M_NOWAIT, m->m_type);
 2080                 *np = n;
 2081                 if (n == NULL)
 2082                         goto nospace;
 2083                 if (copyhdr) {
 2084                         if (!m_dup_pkthdr(n, m, M_NOWAIT))
 2085                                 goto nospace;
 2086                         if (len == M_COPYALL)
 2087                                 n->m_pkthdr.len -= off0;
 2088                         else
 2089                                 n->m_pkthdr.len = len;
 2090                         pkthdrlen = &n->m_pkthdr.len;
 2091                         copyhdr = false;
 2092                 }
 2093                 n->m_len = mlen;
 2094                 len_cp += n->m_len;
 2095                 if (m->m_flags & (M_EXT|M_EXTPG)) {
 2096                         n->m_data = m->m_data + off;
 2097                         mb_dupcl(n, m);
 2098                 } else
 2099                         bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
 2100                             (u_int)n->m_len);
 2101 
 2102                 if (sb && (sb->sb_sndptr == m) &&
 2103                     ((n->m_len + off) >= m->m_len) && m->m_next) {
 2104                         sb->sb_sndptroff += m->m_len;
 2105                         sb->sb_sndptr = m->m_next;
 2106                 }
 2107                 off = 0;
 2108                 if (len != M_COPYALL) {
 2109                         len -= n->m_len;
 2110                 }
 2111                 m = m->m_next;
 2112                 np = &n->m_next;
 2113         }
 2114         return (top);
 2115 nospace:
 2116         m_freem(top);
 2117         return (NULL);
 2118 }
 2119 
 2120 void
 2121 tcp_sndbuf_autoscale(struct tcpcb *tp, struct socket *so, uint32_t sendwin)
 2122 {
 2123 
 2124         /*
 2125          * Automatic sizing of send socket buffer.  Often the send buffer
 2126          * size is not optimally adjusted to the actual network conditions
 2127          * at hand (delay bandwidth product).  Setting the buffer size too
 2128          * small limits throughput on links with high bandwidth and high
 2129          * delay (eg. trans-continental/oceanic links).  Setting the
 2130          * buffer size too big consumes too much real kernel memory,
 2131          * especially with many connections on busy servers.
 2132          *
 2133          * The criteria to step up the send buffer one notch are:
 2134          *  1. receive window of remote host is larger than send buffer
 2135          *     (with a fudge factor of 5/4th);
 2136          *  2. send buffer is filled to 7/8th with data (so we actually
 2137          *     have data to make use of it);
 2138          *  3. send buffer fill has not hit maximal automatic size;
 2139          *  4. our send window (slow start and cogestion controlled) is
 2140          *     larger than sent but unacknowledged data in send buffer.
 2141          *
 2142          * The remote host receive window scaling factor may limit the
 2143          * growing of the send buffer before it reaches its allowed
 2144          * maximum.
 2145          *
 2146          * It scales directly with slow start or congestion window
 2147          * and does at most one step per received ACK.  This fast
 2148          * scaling has the drawback of growing the send buffer beyond
 2149          * what is strictly necessary to make full use of a given
 2150          * delay*bandwidth product.  However testing has shown this not
 2151          * to be much of an problem.  At worst we are trading wasting
 2152          * of available bandwidth (the non-use of it) for wasting some
 2153          * socket buffer memory.
 2154          *
 2155          * TODO: Shrink send buffer during idle periods together
 2156          * with congestion window.  Requires another timer.  Has to
 2157          * wait for upcoming tcp timer rewrite.
 2158          *
 2159          * XXXGL: should there be used sbused() or sbavail()?
 2160          */
 2161         if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
 2162                 int lowat;
 2163 
 2164                 lowat = V_tcp_sendbuf_auto_lowat ? so->so_snd.sb_lowat : 0;
 2165                 if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat - lowat &&
 2166                     sbused(&so->so_snd) >=
 2167                     (so->so_snd.sb_hiwat / 8 * 7) - lowat &&
 2168                     sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
 2169                     sendwin >= (sbused(&so->so_snd) -
 2170                     (tp->snd_nxt - tp->snd_una))) {
 2171                         if (!sbreserve_locked(so, SO_SND,
 2172                             min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
 2173                              V_tcp_autosndbuf_max), curthread))
 2174                                 so->so_snd.sb_flags &= ~SB_AUTOSIZE;
 2175                 }
 2176         }
 2177 }

Cache object: 05e92f467c7773b15713b474cca9a900


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.