The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_subr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  * 3. All advertising materials mentioning features or use of this software
   14  *    must display the following acknowledgement:
   15  *      This product includes software developed by the University of
   16  *      California, Berkeley and its contributors.
   17  * 4. Neither the name of the University nor the names of its contributors
   18  *    may be used to endorse or promote products derived from this software
   19  *    without specific prior written permission.
   20  *
   21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   31  * SUCH DAMAGE.
   32  *
   33  *      @(#)tcp_subr.c  8.2 (Berkeley) 5/24/95
   34  * $FreeBSD$
   35  */
   36 
   37 #include "opt_compat.h"
   38 #include "opt_tcpdebug.h"
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/kernel.h>
   43 #include <sys/sysctl.h>
   44 #include <sys/malloc.h>
   45 #include <sys/mbuf.h>
   46 #include <sys/proc.h>
   47 #include <sys/socket.h>
   48 #include <sys/socketvar.h>
   49 #include <sys/protosw.h>
   50 #include <vm/vm_zone.h>
   51 
   52 #include <i386/include/random.h>
   53 
   54 #include <net/route.h>
   55 #include <net/if.h>
   56 
   57 #define _IP_VHL
   58 #include <netinet/in.h>
   59 #include <netinet/in_systm.h>
   60 #include <netinet/ip.h>
   61 #include <netinet/in_pcb.h>
   62 #include <netinet/in_var.h>
   63 #include <netinet/ip_var.h>
   64 #include <netinet/tcp.h>
   65 #include <netinet/tcp_fsm.h>
   66 #include <netinet/tcp_seq.h>
   67 #include <netinet/tcp_timer.h>
   68 #include <netinet/tcp_var.h>
   69 #include <netinet/tcpip.h>
   70 #ifdef TCPDEBUG
   71 #include <netinet/tcp_debug.h>
   72 #endif
   73 
   74 int     tcp_mssdflt = TCP_MSS;
   75 SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
   76         CTLFLAG_RW, &tcp_mssdflt , 0, "");
   77 
   78 static int      tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
   79 SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt,
   80         CTLFLAG_RW, &tcp_rttdflt , 0, "");
   81 
   82 static int      tcp_do_rfc1323 = 1;
   83 SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323,
   84         CTLFLAG_RW, &tcp_do_rfc1323 , 0, "");
   85 
   86 static int      tcp_do_rfc1644 = 0;
   87 SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644,
   88         CTLFLAG_RW, &tcp_do_rfc1644 , 0, "");
   89 
   90 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, &tcbinfo.ipi_count,
   91            0, "Number of active PCBs");
   92 
   93 static void     tcp_cleartaocache __P((void));
   94 static void     tcp_notify __P((struct inpcb *, int));
   95 
   96 /*
   97  * Target size of TCP PCB hash tables. Must be a power of two.
   98  *
   99  * Note that this can be overridden by the kernel environment
  100  * variable net.inet.tcp.tcbhashsize
  101  */
  102 #ifndef TCBHASHSIZE
  103 #define TCBHASHSIZE     512
  104 #endif
  105 
  106 /*
  107  * This is the actual shape of what we allocate using the zone
  108  * allocator.  Doing it this way allows us to protect both structures
  109  * using the same generation count, and also eliminates the overhead
  110  * of allocating tcpcbs separately.  By hiding the structure here,
  111  * we avoid changing most of the rest of the code (although it needs
  112  * to be changed, eventually, for greater efficiency).
  113  */
  114 #define ALIGNMENT       32
  115 #define ALIGNM1         (ALIGNMENT - 1)
  116 struct  inp_tp {
  117         union {
  118                 struct  inpcb inp;
  119                 char    align[(sizeof(struct inpcb) + ALIGNM1) & ~ALIGNM1];
  120         } inp_tp_u;
  121         struct  tcpcb tcb;
  122 };
  123 #undef ALIGNMENT
  124 #undef ALIGNM1
  125 
  126 /*
  127  * Tcp initialization
  128  */
  129 void
  130 tcp_init()
  131 {
  132         int hashsize;
  133         
  134 #ifdef TCP_COMPAT_42
  135         tcp_iss = 1;            /* wrong */
  136 #endif /* TCP_COMPAT_42 */
  137         tcp_ccgen = 1;
  138         tcp_cleartaocache();
  139         LIST_INIT(&tcb);
  140         tcbinfo.listhead = &tcb;
  141         if (!(getenv_int("net.inet.tcp.tcbhashsize", &hashsize)))
  142                 hashsize = TCBHASHSIZE;
  143         if (!powerof2(hashsize)) {
  144                 printf("WARNING: TCB hash size not a power of 2\n");
  145                 hashsize = 512; /* safe default */
  146         }
  147         tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
  148         tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
  149                                         &tcbinfo.porthashmask);
  150         tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets,
  151                                  ZONE_INTERRUPT, 0);
  152         if (max_protohdr < sizeof(struct tcpiphdr))
  153                 max_protohdr = sizeof(struct tcpiphdr);
  154         if (max_linkhdr + sizeof(struct tcpiphdr) > MHLEN)
  155                 panic("tcp_init");
  156 }
  157 
  158 /*
  159  * Create template to be used to send tcp packets on a connection.
  160  * Call after host entry created, allocates an mbuf and fills
  161  * in a skeletal tcp/ip header, minimizing the amount of work
  162  * necessary when the connection is used.
  163  */
  164 struct tcpiphdr *
  165 tcp_template(tp)
  166         struct tcpcb *tp;
  167 {
  168         register struct inpcb *inp = tp->t_inpcb;
  169         register struct mbuf *m;
  170         register struct tcpiphdr *n;
  171 
  172         if ((n = tp->t_template) == 0) {
  173                 m = m_get(M_DONTWAIT, MT_HEADER);
  174                 if (m == NULL)
  175                         return (0);
  176                 m->m_len = sizeof (struct tcpiphdr);
  177                 n = mtod(m, struct tcpiphdr *);
  178         }
  179         bzero(n->ti_x1, sizeof(n->ti_x1));
  180         n->ti_pr = IPPROTO_TCP;
  181         n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
  182         n->ti_src = inp->inp_laddr;
  183         n->ti_dst = inp->inp_faddr;
  184         n->ti_sport = inp->inp_lport;
  185         n->ti_dport = inp->inp_fport;
  186         n->ti_seq = 0;
  187         n->ti_ack = 0;
  188         n->ti_x2 = 0;
  189         n->ti_off = 5;
  190         n->ti_flags = 0;
  191         n->ti_win = 0;
  192         n->ti_sum = 0;
  193         n->ti_urp = 0;
  194         return (n);
  195 }
  196 
  197 /*
  198  * Send a single message to the TCP at address specified by
  199  * the given TCP/IP header.  If m == 0, then we make a copy
  200  * of the tcpiphdr at ti and send directly to the addressed host.
  201  * This is used to force keep alive messages out using the TCP
  202  * template for a connection tp->t_template.  If flags are given
  203  * then we send a message back to the TCP which originated the
  204  * segment ti, and discard the mbuf containing it and any other
  205  * attached mbufs.
  206  *
  207  * In any case the ack and sequence number of the transmitted
  208  * segment are as specified by the parameters.
  209  *
  210  * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
  211  */
  212 void
  213 tcp_respond(tp, ti, m, ack, seq, flags)
  214         struct tcpcb *tp;
  215         register struct tcpiphdr *ti;
  216         register struct mbuf *m;
  217         tcp_seq ack, seq;
  218         int flags;
  219 {
  220         register int tlen;
  221         int win = 0;
  222         struct route *ro = 0;
  223         struct route sro;
  224 
  225         if (tp) {
  226                 if (!(flags & TH_RST)) {
  227                         win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
  228                         if (win > (long)TCP_MAXWIN << tp->rcv_scale)
  229                                 win = (long)TCP_MAXWIN << tp->rcv_scale;
  230                 }
  231                 ro = &tp->t_inpcb->inp_route;
  232         } else {
  233                 ro = &sro;
  234                 bzero(ro, sizeof *ro);
  235         }
  236         if (m == 0) {
  237                 m = m_gethdr(M_DONTWAIT, MT_HEADER);
  238                 if (m == NULL)
  239                         return;
  240 #ifdef TCP_COMPAT_42
  241                 tlen = 1;
  242 #else
  243                 tlen = 0;
  244 #endif
  245                 m->m_data += max_linkhdr;
  246                 *mtod(m, struct tcpiphdr *) = *ti;
  247                 ti = mtod(m, struct tcpiphdr *);
  248                 flags = TH_ACK;
  249         } else {
  250                 m_freem(m->m_next);
  251                 m->m_next = 0;
  252                 m->m_data = (caddr_t)ti;
  253                 m->m_len = sizeof (struct tcpiphdr);
  254                 tlen = 0;
  255 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
  256                 xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, n_long);
  257                 xchg(ti->ti_dport, ti->ti_sport, n_short);
  258 #undef xchg
  259         }
  260         ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
  261         tlen += sizeof (struct tcpiphdr);
  262         m->m_len = tlen;
  263         m->m_pkthdr.len = tlen;
  264         m->m_pkthdr.rcvif = (struct ifnet *) 0;
  265         bzero(ti->ti_x1, sizeof(ti->ti_x1));
  266         ti->ti_seq = htonl(seq);
  267         ti->ti_ack = htonl(ack);
  268         ti->ti_x2 = 0;
  269         ti->ti_off = sizeof (struct tcphdr) >> 2;
  270         ti->ti_flags = flags;
  271         if (tp)
  272                 ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
  273         else
  274                 ti->ti_win = htons((u_short)win);
  275         ti->ti_urp = 0;
  276         ti->ti_sum = 0;
  277         ti->ti_sum = in_cksum(m, tlen);
  278         ((struct ip *)ti)->ip_len = tlen;
  279         ((struct ip *)ti)->ip_ttl = ip_defttl;
  280 #ifdef TCPDEBUG
  281         if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
  282                 tcp_trace(TA_OUTPUT, 0, tp, ti, 0);
  283 #endif
  284         (void) ip_output(m, NULL, ro, 0, NULL);
  285         if (ro == &sro && ro->ro_rt) {
  286                 RTFREE(ro->ro_rt);
  287         }
  288 }
  289 
  290 /*
  291  * Create a new TCP control block, making an
  292  * empty reassembly queue and hooking it to the argument
  293  * protocol control block.  The `inp' parameter must have
  294  * come from the zone allocator set up in tcp_init().
  295  */
  296 struct tcpcb *
  297 tcp_newtcpcb(inp)
  298         struct inpcb *inp;
  299 {
  300         struct inp_tp *it;
  301         register struct tcpcb *tp;
  302 
  303         it = (struct inp_tp *)inp;
  304         tp = &it->tcb;
  305         bzero((char *) tp, sizeof(struct tcpcb));
  306         tp->t_segq = NULL;
  307         tp->t_maxseg = tp->t_maxopd = tcp_mssdflt;
  308 
  309         if (tcp_do_rfc1323)
  310                 tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
  311         if (tcp_do_rfc1644)
  312                 tp->t_flags |= TF_REQ_CC;
  313         tp->t_inpcb = inp;      /* XXX */
  314         /*
  315          * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
  316          * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
  317          * reasonable initial retransmit time.
  318          */
  319         tp->t_srtt = TCPTV_SRTTBASE;
  320         tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
  321         tp->t_rttmin = TCPTV_MIN;
  322         tp->t_rxtcur = TCPTV_RTOBASE;
  323         tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
  324         tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
  325         inp->inp_ip_ttl = ip_defttl;
  326         inp->inp_ppcb = (caddr_t)tp;
  327         return (tp);            /* XXX */
  328 }
  329 
  330 /*
  331  * Drop a TCP connection, reporting
  332  * the specified error.  If connection is synchronized,
  333  * then send a RST to peer.
  334  */
  335 struct tcpcb *
  336 tcp_drop(tp, errno)
  337         register struct tcpcb *tp;
  338         int errno;
  339 {
  340         struct socket *so = tp->t_inpcb->inp_socket;
  341 
  342         if (TCPS_HAVERCVDSYN(tp->t_state)) {
  343                 tp->t_state = TCPS_CLOSED;
  344                 (void) tcp_output(tp);
  345                 tcpstat.tcps_drops++;
  346         } else
  347                 tcpstat.tcps_conndrops++;
  348         if (errno == ETIMEDOUT && tp->t_softerror)
  349                 errno = tp->t_softerror;
  350         so->so_error = errno;
  351         return (tcp_close(tp));
  352 }
  353 
  354 /*
  355  * Close a TCP control block:
  356  *      discard all space held by the tcp
  357  *      discard internet protocol block
  358  *      wake up any sleepers
  359  */
  360 struct tcpcb *
  361 tcp_close(tp)
  362         register struct tcpcb *tp;
  363 {
  364         register struct mbuf *q;
  365         register struct mbuf *nq;
  366         struct inpcb *inp = tp->t_inpcb;
  367         struct socket *so = inp->inp_socket;
  368         register struct rtentry *rt;
  369         int dosavessthresh;
  370 
  371         /*
  372          * If we got enough samples through the srtt filter,
  373          * save the rtt and rttvar in the routing entry.
  374          * 'Enough' is arbitrarily defined as the 16 samples.
  375          * 16 samples is enough for the srtt filter to converge
  376          * to within 5% of the correct value; fewer samples and
  377          * we could save a very bogus rtt.
  378          *
  379          * Don't update the default route's characteristics and don't
  380          * update anything that the user "locked".
  381          */
  382         if (tp->t_rttupdated >= 16 &&
  383             (rt = inp->inp_route.ro_rt) &&
  384             ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
  385                 register u_long i = 0;
  386 
  387                 if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
  388                         i = tp->t_srtt *
  389                             (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
  390                         if (rt->rt_rmx.rmx_rtt && i)
  391                                 /*
  392                                  * filter this update to half the old & half
  393                                  * the new values, converting scale.
  394                                  * See route.h and tcp_var.h for a
  395                                  * description of the scaling constants.
  396                                  */
  397                                 rt->rt_rmx.rmx_rtt =
  398                                     (rt->rt_rmx.rmx_rtt + i) / 2;
  399                         else
  400                                 rt->rt_rmx.rmx_rtt = i;
  401                         tcpstat.tcps_cachedrtt++;
  402                 }
  403                 if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
  404                         i = tp->t_rttvar *
  405                             (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
  406                         if (rt->rt_rmx.rmx_rttvar && i)
  407                                 rt->rt_rmx.rmx_rttvar =
  408                                     (rt->rt_rmx.rmx_rttvar + i) / 2;
  409                         else
  410                                 rt->rt_rmx.rmx_rttvar = i;
  411                         tcpstat.tcps_cachedrttvar++;
  412                 }
  413                 /*
  414                  * The old comment here said:
  415                  * update the pipelimit (ssthresh) if it has been updated
  416                  * already or if a pipesize was specified & the threshhold
  417                  * got below half the pipesize.  I.e., wait for bad news
  418                  * before we start updating, then update on both good
  419                  * and bad news.
  420                  *
  421                  * But we want to save the ssthresh even if no pipesize is
  422                  * specified explicitly in the route, because such
  423                  * connections still have an implicit pipesize specified
  424                  * by the global tcp_sendspace.  In the absence of a reliable
  425                  * way to calculate the pipesize, it will have to do.
  426                  */
  427                 i = tp->snd_ssthresh;
  428                 if (rt->rt_rmx.rmx_sendpipe != 0)
  429                         dosavessthresh = (i < rt->rt_rmx.rmx_sendpipe / 2);
  430                 else
  431                         dosavessthresh = (i < so->so_snd.sb_hiwat / 2);
  432                 if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
  433                      i != 0 && rt->rt_rmx.rmx_ssthresh != 0)
  434                     || dosavessthresh) {
  435                         /*
  436                          * convert the limit from user data bytes to
  437                          * packets then to packet data bytes.
  438                          */
  439                         i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
  440                         if (i < 2)
  441                                 i = 2;
  442                         i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
  443                         if (rt->rt_rmx.rmx_ssthresh)
  444                                 rt->rt_rmx.rmx_ssthresh =
  445                                     (rt->rt_rmx.rmx_ssthresh + i) / 2;
  446                         else
  447                                 rt->rt_rmx.rmx_ssthresh = i;
  448                         tcpstat.tcps_cachedssthresh++;
  449                 }
  450         }
  451         /* free the reassembly queue, if any */
  452         for (q = tp->t_segq; q; q = nq) {
  453                 nq = q->m_nextpkt;
  454                 tp->t_segq = nq;
  455                 m_freem(q);
  456         }
  457         if (tp->t_template)
  458                 (void) m_free(dtom(tp->t_template));
  459         inp->inp_ppcb = NULL;
  460         soisdisconnected(so);
  461         in_pcbdetach(inp);
  462         tcpstat.tcps_closed++;
  463         return ((struct tcpcb *)0);
  464 }
  465 
  466 void
  467 tcp_drain()
  468 {
  469 
  470 }
  471 
  472 /*
  473  * Notify a tcp user of an asynchronous error;
  474  * store error as soft error, but wake up user
  475  * (for now, won't do anything until can select for soft error).
  476  */
  477 static void
  478 tcp_notify(inp, error)
  479         struct inpcb *inp;
  480         int error;
  481 {
  482         register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
  483         register struct socket *so = inp->inp_socket;
  484 
  485         /*
  486          * Ignore some errors if we are hooked up.
  487          * If connection hasn't completed, has retransmitted several times,
  488          * and receives a second error, give up now.  This is better
  489          * than waiting a long time to establish a connection that
  490          * can never complete.
  491          */
  492         if (tp->t_state == TCPS_ESTABLISHED &&
  493              (error == EHOSTUNREACH || error == ENETUNREACH ||
  494               error == EHOSTDOWN)) {
  495                 return;
  496         } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
  497             tp->t_softerror)
  498                 so->so_error = error;
  499         else
  500                 tp->t_softerror = error;
  501         wakeup((caddr_t) &so->so_timeo);
  502         sorwakeup(so);
  503         sowwakeup(so);
  504 }
  505 
  506 static int
  507 tcp_pcblist SYSCTL_HANDLER_ARGS
  508 {
  509         int error, i, n, s;
  510         struct inpcb *inp, **inp_list;
  511         inp_gen_t gencnt;
  512         struct xinpgen xig;
  513 
  514         /*
  515          * The process of preparing the TCB list is too time-consuming and
  516          * resource-intensive to repeat twice on every request.
  517          */
  518         if (req->oldptr == 0) {
  519                 n = tcbinfo.ipi_count;
  520                 req->oldidx = 2 * (sizeof xig)
  521                         + (n + n/8) * sizeof(struct xtcpcb);
  522                 return 0;
  523         }
  524 
  525         if (req->newptr != 0)
  526                 return EPERM;
  527 
  528         /*
  529          * OK, now we're committed to doing something.
  530          */
  531         s = splnet();
  532         gencnt = tcbinfo.ipi_gencnt;
  533         n = tcbinfo.ipi_count;
  534         splx(s);
  535 
  536         xig.xig_len = sizeof xig;
  537         xig.xig_count = n;
  538         xig.xig_gen = gencnt;
  539         xig.xig_sogen = so_gencnt;
  540         error = SYSCTL_OUT(req, &xig, sizeof xig);
  541         if (error)
  542                 return error;
  543 
  544         inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
  545         if (inp_list == 0)
  546                 return ENOMEM;
  547         
  548         s = splnet();
  549         for (inp = tcbinfo.listhead->lh_first, i = 0; inp && i < n;
  550              inp = inp->inp_list.le_next) {
  551                 if (inp->inp_gencnt <= gencnt)
  552                         inp_list[i++] = inp;
  553         }
  554         splx(s);
  555         n = i;
  556 
  557         error = 0;
  558         for (i = 0; i < n; i++) {
  559                 inp = inp_list[i];
  560                 if (inp->inp_gencnt <= gencnt) {
  561                         struct xtcpcb xt;
  562                         caddr_t inp_ppcb;
  563                         xt.xt_len = sizeof xt;
  564                         /* XXX should avoid extra copy */
  565                         bcopy(inp, &xt.xt_inp, sizeof *inp);
  566                         inp_ppcb = inp->inp_ppcb;
  567                         if (inp_ppcb != NULL)
  568                                 bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
  569                         else
  570                                 bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
  571                         if (inp->inp_socket)
  572                                 sotoxsocket(inp->inp_socket, &xt.xt_socket);
  573                         error = SYSCTL_OUT(req, &xt, sizeof xt);
  574                 }
  575         }
  576         if (!error) {
  577                 /*
  578                  * Give the user an updated idea of our state.
  579                  * If the generation differs from what we told
  580                  * her before, she knows that something happened
  581                  * while we were processing this request, and it
  582                  * might be necessary to retry.
  583                  */
  584                 s = splnet();
  585                 xig.xig_gen = tcbinfo.ipi_gencnt;
  586                 xig.xig_sogen = so_gencnt;
  587                 xig.xig_count = tcbinfo.ipi_count;
  588                 splx(s);
  589                 error = SYSCTL_OUT(req, &xig, sizeof xig);
  590         }
  591         free(inp_list, M_TEMP);
  592         return error;
  593 }
  594 
  595 SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
  596             tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
  597 
  598 static int
  599 tcp_getcred SYSCTL_HANDLER_ARGS
  600 {
  601         struct sockaddr_in addrs[2];
  602         struct inpcb *inp;
  603         int error, s;
  604 
  605         error = suser(req->p->p_ucred, &req->p->p_acflag);
  606         if (error)
  607                 return (error);
  608 
  609         if (req->newlen != sizeof(addrs))
  610                 return (EINVAL);
  611         if (req->oldlen != sizeof(struct ucred))
  612                 return (EINVAL);
  613         error = SYSCTL_IN(req, addrs, sizeof(addrs));
  614         if (error)
  615                 return (error);
  616         s = splnet();
  617         inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
  618                                 addrs[0].sin_addr, addrs[0].sin_port, 0);
  619         if (!inp || !inp->inp_socket || !inp->inp_socket->so_cred) {
  620                 error = ENOENT;
  621                 goto out;
  622         }
  623         error = SYSCTL_OUT(req, inp->inp_socket->so_cred->pc_ucred,
  624                 sizeof(struct ucred));
  625 
  626 out:
  627         splx(s);
  628         return (error);
  629 }
  630 
  631 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0,
  632     tcp_getcred, "S,ucred", "Get the ucred of a TCP connection");
  633 
  634 void
  635 tcp_ctlinput(cmd, sa, vip)
  636         int cmd;
  637         struct sockaddr *sa;
  638         void *vip;
  639 {
  640         register struct ip *ip = vip;
  641         register struct tcphdr *th;
  642         void (*notify) __P((struct inpcb *, int)) = tcp_notify;
  643 
  644         if (cmd == PRC_QUENCH)
  645                 notify = tcp_quench;
  646         else if (cmd == PRC_MSGSIZE)
  647                 notify = tcp_mtudisc;
  648         else if (PRC_IS_REDIRECT(cmd)) {
  649                 /*
  650                  * Redirects go to all references to the destination,
  651                  * and use in_rtchange to invalidate the route cache.
  652                  */
  653                 ip = 0;
  654                 notify = in_rtchange;
  655         } else if (cmd == PRC_HOSTDEAD)
  656                 /*
  657                  * Dead host indications: notify all references to the destination.
  658                  */
  659                 ip = 0;
  660         else if ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0)
  661                 return;
  662         if (ip) {
  663                 th = (struct tcphdr *)((caddr_t)ip 
  664                                        + (IP_VHL_HL(ip->ip_vhl) << 2));
  665                 in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
  666                         cmd, notify);
  667         } else
  668                 in_pcbnotifyall(&tcb, sa, cmd, notify);
  669 }
  670 
  671 #define TCP_RNDISS_ROUNDS       16
  672 #define TCP_RNDISS_OUT  7200
  673 #define TCP_RNDISS_MAX  30000
  674 
  675 u_int8_t tcp_rndiss_sbox[128];
  676 u_int16_t tcp_rndiss_msb;
  677 u_int16_t tcp_rndiss_cnt;
  678 long tcp_rndiss_reseed;
  679 
  680 u_int16_t
  681 tcp_rndiss_encrypt(val)
  682         u_int16_t val;
  683 {
  684         u_int16_t sum = 0, i;
  685   
  686         for (i = 0; i < TCP_RNDISS_ROUNDS; i++) {
  687                 sum += 0x79b9;
  688                 val ^= ((u_int16_t)tcp_rndiss_sbox[(val^sum) & 0x7f]) << 7;
  689                 val = ((val & 0xff) << 7) | (val >> 8);
  690         }
  691 
  692         return val;
  693 }
  694 
  695 void
  696 tcp_rndiss_init()
  697 {
  698         struct timeval time;
  699 
  700         getmicrotime(&time);
  701         read_random_unlimited(tcp_rndiss_sbox, sizeof(tcp_rndiss_sbox));
  702 
  703         tcp_rndiss_reseed = time.tv_sec + TCP_RNDISS_OUT;
  704         tcp_rndiss_msb = tcp_rndiss_msb == 0x8000 ? 0 : 0x8000; 
  705         tcp_rndiss_cnt = 0;
  706 }
  707 
  708 tcp_seq
  709 tcp_rndiss_next()
  710 {
  711         u_int32_t tmp;
  712         struct timeval time;
  713 
  714         getmicrotime(&time);
  715 
  716         if (tcp_rndiss_cnt >= TCP_RNDISS_MAX ||
  717             time.tv_sec > tcp_rndiss_reseed)
  718                 tcp_rndiss_init();
  719         
  720         tmp = arc4random();
  721 
  722         /* (tmp & 0x7fff) ensures a 32768 byte gap between ISS */
  723         return ((tcp_rndiss_encrypt(tcp_rndiss_cnt++) | tcp_rndiss_msb) <<16) |
  724                 (tmp & 0x7fff);
  725 }
  726 
  727 /*
  728  * When a source quench is received, close congestion window
  729  * to one segment.  We will gradually open it again as we proceed.
  730  */
  731 void
  732 tcp_quench(inp, errno)
  733         struct inpcb *inp;
  734         int errno;
  735 {
  736         struct tcpcb *tp = intotcpcb(inp);
  737 
  738         if (tp)
  739                 tp->snd_cwnd = tp->t_maxseg;
  740 }
  741 
  742 /*
  743  * When `need fragmentation' ICMP is received, update our idea of the MSS
  744  * based on the new value in the route.  Also nudge TCP to send something,
  745  * since we know the packet we just sent was dropped.
  746  * This duplicates some code in the tcp_mss() function in tcp_input.c.
  747  */
  748 void
  749 tcp_mtudisc(inp, errno)
  750         struct inpcb *inp;
  751         int errno;
  752 {
  753         struct tcpcb *tp = intotcpcb(inp);
  754         struct rtentry *rt;
  755         struct rmxp_tao *taop;
  756         struct socket *so = inp->inp_socket;
  757         int offered;
  758         int mss;
  759 
  760         if (tp) {
  761                 rt = tcp_rtlookup(inp);
  762                 if (!rt || !rt->rt_rmx.rmx_mtu) {
  763                         tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
  764                         return;
  765                 }
  766                 taop = rmx_taop(rt->rt_rmx);
  767                 offered = taop->tao_mssopt;
  768                 mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
  769                 if (offered)
  770                         mss = min(mss, offered);
  771                 /*
  772                  * XXX - The above conditional probably violates the TCP
  773                  * spec.  The problem is that, since we don't know the
  774                  * other end's MSS, we are supposed to use a conservative
  775                  * default.  But, if we do that, then MTU discovery will
  776                  * never actually take place, because the conservative
  777                  * default is much less than the MTUs typically seen
  778                  * on the Internet today.  For the moment, we'll sweep
  779                  * this under the carpet.
  780                  *
  781                  * The conservative default might not actually be a problem
  782                  * if the only case this occurs is when sending an initial
  783                  * SYN with options and data to a host we've never talked
  784                  * to before.  Then, they will reply with an MSS value which
  785                  * will get recorded and the new parameters should get
  786                  * recomputed.  For Further Study.
  787                  */
  788                 if (tp->t_maxopd <= mss)
  789                         return;
  790                 tp->t_maxopd = mss;
  791 
  792                 if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
  793                     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
  794                         mss -= TCPOLEN_TSTAMP_APPA;
  795                 if ((tp->t_flags & (TF_REQ_CC|TF_NOOPT)) == TF_REQ_CC &&
  796                     (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC)
  797                         mss -= TCPOLEN_CC_APPA;
  798 #if     (MCLBYTES & (MCLBYTES - 1)) == 0
  799                 if (mss > MCLBYTES)
  800                         mss &= ~(MCLBYTES-1);
  801 #else
  802                 if (mss > MCLBYTES)
  803                         mss = mss / MCLBYTES * MCLBYTES;
  804 #endif
  805                 if (so->so_snd.sb_hiwat < mss)
  806                         mss = so->so_snd.sb_hiwat;
  807 
  808                 tp->t_maxseg = mss;
  809 
  810                 tcpstat.tcps_mturesent++;
  811                 tp->t_rtt = 0;
  812                 tp->snd_nxt = tp->snd_una;
  813                 tcp_output(tp);
  814         }
  815 }
  816 
  817 /*
  818  * Look-up the routing entry to the peer of this inpcb.  If no route
  819  * is found and it cannot be allocated the return NULL.  This routine
  820  * is called by TCP routines that access the rmx structure and by tcp_mss
  821  * to get the interface MTU.
  822  */
  823 struct rtentry *
  824 tcp_rtlookup(inp)
  825         struct inpcb *inp;
  826 {
  827         struct route *ro;
  828         struct rtentry *rt;
  829 
  830         ro = &inp->inp_route;
  831         rt = ro->ro_rt;
  832         if (rt == NULL || !(rt->rt_flags & RTF_UP)) {
  833                 /* No route yet, so try to acquire one */
  834                 if (inp->inp_faddr.s_addr != INADDR_ANY) {
  835                         ro->ro_dst.sa_family = AF_INET;
  836                         ro->ro_dst.sa_len = sizeof(ro->ro_dst);
  837                         ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
  838                                 inp->inp_faddr;
  839                         rtalloc(ro);
  840                         rt = ro->ro_rt;
  841                 }
  842         }
  843         return rt;
  844 }
  845 
  846 /*
  847  * Return a pointer to the cached information about the remote host.
  848  * The cached information is stored in the protocol specific part of
  849  * the route metrics.
  850  */
  851 struct rmxp_tao *
  852 tcp_gettaocache(inp)
  853         struct inpcb *inp;
  854 {
  855         struct rtentry *rt = tcp_rtlookup(inp);
  856 
  857         /* Make sure this is a host route and is up. */
  858         if (rt == NULL ||
  859             (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST))
  860                 return NULL;
  861 
  862         return rmx_taop(rt->rt_rmx);
  863 }
  864 
  865 /*
  866  * Clear all the TAO cache entries, called from tcp_init.
  867  *
  868  * XXX
  869  * This routine is just an empty one, because we assume that the routing
  870  * routing tables are initialized at the same time when TCP, so there is
  871  * nothing in the cache left over.
  872  */
  873 static void
  874 tcp_cleartaocache()
  875 {
  876 }

Cache object: 57d8f36a21685dec9295aae6b791bc9e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.