The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_pcap.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2015
    3  *      Jonathan Looney. All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  *
   26  * $FreeBSD: releng/12.0/sys/netinet/tcp_pcap.c 302374 2016-07-06 16:17:13Z jtl $
   27  */
   28 
   29 #include <sys/queue.h>
   30 #include <sys/param.h>
   31 #include <sys/types.h>
   32 #include <sys/socket.h>
   33 #include <sys/socketvar.h>
   34 #include <sys/sysctl.h>
   35 #include <sys/systm.h>
   36 #include <sys/mbuf.h>
   37 #include <sys/eventhandler.h>
   38 #include <machine/atomic.h>
   39 #include <netinet/tcp_var.h>
   40 #include <netinet/tcp_pcap.h>
   41 
/*
 * Bytes of data sitting between the start of the mbuf's storage and its
 * current data pointer, regardless of writability of that storage.
 */
#define M_LEADINGSPACE_NOWRITE(m)                                       \
        ((m)->m_data - M_START(m))

/* Nonzero: drop saved packets when the VM system signals memory pressure. */
int tcp_pcap_aggressive_free = 1;
/* Global count of clusters currently pinned by saved packets. */
static int tcp_pcap_clusters_referenced_cur = 0;
/* Ceiling on pinned clusters; recomputed from nmbclusters (see below). */
static int tcp_pcap_clusters_referenced_max = 0;

SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_aggressive_free,
        CTLFLAG_RW, &tcp_pcap_aggressive_free, 0,
        "Free saved packets when the memory system comes under pressure");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_cur,
        CTLFLAG_RD, &tcp_pcap_clusters_referenced_cur, 0,
        "Number of clusters currently referenced on TCP PCAP queues");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_max,
        CTLFLAG_RW, &tcp_pcap_clusters_referenced_max, 0,
        "Maximum number of clusters allowed to be referenced on TCP PCAP "
        "queues");

/* Allocation statistics for the recycle-vs-allocate logic in tcp_pcap_add(). */
static int tcp_pcap_alloc_reuse_ext = 0;
static int tcp_pcap_alloc_reuse_mbuf = 0;
static int tcp_pcap_alloc_new_mbuf = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_ext,
        CTLFLAG_RD, &tcp_pcap_alloc_reuse_ext, 0,
        "Number of mbufs with external storage reused for the TCP PCAP "
        "functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_mbuf,
        CTLFLAG_RD, &tcp_pcap_alloc_reuse_mbuf, 0,
        "Number of mbufs with internal storage reused for the TCP PCAP "
        "functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_new_mbuf,
        CTLFLAG_RD, &tcp_pcap_alloc_new_mbuf, 0,
        "Number of new mbufs allocated for the TCP PCAP functionality");

/* Per-VNET default queue depth used by tcp_pcap_tcpcb_init(). */
VNET_DEFINE(int, tcp_pcap_packets) = 0;
#define V_tcp_pcap_packets      VNET(tcp_pcap_packets)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_packets,
        CTLFLAG_RW, &VNET_NAME(tcp_pcap_packets), 0,
        "Default number of packets saved per direction per TCPCB");
   81 /* Initialize the values. */
   82 static void
   83 tcp_pcap_max_set(void)
   84 {
   85 
   86         tcp_pcap_clusters_referenced_max = nmbclusters / 4;
   87 }
   88 
/*
 * One-time module initialization: compute the initial cluster ceiling
 * and arrange for it to be recomputed whenever nmbclusters changes.
 */
void
tcp_pcap_init(void)
{

        tcp_pcap_max_set();
        /* Re-derive the ceiling if the administrator retunes nmbclusters. */
        EVENTHANDLER_REGISTER(nmbclusters_change, tcp_pcap_max_set,
                NULL, EVENTHANDLER_PRI_ANY);
}
   97 
   98 /*
   99  * If we are below the maximum allowed cluster references,
  100  * increment the reference count and return TRUE. Otherwise,
  101  * leave the reference count alone and return FALSE.
  102  */
  103 static __inline bool
  104 tcp_pcap_take_cluster_reference(void)
  105 {
  106         if (atomic_fetchadd_int(&tcp_pcap_clusters_referenced_cur, 1) >=
  107                 tcp_pcap_clusters_referenced_max) {
  108                 atomic_add_int(&tcp_pcap_clusters_referenced_cur, -1);
  109                 return FALSE;
  110         }
  111         return TRUE;
  112 }
  113 
  114 /*
  115  * For all the external entries in m, apply the given adjustment.
  116  * This can be used to adjust the counter when an mbuf chain is
  117  * copied or freed.
  118  */
  119 static __inline void
  120 tcp_pcap_adj_cluster_reference(struct mbuf *m, int adj)
  121 {
  122         while (m) {
  123                 if (m->m_flags & M_EXT)
  124                         atomic_add_int(&tcp_pcap_clusters_referenced_cur, adj);
  125 
  126                 m = m->m_next;
  127         }
  128 }
  129 
  130 /*
  131  * Free all mbufs in a chain, decrementing the reference count as
  132  * necessary.
  133  *
  134  * Functions in this file should use this instead of m_freem() when
  135  * they are freeing mbuf chains that may contain clusters that were
  136  * already included in tcp_pcap_clusters_referenced_cur.
  137  */
  138 static void
  139 tcp_pcap_m_freem(struct mbuf *mb)
  140 {
  141         while (mb != NULL) {
  142                 if (mb->m_flags & M_EXT)
  143                         atomic_subtract_int(&tcp_pcap_clusters_referenced_cur,
  144                             1);
  145                 mb = m_free(mb);
  146         }
  147 }
  148 
  149 /*
  150  * Copy data from m to n, where n cannot fit all the data we might
  151  * want from m.
  152  *
  153  * Prioritize data like this:
  154  * 1. TCP header
  155  * 2. IP header
  156  * 3. Data
  157  */
  158 static void
  159 tcp_pcap_copy_bestfit(struct tcphdr *th, struct mbuf *m, struct mbuf *n)
  160 {
  161         struct mbuf *m_cur = m;
  162         int bytes_to_copy=0, trailing_data, skip=0, tcp_off;
  163 
  164         /* Below, we assume these will be non-NULL. */
  165         KASSERT(th, ("%s: called with th == NULL", __func__));
  166         KASSERT(m, ("%s: called with m == NULL", __func__));
  167         KASSERT(n, ("%s: called with n == NULL", __func__));
  168 
  169         /* We assume this initialization occurred elsewhere. */
  170         KASSERT(n->m_len == 0, ("%s: called with n->m_len=%d (expected 0)",
  171                 __func__, n->m_len));
  172         KASSERT(n->m_data == M_START(n),
  173                 ("%s: called with n->m_data != M_START(n)", __func__));
  174 
  175         /*
  176          * Calculate the size of the TCP header. We use this often
  177          * enough that it is worth just calculating at the start.
  178          */
  179         tcp_off = th->th_off << 2;
  180 
  181         /* Trim off leading empty mbufs. */
  182         while (m && m->m_len == 0)
  183                 m = m->m_next;
  184 
  185         if (m) {
  186                 m_cur = m;
  187         }
  188         else {
  189                 /*
  190                  * No data? Highly unusual. We would expect to at
  191                  * least see a TCP header in the mbuf.
  192                  * As we have a pointer to the TCP header, I guess
  193                  * we should just copy that. (???)
  194                  */
  195 fallback:
  196                 bytes_to_copy = tcp_off;
  197                 if (bytes_to_copy > M_SIZE(n))
  198                         bytes_to_copy = M_SIZE(n);
  199                 bcopy(th, n->m_data, bytes_to_copy);
  200                 n->m_len = bytes_to_copy;
  201                 return;
  202         }
  203 
  204         /*
  205          * Find TCP header. Record the total number of bytes up to,
  206          * and including, the TCP header.
  207          */
  208         while (m_cur) {
  209                 if ((caddr_t) th >= (caddr_t) m_cur->m_data &&
  210                         (caddr_t) th < (caddr_t) (m_cur->m_data + m_cur->m_len))
  211                         break;
  212                 bytes_to_copy += m_cur->m_len;
  213                 m_cur = m_cur->m_next;
  214         }
  215         if (m_cur)
  216                 bytes_to_copy += (caddr_t) th - (caddr_t) m_cur->m_data;
  217         else
  218                 goto fallback;
  219         bytes_to_copy += tcp_off;
  220 
  221         /*
  222          * If we already want to copy more bytes than we can hold
  223          * in the destination mbuf, skip leading bytes and copy
  224          * what we can.
  225          *
  226          * Otherwise, consider trailing data.
  227          */
  228         if (bytes_to_copy > M_SIZE(n)) {
  229                 skip  = bytes_to_copy - M_SIZE(n);
  230                 bytes_to_copy = M_SIZE(n);
  231         }
  232         else {
  233                 /*
  234                  * Determine how much trailing data is in the chain.
  235                  * We start with the length of this mbuf (the one
  236                  * containing th) and subtract the size of the TCP
  237                  * header (tcp_off) and the size of the data prior
  238                  * to th (th - m_cur->m_data).
  239                  *
  240                  * This *should not* be negative, as the TCP code
  241                  * should put the whole TCP header in a single
  242                  * mbuf. But, it isn't a problem if it is. We will
  243                  * simple work off our negative balance as we look
  244                  * at subsequent mbufs.
  245                  */
  246                 trailing_data = m_cur->m_len - tcp_off;
  247                 trailing_data -= (caddr_t) th - (caddr_t) m_cur->m_data;
  248                 m_cur = m_cur->m_next;
  249                 while (m_cur) {
  250                         trailing_data += m_cur->m_len;
  251                         m_cur = m_cur->m_next;
  252                 }
  253                 if ((bytes_to_copy + trailing_data) > M_SIZE(n))
  254                         bytes_to_copy = M_SIZE(n);
  255                 else
  256                         bytes_to_copy += trailing_data;
  257         }
  258 
  259         m_copydata(m, skip, bytes_to_copy, n->m_data);
  260         n->m_len = bytes_to_copy;
  261 }
  262 
/*
 * Save a copy of the packet (headers and as much data as will fit)
 * described by th/m onto the given capture queue.  When the queue is
 * at its limit, the oldest saved packet is recycled or freed to make
 * room.  Failures (e.g. mbuf allocation) cause the packet to be
 * silently skipped; capture is best-effort.
 */
void
tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue)
{
        struct mbuf *n = NULL, *mhead;

        KASSERT(th, ("%s: called with th == NULL", __func__));
        KASSERT(m, ("%s: called with m == NULL", __func__));
        KASSERT(queue, ("%s: called with queue == NULL", __func__));

        /* We only care about data packets. */
        while (m && m->m_type != MT_DATA)
                m = m->m_next;

        /* We only need to do something if we still have an mbuf. */
        if (!m)
                return;

        /* If we are not saving mbufs, return now. */
        if (queue->mq_maxlen == 0)
                return;

        /*
         * Check to see if we will need to recycle mbufs.
         *
         * If we need to get rid of mbufs to stay below
         * our packet count, try to reuse the mbuf. Once
         * we already have a new mbuf (n), then we can
         * simply free subsequent mbufs.
         *
         * Note that most of the logic in here is to deal
         * with the reuse. If we are fine with constant
         * mbuf allocs/deallocs, we could ditch this logic.
         * But, it only seems to make sense to reuse
         * mbufs we already have.
         */
        while (mbufq_full(queue)) {
                mhead = mbufq_dequeue(queue);

                if (n) {
                        /* Already have a recycled mbuf; just free this one. */
                        tcp_pcap_m_freem(mhead);
                }
                else {
                        /*
                         * If this held an external cluster, try to
                         * detach the cluster. But, if we held the
                         * last reference, go through the normal
                         * free-ing process.
                         */
                        if (mhead->m_flags & M_EXT) {
                                switch (mhead->m_ext.ext_type) {
                                case EXT_SFBUF:
                                        /* Don't mess around with these. */
                                        tcp_pcap_m_freem(mhead);
                                        continue;
                                default:
                                        if (atomic_fetchadd_int(
                                                mhead->m_ext.ext_cnt, -1) == 1)
                                        {
                                                /*
                                                 * We held the last reference
                                                 * on this cluster. Restore
                                                 * the reference count and put
                                                 * it back in the pool.
                                                 */
                                                *(mhead->m_ext.ext_cnt) = 1;
                                                tcp_pcap_m_freem(mhead);
                                                continue;
                                        }
                                        /*
                                         * We were able to cleanly free the
                                         * reference.
                                         */
                                        atomic_subtract_int(
                                            &tcp_pcap_clusters_referenced_cur,
                                            1);
                                        tcp_pcap_alloc_reuse_ext++;
                                        break;
                                }
                        }
                        else {
                                tcp_pcap_alloc_reuse_mbuf++;
                        }

                        /* Keep the head; free the rest and reset it for reuse. */
                        n = mhead;
                        tcp_pcap_m_freem(n->m_next);
                        m_init(n, M_NOWAIT, MT_DATA, 0);
                }
        }

        /* Check to see if we need to get a new mbuf. */
        if (!n) {
                if (!(n = m_get(M_NOWAIT, MT_DATA)))
                        return;
                tcp_pcap_alloc_new_mbuf++;
        }

        /*
         * What are we dealing with? If a cluster, attach it. Otherwise,
         * try to copy the data from the beginning of the mbuf to the
         * end of data. (There may be data between the start of the data
         * area and the current data pointer. We want to get this, because
         * it may contain header information that is useful.)
         * In cases where that isn't possible, settle for what we can
         * get.
         */
        if ((m->m_flags & M_EXT) && tcp_pcap_take_cluster_reference()) {
                /* Share the cluster rather than copying its contents. */
                n->m_data = m->m_data;
                n->m_len = m->m_len;
                mb_dupcl(n, m);
        }
        else if (((m->m_data + m->m_len) - M_START(m)) <= M_SIZE(n)) {
                /*
                 * At this point, n is guaranteed to be a normal mbuf
                 * with no cluster and no packet header. Because the
                 * logic in this code block requires this, the assert
                 * is here to catch any instances where someone
                 * changes the logic to invalidate that assumption.
                 */
                KASSERT((n->m_flags & (M_EXT | M_PKTHDR)) == 0,
                        ("%s: Unexpected flags (%#x) for mbuf",
                        __func__, n->m_flags));
                /* Copy leading space too, preserving the data offset. */
                n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m);
                n->m_len = m->m_len;
                bcopy(M_START(m), n->m_dat,
                        m->m_len + M_LEADINGSPACE_NOWRITE(m));
        }
        else {
                /*
                 * This is the case where we need to "settle for what
                 * we can get". The most probable way to reach this code
                 * path is that we've already taken references to the
                 * maximum number of mbuf clusters we can, and the data
                 * is too long to fit in an mbuf's internal storage.
                 * Try for a "best fit".
                 */
                tcp_pcap_copy_bestfit(th, m, n);

                /* Don't try to get additional data. */
                goto add_to_queue;
        }

        if (m->m_next) {
                n->m_next = m_copym(m->m_next, 0, M_COPYALL, M_NOWAIT);
                tcp_pcap_adj_cluster_reference(n->m_next, 1);
        }

add_to_queue:
        /* Add the new mbuf to the list. */
        if (mbufq_enqueue(queue, n)) {
                /* This shouldn't happen. If INVARIANTS is defined, panic. */
                KASSERT(0, ("%s: mbufq was unexpectedly full!", __func__));
                tcp_pcap_m_freem(n);
        }
}
  417 
  418 void
  419 tcp_pcap_drain(struct mbufq *queue)
  420 {
  421         struct mbuf *m;
  422         while ((m = mbufq_dequeue(queue)))
  423                 tcp_pcap_m_freem(m);
  424 }
  425 
  426 void
  427 tcp_pcap_tcpcb_init(struct tcpcb *tp)
  428 {
  429         mbufq_init(&(tp->t_inpkts), V_tcp_pcap_packets);
  430         mbufq_init(&(tp->t_outpkts), V_tcp_pcap_packets);
  431 }
  432 
  433 void
  434 tcp_pcap_set_sock_max(struct mbufq *queue, int newval)
  435 {
  436         queue->mq_maxlen = newval;
  437         while (queue->mq_len > queue->mq_maxlen)
  438                 tcp_pcap_m_freem(mbufq_dequeue(queue));
  439 }
  440 
  441 int
  442 tcp_pcap_get_sock_max(struct mbufq *queue)
  443 {
  444         return queue->mq_maxlen;
  445 }

Cache object: 9528c6d6e7388ddc52f885e8bc5f47ec


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.