FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_pcap.c


/*-
 * Copyright (c) 2015
 *      Jonathan Looney. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/queue.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/eventhandler.h>
#include <machine/atomic.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_pcap.h>

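/*
 * Bytes between the start of the mbuf's data region and the current
 * data pointer, i.e. the leading space; unlike M_LEADINGSPACE(), this
 * does not require the mbuf to be writable.
 */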
#define M_LEADINGSPACE_NOWRITE(m)                                       \
        ((m)->m_data - M_START(m))

int tcp_pcap_aggressive_free = 1;
static int tcp_pcap_clusters_referenced_cur = 0;
static int tcp_pcap_clusters_referenced_max = 0;

SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_aggressive_free,
        CTLFLAG_RW, &tcp_pcap_aggressive_free, 0,
        "Free saved packets when the memory system comes under pressure");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_cur,
        CTLFLAG_RD, &tcp_pcap_clusters_referenced_cur, 0,
        "Number of clusters currently referenced on TCP PCAP queues");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_clusters_referenced_max,
        CTLFLAG_RW, &tcp_pcap_clusters_referenced_max, 0,
        "Maximum number of clusters allowed to be referenced on TCP PCAP "
        "queues");

static int tcp_pcap_alloc_reuse_ext = 0;
static int tcp_pcap_alloc_reuse_mbuf = 0;
static int tcp_pcap_alloc_new_mbuf = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_ext,
        CTLFLAG_RD, &tcp_pcap_alloc_reuse_ext, 0,
        "Number of mbufs with external storage reused for the TCP PCAP "
        "functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_reuse_mbuf,
        CTLFLAG_RD, &tcp_pcap_alloc_reuse_mbuf, 0,
        "Number of mbufs with internal storage reused for the TCP PCAP "
        "functionality");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_alloc_new_mbuf,
        CTLFLAG_RD, &tcp_pcap_alloc_new_mbuf, 0,
        "Number of new mbufs allocated for the TCP PCAP functionality");

VNET_DEFINE(int, tcp_pcap_packets) = 0;
#define V_tcp_pcap_packets      VNET(tcp_pcap_packets)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_pcap_packets,
        CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_pcap_packets), 0,
        "Default number of packets saved per direction per TCPCB");

/* Cap cluster references at one quarter of the system's cluster pool. */
static void
tcp_pcap_max_set(void)
{

        tcp_pcap_clusters_referenced_max = nmbclusters / 4;
}

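/*
 * Set the initial limit and re-derive it whenever nmbclusters changes.
 */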
void
tcp_pcap_init(void)
{

        tcp_pcap_max_set();
        EVENTHANDLER_REGISTER(nmbclusters_change, tcp_pcap_max_set,
                NULL, EVENTHANDLER_PRI_ANY);
}

/*
 * If we are below the maximum allowed cluster references,
 * increment the reference count and return TRUE. Otherwise,
 * leave the reference count alone and return FALSE.
 */
static __inline bool
tcp_pcap_take_cluster_reference(void)
{
        if (atomic_fetchadd_int(&tcp_pcap_clusters_referenced_cur, 1) >=
                tcp_pcap_clusters_referenced_max) {
                atomic_add_int(&tcp_pcap_clusters_referenced_cur, -1);
                return FALSE;
        }
        return TRUE;
}

/*
 * For each mbuf in the chain with external storage, apply the given
 * adjustment to the cluster reference counter. This can be used to
 * adjust the counter when an mbuf chain is copied or freed.
 */
static __inline void
tcp_pcap_adj_cluster_reference(struct mbuf *m, int adj)
{
        while (m) {
                if (m->m_flags & M_EXT)
                        atomic_add_int(&tcp_pcap_clusters_referenced_cur, adj);

                m = m->m_next;
        }
}

/*
 * Free all mbufs in a chain, decrementing the reference count as
 * necessary.
 *
 * Functions in this file should use this instead of m_freem() when
 * they are freeing mbuf chains that may contain clusters that were
 * already included in tcp_pcap_clusters_referenced_cur.
 */
static void
tcp_pcap_m_freem(struct mbuf *mb)
{
        while (mb != NULL) {
                if (mb->m_flags & M_EXT)
                        atomic_subtract_int(&tcp_pcap_clusters_referenced_cur,
                            1);
                mb = m_free(mb);
        }
}

/*
 * Copy data from m to n, where n cannot fit all the data we might
 * want from m.
 *
 * Prioritize data like this:
 * 1. TCP header
 * 2. IP header
 * 3. Data
 */
static void
tcp_pcap_copy_bestfit(struct tcphdr *th, struct mbuf *m, struct mbuf *n)
{
        struct mbuf *m_cur = m;
        int bytes_to_copy = 0, trailing_data, skip = 0, tcp_off;

        /* Below, we assume these will be non-NULL. */
        KASSERT(th, ("%s: called with th == NULL", __func__));
        KASSERT(m, ("%s: called with m == NULL", __func__));
        KASSERT(n, ("%s: called with n == NULL", __func__));

        /* We assume this initialization occurred elsewhere. */
        KASSERT(n->m_len == 0, ("%s: called with n->m_len=%d (expected 0)",
                __func__, n->m_len));
        KASSERT(n->m_data == M_START(n),
                ("%s: called with n->m_data != M_START(n)", __func__));

        /*
         * Calculate the size of the TCP header. We use this often
         * enough that it is worth just calculating at the start.
         */
        tcp_off = th->th_off << 2;

        /* Trim off leading empty mbufs. */
        while (m && m->m_len == 0)
                m = m->m_next;

        if (m) {
                m_cur = m;
        } else {
                /*
                 * No data? Highly unusual. We would expect to at
                 * least see a TCP header in the mbuf. Since we still
                 * have a pointer to the TCP header, just copy that.
                 */
fallback:
                bytes_to_copy = tcp_off;
                if (bytes_to_copy > M_SIZE(n))
                        bytes_to_copy = M_SIZE(n);
                bcopy(th, n->m_data, bytes_to_copy);
                n->m_len = bytes_to_copy;
                return;
        }

        /*
         * Find TCP header. Record the total number of bytes up to,
         * and including, the TCP header.
         */
        while (m_cur) {
                if ((caddr_t) th >= (caddr_t) m_cur->m_data &&
                        (caddr_t) th < (caddr_t) (m_cur->m_data + m_cur->m_len))
                        break;
                bytes_to_copy += m_cur->m_len;
                m_cur = m_cur->m_next;
        }
        if (m_cur)
                bytes_to_copy += (caddr_t) th - (caddr_t) m_cur->m_data;
        else
                goto fallback;
        bytes_to_copy += tcp_off;

        /*
         * If we already want to copy more bytes than we can hold
         * in the destination mbuf, skip leading bytes and copy
         * what we can.
         *
         * Otherwise, consider trailing data.
         */
        if (bytes_to_copy > M_SIZE(n)) {
                skip = bytes_to_copy - M_SIZE(n);
                bytes_to_copy = M_SIZE(n);
        } else {
                /*
                 * Determine how much trailing data is in the chain.
                 * We start with the length of this mbuf (the one
                 * containing th) and subtract the size of the TCP
                 * header (tcp_off) and the size of the data prior
                 * to th (th - m_cur->m_data).
                 *
                 * This *should not* be negative, as the TCP code
                 * should put the whole TCP header in a single
                 * mbuf. But, it isn't a problem if it is. We will
                 * simply work off our negative balance as we look
                 * at subsequent mbufs.
                 */
                trailing_data = m_cur->m_len - tcp_off;
                trailing_data -= (caddr_t) th - (caddr_t) m_cur->m_data;
                m_cur = m_cur->m_next;
                while (m_cur) {
                        trailing_data += m_cur->m_len;
                        m_cur = m_cur->m_next;
                }
                if ((bytes_to_copy + trailing_data) > M_SIZE(n))
                        bytes_to_copy = M_SIZE(n);
                else
                        bytes_to_copy += trailing_data;
        }

        m_copydata(m, skip, bytes_to_copy, n->m_data);
        n->m_len = bytes_to_copy;
}
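
/*
 * Worked example for tcp_pcap_copy_bestfit() (sizes hypothetical): with
 * a 20-byte IP header, a 20-byte TCP header, and a destination mbuf
 * holding only 32 bytes, bytes_to_copy starts at 40, so skip becomes 8
 * and the final 32 bytes are copied: the tail of the IP header plus the
 * whole TCP header. The TCP header survives at the expense of leading
 * IP bytes.
 */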

void
tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct mbufq *queue)
{
        struct mbuf *n = NULL, *mhead;

        KASSERT(th, ("%s: called with th == NULL", __func__));
        KASSERT(m, ("%s: called with m == NULL", __func__));
        KASSERT(queue, ("%s: called with queue == NULL", __func__));

        /* We only care about data packets. */
        while (m && m->m_type != MT_DATA)
                m = m->m_next;

        /* We only need to do something if we still have an mbuf. */
        if (!m)
                return;

        /* If we are not saving mbufs, return now. */
        if (queue->mq_maxlen == 0)
                return;

        /*
         * Check to see if we will need to recycle mbufs.
         *
         * If we need to get rid of mbufs to stay below
         * our packet count, try to reuse the mbuf. Once
         * we have a new mbuf (n), we can simply free
         * subsequent mbufs.
         *
         * Note that most of the logic in here is to deal
         * with the reuse. If we are fine with constant
         * mbuf allocs/deallocs, we could ditch this logic.
         * But, it only seems to make sense to reuse
         * mbufs we already have.
         */
        while (mbufq_full(queue)) {
                mhead = mbufq_dequeue(queue);

                if (n) {
                        tcp_pcap_m_freem(mhead);
                } else {
                        /*
                         * If this held an external cluster, try to
                         * detach the cluster. But, if we held the
                         * last reference, go through the normal
                         * freeing process.
                         */
                        if (mhead->m_flags & M_EXTPG) {
                                /* Don't mess around with these. */
                                tcp_pcap_m_freem(mhead);
                                continue;
                        } else if (mhead->m_flags & M_EXT) {
                                switch (mhead->m_ext.ext_type) {
                                case EXT_SFBUF:
                                        /* Don't mess around with these. */
                                        tcp_pcap_m_freem(mhead);
                                        continue;
                                default:
                                        if (atomic_fetchadd_int(
                                                mhead->m_ext.ext_cnt, -1) == 1)
                                        {
                                                /*
                                                 * We held the last reference
                                                 * on this cluster. Restore
                                                 * the reference count and put
                                                 * it back in the pool.
                                                 */
                                                *(mhead->m_ext.ext_cnt) = 1;
                                                tcp_pcap_m_freem(mhead);
                                                continue;
                                        }
                                        /*
                                         * We were able to cleanly free the
                                         * reference.
                                         */
                                        atomic_subtract_int(
                                            &tcp_pcap_clusters_referenced_cur,
                                            1);
                                        tcp_pcap_alloc_reuse_ext++;
                                        break;
                                }
                        } else {
                                tcp_pcap_alloc_reuse_mbuf++;
                        }

                        n = mhead;
                        tcp_pcap_m_freem(n->m_next);
                        m_init(n, M_NOWAIT, MT_DATA, 0);
                }
        }

        /* Check to see if we need to get a new mbuf. */
        if (!n) {
                if (!(n = m_get(M_NOWAIT, MT_DATA)))
                        return;
                tcp_pcap_alloc_new_mbuf++;
        }

        /*
         * What are we dealing with? If a cluster, attach it. Otherwise,
         * try to copy the data from the beginning of the mbuf to the
         * end of data. (There may be data between the start of the data
         * area and the current data pointer. We want to get this, because
         * it may contain header information that is useful.)
         * In cases where that isn't possible, settle for what we can
         * get.
         */
        if ((m->m_flags & (M_EXT|M_EXTPG)) &&
            tcp_pcap_take_cluster_reference()) {
                n->m_data = m->m_data;
                n->m_len = m->m_len;
                mb_dupcl(n, m);
        } else if (((m->m_data + m->m_len) - M_START(m)) <= M_SIZE(n)) {
                /*
                 * At this point, n is guaranteed to be a normal mbuf
                 * with no cluster and no packet header. Because the
                 * logic in this code block requires this, the assert
                 * is here to catch any instances where someone
                 * changes the logic to invalidate that assumption.
                 */
                KASSERT((n->m_flags & (M_EXT | M_PKTHDR)) == 0,
                        ("%s: Unexpected flags (%#x) for mbuf",
                        __func__, n->m_flags));
                n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m);
                n->m_len = m->m_len;
                if (m->m_flags & M_EXTPG)
                        m_copydata(m, 0, m->m_len, n->m_data);
                else
                        bcopy(M_START(m), n->m_dat,
                            m->m_len + M_LEADINGSPACE_NOWRITE(m));
        } else {
                /*
                 * This is the case where we need to "settle for what
                 * we can get". The most probable way to reach this code
                 * path is that we've already taken references to the
                 * maximum number of mbuf clusters we can, and the data
                 * is too long to fit in an mbuf's internal storage.
                 * Try for a "best fit".
                 */
                tcp_pcap_copy_bestfit(th, m, n);

                /* Don't try to get additional data. */
                goto add_to_queue;
        }

        if (m->m_next) {
                n->m_next = m_copym(m->m_next, 0, M_COPYALL, M_NOWAIT);
                tcp_pcap_adj_cluster_reference(n->m_next, 1);
        }

add_to_queue:
        /* Add the new mbuf to the list. */
        if (mbufq_enqueue(queue, n)) {
                /* This shouldn't happen. If INVARIANTS is defined, panic. */
                KASSERT(0, ("%s: mbufq was unexpectedly full!", __func__));
                tcp_pcap_m_freem(n);
        }
}

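/*
 * Free an entire capture queue, releasing any cluster references the
 * saved packets held.
 */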
void
tcp_pcap_drain(struct mbufq *queue)
{
        struct mbuf *m;
        while ((m = mbufq_dequeue(queue)))
                tcp_pcap_m_freem(m);
}

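/*
 * Initialize a connection's capture queues to the system-default depth
 * (V_tcp_pcap_packets).
 */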
void
tcp_pcap_tcpcb_init(struct tcpcb *tp)
{
        mbufq_init(&(tp->t_inpkts), V_tcp_pcap_packets);
        mbufq_init(&(tp->t_outpkts), V_tcp_pcap_packets);
}

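/*
 * Change a queue's maximum depth, freeing the oldest saved packets if
 * the queue now exceeds the new limit.
 */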
void
tcp_pcap_set_sock_max(struct mbufq *queue, int newval)
{
        queue->mq_maxlen = newval;
        while (queue->mq_len > queue->mq_maxlen)
                tcp_pcap_m_freem(mbufq_dequeue(queue));
}

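/* Report a queue's current maximum depth. */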
int
tcp_pcap_get_sock_max(struct mbufq *queue)
{
        return queue->mq_maxlen;
}
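
Usage sketch (not part of this file; a hedged illustration, not the
definitive call sites). Under the TCPPCAP kernel option, the TCP input
and output paths save a copy of each segment on the connection's
capture queues, roughly:

#ifdef TCPPCAP
        /* In the input path: save the received segment for this tcpcb. */
        tcp_pcap_add(th, m, &(tp->t_inpkts));
#endif

The queues are sized by tcp_pcap_tcpcb_init() when the tcpcb is created
(or later via tcp_pcap_set_sock_max(), e.g. from a socket-option
handler) and released with tcp_pcap_drain() when the connection is
discarded.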
