FreeBSD/Linux Kernel Cross Reference
sys/dev/tcp_log/tcp_log_dev.c

/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2016-2017 Netflix, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/poll.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/mutex.h>
#include <sys/selinfo.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <machine/atomic.h>
#include <sys/counter.h>

#include <dev/tcp_log/tcp_log_dev.h>

#ifdef TCPLOG_DEBUG_COUNTERS
extern counter_u64_t tcp_log_que_read;
extern counter_u64_t tcp_log_que_freed;
#endif

static struct cdev *tcp_log_dev;
static struct selinfo tcp_log_sel;

static struct log_queueh tcp_log_dev_queue_head = STAILQ_HEAD_INITIALIZER(tcp_log_dev_queue_head);
static struct log_infoh tcp_log_dev_reader_head = STAILQ_HEAD_INITIALIZER(tcp_log_dev_reader_head);

MALLOC_DEFINE(M_TCPLOGDEV, "tcp_log_dev", "TCP log device data structures");

static int      tcp_log_dev_listeners = 0;

static struct mtx tcp_log_dev_queue_lock;

#define TCP_LOG_DEV_QUEUE_LOCK()        mtx_lock(&tcp_log_dev_queue_lock)
#define TCP_LOG_DEV_QUEUE_UNLOCK()      mtx_unlock(&tcp_log_dev_queue_lock)
#define TCP_LOG_DEV_QUEUE_LOCK_ASSERT() mtx_assert(&tcp_log_dev_queue_lock, MA_OWNED)
#define TCP_LOG_DEV_QUEUE_UNLOCK_ASSERT() mtx_assert(&tcp_log_dev_queue_lock, MA_NOTOWNED)
#define TCP_LOG_DEV_QUEUE_REF(tldq)     refcount_acquire(&((tldq)->tldq_refcnt))
#define TCP_LOG_DEV_QUEUE_UNREF(tldq)   refcount_release(&((tldq)->tldq_refcnt))

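/*
 * Entry lifetime: each entry on tcp_log_dev_queue_head carries one
 * reference per reader that has not yet consumed it.
 * tcp_log_dev_add_log() initializes the refcount to the number of
 * current listeners; a reader drops its reference as it moves past an
 * entry (or when it closes the device), and tcp_log_dev_clear_refcount()
 * unlinks and destroys the entry once the count reaches zero.
 */
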
static void     tcp_log_dev_clear_refcount(struct tcp_log_dev_queue *entry);
static void     tcp_log_dev_clear_cdevpriv(void *data);
static int      tcp_log_dev_open(struct cdev *dev __unused, int flags,
    int devtype __unused, struct thread *td __unused);
static int      tcp_log_dev_write(struct cdev *dev __unused,
    struct uio *uio __unused, int flags __unused);
static int      tcp_log_dev_read(struct cdev *dev __unused, struct uio *uio,
    int flags __unused);
static int      tcp_log_dev_ioctl(struct cdev *dev __unused, u_long cmd,
    caddr_t data, int fflag __unused, struct thread *td __unused);
static int      tcp_log_dev_poll(struct cdev *dev __unused, int events,
    struct thread *td);

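/*
 * The read path may need to take and drop the queue lock at several
 * points. Rather than track that implicitly, tcp_log_dev_read() and
 * tcp_log_dev_rotate_bufs() pass around one of these values recording
 * whether the caller currently holds the queue lock; on INVARIANTS
 * kernels, tcp_log_dev_queue_validate_lock() asserts that the recorded
 * state matches reality.
 */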
enum tcp_log_dev_queue_lock_state {
        QUEUE_UNLOCKED = 0,
        QUEUE_LOCKED,
};

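/*
 * Character-device switch. The device is read-only from userland:
 * d_write is provided only so that writes fail cleanly with ENODEV, and
 * d_mmap is compiled out until it is implemented (NOTYET).
 */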
static struct cdevsw tcp_log_cdevsw = {
        .d_version =    D_VERSION,
        .d_read =       tcp_log_dev_read,
        .d_open =       tcp_log_dev_open,
        .d_write =      tcp_log_dev_write,
        .d_poll =       tcp_log_dev_poll,
        .d_ioctl =      tcp_log_dev_ioctl,
#ifdef NOTYET
        .d_mmap =       tcp_log_dev_mmap,
#endif
        .d_name =       "tcp_log",
};

static __inline void
tcp_log_dev_queue_validate_lock(int lockstate)
{

#ifdef INVARIANTS
        switch (lockstate) {
        case QUEUE_LOCKED:
                TCP_LOG_DEV_QUEUE_LOCK_ASSERT();
                break;
        case QUEUE_UNLOCKED:
                TCP_LOG_DEV_QUEUE_UNLOCK_ASSERT();
                break;
        default:
                kassert_panic("%s:%d: unknown queue lock state", __func__,
                    __LINE__);
        }
#endif
}

/*
 * Clear the refcount. If appropriate, it will remove the entry from the
 * queue and call the destructor.
 *
 * This must be called with the queue lock held.
 */
static void
tcp_log_dev_clear_refcount(struct tcp_log_dev_queue *entry)
{

        KASSERT(entry != NULL, ("%s: called with NULL entry", __func__));

        TCP_LOG_DEV_QUEUE_LOCK_ASSERT();

        if (TCP_LOG_DEV_QUEUE_UNREF(entry)) {
#ifdef TCPLOG_DEBUG_COUNTERS
                counter_u64_add(tcp_log_que_freed, 1);
#endif
                /* Remove the entry from the queue and call the destructor. */
                STAILQ_REMOVE(&tcp_log_dev_queue_head, entry, tcp_log_dev_queue,
                    tldq_queue);
                (*entry->tldq_dtor)(entry);
        }
}

static void
tcp_log_dev_clear_cdevpriv(void *data)
{
        struct tcp_log_dev_info *priv;
        struct tcp_log_dev_queue *entry, *entry_tmp;

        priv = (struct tcp_log_dev_info *)data;
        if (priv == NULL)
                return;

        /*
         * Lock the queue and drop our references. We hold references to all
         * the entries starting with tldi_head (or, if tldi_head == NULL, all
         * entries in the queue).
         *
         * We take the lock so no one can add new entries to the queue while
         * we are doing this.
         */
        TCP_LOG_DEV_QUEUE_LOCK();
        if (priv->tldi_head != NULL) {
                entry = priv->tldi_head;
                STAILQ_FOREACH_FROM_SAFE(entry, &tcp_log_dev_queue_head,
                    tldq_queue, entry_tmp) {
                        tcp_log_dev_clear_refcount(entry);
                }
        }
        tcp_log_dev_listeners--;
        KASSERT(tcp_log_dev_listeners >= 0,
            ("%s: tcp_log_dev_listeners is unexpectedly negative", __func__));
        STAILQ_REMOVE(&tcp_log_dev_reader_head, priv, tcp_log_dev_info,
            tldi_list);
        TCP_LOG_DEV_QUEUE_LOCK_ASSERT();
        TCP_LOG_DEV_QUEUE_UNLOCK();
        free(priv, M_TCPLOGDEV);
}

static int
tcp_log_dev_open(struct cdev *dev __unused, int flags, int devtype __unused,
    struct thread *td __unused)
{
        struct tcp_log_dev_info *priv;
        struct tcp_log_dev_queue *entry;
        int rv;

        /*
         * Ideally, we shouldn't see these because of file system
         * permissions.
         */
        if (flags & (FWRITE | FEXEC | FAPPEND | O_TRUNC))
                return (ENODEV);

        /* Allocate space to hold information about where we are. */
        priv = malloc(sizeof(struct tcp_log_dev_info), M_TCPLOGDEV,
            M_ZERO | M_WAITOK);

        /* Stash the private data away. */
        rv = devfs_set_cdevpriv((void *)priv, tcp_log_dev_clear_cdevpriv);
        if (!rv) {
                /*
                 * Increase the listener count, add this reader to the list, and
                 * take references on all current queues.
                 */
                TCP_LOG_DEV_QUEUE_LOCK();
                tcp_log_dev_listeners++;
                STAILQ_INSERT_HEAD(&tcp_log_dev_reader_head, priv, tldi_list);
                priv->tldi_head = STAILQ_FIRST(&tcp_log_dev_queue_head);
                if (priv->tldi_head != NULL)
                        priv->tldi_cur = priv->tldi_head->tldq_buf;
                STAILQ_FOREACH(entry, &tcp_log_dev_queue_head, tldq_queue)
                        TCP_LOG_DEV_QUEUE_REF(entry);
                TCP_LOG_DEV_QUEUE_UNLOCK();
        } else {
                /* Free the entry. */
                free(priv, M_TCPLOGDEV);
        }
        return (rv);
}

static int
tcp_log_dev_write(struct cdev *dev __unused, struct uio *uio __unused,
    int flags __unused)
{

        return (ENODEV);
}

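/*
 * Advance a reader past the buffer it has finished: drop its reference on
 * the head entry, point tldi_head at the next entry (if any), and clear
 * tldi_cur so the read loop knows to set up a new buffer. Takes the queue
 * lock if the caller does not already hold it.
 */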
static __inline void
tcp_log_dev_rotate_bufs(struct tcp_log_dev_info *priv, int *lockstate)
{
        struct tcp_log_dev_queue *entry;

        KASSERT(priv->tldi_head != NULL,
            ("%s:%d: priv->tldi_head unexpectedly NULL",
            __func__, __LINE__));
        KASSERT(priv->tldi_head->tldq_buf == priv->tldi_cur,
            ("%s:%d: buffer mismatch (%p vs %p)",
            __func__, __LINE__, priv->tldi_head->tldq_buf,
            priv->tldi_cur));
        tcp_log_dev_queue_validate_lock(*lockstate);

        if (*lockstate == QUEUE_UNLOCKED) {
                TCP_LOG_DEV_QUEUE_LOCK();
                *lockstate = QUEUE_LOCKED;
        }
        entry = priv->tldi_head;
        priv->tldi_head = STAILQ_NEXT(entry, tldq_queue);
        tcp_log_dev_clear_refcount(entry);
        priv->tldi_cur = NULL;
}

static int
tcp_log_dev_read(struct cdev *dev __unused, struct uio *uio, int flags)
{
        struct tcp_log_common_header *buf;
        struct tcp_log_dev_info *priv;
        struct tcp_log_dev_queue *entry;
        ssize_t len;
        int lockstate, rv;

        /* Get our private info. */
        rv = devfs_get_cdevpriv((void **)&priv);
        if (rv)
                return (rv);

        lockstate = QUEUE_UNLOCKED;

        /* Do we need to get a new buffer? */
        while (priv->tldi_cur == NULL ||
            priv->tldi_cur->tlch_length <= priv->tldi_off) {
                /* Did we somehow forget to rotate? */
                KASSERT(priv->tldi_cur == NULL,
                    ("%s:%d: tldi_cur is unexpectedly non-NULL", __func__,
                    __LINE__));
                if (priv->tldi_cur != NULL)
                        tcp_log_dev_rotate_bufs(priv, &lockstate);

                /*
                 * Before we start looking at tldi_head, we need a lock on the
                 * queue to make sure tldi_head stays stable.
                 */
                if (lockstate == QUEUE_UNLOCKED) {
                        TCP_LOG_DEV_QUEUE_LOCK();
                        lockstate = QUEUE_LOCKED;
                }

                /* We need the next buffer. Do we have one? */
                if (priv->tldi_head == NULL && (flags & FNONBLOCK)) {
                        rv = EAGAIN;
                        goto done;
                }
                if (priv->tldi_head == NULL) {
                        /* Sleep and wait for more things we can read. */
                        rv = mtx_sleep(&tcp_log_dev_listeners,
                            &tcp_log_dev_queue_lock, PCATCH, "tcplogdev", 0);
                        if (rv)
                                goto done;
                        if (priv->tldi_head == NULL)
                                continue;
                }

                /*
                 * We have an entry to read. We want to try to create a
                 * buffer, if one doesn't already exist.
                 */
                entry = priv->tldi_head;
                if (entry->tldq_buf == NULL) {
                        TCP_LOG_DEV_QUEUE_LOCK_ASSERT();
                        buf = (*entry->tldq_xform)(entry);
                        if (buf == NULL) {
                                rv = EBUSY;
                                goto done;
                        }
                        entry->tldq_buf = buf;
                }

                priv->tldi_cur = entry->tldq_buf;
                priv->tldi_off = 0;
        }

        /* Copy what we can from this buffer to the output buffer. */
        if (uio->uio_resid > 0) {
                /* Drop locks so we can take page faults. */
                if (lockstate == QUEUE_LOCKED)
                        TCP_LOG_DEV_QUEUE_UNLOCK();
                lockstate = QUEUE_UNLOCKED;

                KASSERT(priv->tldi_cur != NULL,
                    ("%s: priv->tldi_cur is unexpectedly NULL", __func__));

                /* Copy as much as we can to this uio. */
                len = priv->tldi_cur->tlch_length - priv->tldi_off;
                if (len > uio->uio_resid)
                        len = uio->uio_resid;
                rv = uiomove(((uint8_t *)priv->tldi_cur) + priv->tldi_off,
                    len, uio);
                if (rv != 0)
                        goto done;
                priv->tldi_off += len;
#ifdef TCPLOG_DEBUG_COUNTERS
                counter_u64_add(tcp_log_que_read, len);
#endif
        }
        /* Are we done with this buffer? If so, find the next one. */
        if (priv->tldi_off >= priv->tldi_cur->tlch_length) {
                KASSERT(priv->tldi_off == priv->tldi_cur->tlch_length,
                    ("%s: offset (%ju) exceeds length (%ju)", __func__,
                    (uintmax_t)priv->tldi_off,
                    (uintmax_t)priv->tldi_cur->tlch_length));
                tcp_log_dev_rotate_bufs(priv, &lockstate);
        }
done:
        tcp_log_dev_queue_validate_lock(lockstate);
        if (lockstate == QUEUE_LOCKED)
                TCP_LOG_DEV_QUEUE_UNLOCK();
        return (rv);
}

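/*
 * Usage sketch (not part of the driver): a minimal userland reader. This
 * assumes only what the code in this file establishes: the module creates
 * /dev/tcp_log (mode 0400, owned by root), the read(2) stream is a
 * sequence of buffers that each begin with a struct tcp_log_common_header
 * whose tlch_length gives that buffer's total size, and poll(2) reports
 * POLLIN when data is ready. Note that a buffer may span multiple read(2)
 * calls, so a parser must handle records split across reads.
 *
 *      #include <fcntl.h>
 *      #include <poll.h>
 *      #include <unistd.h>
 *
 *      int fd = open("/dev/tcp_log", O_RDONLY);        // requires root
 *      struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *      char buf[65536];
 *      ssize_t n;
 *
 *      while (poll(&pfd, 1, -1) > 0) {
 *              n = read(fd, buf, sizeof(buf));
 *              if (n <= 0)
 *                      break;
 *              // Walk the records in buf using the tlch_length field of
 *              // each tcp_log_common_header.
 *      }
 *      close(fd);
 */
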
static int
tcp_log_dev_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
    int fflag __unused, struct thread *td __unused)
{
        struct tcp_log_dev_info *priv;
        int rv;

        /* Get our private info. */
        rv = devfs_get_cdevpriv((void **)&priv);
        if (rv)
                return (rv);

        /*
         * Set things. Here, we are most concerned about the non-blocking I/O
         * flag.
         */
        rv = 0;
        switch (cmd) {
        case FIONBIO:
                break;
        case FIOASYNC:
                if (*(int *)data != 0)
                        rv = EINVAL;
                break;
        default:
                rv = ENOIOCTL;
        }
        return (rv);
}

static int
tcp_log_dev_poll(struct cdev *dev __unused, int events, struct thread *td)
{
        struct tcp_log_dev_info *priv;
        int revents;

        /*
         * Get our private info. If this fails, claim that all events are
         * ready. That should prod the user to do something that will
         * make the error evident to them.
         */
        if (devfs_get_cdevpriv((void **)&priv))
                return (events);

        revents = 0;
        if (events & (POLLIN | POLLRDNORM)) {
                /*
                 * We can (probably) read right now if we are partway through
                 * a buffer or if we are just about to start a buffer.
                 * Because we are going to read tldi_head, we need to acquire
                 * the queue lock.
                 */
                TCP_LOG_DEV_QUEUE_LOCK();
                if ((priv->tldi_head != NULL && priv->tldi_cur == NULL) ||
                    (priv->tldi_cur != NULL &&
                    priv->tldi_off < priv->tldi_cur->tlch_length))
                        revents = events & (POLLIN | POLLRDNORM);
                else
                        selrecord(td, &tcp_log_sel);
                TCP_LOG_DEV_QUEUE_UNLOCK();
        } else {
                /*
                 * It only makes sense to poll for reading. So, again, prod the
                 * user to do something that will make the error of their ways
                 * apparent.
                 */
                revents = events;
        }
        return (revents);
}

int
tcp_log_dev_add_log(struct tcp_log_dev_queue *entry)
{
        struct tcp_log_dev_info *priv;
        int rv;
        bool wakeup_needed;

        KASSERT(entry->tldq_buf != NULL || entry->tldq_xform != NULL,
            ("%s: Called with both tldq_buf and tldq_xform set to NULL",
            __func__));
        KASSERT(entry->tldq_dtor != NULL,
            ("%s: Called with tldq_dtor set to NULL", __func__));

        /* Get a lock on the queue. */
        TCP_LOG_DEV_QUEUE_LOCK();

        /* If no one is listening, tell the caller to free the resources. */
        if (tcp_log_dev_listeners == 0) {
                rv = ENXIO;
                goto done;
        }

        /* Add this to the end of the tailq. */
        STAILQ_INSERT_TAIL(&tcp_log_dev_queue_head, entry, tldq_queue);

        /* Add references for all current listeners. */
        refcount_init(&entry->tldq_refcnt, tcp_log_dev_listeners);

        /*
         * If any listener is currently stuck on NULL, that means they are
         * waiting. Point their head to this new entry.
         */
        wakeup_needed = false;
        STAILQ_FOREACH(priv, &tcp_log_dev_reader_head, tldi_list)
                if (priv->tldi_head == NULL) {
                        priv->tldi_head = entry;
                        wakeup_needed = true;
                }

        if (wakeup_needed) {
                selwakeup(&tcp_log_sel);
                wakeup(&tcp_log_dev_listeners);
        }

        rv = 0;

done:
        TCP_LOG_DEV_QUEUE_LOCK_ASSERT();
        TCP_LOG_DEV_QUEUE_UNLOCK();
        return (rv);
}

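/*
 * Usage sketch (not part of the driver): the shape of an in-kernel
 * producer of log data. The malloc type and helper names here are
 * hypothetical; the requirements come from the KASSERTs and the ENXIO
 * path in tcp_log_dev_add_log() above. Either tldq_buf or tldq_xform must
 * be set, tldq_dtor must be set, and on failure the caller still owns the
 * entry and must free it.
 *
 *      struct tcp_log_dev_queue *entry;
 *
 *      entry = malloc(sizeof(*entry), M_MYPRODUCER, M_WAITOK | M_ZERO);
 *      entry->tldq_buf = NULL;             // built lazily by the xform
 *      entry->tldq_xform = my_build_buf;   // returns a buffer starting with
 *                                          // a struct tcp_log_common_header
 *      entry->tldq_dtor = my_free_entry;   // called when the refcount drops
 *                                          // to zero
 *      if (tcp_log_dev_add_log(entry) != 0)
 *              my_free_entry(entry);       // ENXIO: no one is listening
 */
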
static int
tcp_log_dev_modevent(module_t mod __unused, int type, void *data __unused)
{

        /* TODO: Support intelligent unloading. */
        switch (type) {
        case MOD_LOAD:
                if (bootverbose)
                        printf("tcp_log: tcp_log device\n");
                memset(&tcp_log_sel, 0, sizeof(tcp_log_sel));
                memset(&tcp_log_dev_queue_lock, 0, sizeof(struct mtx));
                mtx_init(&tcp_log_dev_queue_lock, "tcp_log dev",
                         "tcp_log device queues", MTX_DEF);
                tcp_log_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
                    &tcp_log_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0400,
                    "tcp_log");
                break;
        default:
                return (EOPNOTSUPP);
        }

        return (0);
}

DEV_MODULE(tcp_log_dev, tcp_log_dev_modevent, NULL);
MODULE_VERSION(tcp_log_dev, 1);
