sys/net/bpf.c
1 /* $NetBSD: bpf.c,v 1.249 2022/11/30 06:02:37 ozaki-r Exp $ */
2
3 /*
4 * Copyright (c) 1990, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from the Stanford/CMU enet packet filter,
8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
10 * Berkeley Laboratory.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95
37 * static char rcsid[] =
38 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
39 */
40
41 #include <sys/cdefs.h>
42 __KERNEL_RCSID(0, "$NetBSD: bpf.c,v 1.249 2022/11/30 06:02:37 ozaki-r Exp $");
43
44 #if defined(_KERNEL_OPT)
45 #include "opt_bpf.h"
46 #include "sl.h"
47 #include "opt_net_mpsafe.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/mbuf.h>
53 #include <sys/buf.h>
54 #include <sys/time.h>
55 #include <sys/proc.h>
56 #include <sys/ioctl.h>
57 #include <sys/conf.h>
58 #include <sys/vnode.h>
59 #include <sys/queue.h>
60 #include <sys/stat.h>
61 #include <sys/module.h>
62 #include <sys/atomic.h>
63 #include <sys/cpu.h>
64
65 #include <sys/file.h>
66 #include <sys/filedesc.h>
67 #include <sys/tty.h>
68 #include <sys/uio.h>
69
70 #include <sys/protosw.h>
71 #include <sys/socket.h>
72 #include <sys/errno.h>
73 #include <sys/kernel.h>
74 #include <sys/poll.h>
75 #include <sys/sysctl.h>
76 #include <sys/kauth.h>
77 #include <sys/syslog.h>
78 #include <sys/percpu.h>
79 #include <sys/pserialize.h>
80 #include <sys/lwp.h>
81 #include <sys/xcall.h>
82
83 #include <net/if.h>
84 #include <net/slip.h>
85
86 #include <net/bpf.h>
87 #include <net/bpfdesc.h>
88 #include <net/bpfjit.h>
89
90 #include <net/if_arc.h>
91 #include <net/if_ether.h>
92 #include <net/if_types.h>
93
94 #include <netinet/in.h>
95 #include <netinet/if_inarp.h>
96
97
98 #include <compat/sys/sockio.h>
99
100 #ifndef BPF_BUFSIZE
101 /*
102 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
103 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
104 */
105 # define BPF_BUFSIZE 32768
106 #endif
107
108 #define PRINET 26 /* interruptible */
109
110 /*
111 * The default read buffer size, and limit for BIOCSBLEN, is sysctl'able.
112 * XXX the default values should be computed dynamically based
113 * on available memory size and available mbuf clusters.
114 */
115 static int bpf_bufsize = BPF_BUFSIZE;
116 static int bpf_maxbufsize = BPF_DFLTBUFSIZE; /* XXX set dynamically, see above */
117 static bool bpf_jit = false;
118
119 struct bpfjit_ops bpfjit_module_ops = {
120 .bj_generate_code = NULL,
121 .bj_free_code = NULL
122 };
123
124 /*
125 * Global BPF statistics returned by net.bpf.stats sysctl.
126 */
127 static struct percpu *bpf_gstats_percpu; /* struct bpf_stat */
128
129 #define BPF_STATINC(id) \
130 { \
131 struct bpf_stat *__stats = \
132 percpu_getref(bpf_gstats_percpu); \
133 __stats->bs_##id++; \
134 percpu_putref(bpf_gstats_percpu); \
135 }
136
137 /*
138 * Locking notes:
139 * - bpf_mtx (adaptive mutex) protects:
 140  *   - Global lists: bpf_iflist and bpf_dlist
141 * - struct bpf_if
142 * - bpf_close
143 * - bpf_psz (pserialize)
144 * - struct bpf_d has two mutexes:
145 * - bd_buf_mtx (spin mutex) protects the buffers that can be accessed
146 * on packet tapping
147 * - bd_mtx (adaptive mutex) protects member variables other than the buffers
148 * - Locking order: bpf_mtx => bpf_d#bd_mtx => bpf_d#bd_buf_mtx
149 * - struct bpf_d obtained via fp->f_bpf in bpf_read and bpf_write is
150 * never freed because struct bpf_d is only freed in bpf_close and
 151  *   bpf_close is never called while bpf_read or bpf_write is executing
152 * - A filter that is assigned to bpf_d can be replaced with another filter
153 * while tapping packets, so it needs to be done atomically
154 * - struct bpf_d is iterated on bpf_dlist with psz
155 * - struct bpf_if is iterated on bpf_iflist with psz or psref
156 */
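/*
 * A minimal sketch (not in the original source) of the documented lock
 * order, for a hypothetical path that needs all three locks:
 *
 *	mutex_enter(&bpf_mtx);
 *	mutex_enter(d->bd_mtx);
 *	mutex_enter(d->bd_buf_mtx);
 *	... critical section ...
 *	mutex_exit(d->bd_buf_mtx);
 *	mutex_exit(d->bd_mtx);
 *	mutex_exit(&bpf_mtx);
 *
 * Taking them in any other order risks deadlock against paths such as
 * bpf_setf(), which takes bpf_mtx, then bd_mtx, then bd_buf_mtx (via
 * reset_d()).
 */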
157 /*
158 * Use a mutex to avoid a race condition between gathering the stats/peers
159 * and opening/closing the device.
160 */
161 static kmutex_t bpf_mtx;
162
163 static struct psref_class *bpf_psref_class __read_mostly;
164 static pserialize_t bpf_psz;
165
166 static inline void
167 bpf_if_acquire(struct bpf_if *bp, struct psref *psref)
168 {
169
170 psref_acquire(psref, &bp->bif_psref, bpf_psref_class);
171 }
172
173 static inline void
174 bpf_if_release(struct bpf_if *bp, struct psref *psref)
175 {
176
177 psref_release(psref, &bp->bif_psref, bpf_psref_class);
178 }
179
180 /*
181 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 182  * bpf_dlist is the list of open descriptors
183 */
184 static struct pslist_head bpf_iflist;
185 static struct pslist_head bpf_dlist;
186
187 /* Macros for bpf_d on bpf_dlist */
188 #define BPF_DLIST_WRITER_INSERT_HEAD(__d) \
189 PSLIST_WRITER_INSERT_HEAD(&bpf_dlist, (__d), bd_bpf_dlist_entry)
190 #define BPF_DLIST_READER_FOREACH(__d) \
191 PSLIST_READER_FOREACH((__d), &bpf_dlist, struct bpf_d, \
192 bd_bpf_dlist_entry)
193 #define BPF_DLIST_WRITER_FOREACH(__d) \
194 PSLIST_WRITER_FOREACH((__d), &bpf_dlist, struct bpf_d, \
195 bd_bpf_dlist_entry)
196 #define BPF_DLIST_ENTRY_INIT(__d) \
197 PSLIST_ENTRY_INIT((__d), bd_bpf_dlist_entry)
198 #define BPF_DLIST_WRITER_REMOVE(__d) \
199 PSLIST_WRITER_REMOVE((__d), bd_bpf_dlist_entry)
200 #define BPF_DLIST_ENTRY_DESTROY(__d) \
201 PSLIST_ENTRY_DESTROY((__d), bd_bpf_dlist_entry)
202
203 /* Macros for bpf_if on bpf_iflist */
204 #define BPF_IFLIST_WRITER_INSERT_HEAD(__bp) \
205 PSLIST_WRITER_INSERT_HEAD(&bpf_iflist, (__bp), bif_iflist_entry)
206 #define BPF_IFLIST_READER_FOREACH(__bp) \
207 PSLIST_READER_FOREACH((__bp), &bpf_iflist, struct bpf_if, \
208 bif_iflist_entry)
209 #define BPF_IFLIST_WRITER_FOREACH(__bp) \
210 PSLIST_WRITER_FOREACH((__bp), &bpf_iflist, struct bpf_if, \
211 bif_iflist_entry)
212 #define BPF_IFLIST_WRITER_REMOVE(__bp) \
213 PSLIST_WRITER_REMOVE((__bp), bif_iflist_entry)
214 #define BPF_IFLIST_ENTRY_INIT(__bp) \
215 PSLIST_ENTRY_INIT((__bp), bif_iflist_entry)
216 #define BPF_IFLIST_ENTRY_DESTROY(__bp) \
217 PSLIST_ENTRY_DESTROY((__bp), bif_iflist_entry)
218
219 /* Macros for bpf_d on bpf_if#bif_dlist_pslist */
220 #define BPFIF_DLIST_READER_FOREACH(__d, __bp) \
221 PSLIST_READER_FOREACH((__d), &(__bp)->bif_dlist_head, struct bpf_d, \
222 bd_bif_dlist_entry)
223 #define BPFIF_DLIST_WRITER_INSERT_HEAD(__bp, __d) \
224 PSLIST_WRITER_INSERT_HEAD(&(__bp)->bif_dlist_head, (__d), \
225 bd_bif_dlist_entry)
226 #define BPFIF_DLIST_WRITER_REMOVE(__d) \
227 PSLIST_WRITER_REMOVE((__d), bd_bif_dlist_entry)
228 #define BPFIF_DLIST_ENTRY_INIT(__d) \
229 PSLIST_ENTRY_INIT((__d), bd_bif_dlist_entry)
230 #define BPFIF_DLIST_READER_EMPTY(__bp) \
231 (PSLIST_READER_FIRST(&(__bp)->bif_dlist_head, struct bpf_d, \
232 bd_bif_dlist_entry) == NULL)
233 #define BPFIF_DLIST_WRITER_EMPTY(__bp) \
234 (PSLIST_WRITER_FIRST(&(__bp)->bif_dlist_head, struct bpf_d, \
235 bd_bif_dlist_entry) == NULL)
236 #define BPFIF_DLIST_ENTRY_DESTROY(__d) \
237 PSLIST_ENTRY_DESTROY((__d), bd_bif_dlist_entry)
238
239 static int bpf_allocbufs(struct bpf_d *);
240 static u_int bpf_xfilter(struct bpf_filter **, void *, u_int, u_int);
241 static void bpf_deliver(struct bpf_if *,
242 void *(*cpfn)(void *, const void *, size_t),
243 void *, u_int, u_int, const u_int);
244 static void bpf_freed(struct bpf_d *);
245 static void bpf_free_filter(struct bpf_filter *);
246 static void bpf_ifname(struct ifnet *, struct ifreq *);
247 static void *bpf_mcpy(void *, const void *, size_t);
248 static int bpf_movein(struct ifnet *, struct uio *, int, uint64_t,
249 struct mbuf **, struct sockaddr *,
250 struct bpf_filter **);
251 static void bpf_attachd(struct bpf_d *, struct bpf_if *);
252 static void bpf_detachd(struct bpf_d *);
253 static int bpf_setif(struct bpf_d *, struct ifreq *);
254 static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long);
255 static void bpf_timed_out(void *);
256 static inline void
257 bpf_wakeup(struct bpf_d *);
258 static int bpf_hdrlen(struct bpf_d *);
259 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
260 void *(*)(void *, const void *, size_t), struct timespec *);
261 static void reset_d(struct bpf_d *);
262 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
263 static int bpf_setdlt(struct bpf_d *, u_int);
264
265 static int bpf_read(struct file *, off_t *, struct uio *, kauth_cred_t,
266 int);
267 static int bpf_write(struct file *, off_t *, struct uio *, kauth_cred_t,
268 int);
269 static int bpf_ioctl(struct file *, u_long, void *);
270 static int bpf_poll(struct file *, int);
271 static int bpf_stat(struct file *, struct stat *);
272 static int bpf_close(struct file *);
273 static int bpf_kqfilter(struct file *, struct knote *);
274
275 static const struct fileops bpf_fileops = {
276 .fo_name = "bpf",
277 .fo_read = bpf_read,
278 .fo_write = bpf_write,
279 .fo_ioctl = bpf_ioctl,
280 .fo_fcntl = fnullop_fcntl,
281 .fo_poll = bpf_poll,
282 .fo_stat = bpf_stat,
283 .fo_close = bpf_close,
284 .fo_kqfilter = bpf_kqfilter,
285 .fo_restart = fnullop_restart,
286 };
287
288 dev_type_open(bpfopen);
289
290 const struct cdevsw bpf_cdevsw = {
291 .d_open = bpfopen,
292 .d_close = noclose,
293 .d_read = noread,
294 .d_write = nowrite,
295 .d_ioctl = noioctl,
296 .d_stop = nostop,
297 .d_tty = notty,
298 .d_poll = nopoll,
299 .d_mmap = nommap,
300 .d_kqfilter = nokqfilter,
301 .d_discard = nodiscard,
302 .d_flag = D_OTHER | D_MPSAFE
303 };
304
305 bpfjit_func_t
306 bpf_jit_generate(bpf_ctx_t *bc, void *code, size_t size)
307 {
308 struct bpfjit_ops *ops = &bpfjit_module_ops;
309 bpfjit_func_t (*generate_code)(const bpf_ctx_t *,
310 const struct bpf_insn *, size_t);
311
312 generate_code = atomic_load_acquire(&ops->bj_generate_code);
313 if (generate_code != NULL) {
314 return generate_code(bc, code, size);
315 }
316 return NULL;
317 }
318
319 void
320 bpf_jit_freecode(bpfjit_func_t jcode)
321 {
322 KASSERT(bpfjit_module_ops.bj_free_code != NULL);
323 bpfjit_module_ops.bj_free_code(jcode);
324 }
325
326 static int
327 bpf_movein(struct ifnet *ifp, struct uio *uio, int linktype, uint64_t mtu, struct mbuf **mp,
328 struct sockaddr *sockp, struct bpf_filter **wfilter)
329 {
330 struct mbuf *m, *m0, *n;
331 int error;
332 size_t len;
333 size_t hlen;
334 size_t align;
335 u_int slen;
336
337 /*
338 * Build a sockaddr based on the data link layer type.
339 * We do this at this level because the ethernet header
340 * is copied directly into the data field of the sockaddr.
341 * In the case of SLIP, there is no header and the packet
342 * is forwarded as is.
343 * Also, we are careful to leave room at the front of the mbuf
344 * for the link level header.
345 */
346 switch (linktype) {
347
348 case DLT_SLIP:
349 sockp->sa_family = AF_INET;
350 hlen = 0;
351 align = 0;
352 break;
353
354 case DLT_PPP:
355 sockp->sa_family = AF_UNSPEC;
356 hlen = 0;
357 align = 0;
358 break;
359
360 case DLT_EN10MB:
361 sockp->sa_family = AF_UNSPEC;
362 /* XXX Would MAXLINKHDR be better? */
363 /* 6(dst)+6(src)+2(type) */
364 hlen = sizeof(struct ether_header);
365 align = 2;
366 break;
367
368 case DLT_ARCNET:
369 sockp->sa_family = AF_UNSPEC;
370 hlen = ARC_HDRLEN;
371 align = 5;
372 break;
373
374 case DLT_FDDI:
375 sockp->sa_family = AF_LINK;
376 /* XXX 4(FORMAC)+6(dst)+6(src) */
377 hlen = 16;
378 align = 0;
379 break;
380
381 case DLT_ECONET:
382 sockp->sa_family = AF_UNSPEC;
383 hlen = 6;
384 align = 2;
385 break;
386
387 case DLT_NULL:
388 sockp->sa_family = AF_UNSPEC;
389 if (ifp->if_type == IFT_LOOP) {
 390 			/* Set hlen here so the validations below apply */
391 hlen = sizeof(uint32_t);
392 } else
393 hlen = 0;
394 align = 0;
395 break;
396
397 default:
398 return (EIO);
399 }
400
401 len = uio->uio_resid;
402 /*
403 * If there aren't enough bytes for a link level header or the
404 * packet length exceeds the interface mtu, return an error.
405 */
406 if (len - hlen > mtu)
407 return (EMSGSIZE);
408
409 m0 = m = m_gethdr(M_WAIT, MT_DATA);
410 m_reset_rcvif(m);
411 m->m_pkthdr.len = (int)(len - hlen);
412 if (len + align > MHLEN) {
413 m_clget(m, M_WAIT);
414 if ((m->m_flags & M_EXT) == 0) {
415 error = ENOBUFS;
416 goto bad;
417 }
418 }
419
 420 	/* Ensure the data is properly aligned */
421 if (align > 0)
422 m->m_data += align;
423
424 for (;;) {
425 len = M_TRAILINGSPACE(m);
426 if (len > uio->uio_resid)
427 len = uio->uio_resid;
428 error = uiomove(mtod(m, void *), len, uio);
429 if (error)
430 goto bad;
431 m->m_len = len;
432
433 if (uio->uio_resid == 0)
434 break;
435
436 n = m_get(M_WAIT, MT_DATA);
 437 		m_clget(n, M_WAIT);	/* harmless if this fails */
438 m->m_next = n;
439 m = n;
440 }
441
442 slen = bpf_xfilter(wfilter, mtod(m, u_char *), len, len);
443 if (slen == 0) {
444 error = EPERM;
445 goto bad;
446 }
447
448 if (hlen != 0) {
449 if (linktype == DLT_NULL && ifp->if_type == IFT_LOOP) {
450 uint32_t af;
451 /* the link header indicates the address family */
452 memcpy(&af, mtod(m0, void *), sizeof(af));
453 sockp->sa_family = af;
454 } else {
 455 			/* move the link-level header at the front of the mbuf to sa_data */
456 memcpy(sockp->sa_data, mtod(m0, void *), hlen);
457 }
458 m0->m_data += hlen;
459 m0->m_len -= hlen;
460 }
461
462 *mp = m0;
463 return (0);
464
465 bad:
466 m_freem(m0);
467 return (error);
468 }
469
470 /*
471 * Attach file to the bpf interface, i.e. make d listen on bp.
472 */
473 static void
474 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
475 {
476 struct bpf_event_tracker *t;
477
478 KASSERT(mutex_owned(&bpf_mtx));
479 KASSERT(mutex_owned(d->bd_mtx));
480 /*
481 * Point d at bp, and add d to the interface's list of listeners.
482 * Finally, point the driver's bpf cookie at the interface so
483 * it will divert packets to bpf.
484 */
485 d->bd_bif = bp;
486 BPFIF_DLIST_WRITER_INSERT_HEAD(bp, d);
487
488 *bp->bif_driverp = bp;
489
490 SLIST_FOREACH(t, &bp->bif_trackers, bet_entries) {
491 t->bet_notify(bp, bp->bif_ifp, bp->bif_dlt,
492 BPF_TRACK_EVENT_ATTACH);
493 }
494 }
495
496 /*
497 * Detach a file from its interface.
498 */
499 static void
500 bpf_detachd(struct bpf_d *d)
501 {
502 struct bpf_if *bp;
503 struct bpf_event_tracker *t;
504
505 KASSERT(mutex_owned(&bpf_mtx));
506 KASSERT(mutex_owned(d->bd_mtx));
507
508 bp = d->bd_bif;
509 /*
510 * Check if this descriptor had requested promiscuous mode.
511 * If so, turn it off.
512 */
513 if (d->bd_promisc) {
514 int error __diagused;
515
516 d->bd_promisc = 0;
517 /*
518 * Take device out of promiscuous mode. Since we were
519 * able to enter promiscuous mode, we should be able
520 * to turn it off. But we can get an error if
 521 		 * the interface was configured down, so just report
 522 		 * any unexpected error instead of panicking.
523 */
524 KERNEL_LOCK_UNLESS_NET_MPSAFE();
525 error = ifpromisc(bp->bif_ifp, 0);
526 KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
527 #ifdef DIAGNOSTIC
528 if (error)
529 printf("%s: ifpromisc failed: %d", __func__, error);
530 #endif
531 }
532
533 /* Remove d from the interface's descriptor list. */
534 BPFIF_DLIST_WRITER_REMOVE(d);
535
536 pserialize_perform(bpf_psz);
537
538 if (BPFIF_DLIST_WRITER_EMPTY(bp)) {
539 /*
540 * Let the driver know that there are no more listeners.
541 */
542 *d->bd_bif->bif_driverp = NULL;
543 }
544
545 d->bd_bif = NULL;
546
547 SLIST_FOREACH(t, &bp->bif_trackers, bet_entries) {
548 t->bet_notify(bp, bp->bif_ifp, bp->bif_dlt,
549 BPF_TRACK_EVENT_DETACH);
550 }
551 }
552
553 static void
554 bpf_init(void)
555 {
556
557 mutex_init(&bpf_mtx, MUTEX_DEFAULT, IPL_NONE);
558 bpf_psz = pserialize_create();
559 bpf_psref_class = psref_class_create("bpf", IPL_SOFTNET);
560
561 PSLIST_INIT(&bpf_iflist);
562 PSLIST_INIT(&bpf_dlist);
563
564 bpf_gstats_percpu = percpu_alloc(sizeof(struct bpf_stat));
565
566 return;
567 }
568
569 /*
570 * bpfilterattach() is called at boot time. We don't need to do anything
571 * here, since any initialization will happen as part of module init code.
572 */
573 /* ARGSUSED */
574 void
575 bpfilterattach(int n)
576 {
577
578 }
579
580 /*
 581  * Open the bpf device. Each open clones a new descriptor.
582 */
583 /* ARGSUSED */
584 int
585 bpfopen(dev_t dev, int flag, int mode, struct lwp *l)
586 {
587 struct bpf_d *d;
588 struct file *fp;
589 int error, fd;
590
 591 	/* fd_allocfile() will fill in the descriptor for us. */
592 if ((error = fd_allocfile(&fp, &fd)) != 0)
593 return error;
594
595 d = kmem_zalloc(sizeof(*d), KM_SLEEP);
596 d->bd_bufsize = bpf_bufsize;
597 d->bd_direction = BPF_D_INOUT;
598 d->bd_feedback = 0;
599 d->bd_pid = l->l_proc->p_pid;
600 #ifdef _LP64
601 if (curproc->p_flag & PK_32)
602 d->bd_compat32 = 1;
603 #endif
604 getnanotime(&d->bd_btime);
605 d->bd_atime = d->bd_mtime = d->bd_btime;
606 callout_init(&d->bd_callout, CALLOUT_MPSAFE);
607 selinit(&d->bd_sel);
608 d->bd_jitcode = NULL;
609 d->bd_rfilter = NULL;
610 d->bd_wfilter = NULL;
611 d->bd_locked = 0;
612 BPF_DLIST_ENTRY_INIT(d);
613 BPFIF_DLIST_ENTRY_INIT(d);
614 d->bd_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
615 d->bd_buf_mtx = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
616 cv_init(&d->bd_cv, "bpf");
617
618 mutex_enter(&bpf_mtx);
619 BPF_DLIST_WRITER_INSERT_HEAD(d);
620 mutex_exit(&bpf_mtx);
621
622 return fd_clone(fp, fd, flag, &bpf_fileops, d);
623 }
624
625 /*
626 * Close the descriptor by detaching it from its interface,
627 * deallocating its buffers, and marking it free.
628 */
629 /* ARGSUSED */
630 static int
631 bpf_close(struct file *fp)
632 {
633 struct bpf_d *d;
634
635 mutex_enter(&bpf_mtx);
636
637 if ((d = fp->f_bpf) == NULL) {
638 mutex_exit(&bpf_mtx);
639 return 0;
640 }
641
642 /*
643 * Refresh the PID associated with this bpf file.
644 */
645 d->bd_pid = curproc->p_pid;
646
647 mutex_enter(d->bd_mtx);
648 if (d->bd_state == BPF_WAITING)
649 callout_halt(&d->bd_callout, d->bd_mtx);
650 d->bd_state = BPF_IDLE;
651 if (d->bd_bif)
652 bpf_detachd(d);
653 mutex_exit(d->bd_mtx);
654
655 BPF_DLIST_WRITER_REMOVE(d);
656
657 pserialize_perform(bpf_psz);
658 mutex_exit(&bpf_mtx);
659
660 BPFIF_DLIST_ENTRY_DESTROY(d);
661 BPF_DLIST_ENTRY_DESTROY(d);
662 fp->f_bpf = NULL;
663 bpf_freed(d);
664 callout_destroy(&d->bd_callout);
665 seldestroy(&d->bd_sel);
666 mutex_obj_free(d->bd_mtx);
667 mutex_obj_free(d->bd_buf_mtx);
668 cv_destroy(&d->bd_cv);
669
670 kmem_free(d, sizeof(*d));
671
672 return (0);
673 }
674
675 /*
676 * Rotate the packet buffers in descriptor d. Move the store buffer
677 * into the hold slot, and the free buffer into the store slot.
678 * Zero the length of the new store buffer.
679 */
680 #define ROTATE_BUFFERS(d) \
681 (d)->bd_hbuf = (d)->bd_sbuf; \
682 (d)->bd_hlen = (d)->bd_slen; \
683 (d)->bd_sbuf = (d)->bd_fbuf; \
684 (d)->bd_slen = 0; \
685 (d)->bd_fbuf = NULL;
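/*
 * Callers of ROTATE_BUFFERS must hold bd_buf_mtx and guarantee that
 * bd_fbuf is non-NULL (true whenever bd_hbuf is NULL); bpf_read() and
 * catchpacket() below both satisfy these conditions.
 */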
686 /*
 687  * bpf_read - read the next chunk of packets from the buffers
688 */
689 static int
690 bpf_read(struct file *fp, off_t *offp, struct uio *uio,
691 kauth_cred_t cred, int flags)
692 {
693 struct bpf_d *d = fp->f_bpf;
694 int timed_out;
695 int error;
696
697 /*
698 * Refresh the PID associated with this bpf file.
699 */
700 d->bd_pid = curproc->p_pid;
701
702 getnanotime(&d->bd_atime);
703 /*
704 * Restrict application to use a buffer the same size as
705 * the kernel buffers.
706 */
707 if (uio->uio_resid != d->bd_bufsize)
708 return (EINVAL);
709
710 mutex_enter(d->bd_mtx);
711 if (d->bd_state == BPF_WAITING)
712 callout_halt(&d->bd_callout, d->bd_mtx);
713 timed_out = (d->bd_state == BPF_TIMED_OUT);
714 d->bd_state = BPF_IDLE;
715 mutex_exit(d->bd_mtx);
716 /*
717 * If the hold buffer is empty, then do a timed sleep, which
718 * ends when the timeout expires or when enough packets
719 * have arrived to fill the store buffer.
720 */
721 mutex_enter(d->bd_buf_mtx);
722 while (d->bd_hbuf == NULL) {
723 if (fp->f_flag & FNONBLOCK) {
724 if (d->bd_slen == 0) {
725 error = EWOULDBLOCK;
726 goto out;
727 }
728 ROTATE_BUFFERS(d);
729 break;
730 }
731
732 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
733 /*
 734 			 * One or more packets arrived since the previous
 735 			 * read or while we were asleep.
736 * Rotate the buffers and return what's here.
737 */
738 ROTATE_BUFFERS(d);
739 break;
740 }
741
742 error = cv_timedwait_sig(&d->bd_cv, d->bd_buf_mtx, d->bd_rtout);
743
744 if (error == EINTR || error == ERESTART)
745 goto out;
746
747 if (error == EWOULDBLOCK) {
748 /*
749 * On a timeout, return what's in the buffer,
750 * which may be nothing. If there is something
751 * in the store buffer, we can rotate the buffers.
752 */
753 if (d->bd_hbuf)
754 /*
755 * We filled up the buffer in between
756 * getting the timeout and arriving
757 * here, so we don't need to rotate.
758 */
759 break;
760
761 if (d->bd_slen == 0) {
762 error = 0;
763 goto out;
764 }
765 ROTATE_BUFFERS(d);
766 break;
767 }
768 if (error != 0)
769 goto out;
770 }
771 /*
772 * At this point, we know we have something in the hold slot.
773 */
774 mutex_exit(d->bd_buf_mtx);
775
776 /*
777 * Move data from hold buffer into user space.
778 * We know the entire buffer is transferred since
779 * we checked above that the read buffer is bpf_bufsize bytes.
780 */
781 error = uiomove(d->bd_hbuf, d->bd_hlen, uio);
782
783 mutex_enter(d->bd_buf_mtx);
784 d->bd_fbuf = d->bd_hbuf;
785 d->bd_hbuf = NULL;
786 d->bd_hlen = 0;
787 out:
788 mutex_exit(d->bd_buf_mtx);
789 return (error);
790 }
791
792
793 /*
794 * If there are processes sleeping on this descriptor, wake them up.
795 */
796 static inline void
797 bpf_wakeup(struct bpf_d *d)
798 {
799
800 mutex_enter(d->bd_buf_mtx);
801 cv_broadcast(&d->bd_cv);
802 mutex_exit(d->bd_buf_mtx);
803
804 if (d->bd_async)
805 fownsignal(d->bd_pgid, SIGIO, 0, 0, NULL);
806 selnotify(&d->bd_sel, 0, 0);
807 }
808
809 static void
810 bpf_timed_out(void *arg)
811 {
812 struct bpf_d *d = arg;
813
814 mutex_enter(d->bd_mtx);
815 if (d->bd_state == BPF_WAITING) {
816 d->bd_state = BPF_TIMED_OUT;
817 if (d->bd_slen != 0)
818 bpf_wakeup(d);
819 }
820 mutex_exit(d->bd_mtx);
821 }
822
823
824 static int
825 bpf_write(struct file *fp, off_t *offp, struct uio *uio,
826 kauth_cred_t cred, int flags)
827 {
828 struct bpf_d *d = fp->f_bpf;
829 struct bpf_if *bp;
830 struct ifnet *ifp;
831 struct mbuf *m, *mc;
832 int error;
833 static struct sockaddr_storage dst;
834 struct psref psref;
835 int bound;
836
837 /*
838 * Refresh the PID associated with this bpf file.
839 */
840 d->bd_pid = curproc->p_pid;
841
842 m = NULL; /* XXX gcc */
843
844 bound = curlwp_bind();
845 mutex_enter(d->bd_mtx);
846 bp = d->bd_bif;
847 if (bp == NULL) {
848 mutex_exit(d->bd_mtx);
849 error = ENXIO;
850 goto out_bindx;
851 }
852 bpf_if_acquire(bp, &psref);
853 mutex_exit(d->bd_mtx);
854
855 getnanotime(&d->bd_mtime);
856
857 ifp = bp->bif_ifp;
858 if (if_is_deactivated(ifp)) {
859 error = ENXIO;
860 goto out;
861 }
862
863 if (uio->uio_resid == 0) {
864 error = 0;
865 goto out;
866 }
867
868 error = bpf_movein(ifp, uio, (int)bp->bif_dlt, ifp->if_mtu, &m,
869 (struct sockaddr *) &dst, &d->bd_wfilter);
870 if (error)
871 goto out;
872
873 if (m->m_pkthdr.len > ifp->if_mtu) {
874 m_freem(m);
875 error = EMSGSIZE;
876 goto out;
877 }
878
879 if (d->bd_hdrcmplt)
880 dst.ss_family = pseudo_AF_HDRCMPLT;
881
882 if (d->bd_feedback) {
883 mc = m_dup(m, 0, M_COPYALL, M_NOWAIT);
884 if (mc != NULL)
885 m_set_rcvif(mc, ifp);
886 /* Set M_PROMISC for outgoing packets to be discarded. */
887 if (1 /*d->bd_direction == BPF_D_INOUT*/)
888 m->m_flags |= M_PROMISC;
889 } else
890 mc = NULL;
891
892 error = if_output_lock(ifp, ifp, m, (struct sockaddr *) &dst, NULL);
893
894 if (mc != NULL) {
895 if (error == 0) {
896 int s = splsoftnet();
897 KERNEL_LOCK_UNLESS_IFP_MPSAFE(ifp);
898 ifp->_if_input(ifp, mc);
899 KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(ifp);
900 splx(s);
901 } else
902 m_freem(mc);
903 }
904 /*
905 * The driver frees the mbuf.
906 */
907 out:
908 bpf_if_release(bp, &psref);
909 out_bindx:
910 curlwp_bindx(bound);
911 return error;
912 }
913
914 /*
915 * Reset a descriptor by flushing its packet buffer and clearing the
916 * receive and drop counts.
917 */
918 static void
919 reset_d(struct bpf_d *d)
920 {
921
922 KASSERT(mutex_owned(d->bd_mtx));
923
924 mutex_enter(d->bd_buf_mtx);
925 if (d->bd_hbuf) {
926 /* Free the hold buffer. */
927 d->bd_fbuf = d->bd_hbuf;
928 d->bd_hbuf = NULL;
929 }
930 d->bd_slen = 0;
931 d->bd_hlen = 0;
932 d->bd_rcount = 0;
933 d->bd_dcount = 0;
934 d->bd_ccount = 0;
935 mutex_exit(d->bd_buf_mtx);
936 }
937
938 /*
939 * FIONREAD Check for read packet available.
940 * BIOCGBLEN Get buffer len [for read()].
941 * BIOCSETF Set ethernet read filter.
942 * BIOCFLUSH Flush read packet buffer.
943 * BIOCPROMISC Put interface into promiscuous mode.
944 * BIOCGDLT Get link layer type.
945 * BIOCGETIF Get interface name.
946 * BIOCSETIF Set interface.
947 * BIOCSRTIMEOUT Set read timeout.
948 * BIOCGRTIMEOUT Get read timeout.
949 * BIOCGSTATS Get packet stats.
950 * BIOCIMMEDIATE Set immediate mode.
951 * BIOCVERSION Get filter language version.
952 * BIOCGHDRCMPLT Get "header already complete" flag.
953 * BIOCSHDRCMPLT Set "header already complete" flag.
954 * BIOCSFEEDBACK Set packet feedback mode.
955 * BIOCGFEEDBACK Get packet feedback mode.
956 * BIOCGDIRECTION Get packet direction flag
957 * BIOCSDIRECTION Set packet direction flag
958 */
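/*
 * A minimal userland sketch of the ioctls above (illustrative only:
 * error handling is omitted and the interface name "wm0" is an
 * assumption):
 *
 *	int fd = open("/dev/bpf", O_RDONLY);
 *	struct ifreq ifr;
 *	u_int imm = 1, blen;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "wm0", sizeof(ifr.ifr_name));
 *	ioctl(fd, BIOCSETIF, &ifr);	// attach to the interface
 *	ioctl(fd, BIOCIMMEDIATE, &imm);	// deliver packets as they arrive
 *	ioctl(fd, BIOCGBLEN, &blen);	// read() must use exactly this size
 *	char *buf = malloc(blen);
 *	ssize_t n = read(fd, buf, blen);
 */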
959 /* ARGSUSED */
960 static int
961 bpf_ioctl(struct file *fp, u_long cmd, void *addr)
962 {
963 struct bpf_d *d = fp->f_bpf;
964 int error = 0;
965
966 /*
967 * Refresh the PID associated with this bpf file.
968 */
969 d->bd_pid = curproc->p_pid;
970 #ifdef _LP64
971 if (curproc->p_flag & PK_32)
972 d->bd_compat32 = 1;
973 else
974 d->bd_compat32 = 0;
975 #endif
976
977 mutex_enter(d->bd_mtx);
978 if (d->bd_state == BPF_WAITING)
979 callout_halt(&d->bd_callout, d->bd_mtx);
980 d->bd_state = BPF_IDLE;
981 mutex_exit(d->bd_mtx);
982
983 if (d->bd_locked) {
984 switch (cmd) {
985 case BIOCGBLEN: /* FALLTHROUGH */
986 case BIOCFLUSH: /* FALLTHROUGH */
987 case BIOCGDLT: /* FALLTHROUGH */
988 case BIOCGDLTLIST: /* FALLTHROUGH */
989 case BIOCGETIF: /* FALLTHROUGH */
990 case BIOCGRTIMEOUT: /* FALLTHROUGH */
991 case BIOCGSTATS: /* FALLTHROUGH */
992 case BIOCVERSION: /* FALLTHROUGH */
993 case BIOCGHDRCMPLT: /* FALLTHROUGH */
994 case FIONREAD: /* FALLTHROUGH */
995 case BIOCLOCK: /* FALLTHROUGH */
996 case BIOCSRTIMEOUT: /* FALLTHROUGH */
997 case BIOCIMMEDIATE: /* FALLTHROUGH */
998 case TIOCGPGRP:
999 break;
1000 default:
1001 return EPERM;
1002 }
1003 }
1004
1005 switch (cmd) {
1006
1007 default:
1008 error = EINVAL;
1009 break;
1010
1011 /*
1012 * Check for read packet available.
1013 */
1014 case FIONREAD:
1015 {
1016 int n;
1017
1018 mutex_enter(d->bd_buf_mtx);
1019 n = d->bd_slen;
1020 if (d->bd_hbuf)
1021 n += d->bd_hlen;
1022 mutex_exit(d->bd_buf_mtx);
1023
1024 *(int *)addr = n;
1025 break;
1026 }
1027
1028 /*
1029 * Get buffer len [for read()].
1030 */
1031 case BIOCGBLEN:
1032 *(u_int *)addr = d->bd_bufsize;
1033 break;
1034
1035 /*
1036 * Set buffer length.
1037 */
1038 case BIOCSBLEN:
1039 /*
1040 		 * Forbid changing the buffer length if buffers are already
1041 		 * allocated.
1042 */
1043 mutex_enter(d->bd_mtx);
1044 mutex_enter(d->bd_buf_mtx);
1045 if (d->bd_bif != NULL || d->bd_sbuf != NULL)
1046 error = EINVAL;
1047 else {
1048 u_int size = *(u_int *)addr;
1049
1050 if (size > bpf_maxbufsize)
1051 *(u_int *)addr = size = bpf_maxbufsize;
1052 else if (size < BPF_MINBUFSIZE)
1053 *(u_int *)addr = size = BPF_MINBUFSIZE;
1054 d->bd_bufsize = size;
1055 }
1056 mutex_exit(d->bd_buf_mtx);
1057 mutex_exit(d->bd_mtx);
1058 break;
1059
1060 /*
1061 	 * Set link layer read or write filter.
1062 */
1063 case BIOCSETF: /* FALLTHROUGH */
1064 case BIOCSETWF:
1065 error = bpf_setf(d, addr, cmd);
1066 break;
1067
1068 case BIOCLOCK:
1069 d->bd_locked = 1;
1070 break;
1071
1072 /*
1073 * Flush read packet buffer.
1074 */
1075 case BIOCFLUSH:
1076 mutex_enter(d->bd_mtx);
1077 reset_d(d);
1078 mutex_exit(d->bd_mtx);
1079 break;
1080
1081 /*
1082 * Put interface into promiscuous mode.
1083 */
1084 case BIOCPROMISC:
1085 mutex_enter(d->bd_mtx);
1086 if (d->bd_bif == NULL) {
1087 mutex_exit(d->bd_mtx);
1088 /*
1089 * No interface attached yet.
1090 */
1091 error = EINVAL;
1092 break;
1093 }
1094 if (d->bd_promisc == 0) {
1095 KERNEL_LOCK_UNLESS_NET_MPSAFE();
1096 error = ifpromisc(d->bd_bif->bif_ifp, 1);
1097 KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
1098 if (error == 0)
1099 d->bd_promisc = 1;
1100 }
1101 mutex_exit(d->bd_mtx);
1102 break;
1103
1104 /*
1105 * Get device parameters.
1106 */
1107 case BIOCGDLT:
1108 mutex_enter(d->bd_mtx);
1109 if (d->bd_bif == NULL)
1110 error = EINVAL;
1111 else
1112 *(u_int *)addr = d->bd_bif->bif_dlt;
1113 mutex_exit(d->bd_mtx);
1114 break;
1115
1116 /*
1117 * Get a list of supported device parameters.
1118 */
1119 case BIOCGDLTLIST:
1120 mutex_enter(d->bd_mtx);
1121 if (d->bd_bif == NULL)
1122 error = EINVAL;
1123 else
1124 error = bpf_getdltlist(d, addr);
1125 mutex_exit(d->bd_mtx);
1126 break;
1127
1128 /*
1129 * Set device parameters.
1130 */
1131 case BIOCSDLT:
1132 mutex_enter(&bpf_mtx);
1133 mutex_enter(d->bd_mtx);
1134 if (d->bd_bif == NULL)
1135 error = EINVAL;
1136 else
1137 error = bpf_setdlt(d, *(u_int *)addr);
1138 mutex_exit(d->bd_mtx);
1139 mutex_exit(&bpf_mtx);
1140 break;
1141
1142 /*
1143 	 * Get interface name.
1144 */
1145 #ifdef OBIOCGETIF
1146 case OBIOCGETIF:
1147 #endif
1148 case BIOCGETIF:
1149 mutex_enter(d->bd_mtx);
1150 if (d->bd_bif == NULL)
1151 error = EINVAL;
1152 else
1153 bpf_ifname(d->bd_bif->bif_ifp, addr);
1154 mutex_exit(d->bd_mtx);
1155 break;
1156
1157 /*
1158 * Set interface.
1159 */
1160 #ifdef OBIOCSETIF
1161 case OBIOCSETIF:
1162 #endif
1163 case BIOCSETIF:
1164 mutex_enter(&bpf_mtx);
1165 error = bpf_setif(d, addr);
1166 mutex_exit(&bpf_mtx);
1167 break;
1168
1169 /*
1170 * Set read timeout.
1171 */
1172 case BIOCSRTIMEOUT:
1173 {
1174 struct timeval *tv = addr;
1175
1176 /* Compute number of ticks. */
1177 if (tv->tv_sec < 0 ||
1178 tv->tv_usec < 0 || tv->tv_usec >= 1000000) {
1179 error = EINVAL;
1180 break;
1181 } else if (tv->tv_sec > INT_MAX/hz - 1) {
1182 d->bd_rtout = INT_MAX;
1183 } else {
1184 d->bd_rtout = tv->tv_sec * hz
1185 + tv->tv_usec / tick;
1186 }
1187 if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
1188 d->bd_rtout = 1;
1189 break;
1190 }
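	/*
	 * Worked example (assuming hz = 100, so tick = 10000
	 * microseconds): a timeout of { tv_sec = 1, tv_usec = 500000 }
	 * gives bd_rtout = 1 * 100 + 500000 / 10000 = 150 ticks, while
	 * a non-zero timeout shorter than one tick (e.g. tv_usec =
	 * 5000) computes to 0 and is rounded up to 1 tick above.
	 */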
1191
1192 #ifdef BIOCGORTIMEOUT
1193 /*
1194 * Get read timeout.
1195 */
1196 case BIOCGORTIMEOUT:
1197 {
1198 struct timeval50 *tv = addr;
1199
1200 tv->tv_sec = d->bd_rtout / hz;
1201 tv->tv_usec = (d->bd_rtout % hz) * tick;
1202 break;
1203 }
1204 #endif
1205
1206 #ifdef BIOCSORTIMEOUT
1207 /*
1208 * Set read timeout.
1209 */
1210 case BIOCSORTIMEOUT:
1211 {
1212 struct timeval50 *tv = addr;
1213
1214 /* Compute number of ticks. */
1215 if (tv->tv_sec < 0 ||
1216 tv->tv_usec < 0 || tv->tv_usec >= 1000000) {
1217 error = EINVAL;
1218 break;
1219 } else if (tv->tv_sec > INT_MAX/hz - 1) {
1220 d->bd_rtout = INT_MAX;
1221 } else {
1222 d->bd_rtout = tv->tv_sec * hz
1223 + tv->tv_usec / tick;
1224 }
1225 if ((d->bd_rtout == 0) && (tv->tv_usec != 0))
1226 d->bd_rtout = 1;
1227 break;
1228 }
1229 #endif
1230
1231 /*
1232 * Get read timeout.
1233 */
1234 case BIOCGRTIMEOUT:
1235 {
1236 struct timeval *tv = addr;
1237
1238 tv->tv_sec = d->bd_rtout / hz;
1239 tv->tv_usec = (d->bd_rtout % hz) * tick;
1240 break;
1241 }
1242 /*
1243 * Get packet stats.
1244 */
1245 case BIOCGSTATS:
1246 {
1247 struct bpf_stat *bs = addr;
1248
1249 bs->bs_recv = d->bd_rcount;
1250 bs->bs_drop = d->bd_dcount;
1251 bs->bs_capt = d->bd_ccount;
1252 break;
1253 }
1254
1255 case BIOCGSTATSOLD:
1256 {
1257 struct bpf_stat_old *bs = addr;
1258
1259 bs->bs_recv = d->bd_rcount;
1260 bs->bs_drop = d->bd_dcount;
1261 break;
1262 }
1263
1264 /*
1265 * Set immediate mode.
1266 */
1267 case BIOCIMMEDIATE:
1268 d->bd_immediate = *(u_int *)addr;
1269 break;
1270
1271 case BIOCVERSION:
1272 {
1273 struct bpf_version *bv = addr;
1274
1275 bv->bv_major = BPF_MAJOR_VERSION;
1276 bv->bv_minor = BPF_MINOR_VERSION;
1277 break;
1278 }
1279
1280 case BIOCGHDRCMPLT: /* get "header already complete" flag */
1281 *(u_int *)addr = d->bd_hdrcmplt;
1282 break;
1283
1284 case BIOCSHDRCMPLT: /* set "header already complete" flag */
1285 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
1286 break;
1287
1288 /*
1289 * Get packet direction flag
1290 */
1291 case BIOCGDIRECTION:
1292 *(u_int *)addr = d->bd_direction;
1293 break;
1294
1295 /*
1296 * Set packet direction flag
1297 */
1298 case BIOCSDIRECTION:
1299 {
1300 u_int direction;
1301
1302 direction = *(u_int *)addr;
1303 switch (direction) {
1304 case BPF_D_IN:
1305 case BPF_D_INOUT:
1306 case BPF_D_OUT:
1307 d->bd_direction = direction;
1308 break;
1309 default:
1310 error = EINVAL;
1311 }
1312 }
1313 break;
1314
1315 /*
1316 * Set "feed packets from bpf back to input" mode
1317 */
1318 case BIOCSFEEDBACK:
1319 d->bd_feedback = *(u_int *)addr;
1320 break;
1321
1322 /*
1323 * Get "feed packets from bpf back to input" mode
1324 */
1325 case BIOCGFEEDBACK:
1326 *(u_int *)addr = d->bd_feedback;
1327 break;
1328
1329 case FIONBIO: /* Non-blocking I/O */
1330 /*
1331 * No need to do anything special as we use IO_NDELAY in
1332 * bpfread() as an indication of whether or not to block
1333 * the read.
1334 */
1335 break;
1336
1337 case FIOASYNC: /* Send signal on receive packets */
1338 mutex_enter(d->bd_mtx);
1339 d->bd_async = *(int *)addr;
1340 mutex_exit(d->bd_mtx);
1341 break;
1342
1343 case TIOCSPGRP: /* Process or group to send signals to */
1344 case FIOSETOWN:
1345 error = fsetown(&d->bd_pgid, cmd, addr);
1346 break;
1347
1348 case TIOCGPGRP:
1349 case FIOGETOWN:
1350 error = fgetown(d->bd_pgid, cmd, addr);
1351 break;
1352 }
1353 return (error);
1354 }
1355
1356 /*
1357 * Set d's packet filter program to fp. If this file already has a filter,
1358 * free it and replace it. Returns EINVAL for bogus requests.
1359 */
1360 static int
1361 bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
1362 {
1363 struct bpf_insn *fcode;
1364 bpfjit_func_t jcode;
1365 size_t flen, size = 0;
1366 struct bpf_filter *oldf, *newf, **storef;
1367
1368 jcode = NULL;
1369 flen = fp->bf_len;
1370
1371 if ((fp->bf_insns == NULL && flen) || flen > BPF_MAXINSNS) {
1372 return EINVAL;
1373 }
1374
1375 if (flen) {
1376 /*
1377 * Allocate the buffer, copy the byte-code from
1378 * userspace and validate it.
1379 */
1380 size = flen * sizeof(*fp->bf_insns);
1381 fcode = kmem_alloc(size, KM_SLEEP);
1382 if (copyin(fp->bf_insns, fcode, size) != 0 ||
1383 !bpf_validate(fcode, (int)flen)) {
1384 kmem_free(fcode, size);
1385 return EINVAL;
1386 }
1387 if (bpf_jit)
1388 jcode = bpf_jit_generate(NULL, fcode, flen);
1389 } else {
1390 fcode = NULL;
1391 }
1392
1393 newf = kmem_alloc(sizeof(*newf), KM_SLEEP);
1394 newf->bf_insn = fcode;
1395 newf->bf_size = size;
1396 newf->bf_jitcode = jcode;
1397 if (cmd == BIOCSETF)
1398 d->bd_jitcode = jcode; /* XXX just for kvm(3) users */
1399
1400 /* Need to hold bpf_mtx for pserialize_perform */
1401 mutex_enter(&bpf_mtx);
1402 mutex_enter(d->bd_mtx);
1403 if (cmd == BIOCSETWF) {
1404 oldf = d->bd_wfilter;
1405 storef = &d->bd_wfilter;
1406 } else {
1407 oldf = d->bd_rfilter;
1408 storef = &d->bd_rfilter;
1409 }
1410 atomic_store_release(storef, newf);
1411 reset_d(d);
1412 pserialize_perform(bpf_psz);
1413 mutex_exit(d->bd_mtx);
1414 mutex_exit(&bpf_mtx);
1415
1416 if (oldf != NULL)
1417 bpf_free_filter(oldf);
1418
1419 return 0;
1420 }
1421
1422 /*
1423 * Detach a file from its current interface (if attached at all) and attach
1424 * to the interface indicated by the name stored in ifr.
1425 * Return an errno or 0.
1426 */
1427 static int
1428 bpf_setif(struct bpf_d *d, struct ifreq *ifr)
1429 {
1430 struct bpf_if *bp;
1431 char *cp;
1432 int unit_seen, i, error;
1433
1434 KASSERT(mutex_owned(&bpf_mtx));
1435 /*
1436 * Make sure the provided name has a unit number, and default
1437 	 * it to '0' if not specified.
1438 * XXX This is ugly ... do this differently?
1439 */
1440 unit_seen = 0;
1441 cp = ifr->ifr_name;
1442 cp[sizeof(ifr->ifr_name) - 1] = '\0'; /* sanity */
1443 while (*cp++)
1444 		if (*cp >= '0' && *cp <= '9')
1445 unit_seen = 1;
1446 if (!unit_seen) {
1447 /* Make sure to leave room for the '\0'. */
1448 for (i = 0; i < (IFNAMSIZ - 1); ++i) {
1449 if ((ifr->ifr_name[i] >= 'a' &&
1450 ifr->ifr_name[i] <= 'z') ||
1451 (ifr->ifr_name[i] >= 'A' &&
1452 ifr->ifr_name[i] <= 'Z'))
1453 continue;
1454 			ifr->ifr_name[i] = '\0';
1455 }
1456 }
1457
1458 /*
1459 * Look through attached interfaces for the named one.
1460 */
1461 BPF_IFLIST_WRITER_FOREACH(bp) {
1462 struct ifnet *ifp = bp->bif_ifp;
1463
1464 if (ifp == NULL ||
1465 strcmp(ifp->if_xname, ifr->ifr_name) != 0)
1466 continue;
1467 /* skip additional entry */
1468 if (bp->bif_driverp != &ifp->if_bpf)
1469 continue;
1470 /*
1471 * We found the requested interface.
1472 * Allocate the packet buffers if we need to.
1473 * If we're already attached to requested interface,
1474 * just flush the buffer.
1475 */
1476 /*
1477 * bpf_allocbufs is called only here. bpf_mtx ensures that
1478 		 * no race condition happens on d->bd_sbuf.
1479 */
1480 if (d->bd_sbuf == NULL) {
1481 error = bpf_allocbufs(d);
1482 if (error != 0)
1483 return (error);
1484 }
1485 mutex_enter(d->bd_mtx);
1486 if (bp != d->bd_bif) {
1487 if (d->bd_bif) {
1488 /*
1489 * Detach if attached to something else.
1490 */
1491 bpf_detachd(d);
1492 BPFIF_DLIST_ENTRY_INIT(d);
1493 }
1494
1495 bpf_attachd(d, bp);
1496 }
1497 reset_d(d);
1498 mutex_exit(d->bd_mtx);
1499 return (0);
1500 }
1501 /* Not found. */
1502 return (ENXIO);
1503 }
1504
1505 /*
1506 * Copy the interface name to the ifreq.
1507 */
1508 static void
1509 bpf_ifname(struct ifnet *ifp, struct ifreq *ifr)
1510 {
1511 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
1512 }
1513
1514 static int
1515 bpf_stat(struct file *fp, struct stat *st)
1516 {
1517 struct bpf_d *d = fp->f_bpf;
1518
1519 (void)memset(st, 0, sizeof(*st));
1520 mutex_enter(d->bd_mtx);
1521 st->st_dev = makedev(cdevsw_lookup_major(&bpf_cdevsw), d->bd_pid);
1522 st->st_atimespec = d->bd_atime;
1523 st->st_mtimespec = d->bd_mtime;
1524 st->st_ctimespec = st->st_birthtimespec = d->bd_btime;
1525 st->st_uid = kauth_cred_geteuid(fp->f_cred);
1526 st->st_gid = kauth_cred_getegid(fp->f_cred);
1527 st->st_mode = S_IFCHR;
1528 mutex_exit(d->bd_mtx);
1529 return 0;
1530 }
1531
1532 /*
1533 * Support for poll() system call
1534 *
1535 * Return true iff the specific operation will not block indefinitely - with
1536 * the assumption that it is safe to positively acknowledge a request for the
1537 * ability to write to the BPF device.
1538 * Otherwise, return false but make a note that a selnotify() must be done.
1539 */
1540 static int
1541 bpf_poll(struct file *fp, int events)
1542 {
1543 struct bpf_d *d = fp->f_bpf;
1544 int revents;
1545
1546 /*
1547 * Refresh the PID associated with this bpf file.
1548 */
1549 mutex_enter(&bpf_mtx);
1550 d->bd_pid = curproc->p_pid;
1551
1552 revents = events & (POLLOUT | POLLWRNORM);
1553 if (events & (POLLIN | POLLRDNORM)) {
1554 /*
1555 * An imitation of the FIONREAD ioctl code.
1556 */
1557 mutex_enter(d->bd_mtx);
1558 if (d->bd_hlen != 0 ||
1559 ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1560 d->bd_slen != 0)) {
1561 revents |= events & (POLLIN | POLLRDNORM);
1562 } else {
1563 selrecord(curlwp, &d->bd_sel);
1564 /* Start the read timeout if necessary */
1565 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1566 callout_reset(&d->bd_callout, d->bd_rtout,
1567 bpf_timed_out, d);
1568 d->bd_state = BPF_WAITING;
1569 }
1570 }
1571 mutex_exit(d->bd_mtx);
1572 }
1573
1574 mutex_exit(&bpf_mtx);
1575 return (revents);
1576 }
1577
1578 static void
1579 filt_bpfrdetach(struct knote *kn)
1580 {
1581 struct bpf_d *d = kn->kn_hook;
1582
1583 mutex_enter(d->bd_buf_mtx);
1584 selremove_knote(&d->bd_sel, kn);
1585 mutex_exit(d->bd_buf_mtx);
1586 }
1587
1588 static int
1589 filt_bpfread(struct knote *kn, long hint)
1590 {
1591 struct bpf_d *d = kn->kn_hook;
1592 int rv;
1593
1594 /*
1595 * Refresh the PID associated with this bpf file.
1596 */
1597 d->bd_pid = curproc->p_pid;
1598
1599 mutex_enter(d->bd_buf_mtx);
1600 kn->kn_data = d->bd_hlen;
1601 if (d->bd_immediate)
1602 kn->kn_data += d->bd_slen;
1603 rv = (kn->kn_data > 0);
1604 mutex_exit(d->bd_buf_mtx);
1605 return rv;
1606 }
1607
1608 static const struct filterops bpfread_filtops = {
1609 .f_flags = FILTEROP_ISFD,
1610 .f_attach = NULL,
1611 .f_detach = filt_bpfrdetach,
1612 .f_event = filt_bpfread,
1613 };
1614
1615 static int
1616 bpf_kqfilter(struct file *fp, struct knote *kn)
1617 {
1618 struct bpf_d *d = fp->f_bpf;
1619
1620 switch (kn->kn_filter) {
1621 case EVFILT_READ:
1622 kn->kn_fop = &bpfread_filtops;
1623 break;
1624
1625 default:
1626 return (EINVAL);
1627 }
1628
1629 kn->kn_hook = d;
1630
1631 mutex_enter(d->bd_buf_mtx);
1632 selrecord_knote(&d->bd_sel, kn);
1633 mutex_exit(d->bd_buf_mtx);
1634
1635 return (0);
1636 }
1637
1638 /*
1639 * Copy data from an mbuf chain into a buffer. This code is derived
1640 * from m_copydata in sys/uipc_mbuf.c.
1641 */
1642 static void *
1643 bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
1644 {
1645 const struct mbuf *m;
1646 u_int count;
1647 u_char *dst;
1648
1649 m = src_arg;
1650 dst = dst_arg;
1651 while (len > 0) {
1652 if (m == NULL)
1653 panic("bpf_mcpy");
1654 count = uimin(m->m_len, len);
1655 memcpy(dst, mtod(m, const void *), count);
1656 m = m->m_next;
1657 dst += count;
1658 len -= count;
1659 }
1660 return dst_arg;
1661 }
1662
1663 static inline u_int
1664 bpf_xfilter(struct bpf_filter **filter, void *pkt, u_int pktlen, u_int buflen)
1665 {
1666 struct bpf_filter *filt;
1667 uint32_t mem[BPF_MEMWORDS];
1668 bpf_args_t args = {
1669 .pkt = (const uint8_t *)pkt,
1670 .wirelen = pktlen,
1671 .buflen = buflen,
1672 .mem = mem,
1673 .arg = NULL
1674 };
1675 u_int slen;
1676
1677 filt = atomic_load_consume(filter);
1678 if (filt == NULL) /* No filter means accept all. */
1679 return (u_int)-1;
1680
1681 if (filt->bf_jitcode != NULL)
1682 slen = filt->bf_jitcode(NULL, &args);
1683 else
1684 slen = bpf_filter_ext(NULL, filt->bf_insn, &args);
1685 return slen;
1686 }
1687
1688 /*
1689 * Dispatch a packet to all the listeners on interface bp.
1690 *
1691 * pkt pointer to the packet, either a data buffer or an mbuf chain
1692 * buflen buffer length, if pkt is a data buffer
1693 * cpfn a function that can copy pkt into the listener's buffer
1694 * pktlen length of the packet
1695 * direction BPF_D_IN or BPF_D_OUT
1696 */
1697 static inline void
1698 bpf_deliver(struct bpf_if *bp, void *(*cpfn)(void *, const void *, size_t),
1699 void *pkt, u_int pktlen, u_int buflen, const u_int direction)
1700 {
1701 bool gottime = false;
1702 struct timespec ts;
1703 struct bpf_d *d;
1704 int s;
1705 u_int slen;
1706
1707 KASSERT(!cpu_intr_p());
1708
1709 /*
1710 * Note that the IPL does not have to be raised at this point.
1711 	 * The only problem that could arise would be if two different
1712 	 * interfaces shared any data; this is not the case.
1713 */
1714 s = pserialize_read_enter();
1715 BPFIF_DLIST_READER_FOREACH(d, bp) {
1716 if (direction == BPF_D_IN) {
1717 if (d->bd_direction == BPF_D_OUT)
1718 continue;
1719 } else { /* BPF_D_OUT */
1720 if (d->bd_direction == BPF_D_IN)
1721 continue;
1722 }
1723
1724 atomic_inc_ulong(&d->bd_rcount);
1725 BPF_STATINC(recv);
1726
1727 slen = bpf_xfilter(&d->bd_rfilter, pkt, pktlen, buflen);
1728 if (slen == 0)
1729 continue;
1730
1731 if (!gottime) {
1732 gottime = true;
1733 nanotime(&ts);
1734 }
1735 /* Assume catchpacket doesn't sleep */
1736 catchpacket(d, pkt, pktlen, slen, cpfn, &ts);
1737 }
1738 pserialize_read_exit(s);
1739 }
1740
1741 /*
1742 * Incoming linkage from device drivers, when the head of the packet is in
1743 * a buffer, and the tail is in an mbuf chain.
1744 */
1745 static void
1746 _bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m,
1747 u_int direction)
1748 {
1749 u_int pktlen;
1750 struct mbuf mb;
1751
1752 /* Skip outgoing duplicate packets. */
1753 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) {
1754 m->m_flags &= ~M_PROMISC;
1755 return;
1756 }
1757
1758 pktlen = m_length(m) + dlen;
1759
1760 /*
1761 * Craft on-stack mbuf suitable for passing to bpf_filter.
1762 	 * Note that we cut corners here; we only set up what's
1763 * absolutely needed--this mbuf should never go anywhere else.
1764 */
1765 (void)memset(&mb, 0, sizeof(mb));
1766 mb.m_type = MT_DATA;
1767 mb.m_next = m;
1768 mb.m_data = data;
1769 mb.m_len = dlen;
1770
1771 bpf_deliver(bp, bpf_mcpy, &mb, pktlen, 0, direction);
1772 }
1773
1774 /*
1775 * Incoming linkage from device drivers, when packet is in an mbuf chain.
1776 */
1777 static void
1778 _bpf_mtap(struct bpf_if *bp, struct mbuf *m, u_int direction)
1779 {
1780 void *(*cpfn)(void *, const void *, size_t);
1781 u_int pktlen, buflen;
1782 void *marg;
1783
1784 /* Skip outgoing duplicate packets. */
1785 if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif_index == 0) {
1786 m->m_flags &= ~M_PROMISC;
1787 return;
1788 }
1789
1790 pktlen = m_length(m);
1791
1792 /* Skip zero-sized packets. */
1793 if (__predict_false(pktlen == 0)) {
1794 return;
1795 }
1796
1797 if (pktlen == m->m_len) {
1798 cpfn = (void *)memcpy;
1799 marg = mtod(m, void *);
1800 buflen = pktlen;
1801 KASSERT(buflen != 0);
1802 } else {
1803 cpfn = bpf_mcpy;
1804 marg = m;
1805 buflen = 0;
1806 }
1807
1808 bpf_deliver(bp, cpfn, marg, pktlen, buflen, direction);
1809 }
1810
1811 /*
1812 * We need to prepend the address family as
1813 * a four byte field. Cons up a dummy header
1814 * to pacify bpf. This is safe because bpf
1815 * will only read from the mbuf (i.e., it won't
1816 * try to free it or keep a pointer a to it).
1817 */
1818 static void
1819 _bpf_mtap_af(struct bpf_if *bp, uint32_t af, struct mbuf *m, u_int direction)
1820 {
1821 struct mbuf m0;
1822
1823 m0.m_type = MT_DATA;
1824 m0.m_flags = 0;
1825 m0.m_next = m;
1826 m0.m_nextpkt = NULL;
1827 m0.m_owner = NULL;
1828 m0.m_len = 4;
1829 m0.m_data = (char *)⁡
1830
1831 _bpf_mtap(bp, &m0, direction);
1832 }
1833
1834 /*
1835 * Put the SLIP pseudo-"link header" in place.
1836 * Note this M_PREPEND() should never fail,
1837  * since we know we always have enough space
1838 * in the input buffer.
1839 */
1840 static void
1841 _bpf_mtap_sl_in(struct bpf_if *bp, u_char *chdr, struct mbuf **m)
1842 {
1843 u_char *hp;
1844
1845 M_PREPEND(*m, SLIP_HDRLEN, M_DONTWAIT);
1846 if (*m == NULL)
1847 return;
1848
1849 hp = mtod(*m, u_char *);
1850 hp[SLX_DIR] = SLIPDIR_IN;
1851 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);
1852
1853 _bpf_mtap(bp, *m, BPF_D_IN);
1854
1855 m_adj(*m, SLIP_HDRLEN);
1856 }
1857
1858 /*
1859 * Put the SLIP pseudo-"link header" in
1860 * place. The compressed header is now
1861 * at the beginning of the mbuf.
1862 */
1863 static void
1864 _bpf_mtap_sl_out(struct bpf_if *bp, u_char *chdr, struct mbuf *m)
1865 {
1866 struct mbuf m0;
1867 u_char *hp;
1868
1869 m0.m_type = MT_DATA;
1870 m0.m_flags = 0;
1871 m0.m_next = m;
1872 m0.m_nextpkt = NULL;
1873 m0.m_owner = NULL;
1874 m0.m_data = m0.m_dat;
1875 m0.m_len = SLIP_HDRLEN;
1876
1877 hp = mtod(&m0, u_char *);
1878
1879 hp[SLX_DIR] = SLIPDIR_OUT;
1880 (void)memcpy(&hp[SLX_CHDR], chdr, CHDR_LEN);
1881
1882 _bpf_mtap(bp, &m0, BPF_D_OUT);
1883 m_freem(m);
1884 }
1885
1886 static struct mbuf *
1887 bpf_mbuf_enqueue(struct bpf_if *bp, struct mbuf *m)
1888 {
1889 struct mbuf *dup;
1890
1891 dup = m_dup(m, 0, M_COPYALL, M_NOWAIT);
1892 if (dup == NULL)
1893 return NULL;
1894
1895 if (bp->bif_mbuf_tail != NULL) {
1896 bp->bif_mbuf_tail->m_nextpkt = dup;
1897 } else {
1898 bp->bif_mbuf_head = dup;
1899 }
1900 bp->bif_mbuf_tail = dup;
1901 #ifdef BPF_MTAP_SOFTINT_DEBUG
1902 log(LOG_DEBUG, "%s: enqueued mbuf=%p to %s\n",
1903 __func__, dup, bp->bif_ifp->if_xname);
1904 #endif
1905
1906 return dup;
1907 }
1908
1909 static struct mbuf *
1910 bpf_mbuf_dequeue(struct bpf_if *bp)
1911 {
1912 struct mbuf *m;
1913 int s;
1914
1915 /* XXX NOMPSAFE: assumed running on one CPU */
1916 s = splnet();
1917 m = bp->bif_mbuf_head;
1918 if (m != NULL) {
1919 bp->bif_mbuf_head = m->m_nextpkt;
1920 m->m_nextpkt = NULL;
1921
1922 if (bp->bif_mbuf_head == NULL)
1923 bp->bif_mbuf_tail = NULL;
1924 #ifdef BPF_MTAP_SOFTINT_DEBUG
1925 log(LOG_DEBUG, "%s: dequeued mbuf=%p from %s\n",
1926 __func__, m, bp->bif_ifp->if_xname);
1927 #endif
1928 }
1929 splx(s);
1930
1931 return m;
1932 }
1933
1934 static void
1935 bpf_mtap_si(void *arg)
1936 {
1937 struct bpf_if *bp = arg;
1938 struct mbuf *m;
1939
1940 while ((m = bpf_mbuf_dequeue(bp)) != NULL) {
1941 #ifdef BPF_MTAP_SOFTINT_DEBUG
1942 log(LOG_DEBUG, "%s: tapping mbuf=%p on %s\n",
1943 __func__, m, bp->bif_ifp->if_xname);
1944 #endif
1945 bpf_ops->bpf_mtap(bp, m, BPF_D_IN);
1946 m_freem(m);
1947 }
1948 }
1949
1950 static void
1951 _bpf_mtap_softint(struct ifnet *ifp, struct mbuf *m)
1952 {
1953 struct bpf_if *bp = ifp->if_bpf;
1954 struct mbuf *dup;
1955
1956 KASSERT(cpu_intr_p());
1957
1958 /* To avoid extra invocations of the softint */
1959 if (BPFIF_DLIST_READER_EMPTY(bp))
1960 return;
1961 KASSERT(bp->bif_si != NULL);
1962
1963 dup = bpf_mbuf_enqueue(bp, m);
1964 if (dup != NULL)
1965 softint_schedule(bp->bif_si);
1966 }
1967
1968 static int
1969 bpf_hdrlen(struct bpf_d *d)
1970 {
1971 int hdrlen = d->bd_bif->bif_hdrlen;
1972 /*
1973 * Compute the length of the bpf header. This is not necessarily
1974 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1975 * that the network layer header begins on a longword boundary (for
1976 * performance reasons and to alleviate alignment restrictions).
1977 */
1978 #ifdef _LP64
1979 if (d->bd_compat32)
1980 return (BPF_WORDALIGN32(hdrlen + SIZEOF_BPF_HDR32) - hdrlen);
1981 else
1982 #endif
1983 return (BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen);
1984 }
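/*
 * Worked example (assuming DLT_EN10MB with bif_hdrlen = 14,
 * SIZEOF_BPF_HDR = 18 and 8-byte BPF_WORDALIGN, purely for
 * illustration): BPF_WORDALIGN(14 + 18) - 14 = 32 - 14 = 18, so the
 * 14-byte link header ends, and the network layer header begins, on a
 * longword boundary within each capture record.
 */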
1985
1986 /*
1987 * Move the packet data from interface memory (pkt) into the
1988 * store buffer. Call the wakeup functions if it's time to wakeup
1989  * a listener (buffer full). "cpfn" is the routine called to do the
1990 * actual data transfer. memcpy is passed in to copy contiguous chunks,
1991 * while bpf_mcpy is passed in to copy mbuf chains. In the latter case,
1992 * pkt is really an mbuf.
1993 */
1994 static void
1995 catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
1996 void *(*cpfn)(void *, const void *, size_t), struct timespec *ts)
1997 {
1998 char *h;
1999 int totlen, curlen, caplen;
2000 int hdrlen = bpf_hdrlen(d);
2001 int do_wakeup = 0;
2002
2003 atomic_inc_ulong(&d->bd_ccount);
2004 BPF_STATINC(capt);
2005 /*
2006 * Figure out how many bytes to move. If the packet is
2007 	 * greater than or equal to the snapshot length, transfer that
2008 * much. Otherwise, transfer the whole packet (unless
2009 * we hit the buffer size limit).
2010 */
2011 totlen = hdrlen + uimin(snaplen, pktlen);
2012 if (totlen > d->bd_bufsize)
2013 totlen = d->bd_bufsize;
2014 /*
2015 * If we adjusted totlen to fit the bufsize, it could be that
2016 * totlen is smaller than hdrlen because of the link layer header.
2017 */
2018 caplen = totlen - hdrlen;
2019 if (caplen < 0)
2020 caplen = 0;
2021
2022 mutex_enter(d->bd_buf_mtx);
2023 /*
2024 * Round up the end of the previous packet to the next longword.
2025 */
2026 #ifdef _LP64
2027 if (d->bd_compat32)
2028 curlen = BPF_WORDALIGN32(d->bd_slen);
2029 else
2030 #endif
2031 curlen = BPF_WORDALIGN(d->bd_slen);
2032 if (curlen + totlen > d->bd_bufsize) {
2033 /*
2034 * This packet will overflow the storage buffer.
2035 * Rotate the buffers if we can, then wakeup any
2036 * pending reads.
2037 */
2038 if (d->bd_fbuf == NULL) {
2039 mutex_exit(d->bd_buf_mtx);
2040 /*
2041 * We haven't completed the previous read yet,
2042 * so drop the packet.
2043 */
2044 atomic_inc_ulong(&d->bd_dcount);
2045 BPF_STATINC(drop);
2046 return;
2047 }
2048 ROTATE_BUFFERS(d);
2049 do_wakeup = 1;
2050 curlen = 0;
2051 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
2052 /*
2053 * Immediate mode is set, or the read timeout has
2054 * already expired during a select call. A packet
2055 * arrived, so the reader should be woken up.
2056 */
2057 do_wakeup = 1;
2058 }
2059
2060 /*
2061 * Append the bpf header.
2062 */
2063 h = (char *)d->bd_sbuf + curlen;
2064 #ifdef _LP64
2065 if (d->bd_compat32) {
2066 struct bpf_hdr32 *hp32;
2067
2068 hp32 = (struct bpf_hdr32 *)h;
2069 hp32->bh_tstamp.tv_sec = ts->tv_sec;
2070 hp32->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
2071 hp32->bh_datalen = pktlen;
2072 hp32->bh_hdrlen = hdrlen;
2073 hp32->bh_caplen = caplen;
2074 } else
2075 #endif
2076 {
2077 struct bpf_hdr *hp;
2078
2079 hp = (struct bpf_hdr *)h;
2080 hp->bh_tstamp.tv_sec = ts->tv_sec;
2081 hp->bh_tstamp.tv_usec = ts->tv_nsec / 1000;
2082 hp->bh_datalen = pktlen;
2083 hp->bh_hdrlen = hdrlen;
2084 hp->bh_caplen = caplen;
2085 }
2086
2087 /*
2088 * Copy the packet data into the store buffer and update its length.
2089 */
2090 (*cpfn)(h + hdrlen, pkt, caplen);
2091 d->bd_slen = curlen + totlen;
2092 mutex_exit(d->bd_buf_mtx);
2093
2094 /*
2095 * Call bpf_wakeup after bd_slen has been updated so that kevent(2)
2096 * will cause filt_bpfread() to be called with it adjusted.
2097 */
2098 if (do_wakeup)
2099 bpf_wakeup(d);
2100 }
2101
2102 /*
2103 * Initialize all nonzero fields of a descriptor.
2104 */
2105 static int
2106 bpf_allocbufs(struct bpf_d *d)
2107 {
2108
2109 d->bd_fbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP);
2110 if (!d->bd_fbuf)
2111 return (ENOBUFS);
2112 d->bd_sbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP);
2113 if (!d->bd_sbuf) {
2114 kmem_free(d->bd_fbuf, d->bd_bufsize);
2115 return (ENOBUFS);
2116 }
2117 d->bd_slen = 0;
2118 d->bd_hlen = 0;
2119 return (0);
2120 }
2121
2122 static void
2123 bpf_free_filter(struct bpf_filter *filter)
2124 {
2125
2126 KASSERT(filter != NULL);
2127
2128 if (filter->bf_insn != NULL)
2129 kmem_free(filter->bf_insn, filter->bf_size);
2130 if (filter->bf_jitcode != NULL)
2131 bpf_jit_freecode(filter->bf_jitcode);
2132 kmem_free(filter, sizeof(*filter));
2133 }
2134
2135 /*
2136 * Free buffers currently in use by a descriptor.
2137 * Called on close.
2138 */
2139 static void
2140 bpf_freed(struct bpf_d *d)
2141 {
2142 /*
2143 * We don't need to lock out interrupts since this descriptor has
2144 	 * been detached from its interface and hasn't yet been marked
2145 * free.
2146 */
2147 if (d->bd_sbuf != NULL) {
2148 kmem_free(d->bd_sbuf, d->bd_bufsize);
2149 if (d->bd_hbuf != NULL)
2150 kmem_free(d->bd_hbuf, d->bd_bufsize);
2151 if (d->bd_fbuf != NULL)
2152 kmem_free(d->bd_fbuf, d->bd_bufsize);
2153 }
2154 if (d->bd_rfilter != NULL) {
2155 bpf_free_filter(d->bd_rfilter);
2156 d->bd_rfilter = NULL;
2157 }
2158 if (d->bd_wfilter != NULL) {
2159 bpf_free_filter(d->bd_wfilter);
2160 d->bd_wfilter = NULL;
2161 }
2162 d->bd_jitcode = NULL;
2163 }
2164
2165 /*
2166 * Attach an interface to bpf. dlt is the link layer type;
2167 * hdrlen is the fixed size of the link header for the specified dlt
2168 * (variable length headers not yet supported).
2169 */
2170 static void
2171 _bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
2172 {
2173 struct bpf_if *bp;
2174
2175 bp = kmem_alloc(sizeof(*bp), KM_SLEEP);
2176
2177 mutex_enter(&bpf_mtx);
2178 bp->bif_driverp = driverp;
2179 bp->bif_ifp = ifp;
2180 bp->bif_dlt = dlt;
2181 bp->bif_si = NULL;
2182 BPF_IFLIST_ENTRY_INIT(bp);
2183 PSLIST_INIT(&bp->bif_dlist_head);
2184 psref_target_init(&bp->bif_psref, bpf_psref_class);
2185 SLIST_INIT(&bp->bif_trackers);
2186
2187 BPF_IFLIST_WRITER_INSERT_HEAD(bp);
2188
2189 *bp->bif_driverp = NULL;
2190
2191 bp->bif_hdrlen = hdrlen;
2192 mutex_exit(&bpf_mtx);
2193 #if 0
2194 printf("bpf: %s attached with dlt %x\n", ifp->if_xname, dlt);
2195 #endif
2196 }
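/*
 * Driver-side sketch: drivers reach this function through the
 * bpf_attach() wrapper in net/bpf.h, typically from their attach
 * routine.  For Ethernet, ether_ifattach() does the equivalent of:
 */
#if 0
	bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header));
#endif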
2197
2198 static void
2199 _bpf_mtap_softint_init(struct ifnet *ifp)
2200 {
2201 struct bpf_if *bp;
2202
2203 mutex_enter(&bpf_mtx);
2204 BPF_IFLIST_WRITER_FOREACH(bp) {
2205 if (bp->bif_ifp != ifp)
2206 continue;
2207
2208 bp->bif_mbuf_head = NULL;
2209 bp->bif_mbuf_tail = NULL;
2210 bp->bif_si = softint_establish(SOFTINT_NET, bpf_mtap_si, bp);
2211 if (bp->bif_si == NULL)
2212 panic("%s: softint_establish() failed", __func__);
2213 break;
2214 }
2215 mutex_exit(&bpf_mtx);
2216
2217 if (bp == NULL)
2218 panic("%s: no bpf_if found for %s", __func__, ifp->if_xname);
2219 }
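/*
 * Note: the softint established above backs the bpf_mtap_softint() path.
 * Drivers that tap packets from hardware interrupt context queue mbufs
 * on bif_mbuf_head/bif_mbuf_tail and defer the actual capture to
 * bpf_mtap_si(), which runs as a SOFTINT_NET soft interrupt.
 */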
2220
2221 /*
2222 * Remove an interface from bpf.
2223 */
2224 static void
2225 _bpfdetach(struct ifnet *ifp)
2226 {
2227 struct bpf_if *bp;
2228 struct bpf_d *d;
2229 int s;
2230
2231 mutex_enter(&bpf_mtx);
2232 /* Nuke the vnodes for any open instances */
2233 again_d:
2234 BPF_DLIST_WRITER_FOREACH(d) {
2235 mutex_enter(d->bd_mtx);
2236 if (d->bd_bif != NULL && d->bd_bif->bif_ifp == ifp) {
2237 /*
2238			 * Detach the descriptor from the interface now;
2239			 * it will be freed later by the close routine.
2240 */
2241 bpf_detachd(d);
2242 mutex_exit(d->bd_mtx);
2243 goto again_d;
2244 }
2245 mutex_exit(d->bd_mtx);
2246 }
2247
2248 again:
2249 BPF_IFLIST_WRITER_FOREACH(bp) {
2250 if (bp->bif_ifp == ifp) {
2251 BPF_IFLIST_WRITER_REMOVE(bp);
2252
2253 pserialize_perform(bpf_psz);
2254 psref_target_destroy(&bp->bif_psref, bpf_psref_class);
2255
2256 while (!SLIST_EMPTY(&bp->bif_trackers)) {
2257 struct bpf_event_tracker *t =
2258 SLIST_FIRST(&bp->bif_trackers);
2259 SLIST_REMOVE_HEAD(&bp->bif_trackers,
2260 bet_entries);
2261 kmem_free(t, sizeof(*t));
2262 }
2263
2264 BPF_IFLIST_ENTRY_DESTROY(bp);
2265 if (bp->bif_si != NULL) {
2266 /* XXX NOMPSAFE: assumed running on one CPU */
2267 s = splnet();
2268 while (bp->bif_mbuf_head != NULL) {
2269 struct mbuf *m = bp->bif_mbuf_head;
2270 bp->bif_mbuf_head = m->m_nextpkt;
2271 m_freem(m);
2272 }
2273 splx(s);
2274 softint_disestablish(bp->bif_si);
2275 }
2276 kmem_free(bp, sizeof(*bp));
2277 goto again;
2278 }
2279 }
2280 mutex_exit(&bpf_mtx);
2281 }
2282
2283 /*
2284  * Change the data link type of an interface.
2285 */
2286 static void
2287 _bpf_change_type(struct ifnet *ifp, u_int dlt, u_int hdrlen)
2288 {
2289 struct bpf_if *bp;
2290
2291 mutex_enter(&bpf_mtx);
2292 BPF_IFLIST_WRITER_FOREACH(bp) {
2293 if (bp->bif_driverp == &ifp->if_bpf)
2294 break;
2295 }
2296 if (bp == NULL)
2297 panic("bpf_change_type");
2298
2299 bp->bif_dlt = dlt;
2300
2301 bp->bif_hdrlen = hdrlen;
2302 mutex_exit(&bpf_mtx);
2303 }
2304
2305 /*
2306  * Get the list of available data link types for the interface.
2307 */
2308 static int
2309 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
2310 {
2311 int n, error;
2312 struct ifnet *ifp;
2313 struct bpf_if *bp;
2314 int s, bound;
2315
2316 KASSERT(mutex_owned(d->bd_mtx));
2317
2318 ifp = d->bd_bif->bif_ifp;
2319 n = 0;
2320 error = 0;
2321
2322 bound = curlwp_bind();
2323 s = pserialize_read_enter();
2324 BPF_IFLIST_READER_FOREACH(bp) {
2325 if (bp->bif_ifp != ifp)
2326 continue;
2327 if (bfl->bfl_list != NULL) {
2328 struct psref psref;
2329
2330 if (n >= bfl->bfl_len) {
2331				pserialize_read_exit(s);
				curlwp_bindx(bound);	/* release the LWP bind taken above */
2332				return ENOMEM;
2333 }
2334
2335 bpf_if_acquire(bp, &psref);
2336 pserialize_read_exit(s);
2337
2338 error = copyout(&bp->bif_dlt,
2339 bfl->bfl_list + n, sizeof(u_int));
2340
2341 s = pserialize_read_enter();
2342 bpf_if_release(bp, &psref);
2343 }
2344 n++;
2345 }
2346 pserialize_read_exit(s);
2347 curlwp_bindx(bound);
2348
2349 bfl->bfl_len = n;
2350 return error;
2351 }
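/*
 * Userland counterpart (sketch; fd handling and error style are
 * hypothetical): BIOCGDLTLIST is normally issued twice -- first with
 * bfl_list == NULL so that only the count comes back in bfl_len, then
 * again with a buffer of that many u_ints, mirroring the
 * n >= bfl_len check above.
 */
#if 0
	struct bpf_dltlist bfl;

	memset(&bfl, 0, sizeof(bfl));
	if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)	/* probe for the count */
		err(1, "BIOCGDLTLIST");
	bfl.bfl_list = calloc(bfl.bfl_len, sizeof(u_int));
	if (bfl.bfl_list == NULL)
		err(1, "calloc");
	if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)	/* fetch the DLTs */
		err(1, "BIOCGDLTLIST");
#endif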
2352
2353 /*
2354 * Set the data link type of a BPF instance.
2355 */
2356 static int
2357 bpf_setdlt(struct bpf_d *d, u_int dlt)
2358 {
2359 int error, opromisc;
2360 struct ifnet *ifp;
2361 struct bpf_if *bp;
2362
2363 KASSERT(mutex_owned(&bpf_mtx));
2364 KASSERT(mutex_owned(d->bd_mtx));
2365
2366 if (d->bd_bif->bif_dlt == dlt)
2367 return 0;
2368 ifp = d->bd_bif->bif_ifp;
2369 BPF_IFLIST_WRITER_FOREACH(bp) {
2370 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
2371 break;
2372 }
2373 if (bp == NULL)
2374 return EINVAL;
2375 opromisc = d->bd_promisc;
2376 bpf_detachd(d);
2377 BPFIF_DLIST_ENTRY_INIT(d);
2378 bpf_attachd(d, bp);
2379 reset_d(d);
2380 if (opromisc) {
2381 KERNEL_LOCK_UNLESS_NET_MPSAFE();
2382 error = ifpromisc(bp->bif_ifp, 1);
2383 KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
2384 if (error)
2385 printf("%s: bpf_setdlt: ifpromisc failed (%d)\n",
2386 bp->bif_ifp->if_xname, error);
2387 else
2388 d->bd_promisc = 1;
2389 }
2390 return 0;
2391 }
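/*
 * Userland counterpart (sketch): selecting one of the advertised DLTs
 * with BIOCSDLT, which lands in bpf_setdlt() above via the ioctl path.
 */
#if 0
	u_int dlt = DLT_IEEE802_11_RADIO;	/* example value from net/dlt.h */

	if (ioctl(fd, BIOCSDLT, &dlt) == -1)
		err(1, "BIOCSDLT");
#endif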
2392
2393 static int
2394 sysctl_net_bpf_maxbufsize(SYSCTLFN_ARGS)
2395 {
2396 int newsize, error;
2397 struct sysctlnode node;
2398
2399 node = *rnode;
2400 node.sysctl_data = &newsize;
2401 newsize = bpf_maxbufsize;
2402 error = sysctl_lookup(SYSCTLFN_CALL(&node));
2403 if (error || newp == NULL)
2404 return (error);
2405
2406 if (newsize < BPF_MINBUFSIZE || newsize > BPF_MAXBUFSIZE)
2407 return (EINVAL);
2408
2409 bpf_maxbufsize = newsize;
2410
2411 return (0);
2412 }
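/*
 * This handler backs the net.bpf.maxbufsize node created in
 * sysctl_net_bpf_setup() below; e.g.:
 *
 *	sysctl -w net.bpf.maxbufsize=1048576
 *
 * Values outside [BPF_MINBUFSIZE, BPF_MAXBUFSIZE] are rejected with
 * EINVAL, as above.
 */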
2413
2414 #if defined(MODULAR) || defined(BPFJIT)
2415 static int
2416 sysctl_net_bpf_jit(SYSCTLFN_ARGS)
2417 {
2418 bool newval;
2419 int error;
2420 struct sysctlnode node;
2421
2422 node = *rnode;
2423 node.sysctl_data = &newval;
2424 newval = bpf_jit;
2425 error = sysctl_lookup(SYSCTLFN_CALL(&node));
2426 if (error != 0 || newp == NULL)
2427 return error;
2428
2429 bpf_jit = newval;
2430 if (newval && bpfjit_module_ops.bj_generate_code == NULL) {
2431 printf("JIT compilation is postponed "
2432		    "until after the bpfjit module is loaded\n");
2433 }
2434
2435 return 0;
2436 }
2437 #endif
2438
2439 static int
2440 sysctl_net_bpf_peers(SYSCTLFN_ARGS)
2441 {
2442 int error, elem_count;
2443 struct bpf_d *dp;
2444 struct bpf_d_ext dpe;
2445 size_t len, needed, elem_size, out_size;
2446 char *sp;
2447
2448 if (namelen == 1 && name[0] == CTL_QUERY)
2449 return (sysctl_query(SYSCTLFN_CALL(rnode)));
2450
2451 if (namelen != 2)
2452 return (EINVAL);
2453
2454	/* The BPF peer list is privileged information. */
2455 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_INTERFACE,
2456 KAUTH_REQ_NETWORK_INTERFACE_GETPRIV, NULL, NULL, NULL);
2457 if (error)
2458 return (EPERM);
2459
2460 len = (oldp != NULL) ? *oldlenp : 0;
2461 sp = oldp;
2462 elem_size = name[0];
2463 elem_count = name[1];
2464 out_size = MIN(sizeof(dpe), elem_size);
2465 needed = 0;
2466
2467 if (elem_size < 1 || elem_count < 0)
2468 return (EINVAL);
2469
2470 mutex_enter(&bpf_mtx);
2471 BPF_DLIST_WRITER_FOREACH(dp) {
2472 if (len >= elem_size && elem_count > 0) {
2473 #define BPF_EXT(field) dpe.bde_ ## field = dp->bd_ ## field
2474 BPF_EXT(bufsize);
2475 BPF_EXT(promisc);
2476 BPF_EXT(state);
2477 BPF_EXT(immediate);
2478 BPF_EXT(hdrcmplt);
2479 BPF_EXT(direction);
2480 BPF_EXT(pid);
2481 BPF_EXT(rcount);
2482 BPF_EXT(dcount);
2483 BPF_EXT(ccount);
2484 #undef BPF_EXT
2485 mutex_enter(dp->bd_mtx);
2486 if (dp->bd_bif)
2487 (void)strlcpy(dpe.bde_ifname,
2488 dp->bd_bif->bif_ifp->if_xname,
2489 IFNAMSIZ - 1);
2490 else
2491 dpe.bde_ifname[0] = '\0';
2492 dpe.bde_locked = dp->bd_locked;
2493 mutex_exit(dp->bd_mtx);
2494
2495 error = copyout(&dpe, sp, out_size);
2496 if (error)
2497 break;
2498 sp += elem_size;
2499 len -= elem_size;
2500 }
2501 needed += elem_size;
2502 if (elem_count > 0 && elem_count != INT_MAX)
2503 elem_count--;
2504 }
2505 mutex_exit(&bpf_mtx);
2506
2507 *oldlenp = needed;
2508
2509 return (error);
2510 }
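/*
 * Calling convention note: name[0] carries the element size the caller
 * can accept and name[1] the maximum element count, so older binaries
 * keep working if struct bpf_d_ext grows.  A first pass with
 * oldp == NULL reports the space needed in *oldlenp, after which the
 * caller retries with a suitably sized buffer.
 */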
2511
2512 static void
2513 bpf_stats(void *p, void *arg, struct cpu_info *ci __unused)
2514 {
2515 struct bpf_stat *const stats = p;
2516 struct bpf_stat *sum = arg;
2517
2518 int s = splnet();
2519
2520 sum->bs_recv += stats->bs_recv;
2521 sum->bs_drop += stats->bs_drop;
2522 sum->bs_capt += stats->bs_capt;
2523
2524 splx(s);
2525 }
2526
2527 static int
2528 bpf_sysctl_gstats_handler(SYSCTLFN_ARGS)
2529 {
2530 struct sysctlnode node;
2531 int error;
2532 struct bpf_stat sum;
2533
2534 memset(&sum, 0, sizeof(sum));
2535 node = *rnode;
2536
2537 percpu_foreach_xcall(bpf_gstats_percpu, XC_HIGHPRI_IPL(IPL_SOFTNET),
2538 bpf_stats, &sum);
2539
2540	node.sysctl_data = &sum;
2541 node.sysctl_size = sizeof(sum);
2542 error = sysctl_lookup(SYSCTLFN_CALL(&node));
2543 if (error != 0 || newp == NULL)
2544 return error;
2545
2546 return 0;
2547 }
2548
2549 SYSCTL_SETUP(sysctl_net_bpf_setup, "bpf sysctls")
2550 {
2551 const struct sysctlnode *node;
2552
2553 node = NULL;
2554 sysctl_createv(clog, 0, NULL, &node,
2555 CTLFLAG_PERMANENT,
2556 CTLTYPE_NODE, "bpf",
2557 SYSCTL_DESCR("BPF options"),
2558 NULL, 0, NULL, 0,
2559 CTL_NET, CTL_CREATE, CTL_EOL);
2560 if (node != NULL) {
2561 #if defined(MODULAR) || defined(BPFJIT)
2562 sysctl_createv(clog, 0, NULL, NULL,
2563 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2564 CTLTYPE_BOOL, "jit",
2565 SYSCTL_DESCR("Toggle Just-In-Time compilation"),
2566 sysctl_net_bpf_jit, 0, &bpf_jit, 0,
2567 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2568 #endif
2569 sysctl_createv(clog, 0, NULL, NULL,
2570 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2571 CTLTYPE_INT, "maxbufsize",
2572 SYSCTL_DESCR("Maximum size for data capture buffer"),
2573 sysctl_net_bpf_maxbufsize, 0, &bpf_maxbufsize, 0,
2574 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2575 sysctl_createv(clog, 0, NULL, NULL,
2576 CTLFLAG_PERMANENT,
2577 CTLTYPE_STRUCT, "stats",
2578 SYSCTL_DESCR("BPF stats"),
2579 bpf_sysctl_gstats_handler, 0, NULL, 0,
2580 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2581 sysctl_createv(clog, 0, NULL, NULL,
2582 CTLFLAG_PERMANENT,
2583 CTLTYPE_STRUCT, "peers",
2584 SYSCTL_DESCR("BPF peers"),
2585 sysctl_net_bpf_peers, 0, NULL, 0,
2586 CTL_NET, node->sysctl_num, CTL_CREATE, CTL_EOL);
2587 }
2588
2589 }
2590
2591 static int
2592 _bpf_register_track_event(struct bpf_if **driverp,
2593 void (*_fun)(struct bpf_if *, struct ifnet *, int, int))
2594 {
2595 struct bpf_if *bp;
2596 struct bpf_event_tracker *t;
2597 int ret = ENOENT;
2598
2599 t = kmem_zalloc(sizeof(*t), KM_SLEEP);
2600 if (!t)
2601 return ENOMEM;
2602 t->bet_notify = _fun;
2603
2604 mutex_enter(&bpf_mtx);
2605 BPF_IFLIST_WRITER_FOREACH(bp) {
2606 if (bp->bif_driverp != driverp)
2607 continue;
2608 SLIST_INSERT_HEAD(&bp->bif_trackers, t, bet_entries);
2609 ret = 0;
2610 break;
2611 }
2612	mutex_exit(&bpf_mtx);
	if (ret != 0)
		kmem_free(t, sizeof(*t));	/* no matching bpf_if; avoid leaking the tracker */
2613
2614	return ret;
2615 }
2616
2617 static int
2618 _bpf_deregister_track_event(struct bpf_if **driverp,
2619 void (*_fun)(struct bpf_if *, struct ifnet *, int, int))
2620 {
2621 struct bpf_if *bp;
2622 struct bpf_event_tracker *t = NULL;
2623 int ret = ENOENT;
2624
2625 mutex_enter(&bpf_mtx);
2626 BPF_IFLIST_WRITER_FOREACH(bp) {
2627 if (bp->bif_driverp != driverp)
2628 continue;
2629 SLIST_FOREACH(t, &bp->bif_trackers, bet_entries) {
2630 if (t->bet_notify == _fun) {
2631 ret = 0;
2632 break;
2633 }
2634 }
2635 if (ret == 0)
2636 break;
2637 }
2638 if (ret == 0 && t && t->bet_notify == _fun) {
2639 SLIST_REMOVE(&bp->bif_trackers, t, bpf_event_tracker,
2640 bet_entries);
2641 }
2642 mutex_exit(&bpf_mtx);
2643 if (ret == 0)
2644 kmem_free(t, sizeof(*t));
2645 return ret;
2646 }
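/*
 * Driver-side sketch (hypothetical callback and driver prefix "xx"):
 * a driver interested in listeners coming and going registers a
 * notifier against its own if_bpf pointer through the wrappers in
 * net/bpf.h.  The two int arguments are taken here to convey the DLT
 * and an attach/detach indicator -- an assumption based on the
 * bet_notify signature above.
 */
#if 0
static void
xx_bpf_track(struct bpf_if *bp, struct ifnet *ifp, int dlt, int set)
{

	/* e.g. enable or disable costly capture-only work in the driver */
}

	/* in the driver's attach routine */
	bpf_register_track_event(&ifp->if_bpf, xx_bpf_track);
#endif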
2647
2648 struct bpf_ops bpf_ops_kernel = {
2649 .bpf_attach = _bpfattach,
2650 .bpf_detach = _bpfdetach,
2651 .bpf_change_type = _bpf_change_type,
2652 .bpf_register_track_event = _bpf_register_track_event,
2653 .bpf_deregister_track_event = _bpf_deregister_track_event,
2654
2655 .bpf_mtap = _bpf_mtap,
2656 .bpf_mtap2 = _bpf_mtap2,
2657 .bpf_mtap_af = _bpf_mtap_af,
2658 .bpf_mtap_sl_in = _bpf_mtap_sl_in,
2659 .bpf_mtap_sl_out = _bpf_mtap_sl_out,
2660
2661 .bpf_mtap_softint = _bpf_mtap_softint,
2662 .bpf_mtap_softint_init = _bpf_mtap_softint_init,
2663 };
2664
2665 MODULE(MODULE_CLASS_DRIVER, bpf, "bpf_filter");
2666
2667 static int
2668 bpf_modcmd(modcmd_t cmd, void *arg)
2669 {
2670 #ifdef _MODULE
2671 devmajor_t bmajor, cmajor;
2672 #endif
2673 int error = 0;
2674
2675 switch (cmd) {
2676 case MODULE_CMD_INIT:
2677 bpf_init();
2678 #ifdef _MODULE
2679 bmajor = cmajor = NODEVMAJOR;
2680 error = devsw_attach("bpf", NULL, &bmajor,
2681 &bpf_cdevsw, &cmajor);
2682 if (error)
2683 break;
2684 #endif
2685
2686 bpf_ops_handover_enter(&bpf_ops_kernel);
2687 atomic_swap_ptr(&bpf_ops, &bpf_ops_kernel);
2688 bpf_ops_handover_exit();
2689 break;
2690
2691 case MODULE_CMD_FINI:
2692 /*
2693 * While there is no reference counting for bpf callers,
2694 * unload could at least in theory be done similarly to
2695 * system call disestablishment. This should even be
2696 * a little simpler:
2697 *
2698 * 1) replace op vector with stubs
2699 * 2) post update to all cpus with xc
2700 * 3) check that nobody is in bpf anymore
2701 * (it's doubtful we'd want something like l_sysent,
2702 * but we could do something like *signed* percpu
2703 * counters. if the sum is 0, we're good).
2704 * 4) if fail, unroll changes
2705 *
2706 * NOTE: change won't be atomic to the outside. some
2707	 * packets may not be captured even if unload is
2708 * not successful. I think packet capture not working
2709 * is a perfectly logical consequence of trying to
2710 * disable packet capture.
2711 */
2712 error = EOPNOTSUPP;
2713 break;
2714
2715 default:
2716 error = ENOTTY;
2717 break;
2718 }
2719
2720 return error;
2721 }
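/*
 * Sketch of the "signed percpu counters" idea from the MODULE_CMD_FINI
 * comment above (hypothetical helpers, never compiled): every entry
 * into bpf would increment a per-CPU counter and every exit decrement
 * one, possibly on a different CPU.  Individual counters may go
 * negative; only the cross-call sum matters, and a sum of zero would
 * mean nobody is left inside bpf.
 */
#if 0
static percpu_t *bpf_inuse_percpu;	/* one int64_t per CPU */

static void
bpf_enter_ref(void)
{
	int64_t *p = percpu_getref(bpf_inuse_percpu);

	(*p)++;
	percpu_putref(bpf_inuse_percpu);
}

static void
bpf_exit_ref(void)
{
	int64_t *p = percpu_getref(bpf_inuse_percpu);

	(*p)--;		/* may dip below zero locally */
	percpu_putref(bpf_inuse_percpu);
}
#endif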