The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/bpf.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $OpenBSD: bpf.c,v 1.219 2022/07/09 12:48:21 visa Exp $  */
    2 /*      $NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $ */
    3 
    4 /*
    5  * Copyright (c) 1990, 1991, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
    8  *
    9  * This code is derived from the Stanford/CMU enet packet filter,
   10  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
   11  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
   12  * Berkeley Laboratory.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)bpf.c       8.2 (Berkeley) 3/28/94
   39  */
   40 
   41 #include "bpfilter.h"
   42 
   43 #include <sys/param.h>
   44 #include <sys/systm.h>
   45 #include <sys/mbuf.h>
   46 #include <sys/proc.h>
   47 #include <sys/signalvar.h>
   48 #include <sys/ioctl.h>
   49 #include <sys/conf.h>
   50 #include <sys/vnode.h>
   51 #include <sys/fcntl.h>
   52 #include <sys/socket.h>
   53 #include <sys/kernel.h>
   54 #include <sys/sysctl.h>
   55 #include <sys/rwlock.h>
   56 #include <sys/atomic.h>
   57 #include <sys/event.h>
   58 #include <sys/mutex.h>
   59 #include <sys/refcnt.h>
   60 #include <sys/smr.h>
   61 #include <sys/specdev.h>
   62 #include <sys/sigio.h>
   63 #include <sys/task.h>
   64 #include <sys/time.h>
   65 
   66 #include <net/if.h>
   67 #include <net/bpf.h>
   68 #include <net/bpfdesc.h>
   69 
   70 #include <netinet/in.h>
   71 #include <netinet/if_ether.h>
   72 
   73 #include "vlan.h"
   74 #if NVLAN > 0
   75 #include <net/if_vlan_var.h>
   76 #endif
   77 
/* Default capture buffer size; applied to each new descriptor at open. */
#define BPF_BUFSIZE 32768

#define PRINET  26                      /* interruptible */

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;
int bpf_maxbufsize = BPF_MAXBUFSIZE;

/*
 *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
 *  bpf_d_list is the list of descriptors
 */
struct bpf_if	*bpf_iflist;
LIST_HEAD(, bpf_d) bpf_d_list;

/* Forward declarations for file-local helpers. */
int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if*, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

/* kqueue/kevent filter hooks for bpf descriptors. */
void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);
int	filt_bpfreadmodify(struct kevent *, struct knote *);
int	filt_bpfreadprocess(struct knote *, struct kevent *);

int	bpf_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

/* SMR deferred-free callbacks for filter programs and descriptors. */
void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);


/* Serializes updates to bpf_bufsize/bpf_maxbufsize via sysctl. */
struct rwlock bpf_sysctl_lk = RWLOCK_INITIALIZER("bpfsz");
  137 
/*
 * Copy a packet written by userland (``uio'') into a freshly allocated
 * mbuf, run the descriptor's write filter over it, and build a sockaddr
 * describing the link-level destination in ``*sockp''.
 *
 * On success, returns 0 and hands ownership of the mbuf to the caller
 * via ``*mp''.  On failure returns an errno and frees any allocated
 * mbuf itself.  EPERM means the write filter rejected the packet.
 */
int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen, alen, mlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		/* DLT_LOOP frames start with a 4-byte address family. */
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EMSGSIZE);
	len = uio->uio_resid;
	if (len < hlen)
		return (EINVAL);

	/*
	 * Get the length of the payload so we can align it properly.
	 */
	alen = len - hlen;

	/*
	 * Allocate enough space for headers and the aligned payload.
	 */
	mlen = max(max_linkhdr, hlen) + roundup(alen, sizeof(long));
	if (mlen > MAXMCLBYTES)
		return (EMSGSIZE);

	MGETHDR(m, M_WAIT, MT_DATA);
	if (mlen > MHLEN) {
		/* Too big for a plain header mbuf; attach a cluster. */
		MCLGETL(m, M_WAIT, mlen);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	m_align(m, alen); /* Align the payload. */
	m->m_data -= hlen;

	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len;
	m->m_len = len;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	/* Run the write filter under SMR; NULL filter accepts everything. */
	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);

		/* Strip the link header from the mbuf data. */
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen;
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	*mp = m;
	return (0);
 bad:
	m_freem(m);
	return (error);
}
  283 
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 * Called holding ``bd_mtx'' (see the prototype comment above).
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	/* The kernel lock serializes writers of the SMR listener list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	/* Non-NULL cookie tells the driver to start diverting packets. */
	*bp->bif_driverp = bp;
}
  305 
/*
 * Detach a file from its interface.
 * Called holding ``bd_mtx''; may temporarily drop and re-take it
 * while turning promiscuous mode off.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		d->bd_promisc = 0;

		/*
		 * Drop ``bd_mtx'' across ifpromisc(); the extra reference
		 * keeps ``d'' alive while the mutex is released.
		 */
		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		/* EINVAL/ENODEV/ENXIO mean the interface went away; OK. */
		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}
  363 
/*
 * Device attach hook: initialize the global descriptor list.
 * The device count argument ``n'' is unused.
 */
void
bpfilterattach(int n)
{
	LIST_INIT(&bpf_d_list);
}
  369 
/*
 * Open a bpf device.  Returns ENXIO for an illegal minor device number,
 * EBUSY if the descriptor could not be allocated (descriptors are
 * created on demand, one per clone minor).
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	/* Only minors aligned to the clone shift are valid. */
	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = bpf_bufsize;
	bd->bd_sig = SIGIO;	/* default signal for async notification */
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);
	klist_init_mutex(&bd->bd_klist, &bd->bd_mtx);

	bd->bd_rtout = 0;	/* no timeout by default */

	/* Initial reference; dropped by bpfclose(). */
	refcnt_init(&bd->bd_refcnt);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}
  406 
/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);		/* unblock any sleeping readers */
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	/*
	 * Drop the reference taken at open; presumably the descriptor is
	 * freed via the SMR callback once the last reference goes away.
	 */
	bpf_put(d);

	return (0);
}
  426 
  427 /*
  428  * Rotate the packet buffers in descriptor d.  Move the store buffer
  429  * into the hold slot, and the free buffer into the store slot.
  430  * Zero the length of the new store buffer.
  431  */
  432 #define ROTATE_BUFFERS(d) \
  433         KASSERT(d->bd_in_uiomove == 0); \
  434         MUTEX_ASSERT_LOCKED(&d->bd_mtx); \
  435         (d)->bd_hbuf = (d)->bd_sbuf; \
  436         (d)->bd_hlen = (d)->bd_slen; \
  437         (d)->bd_sbuf = (d)->bd_fbuf; \
  438         (d)->bd_slen = 0; \
  439         (d)->bd_fbuf = NULL;
  440 
/*
 *  bpfread - read next chunk of packets from buffers
 *
 * Blocks until the hold buffer fills (or the read timeout expires),
 * then copies the whole hold buffer to userland.  The caller must
 * supply a buffer exactly ``bd_bufsize'' bytes long.
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	uint64_t end, now;
	struct bpf_d *d;
	caddr_t hbuf;
	int error, hlen;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, mark when the read should end.
	 *
	 * NOTE(review): ``end'' is only initialized when bd_rtout is
	 * nonzero here; the timed branches below assume bd_rtout does not
	 * change from zero to nonzero while we sleep — confirm.
	 */
	if (d->bd_rtout != 0) {
		now = nsecuptime();
		end = now + d->bd_rtout;
		if (end < now)
			end = UINT64_MAX;	/* saturate on overflow */
	}

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_immediate && d->bd_slen != 0) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (ISSET(ioflag, IO_NDELAY)) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else if (d->bd_rtout == 0) {
			/* No read timeout set. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", INFSLP);
			d->bd_nreaders--;
		} else if ((now = nsecuptime()) < end) {
			/* Read timeout has not expired yet. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", end - now);
			d->bd_nreaders--;
		} else {
			/* Read timeout has expired. */
			error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing.  If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;	/* recycle the hold buffer as the free one */
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}
  577 
/*
 * If there are processes sleeping on this descriptor, wake them up.
 * Called holding ``bd_mtx''.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/* Wake blocked readers only when someone is actually sleeping. */
	if (d->bd_nreaders)
		wakeup(d);

	KNOTE(&d->bd_klist, 0);

	/*
	 * As long as pgsigio() needs to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	if (d->bd_async && d->bd_sig) {
		/*
		 * The reference is released by bpf_wakeup_cb(), or right
		 * here if the task was already queued.
		 */
		bpf_get(d);
		if (!task_add(systq, &d->bd_wake_task))
			bpf_put(d);
	}
}
  602 
  603 void
  604 bpf_wakeup_cb(void *xd)
  605 {
  606         struct bpf_d *d = xd;
  607 
  608         if (d->bd_async && d->bd_sig)
  609                 pgsigio(&d->bd_sigio, d->bd_sig, 0);
  610 
  611         bpf_put(d);
  612 }
  613 
/*
 * Write a packet to the interface the descriptor is attached to.
 * The data is parsed by bpf_movein() according to the link type and
 * transmitted via the interface's output routine.
 */
int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	/* Zero-length writes are a successful no-op. */
	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}

	/* On success bpf_movein() hands us ownership of ``m''. */
	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	/* "Header complete" writes bypass source-address rewriting. */
	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	/* if_output consumes the mbuf regardless of outcome. */
	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}
  666 
  667 /*
  668  * Reset a descriptor by flushing its packet buffer and clearing the
  669  * receive and drop counts.
  670  */
  671 void
  672 bpf_resetd(struct bpf_d *d)
  673 {
  674         MUTEX_ASSERT_LOCKED(&d->bd_mtx);
  675         KASSERT(d->bd_in_uiomove == 0);
  676 
  677         if (d->bd_hbuf != NULL) {
  678                 /* Free the hold buffer. */
  679                 d->bd_fbuf = d->bd_hbuf;
  680                 d->bd_hbuf = NULL;
  681         }
  682         d->bd_slen = 0;
  683         d->bd_hlen = 0;
  684         d->bd_rcount = 0;
  685         d->bd_dcount = 0;
  686 }
  687 
  688 /*
  689  *  FIONREAD            Check for read packet available.
  690  *  BIOCGBLEN           Get buffer len [for read()].
  691  *  BIOCSETF            Set ethernet read filter.
  692  *  BIOCFLUSH           Flush read packet buffer.
  693  *  BIOCPROMISC         Put interface into promiscuous mode.
  694  *  BIOCGDLTLIST        Get supported link layer types.
  695  *  BIOCGDLT            Get link layer type.
  696  *  BIOCSDLT            Set link layer type.
  697  *  BIOCGETIF           Get interface name.
  698  *  BIOCSETIF           Set interface.
  699  *  BIOCSRTIMEOUT       Set read timeout.
  700  *  BIOCGRTIMEOUT       Get read timeout.
  701  *  BIOCGSTATS          Get packet stats.
  702  *  BIOCIMMEDIATE       Set immediate mode.
  703  *  BIOCVERSION         Get filter language version.
  704  *  BIOCGHDRCMPLT       Get "header already complete" flag
  705  *  BIOCSHDRCMPLT       Set "header already complete" flag
  706  */
  707 int
  708 bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
  709 {
  710         struct bpf_d *d;
  711         int error = 0;
  712 
  713         d = bpfilter_lookup(minor(dev));
  714         if (d->bd_locked && suser(p) != 0) {
  715                 /* list of allowed ioctls when locked and not root */
  716                 switch (cmd) {
  717                 case BIOCGBLEN:
  718                 case BIOCFLUSH:
  719                 case BIOCGDLT:
  720                 case BIOCGDLTLIST:
  721                 case BIOCGETIF:
  722                 case BIOCGRTIMEOUT:
  723                 case BIOCGSTATS:
  724                 case BIOCVERSION:
  725                 case BIOCGRSIG:
  726                 case BIOCGHDRCMPLT:
  727                 case FIONREAD:
  728                 case BIOCLOCK:
  729                 case BIOCSRTIMEOUT:
  730                 case BIOCIMMEDIATE:
  731                 case TIOCGPGRP:
  732                 case BIOCGDIRFILT:
  733                         break;
  734                 default:
  735                         return (EPERM);
  736                 }
  737         }
  738 
  739         bpf_get(d);
  740 
  741         switch (cmd) {
  742         default:
  743                 error = EINVAL;
  744                 break;
  745 
  746         /*
  747          * Check for read packet available.
  748          */
  749         case FIONREAD:
  750                 {
  751                         int n;
  752 
  753                         mtx_enter(&d->bd_mtx);
  754                         n = d->bd_slen;
  755                         if (d->bd_hbuf != NULL)
  756                                 n += d->bd_hlen;
  757                         mtx_leave(&d->bd_mtx);
  758 
  759                         *(int *)addr = n;
  760                         break;
  761                 }
  762 
  763         /*
  764          * Get buffer len [for read()].
  765          */
  766         case BIOCGBLEN:
  767                 *(u_int *)addr = d->bd_bufsize;
  768                 break;
  769 
  770         /*
  771          * Set buffer length.
  772          */
  773         case BIOCSBLEN:
  774                 if (d->bd_bif != NULL)
  775                         error = EINVAL;
  776                 else {
  777                         u_int size = *(u_int *)addr;
  778 
  779                         if (size > bpf_maxbufsize)
  780                                 *(u_int *)addr = size = bpf_maxbufsize;
  781                         else if (size < BPF_MINBUFSIZE)
  782                                 *(u_int *)addr = size = BPF_MINBUFSIZE;
  783                         mtx_enter(&d->bd_mtx);
  784                         d->bd_bufsize = size;
  785                         mtx_leave(&d->bd_mtx);
  786                 }
  787                 break;
  788 
  789         /*
  790          * Set link layer read filter.
  791          */
  792         case BIOCSETF:
  793                 error = bpf_setf(d, (struct bpf_program *)addr, 0);
  794                 break;
  795 
  796         /*
  797          * Set link layer write filter.
  798          */
  799         case BIOCSETWF:
  800                 error = bpf_setf(d, (struct bpf_program *)addr, 1);
  801                 break;
  802 
  803         /*
  804          * Flush read packet buffer.
  805          */
  806         case BIOCFLUSH:
  807                 mtx_enter(&d->bd_mtx);
  808                 bpf_resetd(d);
  809                 mtx_leave(&d->bd_mtx);
  810                 break;
  811 
  812         /*
  813          * Put interface into promiscuous mode.
  814          */
  815         case BIOCPROMISC:
  816                 if (d->bd_bif == NULL) {
  817                         /*
  818                          * No interface attached yet.
  819                          */
  820                         error = EINVAL;
  821                 } else if (d->bd_bif->bif_ifp != NULL) {
  822                         if (d->bd_promisc == 0) {
  823                                 MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
  824                                 NET_LOCK();
  825                                 error = ifpromisc(d->bd_bif->bif_ifp, 1);
  826                                 NET_UNLOCK();
  827                                 if (error == 0)
  828                                         d->bd_promisc = 1;
  829                         }
  830                 }
  831                 break;
  832 
  833         /*
  834          * Get a list of supported device parameters.
  835          */
  836         case BIOCGDLTLIST:
  837                 if (d->bd_bif == NULL)
  838                         error = EINVAL;
  839                 else
  840                         error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
  841                 break;
  842 
  843         /*
  844          * Get device parameters.
  845          */
  846         case BIOCGDLT:
  847                 if (d->bd_bif == NULL)
  848                         error = EINVAL;
  849                 else
  850                         *(u_int *)addr = d->bd_bif->bif_dlt;
  851                 break;
  852 
  853         /*
  854          * Set device parameters.
  855          */
  856         case BIOCSDLT:
  857                 if (d->bd_bif == NULL)
  858                         error = EINVAL;
  859                 else {
  860                         mtx_enter(&d->bd_mtx);
  861                         error = bpf_setdlt(d, *(u_int *)addr);
  862                         mtx_leave(&d->bd_mtx);
  863                 }
  864                 break;
  865 
  866         /*
  867          * Set interface name.
  868          */
  869         case BIOCGETIF:
  870                 if (d->bd_bif == NULL)
  871                         error = EINVAL;
  872                 else
  873                         bpf_ifname(d->bd_bif, (struct ifreq *)addr);
  874                 break;
  875 
  876         /*
  877          * Set interface.
  878          */
  879         case BIOCSETIF:
  880                 error = bpf_setif(d, (struct ifreq *)addr);
  881                 break;
  882 
  883         /*
  884          * Set read timeout.
  885          */
  886         case BIOCSRTIMEOUT:
  887                 {
  888                         struct timeval *tv = (struct timeval *)addr;
  889                         uint64_t rtout;
  890 
  891                         if (tv->tv_sec < 0 || !timerisvalid(tv)) {
  892                                 error = EINVAL;
  893                                 break;
  894                         }
  895                         rtout = TIMEVAL_TO_NSEC(tv);
  896                         if (rtout > MAXTSLP) {
  897                                 error = EOVERFLOW;
  898                                 break;
  899                         }
  900                         mtx_enter(&d->bd_mtx);
  901                         d->bd_rtout = rtout;
  902                         mtx_leave(&d->bd_mtx);
  903                         break;
  904                 }
  905 
  906         /*
  907          * Get read timeout.
  908          */
  909         case BIOCGRTIMEOUT:
  910                 {
  911                         struct timeval *tv = (struct timeval *)addr;
  912 
  913                         memset(tv, 0, sizeof(*tv));
  914                         mtx_enter(&d->bd_mtx);
  915                         NSEC_TO_TIMEVAL(d->bd_rtout, tv);
  916                         mtx_leave(&d->bd_mtx);
  917                         break;
  918                 }
  919 
  920         /*
  921          * Get packet stats.
  922          */
  923         case BIOCGSTATS:
  924                 {
  925                         struct bpf_stat *bs = (struct bpf_stat *)addr;
  926 
  927                         bs->bs_recv = d->bd_rcount;
  928                         bs->bs_drop = d->bd_dcount;
  929                         break;
  930                 }
  931 
  932         /*
  933          * Set immediate mode.
  934          */
  935         case BIOCIMMEDIATE:
  936                 d->bd_immediate = *(u_int *)addr;
  937                 break;
  938 
  939         case BIOCVERSION:
  940                 {
  941                         struct bpf_version *bv = (struct bpf_version *)addr;
  942 
  943                         bv->bv_major = BPF_MAJOR_VERSION;
  944                         bv->bv_minor = BPF_MINOR_VERSION;
  945                         break;
  946                 }
  947 
  948         case BIOCGHDRCMPLT:     /* get "header already complete" flag */
  949                 *(u_int *)addr = d->bd_hdrcmplt;
  950                 break;
  951 
  952         case BIOCSHDRCMPLT:     /* set "header already complete" flag */
  953                 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
  954                 break;
  955 
  956         case BIOCLOCK:          /* set "locked" flag (no reset) */
  957                 d->bd_locked = 1;
  958                 break;
  959 
  960         case BIOCGFILDROP:      /* get "filter-drop" flag */
  961                 *(u_int *)addr = d->bd_fildrop;
  962                 break;
  963 
  964         case BIOCSFILDROP: {    /* set "filter-drop" flag */
  965                 unsigned int fildrop = *(u_int *)addr;
  966                 switch (fildrop) {
  967                 case BPF_FILDROP_PASS:
  968                 case BPF_FILDROP_CAPTURE:
  969                 case BPF_FILDROP_DROP:
  970                         d->bd_fildrop = fildrop;
  971                         break;
  972                 default:
  973                         error = EINVAL;
  974                         break;
  975                 }
  976                 break;
  977         }
  978 
  979         case BIOCGDIRFILT:      /* get direction filter */
  980                 *(u_int *)addr = d->bd_dirfilt;
  981                 break;
  982 
  983         case BIOCSDIRFILT:      /* set direction filter */
  984                 d->bd_dirfilt = (*(u_int *)addr) &
  985                     (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
  986                 break;
  987 
  988         case FIONBIO:           /* Non-blocking I/O */
  989                 /* let vfs to keep track of this */
  990                 break;
  991 
  992         case FIOASYNC:          /* Send signal on receive packets */
  993                 d->bd_async = *(int *)addr;
  994                 break;
  995 
  996         case FIOSETOWN:         /* Process or group to send signals to */
  997         case TIOCSPGRP:
  998                 error = sigio_setown(&d->bd_sigio, cmd, addr);
  999                 break;
 1000 
 1001         case FIOGETOWN:
 1002         case TIOCGPGRP:
 1003                 sigio_getown(&d->bd_sigio, cmd, addr);
 1004                 break;
 1005 
 1006         case BIOCSRSIG:         /* Set receive signal */
 1007                 {
 1008                         u_int sig;
 1009 
 1010                         sig = *(u_int *)addr;
 1011 
 1012                         if (sig >= NSIG)
 1013                                 error = EINVAL;
 1014                         else
 1015                                 d->bd_sig = sig;
 1016                         break;
 1017                 }
 1018         case BIOCGRSIG:
 1019                 *(u_int *)addr = d->bd_sig;
 1020                 break;
 1021         }
 1022 
 1023         bpf_put(d);
 1024         return (error);
 1025 }
 1026 
 1027 /*
 1028  * Set d's packet filter program to fp.  If this file already has a filter,
 1029  * free it and replace it.  Returns EINVAL for bogus requests.
 1030  */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, int wf)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	/* Filter pointers are only published/swapped under the kernel lock. */
	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == 0) {
		/* Removing the filter: a non-zero length makes no sense. */
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		/* Copy the program from userland and verify it is safe. */
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		/* Wrap the program so it can be reclaimed via SMR. */
		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	/* wf != 0 selects the write filter, otherwise the read filter. */
	if (wf == 0) {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	mtx_enter(&d->bd_mtx);
	bpf_resetd(d);
	mtx_leave(&d->bd_mtx);
	/* Free the old program only after all SMR readers are done with it. */
	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}
 1083 
 1084 /*
 1085  * Detach a file from its current interface (if attached at all) and attach
 1086  * to the interface indicated by the name stored in ifr.
 1087  * Return an errno or 0.
 1088  */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp, *candidate = NULL;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 * Several list entries may carry the same name; prefer the
	 * one with the numerically lowest data link type.
	 */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) != 0)
			continue;

		if (candidate == NULL || candidate->bif_dlt > bp->bif_dlt)
			candidate = bp;
	}

	/* Not found. */
	if (candidate == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (candidate != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, candidate);
	}
	/* Always flush buffered packets, even when staying attached. */
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}
 1132 
 1133 /*
 1134  * Copy the interface name to the ifreq.
 1135  */
 1136 void
 1137 bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
 1138 {
 1139         bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
 1140 }
 1141 
/* kqueue EVFILT_READ hooks for bpf descriptors; MP-safe via bd_mtx. */
const struct filterops bpfread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_bpfrdetach,
	.f_event	= filt_bpfread,
	.f_modify	= filt_bpfreadmodify,
	.f_process	= filt_bpfreadprocess,
};
 1150 
 1151 int
 1152 bpfkqfilter(dev_t dev, struct knote *kn)
 1153 {
 1154         struct bpf_d *d;
 1155         struct klist *klist;
 1156 
 1157         KERNEL_ASSERT_LOCKED();
 1158 
 1159         d = bpfilter_lookup(minor(dev));
 1160         if (d == NULL)
 1161                 return (ENXIO);
 1162 
 1163         switch (kn->kn_filter) {
 1164         case EVFILT_READ:
 1165                 klist = &d->bd_klist;
 1166                 kn->kn_fop = &bpfread_filtops;
 1167                 break;
 1168         default:
 1169                 return (EINVAL);
 1170         }
 1171 
 1172         bpf_get(d);
 1173         kn->kn_hook = d;
 1174         klist_insert(klist, kn);
 1175 
 1176         return (0);
 1177 }
 1178 
 1179 void
 1180 filt_bpfrdetach(struct knote *kn)
 1181 {
 1182         struct bpf_d *d = kn->kn_hook;
 1183 
 1184         klist_remove(&d->bd_klist, kn);
 1185         bpf_put(d);
 1186 }
 1187 
 1188 int
 1189 filt_bpfread(struct knote *kn, long hint)
 1190 {
 1191         struct bpf_d *d = kn->kn_hook;
 1192 
 1193         MUTEX_ASSERT_LOCKED(&d->bd_mtx);
 1194 
 1195         kn->kn_data = d->bd_hlen;
 1196         if (d->bd_immediate)
 1197                 kn->kn_data += d->bd_slen;
 1198 
 1199         return (kn->kn_data > 0);
 1200 }
 1201 
 1202 int
 1203 filt_bpfreadmodify(struct kevent *kev, struct knote *kn)
 1204 {
 1205         struct bpf_d *d = kn->kn_hook;
 1206         int active;
 1207 
 1208         mtx_enter(&d->bd_mtx);
 1209         active = knote_modify_fn(kev, kn, filt_bpfread);
 1210         mtx_leave(&d->bd_mtx);
 1211 
 1212         return (active);
 1213 }
 1214 
 1215 int
 1216 filt_bpfreadprocess(struct knote *kn, struct kevent *kev)
 1217 {
 1218         struct bpf_d *d = kn->kn_hook;
 1219         int active;
 1220 
 1221         mtx_enter(&d->bd_mtx);
 1222         active = knote_process_fn(kn, kev, filt_bpfread);
 1223         mtx_leave(&d->bd_mtx);
 1224 
 1225         return (active);
 1226 }
 1227 
 1228 /*
 1229  * Copy data from an mbuf chain into a buffer.  This code is derived
 1230  * from m_copydata in sys/uipc_mbuf.c.
 1231  */
 1232 void
 1233 bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
 1234 {
 1235         const struct mbuf *m;
 1236         u_int count;
 1237         u_char *dst;
 1238 
 1239         m = src_arg;
 1240         dst = dst_arg;
 1241         while (len > 0) {
 1242                 if (m == NULL)
 1243                         panic("bpf_mcopy");
 1244                 count = min(m->m_len, len);
 1245                 bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
 1246                 m = m->m_next;
 1247                 dst += count;
 1248                 len -= count;
 1249         }
 1250 }
 1251 
 1252 int
 1253 bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
 1254 {
 1255         return _bpf_mtap(arg, m, m, direction);
 1256 }
 1257 
/*
 * Run every listener's filter over the chain m; mp supplies the packet
 * header metadata (timestamp, interface, flow id).  Returns non-zero when
 * a matching descriptor asked for the packet to be dropped.
 */
int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	/* Total packet length is the sum over the whole chain. */
	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	/* Walk the listener list inside an SMR read section. */
	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		/* Honor the per-descriptor direction filter. */
		if (ISSET(d->bd_dirfilt, direction))
			continue;

		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		/* slen == 0 means the filter rejected the packet. */
		if (slen == 0)
			continue;
		/* Any matching listener with a drop policy flags a drop. */
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			/* Build the header template once, shared by all
			 * matching listeners. */
			if (!gothdr) {
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid = mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);

					m_microtime(mp, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}
 1332 
 1333 /*
 1334  * Incoming linkage from device drivers, where a data buffer should be
 1335  * prepended by an arbitrary header. In this situation we already have a
 1336  * way of representing a chain of memory buffers, ie, mbufs, so reuse
 1337  * the existing functionality by attaching the buffers to mbufs.
 1338  *
 1339  * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
 1340  * struct m_hdr each for the header and data on the stack.
 1341  */
 1342 int
 1343 bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
 1344     const void *buf, unsigned int buflen, u_int direction)
 1345 {
 1346         struct m_hdr mh, md;
 1347         struct mbuf *m0 = NULL;
 1348         struct mbuf **mp = &m0;
 1349 
 1350         if (hdr != NULL) {
 1351                 mh.mh_flags = 0;
 1352                 mh.mh_next = NULL;
 1353                 mh.mh_len = hdrlen;
 1354                 mh.mh_data = (void *)hdr;
 1355 
 1356                 *mp = (struct mbuf *)&mh;
 1357                 mp = &mh.mh_next;
 1358         }
 1359 
 1360         if (buf != NULL) {
 1361                 md.mh_flags = 0;
 1362                 md.mh_next = NULL;
 1363                 md.mh_len = buflen;
 1364                 md.mh_data = (void *)buf;
 1365 
 1366                 *mp = (struct mbuf *)&md;
 1367         }
 1368 
 1369         return bpf_mtap(arg, m0, direction);
 1370 }
 1371 
 1372 /*
 1373  * Incoming linkage from device drivers, where we have a mbuf chain
 1374  * but need to prepend some arbitrary header from a linear buffer.
 1375  *
 1376  * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 1377  * struct m_hdr on the stack.  This is safe as bpf only reads from the
 1378  * fields in this header that we initialize, and will not try to free
 1379  * it or keep a pointer to it.
 1380  */
 1381 int
 1382 bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
 1383     u_int direction)
 1384 {
 1385         struct m_hdr mh;
 1386         const struct mbuf *m0;
 1387 
 1388         if (dlen > 0) {
 1389                 mh.mh_flags = 0;
 1390                 mh.mh_next = (struct mbuf *)m;
 1391                 mh.mh_len = dlen;
 1392                 mh.mh_data = (void *)data;
 1393                 m0 = (struct mbuf *)&mh;
 1394         } else
 1395                 m0 = m;
 1396 
 1397         return _bpf_mtap(arg, m, m0, direction);
 1398 }
 1399 
 1400 /*
 1401  * Incoming linkage from device drivers, where we have a mbuf chain
 1402  * but need to prepend the address family.
 1403  *
 1404  * Con up a minimal dummy header to pacify bpf.  We allocate (only) a
 1405  * struct m_hdr on the stack.  This is safe as bpf only reads from the
 1406  * fields in this header that we initialize, and will not try to free
 1407  * it or keep a pointer to it.
 1408  */
 1409 int
 1410 bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
 1411 {
 1412         u_int32_t    afh;
 1413 
 1414         afh = htonl(af);
 1415 
 1416         return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
 1417 }
 1418 
 1419 /*
 1420  * Incoming linkage from device drivers, where we have a mbuf chain
 1421  * but need to prepend a VLAN encapsulation header.
 1422  *
 1423  * Con up a minimal dummy header to pacify bpf.  Allocate (only) a
 1424  * struct m_hdr on the stack.  This is safe as bpf only reads from the
 1425  * fields in this header that we initialize, and will not try to free
 1426  * it or keep a pointer to it.
 1427  */
int
bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
{
#if NVLAN > 0
	struct ether_vlan_header evh;
	struct m_hdr mh, md;

	/* Untagged frames (or kernels without vlan(4)) tap unchanged. */
	if ((m->m_flags & M_VLANTAG) == 0)
#endif
	{
		return _bpf_mtap(arg, m, m, direction);
	}

#if NVLAN > 0
	KASSERT(m->m_len >= ETHER_HDR_LEN);

	/* Reconstruct the on-wire VLAN encapsulation from the mbuf tag. */
	memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
	evh.evl_proto = evh.evl_encap_proto;
	evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
	evh.evl_tag = htons(m->m_pkthdr.ether_vtag);

	/* Stack mbuf header for the rebuilt Ethernet+VLAN header... */
	mh.mh_flags = 0;
	mh.mh_data = (caddr_t)&evh;
	mh.mh_len = sizeof(evh);
	mh.mh_next = (struct mbuf *)&md;

	/* ...followed by the original payload after the Ethernet header. */
	md.mh_flags = 0;
	md.mh_data = m->m_data + ETHER_HDR_LEN;
	md.mh_len = m->m_len - ETHER_HDR_LEN;
	md.mh_next = m->m_next;

	return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction);
#endif
}
 1462 
 1463 /*
 1464  * Move the packet data from interface memory (pkt) into the
 1465  * store buffer.  Wake up listeners if needed.
 1466  * "copy" is the routine called to do the actual data
 1467  * transfer.  bcopy is passed in to copy contiguous chunks, while
 1468  * bpf_mcopy is passed in to copy mbuf chains.  In the latter case,
 1469  * pkt is really an mbuf.
 1470  */
void
bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
    const struct bpf_hdr *tbh)
{
	struct bpf_hdr *bh;
	int totlen, curlen;
	int hdrlen, do_wakeup = 0;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	/* Descriptor may have been detached while we were matching. */
	if (d->bd_bif == NULL)
		return;

	hdrlen = d->bd_bif->bif_hdrlen;

	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == NULL) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			return;
		}
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	}

	/*
	 * Append the bpf header.  tbh is the template built by the
	 * caller; only the per-descriptor lengths are filled in here.
	 */
	bh = (struct bpf_hdr *)(d->bd_sbuf + curlen);
	*bh = *tbh;
	bh->bh_datalen = pktlen;
	bh->bh_hdrlen = hdrlen;
	bh->bh_caplen = totlen - hdrlen;

	/*
	 * Copy the packet data into the store buffer and update its length.
	 * pkt is an mbuf chain here; bpf_mcopy() walks it.
	 */
	bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
	d->bd_slen = curlen + totlen;

	if (d->bd_immediate) {
		/*
		 * Immediate mode is set.  A packet arrived so any
		 * reads should be woken up.
		 */
		do_wakeup = 1;
	}

	if (do_wakeup)
		bpf_wakeup(d);
}
 1544 
 1545 /*
 1546  * Initialize all nonzero fields of a descriptor.
 1547  */
 1548 int
 1549 bpf_allocbufs(struct bpf_d *d)
 1550 {
 1551         MUTEX_ASSERT_LOCKED(&d->bd_mtx);
 1552 
 1553         d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
 1554         if (d->bd_fbuf == NULL)
 1555                 return (ENOMEM);
 1556 
 1557         d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
 1558         if (d->bd_sbuf == NULL) {
 1559                 free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
 1560                 d->bd_fbuf = NULL;
 1561                 return (ENOMEM);
 1562         }
 1563 
 1564         d->bd_slen = 0;
 1565         d->bd_hlen = 0;
 1566 
 1567         return (0);
 1568 }
 1569 
 1570 void
 1571 bpf_prog_smr(void *bps_arg)
 1572 {
 1573         struct bpf_program_smr *bps = bps_arg;
 1574 
 1575         free(bps->bps_bf.bf_insns, M_DEVBUF,
 1576             bps->bps_bf.bf_len * sizeof(struct bpf_insn));
 1577         free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
 1578 }
 1579 
/*
 * SMR callback: final teardown of a descriptor after its last reference
 * was dropped (see bpf_put()).
 */
void
bpf_d_smr(void *smr)
{
	struct bpf_d    *bd = smr;

	sigio_free(&bd->bd_sigio);
	/* All three capture buffers share bd_bufsize. */
	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	/* No SMR deferral needed here; we are already inside the callback. */
	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	klist_free(&bd->bd_klist);
	free(bd, M_DEVBUF, sizeof(*bd));
}
 1598 
/* Take a reference on the descriptor; paired with bpf_put(). */
void
bpf_get(struct bpf_d *bd)
{
	refcnt_take(&bd->bd_refcnt);
}
 1604 
 1605 /*
 1606  * Free buffers currently in use by a descriptor
 1607  * when the reference count drops to zero.
 1608  */
 1609 void
 1610 bpf_put(struct bpf_d *bd)
 1611 {
 1612         if (refcnt_rele(&bd->bd_refcnt) == 0)
 1613                 return;
 1614 
 1615         smr_call(&bd->bd_smr, bpf_d_smr, bd);
 1616 }
 1617 
 1618 void *
 1619 bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
 1620 {
 1621         struct bpf_if *bp;
 1622 
 1623         if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
 1624                 panic("bpfattach");
 1625         SMR_SLIST_INIT(&bp->bif_dlist);
 1626         bp->bif_driverp = (struct bpf_if **)bpfp;
 1627         bp->bif_name = name;
 1628         bp->bif_ifp = NULL;
 1629         bp->bif_dlt = dlt;
 1630 
 1631         bp->bif_next = bpf_iflist;
 1632         bpf_iflist = bp;
 1633 
 1634         *bp->bif_driverp = NULL;
 1635 
 1636         /*
 1637          * Compute the length of the bpf header.  This is not necessarily
 1638          * equal to SIZEOF_BPF_HDR because we want to insert spacing such
 1639          * that the network layer header begins on a longword boundary (for
 1640          * performance reasons and to alleviate alignment restrictions).
 1641          */
 1642         bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
 1643 
 1644         return (bp);
 1645 }
 1646 
 1647 void
 1648 bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
 1649 {
 1650         struct bpf_if *bp;
 1651 
 1652         bp = bpfsattach(driverp, ifp->if_xname, dlt, hdrlen);
 1653         bp->bif_ifp = ifp;
 1654 }
 1655 
 1656 /* Detach an interface from its attached bpf device.  */
 1657 void
 1658 bpfdetach(struct ifnet *ifp)
 1659 {
 1660         struct bpf_if *bp, *nbp;
 1661 
 1662         KERNEL_ASSERT_LOCKED();
 1663 
 1664         for (bp = bpf_iflist; bp; bp = nbp) {
 1665                 nbp = bp->bif_next;
 1666                 if (bp->bif_ifp == ifp)
 1667                         bpfsdetach(bp);
 1668         }
 1669         ifp->if_bpf = NULL;
 1670 }
 1671 
void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p, *tbp;
	struct bpf_d *bd;
	int maj;

	KERNEL_ASSERT_LOCKED();

	/* Locate the major number. */
	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	/*
	 * Revoke every descriptor still listening on this node.
	 * NOTE(review): each iteration relies on the close path removing
	 * bd from bif_dlist, otherwise this would loop forever — confirm.
	 */
	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist))) {
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);
		klist_invalidate(&bd->bd_klist);
	}

	/* Unlink bp from the singly linked interface list (interior case). */
	for (tbp = bpf_iflist; tbp; tbp = tbp->bif_next) {
		if (tbp->bif_next == bp) {
			tbp->bif_next = bp->bif_next;
			break;
		}
	}

	/* Head-of-list case. */
	if (bpf_iflist == bp)
		bpf_iflist = bp->bif_next;

	free(bp, M_DEVBUF, sizeof(*bp));
}
 1703 
 1704 int
 1705 bpf_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
 1706     void *newp, size_t newlen)
 1707 {
 1708         switch (name[0]) {
 1709         case NET_BPF_BUFSIZE:
 1710                 return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
 1711                     &bpf_bufsize, BPF_MINBUFSIZE, bpf_maxbufsize);
 1712         case NET_BPF_MAXBUFSIZE:
 1713                 return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
 1714                     &bpf_maxbufsize, BPF_MINBUFSIZE, INT_MAX);
 1715         default:
 1716                 return (EOPNOTSUPP);
 1717         }
 1718 }
 1719 
 1720 int
 1721 bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
 1722     size_t newlen)
 1723 {
 1724         int flags = RW_INTR;
 1725         int error;
 1726 
 1727         if (namelen != 1)
 1728                 return (ENOTDIR);
 1729 
 1730         flags |= (newp == NULL) ? RW_READ : RW_WRITE;
 1731 
 1732         error = rw_enter(&bpf_sysctl_lk, flags);
 1733         if (error != 0)
 1734                 return (error);
 1735 
 1736         error = bpf_sysctl_locked(name, namelen, oldp, oldlenp, newp, newlen);
 1737 
 1738         rw_exit(&bpf_sysctl_lk);
 1739 
 1740         return (error);
 1741 }
 1742 
 1743 struct bpf_d *
 1744 bpfilter_lookup(int unit)
 1745 {
 1746         struct bpf_d *bd;
 1747 
 1748         KERNEL_ASSERT_LOCKED();
 1749 
 1750         LIST_FOREACH(bd, &bpf_d_list, bd_list)
 1751                 if (bd->bd_unit == unit)
 1752                         return (bd);
 1753         return (NULL);
 1754 }
 1755 
 1756 /*
 1757  * Get a list of available data link type of the interface.
 1758  */
 1759 int
 1760 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
 1761 {
 1762         int n, error;
 1763         struct bpf_if *bp;
 1764         const char *name;
 1765 
 1766         name = d->bd_bif->bif_name;
 1767         n = 0;
 1768         error = 0;
 1769         for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
 1770                 if (strcmp(name, bp->bif_name) != 0)
 1771                         continue;
 1772                 if (bfl->bfl_list != NULL) {
 1773                         if (n >= bfl->bfl_len)
 1774                                 return (ENOMEM);
 1775                         error = copyout(&bp->bif_dlt,
 1776                             bfl->bfl_list + n, sizeof(u_int));
 1777                         if (error)
 1778                                 break;
 1779                 }
 1780                 n++;
 1781         }
 1782 
 1783         bfl->bfl_len = n;
 1784         return (error);
 1785 }
 1786 
 1787 /*
 1788  * Set the data link type of a BPF instance.
 1789  */
int
bpf_setdlt(struct bpf_d *d, u_int dlt)
{
	const char *name;
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	/* Already using the requested DLT: nothing to do. */
	if (d->bd_bif->bif_dlt == dlt)
		return (0);
	name = d->bd_bif->bif_name;
	/* Look for a same-named tap point that carries the wanted DLT. */
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (strcmp(name, bp->bif_name) != 0)
			continue;
		if (bp->bif_dlt == dlt)
			break;
	}
	if (bp == NULL)
		return (EINVAL);
	/* Move the descriptor over and flush its buffered packets. */
	bpf_detachd(d);
	bpf_attachd(d, bp);
	bpf_resetd(d);
	return (0);
}
 1813 
 1814 u_int32_t       bpf_mbuf_ldw(const void *, u_int32_t, int *);
 1815 u_int32_t       bpf_mbuf_ldh(const void *, u_int32_t, int *);
 1816 u_int32_t       bpf_mbuf_ldb(const void *, u_int32_t, int *);
 1817 
 1818 int             bpf_mbuf_copy(const struct mbuf *, u_int32_t,
 1819                     void *, u_int32_t);
 1820 
/*
 * Load operations handed to _bpf_filter() so the interpreter can read
 * packet bytes directly out of an mbuf chain: 32-bit, 16-bit and 8-bit
 * loads, in that order.
 */
const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};
 1826 
 1827 int
 1828 bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
 1829 {
 1830         u_int8_t *cp = buf;
 1831         u_int32_t count;
 1832 
 1833         while (off >= m->m_len) {
 1834                 off -= m->m_len;
 1835 
 1836                 m = m->m_next;
 1837                 if (m == NULL)
 1838                         return (-1);
 1839         }
 1840 
 1841         for (;;) {
 1842                 count = min(m->m_len - off, len);
 1843 
 1844                 memcpy(cp, m->m_data + off, count);
 1845                 len -= count;
 1846 
 1847                 if (len == 0)
 1848                         return (0);
 1849 
 1850                 m = m->m_next;
 1851                 if (m == NULL)
 1852                         break;
 1853 
 1854                 cp += count;
 1855                 off = 0;
 1856         }
 1857 
 1858         return (-1);
 1859 }
 1860 
 1861 u_int32_t
 1862 bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
 1863 {
 1864         u_int32_t v;
 1865 
 1866         if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
 1867                 *err = 1;
 1868                 return (0);
 1869         }
 1870 
 1871         *err = 0;
 1872         return ntohl(v);
 1873 }
 1874 
 1875 u_int32_t
 1876 bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
 1877 {
 1878         u_int16_t v;
 1879 
 1880         if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
 1881                 *err = 1;
 1882                 return (0);
 1883         }
 1884 
 1885         *err = 0;
 1886         return ntohs(v);
 1887 }
 1888 
 1889 u_int32_t
 1890 bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
 1891 {
 1892         const struct mbuf *m = m0;
 1893         u_int8_t v;
 1894 
 1895         while (k >= m->m_len) {
 1896                 k -= m->m_len;
 1897 
 1898                 m = m->m_next;
 1899                 if (m == NULL) {
 1900                         *err = 1;
 1901                         return (0);
 1902                 }
 1903         }
 1904         v = m->m_data[k];
 1905 
 1906         *err = 0;
 1907         return v;
 1908 }
 1909 
/*
 * Run a BPF program over a packet stored in an mbuf chain, using the
 * mbuf-aware load operations.  "wirelen" is passed through to
 * _bpf_filter(); presumably the original on-wire packet length --
 * TODO confirm against _bpf_filter()'s definition.
 */
u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
        return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}

Cache object: b3d38d71da58dcafb48816cacc7f18a1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.