[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]

FreeBSD/Linux Kernel Cross Reference
sys/net/bpf.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD70  -  FREEBSD6  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*-
  2  * Copyright (c) 1990, 1991, 1993
  3  *      The Regents of the University of California.  All rights reserved.
  4  *
  5  * This code is derived from the Stanford/CMU enet packet filter,
  6  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
  7  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
  8  * Berkeley Laboratory.
  9  *
 10  * Redistribution and use in source and binary forms, with or without
 11  * modification, are permitted provided that the following conditions
 12  * are met:
 13  * 1. Redistributions of source code must retain the above copyright
 14  *    notice, this list of conditions and the following disclaimer.
 15  * 2. Redistributions in binary form must reproduce the above copyright
 16  *    notice, this list of conditions and the following disclaimer in the
 17  *    documentation and/or other materials provided with the distribution.
 18  * 4. Neither the name of the University nor the names of its contributors
 19  *    may be used to endorse or promote products derived from this software
 20  *    without specific prior written permission.
 21  *
 22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 32  * SUCH DAMAGE.
 33  *
 34  *      @(#)bpf.c       8.4 (Berkeley) 1/9/95
 35  */
 36 
 37 #include <sys/cdefs.h>
 38 __FBSDID("$FreeBSD: src/sys/net/bpf.c,v 1.202 2008/10/23 15:53:51 des Exp $");
 39 
 40 #include "opt_bpf.h"
 41 #include "opt_mac.h"
 42 #include "opt_netgraph.h"
 43 
 44 #include <sys/types.h>
 45 #include <sys/param.h>
 46 #include <sys/systm.h>
 47 #include <sys/conf.h>
 48 #include <sys/fcntl.h>
 49 #include <sys/malloc.h>
 50 #include <sys/mbuf.h>
 51 #include <sys/time.h>
 52 #include <sys/priv.h>
 53 #include <sys/proc.h>
 54 #include <sys/signalvar.h>
 55 #include <sys/filio.h>
 56 #include <sys/sockio.h>
 57 #include <sys/ttycom.h>
 58 #include <sys/uio.h>
 59 
 60 #include <sys/event.h>
 61 #include <sys/file.h>
 62 #include <sys/poll.h>
 63 #include <sys/proc.h>
 64 
 65 #include <sys/socket.h>
 66 
 67 #include <net/if.h>
 68 #include <net/bpf.h>
 69 #include <net/bpf_buffer.h>
 70 #ifdef BPF_JITTER
 71 #include <net/bpf_jitter.h>
 72 #endif
 73 #include <net/bpf_zerocopy.h>
 74 #include <net/bpfdesc.h>
 75 
 76 #include <netinet/in.h>
 77 #include <netinet/if_ether.h>
 78 #include <sys/kernel.h>
 79 #include <sys/sysctl.h>
 80 
 81 #include <net80211/ieee80211_freebsd.h>
 82 
 83 #include <security/mac/mac_framework.h>
 84 
 85 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
 86 
 87 #if defined(DEV_BPF) || defined(NETGRAPH_BPF)
 88 
 89 #define PRINET  26                      /* interruptible */
 90 
 91 /*
 92  * bpf_iflist is a list of BPF interface structures, each corresponding to a
 93  * specific DLT.  The same network interface might have several BPF interface
 94  * structures registered by different layers in the stack (i.e., 802.11
 95  * frames, ethernet frames, etc).
 96  */
 97 static LIST_HEAD(, bpf_if)      bpf_iflist;
 98 static struct mtx       bpf_mtx;                /* bpf global lock */
 99 static int              bpf_bpfd_cnt;
100 
101 static void     bpf_attachd(struct bpf_d *, struct bpf_if *);
102 static void     bpf_detachd(struct bpf_d *);
103 static void     bpf_freed(struct bpf_d *);
104 static int      bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
105                     struct sockaddr *, int *, struct bpf_insn *);
106 static int      bpf_setif(struct bpf_d *, struct ifreq *);
107 static void     bpf_timed_out(void *);
108 static __inline void
109                 bpf_wakeup(struct bpf_d *);
110 static void     catchpacket(struct bpf_d *, u_char *, u_int, u_int,
111                     void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
112                     struct timeval *);
113 static void     reset_d(struct bpf_d *);
114 static int       bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
115 static int      bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
116 static int      bpf_setdlt(struct bpf_d *, u_int);
117 static void     filt_bpfdetach(struct knote *);
118 static int      filt_bpfread(struct knote *, long);
119 static void     bpf_drvinit(void *);
120 static int      bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
121 
122 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
123 int bpf_maxinsns = BPF_MAXINSNS;
124 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
125     &bpf_maxinsns, 0, "Maximum bpf program instructions");
126 static int bpf_zerocopy_enable = 0;
127 SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
128     &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
129 SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
130     bpf_stats_sysctl, "bpf statistics portal");
131 
132 static  d_open_t        bpfopen;
133 static  d_read_t        bpfread;
134 static  d_write_t       bpfwrite;
135 static  d_ioctl_t       bpfioctl;
136 static  d_poll_t        bpfpoll;
137 static  d_kqfilter_t    bpfkqfilter;
138 
139 static struct cdevsw bpf_cdevsw = {
140         .d_version =    D_VERSION,
141         .d_open =       bpfopen,
142         .d_read =       bpfread,
143         .d_write =      bpfwrite,
144         .d_ioctl =      bpfioctl,
145         .d_poll =       bpfpoll,
146         .d_name =       "bpf",
147         .d_kqfilter =   bpfkqfilter,
148 };
149 
150 static struct filterops bpfread_filtops =
151         { 1, NULL, filt_bpfdetach, filt_bpfread };
152 
153 /*
154  * Wrapper functions for various buffering methods.  If the set of buffer
155  * modes expands, we will probably want to introduce a switch data structure
 156  * similar to protosw, etc.
157  */
158 static void
159 bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
160     u_int len)
161 {
162 
163         BPFD_LOCK_ASSERT(d);
164 
165         switch (d->bd_bufmode) {
166         case BPF_BUFMODE_BUFFER:
167                 return (bpf_buffer_append_bytes(d, buf, offset, src, len));
168 
169         case BPF_BUFMODE_ZBUF:
170                 d->bd_zcopy++;
171                 return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
172 
173         default:
174                 panic("bpf_buf_append_bytes");
175         }
176 }
177 
178 static void
179 bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
180     u_int len)
181 {
182 
183         BPFD_LOCK_ASSERT(d);
184 
185         switch (d->bd_bufmode) {
186         case BPF_BUFMODE_BUFFER:
187                 return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
188 
189         case BPF_BUFMODE_ZBUF:
190                 d->bd_zcopy++;
191                 return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
192 
193         default:
194                 panic("bpf_buf_append_mbuf");
195         }
196 }
197 
198 /*
199  * This function gets called when the free buffer is re-assigned.
200  */
201 static void
202 bpf_buf_reclaimed(struct bpf_d *d)
203 {
204 
205         BPFD_LOCK_ASSERT(d);
206 
207         switch (d->bd_bufmode) {
208         case BPF_BUFMODE_BUFFER:
209                 return;
210 
211         case BPF_BUFMODE_ZBUF:
212                 bpf_zerocopy_buf_reclaimed(d);
213                 return;
214 
215         default:
216                 panic("bpf_buf_reclaimed");
217         }
218 }
219 
220 /*
221  * If the buffer mechanism has a way to decide that a held buffer can be made
222  * free, then it is exposed via the bpf_canfreebuf() interface.  (1) is
223  * returned if the buffer can be discarded, (0) is returned if it cannot.
224  */
225 static int
226 bpf_canfreebuf(struct bpf_d *d)
227 {
228 
229         BPFD_LOCK_ASSERT(d);
230 
231         switch (d->bd_bufmode) {
232         case BPF_BUFMODE_ZBUF:
233                 return (bpf_zerocopy_canfreebuf(d));
234         }
235         return (0);
236 }
237 
238 /*
239  * Allow the buffer model to indicate that the current store buffer is
240  * immutable, regardless of the appearance of space.  Return (1) if the
241  * buffer is writable, and (0) if not.
242  */
243 static int
244 bpf_canwritebuf(struct bpf_d *d)
245 {
246 
247         BPFD_LOCK_ASSERT(d);
248 
249         switch (d->bd_bufmode) {
250         case BPF_BUFMODE_ZBUF:
251                 return (bpf_zerocopy_canwritebuf(d));
252         }
253         return (1);
254 }
255 
256 /*
257  * Notify buffer model that an attempt to write to the store buffer has
258  * resulted in a dropped packet, in which case the buffer may be considered
259  * full.
260  */
261 static void
262 bpf_buffull(struct bpf_d *d)
263 {
264 
265         BPFD_LOCK_ASSERT(d);
266 
267         switch (d->bd_bufmode) {
268         case BPF_BUFMODE_ZBUF:
269                 bpf_zerocopy_buffull(d);
270                 break;
271         }
272 }
273 
274 /*
275  * Notify the buffer model that a buffer has moved into the hold position.
276  */
277 void
278 bpf_bufheld(struct bpf_d *d)
279 {
280 
281         BPFD_LOCK_ASSERT(d);
282 
283         switch (d->bd_bufmode) {
284         case BPF_BUFMODE_ZBUF:
285                 bpf_zerocopy_bufheld(d);
286                 break;
287         }
288 }
289 
290 static void
291 bpf_free(struct bpf_d *d)
292 {
293 
294         switch (d->bd_bufmode) {
295         case BPF_BUFMODE_BUFFER:
296                 return (bpf_buffer_free(d));
297 
298         case BPF_BUFMODE_ZBUF:
299                 return (bpf_zerocopy_free(d));
300 
301         default:
302                 panic("bpf_buf_free");
303         }
304 }
305 
306 static int
307 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
308 {
309 
310         if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
311                 return (EOPNOTSUPP);
312         return (bpf_buffer_uiomove(d, buf, len, uio));
313 }
314 
315 static int
316 bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
317 {
318 
319         if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
320                 return (EOPNOTSUPP);
321         return (bpf_buffer_ioctl_sblen(d, i));
322 }
323 
324 static int
325 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
326 {
327 
328         if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
329                 return (EOPNOTSUPP);
330         return (bpf_zerocopy_ioctl_getzmax(td, d, i));
331 }
332 
333 static int
334 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
335 {
336 
337         if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
338                 return (EOPNOTSUPP);
339         return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
340 }
341 
342 static int
343 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
344 {
345 
346         if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
347                 return (EOPNOTSUPP);
348         return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
349 }
350 
351 /*
352  * General BPF functions.
353  */
354 static int
355 bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
356     struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
357 {
358         const struct ieee80211_bpf_params *p;
359         struct ether_header *eh;
360         struct mbuf *m;
361         int error;
362         int len;
363         int hlen;
364         int slen;
365 
366         /*
367          * Build a sockaddr based on the data link layer type.
368          * We do this at this level because the ethernet header
369          * is copied directly into the data field of the sockaddr.
370          * In the case of SLIP, there is no header and the packet
371          * is forwarded as is.
372          * Also, we are careful to leave room at the front of the mbuf
373          * for the link level header.
374          */
375         switch (linktype) {
376 
377         case DLT_SLIP:
378                 sockp->sa_family = AF_INET;
379                 hlen = 0;
380                 break;
381 
382         case DLT_EN10MB:
383                 sockp->sa_family = AF_UNSPEC;
384                 /* XXX Would MAXLINKHDR be better? */
385                 hlen = ETHER_HDR_LEN;
386                 break;
387 
388         case DLT_FDDI:
389                 sockp->sa_family = AF_IMPLINK;
390                 hlen = 0;
391                 break;
392 
393         case DLT_RAW:
394                 sockp->sa_family = AF_UNSPEC;
395                 hlen = 0;
396                 break;
397 
398         case DLT_NULL:
399                 /*
400                  * null interface types require a 4 byte pseudo header which
401                  * corresponds to the address family of the packet.
402                  */
403                 sockp->sa_family = AF_UNSPEC;
404                 hlen = 4;
405                 break;
406 
407         case DLT_ATM_RFC1483:
408                 /*
409                  * en atm driver requires 4-byte atm pseudo header.
410                  * though it isn't standard, vpi:vci needs to be
411                  * specified anyway.
412                  */
413                 sockp->sa_family = AF_UNSPEC;
414                 hlen = 12;      /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
415                 break;
416 
417         case DLT_PPP:
418                 sockp->sa_family = AF_UNSPEC;
419                 hlen = 4;       /* This should match PPP_HDRLEN */
420                 break;
421 
422         case DLT_IEEE802_11:            /* IEEE 802.11 wireless */
423                 sockp->sa_family = AF_IEEE80211;
424                 hlen = 0;
425                 break;
426 
427         case DLT_IEEE802_11_RADIO:      /* IEEE 802.11 wireless w/ phy params */
428                 sockp->sa_family = AF_IEEE80211;
429                 sockp->sa_len = 12;     /* XXX != 0 */
430                 hlen = sizeof(struct ieee80211_bpf_params);
431                 break;
432 
433         default:
434                 return (EIO);
435         }
436 
437         len = uio->uio_resid;
438 
439         if (len - hlen > ifp->if_mtu)
440                 return (EMSGSIZE);
441 
442         if ((unsigned)len > MJUM16BYTES)
443                 return (EIO);
444 
445         if (len <= MHLEN)
446                 MGETHDR(m, M_WAIT, MT_DATA);
447         else if (len <= MCLBYTES)
448                 m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
449         else
450                 m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
451 #if (MJUMPAGESIZE > MCLBYTES)
452                     len <= MJUMPAGESIZE ? MJUMPAGESIZE :
453 #endif
454                     (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
455         m->m_pkthdr.len = m->m_len = len;
456         m->m_pkthdr.rcvif = NULL;
457         *mp = m;
458 
459         if (m->m_len < hlen) {
460                 error = EPERM;
461                 goto bad;
462         }
463 
464         error = uiomove(mtod(m, u_char *), len, uio);
465         if (error)
466                 goto bad;
467 
468         slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
469         if (slen == 0) {
470                 error = EPERM;
471                 goto bad;
472         }
473 
474         /* Check for multicast destination */
475         switch (linktype) {
476         case DLT_EN10MB:
477                 eh = mtod(m, struct ether_header *);
478                 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
479                         if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
480                             ETHER_ADDR_LEN) == 0)
481                                 m->m_flags |= M_BCAST;
482                         else
483                                 m->m_flags |= M_MCAST;
484                 }
485                 break;
486         }
487 
488         /*
489          * Make room for link header, and copy it to sockaddr
490          */
491         if (hlen != 0) {
492                 if (sockp->sa_family == AF_IEEE80211) {
493                         /*
494                          * Collect true length from the parameter header
495                          * NB: sockp is known to be zero'd so if we do a
496                          *     short copy unspecified parameters will be
497                          *     zero.
498                          * NB: packet may not be aligned after stripping
499                          *     bpf params
500                          * XXX check ibp_vers
501                          */
502                         p = mtod(m, const struct ieee80211_bpf_params *);
503                         hlen = p->ibp_len;
504                         if (hlen > sizeof(sockp->sa_data)) {
505                                 error = EINVAL;
506                                 goto bad;
507                         }
508                 }
509                 bcopy(m->m_data, sockp->sa_data, hlen);
510         }
511         *hdrlen = hlen;
512 
513         return (0);
514 bad:
515         m_freem(m);
516         return (error);
517 }
518 
519 /*
520  * Attach file to the bpf interface, i.e. make d listen on bp.
521  */
522 static void
523 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
524 {
525         /*
526          * Point d at bp, and add d to the interface's list of listeners.
527          * Finally, point the driver's bpf cookie at the interface so
528          * it will divert packets to bpf.
529          */
530         BPFIF_LOCK(bp);
531         d->bd_bif = bp;
532         LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
533 
534         bpf_bpfd_cnt++;
535         BPFIF_UNLOCK(bp);
536 }
537 
538 /*
539  * Detach a file from its interface.
540  */
541 static void
542 bpf_detachd(struct bpf_d *d)
543 {
544         int error;
545         struct bpf_if *bp;
546         struct ifnet *ifp;
547 
548         bp = d->bd_bif;
549         BPFIF_LOCK(bp);
550         BPFD_LOCK(d);
551         ifp = d->bd_bif->bif_ifp;
552 
553         /*
554          * Remove d from the interface's descriptor list.
555          */
556         LIST_REMOVE(d, bd_next);
557 
558         bpf_bpfd_cnt--;
559         d->bd_bif = NULL;
560         BPFD_UNLOCK(d);
561         BPFIF_UNLOCK(bp);
562 
563         /*
564          * Check if this descriptor had requested promiscuous mode.
565          * If so, turn it off.
566          */
567         if (d->bd_promisc) {
568                 d->bd_promisc = 0;
569                 error = ifpromisc(ifp, 0);
570                 if (error != 0 && error != ENXIO) {
571                         /*
572                          * ENXIO can happen if a pccard is unplugged
573                          * Something is really wrong if we were able to put
574                          * the driver into promiscuous mode, but can't
575                          * take it out.
576                          */
577                         if_printf(bp->bif_ifp,
578                                 "bpf_detach: ifpromisc failed (%d)\n", error);
579                 }
580         }
581 }
582 
583 /*
584  * Close the descriptor by detaching it from its interface,
585  * deallocating its buffers, and marking it free.
586  */
587 static void
588 bpf_dtor(void *data)
589 {
590         struct bpf_d *d = data;
591 
592         BPFD_LOCK(d);
593         if (d->bd_state == BPF_WAITING)
594                 callout_stop(&d->bd_callout);
595         d->bd_state = BPF_IDLE;
596         BPFD_UNLOCK(d);
597         funsetown(&d->bd_sigio);
598         mtx_lock(&bpf_mtx);
599         if (d->bd_bif)
600                 bpf_detachd(d);
601         mtx_unlock(&bpf_mtx);
602         selwakeuppri(&d->bd_sel, PRINET);
603 #ifdef MAC
604         mac_bpfdesc_destroy(d);
605 #endif /* MAC */
606         knlist_destroy(&d->bd_sel.si_note);
607         bpf_freed(d);
608         free(d, M_BPF);
609 }
610 
611 /*
612  * Open ethernet device.  Returns ENXIO for illegal minor device number,
613  * EBUSY if file is open by another process.
614  */
615 /* ARGSUSED */
616 static  int
617 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
618 {
619         struct bpf_d *d;
620         int error;
621 
622         d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
623         error = devfs_set_cdevpriv(d, bpf_dtor);
624         if (error != 0) {
625                 free(d, M_BPF);
626                 return (error);
627         }
628 
629         /*
630          * For historical reasons, perform a one-time initialization call to
631          * the buffer routines, even though we're not yet committed to a
632          * particular buffer method.
633          */
634         bpf_buffer_init(d);
635         d->bd_bufmode = BPF_BUFMODE_BUFFER;
636         d->bd_sig = SIGIO;
637         d->bd_direction = BPF_D_INOUT;
638         d->bd_pid = td->td_proc->p_pid;
639 #ifdef MAC
640         mac_bpfdesc_init(d);
641         mac_bpfdesc_create(td->td_ucred, d);
642 #endif
643         mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
644         callout_init(&d->bd_callout, CALLOUT_MPSAFE);
645         knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);
646 
647         return (0);
648 }
649 
/*
 *  bpfread - read next chunk of packets from buffers
 *
 * The caller's buffer must be exactly d->bd_bufsize bytes.  If no hold
 * buffer is available the call sleeps (unless O_NONBLOCK is set) until one
 * appears, the read timeout expires, or a signal arrives.
 *
 * Returns 0 on success (including a timeout with nothing captured) or:
 *   EINVAL       uio_resid differs from the kernel buffer size
 *   EOPNOTSUPP   descriptor is not in plain buffer mode
 *   ENXIO        no data and the descriptor is not bound to an interface
 *   EWOULDBLOCK  no data and O_NONBLOCK was requested
 *   EINTR/ERESTART  the sleep was interrupted
 * or the error from devfs_get_cdevpriv() / bpf_uiomove().
 */
static  int
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
{
        struct bpf_d *d;
        int timed_out;
        int error;

        error = devfs_get_cdevpriv((void **)&d);
        if (error != 0)
                return (error);

        /*
         * Restrict application to use a buffer the same size as
         * the kernel buffers.
         * NOTE(review): bd_bufsize is read here before the descriptor
         * lock is taken.
         */
        if (uio->uio_resid != d->bd_bufsize)
                return (EINVAL);

        BPFD_LOCK(d);
        /* Record the pid of the most recent user of the descriptor. */
        d->bd_pid = curthread->td_proc->p_pid;
        if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
                BPFD_UNLOCK(d);
                return (EOPNOTSUPP);
        }
        /*
         * Cancel a pending timeout callout, remember whether it had
         * already fired, and return the state machine to idle.
         */
        if (d->bd_state == BPF_WAITING)
                callout_stop(&d->bd_callout);
        timed_out = (d->bd_state == BPF_TIMED_OUT);
        d->bd_state = BPF_IDLE;
        /*
         * If the hold buffer is empty, then do a timed sleep, which
         * ends when the timeout expires or when enough packets
         * have arrived to fill the store buffer.
         */
        while (d->bd_hbuf == NULL) {
                if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
                        /*
                         * A packet(s) either arrived since the previous
                         * read or arrived while we were asleep.
                         * Rotate the buffers and return what's here.
                         */
                        ROTATE_BUFFERS(d);
                        break;
                }

                /*
                 * No data is available, check to see if the bpf device
                 * is still pointed at a real interface.  If not, return
                 * ENXIO so that the userland process knows to rebind
                 * it before using it again.
                 */
                if (d->bd_bif == NULL) {
                        BPFD_UNLOCK(d);
                        return (ENXIO);
                }

                if (ioflag & O_NONBLOCK) {
                        BPFD_UNLOCK(d);
                        return (EWOULDBLOCK);
                }
                /*
                 * Sleep on the descriptor with bd_mtx as the interlock;
                 * bd_rtout is the timeout argument.
                 */
                error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
                     "bpf", d->bd_rtout);
                if (error == EINTR || error == ERESTART) {
                        BPFD_UNLOCK(d);
                        return (error);
                }
                if (error == EWOULDBLOCK) {
                        /*
                         * On a timeout, return what's in the buffer,
                         * which may be nothing.  If there is something
                         * in the store buffer, we can rotate the buffers.
                         */
                        if (d->bd_hbuf)
                                /*
                                 * We filled up the buffer in between
                                 * getting the timeout and arriving
                                 * here, so we don't need to rotate.
                                 */
                                break;

                        if (d->bd_slen == 0) {
                                BPFD_UNLOCK(d);
                                return (0);
                        }
                        ROTATE_BUFFERS(d);
                        break;
                }
                /* Any other msleep() result: re-evaluate the loop test. */
        }
        /*
         * At this point, we know we have something in the hold slot.
         */
        BPFD_UNLOCK(d);

        /*
         * Move data from hold buffer into user space.
         * We know the entire buffer is transferred since
         * we checked above that the read buffer is bpf_bufsize bytes.
         *
         * The copy is done with the descriptor lock dropped.
         *
         * XXXRW: More synchronization needed here: what if a second thread
         * issues a read on the same fd at the same time?  Don't want this
         * getting invalidated.
         */
        error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);

        /*
         * The hold buffer becomes the free buffer, whether or not the
         * copy succeeded, and the buffer model is notified.
         */
        BPFD_LOCK(d);
        d->bd_fbuf = d->bd_hbuf;
        d->bd_hbuf = NULL;
        d->bd_hlen = 0;
        bpf_buf_reclaimed(d);
        BPFD_UNLOCK(d);

        return (error);
}
765 
766 /*
767  * If there are processes sleeping on this descriptor, wake them up.
768  */
769 static __inline void
770 bpf_wakeup(struct bpf_d *d)
771 {
772 
773         BPFD_LOCK_ASSERT(d);
774         if (d->bd_state == BPF_WAITING) {
775                 callout_stop(&d->bd_callout);
776                 d->bd_state = BPF_IDLE;
777         }
778         wakeup(d);
779         if (d->bd_async && d->bd_sig && d->bd_sigio)
780                 pgsigio(&d->bd_sigio, d->bd_sig, 0);
781 
782         selwakeuppri(&d->bd_sel, PRINET);
783         KNOTE_LOCKED(&d->bd_sel.si_note, 0);
784 }
785 
786 static void
787 bpf_timed_out(void *arg)
788 {
789         struct bpf_d *d = (struct bpf_d *)arg;
790 
791         BPFD_LOCK(d);
792         if (d->bd_state == BPF_WAITING) {
793                 d->bd_state = BPF_TIMED_OUT;
794                 if (d->bd_slen != 0)
795                         bpf_wakeup(d);
796         }
797         BPFD_UNLOCK(d);
798 }
799 
800 static int
801 bpf_ready(struct bpf_d *d)
802 {
803 
804         BPFD_LOCK_ASSERT(d);
805 
806         if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
807                 return (1);
808         if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
809             d->bd_slen != 0)
810                 return (1);
811         return (0);
812 }
813 
/*
 * bpfwrite - inject a packet written by userland on the bound interface.
 *
 * The packet is copied in and checked by bpf_movein(), then handed to the
 * interface's if_output routine.  In feedback mode a duplicate is also
 * passed to if_input when the output succeeded.
 *
 * Counters: bd_wcount counts every call, bd_wfcount every packet that
 * passed bpf_movein(), bd_wdcount every call that did not result in a
 * successful output (including zero-length writes).
 *
 * Returns 0 on success (and for a zero-length write), ENXIO when no
 * interface is bound, ENETDOWN when the interface is not up, or the error
 * from devfs_get_cdevpriv(), bpf_movein() or if_output.
 */
static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
        struct bpf_d *d;
        struct ifnet *ifp;
        struct mbuf *m, *mc;
        struct sockaddr dst;
        int error, hlen;

        error = devfs_get_cdevpriv((void **)&d);
        if (error != 0)
                return (error);

        /* NOTE(review): these descriptor fields are updated unlocked. */
        d->bd_pid = curthread->td_proc->p_pid;
        d->bd_wcount++;
        if (d->bd_bif == NULL) {
                d->bd_wdcount++;
                return (ENXIO);
        }

        ifp = d->bd_bif->bif_ifp;

        if ((ifp->if_flags & IFF_UP) == 0) {
                d->bd_wdcount++;
                return (ENETDOWN);
        }

        /* An empty write is counted as dropped but is not an error. */
        if (uio->uio_resid == 0) {
                d->bd_wdcount++;
                return (0);
        }

        /*
         * bpf_movein() expects a zeroed sockaddr.  On failure it has
         * already freed any mbuf it allocated, so there is nothing to
         * clean up here.
         */
        bzero(&dst, sizeof(dst));
        m = NULL;
        hlen = 0;
        error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
            &m, &dst, &hlen, d->bd_wfilter);
        if (error) {
                d->bd_wdcount++;
                return (error);
        }
        d->bd_wfcount++;
        /* "Header complete" mode overrides the address family. */
        if (d->bd_hdrcmplt)
                dst.sa_family = pseudo_AF_HDRCMPLT;

        if (d->bd_feedback) {
                /*
                 * Duplicate the packet for the input path.  The copy is
                 * allocated without waiting; if that fails the feedback
                 * is skipped.
                 */
                mc = m_dup(m, M_DONTWAIT);
                if (mc != NULL)
                        mc->m_pkthdr.rcvif = ifp;
                /* Set M_PROMISC for outgoing packets to be discarded. */
                if (d->bd_direction == BPF_D_INOUT)
                        m->m_flags |= M_PROMISC;
        } else
                mc = NULL;

        /*
         * Strip the link header (already copied into dst by bpf_movein())
         * from the outgoing mbuf.  The duplicate was taken before this and
         * therefore still carries the header.
         */
        m->m_pkthdr.len -= hlen;
        m->m_len -= hlen;
        m->m_data += hlen;      /* XXX */

#ifdef MAC
        BPFD_LOCK(d);
        mac_bpfdesc_create_mbuf(d, m);
        if (mc != NULL)
                mac_bpfdesc_create_mbuf(d, mc);
        BPFD_UNLOCK(d);
#endif

        /* m is not touched again after being passed to if_output. */
        error = (*ifp->if_output)(ifp, m, &dst, NULL);
        if (error)
                d->bd_wdcount++;

        /* Feed the duplicate back only if the output succeeded. */
        if (mc != NULL) {
                if (error == 0)
                        (*ifp->if_input)(ifp, mc);
                else
                        m_freem(mc);
        }

        return (error);
}
894 
895 /*
896  * Reset a descriptor by flushing its packet buffer and clearing the
897  * receive and drop counts.
898  */
899 static void
900 reset_d(struct bpf_d *d)
901 {
902 
903         mtx_assert(&d->bd_mtx, MA_OWNED);
904         if (d->bd_hbuf) {
905                 /* Free the hold buffer. */
906                 d->bd_fbuf = d->bd_hbuf;
907                 d->bd_hbuf = NULL;
908                 bpf_buf_reclaimed(d);
909         }
910         d->bd_slen = 0;
911         d->bd_hlen = 0;
912         d->bd_rcount = 0;
913         d->bd_dcount = 0;
914         d->bd_fcount = 0;
915         d->bd_wcount = 0;
916         d->bd_wfcount = 0;
917         d->bd_wdcount = 0;
918         d->bd_zcopy = 0;
919 }
920 
921 /*
922  *  FIONREAD            Check for read packet available.
923  *  SIOCGIFADDR         Get interface address - convenient hook to driver.
924  *  BIOCGBLEN           Get buffer len [for read()].
925  *  BIOCSETF            Set read filter.
926  *  BIOCSETFNR          Set read filter without resetting descriptor.
927  *  BIOCSETWF           Set write filter.
928  *  BIOCFLUSH           Flush read packet buffer.
929  *  BIOCPROMISC         Put interface into promiscuous mode.
930  *  BIOCGDLT            Get link layer type.
931  *  BIOCGETIF           Get interface name.
932  *  BIOCSETIF           Set interface.
933  *  BIOCSRTIMEOUT       Set read timeout.
934  *  BIOCGRTIMEOUT       Get read timeout.
935  *  BIOCGSTATS          Get packet stats.
936  *  BIOCIMMEDIATE       Set immediate mode.
937  *  BIOCVERSION         Get filter language version.
938  *  BIOCGHDRCMPLT       Get "header already complete" flag
939  *  BIOCSHDRCMPLT       Set "header already complete" flag
940  *  BIOCGDIRECTION      Get packet direction flag
941  *  BIOCSDIRECTION      Set packet direction flag
942  *  BIOCLOCK            Set "locked" flag
943  *  BIOCFEEDBACK        Set packet feedback mode.
944  *  BIOCSETZBUF         Set current zero-copy buffer locations.
945  *  BIOCGETZMAX         Get maximum zero-copy buffer size.
946  *  BIOCROTZBUF         Force rotation of zero-copy buffer
947  *  BIOCSETBUFMODE      Set buffer mode.
948  *  BIOCGETBUFMODE      Get current buffer mode.
949  */
950 /* ARGSUSED */
951 static  int
952 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
953     struct thread *td)
954 {
955         struct bpf_d *d;
956         int error;
957 
958         error = devfs_get_cdevpriv((void **)&d);
959         if (error != 0)
960                 return (error);
961 
962         /*
963          * Refresh PID associated with this descriptor.
964          */
965         BPFD_LOCK(d);
966         d->bd_pid = td->td_proc->p_pid;
967         if (d->bd_state == BPF_WAITING)
968                 callout_stop(&d->bd_callout);
969         d->bd_state = BPF_IDLE;
970         BPFD_UNLOCK(d);
971 
972         if (d->bd_locked == 1) {
973                 switch (cmd) {
974                 case BIOCGBLEN:
975                 case BIOCFLUSH:
976                 case BIOCGDLT:
977                 case BIOCGDLTLIST:
978                 case BIOCGETIF:
979                 case BIOCGRTIMEOUT:
980                 case BIOCGSTATS:
981                 case BIOCVERSION:
982                 case BIOCGRSIG:
983                 case BIOCGHDRCMPLT:
984                 case BIOCFEEDBACK:
985                 case FIONREAD:
986                 case BIOCLOCK:
987                 case BIOCSRTIMEOUT:
988                 case BIOCIMMEDIATE:
989                 case TIOCGPGRP:
990                 case BIOCROTZBUF:
991                         break;
992                 default:
993                         return (EPERM);
994                 }
995         }
996         switch (cmd) {
997 
998         default:
999                 error = EINVAL;
1000                 break;
1001 
1002         /*
1003          * Check for read packet available.
1004          */
1005         case FIONREAD:
1006                 {
1007                         int n;
1008 
1009                         BPFD_LOCK(d);
1010                         n = d->bd_slen;
1011                         if (d->bd_hbuf)
1012                                 n += d->bd_hlen;
1013                         BPFD_UNLOCK(d);
1014 
1015                         *(int *)addr = n;
1016                         break;
1017                 }
1018 
1019         case SIOCGIFADDR:
1020                 {
1021                         struct ifnet *ifp;
1022 
1023                         if (d->bd_bif == NULL)
1024                                 error = EINVAL;
1025                         else {
1026                                 ifp = d->bd_bif->bif_ifp;
1027                                 error = (*ifp->if_ioctl)(ifp, cmd, addr);
1028                         }
1029                         break;
1030                 }
1031 
1032         /*
1033          * Get buffer len [for read()].
1034          */
1035         case BIOCGBLEN:
1036                 *(u_int *)addr = d->bd_bufsize;
1037                 break;
1038 
1039         /*
1040          * Set buffer length.
1041          */
1042         case BIOCSBLEN:
1043                 error = bpf_ioctl_sblen(d, (u_int *)addr);
1044                 break;
1045 
1046         /*
1047          * Set link layer read filter.
1048          */
1049         case BIOCSETF:
1050         case BIOCSETFNR:
1051         case BIOCSETWF:
1052                 error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1053                 break;
1054 
1055         /*
1056          * Flush read packet buffer.
1057          */
1058         case BIOCFLUSH:
1059                 BPFD_LOCK(d);
1060                 reset_d(d);
1061                 BPFD_UNLOCK(d);
1062                 break;
1063 
1064         /*
1065          * Put interface into promiscuous mode.
1066          */
1067         case BIOCPROMISC:
1068                 if (d->bd_bif == NULL) {
1069                         /*
1070                          * No interface attached yet.
1071                          */
1072                         error = EINVAL;
1073                         break;
1074                 }
1075                 if (d->bd_promisc == 0) {
1076                         error = ifpromisc(d->bd_bif->bif_ifp, 1);
1077                         if (error == 0)
1078                                 d->bd_promisc = 1;
1079                 }
1080                 break;
1081 
1082         /*
1083          * Get current data link type.
1084          */
1085         case BIOCGDLT:
1086                 if (d->bd_bif == NULL)
1087                         error = EINVAL;
1088                 else
1089                         *(u_int *)addr = d->bd_bif->bif_dlt;
1090                 break;
1091 
1092         /*
1093          * Get a list of supported data link types.
1094          */
1095         case BIOCGDLTLIST:
1096                 if (d->bd_bif == NULL)
1097                         error = EINVAL;
1098                 else
1099                         error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1100                 break;
1101 
1102         /*
1103          * Set data link type.
1104          */
1105         case BIOCSDLT:
1106                 if (d->bd_bif == NULL)
1107                         error = EINVAL;
1108                 else
1109                         error = bpf_setdlt(d, *(u_int *)addr);
1110                 break;
1111 
1112         /*
1113          * Get interface name.
1114          */
1115         case BIOCGETIF:
1116                 if (d->bd_bif == NULL)
1117                         error = EINVAL;
1118                 else {
1119                         struct ifnet *const ifp = d->bd_bif->bif_ifp;
1120                         struct ifreq *const ifr = (struct ifreq *)addr;
1121 
1122                         strlcpy(ifr->ifr_name, ifp->if_xname,
1123                             sizeof(ifr->ifr_name));
1124                 }
1125                 break;
1126 
1127         /*
1128          * Set interface.
1129          */
1130         case BIOCSETIF:
1131                 error = bpf_setif(d, (struct ifreq *)addr);
1132                 break;
1133 
1134         /*
1135          * Set read timeout.
1136          */
1137         case BIOCSRTIMEOUT:
1138                 {
1139                         struct timeval *tv = (struct timeval *)addr;
1140 
1141                         /*
1142                          * Subtract 1 tick from tvtohz() since this isn't
1143                          * a one-shot timer.
1144                          */
1145                         if ((error =