FreeBSD/Linux Kernel Cross Reference
sys/net/bpf.c
1 /*-
2 * Copyright (c) 1990, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from the Stanford/CMU enet packet filter,
6 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
7 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
8 * Berkeley Laboratory.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)bpf.c 8.4 (Berkeley) 1/9/95
35 */
36
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD: src/sys/net/bpf.c,v 1.202 2008/10/23 15:53:51 des Exp $");
39
40 #include "opt_bpf.h"
41 #include "opt_mac.h"
42 #include "opt_netgraph.h"
43
44 #include <sys/types.h>
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/conf.h>
48 #include <sys/fcntl.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/time.h>
52 #include <sys/priv.h>
53 #include <sys/proc.h>
54 #include <sys/signalvar.h>
55 #include <sys/filio.h>
56 #include <sys/sockio.h>
57 #include <sys/ttycom.h>
58 #include <sys/uio.h>
59
60 #include <sys/event.h>
61 #include <sys/file.h>
62 #include <sys/poll.h>
63 #include <sys/proc.h>
64
65 #include <sys/socket.h>
66
67 #include <net/if.h>
68 #include <net/bpf.h>
69 #include <net/bpf_buffer.h>
70 #ifdef BPF_JITTER
71 #include <net/bpf_jitter.h>
72 #endif
73 #include <net/bpf_zerocopy.h>
74 #include <net/bpfdesc.h>
75
76 #include <netinet/in.h>
77 #include <netinet/if_ether.h>
78 #include <sys/kernel.h>
79 #include <sys/sysctl.h>
80
81 #include <net80211/ieee80211_freebsd.h>
82
83 #include <security/mac/mac_framework.h>
84
85 MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
86
87 #if defined(DEV_BPF) || defined(NETGRAPH_BPF)
88
89 #define PRINET 26 /* interruptible */
90
91 /*
92 * bpf_iflist is a list of BPF interface structures, each corresponding to a
93 * specific DLT. The same network interface might have several BPF interface
94 * structures registered by different layers in the stack (i.e., 802.11
95 * frames, ethernet frames, etc).
96 */
/* Head of the list of registered BPF interface structures. */
97 static LIST_HEAD(, bpf_if) bpf_iflist;
98 static struct mtx bpf_mtx; /* bpf global lock */
/*
 * Number of descriptors currently attached to an interface: incremented in
 * bpf_attachd() and decremented in bpf_detachd().
 */
99 static int bpf_bpfd_cnt;
100
/*
 * Forward declarations of the file-local (static) helpers, so they can be
 * referenced before their definitions.
 */
101 static void bpf_attachd(struct bpf_d *, struct bpf_if *);
102 static void bpf_detachd(struct bpf_d *);
103 static void bpf_freed(struct bpf_d *);
104 static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
105 struct sockaddr *, int *, struct bpf_insn *);
106 static int bpf_setif(struct bpf_d *, struct ifreq *);
107 static void bpf_timed_out(void *);
108 static __inline void
109 bpf_wakeup(struct bpf_d *);
110 static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
111 void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
112 struct timeval *);
113 static void reset_d(struct bpf_d *);
114 static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
115 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
116 static int bpf_setdlt(struct bpf_d *, u_int);
117 static void filt_bpfdetach(struct knote *);
118 static int filt_bpfread(struct knote *, long);
119 static void bpf_drvinit(void *);
120 static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
121
/* Parent node "bpf" under the _net sysctl tree; the knobs below hang off it. */
122 SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
/* Read/write tunable, initialised to BPF_MAXINSNS. */
123 int bpf_maxinsns = BPF_MAXINSNS;
124 SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
125 &bpf_maxinsns, 0, "Maximum bpf program instructions");
/* Read/write tunable for zero-copy buffer sessions; off (0) by default. */
126 static int bpf_zerocopy_enable = 0;
127 SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
128 &bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
/* "stats" node under the bpf node; requests go to bpf_stats_sysctl(). */
129 SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_RW,
130 bpf_stats_sysctl, "bpf statistics portal");
131
/* Character-device entry points implemented in this file. */
132 static d_open_t bpfopen;
133 static d_read_t bpfread;
134 static d_write_t bpfwrite;
135 static d_ioctl_t bpfioctl;
136 static d_poll_t bpfpoll;
137 static d_kqfilter_t bpfkqfilter;
138
/* Device switch table binding the "bpf" device to the entry points above. */
139 static struct cdevsw bpf_cdevsw = {
140 .d_version = D_VERSION,
141 .d_open = bpfopen,
142 .d_read = bpfread,
143 .d_write = bpfwrite,
144 .d_ioctl = bpfioctl,
145 .d_poll = bpfpoll,
146 .d_name = "bpf",
147 .d_kqfilter = bpfkqfilter,
148 };
149
/*
 * kqueue filter operations for read events.  The initializer is positional;
 * NOTE(review): presumably { f_isfd, f_attach, f_detach, f_event } -- confirm
 * against the struct filterops definition.
 */
150 static struct filterops bpfread_filtops =
151 { 1, NULL, filt_bpfdetach, filt_bpfread };
152
153 /*
154 * Wrapper functions for various buffering methods. If the set of buffer
155 * modes expands, we will probably want to introduce a switch data structure
156 * similar to protosw, et.
157 */
158 static void
159 bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
160 u_int len)
161 {
162
163 BPFD_LOCK_ASSERT(d);
164
165 switch (d->bd_bufmode) {
166 case BPF_BUFMODE_BUFFER:
167 return (bpf_buffer_append_bytes(d, buf, offset, src, len));
168
169 case BPF_BUFMODE_ZBUF:
170 d->bd_zcopy++;
171 return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
172
173 default:
174 panic("bpf_buf_append_bytes");
175 }
176 }
177
178 static void
179 bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
180 u_int len)
181 {
182
183 BPFD_LOCK_ASSERT(d);
184
185 switch (d->bd_bufmode) {
186 case BPF_BUFMODE_BUFFER:
187 return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
188
189 case BPF_BUFMODE_ZBUF:
190 d->bd_zcopy++;
191 return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
192
193 default:
194 panic("bpf_buf_append_mbuf");
195 }
196 }
197
198 /*
199 * This function gets called when the free buffer is re-assigned.
200 */
201 static void
202 bpf_buf_reclaimed(struct bpf_d *d)
203 {
204
205 BPFD_LOCK_ASSERT(d);
206
207 switch (d->bd_bufmode) {
208 case BPF_BUFMODE_BUFFER:
209 return;
210
211 case BPF_BUFMODE_ZBUF:
212 bpf_zerocopy_buf_reclaimed(d);
213 return;
214
215 default:
216 panic("bpf_buf_reclaimed");
217 }
218 }
219
220 /*
221 * If the buffer mechanism has a way to decide that a held buffer can be made
222 * free, then it is exposed via the bpf_canfreebuf() interface. (1) is
223 * returned if the buffer can be discarded, (0) is returned if it cannot.
224 */
225 static int
226 bpf_canfreebuf(struct bpf_d *d)
227 {
228
229 BPFD_LOCK_ASSERT(d);
230
231 switch (d->bd_bufmode) {
232 case BPF_BUFMODE_ZBUF:
233 return (bpf_zerocopy_canfreebuf(d));
234 }
235 return (0);
236 }
237
238 /*
239 * Allow the buffer model to indicate that the current store buffer is
240 * immutable, regardless of the appearance of space. Return (1) if the
241 * buffer is writable, and (0) if not.
242 */
243 static int
244 bpf_canwritebuf(struct bpf_d *d)
245 {
246
247 BPFD_LOCK_ASSERT(d);
248
249 switch (d->bd_bufmode) {
250 case BPF_BUFMODE_ZBUF:
251 return (bpf_zerocopy_canwritebuf(d));
252 }
253 return (1);
254 }
255
256 /*
257 * Notify buffer model that an attempt to write to the store buffer has
258 * resulted in a dropped packet, in which case the buffer may be considered
259 * full.
260 */
261 static void
262 bpf_buffull(struct bpf_d *d)
263 {
264
265 BPFD_LOCK_ASSERT(d);
266
267 switch (d->bd_bufmode) {
268 case BPF_BUFMODE_ZBUF:
269 bpf_zerocopy_buffull(d);
270 break;
271 }
272 }
273
274 /*
275 * Notify the buffer model that a buffer has moved into the hold position.
276 */
277 void
278 bpf_bufheld(struct bpf_d *d)
279 {
280
281 BPFD_LOCK_ASSERT(d);
282
283 switch (d->bd_bufmode) {
284 case BPF_BUFMODE_ZBUF:
285 bpf_zerocopy_bufheld(d);
286 break;
287 }
288 }
289
290 static void
291 bpf_free(struct bpf_d *d)
292 {
293
294 switch (d->bd_bufmode) {
295 case BPF_BUFMODE_BUFFER:
296 return (bpf_buffer_free(d));
297
298 case BPF_BUFMODE_ZBUF:
299 return (bpf_zerocopy_free(d));
300
301 default:
302 panic("bpf_buf_free");
303 }
304 }
305
306 static int
307 bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
308 {
309
310 if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
311 return (EOPNOTSUPP);
312 return (bpf_buffer_uiomove(d, buf, len, uio));
313 }
314
315 static int
316 bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
317 {
318
319 if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
320 return (EOPNOTSUPP);
321 return (bpf_buffer_ioctl_sblen(d, i));
322 }
323
324 static int
325 bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
326 {
327
328 if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
329 return (EOPNOTSUPP);
330 return (bpf_zerocopy_ioctl_getzmax(td, d, i));
331 }
332
333 static int
334 bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
335 {
336
337 if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
338 return (EOPNOTSUPP);
339 return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
340 }
341
342 static int
343 bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
344 {
345
346 if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
347 return (EOPNOTSUPP);
348 return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
349 }
350
351 /*
352 * General BPF functions.
353 */
354 static int
355 bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
356 struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
357 {
358 const struct ieee80211_bpf_params *p;
359 struct ether_header *eh;
360 struct mbuf *m;
361 int error;
362 int len;
363 int hlen;
364 int slen;
365
366 /*
367 * Build a sockaddr based on the data link layer type.
368 * We do this at this level because the ethernet header
369 * is copied directly into the data field of the sockaddr.
370 * In the case of SLIP, there is no header and the packet
371 * is forwarded as is.
372 * Also, we are careful to leave room at the front of the mbuf
373 * for the link level header.
374 */
375 switch (linktype) {
376
377 case DLT_SLIP:
378 sockp->sa_family = AF_INET;
379 hlen = 0;
380 break;
381
382 case DLT_EN10MB:
383 sockp->sa_family = AF_UNSPEC;
384 /* XXX Would MAXLINKHDR be better? */
385 hlen = ETHER_HDR_LEN;
386 break;
387
388 case DLT_FDDI:
389 sockp->sa_family = AF_IMPLINK;
390 hlen = 0;
391 break;
392
393 case DLT_RAW:
394 sockp->sa_family = AF_UNSPEC;
395 hlen = 0;
396 break;
397
398 case DLT_NULL:
399 /*
400 * null interface types require a 4 byte pseudo header which
401 * corresponds to the address family of the packet.
402 */
403 sockp->sa_family = AF_UNSPEC;
404 hlen = 4;
405 break;
406
407 case DLT_ATM_RFC1483:
408 /*
409 * en atm driver requires 4-byte atm pseudo header.
410 * though it isn't standard, vpi:vci needs to be
411 * specified anyway.
412 */
413 sockp->sa_family = AF_UNSPEC;
414 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
415 break;
416
417 case DLT_PPP:
418 sockp->sa_family = AF_UNSPEC;
419 hlen = 4; /* This should match PPP_HDRLEN */
420 break;
421
422 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
423 sockp->sa_family = AF_IEEE80211;
424 hlen = 0;
425 break;
426
427 case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */
428 sockp->sa_family = AF_IEEE80211;
429 sockp->sa_len = 12; /* XXX != 0 */
430 hlen = sizeof(struct ieee80211_bpf_params);
431 break;
432
433 default:
434 return (EIO);
435 }
436
437 len = uio->uio_resid;
438
439 if (len - hlen > ifp->if_mtu)
440 return (EMSGSIZE);
441
442 if ((unsigned)len > MJUM16BYTES)
443 return (EIO);
444
445 if (len <= MHLEN)
446 MGETHDR(m, M_WAIT, MT_DATA);
447 else if (len <= MCLBYTES)
448 m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
449 else
450 m = m_getjcl(M_WAIT, MT_DATA, M_PKTHDR,
451 #if (MJUMPAGESIZE > MCLBYTES)
452 len <= MJUMPAGESIZE ? MJUMPAGESIZE :
453 #endif
454 (len <= MJUM9BYTES ? MJUM9BYTES : MJUM16BYTES));
455 m->m_pkthdr.len = m->m_len = len;
456 m->m_pkthdr.rcvif = NULL;
457 *mp = m;
458
459 if (m->m_len < hlen) {
460 error = EPERM;
461 goto bad;
462 }
463
464 error = uiomove(mtod(m, u_char *), len, uio);
465 if (error)
466 goto bad;
467
468 slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
469 if (slen == 0) {
470 error = EPERM;
471 goto bad;
472 }
473
474 /* Check for multicast destination */
475 switch (linktype) {
476 case DLT_EN10MB:
477 eh = mtod(m, struct ether_header *);
478 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
479 if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
480 ETHER_ADDR_LEN) == 0)
481 m->m_flags |= M_BCAST;
482 else
483 m->m_flags |= M_MCAST;
484 }
485 break;
486 }
487
488 /*
489 * Make room for link header, and copy it to sockaddr
490 */
491 if (hlen != 0) {
492 if (sockp->sa_family == AF_IEEE80211) {
493 /*
494 * Collect true length from the parameter header
495 * NB: sockp is known to be zero'd so if we do a
496 * short copy unspecified parameters will be
497 * zero.
498 * NB: packet may not be aligned after stripping
499 * bpf params
500 * XXX check ibp_vers
501 */
502 p = mtod(m, const struct ieee80211_bpf_params *);
503 hlen = p->ibp_len;
504 if (hlen > sizeof(sockp->sa_data)) {
505 error = EINVAL;
506 goto bad;
507 }
508 }
509 bcopy(m->m_data, sockp->sa_data, hlen);
510 }
511 *hdrlen = hlen;
512
513 return (0);
514 bad:
515 m_freem(m);
516 return (error);
517 }
518
519 /*
520 * Attach file to the bpf interface, i.e. make d listen on bp.
521 */
522 static void
523 bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
524 {
525 /*
526 * Point d at bp, and add d to the interface's list of listeners.
527 * Finally, point the driver's bpf cookie at the interface so
528 * it will divert packets to bpf.
529 */
530 BPFIF_LOCK(bp);
531 d->bd_bif = bp;
532 LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
533
534 bpf_bpfd_cnt++;
535 BPFIF_UNLOCK(bp);
536 }
537
538 /*
539 * Detach a file from its interface.
540 */
541 static void
542 bpf_detachd(struct bpf_d *d)
543 {
544 int error;
545 struct bpf_if *bp;
546 struct ifnet *ifp;
547
548 bp = d->bd_bif;
549 BPFIF_LOCK(bp);
550 BPFD_LOCK(d);
551 ifp = d->bd_bif->bif_ifp;
552
553 /*
554 * Remove d from the interface's descriptor list.
555 */
556 LIST_REMOVE(d, bd_next);
557
558 bpf_bpfd_cnt--;
559 d->bd_bif = NULL;
560 BPFD_UNLOCK(d);
561 BPFIF_UNLOCK(bp);
562
563 /*
564 * Check if this descriptor had requested promiscuous mode.
565 * If so, turn it off.
566 */
567 if (d->bd_promisc) {
568 d->bd_promisc = 0;
569 error = ifpromisc(ifp, 0);
570 if (error != 0 && error != ENXIO) {
571 /*
572 * ENXIO can happen if a pccard is unplugged
573 * Something is really wrong if we were able to put
574 * the driver into promiscuous mode, but can't
575 * take it out.
576 */
577 if_printf(bp->bif_ifp,
578 "bpf_detach: ifpromisc failed (%d)\n", error);
579 }
580 }
581 }
582
583 /*
584 * Close the descriptor by detaching it from its interface,
585 * deallocating its buffers, and marking it free.
586 */
587 static void
588 bpf_dtor(void *data)
589 {
590 struct bpf_d *d = data;
591
592 BPFD_LOCK(d);
593 if (d->bd_state == BPF_WAITING)
594 callout_stop(&d->bd_callout);
595 d->bd_state = BPF_IDLE;
596 BPFD_UNLOCK(d);
597 funsetown(&d->bd_sigio);
598 mtx_lock(&bpf_mtx);
599 if (d->bd_bif)
600 bpf_detachd(d);
601 mtx_unlock(&bpf_mtx);
602 selwakeuppri(&d->bd_sel, PRINET);
603 #ifdef MAC
604 mac_bpfdesc_destroy(d);
605 #endif /* MAC */
606 knlist_destroy(&d->bd_sel.si_note);
607 bpf_freed(d);
608 free(d, M_BPF);
609 }
610
611 /*
612 * Open ethernet device. Returns ENXIO for illegal minor device number,
613 * EBUSY if file is open by another process.
614 */
615 /* ARGSUSED */
616 static int
617 bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
618 {
619 struct bpf_d *d;
620 int error;
621
622 d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
623 error = devfs_set_cdevpriv(d, bpf_dtor);
624 if (error != 0) {
625 free(d, M_BPF);
626 return (error);
627 }
628
629 /*
630 * For historical reasons, perform a one-time initialization call to
631 * the buffer routines, even though we're not yet committed to a
632 * particular buffer method.
633 */
634 bpf_buffer_init(d);
635 d->bd_bufmode = BPF_BUFMODE_BUFFER;
636 d->bd_sig = SIGIO;
637 d->bd_direction = BPF_D_INOUT;
638 d->bd_pid = td->td_proc->p_pid;
639 #ifdef MAC
640 mac_bpfdesc_init(d);
641 mac_bpfdesc_create(td->td_ucred, d);
642 #endif
643 mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
644 callout_init(&d->bd_callout, CALLOUT_MPSAFE);
645 knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);
646
647 return (0);
648 }
649
650 /*
651 * bpfread - read next chunk of packets from buffers
652 */
653 static int
654 bpfread(struct cdev *dev, struct uio *uio, int ioflag)
655 {
656 struct bpf_d *d;
657 int timed_out;
658 int error;
659
660 error = devfs_get_cdevpriv((void **)&d);
661 if (error != 0)
662 return (error);
663
664 /*
665 * Restrict application to use a buffer the same size as
666 * as kernel buffers.
667 */
668 if (uio->uio_resid != d->bd_bufsize)
669 return (EINVAL);
670
671 BPFD_LOCK(d);
672 d->bd_pid = curthread->td_proc->p_pid;
673 if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
674 BPFD_UNLOCK(d);
675 return (EOPNOTSUPP);
676 }
677 if (d->bd_state == BPF_WAITING)
678 callout_stop(&d->bd_callout);
679 timed_out = (d->bd_state == BPF_TIMED_OUT);
680 d->bd_state = BPF_IDLE;
681 /*
682 * If the hold buffer is empty, then do a timed sleep, which
683 * ends when the timeout expires or when enough packets
684 * have arrived to fill the store buffer.
685 */
686 while (d->bd_hbuf == NULL) {
687 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
688 /*
689 * A packet(s) either arrived since the previous
690 * read or arrived while we were asleep.
691 * Rotate the buffers and return what's here.
692 */
693 ROTATE_BUFFERS(d);
694 break;
695 }
696
697 /*
698 * No data is available, check to see if the bpf device
699 * is still pointed at a real interface. If not, return
700 * ENXIO so that the userland process knows to rebind
701 * it before using it again.
702 */
703 if (d->bd_bif == NULL) {
704 BPFD_UNLOCK(d);
705 return (ENXIO);
706 }
707
708 if (ioflag & O_NONBLOCK) {
709 BPFD_UNLOCK(d);
710 return (EWOULDBLOCK);
711 }
712 error = msleep(d, &d->bd_mtx, PRINET|PCATCH,
713 "bpf", d->bd_rtout);
714 if (error == EINTR || error == ERESTART) {
715 BPFD_UNLOCK(d);
716 return (error);
717 }
718 if (error == EWOULDBLOCK) {
719 /*
720 * On a timeout, return what's in the buffer,
721 * which may be nothing. If there is something
722 * in the store buffer, we can rotate the buffers.
723 */
724 if (d->bd_hbuf)
725 /*
726 * We filled up the buffer in between
727 * getting the timeout and arriving
728 * here, so we don't need to rotate.
729 */
730 break;
731
732 if (d->bd_slen == 0) {
733 BPFD_UNLOCK(d);
734 return (0);
735 }
736 ROTATE_BUFFERS(d);
737 break;
738 }
739 }
740 /*
741 * At this point, we know we have something in the hold slot.
742 */
743 BPFD_UNLOCK(d);
744
745 /*
746 * Move data from hold buffer into user space.
747 * We know the entire buffer is transferred since
748 * we checked above that the read buffer is bpf_bufsize bytes.
749 *
750 * XXXRW: More synchronization needed here: what if a second thread
751 * issues a read on the same fd at the same time? Don't want this
752 * getting invalidated.
753 */
754 error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
755
756 BPFD_LOCK(d);
757 d->bd_fbuf = d->bd_hbuf;
758 d->bd_hbuf = NULL;
759 d->bd_hlen = 0;
760 bpf_buf_reclaimed(d);
761 BPFD_UNLOCK(d);
762
763 return (error);
764 }
765
766 /*
767 * If there are processes sleeping on this descriptor, wake them up.
768 */
769 static __inline void
770 bpf_wakeup(struct bpf_d *d)
771 {
772
773 BPFD_LOCK_ASSERT(d);
774 if (d->bd_state == BPF_WAITING) {
775 callout_stop(&d->bd_callout);
776 d->bd_state = BPF_IDLE;
777 }
778 wakeup(d);
779 if (d->bd_async && d->bd_sig && d->bd_sigio)
780 pgsigio(&d->bd_sigio, d->bd_sig, 0);
781
782 selwakeuppri(&d->bd_sel, PRINET);
783 KNOTE_LOCKED(&d->bd_sel.si_note, 0);
784 }
785
786 static void
787 bpf_timed_out(void *arg)
788 {
789 struct bpf_d *d = (struct bpf_d *)arg;
790
791 BPFD_LOCK(d);
792 if (d->bd_state == BPF_WAITING) {
793 d->bd_state = BPF_TIMED_OUT;
794 if (d->bd_slen != 0)
795 bpf_wakeup(d);
796 }
797 BPFD_UNLOCK(d);
798 }
799
800 static int
801 bpf_ready(struct bpf_d *d)
802 {
803
804 BPFD_LOCK_ASSERT(d);
805
806 if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
807 return (1);
808 if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
809 d->bd_slen != 0)
810 return (1);
811 return (0);
812 }
813
814 static int
815 bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
816 {
817 struct bpf_d *d;
818 struct ifnet *ifp;
819 struct mbuf *m, *mc;
820 struct sockaddr dst;
821 int error, hlen;
822
823 error = devfs_get_cdevpriv((void **)&d);
824 if (error != 0)
825 return (error);
826
827 d->bd_pid = curthread->td_proc->p_pid;
828 d->bd_wcount++;
829 if (d->bd_bif == NULL) {
830 d->bd_wdcount++;
831 return (ENXIO);
832 }
833
834 ifp = d->bd_bif->bif_ifp;
835
836 if ((ifp->if_flags & IFF_UP) == 0) {
837 d->bd_wdcount++;
838 return (ENETDOWN);
839 }
840
841 if (uio->uio_resid == 0) {
842 d->bd_wdcount++;
843 return (0);
844 }
845
846 bzero(&dst, sizeof(dst));
847 m = NULL;
848 hlen = 0;
849 error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
850 &m, &dst, &hlen, d->bd_wfilter);
851 if (error) {
852 d->bd_wdcount++;
853 return (error);
854 }
855 d->bd_wfcount++;
856 if (d->bd_hdrcmplt)
857 dst.sa_family = pseudo_AF_HDRCMPLT;
858
859 if (d->bd_feedback) {
860 mc = m_dup(m, M_DONTWAIT);
861 if (mc != NULL)
862 mc->m_pkthdr.rcvif = ifp;
863 /* Set M_PROMISC for outgoing packets to be discarded. */
864 if (d->bd_direction == BPF_D_INOUT)
865 m->m_flags |= M_PROMISC;
866 } else
867 mc = NULL;
868
869 m->m_pkthdr.len -= hlen;
870 m->m_len -= hlen;
871 m->m_data += hlen; /* XXX */
872
873 #ifdef MAC
874 BPFD_LOCK(d);
875 mac_bpfdesc_create_mbuf(d, m);
876 if (mc != NULL)
877 mac_bpfdesc_create_mbuf(d, mc);
878 BPFD_UNLOCK(d);
879 #endif
880
881 error = (*ifp->if_output)(ifp, m, &dst, NULL);
882 if (error)
883 d->bd_wdcount++;
884
885 if (mc != NULL) {
886 if (error == 0)
887 (*ifp->if_input)(ifp, mc);
888 else
889 m_freem(mc);
890 }
891
892 return (error);
893 }
894
895 /*
896 * Reset a descriptor by flushing its packet buffer and clearing the
897 * receive and drop counts.
898 */
899 static void
900 reset_d(struct bpf_d *d)
901 {
902
903 mtx_assert(&d->bd_mtx, MA_OWNED);
904 if (d->bd_hbuf) {
905 /* Free the hold buffer. */
906 d->bd_fbuf = d->bd_hbuf;
907 d->bd_hbuf = NULL;
908 bpf_buf_reclaimed(d);
909 }
910 d->bd_slen = 0;
911 d->bd_hlen = 0;
912 d->bd_rcount = 0;
913 d->bd_dcount = 0;
914 d->bd_fcount = 0;
915 d->bd_wcount = 0;
916 d->bd_wfcount = 0;
917 d->bd_wdcount = 0;
918 d->bd_zcopy = 0;
919 }
920
921 /*
922 * FIONREAD Check for read packet available.
923 * SIOCGIFADDR Get interface address - convenient hook to driver.
924 * BIOCGBLEN Get buffer len [for read()].
925 * BIOCSETF Set read filter.
926 * BIOCSETFNR Set read filter without resetting descriptor.
927 * BIOCSETWF Set write filter.
928 * BIOCFLUSH Flush read packet buffer.
929 * BIOCPROMISC Put interface into promiscuous mode.
930 * BIOCGDLT Get link layer type.
931 * BIOCGETIF Get interface name.
932 * BIOCSETIF Set interface.
933 * BIOCSRTIMEOUT Set read timeout.
934 * BIOCGRTIMEOUT Get read timeout.
935 * BIOCGSTATS Get packet stats.
936 * BIOCIMMEDIATE Set immediate mode.
937 * BIOCVERSION Get filter language version.
938 * BIOCGHDRCMPLT Get "header already complete" flag
939 * BIOCSHDRCMPLT Set "header already complete" flag
940 * BIOCGDIRECTION Get packet direction flag
941 * BIOCSDIRECTION Set packet direction flag
942 * BIOCLOCK Set "locked" flag
943 * BIOCFEEDBACK Set packet feedback mode.
944 * BIOCSETZBUF Set current zero-copy buffer locations.
945 * BIOCGETZMAX Get maximum zero-copy buffer size.
946 * BIOCROTZBUF Force rotation of zero-copy buffer
947 * BIOCSETBUFMODE Set buffer mode.
948 * BIOCGETBUFMODE Get current buffer mode.
949 */
950 /* ARGSUSED */
951 static int
952 bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
953 struct thread *td)
954 {
955 struct bpf_d *d;
956 int error;
957
958 error = devfs_get_cdevpriv((void **)&d);
959 if (error != 0)
960 return (error);
961
962 /*
963 * Refresh PID associated with this descriptor.
964 */
965 BPFD_LOCK(d);
966 d->bd_pid = td->td_proc->p_pid;
967 if (d->bd_state == BPF_WAITING)
968 callout_stop(&d->bd_callout);
969 d->bd_state = BPF_IDLE;
970 BPFD_UNLOCK(d);
971
972 if (d->bd_locked == 1) {
973 switch (cmd) {
974 case BIOCGBLEN:
975 case BIOCFLUSH:
976 case BIOCGDLT:
977 case BIOCGDLTLIST:
978 case BIOCGETIF:
979 case BIOCGRTIMEOUT:
980 case BIOCGSTATS:
981 case BIOCVERSION:
982 case BIOCGRSIG:
983 case BIOCGHDRCMPLT:
984 case BIOCFEEDBACK:
985 case FIONREAD:
986 case BIOCLOCK:
987 case BIOCSRTIMEOUT:
988 case BIOCIMMEDIATE:
989 case TIOCGPGRP:
990 case BIOCROTZBUF:
991 break;
992 default:
993 return (EPERM);
994 }
995 }
996 switch (cmd) {
997
998 default:
999 error = EINVAL;
1000 break;
1001
1002 /*
1003 * Check for read packet available.
1004 */
1005 case FIONREAD:
1006 {
1007 int n;
1008
1009 BPFD_LOCK(d);
1010 n = d->bd_slen;
1011 if (d->bd_hbuf)
1012 n += d->bd_hlen;
1013 BPFD_UNLOCK(d);
1014
1015 *(int *)addr = n;
1016 break;
1017 }
1018
1019 case SIOCGIFADDR:
1020 {
1021 struct ifnet *ifp;
1022
1023 if (d->bd_bif == NULL)
1024 error = EINVAL;
1025 else {
1026 ifp = d->bd_bif->bif_ifp;
1027 error = (*ifp->if_ioctl)(ifp, cmd, addr);
1028 }
1029 break;
1030 }
1031
1032 /*
1033 * Get buffer len [for read()].
1034 */
1035 case BIOCGBLEN:
1036 *(u_int *)addr = d->bd_bufsize;
1037 break;
1038
1039 /*
1040 * Set buffer length.
1041 */
1042 case BIOCSBLEN:
1043 error = bpf_ioctl_sblen(d, (u_int *)addr);
1044 break;
1045
1046 /*
1047 * Set link layer read filter.
1048 */
1049 case BIOCSETF:
1050 case BIOCSETFNR:
1051 case BIOCSETWF:
1052 error = bpf_setf(d, (struct bpf_program *)addr, cmd);
1053 break;
1054
1055 /*
1056 * Flush read packet buffer.
1057 */
1058 case BIOCFLUSH:
1059 BPFD_LOCK(d);
1060 reset_d(d);
1061 BPFD_UNLOCK(d);
1062 break;
1063
1064 /*
1065 * Put interface into promiscuous mode.
1066 */
1067 case BIOCPROMISC:
1068 if (d->bd_bif == NULL) {
1069 /*
1070 * No interface attached yet.
1071 */
1072 error = EINVAL;
1073 break;
1074 }
1075 if (d->bd_promisc == 0) {
1076 error = ifpromisc(d->bd_bif->bif_ifp, 1);
1077 if (error == 0)
1078 d->bd_promisc = 1;
1079 }
1080 break;
1081
1082 /*
1083 * Get current data link type.
1084 */
1085 case BIOCGDLT:
1086 if (d->bd_bif == NULL)
1087 error = EINVAL;
1088 else
1089 *(u_int *)addr = d->bd_bif->bif_dlt;
1090 break;
1091
1092 /*
1093 * Get a list of supported data link types.
1094 */
1095 case BIOCGDLTLIST:
1096 if (d->bd_bif == NULL)
1097 error = EINVAL;
1098 else
1099 error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
1100 break;
1101
1102 /*
1103 * Set data link type.
1104 */
1105 case BIOCSDLT:
1106 if (d->bd_bif == NULL)
1107 error = EINVAL;
1108 else
1109 error = bpf_setdlt(d, *(u_int *)addr);
1110 break;
1111
1112 /*
1113 * Get interface name.
1114 */
1115 case BIOCGETIF:
1116 if (d->bd_bif == NULL)
1117 error = EINVAL;
1118 else {
1119 struct ifnet *const ifp = d->bd_bif->bif_ifp;
1120 struct ifreq *const ifr = (struct ifreq *)addr;
1121
1122 strlcpy(ifr->ifr_name, ifp->if_xname,
1123 sizeof(ifr->ifr_name));
1124 }
1125 break;
1126
1127 /*
1128 * Set interface.
1129 */
1130 case BIOCSETIF:
1131 error = bpf_setif(d, (struct ifreq *)addr);
1132 break;
1133
1134 /*
1135 * Set read timeout.
1136 */
1137 case BIOCSRTIMEOUT:
1138 {
1139 struct timeval *tv = (struct timeval *)addr;
1140
1141 /*
1142 * Subtract 1 tick from tvtohz() since this isn't
1143 * a one-shot timer.
1144 */
1145 if ((error = |