FreeBSD/Linux Kernel Cross Reference
sys/device/net_io.c
1 /*
2 * Mach Operating System
3 * Copyright (c) 1993-1989 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie Mellon
24 * the rights to redistribute these changes.
25 */
26 /*
27 * HISTORY
28 * $Log: net_io.c,v $
29 * Revision 2.30 93/11/17 16:34:06 dbg
30 * Changed 'long' to 'int' in BPF routines for 64-bit cleanup.
31 * [93/10/26 dbg]
32 *
33 * Added AST_KERNEL_CHECK to net_thread loop, to check for timer
34 * ASTs. Added ANSI function prototypes.
35 * [93/09/02 dbg]
36 *
37 * Revision 2.29 93/08/10 15:10:56 mrt
38 * Incorporated BPF+MATCH support from Masanobu Yuhara:
39 * Changed: garbage collection of dead filters for BPF.
40 * Added: BPF_MATCH_IMM support.
41 * Added: BPF support. Derived from tcpdump-2.2.1/bpf/net/bpf.h.
42 * [93/04/14 16:51:13 yuhara]
43 *
44 * Revision 2.28 93/05/15 18:53:22 mrt
45 * machparam.h -> machspl.h
46 *
47 * Revision 2.27 93/05/10 23:23:27 rvb
48 * Added TTD teledebug code to check for debugging packets.
49 * [93/03/01 grm]
50 *
51 * Revision 2.26 93/05/10 17:46:12 rvb
52 * Added test to check that buffer is large enough to hold data
53 * returned by net_getstat.
54 * [93/04/20 kivinen]
55 *
56 * Revision 2.25 93/01/14 17:27:08 danner
57 * 64bit cleanup.
58 * [92/11/30 af]
59 *
60 * Revision 2.24 92/08/03 17:33:48 jfriedl
61 * removed silly prototypes
62 * [92/08/02 jfriedl]
63 *
64 * Revision 2.23 92/05/21 17:09:38 jfriedl
65 * Cleanup to quiet gcc warnings.
66 * [92/05/16 jfriedl]
67 *
68 * Revision 2.22 92/03/10 16:25:28 jsb
69 * Changed parameters to netipc_net_packet.
70 * [92/03/09 12:57:30 jsb]
71 *
72 * Revision 2.21 92/01/03 20:03:57 dbg
73 * Add: NETF_PUSHHDR, NETF_PUSHSTK, NETF_PUSHIND, NETF_PUSHHDRIND.
74 * [91/12/23 dbg]
75 *
76 * Revision 2.20 91/08/28 11:11:28 jsb
77 * Panic if network write attempted with continuation.
78 * [91/08/12 17:29:53 dlb]
79 *
80 * Revision 2.19 91/08/24 11:55:55 af
81 * Missing include for Spls definitions.
82 * [91/08/02 02:45:16 af]
83 *
84 * Revision 2.18 91/08/03 18:17:43 jsb
85 * Added NORMA_ETHER support.
86 * [91/07/24 22:54:41 jsb]
87 *
88 * Revision 2.17 91/05/14 15:59:34 mrt
89 * Correcting copyright
90 *
91 * Revision 2.16 91/05/10 11:48:47 dbg
92 * Don't forget to copy the packet size when duplicating a packet
93 * for multiple filters in net_filter().
94 * [91/05/09 dpj]
95 *
96 * Revision 2.15 91/03/16 14:43:14 rpd
97 * Added net_thread, net_thread_continue.
98 * [91/02/13 rpd]
99 * Split net_rcv_msg_queue into high and low priority queues.
100 * Cap the total number of buffers allocated.
101 * [91/01/14 rpd]
102 *
103 * Added net_rcv_msg_queue_size, net_rcv_msg_queue_max.
104 * [91/01/12 rpd]
105 *
106 * Revision 2.14 91/02/14 14:37:07 mrt
107 * Added garbage collection of dead filters.
108 * [91/02/12 12:11:10 af]
109 *
110 * Revision 2.13 91/02/05 17:09:54 mrt
111 * Changed to new Mach copyright
112 * [91/01/31 17:30:04 mrt]
113 *
114 * Revision 2.12 91/01/08 15:09:48 rpd
115 * Replaced NET_KMSG_GET, NET_KMSG_FREE
116 * with net_kmsg_get, net_kmsg_put, net_kmsg_collect.
117 * Increased net_kmsg_ilist_min to 4.
118 * [91/01/05 rpd]
119 * Fixed net_rcv_msg_thread to round message sizes up to an int multiple.
120 * [90/12/07 rpd]
121 *
122 * Fixed net_rcv_msg_thread to not set vm_privilege.
123 * [90/11/29 rpd]
124 *
125 * Revision 2.11 90/09/09 23:20:00 rpd
126 * Zero the mapped_size stats for non mappable interfaces.
127 * [90/08/30 17:41:00 af]
128 *
129 * Revision 2.10 90/08/27 21:55:18 dbg
130 * If multiple filters receive a packet, copy the header as well as
131 * the body. Fix from Dan Julin.
132 * [90/08/27 dbg]
133 *
134 * Fix filter check to account for literal word.
135 * [90/07/17 dbg]
136 *
137 * Revision 2.9 90/08/06 15:06:57 rwd
138 * Fixed a bug in parse_net_filter(), that was reading the
139 * litteral from NETF_PUSHLIT as an instruction.
140 * [90/07/18 21:56:20 dpj]
141 *
142 * Revision 2.8 90/06/02 14:48:14 rpd
143 * Converted to new IPC.
144 * [90/03/26 21:57:43 rpd]
145 *
146 * Revision 2.7 90/02/22 20:02:21 dbg
147 * Track changes to kmsg structure.
148 * [90/01/31 dbg]
149 *
150 * Revision 2.6 90/01/11 11:42:20 dbg
151 * Make run in parallel.
152 * [89/12/15 dbg]
153 *
154 * Revision 2.5 89/12/08 19:52:22 rwd
155 * Picked up changes from rfr to minimize wired down memory
156 * [89/11/21 rwd]
157 *
158 * Revision 2.4 89/09/08 11:24:35 dbg
159 * Convert to run in kernel task. Removed some lint.
160 * [89/07/26 dbg]
161 *
162 * Revision 2.3 89/08/11 17:55:18 rwd
163 * Picked up change from rfr which made zone collectable and
164 * decreased min net_kmesg to 2.
165 * [89/08/10 rwd]
166 *
167 * Revision 2.2 89/08/05 16:06:58 rwd
168 * Changed device_map to device_task_map
169 * [89/08/04 rwd]
170 *
171 * 13-Mar-89 David Golub (dbg) at Carnegie-Mellon University
172 * Created.
173 *
174 */
175 /*
176 * Author: David B. Golub, Carnegie Mellon University
 *	Date:	3/89
178 *
179 * Network IO.
180 *
 *	Packet filter code taken from vaxif/enet.c written at
 *	CMU and Stanford.
183 */
184
185 /*
186 * Note: don't depend on anything in this file.
187 * It may change a lot real soon. -cmaeda 11 June 1993
188 */
189
190 #include <mach_ttd.h>
191 #include <norma_ether.h>
192
193 #include <sys/types.h>
194
195 #include <device/net_status.h>
196 #include <device/net_io.h>
197 #include <device/if_hdr.h>
198 #include <device/io_req.h>
199 #include <device/ds_routines.h>
200
201 #include <mach/boolean.h>
202 #include <mach/vm_param.h>
203
204 #include <ipc/ipc_port.h>
205 #include <ipc/ipc_kmsg.h>
206 #include <ipc/ipc_mqueue.h>
207
208 #include <kern/counters.h>
209 #include <kern/lock.h>
210 #include <kern/memory.h>
211 #include <kern/queue.h>
212 #include <kern/sched_prim.h>
213 #include <kern/thread.h>
214
215 #if NORMA_ETHER
216 #include <norma/ipc_ether.h>
217 #endif /*NORMA_ETHER*/
218
219 #include <machine/machspl.h>
220
221 #if MACH_TTD
222 #include <ttd/ttd_stub.h>
223 #endif /* MACH_TTD */
224
225 #if MACH_TTD
226 int kttd_async_counter= 0;
227 #endif /* MACH_TTD */
228
229
230 /*
231 * Packet Buffer Management
232 *
233 * This module manages a private pool of kmsg buffers.
234 */
235
236 /*
237 * List of net kmsgs queued to be sent to users.
238 * Messages can be high priority or low priority.
239 * The network thread processes high priority messages first.
240 */
241 decl_simple_lock_data(,net_queue_lock)
242 boolean_t net_thread_awake = FALSE;
243 struct ipc_kmsg_queue net_queue_high;
244 int net_queue_high_size = 0;
245 int net_queue_high_max = 0; /* for debugging */
246 struct ipc_kmsg_queue net_queue_low;
247 int net_queue_low_size = 0;
248 int net_queue_low_max = 0; /* for debugging */
249
250 /*
251 * List of net kmsgs that can be touched at interrupt level.
252 * If it is empty, we will also steal low priority messages.
253 */
254 decl_simple_lock_data(,net_queue_free_lock)
255 struct ipc_kmsg_queue net_queue_free;
256 int net_queue_free_size = 0; /* on free list */
257 int net_queue_free_max = 0; /* for debugging */
258
259 /*
260 * This value is critical to network performance.
261 * At least this many buffers should be sitting in net_queue_free.
262 * If this is set too small, we will drop network packets.
263 * Even a low drop rate (<1%) can cause severe network throughput problems.
264 * We add one to net_queue_free_min for every filter.
265 */
266 int net_queue_free_min = 3;
267
268 int net_queue_free_hits = 0; /* for debugging */
269 int net_queue_free_steals = 0; /* for debugging */
270 int net_queue_free_misses = 0; /* for debugging */
271
272 int net_kmsg_send_high_hits = 0; /* for debugging */
273 int net_kmsg_send_low_hits = 0; /* for debugging */
274 int net_kmsg_send_high_misses = 0; /* for debugging */
275 int net_kmsg_send_low_misses = 0; /* for debugging */
276
277 int net_thread_awaken = 0; /* for debugging */
278 int net_ast_taken = 0; /* for debugging */
279
280 decl_simple_lock_data(,net_kmsg_total_lock)
281 int net_kmsg_total = 0; /* total allocated */
282 int net_kmsg_max; /* initialized below */
283
284 vm_size_t net_kmsg_size; /* initialized below */
285
286 /*
287 * We want more buffers when there aren't enough in the free queue
288 * and the low priority queue. However, we don't want to allocate
289 * more than net_kmsg_max.
290 */
291
292 #define net_kmsg_want_more() \
293 (((net_queue_free_size + net_queue_low_size) < net_queue_free_min) && \
294 (net_kmsg_total < net_kmsg_max))
295
296 ipc_kmsg_t
297 net_kmsg_get(void)
298 {
299 register ipc_kmsg_t kmsg;
300 spl_t s;
301
302 /*
303 * First check the list of free buffers.
304 */
305 s = splimp();
306 simple_lock(&net_queue_free_lock);
307 kmsg = ipc_kmsg_queue_first(&net_queue_free);
308 if (kmsg != IKM_NULL) {
309 ipc_kmsg_rmqueue_first_macro(&net_queue_free, kmsg);
310 net_queue_free_size--;
311 net_queue_free_hits++;
312 }
313 simple_unlock(&net_queue_free_lock);
314
315 if (kmsg == IKM_NULL) {
316 /*
317 * Try to steal from the low priority queue.
318 */
319 simple_lock(&net_queue_lock);
320 kmsg = ipc_kmsg_queue_first(&net_queue_low);
321 if (kmsg != IKM_NULL) {
322 ipc_kmsg_rmqueue_first_macro(&net_queue_low, kmsg);
323 net_queue_low_size--;
324 net_queue_free_steals++;
325 }
326 simple_unlock(&net_queue_lock);
327 }
328
329 if (kmsg == IKM_NULL)
330 net_queue_free_misses++;
331 splx(s);
332
333 if (net_kmsg_want_more() || (kmsg == IKM_NULL)) {
334 boolean_t awake;
335
336 s = splimp();
337 simple_lock(&net_queue_lock);
338 awake = net_thread_awake;
339 net_thread_awake = TRUE;
340 simple_unlock(&net_queue_lock);
341 splx(s);
342
343 if (!awake)
344 thread_wakeup((event_t) &net_thread_awake);
345 }
346
347 return kmsg;
348 }
349
350 void
351 net_kmsg_put(register ipc_kmsg_t kmsg)
352 {
353 spl_t s;
354
355 s = splimp();
356 simple_lock(&net_queue_free_lock);
357 ipc_kmsg_enqueue_macro(&net_queue_free, kmsg);
358 if (++net_queue_free_size > net_queue_free_max)
359 net_queue_free_max = net_queue_free_size;
360 simple_unlock(&net_queue_free_lock);
361 splx(s);
362 }
363
/*
 * Release surplus buffers (beyond net_queue_free_min) from the
 * free pool back to the general kmsg allocator.  The free-list
 * lock is dropped around each net_kmsg_free call, because freeing
 * must happen at base spl without the simple lock held.
 */
void
net_kmsg_collect(void)
{
	register ipc_kmsg_t kmsg;
	spl_t s;

	s = splimp();
	simple_lock(&net_queue_free_lock);
	while (net_queue_free_size > net_queue_free_min) {
		kmsg = ipc_kmsg_dequeue(&net_queue_free);
		net_queue_free_size--;
		/* drop the lock and spl before freeing */
		simple_unlock(&net_queue_free_lock);
		splx(s);

		net_kmsg_free(kmsg);
		simple_lock(&net_kmsg_total_lock);
		net_kmsg_total--;
		simple_unlock(&net_kmsg_total_lock);

		/* reacquire before re-testing the loop condition */
		s = splimp();
		simple_lock(&net_queue_free_lock);
	}
	simple_unlock(&net_queue_free_lock);
	splx(s);
}
389
390 void
391 net_kmsg_more(void)
392 {
393 register ipc_kmsg_t kmsg;
394
395 /*
396 * Replenish net kmsg pool if low. We don't have the locks
397 * necessary to look at these variables, but that's OK because
398 * misread values aren't critical. The danger in this code is
399 * that while we allocate buffers, interrupts are happening
400 * which take buffers out of the free list. If we are not
401 * careful, we will sit in the loop and allocate a zillion
402 * buffers while a burst of packets arrives. So we count
403 * buffers in the low priority queue as available, because
404 * net_kmsg_get will make use of them, and we cap the total
405 * number of buffers we are willing to allocate.
406 */
407
408 while (net_kmsg_want_more()) {
409 simple_lock(&net_kmsg_total_lock);
410 net_kmsg_total++;
411 simple_unlock(&net_kmsg_total_lock);
412 kmsg = net_kmsg_alloc();
413 net_kmsg_put(kmsg);
414 }
415 }
416
417 /*
418 * Packet Filter Data Structures
419 *
420 * Each network interface has a set of packet filters
421 * that are run on incoming packets.
422 *
423 * Each packet filter may represent a single network
424 * session or multiple network sessions. For example,
425 * all application level TCP sessions would be represented
426 * by a single packet filter data structure.
427 *
428 * If a packet filter has a single session, we use a
429 * struct net_rcv_port to represent it. If the packet
430 * filter represents multiple sessions, we use a
431 * struct net_hash_header to represent it.
432 */
433
434 /*
435 * Each interface has a write port and a set of read ports.
436 * Each read port has one or more filters to determine what packets
437 * should go to that port.
438 */
439
440 /*
441 * Receive port for net, with packet filter.
442 * This data structure by itself represents a packet
443 * filter for a single session.
444 */
struct net_rcv_port {
	queue_chain_t	chain;		/* list of open_descriptors */
	ipc_port_t	rcv_port;	/* port to send packet to */
	int		rcv_qlimit;	/* port's qlimit */
	int		rcv_count;	/* number of packets received;
					   used to reorder equal-priority
					   filters (see net_filter) */
	int		priority;	/* priority for filter */
	filter_t	*filter_end;	/* pointer to end of filter */
	filter_t	filter[NET_MAX_FILTER];
					/* filter operations */
};
typedef struct net_rcv_port *net_rcv_port_t;
456
457 zone_t net_rcv_zone; /* zone of net_rcv_port structs */
458
459
460 #define NET_HASH_SIZE 256
461 #define N_NET_HASH 4
462 #define N_NET_HASH_KEYS 4
463
464 unsigned int bpf_hash (int, unsigned int *);
465 boolean_t
466 bpf_eq (
467 register bpf_insn_t f1,
468 register bpf_insn_t f2,
469 register int bytes);
470 int
471 bpf_validate(
472 bpf_insn_t f,
473 int bytes,
474 bpf_insn_t *match); /* forward */
475
476 /*
477 * A single hash entry.
478 */
struct net_hash_entry {
	queue_chain_t	chain;		/* list of entries with same hval */
#define he_next	chain.next
#define he_prev	chain.prev
	ipc_port_t	rcv_port;	/* destination port */
	int		rcv_qlimit;	/* qlimit for the port */
	unsigned int	keys[N_NET_HASH_KEYS];	/* match keys for this entry */
};
typedef struct net_hash_entry *net_hash_entry_t;
488
489 zone_t net_hash_entry_zone;
490
491 /*
492 * This structure represents a packet filter with multiple sessions.
493 *
494 * For example, all application level TCP sessions might be
495 * represented by one of these structures. It looks like a
496 * net_rcv_port struct so that both types can live on the
497 * same packet filter queues.
498 */
struct net_hash_header {
	struct net_rcv_port rcv;	/* overlays net_rcv_port so both kinds
					   can live on the same filter list */
	int n_keys;			/* zero if not used */
	int ref_count;			/* reference count */
	net_hash_entry_t table[NET_HASH_SIZE];	/* buckets of hash entries */
} filter_hash_header[N_NET_HASH];	/* static pool of hash filters */

typedef struct net_hash_header *net_hash_header_t;
507
508 decl_simple_lock_data(,net_hash_header_lock)
509
/*
 * Iterate over a hash bucket chain; the head itself is visited as the
 * first element.  Must be closed with HASH_ITERATE_END.
 */
#define HASH_ITERATE(head, elt) (elt) = (net_hash_entry_t) (head); do {
#define HASH_ITERATE_END(head, elt) \
	(elt) = (net_hash_entry_t) queue_next((queue_entry_t) (elt)); \
	} while ((elt) != (head));


/*
 * Iterate over every filter (receive port) on an interface.
 * (nextfp) is fetched before the loop body runs, so the body may
 * safely unlink (fp).  Must be closed with FILTER_ITERATE_END.
 */
#define FILTER_ITERATE(ifp, fp, nextfp) \
	for ((fp) = (net_rcv_port_t) queue_first(&(ifp)->if_rcv_port_list);\
	     !queue_end(&(ifp)->if_rcv_port_list, (queue_entry_t)(fp)); \
	     (fp) = (nextfp)) { \
		(nextfp) = (net_rcv_port_t) queue_next(&(fp)->chain);
#define FILTER_ITERATE_END }

/* entry_p must be net_rcv_port_t or net_hash_entry_t */
/*
 * Push a dead filter/entry onto a singly-linked "dead" list (reusing
 * chain.next) so it can be deallocated after locks are dropped.
 */
#define ENQUEUE_DEAD(dead, entry_p) { \
	queue_next(&(entry_p)->chain) = (queue_entry_t) (dead);	\
	(dead) = (queue_entry_t)(entry_p);			\
}
528
529 int
530 net_add_q_info (
531 ipc_port_t rcv_port); /* forward */
532 void
533 net_free_dead_infp (
534 queue_entry_t dead_infp); /* forward */
535 void
536 net_free_dead_entp (
537 queue_entry_t dead_entp); /* forward */
538
539 boolean_t
540 hash_ent_remove (
541 struct ifnet *ifp,
542 net_hash_header_t hp,
543 int used,
544 net_hash_entry_t *head,
545 net_hash_entry_t entp,
546 queue_entry_t *dead_p); /* forward */
547
548 boolean_t
549 net_do_filter( /* CSPF */
550 net_rcv_port_t infp,
551 char * data,
552 unsigned int data_count,
553 char * header);
554 int
555 bpf_do_filter( /* BPF */
556 net_rcv_port_t infp,
557 char * p, /* packet data */
558 unsigned int wirelen, /* data_count (in bytes) */
559 char * header,
560 net_hash_entry_t **hash_headpp,
561 net_hash_entry_t *entpp); /* out */
562
563
564 /*
565 * ethernet_priority:
566 *
567 * This function properly belongs in the ethernet interfaces;
568 * it should not be called by this module. (We get packet
569 * priorities as an argument to net_filter.) It is here
570 * to avoid massive code duplication.
571 *
572 * Returns TRUE for high-priority packets.
573 */
574
575 boolean_t ethernet_priority(
576 ipc_kmsg_t kmsg)
577 {
578 register unsigned char *addr =
579 (unsigned char *) net_kmsg(kmsg)->header;
580
581 /*
582 * A simplistic check for broadcast packets.
583 */
584
585 if ((addr[0] == 0xff) && (addr[1] == 0xff) &&
586 (addr[2] == 0xff) && (addr[3] == 0xff) &&
587 (addr[4] == 0xff) && (addr[5] == 0xff))
588 return FALSE;
589 else
590 return TRUE;
591 }
592
/*
 * Template msg_type descriptors copied into each outgoing
 * net_rcv_msg (see net_deliver).
 */
mach_msg_type_t header_type = {
	MACH_MSG_TYPE_BYTE,	/* name */
	8,			/* size */
	NET_HDW_HDR_MAX,	/* number */
	TRUE,			/* inline */
	FALSE,			/* longform */
	FALSE,			/* deallocate */
	0			/* unused */
};

mach_msg_type_t packet_type = {
	MACH_MSG_TYPE_BYTE,	/* name */
	8,			/* size */
	0,			/* number */
	TRUE,			/* inline */
	FALSE,			/* longform */
	FALSE			/* deallocate */
};
611
612 /*
613 * net_deliver:
614 *
615 * Called and returns holding net_queue_lock, at splimp.
616 * Dequeues a message and delivers it at spl0.
617 * Returns FALSE if no messages.
618 */
boolean_t net_deliver(
	boolean_t nonblocking)
{
	register ipc_kmsg_t kmsg;
	boolean_t high_priority;
	struct ipc_kmsg_queue send_list;

	/*
	 * Pick up a pending network message and deliver it.
	 * Deliver high priority messages before low priority.
	 */

	if ((kmsg = ipc_kmsg_dequeue(&net_queue_high)) != IKM_NULL) {
	    net_queue_high_size--;
	    high_priority = TRUE;
	} else if ((kmsg = ipc_kmsg_dequeue(&net_queue_low)) != IKM_NULL) {
	    net_queue_low_size--;
	    high_priority = FALSE;
	} else
	    return FALSE;
	/*
	 * Caller holds net_queue_lock at splimp; drop both for the
	 * delivery work and reacquire before returning.
	 */
	simple_unlock(&net_queue_lock);
	(void) spl0();

	/*
	 * Run the packet through the filters,
	 * getting back a queue of packets to send.
	 */
	net_filter(kmsg, &send_list);

	if (!nonblocking) {
	    /*
	     * There is a danger of running out of available buffers
	     * because they all get moved into the high priority queue
	     * or a port queue.  In particular, we might need to
	     * allocate more buffers as we pull (previously available)
	     * buffers out of the low priority queue.  But we can only
	     * allocate if we are allowed to block.
	     */
	    net_kmsg_more();
	}

	while ((kmsg = ipc_kmsg_dequeue(&send_list)) != IKM_NULL) {
	    int count;

	    /*
	     * Fill in the rest of the kmsg.  The filter stage stored
	     * the packet size in net_rcv_msg_packet_count; preserve
	     * it across ikm_init_special.
	     */
	    count = net_kmsg(kmsg)->net_rcv_msg_packet_count;

	    ikm_init_special(kmsg, IKM_SIZE_NETWORK);

	    kmsg->ikm_header.msgh_bits =
		    MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, 0);
	    /* remember message sizes must be rounded up */
	    kmsg->ikm_header.msgh_size =
		    (((mach_msg_size_t) (sizeof(struct net_rcv_msg)
					- NET_RCV_MAX + count))+3) &~ 3;
	    kmsg->ikm_header.msgh_local_port = MACH_PORT_NULL;
	    kmsg->ikm_header.msgh_kind = MACH_MSGH_KIND_NORMAL;
	    kmsg->ikm_header.msgh_id = NET_RCV_MSG_ID;

	    net_kmsg(kmsg)->header_type = header_type;
	    net_kmsg(kmsg)->packet_type = packet_type;
	    net_kmsg(kmsg)->net_rcv_msg_packet_count = count;

	    /*
	     * Send the packet to the destination port.  Drop it
	     * if the destination port is over its backlog.
	     */

	    if (ipc_mqueue_send(kmsg, MACH_SEND_TIMEOUT, 0) ==
						MACH_MSG_SUCCESS) {
		if (high_priority)
		    net_kmsg_send_high_hits++;
		else
		    net_kmsg_send_low_hits++;
		/* the receiver is responsible for the message now */
	    } else {
		if (high_priority)
		    net_kmsg_send_high_misses++;
		else
		    net_kmsg_send_low_misses++;
		ipc_kmsg_destroy(kmsg);
	    }
	}

	/*
	 * Handle other ASTs here if blocking.  Must
	 * pay particular attention to the timer queue.
	 *
	 * *** This will call net_ast() if AST_NETWORK is
	 *     set.  However, since net_thread_awake should
	 *     also be set if nonblocking, net_ast will not
	 *     recursively call net_deliver().
	 *
	 * *** We should really just check AST_TIMER.
	 */
	if (!nonblocking) {
	    AST_KERNEL_CHECK(cpu_number());
	}

	(void) splimp();
	simple_lock(&net_queue_lock);
	return TRUE;
}
724
725 /*
726 * We want to deliver packets using ASTs, so we can avoid the
727 * thread_wakeup/thread_block needed to get to the network
728 * thread. However, we can't allocate memory in the AST handler,
729 * because memory allocation might block. Hence we have the
730 * network thread to allocate memory. The network thread also
731 * delivers packets, so it can be allocating and delivering for a
732 * burst. net_thread_awake is protected by net_queue_lock
733 * (instead of net_queue_free_lock) so that net_packet and
734 * net_ast can safely determine if the network thread is running.
735 * This prevents a race that might leave a packet sitting without
736 * being delivered. It is possible for net_kmsg_get to think
737 * the network thread is awake, and so avoid a wakeup, and then
738 * have the network thread sleep without allocating. The next
739 * net_kmsg_get will do a wakeup.
740 */
741
742 void net_ast(void)
743 {
744 spl_t s;
745
746 net_ast_taken++;
747
748 /*
749 * If the network thread is awake, then we would
750 * rather deliver messages from it, because
751 * it can also allocate memory.
752 */
753
754 s = splimp();
755 simple_lock(&net_queue_lock);
756 while (!net_thread_awake && net_deliver(TRUE))
757 continue;
758
759 /*
760 * Prevent an unnecessary AST. Either the network
761 * thread will deliver the messages, or there are
762 * no messages left to deliver.
763 */
764
765 simple_unlock(&net_queue_lock);
766 (void) splsched();
767 ast_off(cpu_number(), AST_NETWORK);
768 splx(s);
769 }
770
/*
 * Body of the network thread: allocate buffers and deliver
 * messages until both queues are empty, then sleep.  Also used
 * as the thread_block continuation, so the loop restarts here
 * each time the thread is awakened.
 */
no_return net_thread_continue(void)
{
	for (;;) {
		spl_t s;

		net_thread_awaken++;

		/*
		 * First get more buffers.
		 */
		net_kmsg_more();

		s = splimp();
		simple_lock(&net_queue_lock);
		while (net_deliver(FALSE))
			continue;

		/*
		 * Clear the awake flag and queue the wait while still
		 * holding net_queue_lock, so net_packet/net_kmsg_get
		 * cannot miss a wakeup.
		 */
		net_thread_awake = FALSE;
		assert_wait(&net_thread_awake, FALSE);
		simple_unlock(&net_queue_lock);
		splx(s);
		counter(c_net_thread_block++);
		thread_block(net_thread_continue);
		/*NOTREACHED*/
	}
}
797
/*
 * Entry point of the network thread.  Sets priority, then sleeps
 * until the first wakeup; all real work happens in
 * net_thread_continue.
 */
no_return net_thread(void)
{
	spl_t s;

	/*
	 * We should be very high priority.
	 */

	thread_set_own_priority(0);

	/*
	 * We sleep initially, so that we don't allocate any buffers
	 * unless the network is really in use and they are needed.
	 */

	s = splimp();
	simple_lock(&net_queue_lock);
	net_thread_awake = FALSE;
	assert_wait(&net_thread_awake, FALSE);
	simple_unlock(&net_queue_lock);
	splx(s);
	counter(c_net_thread_block++);
	thread_block_noreturn(net_thread_continue);
	/*NOTREACHED*/
}
823
824 void
825 reorder_queue(
826 register queue_t first,
827 register queue_t last)
828 {
829 register queue_entry_t prev, next;
830
831 prev = first->prev;
832 next = last->next;
833
834 prev->next = last;
835 next->prev = first;
836
837 last->prev = prev;
838 last->next = first;
839
840 first->next = next;
841 first->prev = last;
842 }
843
844 /*
845 * Incoming packet. Header has already been moved to proper place.
846 * We are already at splimp.
847 */
void
net_packet(
	register struct ifnet	*ifp,
	register ipc_kmsg_t	kmsg,
	unsigned int		count,
	boolean_t		priority)
{
	boolean_t awake;

#if	NORMA_ETHER
	/* give the NORMA IPC layer first claim on the packet */
	if (netipc_net_packet(kmsg, count)) {
		return;
	}
#endif	/* NORMA_ETHER */

#if	MACH_TTD
	/*
	 * Do a quick check to see if it is a kernel TTD packet.
	 *
	 * Only check if KernelTTD is enabled, ie. the current
	 * device driver supports TTD, and the bootp succeded.
	 */
	if (kttd_enabled && kttd_handle_async(kmsg)) {
		/*
		 * Packet was a valid ttd packet and
		 * doesn't need to be passed up to filter.
		 * The ttd code put the used kmsg buffer
		 * back onto the free list.
		 */
		if (kttd_debug)
			printf("**%x**", kttd_async_counter++);
		return;
	}
#endif	/* MACH_TTD */

	/* stash the interface in the (otherwise unused) remote-port slot */
	kmsg->ikm_header.msgh_remote_port = (mach_port_t) ifp;
	net_kmsg(kmsg)->net_rcv_msg_packet_count = count;

	simple_lock(&net_queue_lock);
	if (priority) {
	    ipc_kmsg_enqueue(&net_queue_high, kmsg);
	    if (++net_queue_high_size > net_queue_high_max)
		net_queue_high_max = net_queue_high_size;
	} else {
	    ipc_kmsg_enqueue(&net_queue_low, kmsg);
	    if (++net_queue_low_size > net_queue_low_max)
		net_queue_low_max = net_queue_low_size;
	}
	/*
	 * If the network thread is awake, then we don't
	 * need to take an AST, because the thread will
	 * deliver the packet.
	 */
	awake = net_thread_awake;
	simple_unlock(&net_queue_lock);

	if (!awake) {
	    spl_t s = splsched();
	    ast_on(cpu_number(), AST_NETWORK);
	    splx(s);
	}
}
910
911 int net_filter_queue_reorder = 0; /* non-zero to enable reordering */
912
913 /*
914 * Run a packet through the filters, returning a list of messages.
915 * We are *not* called at interrupt level.
916 */
void
net_filter(
	register ipc_kmsg_t	kmsg,
	ipc_kmsg_queue_t	send_list)
{
	register struct ifnet	*ifp;
	register net_rcv_port_t	infp, nextfp;
	register ipc_kmsg_t	new_kmsg;

	net_hash_entry_t	entp, *hash_headp;
	ipc_port_t		dest;
	queue_entry_t		dead_infp = (queue_entry_t) 0;
	queue_entry_t		dead_entp = (queue_entry_t) 0;
	unsigned int		ret_count;

	int count = net_kmsg(kmsg)->net_rcv_msg_packet_count;
	/* net_packet stored the interface pointer here */
	ifp = (struct ifnet *) kmsg->ikm_header.msgh_remote_port;
	ipc_kmsg_queue_init(send_list);

	/*
	 * Unfortunately we can't allocate or deallocate memory
	 * while holding this lock.  And we can't drop the lock
	 * while examining the filter list.  Dead filters are
	 * therefore set aside and freed after the lock is dropped.
	 */
	simple_lock(&ifp->if_rcv_port_list_lock);
	FILTER_ITERATE(ifp, infp, nextfp)
	{
	    entp = (net_hash_entry_t) 0;
	    if (infp->filter[0] == NETF_BPF) {
		/* BPF filter: may also resolve a per-session hash entry */
		ret_count = bpf_do_filter(infp, net_kmsg(kmsg)->packet, count,
					  net_kmsg(kmsg)->header,
					  &hash_headp, &entp);
		if (entp == (net_hash_entry_t) 0)
		    dest = infp->rcv_port;
		else
		    dest = entp->rcv_port;
	    } else {
		/* CSPF filter: boolean accept/reject, whole packet */
		ret_count = net_do_filter(infp, net_kmsg(kmsg)->packet, count,
					  net_kmsg(kmsg)->header);
		if (ret_count)
		    ret_count = count;
		dest = infp->rcv_port;
	    }

	    if (ret_count) {

		/*
		 * Make a send right for the destination.
		 */

		dest = ipc_port_copy_send(dest);
		if (!IP_VALID(dest)) {
		    /*
		     * This filter is dead.  We remove it from the
		     * filter list and set it aside for deallocation.
		     */

		    if (entp == (net_hash_entry_t) 0) {
			queue_remove(&ifp->if_rcv_port_list, infp,
				     net_rcv_port_t, chain);
			ENQUEUE_DEAD(dead_infp, infp);
			continue;
		    } else {
			(void) hash_ent_remove (
				ifp,
				(net_hash_header_t)infp,
				FALSE,		/* no longer used */
				hash_headp,
				entp,
				&dead_entp);
			continue;
		    }
		}

		/*
		 * Deliver copy of packet to this channel.
		 */
		if (ipc_kmsg_queue_empty(send_list)) {
		    /*
		     * Only receiver, so far: hand over the original
		     * buffer without copying.
		     */
		    new_kmsg = kmsg;
		} else {
		    /*
		     * Other receivers - must allocate message and copy.
		     */
		    new_kmsg = net_kmsg_get();
		    if (new_kmsg == IKM_NULL) {
			/* out of buffers: drop for remaining filters */
			ipc_port_release_send(dest);
			break;
		    }

		    bcopy(
			net_kmsg(kmsg)->packet,
			net_kmsg(new_kmsg)->packet,
			ret_count);
		    bcopy(
			net_kmsg(kmsg)->header,
			net_kmsg(new_kmsg)->header,
			NET_HDW_HDR_MAX);
		}
		net_kmsg(new_kmsg)->net_rcv_msg_packet_count = ret_count;
		new_kmsg->ikm_header.msgh_remote_port = (mach_port_t) dest;
		ipc_kmsg_enqueue(send_list, new_kmsg);

	    {
		register net_rcv_port_t prevfp;
		int rcount = ++infp->rcv_count;

		/*
		 * See if ordering of filters is wrong
		 */
		if (infp->priority >= NET_HI_PRI) {
		    prevfp = (net_rcv_port_t) queue_prev(&infp->chain);
		    /*
		     * If infp is not the first element on the queue,
		     * and the previous element is at equal priority
		     * but has a lower count, then promote infp to
		     * be in front of prevfp.
		     */
		    if ((queue_t)prevfp != &ifp->if_rcv_port_list &&
			infp->priority == prevfp->priority) {
			/*
			 * Threshold difference to prevent thrashing
			 */
			if (net_filter_queue_reorder
			    && (100 + prevfp->rcv_count < rcount))
				reorder_queue(&prevfp->chain, &infp->chain);
		    }
		    /*
		     * High-priority filter -> no more deliveries
		     */
		    break;
		}
	    }
	    }
	}
	FILTER_ITERATE_END

	simple_unlock(&ifp->if_rcv_port_list_lock);

	/*
	 * Deallocate dead filters.
	 */
	if (dead_infp != 0)
		net_free_dead_infp(dead_infp);
	if (dead_entp != 0)
		net_free_dead_entp(dead_entp);

	if (ipc_kmsg_queue_empty(send_list)) {
	    /* Not sent - recycle */
	    net_kmsg_put(kmsg);
	}
}
1071
/*
 * Run a CSPF (NETF) filter program against one packet.
 * Returns TRUE if the packet is accepted.  The filter is a small
 * stack machine: each instruction word encodes a push source
 * (NETF_ARG) and a binary/compare operator (NETF_OP).  The stack
 * grows downward from stack[NET_FILTER_STACK_DEPTH], which is
 * pre-loaded with TRUE so an empty filter accepts everything.
 */
boolean_t
net_do_filter(
	net_rcv_port_t	infp,
	char *		data,
	unsigned int	data_count,
	char *		header)
{
	int		stack[NET_FILTER_STACK_DEPTH+1];
	register int	*sp;
	register filter_t	*fp, *fpe;
	register unsigned int	op, arg;

	/*
	 * The filter accesses the header and data
	 * as unsigned short words.
	 */
	data_count /= sizeof(unsigned short);

#define	data_word	((unsigned short *)data)
#define	header_word	((unsigned short *)header)

	sp = &stack[NET_FILTER_STACK_DEPTH];
	fp = &infp->filter[0];
	fpe = infp->filter_end;

	*sp = TRUE;

	while (fp < fpe) {
	    arg = *fp++;
	    op = NETF_OP(arg);
	    arg = NETF_ARG(arg);

	    /* first, fetch the operand selected by the arg field */
	    switch (arg) {
		case NETF_NOPUSH:
		    arg = *sp++;		/* pop top of stack */
		    break;
		case NETF_PUSHZERO:
		    arg = 0;
		    break;
		case NETF_PUSHLIT:
		    arg = *fp++;		/* literal follows instruction */
		    break;
		case NETF_PUSHIND:
		    arg = *sp++;		/* popped value indexes data */
		    if (arg >= data_count)
			return FALSE;		/* out of range: reject */
		    arg = data_word[arg];
		    break;
		case NETF_PUSHHDRIND:
		    arg = *sp++;		/* popped value indexes header */
		    if (arg >= NET_HDW_HDR_MAX/sizeof(unsigned short))
			return FALSE;
		    arg = header_word[arg];
		    break;
		default:
		    if (arg >= NETF_PUSHSTK) {
			arg = sp[arg - NETF_PUSHSTK];	/* stack element */
		    }
		    else if (arg >= NETF_PUSHHDR) {
			arg = header_word[arg - NETF_PUSHHDR];
		    }
		    else {
			arg -= NETF_PUSHWORD;		/* data word */
			if (arg >= data_count)
			    return FALSE;
			arg = data_word[arg];
		    }
		    break;

	    }
	    /* then apply the operator to the operand and the stack */
	    switch (op) {
		case NETF_OP(NETF_NOP):
		    *--sp = arg;		/* plain push */
		    break;
		case NETF_OP(NETF_AND):
		    *sp &= arg;
		    break;
		case NETF_OP(NETF_OR):
		    *sp |= arg;
		    break;
		case NETF_OP(NETF_XOR):
		    *sp ^= arg;
		    break;
		case NETF_OP(NETF_EQ):
		    *sp = (*sp == arg);
		    break;
		case NETF_OP(NETF_NEQ):
		    *sp = (*sp != arg);
		    break;
		case NETF_OP(NETF_LT):
		    *sp = (*sp < arg);
		    break;
		case NETF_OP(NETF_LE):
		    *sp = (*sp <= arg);
		    break;
		case NETF_OP(NETF_GT):
		    *sp = (*sp > arg);
		    break;
		case NETF_OP(NETF_GE):
		    *sp = (*sp >= arg);
		    break;
		/* the C* operators short-circuit the whole filter */
		case NETF_OP(NETF_COR):
		    if (*sp++ == arg)
			return (TRUE);
		    break;
		case NETF_OP(NETF_CAND):
		    if (*sp++ != arg)
			return (FALSE);
		    break;
		case NETF_OP(NETF_CNOR):
		    if (*sp++ == arg)
			return (FALSE);
		    break;
		case NETF_OP(NETF_CNAND):
		    if (*sp++ != arg)
			return (TRUE);
		    break;
		case NETF_OP(NETF_LSH):
		    *sp <<= arg;
		    break;
		case NETF_OP(NETF_RSH):
		    *sp >>= arg;
		    break;
		case NETF_OP(NETF_ADD):
		    *sp += arg;
		    break;
		case NETF_OP(NETF_SUB):
		    *sp -= arg;
		    break;
	    }
	}
	/* result is whatever is left on top of the stack */
	return ((*sp) ? TRUE : FALSE);

#undef	data_word
#undef	header_word
}
1208
/*
 * Check a (classic NETF) filter program for invalid operations and
 * for stack over/under-flow, without reference to any packet.
 *
 * The run-time interpreter (net_do_filter) evaluates the program on
 * a downward-growing stack of NET_FILTER_STACK_DEPTH+1 words; this
 * routine simulates only the stack-pointer motion.  Data-relative
 * pushes (NETF_PUSHWORD...) cannot be bounds-checked here because no
 * packet is available; the interpreter checks them at run time.
 *
 * Returns TRUE if the program is acceptable, FALSE otherwise.
 */
boolean_t
parse_net_filter(
	register filter_t	*filter,
	unsigned int	count)
{
	register int	sp;		/* simulated stack pointer: starts at */
					/* NET_FILTER_STACK_DEPTH (empty) and */
					/* decreases by one per pushed value  */
	register filter_t	*fpe = &filter[count];
	register filter_t	op, arg;

	sp = NET_FILTER_STACK_DEPTH;

	for (; filter < fpe; filter++) {
	    op = NETF_OP(*filter);
	    arg = NETF_ARG(*filter);

	    /*
	     * First, account for the instruction's push (if any).
	     */
	    switch (arg) {
		case NETF_NOPUSH:
		    break;
		case NETF_PUSHZERO:
		    sp--;
		    break;
		case NETF_PUSHLIT:
		    /* Literal occupies the next program word. */
		    filter++;
		    if (filter >= fpe)
			return FALSE;	/* literal value not in filter */
		    sp--;
		    break;
		case NETF_PUSHIND:
		case NETF_PUSHHDRIND:
		    /* Pops an index and pushes a value: net zero. */
		    break;
		default:
		    if (arg >= NETF_PUSHSTK) {
			/* Stack-relative push must reference a live slot. */
			if (arg - NETF_PUSHSTK + sp > NET_FILTER_STACK_DEPTH)
			    return FALSE;
		    }
		    else if (arg >= NETF_PUSHHDR) {
			/* Header-relative push must stay inside the
			   hardware header. */
			if (arg - NETF_PUSHHDR >=
				NET_HDW_HDR_MAX/sizeof(unsigned short))
			    return FALSE;
		    }
		    /* else... cannot check for packet bounds
			without packet */
		    sp--;
		    break;
	    }
	    if (sp < 2) {
		return FALSE;	/* stack overflow */
	    }
	    if (op == NETF_OP(NETF_NOP))
		continue;

	    /*
	     * all non-NOP operators are binary.
	     *
	     * NOTE(review): this underflow check compares against
	     * NET_MAX_FILTER rather than NET_FILTER_STACK_DEPTH; it is
	     * only correct if NET_MAX_FILTER-2 <= NET_FILTER_STACK_DEPTH
	     * - 1 -- verify against <device/net_status.h>.
	     */
	    if (sp > NET_MAX_FILTER-2)
		return FALSE;

	    sp++;			/* binary op pops two, pushes one */
	    switch (op) {
		case NETF_OP(NETF_AND):
		case NETF_OP(NETF_OR):
		case NETF_OP(NETF_XOR):
		case NETF_OP(NETF_EQ):
		case NETF_OP(NETF_NEQ):
		case NETF_OP(NETF_LT):
		case NETF_OP(NETF_LE):
		case NETF_OP(NETF_GT):
		case NETF_OP(NETF_GE):
		case NETF_OP(NETF_COR):
		case NETF_OP(NETF_CAND):
		case NETF_OP(NETF_CNOR):
		case NETF_OP(NETF_CNAND):
		case NETF_OP(NETF_LSH):
		case NETF_OP(NETF_RSH):
		case NETF_OP(NETF_ADD):
		case NETF_OP(NETF_SUB):
		    break;
		default:
		    return FALSE;	/* unknown operator */
	    }
	}
	return TRUE;
}
1295
/*
 * Set a filter for a network interface.
 *
 * We are given a naked send right for the rcv_port.
 * If we are successful, we must consume that right.
 *
 * The filter is either a classic NETF program or, when its first
 * word is NETF_BPF, a BPF program.  A BPF program containing a
 * MATCH instruction describes one session of a multi-session
 * filter: such sessions share one hash-table filter structure
 * (a "dummy" net_rcv_port whose rcv_port is MACH_PORT_NULL) and
 * are distinguished by their key values.
 *
 * While scanning the existing filter list we also garbage-collect
 * entries whose destination ports have died; they are unlinked
 * under the lock and freed at the end, when no locks are held.
 *
 * Returns D_SUCCESS, D_INVALID_OPERATION (bad filter program), or
 * D_NO_MEMORY (no free hash header available for a MATCH filter).
 */
io_return_t
net_set_filter(
	struct ifnet	*ifp,
	ipc_port_t	rcv_port,
	int		priority,
	filter_t	*filter,
	unsigned int	filter_count)
{
    int		filter_bytes;
    bpf_insn_t	match;
    register net_rcv_port_t	infp, my_infp;
    net_rcv_port_t	nextfp;
    net_hash_header_t	hhp;
    register net_hash_entry_t entp, hash_entp;
    net_hash_entry_t	*head, nextentp;
    queue_entry_t	dead_infp, dead_entp;
    int		i;
    int		ret, is_new_infp;
    io_return_t	rval;

    /*
     * Check the filter syntax.
     */

    filter_bytes = CSPF_BYTES(filter_count);
    match = (bpf_insn_t) 0;

    if (filter_count > 0 && filter[0] == NETF_BPF) {
	/* bpf_validate sets 'match' to the MATCH insn, if any. */
	ret = bpf_validate((bpf_insn_t)filter, filter_bytes, &match);
	if (!ret)
	    return D_INVALID_OPERATION;
    } else {
	if (!parse_net_filter(filter, filter_count))
	    return D_INVALID_OPERATION;
    }

    rval = D_SUCCESS;			/* default return value */
    dead_infp = dead_entp = 0;

    if (match == (bpf_insn_t) 0) {
	/*
	 * If there is no match instruction, we allocate
	 * a normal packet filter structure.
	 */
	my_infp = (net_rcv_port_t) zalloc(net_rcv_zone);
	my_infp->rcv_port = rcv_port;
	is_new_infp = TRUE;
    } else {
	/*
	 * If there is a match instruction, we assume there will
	 * multiple session with a common substructure and allocate
	 * a hash table to deal with them.
	 */
	my_infp = 0;
	hash_entp = (net_hash_entry_t) zalloc(net_hash_entry_zone);
	is_new_infp = FALSE;
    }

    /*
     * Look for an existing filter on the same reply port.
     * Look for filters with dead ports (for GC).
     * Look for a filter with the same code except KEY insns.
     */

    simple_lock(&ifp->if_rcv_port_list_lock);

    FILTER_ITERATE(ifp, infp, nextfp)
    {
	if (infp->rcv_port == MACH_PORT_NULL) {
	    /*
	     * A dummy entry: the shared structure of a hash
	     * (MATCH) filter.  Reuse it if its program matches
	     * ours (KEY instructions excepted).
	     */
	    if (match != 0
		&& infp->priority == priority
		&& my_infp == 0
		&& (infp->filter_end - infp->filter) == filter_count
		&& bpf_eq((bpf_insn_t)infp->filter,
			  (bpf_insn_t)filter, filter_bytes))
	    {
		my_infp = infp;
	    }

	    for (i = 0; i < NET_HASH_SIZE; i++) {
		head = &((net_hash_header_t) infp)->table[i];
		if (*head == 0)
		    continue;

		/*
		 * Check each hash entry to make sure the
		 * destination port is still valid.  Remove
		 * any invalid entries.
		 */
		entp = *head;
		do {
		    nextentp = (net_hash_entry_t) entp->he_next;

		    /* checked without
		       ip_lock(entp->rcv_port) */
		    if (entp->rcv_port == rcv_port
			|| !IP_VALID(entp->rcv_port)
			|| !ip_active(entp->rcv_port)) {

			/*
			 * hash_ent_remove returns TRUE when it has
			 * also freed the whole hash header; the
			 * bucket scan must then stop.
			 */
			ret = hash_ent_remove (ifp,
					       (net_hash_header_t)infp,
					       (my_infp == infp),
					       head,
					       entp,
					       &dead_entp);
			if (ret)
			    goto hash_loop_end;
		    }

		    entp = nextentp;
		    /* While test checks head since hash_ent_remove
		       might modify it.
		     */
		} while (*head != 0 && entp != *head);
	    }
	    hash_loop_end:
		;

	} else if (infp->rcv_port == rcv_port
		   || !IP_VALID(infp->rcv_port)
		   || !ip_active(infp->rcv_port)) {
	    /* Remove the old filter from list */
	    remqueue(&ifp->if_rcv_port_list, (queue_entry_t)infp);
	    ENQUEUE_DEAD(dead_infp, infp);
	}
    }
    FILTER_ITERATE_END

    if (my_infp == 0) {
	/* Allocate a dummy infp */
	simple_lock(&net_hash_header_lock);
	for (i = 0; i < N_NET_HASH; i++) {
	    /* n_keys == 0 marks a free hash header. */
	    if (filter_hash_header[i].n_keys == 0)
		break;
	}
	if (i == N_NET_HASH) {
	    /* All headers in use: fail, releasing the send right
	       and the unused hash entry. */
	    simple_unlock(&net_hash_header_lock);
	    simple_unlock(&ifp->if_rcv_port_list_lock);

	    ipc_port_release_send(rcv_port);
	    if (match != 0)
		zfree (net_hash_entry_zone, (vm_offset_t)hash_entp);

	    rval = D_NO_MEMORY;
	    goto clean_and_return;
	}

	hhp = &filter_hash_header[i];
	hhp->n_keys = match->jt;	/* match->jt is the key count;
					   also claims this header */
	simple_unlock(&net_hash_header_lock);

	hhp->ref_count = 0;
	for (i = 0; i < NET_HASH_SIZE; i++)
	    hhp->table[i] = 0;

	my_infp = (net_rcv_port_t)hhp;
	my_infp->rcv_port = MACH_PORT_NULL;	/* indication of dummy */
	is_new_infp = TRUE;
    }

    if (is_new_infp) {
	my_infp->priority = priority;
	my_infp->rcv_count = 0;

	/* Copy filter program. */
	bcopy (filter, my_infp->filter, filter_bytes);
	my_infp->filter_end =
	    (filter_t *)((char *)my_infp->filter + filter_bytes);

	if (match == 0) {
	    my_infp->rcv_qlimit = net_add_q_info(rcv_port);
	} else {
	    /* Hash sessions account their qlimit per hash entry. */
	    my_infp->rcv_qlimit = 0;
	}

	/* Insert my_infp according to priority */
	queue_iterate(&ifp->if_rcv_port_list, infp, net_rcv_port_t, chain)
	    if (priority > infp->priority)
		break;
	enqueue_tail((queue_t)&infp->chain, (queue_entry_t)my_infp);
    }

    if (match != 0)
    {		/* Insert to hash list */
	net_hash_entry_t *p;

	hash_entp->rcv_port = rcv_port;
	for (i = 0; i < match->jt; i++)		/* match->jt is n_keys */
	    hash_entp->keys[i] = match[i+1].k;	/* KEY insns follow MATCH */
	p = &((net_hash_header_t)my_infp)->
	    table[bpf_hash(match->jt, hash_entp->keys)];

	/* Not checking for the same key values */
	if (*p == 0) {
	    queue_init ((queue_t) hash_entp);
	    *p = hash_entp;
	} else {
	    enqueue_tail((queue_t) *p, (queue_entry_t) hash_entp);
	}

	((net_hash_header_t)my_infp)->ref_count++;
	hash_entp->rcv_qlimit = net_add_q_info(rcv_port);

    }

    simple_unlock(&ifp->if_rcv_port_list_lock);

clean_and_return:
    /* No locks are held at this point. */

    if (dead_infp != 0)
	net_free_dead_infp(dead_infp);
    if (dead_entp != 0)
	net_free_dead_entp(dead_entp);

    return rval;
}
1519
/*
 * Other network operations
 */
/*
 * Get status of a network interface.
 *
 * flavor selects the record returned:
 *   NET_STATUS  - struct net_status describing packet sizes, header
 *                 format/size, address size and interface flags;
 *   NET_ADDRESS - the hardware address, zero-padded to an integral
 *                 number of ints, each int converted to network
 *                 byte order.
 *
 * On entry *count is the caller's buffer size (in ints); on success
 * it is updated to the number of ints actually written.
 * Returns D_SUCCESS or D_INVALID_OPERATION (unknown flavor or
 * buffer too small).
 */
io_return_t
net_getstat(
	struct ifnet	*ifp,
	dev_flavor_t	flavor,
	dev_status_t	status,		/* pointer to OUT array */
	natural_t	*count)		/* OUT */
{
	switch (flavor) {
	    case NET_STATUS:
	    {
		register struct net_status *ns = (struct net_status *)status;

		if (*count < NET_STATUS_COUNT)
		    return D_INVALID_OPERATION;

		ns->min_packet_size = ifp->if_header_size;
		ns->max_packet_size = ifp->if_header_size + ifp->if_mtu;
		ns->header_format   = ifp->if_header_format;
		ns->header_size	    = ifp->if_header_size;
		ns->address_size    = ifp->if_address_size;
		ns->flags	    = ifp->if_flags;
		ns->mapped_size	    = 0;

		*count = NET_STATUS_COUNT;
		break;
	    }
	    case NET_ADDRESS:
	    {
		register int	addr_byte_count;
		register int	addr_int_count;
		register int	i;

		/* Round the byte-sized address up to whole ints. */
		addr_byte_count = ifp->if_address_size;
		addr_int_count = (addr_byte_count + (sizeof(int)-1))
					 / sizeof(int);

		if (*count < addr_int_count)
		    return D_INVALID_OPERATION;

		bcopy((char *)ifp->if_address,
		      (char *)status,
		      (unsigned) addr_byte_count);
		/* Zero the pad bytes in the final int, if any. */
		if (addr_byte_count < addr_int_count * sizeof(int))
		    bzero((char *)status + addr_byte_count,
			  (unsigned) (addr_int_count * sizeof(int)
				      - addr_byte_count));

		/* Convert each int to network byte order. */
		for (i = 0; i < addr_int_count; i++) {
		    register int word;

		    word = status[i];
		    status[i] = htonl(word);
		}
		*count = addr_int_count;
		break;
	    }
	    default:
		return D_INVALID_OPERATION;
	}
	return D_SUCCESS;
}
1584
/*
 * Write a packet to a network interface.
 *
 * ifp    - the interface
 * start  - driver start routine, called (at splimp) with the unit
 *          number after the request is queued
 * ior    - the I/O request holding the packet
 *
 * Returns D_DEVICE_DOWN, D_INVALID_SIZE, an error from
 * device_write_get, or D_IO_QUEUED when the packet has been placed
 * on the interface's send queue (completion is asynchronous).
 */
io_return_t
net_write(
	register struct ifnet *ifp,
	void		(*start)(int),
	io_req_t	ior)
{
	spl_t	s;
	kern_return_t	rc;
	boolean_t	wait;

	/*
	 * Reject the write if the interface is down.
	 */
	if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
	    return D_DEVICE_DOWN;

	/*
	 * Reject the write if the packet is too large or too small.
	 */
	if (ior->io_count < ifp->if_header_size ||
	    ior->io_count > ifp->if_header_size + ifp->if_mtu)
	    return D_INVALID_SIZE;

	/*
	 * Wire down the memory.
	 */

	rc = device_write_get(ior, &wait);
	if (rc != KERN_SUCCESS)
	    return rc;

	/*
	 * Network interfaces can't cope with VM continuations.
	 * If wait is set, just panic.
	 */
	if (wait) {
	    panic("net_write: VM continuation");
	}

	/*
	 * Queue the packet on the output queue, and
	 * start the device.
	 */
	s = splimp();
	IF_ENQUEUE(&ifp->if_snd, ior);
	(*start)(ifp->if_unit);
	splx(s);

	return D_IO_QUEUED;
}
1635
/*
 * Initialize the whole package.
 *
 * Creates the allocation zones for filter structures and hash
 * entries, computes the rounded kernel-message buffer size, and
 * initializes the free/high/low message queues and their locks.
 * Called once at system startup, before any filters are set.
 */
void
net_io_init(void)
{
	register vm_size_t	size;

	size = sizeof(struct net_rcv_port);
	net_rcv_zone = zinit(size,
			     size * 1000,
			     PAGE_SIZE,
			     FALSE,
			     "net_rcv_port");

	size = sizeof(struct net_hash_entry);
	net_hash_entry_zone = zinit(size,
				    size * 100,
				    PAGE_SIZE,
				    FALSE,
				    "net_hash_entry");

	/* Each receive buffer holds one kmsg plus IPC overhead,
	   rounded up to a whole page. */
	size = ikm_plus_overhead(sizeof(struct net_rcv_msg));
	net_kmsg_size = round_page(size);

	/*
	 * net_kmsg_max caps the number of buffers
	 * we are willing to allocate.  By default,
	 * we allow for net_queue_free_min plus
	 * the queue limit for each filter.
	 * (Added as the filters are added.)
	 */

	simple_lock_init(&net_kmsg_total_lock);
	if (net_kmsg_max == 0)
	    net_kmsg_max = net_queue_free_min;

	simple_lock_init(&net_queue_free_lock);
	ipc_kmsg_queue_init(&net_queue_free);

	simple_lock_init(&net_queue_lock);
	ipc_kmsg_queue_init(&net_queue_high);
	ipc_kmsg_queue_init(&net_queue_low);

	simple_lock_init(&net_hash_header_lock);
}
1682
1683
1684 /* ======== BPF: Berkeley Packet Filter ======== */
1685
1686 /*-
1687 * Copyright (c) 1990-1991 The Regents of the University of California.
1688 * All rights reserved.
1689 *
1690 * This code is derived from the Stanford/CMU enet packet filter,
1691 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
1692 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
1693 * Berkeley Laboratory.
1694 *
1695 * Redistribution and use in source and binary forms, with or without
1696 * modification, are permitted provided that the following conditions
1697 * are met:
1698 * 1. Redistributions of source code must retain the above copyright
1699 * notice, this list of conditions and the following disclaimer.
1700 * 2. Redistributions in binary form must reproduce the above copyright
1701 * notice, this list of conditions and the following disclaimer in the
1702 * documentation and/or other materials provided with the distribution.
1703 * 3. All advertising materials mentioning features or use of this software
1704 * must display the following acknowledgement:
1705 * This product includes software developed by the University of
1706 * California, Berkeley and its contributors.
1707 * 4. Neither the name of the University nor the names of its contributors
1708 * may be used to endorse or promote products derived from this software
1709 * without specific prior written permission.
1710 *
1711 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
1712 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1713 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1714 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
1715 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1716 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
1717 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1718 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
1719 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
1720 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
1721 * SUCH DAMAGE.
1722 *
1723 * @(#)bpf.c 7.5 (Berkeley) 7/15/91
1724 *
1725 * static char rcsid[] =
1726 * "$Header: net_io.c,v 2.30 93/11/17 16:34:06 dbg Exp $";
1727 */
1728 #if !(defined(lint) || defined(KERNEL))
1729 static char rcsid[] =
1730 "@(#) $Header: net_io.c,v 2.30 93/11/17 16:34:06 dbg Exp $ (LBL)";
1731 #endif
1732
1733 #if defined(sparc) || defined(mips) || defined(ibm032) || defined(alpha)
1734 #define BPF_ALIGN
1735 #endif
1736
1737 #ifndef BPF_ALIGN
1738 #define EXTRACT_SHORT(p) ((unsigned short)ntohs(*(unsigned short *)p))
1739 #define EXTRACT_LONG(p) ((unsigned int) ntohl(*(unsigned int *) p))
1740 #else
1741 #define EXTRACT_SHORT(p)\
1742 ((unsigned short)\
1743 ((unsigned short)*((unsigned char *)p+0)<<8|\
1744 (unsigned short)*((unsigned char *)p+1)<<0))
1745 #define EXTRACT_LONG(p)\
1746 ((unsigned int) *((unsigned char *)p+0)<<24|\
1747 (unsigned int) *((unsigned char *)p+1)<<16|\
1748 (unsigned int) *((unsigned char *)p+2)<<8|\
1749 (unsigned int) *((unsigned char *)p+3)<<0)
1750 #endif
1751
1752 boolean_t
1753 bpf_match (
1754 net_hash_header_t hash,
1755 int n_keys,
1756 unsigned int *keys,
1757 net_hash_entry_t **hash_headpp,
1758 net_hash_entry_t *entpp); /* forward */
1759
1760 /*
1761 * Execute the filter program starting at pc on the packet p
1762 * wirelen is the length of the original packet
1763 * buflen is the amount of data present
1764 */
1765
1766 int
1767 bpf_do_filter(
1768 net_rcv_port_t infp,
1769 char * p, /* packet data */
1770 unsigned int wirelen, /* data_count (in bytes) */
1771 char * header,
1772 net_hash_entry_t **hash_headpp,
1773 net_hash_entry_t *entpp) /* out */
1774 {
1775 register bpf_insn_t pc, pc_end;
1776 register unsigned int buflen;
1777
1778 register unsigned int A, X;
1779 register int k;
1780 unsigned int mem[BPF_MEMWORDS];
1781
1782 pc = ((bpf_insn_t) infp->filter) + 1;
1783 /* filter[0].code is BPF_BEGIN */
1784 pc_end = (bpf_insn_t)infp->filter_end;
1785 buflen = NET_RCV_MAX;
1786 *entpp = 0; /* default */
1787
1788 #ifdef lint
1789 A = 0;
1790 X = 0;
1791 #endif
1792 for (; pc < pc_end; ++pc) {
1793 switch (pc->code) {
1794
1795 default:
1796 #ifdef KERNEL
1797 return 0;
1798 #else
1799 abort();
1800 #endif
1801 case BPF_RET|BPF_K:
1802 if (infp->rcv_port == MACH_PORT_NULL &&
1803 *entpp == 0) {
1804 return 0;
1805 }
1806 return ((unsigned int)pc->k <= wirelen)
1807 ? pc->k : wirelen;
1808
1809 case BPF_RET|BPF_A:
1810 if (infp->rcv_port == MACH_PORT_NULL &&
1811 *entpp == 0) {
1812 return 0;
1813 }
1814 return ((unsigned int)A <= wirelen)
1815 ? A : wirelen;
1816
1817 case BPF_RET|BPF_MATCH_IMM:
1818 if (bpf_match ((net_hash_header_t)infp, pc->jt, mem,
1819 hash_headpp, entpp)) {
1820 return ((unsigned int)pc->k <= wirelen) ?
1821 pc->k : wirelen;
1822 }
1823 return 0;
1824
1825 case BPF_LD|BPF_W|BPF_ABS:
1826 k = pc->k;
1827 if ((unsigned int)k + sizeof(int) <= buflen) {
1828 #ifdef BPF_ALIGN
1829 if (((int)(p + k) & 3) != 0)
1830 A = EXTRACT_LONG(&p[k]);
1831 else
1832 #endif
1833 A = ntohl(*(int *)(p + k));
1834 continue;
1835 }
1836
1837 k -= BPF_DLBASE;
1838 if ((unsigned int)k + sizeof(int) <= NET_HDW_HDR_MAX) {
1839 #ifdef BPF_ALIGN
1840 if (((int)(header + k) & 3) != 0)
1841 A = EXTRACT_LONG(&header[k]);
1842 else
1843 #endif
1844 A = ntohl(*(int *)(header + k));
1845 continue;
1846 } else {
1847 return 0;
1848 }
1849
1850 case BPF_LD|BPF_H|BPF_ABS:
1851 k = pc->k;
1852 if ((unsigned int)k + sizeof(short) <= buflen) {
1853 A = EXTRACT_SHORT(&p[k]);
1854 continue;
1855 }
1856
1857 k -= BPF_DLBASE;
1858 if ((unsigned int)k + sizeof(short)
1859 <= NET_HDW_HDR_MAX)
1860 {
1861 A = EXTRACT_SHORT(&header[k]);
1862 continue;
1863 } else {
1864 return 0;
1865 }
1866
1867 case BPF_LD|BPF_B|BPF_ABS:
1868 k = pc->k;
1869 if ((unsigned int)k < buflen) {
1870 A = p[k];
1871 continue;
1872 }
1873
1874 k -= BPF_DLBASE;
1875 if ((unsigned int)k < NET_HDW_HDR_MAX) {
1876 A = header[k];
1877 continue;
1878 } else {
1879 return 0;
1880 }
1881
1882 case BPF_LD|BPF_W|BPF_LEN:
1883 A = wirelen;
1884 continue;
1885
1886 case BPF_LDX|BPF_W|BPF_LEN:
1887 X = wirelen;
1888 continue;
1889
1890 case BPF_LD|BPF_W|BPF_IND:
1891 k = X + pc->k;
1892 if (k + sizeof(int) > buflen)
1893 return 0;
1894 #ifdef BPF_ALIGN
1895 if (((int)(p + k) & 3) != 0)
1896 A = EXTRACT_LONG(&p[k]);
1897 else
1898 #endif
1899 A = ntohl(*(int *)(p + k));
1900 continue;
1901
1902 case BPF_LD|BPF_H|BPF_IND:
1903 k = X + pc->k;
1904 if (k + sizeof(short) > buflen)
1905 return 0;
1906 A = EXTRACT_SHORT(&p[k]);
1907 continue;
1908
1909 case BPF_LD|BPF_B|BPF_IND:
1910 k = X + pc->k;
1911 if (k >= buflen)
1912 return 0;
1913 A = p[k];
1914 continue;
1915
1916 case BPF_LDX|BPF_MSH|BPF_B:
1917 k = pc->k;
1918 if (k >= buflen)
1919 return 0;
1920 X = (p[pc->k] & 0xf) << 2;
1921 continue;
1922
1923 case BPF_LD|BPF_IMM:
1924 A = pc->k;
1925 continue;
1926
1927 case BPF_LDX|BPF_IMM:
1928 X = pc->k;
1929 continue;
1930
1931 case BPF_LD|BPF_MEM:
1932 A = mem[pc->k];
1933 continue;
1934
1935 case BPF_LDX|BPF_MEM:
1936 X = mem[pc->k];
1937 continue;
1938
1939 case BPF_ST:
1940 mem[pc->k] = A;
1941 continue;
1942
1943 case BPF_STX:
1944 mem[pc->k] = X;
1945 continue;
1946
1947 case BPF_JMP|BPF_JA:
1948 pc += pc->k;
1949 continue;
1950
1951 case BPF_JMP|BPF_JGT|BPF_K:
1952 pc += (A > pc->k) ? pc->jt : pc->jf;
1953 continue;
1954
1955 case BPF_JMP|BPF_JGE|BPF_K:
1956 pc += (A >= pc->k) ? pc->jt : pc->jf;
1957 continue;
1958
1959 case BPF_JMP|BPF_JEQ|BPF_K:
1960 pc += (A == pc->k) ? pc->jt : pc->jf;
1961 continue;
1962
1963 case BPF_JMP|BPF_JSET|BPF_K:
1964 pc += (A & pc->k) ? pc->jt : pc->jf;
1965 continue;
1966
1967 case BPF_JMP|BPF_JGT|BPF_X:
1968 pc += (A > X) ? pc->jt : pc->jf;
1969 continue;
1970
1971 case BPF_JMP|BPF_JGE|BPF_X:
1972 pc += (A >= X) ? pc->jt : pc->jf;
1973 continue;
1974
1975 case BPF_JMP|BPF_JEQ|BPF_X:
1976 pc += (A == X) ? pc->jt : pc->jf;
1977 continue;
1978
1979 case BPF_JMP|BPF_JSET|BPF_X:
1980 pc += (A & X) ? pc->jt : pc->jf;
1981 continue;
1982
1983 case BPF_ALU|BPF_ADD|BPF_X:
1984 A += X;
1985 continue;
1986
1987 case BPF_ALU|BPF_SUB|BPF_X:
1988 A -= X;
1989 continue;
1990
1991 case BPF_ALU|BPF_MUL|BPF_X:
1992 A *= X;
1993 continue;
1994
1995 case BPF_ALU|BPF_DIV|BPF_X:
1996 if (X == 0)
1997 return 0;
1998 A /= X;
1999 continue;
2000
2001 case BPF_ALU|BPF_AND|BPF_X:
2002 A &= X;
2003 continue;
2004
2005 case BPF_ALU|BPF_OR|BPF_X:
2006 A |= X;
2007 continue;
2008
2009 case BPF_ALU|BPF_LSH|BPF_X:
2010 A <<= X;
2011 continue;
2012
2013 case BPF_ALU|BPF_RSH|BPF_X:
2014 A >>= X;
2015 continue;
2016
2017 case BPF_ALU|BPF_ADD|BPF_K:
2018 A += pc->k;
2019 continue;
2020
2021 case BPF_ALU|BPF_SUB|BPF_K:
2022 A -= pc->k;
2023 continue;
2024
2025 case BPF_ALU|BPF_MUL|BPF_K:
2026 A *= pc->k;
2027 continue;
2028
2029 case BPF_ALU|BPF_DIV|BPF_K:
2030 A /= pc->k;
2031 continue;
2032
2033 case BPF_ALU|BPF_AND|BPF_K:
2034 A &= pc->k;
2035 continue;
2036
2037 case BPF_ALU|BPF_OR|BPF_K:
2038 A |= pc->k;
2039 continue;
2040
2041 case BPF_ALU|BPF_LSH|BPF_K:
2042 A <<= pc->k;
2043 continue;
2044
2045 case BPF_ALU|BPF_RSH|BPF_K:
2046 A >>= pc->k;
2047 continue;
2048
2049 case BPF_ALU|BPF_NEG:
2050 A = -A;
2051 continue;
2052
2053 case BPF_MISC|BPF_TAX:
2054 X = A;
2055 continue;
2056
2057 case BPF_MISC|BPF_TXA:
2058 A = X;
2059 continue;
2060 }
2061 }
2062
2063 return 0;
2064 }
2065
/*
 * Return 1 if the 'f' is a valid filter program without a MATCH
 * instruction.  Return 2 if it is a valid filter program with a MATCH
 * instruction.  Otherwise, return 0.
 * The constraints are that each jump be forward and to a valid
 * code.  The code must terminate with either an accept or reject.
 * 'valid' is an array for use by the routine (it must be at least
 * 'len' bytes long).
 *
 * The kernel needs to be able to verify an application's filter code.
 * Otherwise, a bogus program could easily crash the system.
 *
 * Precondition: the caller must initialize *match to 0; on return
 * *match points at the (single) MATCH instruction, if any.
 * f[0] has already been checked to be BPF_BEGIN and is skipped.
 */
int
bpf_validate(
	bpf_insn_t f,
	int bytes,
	bpf_insn_t *match)
{
	register int i, j, len;
	register bpf_insn_t p;

	len = BPF_BYTES2LEN(bytes);
	/* f[0].code is already checked to be BPF_BEGIN. So skip f[0]. */

	for (i = 1; i < len; ++i) {
		/*
		 * Check that that jumps are forward, and within
		 * the code block.
		 */
		p = &f[i];
		if (BPF_CLASS(p->code) == BPF_JMP) {
			/* Jump offsets are relative to the next insn. */
			register int from = i + 1;

			if (BPF_OP(p->code) == BPF_JA) {
				if (from + p->k >= len)
					return 0;
			}
			else if (from + p->jt >= len || from + p->jf >= len)
				return 0;
		}
		/*
		 * Check that memory operations use valid addresses.
		 * (0xe0 masks the addressing-mode bits of the opcode.)
		 */
		if ((BPF_CLASS(p->code) == BPF_ST ||
		     (BPF_CLASS(p->code) == BPF_LD &&
		      (p->code & 0xe0) == BPF_MEM)) &&
		    (p->k >= BPF_MEMWORDS || p->k < 0))
			return 0;
		/*
		 * Check for constant division by 0.
		 */
		if (p->code == (BPF_ALU|BPF_DIV|BPF_K) && p->k == 0)
			return 0;
		/*
		 * Check for match instruction.
		 * Only one match instruction per filter is allowed.
		 */
		if (p->code == (BPF_RET|BPF_MATCH_IMM)) {
			/* p->jt holds the key count (1..N_NET_HASH_KEYS). */
			if (*match != 0 ||
			    p->jt == 0 ||
			    p->jt > N_NET_HASH_KEYS)
				return 0;
			i += p->jt;		/* skip keys */
			if (i + 1 > len)
				return 0;	/* keys run off the program */

			/* Each key word must be a BPF_KEY insn. */
			for (j = 1; j <= p->jt; j++) {
				if (p[j].code != (BPF_MISC|BPF_KEY))
					return 0;
			}

			*match = p;
		}
	}
	/* The program must end in an (accept or reject) return. */
	if (BPF_CLASS(f[len - 1].code) == BPF_RET)
		return (*match == 0) ? 1 : 2;
	else
		return 0;
}
2145
2146 boolean_t
2147 bpf_eq (
2148 register bpf_insn_t f1,
2149 register bpf_insn_t f2,
2150 register int bytes)
2151 {
2152 register int count;
2153
2154 count = BPF_BYTES2LEN(bytes);
2155 for (; count--; f1++, f2++) {
2156 if (!BPF_INSN_EQ(f1, f2)) {
2157 if ( f1->code == (BPF_MISC|BPF_KEY) &&
2158 f2->code == (BPF_MISC|BPF_KEY) )
2159 continue;
2160 return FALSE;
2161 }
2162 };
2163 return TRUE;
2164 }
2165
2166 unsigned int
2167 bpf_hash (
2168 register int n,
2169 register unsigned int *keys)
2170 {
2171 register unsigned int hval = 0;
2172
2173 while (n--) {
2174 hval += *keys++;
2175 }
2176 return hval % NET_HASH_SIZE;
2177 }
2178
2179
boolean_t
bpf_match (
	net_hash_header_t hash,
	register int n_keys,
	register unsigned int *keys,
	net_hash_entry_t **hash_headpp,
	net_hash_entry_t *entpp)
{
	/*
	 * Look up the session whose stored keys equal 'keys' in the
	 * given hash-table filter.  On success, returns TRUE with
	 * *hash_headpp set to the bucket head and *entpp to the
	 * matching entry; otherwise returns FALSE.
	 */
	register net_hash_entry_t entp, head;
	register int i;

	/* Key counts must agree before any comparison makes sense. */
	if (n_keys != hash->n_keys)
		return FALSE;

	*hash_headpp = &hash->table[bpf_hash(n_keys, keys)];
	head = **hash_headpp;

	if (head == 0)
		return FALSE;		/* empty bucket */

	/* Scan the bucket's circular list for an exact key match. */
	HASH_ITERATE (head, entp)
	{
		for (i = 0; i < n_keys; i++) {
			if (keys[i] != entp->keys[i])
				break;
		}
		if (i == n_keys) {
			*entpp = entp;
			return TRUE;
		}
	}
	HASH_ITERATE_END (head, entp)
	return FALSE;
}
2214
2215
/*
 * Removes a hash entry (ENTP) from its queue (HEAD).
 * If the reference count of filter (HP) becomes zero and not USED,
 * HP is removed from ifp->if_rcv_port_list and is freed.
 *
 * The removed entry is chained onto *DEAD_P so the caller can free
 * it later, after all locks are dropped.  Setting hp->n_keys = 0
 * marks the hash header as free for reuse.
 *
 * Returns TRUE only when HP itself was removed; the caller must
 * then stop scanning HP's buckets.
 */
boolean_t
hash_ent_remove (
    struct ifnet	*ifp,
    net_hash_header_t	hp,
    int			used,
    net_hash_entry_t	*head,
    net_hash_entry_t	entp,
    queue_entry_t	*dead_p)
{
	hp->ref_count--;

	if (*head == entp) {

	    /* entp is the bucket head. */
	    if (queue_empty((queue_t) entp)) {
		/* It was the only entry in the bucket. */
		*head = 0;
		ENQUEUE_DEAD(*dead_p, entp);
		if (hp->ref_count == 0 && !used) {
		    /* Last session gone: retire the whole filter. */
		    remqueue((queue_t) &ifp->if_rcv_port_list,
			     (queue_entry_t)hp);
		    hp->n_keys = 0;
		    return TRUE;
		}
		return FALSE;
	    } else {
		/* Advance the head past entp before unlinking it. */
		*head = (net_hash_entry_t)queue_next((queue_t) entp);
	    }
	}

	remqueue((queue_t)*head, (queue_entry_t)entp);
	ENQUEUE_DEAD(*dead_p, entp);
	return FALSE;
}
2254
/*
 * Account for a new filter destination port.
 *
 * Reads the port's queue limit (0 if the port is invalid or dead)
 * and raises the global buffer accounting accordingly.  Returns the
 * queue limit so the caller can store it and later undo the
 * accounting via net_del_q_info().
 */
int
net_add_q_info (
	ipc_port_t rcv_port)
{
	mach_port_msgcount_t qlimit = 0;

	/*
	 * We use a new port, so increase net_queue_free_min
	 * and net_kmsg_max to allow for more queued messages.
	 */

	if (IP_VALID(rcv_port)) {
	    ip_lock(rcv_port);
	    if (ip_active(rcv_port))
		qlimit = rcv_port->ip_qlimit;
	    ip_unlock(rcv_port);
	}

	simple_lock(&net_kmsg_total_lock);
	net_queue_free_min++;
	net_kmsg_max += qlimit + 1;
	simple_unlock(&net_kmsg_total_lock);

	return (int)qlimit;
}
2280
2281 void
2282 net_del_q_info (
2283 int qlimit)
2284 {
2285 simple_lock(&net_kmsg_total_lock);
2286 net_queue_free_min--;
2287 net_kmsg_max -= qlimit + 1;
2288 simple_unlock(&net_kmsg_total_lock);
2289 }
2290
2291
2292 /*
2293 * net_free_dead_infp (dead_infp)
2294 * queue_entry_t dead_infp; list of dead net_rcv_port_t.
2295 *
2296 * Deallocates dead net_rcv_port_t.
2297 * No locks should be held when called.
2298 */
2299 void
2300 net_free_dead_infp (
2301 queue_entry_t dead_infp)
2302 {
2303 register net_rcv_port_t infp, nextfp;
2304
2305 for (infp = (net_rcv_port_t) dead_infp; infp != 0; infp = nextfp)
2306 {
2307 nextfp = (net_rcv_port_t) queue_next(&infp->chain);
2308 ipc_port_release_send(infp->rcv_port);
2309 net_del_q_info(infp->rcv_qlimit);
2310 zfree(net_rcv_zone, (vm_offset_t) infp);
2311 }
2312 }
2313
2314 /*
2315 * net_free_dead_entp (dead_entp)
2316 * queue_entry_t dead_entp; list of dead net_hash_entry_t.
2317 *
2318 * Deallocates dead net_hash_entry_t.
2319 * No locks should be held when called.
2320 */
2321 void
2322 net_free_dead_entp (
2323 queue_entry_t dead_entp)
2324 {
2325 register net_hash_entry_t entp, nextentp;
2326
2327 for (entp = (net_hash_entry_t)dead_entp; entp != 0; entp = nextentp)
2328 {
2329 nextentp = (net_hash_entry_t) queue_next(&entp->chain);
2330
2331 ipc_port_release_send(entp->rcv_port);
2332 net_del_q_info(entp->rcv_qlimit);
2333 zfree(net_hash_entry_zone, (vm_offset_t) entp);
2334 }
2335 }
2336
Cache object: 099ddd927ec1fb917416ceb77af84c9e
|