FreeBSD/Linux Kernel Cross Reference
sys/device/net_io.c


    1 /* 
    2  * Mach Operating System
    3  * Copyright (c) 1989-1993 Carnegie Mellon University
    4  * All Rights Reserved.
    5  * 
    6  * Permission to use, copy, modify and distribute this software and its
    7  * documentation is hereby granted, provided that both the copyright
    8  * notice and this permission notice appear in all copies of the
    9  * software, derivative works or modified versions, and any portions
   10  * thereof, and that both notices appear in supporting documentation.
   11  * 
   12  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   13  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
   14  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   15  * 
   16  * Carnegie Mellon requests users of this software to return to
   17  * 
   18  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   19  *  School of Computer Science
   20  *  Carnegie Mellon University
   21  *  Pittsburgh PA 15213-3890
   22  * 
   23  * any improvements or extensions that they make and grant Carnegie Mellon
   24  * the rights to redistribute these changes.
   25  */
   26 /*
   27  * HISTORY
   28  * $Log:        net_io.c,v $
   29  * Revision 2.29  93/08/10  15:10:56  mrt
   30  *      Incorporated BPF+MATCH support from Masanobu Yuhara:
   31  *              Changed: garbage collection of dead filters for BPF.
   32  *              Added: BPF_MATCH_IMM support.
   33  *              Added: BPF support. Derived from tcpdump-2.2.1/bpf/net/bpf.h.
   34  *              [93/04/14  16:51:13  yuhara]
   35  * 
   36  * Revision 2.28  93/05/15  18:53:22  mrt
   37  *      machparam.h -> machspl.h
   38  * 
   39  * Revision 2.27  93/05/10  23:23:27  rvb
   40  *      Checkin for MK80 branch.
   41  *      [93/05/10  15:16:19  grm]
   42  * 
   43  * Revision 2.25.1.1  93/03/01  15:19:48  grm
   44  *      Added TTD teledebug code to check for debugging packets.
   45  *      [93/03/01            grm]
   46  * 
   47  * Revision 2.26  93/05/10  17:46:12  rvb
   48  *      Added test to check that buffer is large enough to hold data
   49  *      returned by net_getstat.
   50  *      [93/04/20            kivinen]
   51  * 
   52  * Revision 2.25  93/01/14  17:27:08  danner
   53  *      64bit cleanup.
   54  *      [92/11/30            af]
   55  * 
   56  * Revision 2.24  92/08/03  17:33:48  jfriedl
   57  *      removed silly prototypes
   58  *      [92/08/02            jfriedl]
   59  * 
   60  * Revision 2.23  92/05/21  17:09:38  jfriedl
   61  *      Cleanup to quiet gcc warnings.
   62  *      [92/05/16            jfriedl]
   63  * 
   64  * Revision 2.22  92/03/10  16:25:28  jsb
   65  *      Changed parameters to netipc_net_packet.
   66  *      [92/03/09  12:57:30  jsb]
   67  * 
   68  * Revision 2.21  92/01/03  20:03:57  dbg
   69  *      Add: NETF_PUSHHDR, NETF_PUSHSTK, NETF_PUSHIND, NETF_PUSHHDRIND.
   70  *      [91/12/23            dbg]
   71  * 
   72  * Revision 2.20  91/08/28  11:11:28  jsb
   73  *      Panic if network write attempted with continuation.
   74  *      [91/08/12  17:29:53  dlb]
   75  * 
   76  * Revision 2.19  91/08/24  11:55:55  af
   77  *      Missing include for Spls definitions.
   78  *      [91/08/02  02:45:16  af]
   79  * 
   80  * Revision 2.18  91/08/03  18:17:43  jsb
   81  *      Added NORMA_ETHER support.
   82  *      [91/07/24  22:54:41  jsb]
   83  * 
   84  * Revision 2.17  91/05/14  15:59:34  mrt
   85  *      Correcting copyright
   86  * 
   87  * Revision 2.16  91/05/10  11:48:47  dbg
   88  *      Don't forget to copy the packet size when duplicating a packet
   89  *      for multiple filters in net_filter().
   90  *      [91/05/09            dpj]
   91  * 
   92  * Revision 2.15  91/03/16  14:43:14  rpd
   93  *      Added net_thread, net_thread_continue.
   94  *      [91/02/13            rpd]
   95  *      Split net_rcv_msg_queue into high and low priority queues.
   96  *      Cap the total number of buffers allocated.
   97  *      [91/01/14            rpd]
   98  * 
   99  *      Added net_rcv_msg_queue_size, net_rcv_msg_queue_max.
  100  *      [91/01/12            rpd]
  101  * 
  102  * Revision 2.14  91/02/14  14:37:07  mrt
  103  *      Added garbage collection of dead filters.
  104  *      [91/02/12  12:11:10  af]
  105  * 
  106  * Revision 2.13  91/02/05  17:09:54  mrt
  107  *      Changed to new Mach copyright
  108  *      [91/01/31  17:30:04  mrt]
  109  * 
  110  * Revision 2.12  91/01/08  15:09:48  rpd
  111  *      Replaced NET_KMSG_GET, NET_KMSG_FREE
  112  *      with net_kmsg_get, net_kmsg_put, net_kmsg_collect.
  113  *      Increased net_kmsg_ilist_min to 4.
  114  *      [91/01/05            rpd]
  115  *      Fixed net_rcv_msg_thread to round message sizes up to an int multiple.
  116  *      [90/12/07            rpd]
  117  * 
  118  *      Fixed net_rcv_msg_thread to not set vm_privilege.
  119  *      [90/11/29            rpd]
  120  * 
  121  * Revision 2.11  90/09/09  23:20:00  rpd
  122  *      Zero the mapped_size stats for non mappable interfaces.
  123  *      [90/08/30  17:41:00  af]
  124  * 
  125  * Revision 2.10  90/08/27  21:55:18  dbg
  126  *      If multiple filters receive a packet, copy the header as well as
  127  *      the body.  Fix from Dan Julin.
  128  *      [90/08/27            dbg]
  129  * 
  130  *      Fix filter check to account for literal word.
  131  *      [90/07/17            dbg]
  132  * 
  133  * Revision 2.9  90/08/06  15:06:57  rwd
  134  *      Fixed a bug in parse_net_filter(), that was reading the
  135  *      literal from NETF_PUSHLIT as an instruction.
  136  *      [90/07/18  21:56:20  dpj]
  137  * 
  138  * Revision 2.8  90/06/02  14:48:14  rpd
  139  *      Converted to new IPC.
  140  *      [90/03/26  21:57:43  rpd]
  141  * 
  142  * Revision 2.7  90/02/22  20:02:21  dbg
  143  *      Track changes to kmsg structure.
  144  *      [90/01/31            dbg]
  145  * 
  146  * Revision 2.6  90/01/11  11:42:20  dbg
  147  *      Make run in parallel.
  148  *      [89/12/15            dbg]
  149  * 
  150  * Revision 2.5  89/12/08  19:52:22  rwd
  151  *      Picked up changes from rfr to minimize wired down memory
  152  *      [89/11/21            rwd]
  153  * 
  154  * Revision 2.4  89/09/08  11:24:35  dbg
  155  *      Convert to run in kernel task.  Removed some lint.
  156  *      [89/07/26            dbg]
  157  * 
  158  * Revision 2.3  89/08/11  17:55:18  rwd
  159  *      Picked up change from rfr which made zone collectable and
  160  *      decreased min net_kmesg to 2.
  161  *      [89/08/10            rwd]
  162  * 
  163  * Revision 2.2  89/08/05  16:06:58  rwd
  164  *      Changed device_map to device_task_map
  165  *      [89/08/04            rwd]
  166  * 
  167  * 13-Mar-89  David Golub (dbg) at Carnegie-Mellon University
  168  *      Created.  
  169  *
  170  */
  171 /*
  172  *      Author: David B. Golub, Carnegie Mellon University
  173  *      Date:   3/89
  174  *
  175  *      Network IO.
  176  *
  177  *      Packet filter code taken from vaxif/enet.c, written at
  178  *              CMU and Stanford.
  179  */
  180 
  181 /*
  182  *      Note:  don't depend on anything in this file.
  183  *      It may change a lot real soon.  -cmaeda 11 June 1993
  184  */
  185 
  186 #include <norma_ether.h>
  187 #include <mach_ttd.h>
  188 
  189 #include <sys/types.h>
  190 #include <device/net_status.h>
  191 #include <machine/machspl.h>            /* spl definitions */
  192 #include <device/net_io.h>
  193 #include <device/if_hdr.h>
  194 #include <device/io_req.h>
  195 #include <device/ds_routines.h>
  196 
  197 #include <mach/boolean.h>
  198 #include <mach/vm_param.h>
  199 
  200 #include <ipc/ipc_port.h>
  201 #include <ipc/ipc_kmsg.h>
  202 #include <ipc/ipc_mqueue.h>
  203 
  204 #include <kern/counters.h>
  205 #include <kern/lock.h>
  206 #include <kern/queue.h>
  207 #include <kern/sched_prim.h>
  208 #include <kern/thread.h>
  209 
  210 #if     NORMA_ETHER
  211 #include <norma/ipc_ether.h>
  212 #endif  /*NORMA_ETHER*/
  213 
  216 #if     MACH_TTD
  217 #include <ttd/ttd_stub.h>
  218 #endif  /* MACH_TTD */
  219 
  220 #if     MACH_TTD
  221 int kttd_async_counter= 0;
  222 #endif  /* MACH_TTD */
  223 
  224 
  225 /*
  226  *      Packet Buffer Management
  227  *
  228  *      This module manages a private pool of kmsg buffers.
  229  */
  230 
  231 /*
  232  * List of net kmsgs queued to be sent to users.
  233  * Messages can be high priority or low priority.
  234  * The network thread processes high priority messages first.
  235  */
  236 decl_simple_lock_data(,net_queue_lock)
  237 boolean_t       net_thread_awake = FALSE;
  238 struct ipc_kmsg_queue   net_queue_high;
  239 int             net_queue_high_size = 0;
  240 int             net_queue_high_max = 0;         /* for debugging */
  241 struct ipc_kmsg_queue   net_queue_low;
  242 int             net_queue_low_size = 0;
  243 int             net_queue_low_max = 0;          /* for debugging */
  244 
  245 /*
  246  * List of net kmsgs that can be touched at interrupt level.
  247  * If it is empty, we will also steal low priority messages.
  248  */
  249 decl_simple_lock_data(,net_queue_free_lock)
  250 struct ipc_kmsg_queue   net_queue_free;
  251 int             net_queue_free_size = 0;        /* on free list */
  252 int             net_queue_free_max = 0;         /* for debugging */
  253 
  254 /*
  255  * This value is critical to network performance.
  256  * At least this many buffers should be sitting in net_queue_free.
  257  * If this is set too small, we will drop network packets.
  258  * Even a low drop rate (<1%) can cause severe network throughput problems.
  259  * We add one to net_queue_free_min for every filter.
  260  */
  261 int             net_queue_free_min = 3;
  262 
  263 int             net_queue_free_hits = 0;        /* for debugging */
  264 int             net_queue_free_steals = 0;      /* for debugging */
  265 int             net_queue_free_misses = 0;      /* for debugging */
  266 
  267 int             net_kmsg_send_high_hits = 0;    /* for debugging */
  268 int             net_kmsg_send_low_hits = 0;     /* for debugging */
  269 int             net_kmsg_send_high_misses = 0;  /* for debugging */
  270 int             net_kmsg_send_low_misses = 0;   /* for debugging */
  271 
  272 int             net_thread_awaken = 0;          /* for debugging */
  273 int             net_ast_taken = 0;              /* for debugging */
  274 
  275 decl_simple_lock_data(,net_kmsg_total_lock)
  276 int             net_kmsg_total = 0;             /* total allocated */
  277 int             net_kmsg_max;                   /* initialized below */
  278 
  279 vm_size_t       net_kmsg_size;                  /* initialized below */
  280 
  281 /*
  282  *      We want more buffers when there aren't enough in the free queue
  283  *      and the low priority queue.  However, we don't want to allocate
  284  *      more than net_kmsg_max.
  285  */
  286 
  287 #define net_kmsg_want_more()            \
  288         (((net_queue_free_size + net_queue_low_size) < net_queue_free_min) && \
  289          (net_kmsg_total < net_kmsg_max))
  290 
  291 ipc_kmsg_t
  292 net_kmsg_get(void)
  293 {
  294         register ipc_kmsg_t kmsg;
  295         spl_t s;
  296 
  297         /*
  298          *      First check the list of free buffers.
  299          */
  300         s = splimp();
  301         simple_lock(&net_queue_free_lock);
  302         kmsg = ipc_kmsg_queue_first(&net_queue_free);
  303         if (kmsg != IKM_NULL) {
  304             ipc_kmsg_rmqueue_first_macro(&net_queue_free, kmsg);
  305             net_queue_free_size--;
  306             net_queue_free_hits++;
  307         }
  308         simple_unlock(&net_queue_free_lock);
  309 
  310         if (kmsg == IKM_NULL) {
  311             /*
  312              *  Try to steal from the low priority queue.
  313              */
  314             simple_lock(&net_queue_lock);
  315             kmsg = ipc_kmsg_queue_first(&net_queue_low);
  316             if (kmsg != IKM_NULL) {
  317                 ipc_kmsg_rmqueue_first_macro(&net_queue_low, kmsg);
  318                 net_queue_low_size--;
  319                 net_queue_free_steals++;
  320             }
  321             simple_unlock(&net_queue_lock);
  322         }
  323 
  324         if (kmsg == IKM_NULL)
  325             net_queue_free_misses++;
  326         (void) splx(s);
  327 
  328         if (net_kmsg_want_more() || (kmsg == IKM_NULL)) {
  329             boolean_t awake;
  330 
  331             s = splimp();
  332             simple_lock(&net_queue_lock);
  333             awake = net_thread_awake;
  334             net_thread_awake = TRUE;
  335             simple_unlock(&net_queue_lock);
  336             (void) splx(s);
  337 
  338             if (!awake)
  339                 thread_wakeup((event_t) &net_thread_awake);
  340         }
  341 
  342         return kmsg;
  343 }
  344 
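/*
 *	Illustrative sketch (not part of the original source): how a
 *	driver receive interrupt would typically use net_kmsg_get()
 *	together with net_packet() below.  The routine and the
 *	frame-copy helper are hypothetical; only the buffer protocol
 *	(get a kmsg, fill header and packet, hand it to net_packet,
 *	or drop the frame when no buffer is available) is the point.
 */
#if 0
void
hypothetical_rx_intr(ifp)
	struct ifnet *ifp;
{
	ipc_kmsg_t kmsg;
	unsigned int count;

	kmsg = net_kmsg_get();
	if (kmsg == IKM_NULL)
		return;		/* out of buffers: drop the frame */

	/* hypothetical copy of the frame out of the device */
	count = hypothetical_copy_frame(net_kmsg(kmsg)->header,
					net_kmsg(kmsg)->packet);

	/* queue for filtering; broadcast frames get low priority */
	net_packet(ifp, kmsg, count, ethernet_priority(kmsg));
}
#endif
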
  345 void
  346 net_kmsg_put(register ipc_kmsg_t kmsg)
  347 {
  348         spl_t s;
  349 
  350         s = splimp();
  351         simple_lock(&net_queue_free_lock);
  352         ipc_kmsg_enqueue_macro(&net_queue_free, kmsg);
  353         if (++net_queue_free_size > net_queue_free_max)
  354             net_queue_free_max = net_queue_free_size;
  355         simple_unlock(&net_queue_free_lock);
  356         (void) splx(s);
  357 }
  358 
  359 void
  360 net_kmsg_collect(void)
  361 {
  362         register ipc_kmsg_t kmsg;
  363         spl_t s;
  364 
  365         s = splimp();
  366         simple_lock(&net_queue_free_lock);
  367         while (net_queue_free_size > net_queue_free_min) {
  368             kmsg = ipc_kmsg_dequeue(&net_queue_free);
  369             net_queue_free_size--;
  370             simple_unlock(&net_queue_free_lock);
  371             (void) splx(s);
  372 
  373             net_kmsg_free(kmsg);
  374             simple_lock(&net_kmsg_total_lock);
  375             net_kmsg_total--;
  376             simple_unlock(&net_kmsg_total_lock);
  377 
  378             s = splimp();
  379             simple_lock(&net_queue_free_lock);
  380         }
  381         simple_unlock(&net_queue_free_lock);
  382         (void) splx(s);
  383 }
  384 
  385 void
  386 net_kmsg_more(void)
  387 {
  388         register ipc_kmsg_t kmsg;
  389 
  390         /*
  391          * Replenish net kmsg pool if low.  We don't have the locks
  392          * necessary to look at these variables, but that's OK because
  393          * misread values aren't critical.  The danger in this code is
  394          * that while we allocate buffers, interrupts are happening
  395          * which take buffers out of the free list.  If we are not
  396          * careful, we will sit in the loop and allocate a zillion
  397          * buffers while a burst of packets arrives.  So we count
  398          * buffers in the low priority queue as available, because
  399          * net_kmsg_get will make use of them, and we cap the total
  400          * number of buffers we are willing to allocate.
  401          */
  402 
  403         while (net_kmsg_want_more()) {
  404             simple_lock(&net_kmsg_total_lock);
  405             net_kmsg_total++;
  406             simple_unlock(&net_kmsg_total_lock);
  407             kmsg = net_kmsg_alloc();
  408             net_kmsg_put(kmsg);
  409         }
  410 }
  411 
  412 /*
  413  *      Packet Filter Data Structures
  414  *
  415  *      Each network interface has a set of packet filters
  416  *      that are run on incoming packets.
  417  *
  418  *      Each packet filter may represent a single network
  419  *      session or multiple network sessions.  For example,
  420  *      all application level TCP sessions would be represented
  421  *      by a single packet filter data structure.
  422  *      
  423  *      If a packet filter has a single session, we use a
  424  *      struct net_rcv_port to represent it.  If the packet
  425  *      filter represents multiple sessions, we use a 
  426  *      struct net_hash_header to represent it.
  427  */
  428 
  429 /*
  430  * Each interface has a write port and a set of read ports.
  431  * Each read port has one or more filters to determine what packets
  432  * should go to that port.
  433  */
  434 
  435 /*
  436  * Receive port for net, with packet filter.
  437  * This data structure by itself represents a packet
  438  * filter for a single session.
  439  */
  440 struct net_rcv_port {
  441         queue_chain_t   chain;          /* list of open_descriptors */
  442         ipc_port_t      rcv_port;       /* port to send packet to */
  443         int             rcv_qlimit;     /* port's qlimit */
  444         int             rcv_count;      /* number of packets received */
  445         int             priority;       /* priority for filter */
  446         filter_t        *filter_end;    /* pointer to end of filter */
  447         filter_t        filter[NET_MAX_FILTER];
  448                                         /* filter operations */
  449 };
  450 typedef struct net_rcv_port *net_rcv_port_t;
  451 
  452 zone_t          net_rcv_zone;   /* zone of net_rcv_port structs */
  453 
  454 
  455 #define NET_HASH_SIZE   256
  456 #define N_NET_HASH      4
  457 #define N_NET_HASH_KEYS 4
  458 
  459 unsigned int bpf_hash (int, unsigned int *);
  460 
  461 /*
  462  * A single hash entry.
  463  */
  464 struct net_hash_entry {
  465         queue_chain_t   chain;          /* list of entries with same hval */
  466 #define he_next chain.next
  467 #define he_prev chain.prev
  468         ipc_port_t      rcv_port;       /* destination port */
  469         int             rcv_qlimit;     /* qlimit for the port */
  470         unsigned int    keys[N_NET_HASH_KEYS];
  471 };
  472 typedef struct net_hash_entry *net_hash_entry_t;
  473 
  474 zone_t  net_hash_entry_zone;
  475 
  476 /*
  477  * This structure represents a packet filter with multiple sessions.
  478  *
  479  * For example, all application level TCP sessions might be
  480  * represented by one of these structures.  It looks like a 
  481  * net_rcv_port struct so that both types can live on the
  482  * same packet filter queues.
  483  */
  484 struct net_hash_header {
  485         struct net_rcv_port rcv;
  486         int n_keys;                     /* zero if not used */
  487         int ref_count;                  /* reference count */
  488         net_hash_entry_t table[NET_HASH_SIZE];
  489 } filter_hash_header[N_NET_HASH];
  490 
  491 typedef struct net_hash_header *net_hash_header_t;
  492 
  493 decl_simple_lock_data(,net_hash_header_lock)
  494 
  495 #define HASH_ITERATE(head, elt) (elt) = (net_hash_entry_t) (head); do {
  496 #define HASH_ITERATE_END(head, elt) \
  497         (elt) = (net_hash_entry_t) queue_next((queue_entry_t) (elt));      \
  498         } while ((elt) != (head));
  499 
  500 
  501 #define FILTER_ITERATE(ifp, fp, nextfp) \
  502         for ((fp) = (net_rcv_port_t) queue_first(&(ifp)->if_rcv_port_list);\
  503              !queue_end(&(ifp)->if_rcv_port_list, (queue_entry_t)(fp));    \
  504              (fp) = (nextfp)) {                                            \
  505                 (nextfp) = (net_rcv_port_t) queue_next(&(fp)->chain);
  506 #define FILTER_ITERATE_END }
  507 
  508 /* entry_p must be net_rcv_port_t or net_hash_entry_t */
  509 #define ENQUEUE_DEAD(dead, entry_p) { \
  510         queue_next(&(entry_p)->chain) = (queue_entry_t) (dead); \
  511         (dead) = (queue_entry_t)(entry_p);                      \
  512 }
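
/*
 *	Note on usage: FILTER_ITERATE/FILTER_ITERATE_END expand to an
 *	open "for" loop, so the uses below read like this sketch.
 *	"nextfp" is loaded before the body runs, which is what allows
 *	the body to unlink "fp" and set it aside with ENQUEUE_DEAD
 *	without breaking the traversal (port_is_dead is hypothetical):
 *
 *		FILTER_ITERATE(ifp, fp, nextfp)
 *		{
 *			if (port_is_dead(fp)) {
 *				queue_remove(&ifp->if_rcv_port_list, fp,
 *					     net_rcv_port_t, chain);
 *				ENQUEUE_DEAD(dead, fp);
 *				continue;
 *			}
 *		}
 *		FILTER_ITERATE_END
 */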
  513 
  514 extern boolean_t net_do_filter();       /* CSPF */
  515 extern int bpf_do_filter();             /* BPF */
  516 
  517 
  518 /*
  519  *      ethernet_priority:
  520  *
  521  *      This function properly belongs in the ethernet interfaces;
  522  *      it should not be called by this module.  (We get packet
  523  *      priorities as an argument to net_filter.)  It is here
  524  *      to avoid massive code duplication.
  525  *
  526  *      Returns TRUE for high-priority packets.
  527  */
  528 
  529 boolean_t ethernet_priority(kmsg)
  530         ipc_kmsg_t kmsg;
  531 {
  532         register unsigned char *addr =
  533                 (unsigned char *) net_kmsg(kmsg)->header;
  534 
  535         /*
  536          *      A simplistic check for broadcast packets.
  537          */
  538 
  539         if ((addr[0] == 0xff) && (addr[1] == 0xff) &&
  540             (addr[2] == 0xff) && (addr[3] == 0xff) &&
  541             (addr[4] == 0xff) && (addr[5] == 0xff))
  542             return FALSE;
  543         else
  544             return TRUE;
  545 }
  546 
  547 mach_msg_type_t header_type = {
  548         MACH_MSG_TYPE_BYTE,     /* name */
  549         8,                      /* size */
  550         NET_HDW_HDR_MAX,        /* number */
  551         TRUE,                   /* inline */
  552         FALSE,                  /* longform */
  553         FALSE,                  /* deallocate */
  554         0                       /* unused */
  555 };
  556 
  557 mach_msg_type_t packet_type = {
  558         MACH_MSG_TYPE_BYTE,     /* name */
  559         8,                      /* size */
  560         0,                      /* number */
  561         TRUE,                   /* inline */
  562         FALSE,                  /* longform */
  563         FALSE                   /* deallocate */
  564 };
  565 
  566 /*
  567  *      net_deliver:
  568  *
  569  *      Called and returns holding net_queue_lock, at splimp.
  570  *      Dequeues a message and delivers it at spl0.
  571  *      Returns FALSE if no messages.
  572  */
  573 boolean_t net_deliver(nonblocking)
  574         boolean_t nonblocking;
  575 {
  576         register ipc_kmsg_t kmsg;
  577         boolean_t high_priority;
  578         struct ipc_kmsg_queue send_list;
  579 
  580         /*
  581          * Pick up a pending network message and deliver it.
  582          * Deliver high priority messages before low priority.
  583          */
  584 
  585         if ((kmsg = ipc_kmsg_dequeue(&net_queue_high)) != IKM_NULL) {
  586             net_queue_high_size--;
  587             high_priority = TRUE;
  588         } else if ((kmsg = ipc_kmsg_dequeue(&net_queue_low)) != IKM_NULL) {
  589             net_queue_low_size--;
  590             high_priority = FALSE;
  591         } else
  592             return FALSE;
  593         simple_unlock(&net_queue_lock);
  594         (void) spl0();
  595 
  596         /*
  597          * Run the packet through the filters,
  598          * getting back a queue of packets to send.
  599          */
  600         net_filter(kmsg, &send_list);
  601 
  602         if (!nonblocking) {
  603             /*
  604              * There is a danger of running out of available buffers
  605              * because they all get moved into the high priority queue
  606              * or a port queue.  In particular, we might need to
  607              * allocate more buffers as we pull (previously available)
  608              * buffers out of the low priority queue.  But we can only
  609              * allocate if we are allowed to block.
  610              */
  611             net_kmsg_more();
  612         }
  613 
  614         while ((kmsg = ipc_kmsg_dequeue(&send_list)) != IKM_NULL) {
  615             int count;
  616 
  617             /*
  618              * Fill in the rest of the kmsg.
  619              */
  620             count = net_kmsg(kmsg)->net_rcv_msg_packet_count;
  621 
  622             ikm_init_special(kmsg, IKM_SIZE_NETWORK);
  623 
  624             kmsg->ikm_header.msgh_bits =
  625                     MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, 0);
  626             /* message sizes must be rounded up to a multiple of 4 */
  627             kmsg->ikm_header.msgh_size =
  628                     (((mach_msg_size_t) (sizeof(struct net_rcv_msg)
  629                                         - NET_RCV_MAX + count)) + 3) & ~3;
  630             kmsg->ikm_header.msgh_local_port = MACH_PORT_NULL;
  631             kmsg->ikm_header.msgh_kind = MACH_MSGH_KIND_NORMAL;
  632             kmsg->ikm_header.msgh_id = NET_RCV_MSG_ID;
  633 
  634             net_kmsg(kmsg)->header_type = header_type;
  635             net_kmsg(kmsg)->packet_type = packet_type;
  636             net_kmsg(kmsg)->net_rcv_msg_packet_count = count;
  637 
  638             /*
  639              * Send the packet to the destination port.  Drop it
  640              * if the destination port is over its backlog.
  641              */
  642 
  643             if (ipc_mqueue_send(kmsg, MACH_SEND_TIMEOUT, 0) ==
  644                                                     MACH_MSG_SUCCESS) {
  645                 if (high_priority)
  646                     net_kmsg_send_high_hits++;
  647                 else
  648                     net_kmsg_send_low_hits++;
  649                 /* the receiver is responsible for the message now */
  650             } else {
  651                 if (high_priority)
  652                     net_kmsg_send_high_misses++;
  653                 else
  654                     net_kmsg_send_low_misses++;
  655                 ipc_kmsg_destroy(kmsg);
  656             }
  657         }
  658 
  659         (void) splimp();
  660         simple_lock(&net_queue_lock);
  661         return TRUE;
  662 }
  663 
  664 /*
  665  *      We want to deliver packets using ASTs, so we can avoid the
  666  *      thread_wakeup/thread_block needed to get to the network
  667  *      thread.  However, we can't allocate memory in the AST handler,
  668  *      because memory allocation might block.  Hence we have the
  669  *      network thread to allocate memory.  The network thread also
  670  *      delivers packets, so it can be allocating and delivering for a
  671  *      burst.  net_thread_awake is protected by net_queue_lock
  672  *      (instead of net_queue_free_lock) so that net_packet and
  673  *      net_ast can safely determine if the network thread is running.
  674  *      This prevents a race that might leave a packet sitting without
  675  *      being delivered.  It is possible for net_kmsg_get to think
  676  *      the network thread is awake, and so avoid a wakeup, and then
  677  *      have the network thread sleep without allocating.  The next
  678  *      net_kmsg_get will do a wakeup.
  679  */
  680 
  681 void net_ast()
  682 {
  683         spl_t s;
  684 
  685         net_ast_taken++;
  686 
  687         /*
  688          *      If the network thread is awake, then we would
  689          *      rather deliver messages from it, because
  690          *      it can also allocate memory.
  691          */
  692 
  693         s = splimp();
  694         simple_lock(&net_queue_lock);
  695         while (!net_thread_awake && net_deliver(TRUE))
  696                 continue;
  697 
  698         /*
  699          *      Prevent an unnecessary AST.  Either the network
  700          *      thread will deliver the messages, or there are
  701          *      no messages left to deliver.
  702          */
  703 
  704         simple_unlock(&net_queue_lock);
  705         (void) splsched();
  706         ast_off(cpu_number(), AST_NETWORK);
  707         (void) splx(s);
  708 }
  709 
  710 void net_thread_continue()
  711 {
  712         for (;;) {
  713                 spl_t s;
  714 
  715                 net_thread_awaken++;
  716 
  717                 /*
  718                  *      First get more buffers.
  719                  */
  720                 net_kmsg_more();
  721 
  722                 s = splimp();
  723                 simple_lock(&net_queue_lock);
  724                 while (net_deliver(FALSE))
  725                         continue;
  726 
  727                 net_thread_awake = FALSE;
  728                 assert_wait(&net_thread_awake, FALSE);
  729                 simple_unlock(&net_queue_lock);
  730                 (void) splx(s);
  731                 counter(c_net_thread_block++);
  732                 thread_block(net_thread_continue);
  733         }
  734 }
  735 
  736 void net_thread()
  737 {
  738         spl_t s;
  739 
  740         /*
  741          *      We should be very high priority.
  742          */
  743 
  744         thread_set_own_priority(0);
  745 
  746         /*
  747          *      We sleep initially, so that we don't allocate any buffers
  748          *      unless the network is really in use and they are needed.
  749          */
  750 
  751         s = splimp();
  752         simple_lock(&net_queue_lock);
  753         net_thread_awake = FALSE;
  754         assert_wait(&net_thread_awake, FALSE);
  755         simple_unlock(&net_queue_lock);
  756         (void) splx(s);
  757         counter(c_net_thread_block++);
  758         thread_block(net_thread_continue);
  759         net_thread_continue();
  760         /*NOTREACHED*/
  761 }
  762 
  763 void
  764 reorder_queue(first, last)
  765         register queue_t        first, last;
  766 {
  767         register queue_entry_t  prev, next;
  768 
  769         prev = first->prev;
  770         next = last->next;
  771 
  772         prev->next = last;
  773         next->prev = first;
  774 
  775         last->prev = prev;
  776         last->next = first;
  777 
  778         first->next = next;
  779         first->prev = last;
  780 }
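
/*
 *	Note: given adjacent entries
 *		... <-> prev <-> first <-> last <-> next <-> ...
 *	the six assignments above leave the chain as
 *		... <-> prev <-> last <-> first <-> next <-> ...
 *	i.e. the two entries swap places.  Its caller in this file,
 *	the filter-promotion code in net_filter(), always passes
 *	first == queue_prev(last).
 */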
  781 
  782 /*
  783  * Incoming packet.  Header has already been moved to proper place.
  784  * We are already at splimp.
  785  */
  786 void
  787 net_packet(ifp, kmsg, count, priority)
  788         register struct ifnet   *ifp;
  789         register ipc_kmsg_t     kmsg;
  790         unsigned int            count;
  791         boolean_t               priority;
  792 {
  793         boolean_t awake;
  794 
  795 #if     NORMA_ETHER
  796         if (netipc_net_packet(kmsg, count)) {
  797                 return;
  798         }
  799 #endif  /* NORMA_ETHER */
  800 
  801 #if     MACH_TTD
  802         /*
  803          * Do a quick check to see if it is a kernel TTD packet.
  804          *
  805          * Only check if KernelTTD is enabled, i.e. the current
  806          * device driver supports TTD and the bootp succeeded.
  807          */
  808         if (kttd_enabled && kttd_handle_async(kmsg)) {
  809                 /* 
  810                  * Packet was a valid ttd packet and
  811                  * doesn't need to be passed up to filter.
  812                  * The ttd code put the used kmsg buffer
  813                  * back onto the free list.
  814                  */
  815                 if (kttd_debug)
  816                         printf("**%x**", kttd_async_counter++);
  817                 return;
  818         }
  819 #endif  /* MACH_TTD */
  820 
  821         kmsg->ikm_header.msgh_remote_port = (mach_port_t) ifp;
  822         net_kmsg(kmsg)->net_rcv_msg_packet_count = count;
  823 
  824         simple_lock(&net_queue_lock);
  825         if (priority) {
  826             ipc_kmsg_enqueue(&net_queue_high, kmsg);
  827             if (++net_queue_high_size > net_queue_high_max)
  828                 net_queue_high_max = net_queue_high_size;
  829         } else {
  830             ipc_kmsg_enqueue(&net_queue_low, kmsg);
  831             if (++net_queue_low_size > net_queue_low_max)
  832                 net_queue_low_max = net_queue_low_size;
  833         }
  834         /*
  835          *      If the network thread is awake, then we don't
  836          *      need to take an AST, because the thread will
  837          *      deliver the packet.
  838          */
  839         awake = net_thread_awake;
  840         simple_unlock(&net_queue_lock);
  841 
  842         if (!awake) {
  843             spl_t s = splsched();
  844             ast_on(cpu_number(), AST_NETWORK);
  845             (void) splx(s);
  846         }
  847 }
  848 
  849 int net_filter_queue_reorder = 0; /* non-zero to enable reordering */
  850 
  851 /*
  852  * Run a packet through the filters, returning a list of messages.
  853  * We are *not* called at interrupt level.
  854  */
  855 void
  856 net_filter(kmsg, send_list)
  857         register ipc_kmsg_t     kmsg;
  858         ipc_kmsg_queue_t        send_list;
  859 {
  860         register struct ifnet   *ifp;
  861         register net_rcv_port_t infp, nextfp;
  862         register ipc_kmsg_t     new_kmsg;
  863 
  864         net_hash_entry_t        entp, *hash_headp;
  865         ipc_port_t              dest;
  866         queue_entry_t           dead_infp = (queue_entry_t) 0;
  867         queue_entry_t           dead_entp = (queue_entry_t) 0;
  868         unsigned int            ret_count;
  869 
  870         int count = net_kmsg(kmsg)->net_rcv_msg_packet_count;
  871         ifp = (struct ifnet *) kmsg->ikm_header.msgh_remote_port;
  872         ipc_kmsg_queue_init(send_list);
  873 
  874         /*
  875          * Unfortunately we can't allocate or deallocate memory
  876          * while holding this lock.  And we can't drop the lock
  877          * while examining the filter list.
  878          */
  879         simple_lock(&ifp->if_rcv_port_list_lock);
  880         FILTER_ITERATE(ifp, infp, nextfp)
  881         {
  882             entp = (net_hash_entry_t) 0;
  883             if (infp->filter[0] == NETF_BPF) {
  884                 ret_count = bpf_do_filter(infp, net_kmsg(kmsg)->packet, count,
  885                                           net_kmsg(kmsg)->header,
  886                                           &hash_headp, &entp);
  887                 if (entp == (net_hash_entry_t) 0)
  888                   dest = infp->rcv_port;
  889                 else
  890                   dest = entp->rcv_port;
  891             } else {
  892                 ret_count = net_do_filter(infp, net_kmsg(kmsg)->packet, count,
  893                                           net_kmsg(kmsg)->header);
  894                 if (ret_count)
  895                     ret_count = count;
  896                 dest = infp->rcv_port;
  897             }               
  898 
  899             if (ret_count) {
  900 
  901                 /*
  902                  * Make a send right for the destination.
  903                  */
  904 
  905                 dest = ipc_port_copy_send(dest);
  906                 if (!IP_VALID(dest)) {
  907                     /*
  908                      * This filter is dead.  We remove it from the
  909                      * filter list and set it aside for deallocation.
  910                      */
  911 
  912                     if (entp == (net_hash_entry_t) 0) {
  913                         queue_remove(&ifp->if_rcv_port_list, infp,
  914                                      net_rcv_port_t, chain);
  915                         ENQUEUE_DEAD(dead_infp, infp);
  916                         continue;
  917                     } else {
  918                         hash_ent_remove (ifp,
  919                                          (net_hash_header_t)infp,
  920                                          FALSE,         /* no longer used */
  921                                          hash_headp,
  922                                          entp,
  923                                          &dead_entp);
  924                         continue;
  925                     }
  926                 }
  927 
  928                 /*
  929                  * Deliver copy of packet to this channel.
  930                  */
  931                 if (ipc_kmsg_queue_empty(send_list)) {
  932                     /*
  933                      * Only receiver, so far
  934                      */
  935                     new_kmsg = kmsg;
  936                 } else {
  937                     /*
  938                      * Other receivers - must allocate message and copy.
  939                      */
  940                     new_kmsg = net_kmsg_get();
  941                     if (new_kmsg == IKM_NULL) {
  942                         ipc_port_release_send(dest);
  943                         break;
  944                     }
  945 
  946                     bcopy(
  947                         net_kmsg(kmsg)->packet,
  948                         net_kmsg(new_kmsg)->packet,
  949                         ret_count);
  950                     bcopy(
  951                         net_kmsg(kmsg)->header,
  952                         net_kmsg(new_kmsg)->header,
  953                         NET_HDW_HDR_MAX);
  954                 }
  955                 net_kmsg(new_kmsg)->net_rcv_msg_packet_count = ret_count;
  956                 new_kmsg->ikm_header.msgh_remote_port = (mach_port_t) dest;
  957                 ipc_kmsg_enqueue(send_list, new_kmsg);
  958 
  959             {
  960                 register net_rcv_port_t prevfp;
  961                 int rcount = ++infp->rcv_count;
  962 
  963                 /*
  964                  * See if ordering of filters is wrong
  965                  */
  966                 if (infp->priority >= NET_HI_PRI) {
  967                     prevfp = (net_rcv_port_t) queue_prev(&infp->chain);
  968                     /*
  969                      * If infp is not the first element on the queue,
  970                      * and the previous element is at equal priority
  971                      * but has a lower count, then promote infp to
  972                      * be in front of prevfp.
  973                      */
  974                     if ((queue_t)prevfp != &ifp->if_rcv_port_list &&
  975                         infp->priority == prevfp->priority) {
  976                         /*
  977                          * Threshold difference to prevent thrashing
  978                          */
  979                         if (net_filter_queue_reorder
  980                             && (100 + prevfp->rcv_count < rcount))
  981                                 reorder_queue(&prevfp->chain, &infp->chain);
  982                     }
  983                     /*
  984                      * High-priority filter -> no more deliveries
  985                      */
  986                     break;
  987                 }
  988             }
  989             }
  990         }
  991         FILTER_ITERATE_END
  992 
  993         simple_unlock(&ifp->if_rcv_port_list_lock);
  994 
  995         /*
  996          * Deallocate dead filters.
  997          */
  998         if (dead_infp != 0)
  999                 net_free_dead_infp(dead_infp);
 1000         if (dead_entp != 0)
 1001                 net_free_dead_entp(dead_entp);
 1002 
 1003         if (ipc_kmsg_queue_empty(send_list)) {
 1004             /* Not sent - recycle */
 1005             net_kmsg_put(kmsg);
 1006         }
 1007 }
 1008 
 1009 boolean_t
 1010 net_do_filter(infp, data, data_count, header)
 1011         net_rcv_port_t  infp;
 1012         char *          data;
 1013         unsigned int    data_count;
 1014         char *          header;
 1015 {
 1016         int             stack[NET_FILTER_STACK_DEPTH+1];
 1017         register int    *sp;
 1018         register filter_t       *fp, *fpe;
 1019         register unsigned int   op, arg;
 1020 
 1021         /*
 1022          * The filter accesses the header and data
 1023          * as unsigned short words.
 1024          */
 1025         data_count /= sizeof(unsigned short);
 1026 
 1027 #define data_word       ((unsigned short *)data)
 1028 #define header_word     ((unsigned short *)header)
 1029 
 1030         sp = &stack[NET_FILTER_STACK_DEPTH];
 1031         fp = &infp->filter[0];
 1032         fpe = infp->filter_end;
 1033 
 1034         *sp = TRUE;
 1035 
 1036         while (fp < fpe) {
 1037             arg = *fp++;
 1038             op = NETF_OP(arg);
 1039             arg = NETF_ARG(arg);
 1040 
 1041             switch (arg) {
 1042                 case NETF_NOPUSH:
 1043                     arg = *sp++;
 1044                     break;
 1045                 case NETF_PUSHZERO:
 1046                     arg = 0;
 1047                     break;
 1048                 case NETF_PUSHLIT:
 1049                     arg = *fp++;
 1050                     break;
 1051                 case NETF_PUSHIND:
 1052                     arg = *sp++;
 1053                     if (arg >= data_count)
 1054                         return FALSE;
 1055                     arg = data_word[arg];
 1056                     break;
 1057                 case NETF_PUSHHDRIND:
 1058                     arg = *sp++;
 1059                     if (arg >= NET_HDW_HDR_MAX/sizeof(unsigned short))
 1060                         return FALSE;
 1061                     arg = header_word[arg];
 1062                     break;
 1063                 default:
 1064                     if (arg >= NETF_PUSHSTK) {
 1065                         arg = sp[arg - NETF_PUSHSTK];
 1066                     }
 1067                     else if (arg >= NETF_PUSHHDR) {
 1068                         arg = header_word[arg - NETF_PUSHHDR];
 1069                     }
 1070                     else {
 1071                         arg -= NETF_PUSHWORD;
 1072                         if (arg >= data_count)
 1073                             return FALSE;
 1074                         arg = data_word[arg];
 1075                     }
 1076                     break;
 1077 
 1078             }
 1079             switch (op) {
 1080                 case NETF_OP(NETF_NOP):
 1081                     *--sp = arg;
 1082                     break;
 1083                 case NETF_OP(NETF_AND):
 1084                     *sp &= arg;
 1085                     break;
 1086                 case NETF_OP(NETF_OR):
 1087                     *sp |= arg;
 1088                     break;
 1089                 case NETF_OP(NETF_XOR):
 1090                     *sp ^= arg;
 1091                     break;
 1092                 case NETF_OP(NETF_EQ):
 1093                     *sp = (*sp == arg);
 1094                     break;
 1095                 case NETF_OP(NETF_NEQ):
 1096                     *sp = (*sp != arg);
 1097                     break;
 1098                 case NETF_OP(NETF_LT):
 1099                     *sp = (*sp < arg);
 1100                     break;
 1101                 case NETF_OP(NETF_LE):
 1102                     *sp = (*sp <= arg);
 1103                     break;
 1104                 case NETF_OP(NETF_GT):
 1105                     *sp = (*sp > arg);
 1106                     break;
 1107                 case NETF_OP(NETF_GE):
 1108                     *sp = (*sp >= arg);
 1109                     break;
 1110                 case NETF_OP(NETF_COR):
 1111                     if (*sp++ == arg)
 1112                         return (TRUE);
 1113                     break;
 1114                 case NETF_OP(NETF_CAND):
 1115                     if (*sp++ != arg)
 1116                         return (FALSE);
 1117                     break;
 1118                 case NETF_OP(NETF_CNOR):
 1119                     if (*sp++ == arg)
 1120                         return (FALSE);
 1121                     break;
 1122                 case NETF_OP(NETF_CNAND):
 1123                     if (*sp++ != arg)
 1124                         return (TRUE);
 1125                     break;
 1126                 case NETF_OP(NETF_LSH):
 1127                     *sp <<= arg;
 1128                     break;
 1129                 case NETF_OP(NETF_RSH):
 1130                     *sp >>= arg;
 1131                     break;
 1132                 case NETF_OP(NETF_ADD):
 1133                     *sp += arg;
 1134                     break;
 1135                 case NETF_OP(NETF_SUB):
 1136                     *sp -= arg;
 1137                     break;
 1138             }
 1139         }
 1140         return ((*sp) ? TRUE : FALSE);
 1141 
 1142 #undef  data_word
 1143 #undef  header_word
 1144 }
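
/*
 *	Illustrative sketch (not part of the original source): a CSPF
 *	program for the interpreter above.  Each filter word carries
 *	an operator in its high bits and a push argument in its low
 *	bits; NETF_PUSHLIT takes its literal from the following word.
 *	This program accepts a packet only if header word 6 (the
 *	ethernet type field, assuming the usual 14-byte header split)
 *	equals the literal; byte-order handling is elided.
 */
#if 0
	filter_t ip_only[] = {
		NETF_PUSHHDR + 6,	/* push ether type word */
		NETF_PUSHLIT | NETF_EQ,	/* compare top of stack ... */
		0x0800			/* ... with this literal */
	};
#endif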
 1145 
 1146 /*
 1147  * Check filter for invalid operations or stack over/under-flow.
 1148  */
 1149 boolean_t
 1150 parse_net_filter(filter, count)
 1151         register filter_t       *filter;
 1152         unsigned int            count;
 1153 {
 1154         register int    sp;
 1155         register filter_t       *fpe = &filter[count];
 1156         register filter_t       op, arg;
 1157 
 1158         sp = NET_FILTER_STACK_DEPTH;
 1159 
 1160         for (; filter < fpe; filter++) {
 1161             op = NETF_OP(*filter);
 1162             arg = NETF_ARG(*filter);
 1163 
 1164             switch (arg) {
 1165                 case NETF_NOPUSH:
 1166                     break;
 1167                 case NETF_PUSHZERO:
 1168                     sp--;
 1169                     break;
 1170                 case NETF_PUSHLIT:
 1171                     filter++;
 1172                     if (filter >= fpe)
 1173                         return (FALSE); /* literal value not in filter */
 1174                     sp--;
 1175                     break;
 1176                 case NETF_PUSHIND:
 1177                 case NETF_PUSHHDRIND:
 1178                     break;
 1179                 default:
 1180                     if (arg >= NETF_PUSHSTK) {
 1181                         if (arg - NETF_PUSHSTK + sp > NET_FILTER_STACK_DEPTH)
 1182                             return FALSE;
 1183                     }
 1184                     else if (arg >= NETF_PUSHHDR) {
 1185                         if (arg - NETF_PUSHHDR >=
 1186                                 NET_HDW_HDR_MAX/sizeof(unsigned short))
 1187                             return FALSE;
 1188                     }
 1189                     /* else... cannot check for packet bounds
 1190                                 without packet */
 1191                     sp--;
 1192                     break;
 1193             }
 1194             if (sp < 2) {
 1195                 return (FALSE); /* stack overflow */
 1196             }
 1197             if (op == NETF_OP(NETF_NOP))
 1198                 continue;
 1199 
 1200             /*
 1201              * all non-NOP operators are binary.
 1202              */
 1203             if (sp > NET_FILTER_STACK_DEPTH-2)
 1204                 return (FALSE); /* stack underflow */
 1205 
 1206             sp++;
 1207             switch (op) {
 1208                 case NETF_OP(NETF_AND):
 1209                 case NETF_OP(NETF_OR):
 1210                 case NETF_OP(NETF_XOR):
 1211                 case NETF_OP(NETF_EQ):
 1212                 case NETF_OP(NETF_NEQ):
 1213                 case NETF_OP(NETF_LT):
 1214                 case NETF_OP(NETF_LE):
 1215                 case NETF_OP(NETF_GT):
 1216                 case NETF_OP(NETF_GE):
 1217                 case NETF_OP(NETF_COR):
 1218                 case NETF_OP(NETF_CAND):
 1219                 case NETF_OP(NETF_CNOR):
 1220                 case NETF_OP(NETF_CNAND):
 1221                 case NETF_OP(NETF_LSH):
 1222                 case NETF_OP(NETF_RSH):
 1223                 case NETF_OP(NETF_ADD):
 1224                 case NETF_OP(NETF_SUB):
 1225                     break;
 1226                 default:
 1227                     return (FALSE);
 1228             }
 1229         }
 1230         return (TRUE);
 1231 }
 1232 
 1233 /*
 1234  * Set a filter for a network interface.
 1235  *
 1236  * We are given a naked send right for the rcv_port.
 1237  * If we are successful, we must consume that right.
 1238  */
 1239 io_return_t
 1240 net_set_filter(ifp, rcv_port, priority, filter, filter_count)
 1241         struct ifnet    *ifp;
 1242         ipc_port_t      rcv_port;
 1243         int             priority;
 1244         filter_t        *filter;
 1245         unsigned int    filter_count;
 1246 {
 1247     int                         filter_bytes;
 1248     bpf_insn_t                  match;
 1249     register net_rcv_port_t     infp, my_infp;
 1250     net_rcv_port_t              nextfp;
 1251     net_hash_header_t           hhp;
 1252     register net_hash_entry_t   entp, hash_entp;
 1253     net_hash_entry_t            *head, nextentp;
 1254     queue_entry_t               dead_infp, dead_entp;
 1255     int                         i;
 1256     int                         ret, is_new_infp;
 1257     io_return_t                 rval;
 1258 
 1259     /*
 1260      * Check the filter syntax.
 1261      */
 1262 
 1263     filter_bytes = CSPF_BYTES(filter_count);
 1264     match = (bpf_insn_t) 0;
 1265 
 1266     if (filter_count > 0 && filter[0] == NETF_BPF) {
 1267         ret = bpf_validate((bpf_insn_t)filter, filter_bytes, &match);
 1268         if (!ret)
 1269             return (D_INVALID_OPERATION);
 1270     } else {
 1271         if (!parse_net_filter(filter, filter_count))
 1272             return (D_INVALID_OPERATION);
 1273     }
 1274 
 1275     rval = D_SUCCESS;                   /* default return value */
 1276     dead_infp = dead_entp = 0;
 1277 
 1278     if (match == (bpf_insn_t) 0) {
 1279         /*
 1280          * If there is no match instruction, we allocate
 1281          * a normal packet filter structure.
 1282          */
 1283         my_infp = (net_rcv_port_t) zalloc(net_rcv_zone);
 1284         my_infp->rcv_port = rcv_port;
 1285         is_new_infp = TRUE;
 1286     } else {
 1287         /*
 1288          * If there is a match instruction, we assume there will be
 1289          * multiple sessions with a common substructure and allocate
 1290          * a hash table to deal with them.
 1291          */
 1292         my_infp = 0;
 1293         hash_entp = (net_hash_entry_t) zalloc(net_hash_entry_zone);
 1294         is_new_infp = FALSE;
 1295     }    
 1296 
 1297     /*
 1298      * Look for an existing filter on the same reply port.
 1299      * Look for filters with dead ports (for GC).
 1300      * Look for a filter with the same code except KEY insns.
 1301      */
 1302     
 1303     simple_lock(&ifp->if_rcv_port_list_lock);
 1304     
 1305     FILTER_ITERATE(ifp, infp, nextfp)
 1306     {
 1307             if (infp->rcv_port == MACH_PORT_NULL) {
 1308                     if (match != 0
 1309                         && infp->priority == priority
 1310                         && my_infp == 0
 1311                         && (infp->filter_end - infp->filter) == filter_count
 1312                         && bpf_eq((bpf_insn_t)infp->filter,
 1313                                   filter, filter_bytes))
 1314                             {
 1315                                     my_infp = infp;
 1316                             }
 1317 
 1318                     for (i = 0; i < NET_HASH_SIZE; i++) {
 1319                             head = &((net_hash_header_t) infp)->table[i];
 1320                             if (*head == 0)
 1321                                     continue;
 1322 
 1323                             /*
 1324                              * Check each hash entry to make sure the
 1325                              * destination port is still valid.  Remove
 1326                              * any invalid entries.
 1327                              */
 1328                             entp = *head;
 1329                             do {
 1330                                     nextentp = (net_hash_entry_t) entp->he_next;
 1331   
 1332                                     /* checked without 
 1333                                        ip_lock(entp->rcv_port) */
 1334                                     if (entp->rcv_port == rcv_port
 1335                                         || !IP_VALID(entp->rcv_port)
 1336                                         || !ip_active(entp->rcv_port)) {
 1337                                 
 1338                                             ret = hash_ent_remove (ifp,
 1339                                                 (net_hash_header_t)infp,
 1340                                                 (my_infp == infp),
 1341                                                 head,
 1342                                                 entp,
 1343                                                 &dead_entp);
 1344                                             if (ret)
 1345                                                     goto hash_loop_end;
 1346                                     }
 1347                         
 1348                                     entp = nextentp;
 1349                             /* The while test checks *head, since
 1350                                hash_ent_remove might modify it.
 1351                                */
 1352                             } while (*head != 0 && entp != *head);
 1353                     }
 1354                 hash_loop_end:
 1355                     ;
 1356                     
 1357             } else if (infp->rcv_port == rcv_port
 1358                        || !IP_VALID(infp->rcv_port)
 1359                        || !ip_active(infp->rcv_port)) {
 1360                     /* Remove the old filter from list */
 1361                     remqueue(&ifp->if_rcv_port_list, (queue_entry_t)infp);
 1362                     ENQUEUE_DEAD(dead_infp, infp);
 1363             }
 1364     }
 1365     FILTER_ITERATE_END
 1366 
 1367     if (my_infp == 0) {
 1368         /* Allocate a dummy infp */
 1369         simple_lock(&net_hash_header_lock);
 1370         for (i = 0; i < N_NET_HASH; i++) {
 1371             if (filter_hash_header[i].n_keys == 0)
 1372                 break;
 1373         }
 1374         if (i == N_NET_HASH) {
 1375             simple_unlock(&net_hash_header_lock);
 1376             simple_unlock(&ifp->if_rcv_port_list_lock);
 1377 
 1378             ipc_port_release_send(rcv_port);
 1379             if (match != 0)
 1380                     zfree (net_hash_entry_zone, (vm_offset_t)hash_entp);
 1381 
 1382             rval = D_NO_MEMORY;
 1383             goto clean_and_return;
 1384         }
 1385 
 1386         hhp = &filter_hash_header[i];
 1387         hhp->n_keys = match->jt;
 1388         simple_unlock(&net_hash_header_lock);
 1389 
 1390         hhp->ref_count = 0;
 1391         for (i = 0; i < NET_HASH_SIZE; i++)
 1392             hhp->table[i] = 0;
 1393 
 1394         my_infp = (net_rcv_port_t)hhp;
 1395         my_infp->rcv_port = MACH_PORT_NULL;     /* indication of dummy */
 1396         is_new_infp = TRUE;
 1397     }
 1398 
 1399     if (is_new_infp) {
 1400         my_infp->priority = priority;
 1401         my_infp->rcv_count = 0;
 1402 
 1403         /* Copy filter program. */
 1404         bcopy ((vm_offset_t)filter, (vm_offset_t)my_infp->filter,
 1405                filter_bytes);
 1406         my_infp->filter_end =
 1407             (filter_t *)((char *)my_infp->filter + filter_bytes);
 1408 
 1409         if (match == 0) {
 1410             my_infp->rcv_qlimit = net_add_q_info(rcv_port);
 1411         } else {
 1412             my_infp->rcv_qlimit = 0;
 1413         }
 1414 
 1415         /* Insert my_infp according to priority */
 1416         queue_iterate(&ifp->if_rcv_port_list, infp, net_rcv_port_t, chain)
 1417             if (priority > infp->priority)
 1418                 break;
 1419         enqueue_tail((queue_t)&infp->chain, (queue_entry_t)my_infp);
 1420     }
 1421     
 1422     if (match != 0)
 1423     {       /* Insert to hash list */
 1424         net_hash_entry_t *p;
 1425         int j;
 1426         
 1427         hash_entp->rcv_port = rcv_port;
 1428         for (i = 0; i < match->jt; i++)         /* match->jt is n_keys */
 1429             hash_entp->keys[i] = match[i+1].k;
 1430         p = &((net_hash_header_t)my_infp)->
 1431                         table[bpf_hash(match->jt, hash_entp->keys)];
 1432         
 1433         /* Not checking for the same key values */
 1434         if (*p == 0) {
 1435             queue_init ((queue_t) hash_entp);
 1436             *p = hash_entp;
 1437         } else {
 1438             enqueue_tail((queue_t)*p, hash_entp);
 1439         }
 1440 
 1441         ((net_hash_header_t)my_infp)->ref_count++;
 1442         hash_entp->rcv_qlimit = net_add_q_info(rcv_port);
 1443 
 1444     }
 1445     
 1446     simple_unlock(&ifp->if_rcv_port_list_lock);
 1447 
 1448 clean_and_return:
 1449     /* No locks are held at this point. */
 1450 
 1451     if (dead_infp != 0)
 1452             net_free_dead_infp(dead_infp);
 1453     if (dead_entp != 0)
 1454             net_free_dead_entp(dead_entp);
 1455     
 1456     return (rval);
 1457 }
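
/*
 * Illustrative sketch (not in the original source) of the deferred-free
 * pattern used by net_set_filter above, condensed and paraphrased:
 * dead filters and hash entries are only unlinked and queued with
 * ENQUEUE_DEAD while the interface and hash-header locks are held;
 * the deallocation, which releases port rights, happens only after
 * clean_and_return, when no locks are held.
 *
 *      simple_lock(&ifp->if_rcv_port_list_lock);
 *      ...
 *      ENQUEUE_DEAD(dead_infp, infp);   -- unlink now, free later
 *      ...
 *      simple_unlock(&ifp->if_rcv_port_list_lock);
 *      -- no locks held past this point --
 *      if (dead_infp != 0)
 *              net_free_dead_infp(dead_infp);
 *      if (dead_entp != 0)
 *              net_free_dead_entp(dead_entp);
 */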
 1458 
 1459 /*
 1460  * Other network operations
 1461  */
 1462 io_return_t
 1463 net_getstat(ifp, flavor, status, count)
 1464         struct ifnet    *ifp;
 1465         dev_flavor_t    flavor;
 1466         dev_status_t    status;         /* pointer to OUT array */
 1467         natural_t       *count;         /* OUT */
 1468 {
 1469         switch (flavor) {
 1470             case NET_STATUS:
 1471             {
 1472                 register struct net_status *ns = (struct net_status *)status;
 1473 
 1474                 if (*count < NET_STATUS_COUNT)
 1475                     return (D_INVALID_OPERATION);
 1476                 
 1477                 ns->min_packet_size = ifp->if_header_size;
 1478                 ns->max_packet_size = ifp->if_header_size + ifp->if_mtu;
 1479                 ns->header_format   = ifp->if_header_format;
 1480                 ns->header_size     = ifp->if_header_size;
 1481                 ns->address_size    = ifp->if_address_size;
 1482                 ns->flags           = ifp->if_flags;
 1483                 ns->mapped_size     = 0;
 1484 
 1485                 *count = NET_STATUS_COUNT;
 1486                 break;
 1487             }
 1488             case NET_ADDRESS:
 1489             {
 1490                 register int    addr_byte_count;
 1491                 register int    addr_int_count;
 1492                 register int    i;
 1493 
 1494                 addr_byte_count = ifp->if_address_size;
 1495                 addr_int_count = (addr_byte_count + (sizeof(int)-1))
 1496                                          / sizeof(int);
 1497 
 1498                 if (*count < addr_int_count)
 1499                     return (D_INVALID_OPERATION);
 1500 
 1501                 bcopy((char *)ifp->if_address,
 1502                       (char *)status,
 1503                       (unsigned) addr_byte_count);
 1504                 if (addr_byte_count < addr_int_count * sizeof(int))
 1505                     bzero((char *)status + addr_byte_count,
 1506                           (unsigned) (addr_int_count * sizeof(int)
 1507                                       - addr_byte_count));
 1508 
 1509                 for (i = 0; i < addr_int_count; i++) {
 1510                     register int word;
 1511 
 1512                     word = status[i];
 1513                     status[i] = htonl(word);
 1514                 }
 1515                 *count = addr_int_count;
 1516                 break;
 1517             }
 1518             default:
 1519                 return (D_INVALID_OPERATION);
 1520         }
 1521         return (D_SUCCESS);
 1522 }
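
/*
 * Worked example for the NET_ADDRESS conversion above (illustrative;
 * the sample address is an assumption): with a 6-byte Ethernet
 * address 08:00:2b:12:34:56 and 4-byte ints, addr_byte_count = 6 and
 * addr_int_count = (6 + 3) / 4 = 2.  The bcopy fills the first six
 * bytes of status, the bzero clears the remaining two bytes of
 * status[1], and after the htonl pass status[0] holds the integer
 * value 0x08002b12 and status[1] holds 0x34560000 on either host
 * byte order, so callers see the address words in network order.
 */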
 1523 
 1524 io_return_t
 1525 net_write(ifp, start, ior)
 1526         register struct ifnet *ifp;
 1527         int             (*start)();
 1528         io_req_t        ior;
 1529 {
 1530         spl_t   s;
 1531         kern_return_t   rc;
 1532         boolean_t       wait;
 1533 
 1534         /*
 1535          * Reject the write if the interface is down.
 1536          */
 1537         if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
 1538             return (D_DEVICE_DOWN);
 1539 
 1540         /*
 1541          * Reject the write if the packet is too large or too small.
 1542          */
 1543         if (ior->io_count < ifp->if_header_size ||
 1544             ior->io_count > ifp->if_header_size + ifp->if_mtu)
 1545             return (D_INVALID_SIZE);
 1546 
 1547         /*
 1548          * Wire down the memory.
 1549          */
 1550 
 1551         rc = device_write_get(ior, &wait);
 1552         if (rc != KERN_SUCCESS)
 1553             return (rc);
 1554 
 1555         /*
 1556          *      Network interfaces can't cope with VM continuations.
 1557          *      If wait is set, just panic.
 1558          */
 1559         if (wait) {
 1560                 panic("net_write: VM continuation");
 1561         }
 1562 
 1563         /*
 1564          * Queue the packet on the output queue, and
 1565          * start the device.
 1566          */
 1567         s = splimp();
 1568         IF_ENQUEUE(&ifp->if_snd, ior);
 1569         (*start)(ifp->if_unit);
 1570         splx(s);
 1571         
 1572         return (D_IO_QUEUED);
 1573 }
 1574 
 1575 /*
 1576  * Initialize the whole package.
 1577  */
 1578 void
 1579 net_io_init()
 1580 {
 1581         register vm_size_t      size;
 1582 
 1583         size = sizeof(struct net_rcv_port);
 1584         net_rcv_zone = zinit(size,
 1585                              size * 1000,
 1586                              PAGE_SIZE,
 1587                              FALSE,
 1588                              "net_rcv_port");
 1589 
 1590         size = sizeof(struct net_hash_entry);
 1591         net_hash_entry_zone = zinit(size,
 1592                                     size * 100,
 1593                                     PAGE_SIZE,
 1594                                     FALSE,
 1595                                     "net_hash_entry");
 1596 
 1597         size = ikm_plus_overhead(sizeof(struct net_rcv_msg));
 1598         net_kmsg_size = round_page(size);
 1599 
 1600         /*
 1601          *      net_kmsg_max caps the number of buffers
 1602          *      we are willing to allocate.  By default,
 1603          *      we allow for net_queue_free_min plus
 1604          *      the queue limit for each filter.
 1605          *      (Added as the filters are added.)
 1606          */
 1607 
 1608         simple_lock_init(&net_kmsg_total_lock);
 1609         if (net_kmsg_max == 0)
 1610             net_kmsg_max = net_queue_free_min;
 1611 
 1612         simple_lock_init(&net_queue_free_lock);
 1613         ipc_kmsg_queue_init(&net_queue_free);
 1614 
 1615         simple_lock_init(&net_queue_lock);
 1616         ipc_kmsg_queue_init(&net_queue_high);
 1617         ipc_kmsg_queue_init(&net_queue_low);
 1618 
 1619         simple_lock_init(&net_hash_header_lock);
 1620 }
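
/*
 * Illustrative accounting example (not in the original source):
 * net_kmsg_max starts out equal to net_queue_free_min, and each
 * filter registered through net_add_q_info later raises it by that
 * port's queue limit plus one.  After two filters whose ports have
 * qlimits q1 and q2 are added, net_kmsg_max has grown by
 *
 *      (q1 + 1) + (q2 + 1)
 *
 * over its initial value, so the buffer pool covers the free-queue
 * minimum plus every filter's potential backlog.
 */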
 1621 
 1622 
 1623 /* ======== BPF: Berkeley Packet Filter ======== */
 1624 
 1625 /*-
 1626  * Copyright (c) 1990-1991 The Regents of the University of California.
 1627  * All rights reserved.
 1628  *
 1629  * This code is derived from the Stanford/CMU enet packet filter,
 1630  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 1631  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 
 1632  * Berkeley Laboratory.
 1633  *
 1634  * Redistribution and use in source and binary forms, with or without
 1635  * modification, are permitted provided that the following conditions
 1636  * are met:
 1637  * 1. Redistributions of source code must retain the above copyright
 1638  *    notice, this list of conditions and the following disclaimer.
 1639  * 2. Redistributions in binary form must reproduce the above copyright
 1640  *    notice, this list of conditions and the following disclaimer in the
 1641  *    documentation and/or other materials provided with the distribution.
 1642  * 3. All advertising materials mentioning features or use of this software
 1643  *    must display the following acknowledgement:
 1644  *      This product includes software developed by the University of
 1645  *      California, Berkeley and its contributors.
 1646  * 4. Neither the name of the University nor the names of its contributors
 1647  *    may be used to endorse or promote products derived from this software
 1648  *    without specific prior written permission.
 1649  *
 1650  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 1651  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 1652  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 1653  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 1654  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 1655  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 1656  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 1657  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 1658  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 1659  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 1660  * SUCH DAMAGE.
 1661  *
 1662  *      @(#)bpf.c       7.5 (Berkeley) 7/15/91
 1663  *
 1664  * static char rcsid[] =
 1665  * "$Header: net_io.c,v 2.29 93/08/10 15:10:56 mrt Exp $";
 1666  */
 1667 #if !(defined(lint) || defined(KERNEL))
 1668 static char rcsid[] =
 1669     "@(#) $Header: net_io.c,v 2.29 93/08/10 15:10:56 mrt Exp $ (LBL)";
 1670 #endif
 1671 
 1672 #if defined(sparc) || defined(mips) || defined(ibm032) || defined(alpha)
 1673 #define BPF_ALIGN
 1674 #endif
 1675 
 1676 typedef unsigned long u_long;   /* XXX */
 1677 
 1678 #ifndef BPF_ALIGN
 1679 #define EXTRACT_SHORT(p)        ((u_short)ntohs(*(u_short *)p))
 1680 #define EXTRACT_LONG(p)         (ntohl(*(u_long *)p))
 1681 #else
 1682 #define EXTRACT_SHORT(p)\
 1683         ((u_short)\
 1684                 ((u_short)*((u_char *)p+0)<<8|\
 1685                  (u_short)*((u_char *)p+1)<<0))
 1686 #define EXTRACT_LONG(p)\
 1687                 ((u_long)*((u_char *)p+0)<<24|\
 1688                  (u_long)*((u_char *)p+1)<<16|\
 1689                  (u_long)*((u_char *)p+2)<<8|\
 1690                  (u_long)*((u_char *)p+3)<<0)
 1691 #endif
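
/*
 * Worked example (illustrative): for p pointing at the bytes
 * { 0x12, 0x34, 0x56, 0x78 }, EXTRACT_SHORT(p) yields 0x1234 and
 * EXTRACT_LONG(p) yields 0x12345678 on either path -- ntohs/ntohl
 * where unaligned loads are permitted, or the byte-at-a-time shifts
 * under BPF_ALIGN -- since packet data is always in network
 * (big-endian) byte order.
 */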
 1692 
 1693 /*
 1694  * Execute the filter program starting at pc on the packet p.
 1695  * wirelen is the length of the original packet.
 1696  * buflen bounds the amount of packet data that may be read.
 1697  */
 1698 
 1699 int
 1700 bpf_do_filter(infp, p, wirelen, header, hash_headpp, entpp)
 1701         net_rcv_port_t  infp;
 1702         char *          p;              /* packet data */
 1703         unsigned int    wirelen;        /* data_count (in bytes) */
 1704         char *          header;
 1705         net_hash_entry_t        **hash_headpp, *entpp;  /* out */
 1706 {
 1707         register bpf_insn_t pc, pc_end;
 1708         register unsigned int buflen;
 1709 
 1710         register unsigned long A, X;
 1711         register int k;
 1712         long mem[BPF_MEMWORDS];
 1713 
 1714         pc = ((bpf_insn_t) infp->filter) + 1;
 1715                                         /* filter[0].code is BPF_BEGIN */
 1716         pc_end = (bpf_insn_t)infp->filter_end;
 1717         buflen = NET_RCV_MAX;
 1718         *entpp = 0;                     /* default */
 1719 
 1720 #ifdef lint
 1721         A = 0;
 1722         X = 0;
 1723 #endif
 1724         for (; pc < pc_end; ++pc) {
 1725                 switch (pc->code) {
 1726 
 1727                 default:
 1728 #ifdef KERNEL
 1729                         return 0;
 1730 #else
 1731                         abort();
 1732 #endif                  
 1733                 case BPF_RET|BPF_K:
 1734                         if (infp->rcv_port == MACH_PORT_NULL &&
 1735                             *entpp == 0) {
 1736                                 return 0;
 1737                         }
 1738                         return ((u_int)pc->k <= wirelen) ?
 1739                                                 pc->k : wirelen;
 1740 
 1741                 case BPF_RET|BPF_A:
 1742                         if (infp->rcv_port == MACH_PORT_NULL &&
 1743                             *entpp == 0) {
 1744                                 return 0;
 1745                         }
 1746                         return ((u_int)A <= wirelen) ?
 1747                                                 A : wirelen;
 1748 
 1749                 case BPF_RET|BPF_MATCH_IMM:
 1750                         if (bpf_match ((net_hash_header_t)infp, pc->jt, mem,
 1751                                        hash_headpp, entpp)) {
 1752                                 return ((u_int)pc->k <= wirelen) ?
 1753                                                         pc->k : wirelen;
 1754                         }
 1755                         return 0;
 1756 
 1757                 case BPF_LD|BPF_W|BPF_ABS:
 1758                         k = pc->k;
 1759                         if ((u_int)k + sizeof(long) <= buflen) {
 1760 #ifdef BPF_ALIGN
 1761                                 if (((int)(p + k) & 3) != 0)
 1762                                         A = EXTRACT_LONG(&p[k]);
 1763                                 else
 1764 #endif
 1765                                         A = ntohl(*(long *)(p + k));
 1766                                 continue;
 1767                         }
 1768 
 1769                         k -= BPF_DLBASE;
 1770                         if ((u_int)k + sizeof(long) <= NET_HDW_HDR_MAX) {
 1771 #ifdef BPF_ALIGN
 1772                                 if (((int)(header + k) & 3) != 0)
 1773                                         A = EXTRACT_LONG(&header[k]);
 1774                                 else
 1775 #endif
 1776                                         A = ntohl(*(long *)(header + k));
 1777                                 continue;
 1778                         } else {
 1779                                 return 0;
 1780                         }
 1781 
 1782                 case BPF_LD|BPF_H|BPF_ABS:
 1783                         k = pc->k;
 1784                         if ((u_int)k + sizeof(short) <= buflen) {
 1785                                 A = EXTRACT_SHORT(&p[k]);
 1786                                 continue;
 1787                         }
 1788 
 1789                         k -= BPF_DLBASE;
 1790                         if ((u_int)k + sizeof(short) <= NET_HDW_HDR_MAX) {
 1791                                 A = EXTRACT_SHORT(&header[k]);
 1792                                 continue;
 1793                         } else {
 1794                                 return 0;
 1795                         }
 1796 
 1797                 case BPF_LD|BPF_B|BPF_ABS:
 1798                         k = pc->k;
 1799                         if ((u_int)k < buflen) {
 1800                                 A = p[k];
 1801                                 continue;
 1802                         }
 1803                         
 1804                         k -= BPF_DLBASE;
 1805                         if ((u_int)k < NET_HDW_HDR_MAX) {
 1806                                 A = header[k];
 1807                                 continue;
 1808                         } else {
 1809                                 return 0;
 1810                         }
 1811 
 1812                 case BPF_LD|BPF_W|BPF_LEN:
 1813                         A = wirelen;
 1814                         continue;
 1815 
 1816                 case BPF_LDX|BPF_W|BPF_LEN:
 1817                         X = wirelen;
 1818                         continue;
 1819 
 1820                 case BPF_LD|BPF_W|BPF_IND:
 1821                         k = X + pc->k;
 1822                         if (k + sizeof(long) > buflen)
 1823                                 return 0;
 1824 #ifdef BPF_ALIGN
 1825                         if (((int)(p + k) & 3) != 0)
 1826                                 A = EXTRACT_LONG(&p[k]);
 1827                         else
 1828 #endif
 1829                                 A = ntohl(*(long *)(p + k));
 1830                         continue;
 1831 
 1832                 case BPF_LD|BPF_H|BPF_IND:
 1833                         k = X + pc->k;
 1834                         if (k + sizeof(short) > buflen)
 1835                                 return 0;
 1836                         A = EXTRACT_SHORT(&p[k]);
 1837                         continue;
 1838 
 1839                 case BPF_LD|BPF_B|BPF_IND:
 1840                         k = X + pc->k;
 1841                         if (k >= buflen)
 1842                                 return 0;
 1843                         A = p[k];
 1844                         continue;
 1845 
 1846                 case BPF_LDX|BPF_MSH|BPF_B:
 1847                         k = pc->k;
 1848                         if (k >= buflen)
 1849                                 return 0;
 1850                         X = (p[pc->k] & 0xf) << 2;
 1851                         continue;
 1852 
 1853                 case BPF_LD|BPF_IMM:
 1854                         A = pc->k;
 1855                         continue;
 1856 
 1857                 case BPF_LDX|BPF_IMM:
 1858                         X = pc->k;
 1859                         continue;
 1860 
 1861                 case BPF_LD|BPF_MEM:
 1862                         A = mem[pc->k];
 1863                         continue;
 1864                         
 1865                 case BPF_LDX|BPF_MEM:
 1866                         X = mem[pc->k];
 1867                         continue;
 1868 
 1869                 case BPF_ST:
 1870                         mem[pc->k] = A;
 1871                         continue;
 1872 
 1873                 case BPF_STX:
 1874                         mem[pc->k] = X;
 1875                         continue;
 1876 
 1877                 case BPF_JMP|BPF_JA:
 1878                         pc += pc->k;
 1879                         continue;
 1880 
 1881                 case BPF_JMP|BPF_JGT|BPF_K:
 1882                         pc += (A > pc->k) ? pc->jt : pc->jf;
 1883                         continue;
 1884 
 1885                 case BPF_JMP|BPF_JGE|BPF_K:
 1886                         pc += (A >= pc->k) ? pc->jt : pc->jf;
 1887                         continue;
 1888 
 1889                 case BPF_JMP|BPF_JEQ|BPF_K:
 1890                         pc += (A == pc->k) ? pc->jt : pc->jf;
 1891                         continue;
 1892 
 1893                 case BPF_JMP|BPF_JSET|BPF_K:
 1894                         pc += (A & pc->k) ? pc->jt : pc->jf;
 1895                         continue;
 1896 
 1897                 case BPF_JMP|BPF_JGT|BPF_X:
 1898                         pc += (A > X) ? pc->jt : pc->jf;
 1899                         continue;
 1900 
 1901                 case BPF_JMP|BPF_JGE|BPF_X:
 1902                         pc += (A >= X) ? pc->jt : pc->jf;
 1903                         continue;
 1904 
 1905                 case BPF_JMP|BPF_JEQ|BPF_X:
 1906                         pc += (A == X) ? pc->jt : pc->jf;
 1907                         continue;
 1908 
 1909                 case BPF_JMP|BPF_JSET|BPF_X:
 1910                         pc += (A & X) ? pc->jt : pc->jf;
 1911                         continue;
 1912 
 1913                 case BPF_ALU|BPF_ADD|BPF_X:
 1914                         A += X;
 1915                         continue;
 1916                         
 1917                 case BPF_ALU|BPF_SUB|BPF_X:
 1918                         A -= X;
 1919                         continue;
 1920                         
 1921                 case BPF_ALU|BPF_MUL|BPF_X:
 1922                         A *= X;
 1923                         continue;
 1924                         
 1925                 case BPF_ALU|BPF_DIV|BPF_X:
 1926                         if (X == 0)
 1927                                 return 0;
 1928                         A /= X;
 1929                         continue;
 1930                         
 1931                 case BPF_ALU|BPF_AND|BPF_X:
 1932                         A &= X;
 1933                         continue;
 1934                         
 1935                 case BPF_ALU|BPF_OR|BPF_X:
 1936                         A |= X;
 1937                         continue;
 1938 
 1939                 case BPF_ALU|BPF_LSH|BPF_X:
 1940                         A <<= X;
 1941                         continue;
 1942 
 1943                 case BPF_ALU|BPF_RSH|BPF_X:
 1944                         A >>= X;
 1945                         continue;
 1946 
 1947                 case BPF_ALU|BPF_ADD|BPF_K:
 1948                         A += pc->k;
 1949                         continue;
 1950                         
 1951                 case BPF_ALU|BPF_SUB|BPF_K:
 1952                         A -= pc->k;
 1953                         continue;
 1954                         
 1955                 case BPF_ALU|BPF_MUL|BPF_K:
 1956                         A *= pc->k;
 1957                         continue;
 1958                         
 1959                 case BPF_ALU|BPF_DIV|BPF_K:
 1960                         A /= pc->k;
 1961                         continue;
 1962                         
 1963                 case BPF_ALU|BPF_AND|BPF_K:
 1964                         A &= pc->k;
 1965                         continue;
 1966                         
 1967                 case BPF_ALU|BPF_OR|BPF_K:
 1968                         A |= pc->k;
 1969                         continue;
 1970 
 1971                 case BPF_ALU|BPF_LSH|BPF_K:
 1972                         A <<= pc->k;
 1973                         continue;
 1974 
 1975                 case BPF_ALU|BPF_RSH|BPF_K:
 1976                         A >>= pc->k;
 1977                         continue;
 1978 
 1979                 case BPF_ALU|BPF_NEG:
 1980                         A = -A;
 1981                         continue;
 1982 
 1983                 case BPF_MISC|BPF_TAX:
 1984                         X = A;
 1985                         continue;
 1986 
 1987                 case BPF_MISC|BPF_TXA:
 1988                         A = X;
 1989                         continue;
 1990                 }
 1991         }
 1992 
 1993         return 0;
 1994 }
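
/*
 * Illustrative filter program for the interpreter above (a sketch,
 * not in the original source; the instruction layout and the exact
 * header offset are assumptions).  It accepts ARP frames by loading
 * the ethertype from the link-level header through the
 * BPF_DLBASE-relative window handled in the BPF_LD cases:
 *
 *      { BPF_BEGIN, 0, 0, 0 },                  -- skipped; pc starts
 *                                                  at filter[1]
 *      { BPF_LD|BPF_H|BPF_ABS, 0, 0, BPF_DLBASE + 12 },
 *                                               -- A = ethertype
 *      { BPF_JMP|BPF_JEQ|BPF_K, 0, 1, 0x0806 }, -- ARP?
 *      { BPF_RET|BPF_K, 0, 0, 0xffffffff },     -- yes: accept packet
 *      { BPF_RET|BPF_K, 0, 0, 0 },              -- no: reject
 */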
 1995 
 1996 /*
 1997  * Return 1 if 'f' is a valid filter program without a MATCH
 1998  * instruction.  Return 2 if it is a valid filter program with a
 1999  * MATCH instruction.  Otherwise, return 0.
 2000  * The constraints are that each jump be forward and land on a
 2001  * valid instruction, and that the program terminate with either
 2002  * an accept or a reject (a BPF_RET instruction).
 2003  *
 2004  * The kernel needs to be able to verify an application's filter
 2005  * code.  Otherwise, a bogus program could easily crash the
 2006  * system.
 2007  */
 2008 int
 2009 bpf_validate(f, bytes, match)
 2010         bpf_insn_t f;
 2011         int bytes;
 2012         bpf_insn_t *match;
 2013 {
 2014         register int i, j, len;
 2015         register bpf_insn_t p;
 2016 
 2017         len = BPF_BYTES2LEN(bytes);
 2018         /* f[0].code is already checked to be BPF_BEGIN. So skip f[0]. */
 2019 
 2020         for (i = 1; i < len; ++i) {
 2021                 /*
 2022                  * Check that jumps are forward and within
 2023                  * the code block.
 2024                  */
 2025                 p = &f[i];
 2026                 if (BPF_CLASS(p->code) == BPF_JMP) {
 2027                         register int from = i + 1;
 2028 
 2029                         if (BPF_OP(p->code) == BPF_JA) {
 2030                                 if (from + p->k >= len)
 2031                                         return 0;
 2032                         }
 2033                         else if (from + p->jt >= len || from + p->jf >= len)
 2034                                 return 0;
 2035                 }
 2036                 /*
 2037                  * Check that memory operations use valid addresses.
 2038                  */
 2039                 if ((BPF_CLASS(p->code) == BPF_ST ||
 2040                      (BPF_CLASS(p->code) == BPF_LD && 
 2041                       (p->code & 0xe0) == BPF_MEM)) &&
 2042                     (p->k >= BPF_MEMWORDS || p->k < 0))
 2043                         return 0;
 2044                 /*
 2045                  * Check for constant division by 0.
 2046                  */
 2047                 if (p->code == (BPF_ALU|BPF_DIV|BPF_K) && p->k == 0)
 2048                         return 0;
 2049                 /*
 2050                  * Check for match instruction.
 2051                  * Only one match instruction per filter is allowed.
 2052                  */
 2053                 if (p->code == (BPF_RET|BPF_MATCH_IMM)) {
 2054                         if (*match != 0 ||
 2055                             p->jt == 0 ||
 2056                             p->jt > N_NET_HASH_KEYS)
 2057                                 return 0;
 2058                         i += p->jt;             /* skip keys */
 2059                         if (i + 1 > len)
 2060                                 return 0;
 2061 
 2062                         for (j = 1; j <= p->jt; j++) {
 2063                             if (p[j].code != (BPF_MISC|BPF_KEY))
 2064                                 return 0;
 2065                         }
 2066 
 2067                         *match = p;
 2068                 }
 2069         }
 2070         if (BPF_CLASS(f[len - 1].code) == BPF_RET)
 2071                 return ((*match == 0) ? 1 : 2);
 2072         else
 2073                 return 0;
 2074 }
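
/*
 * Worked example (illustrative): run on the ARP filter sketched
 * after bpf_do_filter, len is 5.  The only jump is the JEQ at
 * i == 2: from = 3, and both 3 + jt and 3 + jf are below len, so it
 * passes.  There are no memory or MATCH instructions, f[4] is a
 * BPF_RET, and *match is still 0, so bpf_validate returns 1.
 */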
 2075 
 2076 int
 2077 bpf_eq (f1, f2, bytes)
 2078         register bpf_insn_t f1, f2;
 2079         register int bytes;
 2080 {
 2081         register int count;
 2082 
 2083         count = BPF_BYTES2LEN(bytes);
 2084         for (; count--; f1++, f2++) {
 2085                 if (!BPF_INSN_EQ(f1, f2)) {
 2086                         if ( f1->code == (BPF_MISC|BPF_KEY) &&
 2087                              f2->code == (BPF_MISC|BPF_KEY) )
 2088                                 continue;
 2089                         return FALSE;
 2090                 }
 2091         }
 2092         return TRUE;
 2093 }
 2094 
 2095 unsigned int
 2096 bpf_hash (n, keys)
 2097         register int n;
 2098         register unsigned int *keys;
 2099 {
 2100         register unsigned int hval = 0;
 2101         
 2102         while (n--) {
 2103                 hval += *keys++;
 2104         }
 2105         return (hval % NET_HASH_SIZE);
 2106 }
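
/*
 * Worked example (illustrative): with n == 2 and keys
 * { 0xc0a80001, 0x00000016 }, hval = 0xc0a80001 + 0x00000016
 * = 0xc0a80017, and the bucket index is 0xc0a80017 % NET_HASH_SIZE
 * (NET_HASH_SIZE is defined in the net_io headers).
 */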
 2107 
 2108 
 2109 int
 2110 bpf_match (hash, n_keys, keys, hash_headpp, entpp)
 2111         net_hash_header_t hash;
 2112         register int n_keys;
 2113         register unsigned int *keys;
 2114         net_hash_entry_t **hash_headpp, *entpp;
 2115 {
 2116         register net_hash_entry_t head, entp;
 2117         register int i;
 2118 
 2119         if (n_keys != hash->n_keys)
 2120                 return FALSE;
 2121 
 2122         *hash_headpp = &hash->table[bpf_hash(n_keys, keys)];
 2123         head = **hash_headpp;
 2124 
 2125         if (head == 0)
 2126                 return FALSE;
 2127 
 2128         HASH_ITERATE (head, entp)
 2129         {
 2130                 for (i = 0; i < n_keys; i++) {
 2131                         if (keys[i] != entp->keys[i])
 2132                                 break;
 2133                 }
 2134                 if (i == n_keys) {
 2135                         *entpp = entp;
 2136                         return TRUE;
 2137                 }
 2138         }
 2139         HASH_ITERATE_END (head, entp)
 2140         return FALSE;
 2141 }       
 2142 
 2143 
 2144 /*
 2145  * Removes a hash entry (ENTP) from its queue (HEAD).  If the
 2146  * filter's (HP) reference count drops to zero and USED is false,
 2147  * HP is also removed from ifp->if_rcv_port_list and freed.
 2148  */
 2149 
 2150 int
 2151 hash_ent_remove (ifp, hp, used, head, entp, dead_p)
 2152     struct ifnet        *ifp;
 2153     net_hash_header_t   hp;
 2154     int                 used;
 2155     net_hash_entry_t    *head, entp;
 2156     queue_entry_t       *dead_p;
 2157 {    
 2158         hp->ref_count--;
 2159 
 2160         if (*head == entp) {
 2161 
 2162                 if (queue_empty((queue_t) entp)) {
 2163                         *head = 0;
 2164                         ENQUEUE_DEAD(*dead_p, entp);
 2165                         if (hp->ref_count == 0 && !used) {
 2166                                 remqueue((queue_t) &ifp->if_rcv_port_list,
 2167                                          (queue_entry_t)hp);
 2168                                 hp->n_keys = 0;
 2169                                 return TRUE;
 2170                         }
 2171                         return FALSE;
 2172                 } else {
 2173                         *head = (net_hash_entry_t)queue_next((queue_t) entp);
 2174                 }
 2175         }
 2176 
 2177         remqueue((queue_t)*head, (queue_entry_t)entp);
 2178         ENQUEUE_DEAD(*dead_p, entp);
 2179         return FALSE;
 2180 }    
 2181 
 2182 int
 2183 net_add_q_info (rcv_port)
 2184         ipc_port_t      rcv_port;
 2185 {
 2186         mach_port_msgcount_t qlimit = 0;
 2187             
 2188         /*
 2189          * We use a new port, so increase net_queue_free_min
 2190          * and net_kmsg_max to allow for more queued messages.
 2191          */
 2192             
 2193         if (IP_VALID(rcv_port)) {
 2194                 ip_lock(rcv_port);
 2195                 if (ip_active(rcv_port))
 2196                         qlimit = rcv_port->ip_qlimit;
 2197                 ip_unlock(rcv_port);
 2198         }
 2199             
 2200         simple_lock(&net_kmsg_total_lock);
 2201         net_queue_free_min++;
 2202         net_kmsg_max += qlimit + 1;
 2203         simple_unlock(&net_kmsg_total_lock);
 2204 
 2205         return (int)qlimit;
 2206 }
 2207 
 2208 net_del_q_info (qlimit)
 2209         int qlimit;
 2210 {
 2211         simple_lock(&net_kmsg_total_lock);
 2212         net_queue_free_min--;
 2213         net_kmsg_max -= qlimit + 1;
 2214         simple_unlock(&net_kmsg_total_lock);
 2215 }
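
/*
 * Usage note (a condensed restatement, not in the original source):
 * each qlimit returned by net_add_q_info is stored in a filter's or
 * hash entry's rcv_qlimit field and is later passed back to
 * net_del_q_info by net_free_dead_infp/net_free_dead_entp below, so
 * net_queue_free_min and net_kmsg_max stay balanced as filters come
 * and go.
 */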
 2216 
 2217 
 2218 /*
 2219  * net_free_dead_infp (dead_infp)
 2220  *      queue_entry_t dead_infp;        list of dead net_rcv_port_t.
 2221  *
 2222  * Deallocates dead net_rcv_port_t.
 2223  * No locks should be held when called.
 2224  */
 2225 net_free_dead_infp (dead_infp)
 2226         queue_entry_t dead_infp;
 2227 {
 2228         register net_rcv_port_t infp, nextfp;
 2229 
 2230         for (infp = (net_rcv_port_t) dead_infp; infp != 0; infp = nextfp)
 2231         {
 2232                 nextfp = (net_rcv_port_t) queue_next(&infp->chain);
 2233                 ipc_port_release_send(infp->rcv_port);
 2234                 net_del_q_info(infp->rcv_qlimit);
 2235                 zfree(net_rcv_zone, (vm_offset_t) infp);
 2236         }           
 2237 }
 2238     
 2239 /*
 2240  * net_free_dead_entp (dead_entp)
 2241  *      queue_entry_t dead_entp;        list of dead net_hash_entry_t.
 2242  *
 2243  * Deallocates dead net_hash_entry_t.
 2244  * No locks should be held when called.
 2245  */
 2246 net_free_dead_entp (dead_entp)
 2247         queue_entry_t dead_entp;
 2248 {
 2249         register net_hash_entry_t entp, nextentp;
 2250 
 2251         for (entp = (net_hash_entry_t)dead_entp; entp != 0; entp = nextentp)
 2252         {
 2253                 nextentp = (net_hash_entry_t) queue_next(&entp->chain);
 2254 
 2255                 ipc_port_release_send(entp->rcv_port);
 2256                 net_del_q_info(entp->rcv_qlimit);
 2257                 zfree(net_hash_entry_zone, (vm_offset_t) entp);
 2258         }
 2259 }
 2260 
