FreeBSD/Linux Kernel Cross Reference
sys/device/net_io.c


/*
 * Mach Operating System
 * Copyright (c) 1993-1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * HISTORY
 * $Log:        net_io.c,v $
 * Revision 2.30  93/11/17  16:34:06  dbg
 *      Changed 'long' to 'int' in BPF routines for 64-bit cleanup.
 *      [93/10/26            dbg]
 *
 *      Added AST_KERNEL_CHECK to net_thread loop, to check for timer
 *      ASTs.  Added ANSI function prototypes.
 *      [93/09/02            dbg]
 *
 * Revision 2.29  93/08/10  15:10:56  mrt
 *      Incorporated BPF+MATCH support from Masanobu Yuhara:
 *              Changed: garbage collection of dead filters for BPF.
 *              Added: BPF_MATCH_IMM support.
 *              Added: BPF support. Derived from tcpdump-2.2.1/bpf/net/bpf.h.
 *              [93/04/14  16:51:13  yuhara]
 *
 * Revision 2.28  93/05/15  18:53:22  mrt
 *      machparam.h -> machspl.h
 *
 * Revision 2.27  93/05/10  23:23:27  rvb
 *      Added TTD teledebug code to check for debugging packets.
 *      [93/03/01            grm]
 *
 * Revision 2.26  93/05/10  17:46:12  rvb
 *      Added test to check that buffer is large enough to hold data
 *      returned by net_getstat.
 *      [93/04/20            kivinen]
 *
 * Revision 2.25  93/01/14  17:27:08  danner
 *      64bit cleanup.
 *      [92/11/30            af]
 *
 * Revision 2.24  92/08/03  17:33:48  jfriedl
 *      removed silly prototypes
 *      [92/08/02            jfriedl]
 *
 * Revision 2.23  92/05/21  17:09:38  jfriedl
 *      Cleanup to quiet gcc warnings.
 *      [92/05/16            jfriedl]
 *
 * Revision 2.22  92/03/10  16:25:28  jsb
 *      Changed parameters to netipc_net_packet.
 *      [92/03/09  12:57:30  jsb]
 *
 * Revision 2.21  92/01/03  20:03:57  dbg
 *      Add: NETF_PUSHHDR, NETF_PUSHSTK, NETF_PUSHIND, NETF_PUSHHDRIND.
 *      [91/12/23            dbg]
 *
 * Revision 2.20  91/08/28  11:11:28  jsb
 *      Panic if network write attempted with continuation.
 *      [91/08/12  17:29:53  dlb]
 *
 * Revision 2.19  91/08/24  11:55:55  af
 *      Missing include for Spls definitions.
 *      [91/08/02  02:45:16  af]
 *
 * Revision 2.18  91/08/03  18:17:43  jsb
 *      Added NORMA_ETHER support.
 *      [91/07/24  22:54:41  jsb]
 *
 * Revision 2.17  91/05/14  15:59:34  mrt
 *      Correcting copyright
 *
 * Revision 2.16  91/05/10  11:48:47  dbg
 *      Don't forget to copy the packet size when duplicating a packet
 *      for multiple filters in net_filter().
 *      [91/05/09            dpj]
 *
 * Revision 2.15  91/03/16  14:43:14  rpd
 *      Added net_thread, net_thread_continue.
 *      [91/02/13            rpd]
 *      Split net_rcv_msg_queue into high and low priority queues.
 *      Cap the total number of buffers allocated.
 *      [91/01/14            rpd]
 *
 *      Added net_rcv_msg_queue_size, net_rcv_msg_queue_max.
 *      [91/01/12            rpd]
 *
 * Revision 2.14  91/02/14  14:37:07  mrt
 *      Added garbage collection of dead filters.
 *      [91/02/12  12:11:10  af]
 *
 * Revision 2.13  91/02/05  17:09:54  mrt
 *      Changed to new Mach copyright
 *      [91/01/31  17:30:04  mrt]
 *
 * Revision 2.12  91/01/08  15:09:48  rpd
 *      Replaced NET_KMSG_GET, NET_KMSG_FREE
 *      with net_kmsg_get, net_kmsg_put, net_kmsg_collect.
 *      Increased net_kmsg_ilist_min to 4.
 *      [91/01/05            rpd]
 *      Fixed net_rcv_msg_thread to round message sizes up to an int multiple.
 *      [90/12/07            rpd]
 *
 *      Fixed net_rcv_msg_thread to not set vm_privilege.
 *      [90/11/29            rpd]
 *
 * Revision 2.11  90/09/09  23:20:00  rpd
 *      Zero the mapped_size stats for non mappable interfaces.
 *      [90/08/30  17:41:00  af]
 *
 * Revision 2.10  90/08/27  21:55:18  dbg
 *      If multiple filters receive a packet, copy the header as well as
 *      the body.  Fix from Dan Julin.
 *      [90/08/27            dbg]
 *
 *      Fix filter check to account for literal word.
 *      [90/07/17            dbg]
 *
 * Revision 2.9  90/08/06  15:06:57  rwd
 *      Fixed a bug in parse_net_filter() that was reading the
 *      literal from NETF_PUSHLIT as an instruction.
 *      [90/07/18  21:56:20  dpj]
 *
 * Revision 2.8  90/06/02  14:48:14  rpd
 *      Converted to new IPC.
 *      [90/03/26  21:57:43  rpd]
 *
 * Revision 2.7  90/02/22  20:02:21  dbg
 *      Track changes to kmsg structure.
 *      [90/01/31            dbg]
 *
 * Revision 2.6  90/01/11  11:42:20  dbg
 *      Make run in parallel.
 *      [89/12/15            dbg]
 *
 * Revision 2.5  89/12/08  19:52:22  rwd
 *      Picked up changes from rfr to minimize wired down memory
 *      [89/11/21            rwd]
 *
 * Revision 2.4  89/09/08  11:24:35  dbg
 *      Convert to run in kernel task.  Removed some lint.
 *      [89/07/26            dbg]
 *
 * Revision 2.3  89/08/11  17:55:18  rwd
 *      Picked up change from rfr which made zone collectable and
 *      decreased min net_kmesg to 2.
 *      [89/08/10            rwd]
 *
 * Revision 2.2  89/08/05  16:06:58  rwd
 *      Changed device_map to device_task_map
 *      [89/08/04            rwd]
 *
 * 13-Mar-89  David Golub (dbg) at Carnegie-Mellon University
 *      Created.
 *
 */
/*
 *      Author: David B. Golub, Carnegie Mellon University
 *      Date:   3/89
 *
 *      Network IO.
 *
 *      Packet filter code taken from vaxif/enet.c, written by
 *      CMU and Stanford.
 */

/*
 *      Note:  don't depend on anything in this file.
 *      It may change a lot real soon.  -cmaeda 11 June 1993
 */

#include <mach_ttd.h>
#include <norma_ether.h>

#include <sys/types.h>

#include <device/net_status.h>
#include <device/net_io.h>
#include <device/if_hdr.h>
#include <device/io_req.h>
#include <device/ds_routines.h>

#include <mach/boolean.h>
#include <mach/vm_param.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_kmsg.h>
#include <ipc/ipc_mqueue.h>

#include <kern/counters.h>
#include <kern/lock.h>
#include <kern/memory.h>
#include <kern/queue.h>
#include <kern/sched_prim.h>
#include <kern/thread.h>

#if     NORMA_ETHER
#include <norma/ipc_ether.h>
#endif  /* NORMA_ETHER */

#include <machine/machspl.h>

#if     MACH_TTD
#include <ttd/ttd_stub.h>
#endif  /* MACH_TTD */

#if     MACH_TTD
int kttd_async_counter = 0;
#endif  /* MACH_TTD */


/*
 *      Packet Buffer Management
 *
 *      This module manages a private pool of kmsg buffers.
 */

/*
 * List of net kmsgs queued to be sent to users.
 * Messages can be high priority or low priority.
 * The network thread processes high priority messages first.
 */
decl_simple_lock_data(,net_queue_lock)
boolean_t       net_thread_awake = FALSE;
struct ipc_kmsg_queue   net_queue_high;
int             net_queue_high_size = 0;
int             net_queue_high_max = 0;         /* for debugging */
struct ipc_kmsg_queue   net_queue_low;
int             net_queue_low_size = 0;
int             net_queue_low_max = 0;          /* for debugging */

/*
 * List of net kmsgs that can be touched at interrupt level.
 * If it is empty, we will also steal low priority messages.
 */
decl_simple_lock_data(,net_queue_free_lock)
struct ipc_kmsg_queue   net_queue_free;
int             net_queue_free_size = 0;        /* on free list */
int             net_queue_free_max = 0;         /* for debugging */

/*
 * This value is critical to network performance.
 * At least this many buffers should be sitting in net_queue_free.
 * If this is set too small, we will drop network packets.
 * Even a low drop rate (<1%) can cause severe network throughput problems.
 * We add one to net_queue_free_min for every filter.
 */
int             net_queue_free_min = 3;
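
/*
 *      Worked example of the rule above (editor's note): with the
 *      default minimum of 3 and two filters installed, the target
 *      free-pool size becomes 5.  The per-filter increment is applied
 *      as filters are registered; see net_set_filter() and
 *      net_add_q_info() below.
 */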

int             net_queue_free_hits = 0;        /* for debugging */
int             net_queue_free_steals = 0;      /* for debugging */
int             net_queue_free_misses = 0;      /* for debugging */

int             net_kmsg_send_high_hits = 0;    /* for debugging */
int             net_kmsg_send_low_hits = 0;     /* for debugging */
int             net_kmsg_send_high_misses = 0;  /* for debugging */
int             net_kmsg_send_low_misses = 0;   /* for debugging */

int             net_thread_awaken = 0;          /* for debugging */
int             net_ast_taken = 0;              /* for debugging */

decl_simple_lock_data(,net_kmsg_total_lock)
int             net_kmsg_total = 0;             /* total allocated */
int             net_kmsg_max;                   /* initialized below */

vm_size_t       net_kmsg_size;                  /* initialized below */

/*
 *      We want more buffers when there aren't enough in the free queue
 *      and the low priority queue.  However, we don't want to allocate
 *      more than net_kmsg_max.
 */

#define net_kmsg_want_more()            \
        (((net_queue_free_size + net_queue_low_size) < net_queue_free_min) && \
         (net_kmsg_total < net_kmsg_max))
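
/*
 *      Example (editor's note): with net_queue_free_size == 1,
 *      net_queue_low_size == 1 and net_queue_free_min == 3, the sum
 *      (2) is below the minimum, so the macro evaluates TRUE as long
 *      as net_kmsg_total has not yet reached net_kmsg_max.
 */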

ipc_kmsg_t
net_kmsg_get(void)
{
        register ipc_kmsg_t kmsg;
        spl_t s;

        /*
         *      First check the list of free buffers.
         */
        s = splimp();
        simple_lock(&net_queue_free_lock);
        kmsg = ipc_kmsg_queue_first(&net_queue_free);
        if (kmsg != IKM_NULL) {
            ipc_kmsg_rmqueue_first_macro(&net_queue_free, kmsg);
            net_queue_free_size--;
            net_queue_free_hits++;
        }
        simple_unlock(&net_queue_free_lock);

        if (kmsg == IKM_NULL) {
            /*
             *  Try to steal from the low priority queue.
             */
            simple_lock(&net_queue_lock);
            kmsg = ipc_kmsg_queue_first(&net_queue_low);
            if (kmsg != IKM_NULL) {
                ipc_kmsg_rmqueue_first_macro(&net_queue_low, kmsg);
                net_queue_low_size--;
                net_queue_free_steals++;
            }
            simple_unlock(&net_queue_lock);
        }

        if (kmsg == IKM_NULL)
            net_queue_free_misses++;
        splx(s);

        if (net_kmsg_want_more() || (kmsg == IKM_NULL)) {
            boolean_t awake;

            s = splimp();
            simple_lock(&net_queue_lock);
            awake = net_thread_awake;
            net_thread_awake = TRUE;
            simple_unlock(&net_queue_lock);
            splx(s);

            if (!awake)
                thread_wakeup((event_t) &net_thread_awake);
        }

        return kmsg;
}

void
net_kmsg_put(register ipc_kmsg_t kmsg)
{
        spl_t s;

        s = splimp();
        simple_lock(&net_queue_free_lock);
        ipc_kmsg_enqueue_macro(&net_queue_free, kmsg);
        if (++net_queue_free_size > net_queue_free_max)
            net_queue_free_max = net_queue_free_size;
        simple_unlock(&net_queue_free_lock);
        splx(s);
}

void
net_kmsg_collect(void)
{
        register ipc_kmsg_t kmsg;
        spl_t s;

        s = splimp();
        simple_lock(&net_queue_free_lock);
        while (net_queue_free_size > net_queue_free_min) {
            kmsg = ipc_kmsg_dequeue(&net_queue_free);
            net_queue_free_size--;
            simple_unlock(&net_queue_free_lock);
            splx(s);

            net_kmsg_free(kmsg);
            simple_lock(&net_kmsg_total_lock);
            net_kmsg_total--;
            simple_unlock(&net_kmsg_total_lock);

            s = splimp();
            simple_lock(&net_queue_free_lock);
        }
        simple_unlock(&net_queue_free_lock);
        splx(s);
}

void
net_kmsg_more(void)
{
        register ipc_kmsg_t kmsg;

        /*
         * Replenish net kmsg pool if low.  We don't have the locks
         * necessary to look at these variables, but that's OK because
         * misread values aren't critical.  The danger in this code is
         * that while we allocate buffers, interrupts are happening
         * which take buffers out of the free list.  If we are not
         * careful, we will sit in the loop and allocate a zillion
         * buffers while a burst of packets arrives.  So we count
         * buffers in the low priority queue as available, because
         * net_kmsg_get will make use of them, and we cap the total
         * number of buffers we are willing to allocate.
         */

        while (net_kmsg_want_more()) {
            simple_lock(&net_kmsg_total_lock);
            net_kmsg_total++;
            simple_unlock(&net_kmsg_total_lock);
            kmsg = net_kmsg_alloc();
            net_kmsg_put(kmsg);
        }
}

/*
 *      Packet Filter Data Structures
 *
 *      Each network interface has a set of packet filters
 *      that are run on incoming packets.
 *
 *      Each packet filter may represent a single network
 *      session or multiple network sessions.  For example,
 *      all application level TCP sessions would be represented
 *      by a single packet filter data structure.
 *
 *      If a packet filter has a single session, we use a
 *      struct net_rcv_port to represent it.  If the packet
 *      filter represents multiple sessions, we use a
 *      struct net_hash_header to represent it.
 */

/*
 * Each interface has a write port and a set of read ports.
 * Each read port has one or more filters to determine what packets
 * should go to that port.
 */

/*
 * Receive port for net, with packet filter.
 * This data structure by itself represents a packet
 * filter for a single session.
 */
struct net_rcv_port {
        queue_chain_t   chain;          /* list of open_descriptors */
        ipc_port_t      rcv_port;       /* port to send packet to */
        int             rcv_qlimit;     /* port's qlimit */
        int             rcv_count;      /* number of packets received */
        int             priority;       /* priority for filter */
        filter_t        *filter_end;    /* pointer to end of filter */
        filter_t        filter[NET_MAX_FILTER];
                                        /* filter operations */
};
typedef struct net_rcv_port *net_rcv_port_t;

zone_t          net_rcv_zone;   /* zone of net_rcv_port structs */


#define NET_HASH_SIZE   256
#define N_NET_HASH      4
#define N_NET_HASH_KEYS 4

unsigned int bpf_hash (int, unsigned int *);
boolean_t
bpf_eq (
        register bpf_insn_t f1,
        register bpf_insn_t f2,
        register int bytes);
int
bpf_validate(
        bpf_insn_t f,
        int bytes,
        bpf_insn_t *match);     /* forward */

/*
 * A single hash entry.
 */
struct net_hash_entry {
        queue_chain_t   chain;          /* list of entries with same hval */
#define he_next chain.next
#define he_prev chain.prev
        ipc_port_t      rcv_port;       /* destination port */
        int             rcv_qlimit;     /* qlimit for the port */
        unsigned int    keys[N_NET_HASH_KEYS];
};
typedef struct net_hash_entry *net_hash_entry_t;

zone_t  net_hash_entry_zone;

/*
 * This structure represents a packet filter with multiple sessions.
 *
 * For example, all application level TCP sessions might be
 * represented by one of these structures.  It looks like a
 * net_rcv_port struct so that both types can live on the
 * same packet filter queues.
 */
struct net_hash_header {
        struct net_rcv_port rcv;
        int n_keys;                     /* zero if not used */
        int ref_count;                  /* reference count */
        net_hash_entry_t table[NET_HASH_SIZE];
} filter_hash_header[N_NET_HASH];

typedef struct net_hash_header *net_hash_header_t;

decl_simple_lock_data(,net_hash_header_lock)

#define HASH_ITERATE(head, elt) (elt) = (net_hash_entry_t) (head); do {
#define HASH_ITERATE_END(head, elt) \
        (elt) = (net_hash_entry_t) queue_next((queue_entry_t) (elt));      \
        } while ((elt) != (head));


#define FILTER_ITERATE(ifp, fp, nextfp) \
        for ((fp) = (net_rcv_port_t) queue_first(&(ifp)->if_rcv_port_list);\
             !queue_end(&(ifp)->if_rcv_port_list, (queue_entry_t)(fp));    \
             (fp) = (nextfp)) {                                            \
                (nextfp) = (net_rcv_port_t) queue_next(&(fp)->chain);
#define FILTER_ITERATE_END }
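
/*
 *      Usage note (editor's): each macro pair above opens and closes
 *      a loop body, so the pairs must bracket a block:
 *
 *              FILTER_ITERATE(ifp, fp, nextfp)
 *              {
 *                  ... (fp) may be unlinked here, since (nextfp)
 *                  was fetched before the body ran ...
 *              }
 *              FILTER_ITERATE_END
 *
 *      See net_filter() and net_set_filter() below for real uses.
 */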

/* entry_p must be net_rcv_port_t or net_hash_entry_t */
#define ENQUEUE_DEAD(dead, entry_p) { \
        queue_next(&(entry_p)->chain) = (queue_entry_t) (dead); \
        (dead) = (queue_entry_t)(entry_p);                      \
}

int
net_add_q_info (
        ipc_port_t      rcv_port);      /* forward */
void
net_free_dead_infp (
        queue_entry_t dead_infp);       /* forward */
void
net_free_dead_entp (
        queue_entry_t dead_entp);       /* forward */

boolean_t
hash_ent_remove (
    struct ifnet        *ifp,
    net_hash_header_t   hp,
    int                 used,
    net_hash_entry_t    *head,
    net_hash_entry_t    entp,
    queue_entry_t       *dead_p);       /* forward */

boolean_t
net_do_filter(                          /* CSPF */
        net_rcv_port_t  infp,
        char *          data,
        unsigned int    data_count,
        char *          header);
int
bpf_do_filter(                          /* BPF */
        net_rcv_port_t  infp,
        char *          p,              /* packet data */
        unsigned int    wirelen,        /* data_count (in bytes) */
        char *          header,
        net_hash_entry_t **hash_headpp,
        net_hash_entry_t *entpp);       /* out */


/*
 *      ethernet_priority:
 *
 *      This function properly belongs in the ethernet interfaces;
 *      it should not be called by this module.  (We get packet
 *      priorities as an argument to net_filter.)  It is here
 *      to avoid massive code duplication.
 *
 *      Returns TRUE for high-priority packets.
 */

boolean_t ethernet_priority(
        ipc_kmsg_t kmsg)
{
        register unsigned char *addr =
                (unsigned char *) net_kmsg(kmsg)->header;

        /*
         *      A simplistic check for broadcast packets.
         */

        if ((addr[0] == 0xff) && (addr[1] == 0xff) &&
            (addr[2] == 0xff) && (addr[3] == 0xff) &&
            (addr[4] == 0xff) && (addr[5] == 0xff))
            return FALSE;
        else
            return TRUE;
}

mach_msg_type_t header_type = {
        MACH_MSG_TYPE_BYTE,     /* name */
        8,                      /* size */
        NET_HDW_HDR_MAX,        /* number */
        TRUE,                   /* inline */
        FALSE,                  /* longform */
        FALSE,                  /* deallocate */
        0                       /* unused */
};

mach_msg_type_t packet_type = {
        MACH_MSG_TYPE_BYTE,     /* name */
        8,                      /* size */
        0,                      /* number */
        TRUE,                   /* inline */
        FALSE,                  /* longform */
        FALSE                   /* deallocate */
};

/*
 *      net_deliver:
 *
 *      Called and returns holding net_queue_lock, at splimp.
 *      Dequeues a message and delivers it at spl0.
 *      Returns FALSE if no messages.
 */
boolean_t net_deliver(
        boolean_t nonblocking)
{
        register ipc_kmsg_t kmsg;
        boolean_t high_priority;
        struct ipc_kmsg_queue send_list;

        /*
         * Pick up a pending network message and deliver it.
         * Deliver high priority messages before low priority.
         */

        if ((kmsg = ipc_kmsg_dequeue(&net_queue_high)) != IKM_NULL) {
            net_queue_high_size--;
            high_priority = TRUE;
        } else if ((kmsg = ipc_kmsg_dequeue(&net_queue_low)) != IKM_NULL) {
            net_queue_low_size--;
            high_priority = FALSE;
        } else
            return FALSE;
        simple_unlock(&net_queue_lock);
        (void) spl0();

        /*
         * Run the packet through the filters,
         * getting back a queue of packets to send.
         */
        net_filter(kmsg, &send_list);

        if (!nonblocking) {
            /*
             * There is a danger of running out of available buffers
             * because they all get moved into the high priority queue
             * or a port queue.  In particular, we might need to
             * allocate more buffers as we pull (previously available)
             * buffers out of the low priority queue.  But we can only
             * allocate if we are allowed to block.
             */
            net_kmsg_more();
        }

        while ((kmsg = ipc_kmsg_dequeue(&send_list)) != IKM_NULL) {
            int count;

            /*
             * Fill in the rest of the kmsg.
             */
            count = net_kmsg(kmsg)->net_rcv_msg_packet_count;

            ikm_init_special(kmsg, IKM_SIZE_NETWORK);

            kmsg->ikm_header.msgh_bits =
                    MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, 0);
            /* remember message sizes must be rounded up */
            kmsg->ikm_header.msgh_size =
                    (((mach_msg_size_t) (sizeof(struct net_rcv_msg)
                                        - NET_RCV_MAX + count))+3) &~ 3;
            kmsg->ikm_header.msgh_local_port = MACH_PORT_NULL;
            kmsg->ikm_header.msgh_kind = MACH_MSGH_KIND_NORMAL;
            kmsg->ikm_header.msgh_id = NET_RCV_MSG_ID;

            net_kmsg(kmsg)->header_type = header_type;
            net_kmsg(kmsg)->packet_type = packet_type;
            net_kmsg(kmsg)->net_rcv_msg_packet_count = count;

            /*
             * Send the packet to the destination port.  Drop it
             * if the destination port is over its backlog.
             */

            if (ipc_mqueue_send(kmsg, MACH_SEND_TIMEOUT, 0) ==
                                                    MACH_MSG_SUCCESS) {
                if (high_priority)
                    net_kmsg_send_high_hits++;
                else
                    net_kmsg_send_low_hits++;
                /* the receiver is responsible for the message now */
            } else {
                if (high_priority)
                    net_kmsg_send_high_misses++;
                else
                    net_kmsg_send_low_misses++;
                ipc_kmsg_destroy(kmsg);
            }
        }

        /*
         *      Handle other ASTs here if blocking.  Must
         *      pay particular attention to the timer queue.
         *
         * ***  This will call net_ast() if AST_NETWORK is
         *      set.  However, since net_thread_awake should
         *      also be set if nonblocking, net_ast will not
         *      recursively call net_deliver().
         *
         * ***  We should really just check AST_TIMER.
         */
        if (!nonblocking) {
            AST_KERNEL_CHECK(cpu_number());
        }

        (void) splimp();
        simple_lock(&net_queue_lock);
        return TRUE;
}

/*
 *      We want to deliver packets using ASTs, so we can avoid the
 *      thread_wakeup/thread_block needed to get to the network
 *      thread.  However, we can't allocate memory in the AST handler,
 *      because memory allocation might block.  Hence we have the
 *      network thread to allocate memory.  The network thread also
 *      delivers packets, so it can be allocating and delivering for a
 *      burst.  net_thread_awake is protected by net_queue_lock
 *      (instead of net_queue_free_lock) so that net_packet and
 *      net_ast can safely determine if the network thread is running.
 *      This prevents a race that might leave a packet sitting without
 *      being delivered.  It is possible for net_kmsg_get to think
 *      the network thread is awake, and so avoid a wakeup, and then
 *      have the network thread sleep without allocating.  The next
 *      net_kmsg_get will do a wakeup.
 */
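
/*
 *      A trace of the benign race described above (editor's note):
 *
 *      1. net_kmsg_get() reads net_thread_awake == TRUE and skips
 *         the wakeup.
 *      2. The network thread clears net_thread_awake and blocks
 *         without having allocated.
 *      3. The next net_kmsg_get() sees net_thread_awake == FALSE and
 *         issues the wakeup, so no packet waits indefinitely.
 */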

void net_ast(void)
{
        spl_t s;

        net_ast_taken++;

        /*
         *      If the network thread is awake, then we would
         *      rather deliver messages from it, because
         *      it can also allocate memory.
         */

        s = splimp();
        simple_lock(&net_queue_lock);
        while (!net_thread_awake && net_deliver(TRUE))
                continue;

        /*
         *      Prevent an unnecessary AST.  Either the network
         *      thread will deliver the messages, or there are
         *      no messages left to deliver.
         */

        simple_unlock(&net_queue_lock);
        (void) splsched();
        ast_off(cpu_number(), AST_NETWORK);
        splx(s);
}

no_return net_thread_continue(void)
{
        for (;;) {
                spl_t s;

                net_thread_awaken++;

                /*
                 *      First get more buffers.
                 */
                net_kmsg_more();

                s = splimp();
                simple_lock(&net_queue_lock);
                while (net_deliver(FALSE))
                        continue;

                net_thread_awake = FALSE;
                assert_wait(&net_thread_awake, FALSE);
                simple_unlock(&net_queue_lock);
                splx(s);
                counter(c_net_thread_block++);
                thread_block(net_thread_continue);
                /*NOTREACHED*/
        }
}

no_return net_thread(void)
{
        spl_t s;

        /*
         *      We should be very high priority.
         */

        thread_set_own_priority(0);

        /*
         *      We sleep initially, so that we don't allocate any buffers
         *      unless the network is really in use and they are needed.
         */

        s = splimp();
        simple_lock(&net_queue_lock);
        net_thread_awake = FALSE;
        assert_wait(&net_thread_awake, FALSE);
        simple_unlock(&net_queue_lock);
        splx(s);
        counter(c_net_thread_block++);
        thread_block_noreturn(net_thread_continue);
        /*NOTREACHED*/
}

/*
 *      Exchange two adjacent queue elements:  (..., first, last, ...)
 *      becomes (..., last, first, ...).  Called by net_filter() to
 *      promote a busier filter ahead of its predecessor.
 */
void
reorder_queue(
        register queue_t        first,
        register queue_t        last)
{
        register queue_entry_t  prev, next;

        prev = first->prev;
        next = last->next;

        prev->next = last;
        next->prev = first;

        last->prev = prev;
        last->next = first;

        first->next = next;
        first->prev = last;
}

/*
 * Incoming packet.  Header has already been moved to proper place.
 * We are already at splimp.
 */
void
net_packet(
        register struct ifnet   *ifp,
        register ipc_kmsg_t     kmsg,
        unsigned int            count,
        boolean_t               priority)
{
        boolean_t awake;

#if     NORMA_ETHER
        if (netipc_net_packet(kmsg, count)) {
                return;
        }
#endif  /* NORMA_ETHER */

#if     MACH_TTD
        /*
         * Do a quick check to see if it is a kernel TTD packet.
         *
         * Only check if KernelTTD is enabled, i.e., the current
         * device driver supports TTD and the bootp succeeded.
         */
        if (kttd_enabled && kttd_handle_async(kmsg)) {
                /*
                 * Packet was a valid ttd packet and
                 * doesn't need to be passed up to filter.
                 * The ttd code put the used kmsg buffer
                 * back onto the free list.
                 */
                if (kttd_debug)
                        printf("**%x**", kttd_async_counter++);
                return;
        }
#endif  /* MACH_TTD */

        kmsg->ikm_header.msgh_remote_port = (mach_port_t) ifp;
        net_kmsg(kmsg)->net_rcv_msg_packet_count = count;

        simple_lock(&net_queue_lock);
        if (priority) {
            ipc_kmsg_enqueue(&net_queue_high, kmsg);
            if (++net_queue_high_size > net_queue_high_max)
                net_queue_high_max = net_queue_high_size;
        } else {
            ipc_kmsg_enqueue(&net_queue_low, kmsg);
            if (++net_queue_low_size > net_queue_low_max)
                net_queue_low_max = net_queue_low_size;
        }
        /*
         *      If the network thread is awake, then we don't
         *      need to take an AST, because the thread will
         *      deliver the packet.
         */
        awake = net_thread_awake;
        simple_unlock(&net_queue_lock);

        if (!awake) {
            spl_t s = splsched();
            ast_on(cpu_number(), AST_NETWORK);
            splx(s);
        }
}
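
/*
 *      Editor's sketch of the interrupt-level receive path a driver
 *      is expected to follow (hypothetical driver routine; only
 *      net_kmsg_get(), net_kmsg(), ethernet_priority() and
 *      net_packet() are real):
 */
#if 0   /* illustration only */
void
example_rx_intr(struct ifnet *ifp)
{
        ipc_kmsg_t      kmsg;
        unsigned int    count;

        kmsg = net_kmsg_get();          /* buffer from the private pool */
        if (kmsg == IKM_NULL)
                return;                 /* out of buffers: drop packet */

        /* hypothetical: copy header and body out of the device */
        count = example_copy_from_device(ifp,
                                         net_kmsg(kmsg)->header,
                                         net_kmsg(kmsg)->packet);

        net_packet(ifp, kmsg, count, ethernet_priority(kmsg));
}
#endif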

int net_filter_queue_reorder = 0; /* non-zero to enable reordering */

/*
 * Run a packet through the filters, returning a list of messages.
 * We are *not* called at interrupt level.
 */
void
net_filter(
        register ipc_kmsg_t     kmsg,
        ipc_kmsg_queue_t        send_list)
{
        register struct ifnet   *ifp;
        register net_rcv_port_t infp, nextfp;
        register ipc_kmsg_t     new_kmsg;

        net_hash_entry_t        entp, *hash_headp;
        ipc_port_t              dest;
        queue_entry_t           dead_infp = (queue_entry_t) 0;
        queue_entry_t           dead_entp = (queue_entry_t) 0;
        unsigned int            ret_count;

        int count = net_kmsg(kmsg)->net_rcv_msg_packet_count;
        ifp = (struct ifnet *) kmsg->ikm_header.msgh_remote_port;
        ipc_kmsg_queue_init(send_list);

        /*
         * Unfortunately we can't allocate or deallocate memory
         * while holding this lock.  And we can't drop the lock
         * while examining the filter list.
         */
        simple_lock(&ifp->if_rcv_port_list_lock);
        FILTER_ITERATE(ifp, infp, nextfp)
        {
            entp = (net_hash_entry_t) 0;
            if (infp->filter[0] == NETF_BPF) {
                ret_count = bpf_do_filter(infp, net_kmsg(kmsg)->packet, count,
                                          net_kmsg(kmsg)->header,
                                          &hash_headp, &entp);
                if (entp == (net_hash_entry_t) 0)
                  dest = infp->rcv_port;
                else
                  dest = entp->rcv_port;
            } else {
                ret_count = net_do_filter(infp, net_kmsg(kmsg)->packet, count,
                                          net_kmsg(kmsg)->header);
                if (ret_count)
                    ret_count = count;
                dest = infp->rcv_port;
            }

            if (ret_count) {

                /*
                 * Make a send right for the destination.
                 */

                dest = ipc_port_copy_send(dest);
                if (!IP_VALID(dest)) {
                    /*
                     * This filter is dead.  We remove it from the
                     * filter list and set it aside for deallocation.
                     */

                    if (entp == (net_hash_entry_t) 0) {
                        queue_remove(&ifp->if_rcv_port_list, infp,
                                     net_rcv_port_t, chain);
                        ENQUEUE_DEAD(dead_infp, infp);
                        continue;
                    } else {
                        (void) hash_ent_remove (
                                         ifp,
                                         (net_hash_header_t)infp,
                                         FALSE,         /* no longer used */
                                         hash_headp,
                                         entp,
                                         &dead_entp);
                        continue;
                    }
                }

                /*
                 * Deliver copy of packet to this channel.
                 */
                if (ipc_kmsg_queue_empty(send_list)) {
                    /*
                     * Only receiver, so far
                     */
                    new_kmsg = kmsg;
                } else {
                    /*
                     * Other receivers - must allocate message and copy.
                     */
                    new_kmsg = net_kmsg_get();
                    if (new_kmsg == IKM_NULL) {
                        ipc_port_release_send(dest);
                        break;
                    }

                    bcopy(
                        net_kmsg(kmsg)->packet,
                        net_kmsg(new_kmsg)->packet,
                        ret_count);
                    bcopy(
                        net_kmsg(kmsg)->header,
                        net_kmsg(new_kmsg)->header,
                        NET_HDW_HDR_MAX);
                }
                net_kmsg(new_kmsg)->net_rcv_msg_packet_count = ret_count;
                new_kmsg->ikm_header.msgh_remote_port = (mach_port_t) dest;
                ipc_kmsg_enqueue(send_list, new_kmsg);

            {
                register net_rcv_port_t prevfp;
                int rcount = ++infp->rcv_count;

                /*
                 * See if ordering of filters is wrong
                 */
                if (infp->priority >= NET_HI_PRI) {
                    prevfp = (net_rcv_port_t) queue_prev(&infp->chain);
                    /*
                     * If infp is not the first element on the queue,
                     * and the previous element is at equal priority
                     * but has a lower count, then promote infp to
                     * be in front of prevfp.
                     */
                    if ((queue_t)prevfp != &ifp->if_rcv_port_list &&
                        infp->priority == prevfp->priority) {
                        /*
                         * Threshold difference to prevent thrashing
                         */
                        if (net_filter_queue_reorder
                            && (100 + prevfp->rcv_count < rcount))
                                reorder_queue(&prevfp->chain, &infp->chain);
                    }
                    /*
                     * High-priority filter -> no more deliveries
                     */
                    break;
                }
            }
            }
        }
        FILTER_ITERATE_END

        simple_unlock(&ifp->if_rcv_port_list_lock);

        /*
         * Deallocate dead filters.
         */
        if (dead_infp != 0)
                net_free_dead_infp(dead_infp);
        if (dead_entp != 0)
                net_free_dead_entp(dead_entp);

        if (ipc_kmsg_queue_empty(send_list)) {
            /* Not sent - recycle */
            net_kmsg_put(kmsg);
        }
}

boolean_t
net_do_filter(
        net_rcv_port_t  infp,
        char *          data,
        unsigned int    data_count,
        char *          header)
{
        int             stack[NET_FILTER_STACK_DEPTH+1];
        register int    *sp;
        register filter_t       *fp, *fpe;
        register unsigned int   op, arg;

        /*
         * The filter accesses the header and data
         * as unsigned short words.
         */
        data_count /= sizeof(unsigned short);

#define data_word       ((unsigned short *)data)
#define header_word     ((unsigned short *)header)

        sp = &stack[NET_FILTER_STACK_DEPTH];
        fp = &infp->filter[0];
        fpe = infp->filter_end;

        *sp = TRUE;

        while (fp < fpe) {
            arg = *fp++;
            op = NETF_OP(arg);
            arg = NETF_ARG(arg);

            switch (arg) {
                case NETF_NOPUSH:
                    arg = *sp++;
                    break;
                case NETF_PUSHZERO:
                    arg = 0;
                    break;
                case NETF_PUSHLIT:
                    arg = *fp++;
                    break;
                case NETF_PUSHIND:
                    arg = *sp++;
                    if (arg >= data_count)
                        return FALSE;
                    arg = data_word[arg];
                    break;
                case NETF_PUSHHDRIND:
                    arg = *sp++;
                    if (arg >= NET_HDW_HDR_MAX/sizeof(unsigned short))
                        return FALSE;
                    arg = header_word[arg];
                    break;
                default:
                    if (arg >= NETF_PUSHSTK) {
                        arg = sp[arg - NETF_PUSHSTK];
                    }
                    else if (arg >= NETF_PUSHHDR) {
                        arg = header_word[arg - NETF_PUSHHDR];
                    }
                    else {
                        arg -= NETF_PUSHWORD;
                        if (arg >= data_count)
                            return FALSE;
                        arg = data_word[arg];
                    }
                    break;

            }
            switch (op) {
                case NETF_OP(NETF_NOP):
                    *--sp = arg;
                    break;
                case NETF_OP(NETF_AND):
                    *sp &= arg;
                    break;
                case NETF_OP(NETF_OR):
                    *sp |= arg;
                    break;
                case NETF_OP(NETF_XOR):
                    *sp ^= arg;
                    break;
                case NETF_OP(NETF_EQ):
                    *sp = (*sp == arg);
                    break;
                case NETF_OP(NETF_NEQ):
                    *sp = (*sp != arg);
                    break;
                case NETF_OP(NETF_LT):
                    *sp = (*sp < arg);
                    break;
                case NETF_OP(NETF_LE):
                    *sp = (*sp <= arg);
                    break;
                case NETF_OP(NETF_GT):
                    *sp = (*sp > arg);
                    break;
                case NETF_OP(NETF_GE):
                    *sp = (*sp >= arg);
                    break;
                case NETF_OP(NETF_COR):
                    if (*sp++ == arg)
                        return TRUE;
                    break;
                case NETF_OP(NETF_CAND):
                    if (*sp++ != arg)
                        return FALSE;
                    break;
                case NETF_OP(NETF_CNOR):
                    if (*sp++ == arg)
                        return FALSE;
                    break;
                case NETF_OP(NETF_CNAND):
                    if (*sp++ != arg)
                        return TRUE;
                    break;
                case NETF_OP(NETF_LSH):
                    *sp <<= arg;
                    break;
                case NETF_OP(NETF_RSH):
                    *sp >>= arg;
                    break;
                case NETF_OP(NETF_ADD):
                    *sp += arg;
                    break;
                case NETF_OP(NETF_SUB):
                    *sp -= arg;
                    break;
            }
        }
        return (*sp) ? TRUE : FALSE;

#undef  data_word
#undef  header_word
}
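
/*
 *      Worked example of the filter language (editor's note, a sketch
 *      assuming the usual NETF_* encodings from mach/net_status.h,
 *      where an instruction word combines an op with an arg).  The
 *      two-word program
 *
 *              filter_t accept_all[] = { NETF_PUSHLIT | NETF_NOP, 1 };
 *
 *      pushes the literal 1 and falls off the end, so net_do_filter()
 *      returns TRUE for every packet.  A program such as
 *
 *              { NETF_PUSHWORD+6 | NETF_NOP,
 *                NETF_PUSHLIT    | NETF_EQ, 0x0800 }
 *
 *      pushes data word 6, then compares the top of stack against the
 *      literal 0x0800, leaving the boolean result on the stack, so
 *      only matching packets are accepted.  (The word offset is
 *      illustrative, not a protocol fact.)
 */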

/*
 * Check filter for invalid operations or stack over/under-flow.
 */
boolean_t
parse_net_filter(
        register filter_t       *filter,
        unsigned int            count)
{
        register int    sp;
        register filter_t       *fpe = &filter[count];
        register filter_t       op, arg;

        sp = NET_FILTER_STACK_DEPTH;

        for (; filter < fpe; filter++) {
            op = NETF_OP(*filter);
            arg = NETF_ARG(*filter);

            switch (arg) {
                case NETF_NOPUSH:
                    break;
                case NETF_PUSHZERO:
                    sp--;
                    break;
                case NETF_PUSHLIT:
                    filter++;
                    if (filter >= fpe)
                        return FALSE;   /* literal value not in filter */
                    sp--;
                    break;
                case NETF_PUSHIND:
                case NETF_PUSHHDRIND:
                    break;
                default:
                    if (arg >= NETF_PUSHSTK) {
                        if (arg - NETF_PUSHSTK + sp > NET_FILTER_STACK_DEPTH)
                            return FALSE;
                    }
                    else if (arg >= NETF_PUSHHDR) {
                        if (arg - NETF_PUSHHDR >=
                                NET_HDW_HDR_MAX/sizeof(unsigned short))
                            return FALSE;
                    }
                    /* else... cannot check for packet bounds
                                without packet */
                    sp--;
                    break;
            }
            if (sp < 2) {
                return FALSE;   /* stack overflow */
            }
            if (op == NETF_OP(NETF_NOP))
                continue;

            /*
             * all non-NOP operators are binary.
             */
            if (sp > NET_MAX_FILTER-2)
                return FALSE;

            sp++;
            switch (op) {
                case NETF_OP(NETF_AND):
                case NETF_OP(NETF_OR):
                case NETF_OP(NETF_XOR):
                case NETF_OP(NETF_EQ):
                case NETF_OP(NETF_NEQ):
                case NETF_OP(NETF_LT):
                case NETF_OP(NETF_LE):
                case NETF_OP(NETF_GT):
                case NETF_OP(NETF_GE):
                case NETF_OP(NETF_COR):
                case NETF_OP(NETF_CAND):
                case NETF_OP(NETF_CNOR):
                case NETF_OP(NETF_CNAND):
                case NETF_OP(NETF_LSH):
                case NETF_OP(NETF_RSH):
                case NETF_OP(NETF_ADD):
                case NETF_OP(NETF_SUB):
                    break;
                default:
                    return FALSE;
            }
        }
        return TRUE;
}
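
/*
 *      Example rejections (editor's note): a program whose final word
 *      is NETF_PUSHLIT is refused because its literal operand is
 *      missing, and a long enough run of consecutive pushes trips the
 *      "sp < 2" stack-overflow check above before net_do_filter()
 *      could ever run it.
 */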

/*
 * Set a filter for a network interface.
 *
 * We are given a naked send right for the rcv_port.
 * If we are successful, we must consume that right.
 */
io_return_t
net_set_filter(
        struct ifnet    *ifp,
        ipc_port_t      rcv_port,
        int             priority,
        filter_t        *filter,
        unsigned int    filter_count)
{
    int                         filter_bytes;
    bpf_insn_t                  match;
    register net_rcv_port_t     infp, my_infp;
    net_rcv_port_t              nextfp;
    net_hash_header_t           hhp;
    register net_hash_entry_t   entp, hash_entp;
    net_hash_entry_t            *head, nextentp;
    queue_entry_t               dead_infp, dead_entp;
    int                         i;
    int                         ret, is_new_infp;
    io_return_t                 rval;

    /*
     * Check the filter syntax.
     */

    filter_bytes = CSPF_BYTES(filter_count);
    match = (bpf_insn_t) 0;

    if (filter_count > 0 && filter[0] == NETF_BPF) {
        ret = bpf_validate((bpf_insn_t)filter, filter_bytes, &match);
        if (!ret)
            return D_INVALID_OPERATION;
    } else {
        if (!parse_net_filter(filter, filter_count))
            return D_INVALID_OPERATION;
    }

    rval = D_SUCCESS;                   /* default return value */
    dead_infp = dead_entp = 0;

    if (match == (bpf_insn_t) 0) {
        /*
         * If there is no match instruction, we allocate
         * a normal packet filter structure.
         */
        my_infp = (net_rcv_port_t) zalloc(net_rcv_zone);
        my_infp->rcv_port = rcv_port;
        is_new_infp = TRUE;
    } else {
        /*
         * If there is a match instruction, we assume there will be
         * multiple sessions with a common substructure and allocate
         * a hash table to deal with them.
         */
        my_infp = 0;
        hash_entp = (net_hash_entry_t) zalloc(net_hash_entry_zone);
        is_new_infp = FALSE;
    }

    /*
     * Look for an existing filter on the same reply port.
     * Look for filters with dead ports (for GC).
     * Look for a filter with the same code except KEY insns.
     */

    simple_lock(&ifp->if_rcv_port_list_lock);

    FILTER_ITERATE(ifp, infp, nextfp)
    {
            if (infp->rcv_port == MACH_PORT_NULL) {
                    if (match != 0
                        && infp->priority == priority
                        && my_infp == 0
                        && (infp->filter_end - infp->filter) == filter_count
                        && bpf_eq((bpf_insn_t)infp->filter,
                                  (bpf_insn_t)filter, filter_bytes))
                            {
                                    my_infp = infp;
                            }

                    for (i = 0; i < NET_HASH_SIZE; i++) {
                            head = &((net_hash_header_t) infp)->table[i];
                            if (*head == 0)
                                    continue;

                            /*
                             * Check each hash entry to make sure the
                             * destination port is still valid.  Remove
                             * any invalid entries.
                             */
                            entp = *head;
                            do {
                                    nextentp = (net_hash_entry_t) entp->he_next;

                                    /* checked without
                                       ip_lock(entp->rcv_port) */
                                    if (entp->rcv_port == rcv_port
                                        || !IP_VALID(entp->rcv_port)
                                        || !ip_active(entp->rcv_port)) {

                                            ret = hash_ent_remove (ifp,
                                                (net_hash_header_t)infp,
                                                (my_infp == infp),
                                                head,
                                                entp,
                                                &dead_entp);
                                            if (ret)
                                                    goto hash_loop_end;
                                    }

                                    entp = nextentp;
                            /* While test checks head since hash_ent_remove
                               might modify it.
                               */
                            } while (*head != 0 && entp != *head);
                    }
                hash_loop_end:
                    ;

            } else if (infp->rcv_port == rcv_port
                       || !IP_VALID(infp->rcv_port)
                       || !ip_active(infp->rcv_port)) {
                    /* Remove the old filter from list */
                    remqueue(&ifp->if_rcv_port_list, (queue_entry_t)infp);
                    ENQUEUE_DEAD(dead_infp, infp);
            }
    }
    FILTER_ITERATE_END

    if (my_infp == 0) {
        /* Allocate a dummy infp */
        simple_lock(&net_hash_header_lock);
        for (i = 0; i < N_NET_HASH; i++) {
            if (filter_hash_header[i].n_keys == 0)
                break;
        }
        if (i == N_NET_HASH) {
            simple_unlock(&net_hash_header_lock);
            simple_unlock(&ifp->if_rcv_port_list_lock);

            ipc_port_release_send(rcv_port);
            if (match != 0)
                    zfree (net_hash_entry_zone, (vm_offset_t)hash_entp);

            rval = D_NO_MEMORY;
            goto clean_and_return;
        }

        hhp = &filter_hash_header[i];
        hhp->n_keys = match->jt;
        simple_unlock(&net_hash_header_lock);

        hhp->ref_count = 0;
        for (i = 0; i < NET_HASH_SIZE; i++)
            hhp->table[i] = 0;

        my_infp = (net_rcv_port_t)hhp;
        my_infp->rcv_port = MACH_PORT_NULL;     /* indication of dummy */
        is_new_infp = TRUE;
    }

    if (is_new_infp) {
        my_infp->priority = priority;
        my_infp->rcv_count = 0;

        /* Copy filter program. */
        bcopy (filter, my_infp->filter, filter_bytes);
        my_infp->filter_end =
            (filter_t *)((char *)my_infp->filter + filter_bytes);

        if (match == 0) {
            my_infp->rcv_qlimit = net_add_q_info(rcv_port);
        } else {
            my_infp->rcv_qlimit = 0;
        }

        /* Insert my_infp according to priority */
        queue_iterate(&ifp->if_rcv_port_list, infp, net_rcv_port_t, chain)
            if (priority > infp->priority)
                break;
        enqueue_tail((queue_t)&infp->chain, (queue_entry_t)my_infp);
    }

    if (match != 0)
    {       /* Insert to hash list */
        net_hash_entry_t *p;

        hash_entp->rcv_port = rcv_port;
        for (i = 0; i < match->jt; i++)         /* match->jt is n_keys */
            hash_entp->keys[i] = match[i+1].k;
        p = &((net_hash_header_t)my_infp)->
                        table[bpf_hash(match->jt, hash_entp->keys)];

        /* Not checking for the same key values */
        if (*p == 0) {
            queue_init ((queue_t) hash_entp);
            *p = hash_entp;
        } else {
            enqueue_tail((queue_t) *p, (queue_entry_t) hash_entp);
        }

        ((net_hash_header_t)my_infp)->ref_count++;
        hash_entp->rcv_qlimit = net_add_q_info(rcv_port);

    }

    simple_unlock(&ifp->if_rcv_port_list_lock);

clean_and_return:
    /* No locks are held at this point. */

    if (dead_infp != 0)
            net_free_dead_infp(dead_infp);
    if (dead_entp != 0)
            net_free_dead_entp(dead_entp);

    return rval;
}
 1519 
 1520 /*
 1521  * Other network operations
 1522  */
 1523 io_return_t
 1524 net_getstat(
 1525         struct ifnet    *ifp,
 1526         dev_flavor_t    flavor,
 1527         dev_status_t    status,         /* pointer to OUT array */
 1528         natural_t       *count)         /* OUT */
 1529 {
 1530         switch (flavor) {
 1531             case NET_STATUS:
 1532             {
 1533                 register struct net_status *ns = (struct net_status *)status;
 1534 
 1535                 if (*count < NET_STATUS_COUNT)
 1536                     return D_INVALID_OPERATION;
 1537                 
 1538                 ns->min_packet_size = ifp->if_header_size;
 1539                 ns->max_packet_size = ifp->if_header_size + ifp->if_mtu;
 1540                 ns->header_format   = ifp->if_header_format;
 1541                 ns->header_size     = ifp->if_header_size;
 1542                 ns->address_size    = ifp->if_address_size;
 1543                 ns->flags           = ifp->if_flags;
 1544                 ns->mapped_size     = 0;
 1545 
 1546                 *count = NET_STATUS_COUNT;
 1547                 break;
 1548             }
 1549             case NET_ADDRESS:
 1550             {
 1551                 register int    addr_byte_count;
 1552                 register int    addr_int_count;
 1553                 register int    i;
 1554 
 1555                 addr_byte_count = ifp->if_address_size;
 1556                 addr_int_count = (addr_byte_count + (sizeof(int)-1))
 1557                                          / sizeof(int);
 1558 
 1559                 if (*count < addr_int_count)
 1560                     return D_INVALID_OPERATION;
 1561 
 1562                 bcopy((char *)ifp->if_address,
 1563                       (char *)status,
 1564                       (unsigned) addr_byte_count);
 1565                 if (addr_byte_count < addr_int_count * sizeof(int))
 1566                     bzero((char *)status + addr_byte_count,
 1567                           (unsigned) (addr_int_count * sizeof(int)
 1568                                       - addr_byte_count));
 1569 
 1570                 for (i = 0; i < addr_int_count; i++) {
 1571                     register int word;
 1572 
 1573                     word = status[i];
 1574                     status[i] = htonl(word);
 1575                 }
 1576                 *count = addr_int_count;
 1577                 break;
 1578             }
 1579             default:
 1580                 return D_INVALID_OPERATION;
 1581         }
 1582         return D_SUCCESS;
 1583 }
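       /*
        * Illustrative note, not part of the original source: the
        * NET_ADDRESS flavor packs the hardware address into ints and
        * byte-swaps each word so callers see the bytes in network
        * order regardless of host endianness.  For a 6-byte Ethernet
        * address 08:00:2b:12:34:56 (assuming 4-byte ints),
        * addr_int_count is (6 + 3) / 4 = 2; after the bcopy/bzero the
        * buffer holds 08 00 2b 12 34 56 00 00, and after the htonl
        * pass the returned words are status[0] == 0x08002b12 and
        * status[1] == 0x34560000.
        */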
 1584 
 1585 io_return_t
 1586 net_write(
 1587         register struct ifnet *ifp,
 1588         void            (*start)(int),
 1589         io_req_t        ior)
 1590 {
 1591         spl_t   s;
 1592         kern_return_t   rc;
 1593         boolean_t       wait;
 1594 
 1595         /*
 1596          * Reject the write if the interface is down.
 1597          */
 1598         if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
 1599             return D_DEVICE_DOWN;
 1600 
 1601         /*
 1602          * Reject the write if the packet is too large or too small.
 1603          */
 1604         if (ior->io_count < ifp->if_header_size ||
 1605             ior->io_count > ifp->if_header_size + ifp->if_mtu)
 1606             return D_INVALID_SIZE;
 1607 
 1608         /*
 1609          * Wire down the memory.
 1610          */
 1611 
 1612         rc = device_write_get(ior, &wait);
 1613         if (rc != KERN_SUCCESS)
 1614             return rc;
 1615 
 1616         /*
 1617          *      Network interfaces can't cope with VM continuations.
 1618          *      If wait is set, just panic.
 1619          */
 1620         if (wait) {
 1621                 panic("net_write: VM continuation");
 1622         }
 1623 
 1624         /*
 1625          * Queue the packet on the output queue, and
 1626          * start the device.
 1627          */
 1628         s = splimp();
 1629         IF_ENQUEUE(&ifp->if_snd, ior);
 1630         (*start)(ifp->if_unit);
 1631         splx(s);
 1632         
 1633         return D_IO_QUEUED;
 1634 }
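       /*
        * Editorial note: returning D_IO_QUEUED rather than D_SUCCESS
        * tells the generic device layer that the request has only
        * been queued; the driver is expected to complete the ior
        * (via iodone) once the packet has actually been transmitted.
        */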
 1635 
 1636 /*
 1637  * Initialize the whole package.
 1638  */
 1639 void
 1640 net_io_init(void)
 1641 {
 1642         register vm_size_t      size;
 1643 
 1644         size = sizeof(struct net_rcv_port);
 1645         net_rcv_zone = zinit(size,
 1646                              size * 1000,
 1647                              PAGE_SIZE,
 1648                              FALSE,
 1649                              "net_rcv_port");
 1650 
 1651         size = sizeof(struct net_hash_entry);
 1652         net_hash_entry_zone = zinit(size,
 1653                                     size * 100,
 1654                                     PAGE_SIZE,
 1655                                     FALSE,
 1656                                     "net_hash_entry");
 1657 
 1658         size = ikm_plus_overhead(sizeof(struct net_rcv_msg));
 1659         net_kmsg_size = round_page(size);
 1660 
 1661         /*
 1662          *      net_kmsg_max caps the number of buffers
 1663          *      we are willing to allocate.  By default,
 1664          *      we allow for net_queue_free_min plus
 1665          *      the queue limit for each filter.
 1666          *      (Added as the filters are added.)
 1667          */
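       /*
        * Illustrative arithmetic, not part of the original source:
        * net_add_q_info() below raises net_kmsg_max by (qlimit + 1)
        * for each filter added.  If, for example, net_queue_free_min
        * were 16 and three filters registered ports with a queue
        * limit of 5, net_kmsg_max would grow to 16 + 3*(5+1) = 34.
        */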
 1668 
 1669         simple_lock_init(&net_kmsg_total_lock);
 1670         if (net_kmsg_max == 0)
 1671             net_kmsg_max = net_queue_free_min;
 1672 
 1673         simple_lock_init(&net_queue_free_lock);
 1674         ipc_kmsg_queue_init(&net_queue_free);
 1675 
 1676         simple_lock_init(&net_queue_lock);
 1677         ipc_kmsg_queue_init(&net_queue_high);
 1678         ipc_kmsg_queue_init(&net_queue_low);
 1679 
 1680         simple_lock_init(&net_hash_header_lock);
 1681 }
 1682 
 1683 
 1684 /* ======== BPF: Berkeley Packet Filter ======== */
 1685 
 1686 /*-
 1687  * Copyright (c) 1990-1991 The Regents of the University of California.
 1688  * All rights reserved.
 1689  *
 1690  * This code is derived from the Stanford/CMU enet packet filter,
 1691  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 1692  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 
 1693  * Berkeley Laboratory.
 1694  *
 1695  * Redistribution and use in source and binary forms, with or without
 1696  * modification, are permitted provided that the following conditions
 1697  * are met:
 1698  * 1. Redistributions of source code must retain the above copyright
 1699  *    notice, this list of conditions and the following disclaimer.
 1700  * 2. Redistributions in binary form must reproduce the above copyright
 1701  *    notice, this list of conditions and the following disclaimer in the
 1702  *    documentation and/or other materials provided with the distribution.
 1703  * 3. All advertising materials mentioning features or use of this software
 1704  *    must display the following acknowledgement:
 1705  *      This product includes software developed by the University of
 1706  *      California, Berkeley and its contributors.
 1707  * 4. Neither the name of the University nor the names of its contributors
 1708  *    may be used to endorse or promote products derived from this software
 1709  *    without specific prior written permission.
 1710  *
 1711  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 1712  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 1713  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 1714  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 1715  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 1716  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 1717  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 1718  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 1719  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 1720  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 1721  * SUCH DAMAGE.
 1722  *
 1723  *      @(#)bpf.c       7.5 (Berkeley) 7/15/91
 1724  *
 1725  * static char rcsid[] =
 1726  * "$Header: net_io.c,v 2.30 93/11/17 16:34:06 dbg Exp $";
 1727  */
 1728 #if !(defined(lint) || defined(KERNEL))
 1729 static char rcsid[] =
 1730     "@(#) $Header: net_io.c,v 2.30 93/11/17 16:34:06 dbg Exp $ (LBL)";
 1731 #endif
 1732 
 1733 #if defined(sparc) || defined(mips) || defined(ibm032) || defined(alpha)
 1734 #define BPF_ALIGN
 1735 #endif
 1736 
 1737 #ifndef BPF_ALIGN
 1738 #define EXTRACT_SHORT(p)        ((unsigned short)ntohs(*(unsigned short *)p))
 1739 #define EXTRACT_LONG(p)         ((unsigned int)  ntohl(*(unsigned int *)  p))
 1740 #else
 1741 #define EXTRACT_SHORT(p)\
 1742         ((unsigned short)\
 1743                 ((unsigned short)*((unsigned char *)p+0)<<8|\
 1744                  (unsigned short)*((unsigned char *)p+1)<<0))
 1745 #define EXTRACT_LONG(p)\
 1746                 ((unsigned int)  *((unsigned char *)p+0)<<24|\
 1747                  (unsigned int)  *((unsigned char *)p+1)<<16|\
 1748                  (unsigned int)  *((unsigned char *)p+2)<<8|\
 1749                  (unsigned int)  *((unsigned char *)p+3)<<0)
 1750 #endif
 1751 
 1752 boolean_t
 1753 bpf_match (
 1754         net_hash_header_t hash,
 1755         int n_keys,
 1756         unsigned int *keys,
 1757         net_hash_entry_t **hash_headpp,
 1758         net_hash_entry_t *entpp);       /* forward */
 1759 
 1760 /*
 1761  * Execute the filter program starting at pc on the packet p
 1762  * wirelen is the length of the original packet
 1763  * buflen is the amount of data present
 1764  */
 1765 
 1766 int
 1767 bpf_do_filter(
 1768         net_rcv_port_t  infp,
 1769         char *          p,              /* packet data */
 1770         unsigned int    wirelen,        /* data_count (in bytes) */
 1771         char *          header,
 1772         net_hash_entry_t **hash_headpp,
 1773         net_hash_entry_t *entpp)        /* out */
 1774 {
 1775         register bpf_insn_t pc, pc_end;
 1776         register unsigned int buflen;
 1777 
 1778         register unsigned int A, X;
 1779         register int k;
 1780         unsigned int mem[BPF_MEMWORDS];
 1781 
 1782         pc = ((bpf_insn_t) infp->filter) + 1;
 1783                                         /* filter[0].code is BPF_BEGIN */
 1784         pc_end = (bpf_insn_t)infp->filter_end;
 1785         buflen = NET_RCV_MAX;
 1786         *entpp = 0;                     /* default */
 1787 
 1788 #ifdef lint
 1789         A = 0;
 1790         X = 0;
 1791 #endif
 1792         for (; pc < pc_end; ++pc) {
 1793                 switch (pc->code) {
 1794 
 1795                 default:
 1796 #ifdef KERNEL
 1797                         return 0;
 1798 #else
 1799                         abort();
 1800 #endif                  
 1801                 case BPF_RET|BPF_K:
 1802                         if (infp->rcv_port == MACH_PORT_NULL &&
 1803                             *entpp == 0) {
 1804                                 return 0;
 1805                         }
 1806                         return ((unsigned int)pc->k <= wirelen)
 1807                                                 ? pc->k : wirelen;
 1808 
 1809                 case BPF_RET|BPF_A:
 1810                         if (infp->rcv_port == MACH_PORT_NULL &&
 1811                             *entpp == 0) {
 1812                                 return 0;
 1813                         }
 1814                         return ((unsigned int)A <= wirelen)
 1815                                                 ? A : wirelen;
 1816 
 1817                 case BPF_RET|BPF_MATCH_IMM:
 1818                         if (bpf_match ((net_hash_header_t)infp, pc->jt, mem,
 1819                                        hash_headpp, entpp)) {
 1820                                 return ((unsigned int)pc->k <= wirelen) ?
 1821                                                         pc->k : wirelen;
 1822                         }
 1823                         return 0;
 1824 
 1825                 case BPF_LD|BPF_W|BPF_ABS:
 1826                         k = pc->k;
 1827                         if ((unsigned int)k + sizeof(int) <= buflen) {
 1828 #ifdef BPF_ALIGN
 1829                                 if (((int)(p + k) & 3) != 0)
 1830                                         A = EXTRACT_LONG(&p[k]);
 1831                                 else
 1832 #endif
 1833                                         A = ntohl(*(int *)(p + k));
 1834                                 continue;
 1835                         }
 1836 
 1837                         k -= BPF_DLBASE;
 1838                         if ((unsigned int)k + sizeof(int) <= NET_HDW_HDR_MAX) {
 1839 #ifdef BPF_ALIGN
 1840                                 if (((int)(header + k) & 3) != 0)
 1841                                         A = EXTRACT_LONG(&header[k]);
 1842                                 else
 1843 #endif
 1844                                         A = ntohl(*(int *)(header + k));
 1845                                 continue;
 1846                         } else {
 1847                                 return 0;
 1848                         }
 1849 
 1850                 case BPF_LD|BPF_H|BPF_ABS:
 1851                         k = pc->k;
 1852                         if ((unsigned int)k + sizeof(short) <= buflen) {
 1853                                 A = EXTRACT_SHORT(&p[k]);
 1854                                 continue;
 1855                         }
 1856 
 1857                         k -= BPF_DLBASE;
 1858                         if ((unsigned int)k + sizeof(short)
 1859                                                 <= NET_HDW_HDR_MAX)
 1860                         {
 1861                                 A = EXTRACT_SHORT(&header[k]);
 1862                                 continue;
 1863                         } else {
 1864                                 return 0;
 1865                         }
 1866 
 1867                 case BPF_LD|BPF_B|BPF_ABS:
 1868                         k = pc->k;
 1869                         if ((unsigned int)k < buflen) {
 1870                                 A = (unsigned char)p[k];
 1871                                 continue;
 1872                         }
 1873                         
 1874                         k -= BPF_DLBASE;
 1875                         if ((unsigned int)k < NET_HDW_HDR_MAX) {
 1876                                 A = (unsigned char)header[k];
 1877                                 continue;
 1878                         } else {
 1879                                 return 0;
 1880                         }
 1881 
 1882                 case BPF_LD|BPF_W|BPF_LEN:
 1883                         A = wirelen;
 1884                         continue;
 1885 
 1886                 case BPF_LDX|BPF_W|BPF_LEN:
 1887                         X = wirelen;
 1888                         continue;
 1889 
 1890                 case BPF_LD|BPF_W|BPF_IND:
 1891                         k = X + pc->k;
 1892                         if (k + sizeof(int) > buflen)
 1893                                 return 0;
 1894 #ifdef BPF_ALIGN
 1895                         if (((int)(p + k) & 3) != 0)
 1896                                 A = EXTRACT_LONG(&p[k]);
 1897                         else
 1898 #endif
 1899                                 A = ntohl(*(int *)(p + k));
 1900                         continue;
 1901 
 1902                 case BPF_LD|BPF_H|BPF_IND:
 1903                         k = X + pc->k;
 1904                         if (k + sizeof(short) > buflen)
 1905                                 return 0;
 1906                         A = EXTRACT_SHORT(&p[k]);
 1907                         continue;
 1908 
 1909                 case BPF_LD|BPF_B|BPF_IND:
 1910                         k = X + pc->k;
 1911                         if (k >= buflen)
 1912                                 return 0;
 1913                         A = (unsigned char)p[k];
 1914                         continue;
 1915 
 1916                 case BPF_LDX|BPF_MSH|BPF_B:
 1917                         k = pc->k;
 1918                         if (k >= buflen)
 1919                                 return 0;
 1920                         X = (p[pc->k] & 0xf) << 2;
 1921                         continue;
 1922 
 1923                 case BPF_LD|BPF_IMM:
 1924                         A = pc->k;
 1925                         continue;
 1926 
 1927                 case BPF_LDX|BPF_IMM:
 1928                         X = pc->k;
 1929                         continue;
 1930 
 1931                 case BPF_LD|BPF_MEM:
 1932                         A = mem[pc->k];
 1933                         continue;
 1934                         
 1935                 case BPF_LDX|BPF_MEM:
 1936                         X = mem[pc->k];
 1937                         continue;
 1938 
 1939                 case BPF_ST:
 1940                         mem[pc->k] = A;
 1941                         continue;
 1942 
 1943                 case BPF_STX:
 1944                         mem[pc->k] = X;
 1945                         continue;
 1946 
 1947                 case BPF_JMP|BPF_JA:
 1948                         pc += pc->k;
 1949                         continue;
 1950 
 1951                 case BPF_JMP|BPF_JGT|BPF_K:
 1952                         pc += (A > pc->k) ? pc->jt : pc->jf;
 1953                         continue;
 1954 
 1955                 case BPF_JMP|BPF_JGE|BPF_K:
 1956                         pc += (A >= pc->k) ? pc->jt : pc->jf;
 1957                         continue;
 1958 
 1959                 case BPF_JMP|BPF_JEQ|BPF_K:
 1960                         pc += (A == pc->k) ? pc->jt : pc->jf;
 1961                         continue;
 1962 
 1963                 case BPF_JMP|BPF_JSET|BPF_K:
 1964                         pc += (A & pc->k) ? pc->jt : pc->jf;
 1965                         continue;
 1966 
 1967                 case BPF_JMP|BPF_JGT|BPF_X:
 1968                         pc += (A > X) ? pc->jt : pc->jf;
 1969                         continue;
 1970 
 1971                 case BPF_JMP|BPF_JGE|BPF_X:
 1972                         pc += (A >= X) ? pc->jt : pc->jf;
 1973                         continue;
 1974 
 1975                 case BPF_JMP|BPF_JEQ|BPF_X:
 1976                         pc += (A == X) ? pc->jt : pc->jf;
 1977                         continue;
 1978 
 1979                 case BPF_JMP|BPF_JSET|BPF_X:
 1980                         pc += (A & X) ? pc->jt : pc->jf;
 1981                         continue;
 1982 
 1983                 case BPF_ALU|BPF_ADD|BPF_X:
 1984                         A += X;
 1985                         continue;
 1986                         
 1987                 case BPF_ALU|BPF_SUB|BPF_X:
 1988                         A -= X;
 1989                         continue;
 1990                         
 1991                 case BPF_ALU|BPF_MUL|BPF_X:
 1992                         A *= X;
 1993                         continue;
 1994                         
 1995                 case BPF_ALU|BPF_DIV|BPF_X:
 1996                         if (X == 0)
 1997                                 return 0;
 1998                         A /= X;
 1999                         continue;
 2000                         
 2001                 case BPF_ALU|BPF_AND|BPF_X:
 2002                         A &= X;
 2003                         continue;
 2004                         
 2005                 case BPF_ALU|BPF_OR|BPF_X:
 2006                         A |= X;
 2007                         continue;
 2008 
 2009                 case BPF_ALU|BPF_LSH|BPF_X:
 2010                         A <<= X;
 2011                         continue;
 2012 
 2013                 case BPF_ALU|BPF_RSH|BPF_X:
 2014                         A >>= X;
 2015                         continue;
 2016 
 2017                 case BPF_ALU|BPF_ADD|BPF_K:
 2018                         A += pc->k;
 2019                         continue;
 2020                         
 2021                 case BPF_ALU|BPF_SUB|BPF_K:
 2022                         A -= pc->k;
 2023                         continue;
 2024                         
 2025                 case BPF_ALU|BPF_MUL|BPF_K:
 2026                         A *= pc->k;
 2027                         continue;
 2028                         
 2029                 case BPF_ALU|BPF_DIV|BPF_K:
 2030                         A /= pc->k;
 2031                         continue;
 2032                         
 2033                 case BPF_ALU|BPF_AND|BPF_K:
 2034                         A &= pc->k;
 2035                         continue;
 2036                         
 2037                 case BPF_ALU|BPF_OR|BPF_K:
 2038                         A |= pc->k;
 2039                         continue;
 2040 
 2041                 case BPF_ALU|BPF_LSH|BPF_K:
 2042                         A <<= pc->k;
 2043                         continue;
 2044 
 2045                 case BPF_ALU|BPF_RSH|BPF_K:
 2046                         A >>= pc->k;
 2047                         continue;
 2048 
 2049                 case BPF_ALU|BPF_NEG:
 2050                         A = -A;
 2051                         continue;
 2052 
 2053                 case BPF_MISC|BPF_TAX:
 2054                         X = A;
 2055                         continue;
 2056 
 2057                 case BPF_MISC|BPF_TXA:
 2058                         A = X;
 2059                         continue;
 2060                 }
 2061         }
 2062 
 2063         return 0;
 2064 }
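#if 0   /* Illustrative sketch, not part of the original source. */
       /*
        * A minimal filter program for the interpreter above, assuming
        * the classic BPF encodings (bpf_insn_t pointing at a struct
        * bpf_insn of { code, jt, jf, k }) plus the BPF_BEGIN header
        * word that bpf_do_filter() skips.  Offsets at or above
        * BPF_DLBASE select the separately passed data-link header, so
        * with an Ethernet-style header whose type field sits at
        * offset 12, this accepts up to 1500 bytes of IP (0x0800)
        * packets and rejects everything else:
        */
        static struct bpf_insn ip_only[] = {
            { BPF_BEGIN,             0, 0, 0 },             /* skipped */
            { BPF_LD|BPF_H|BPF_ABS,  0, 0, BPF_DLBASE + 12 },
                                                 /* A = ethertype      */
            { BPF_JMP|BPF_JEQ|BPF_K, 0, 1, 0x0800 },
                                                 /* not IP? skip 1     */
            { BPF_RET|BPF_K,         0, 0, 1500 },          /* accept  */
            { BPF_RET|BPF_K,         0, 0, 0 },             /* reject  */
        };
#endif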
 2065 
 2066 /*
 2067  * Return 1 if 'f' is a valid filter program without a MATCH
 2068  * instruction.  Return 2 if it is a valid filter program with a
 2069  * MATCH instruction.  Otherwise, return 0.
 2070  * The constraints are that each jump be forward and land on a valid
 2071  * instruction, and that the code terminate with either an accept or
 2072  * a reject.  On entry, *match must be 0; on return it points to the
 2073  * BPF_MATCH_IMM instruction, if the program contains one.
 2074  *
 2075  * The kernel needs to be able to verify an application's filter code.
 2076  * Otherwise, a bogus program could easily crash the system.
 2077  */
 2078 int
 2079 bpf_validate(
 2080         bpf_insn_t f,
 2081         int bytes,
 2082         bpf_insn_t *match)
 2083 {
 2084         register int i, j, len;
 2085         register bpf_insn_t p;
 2086 
 2087         len = BPF_BYTES2LEN(bytes);
 2088         /* f[0].code is already checked to be BPF_BEGIN. So skip f[0]. */
 2089 
 2090         for (i = 1; i < len; ++i) {
 2091                 /*
 2092                  * Check that the jumps are forward, and within
 2093                  * the code block.
 2094                  */
 2095                 p = &f[i];
 2096                 if (BPF_CLASS(p->code) == BPF_JMP) {
 2097                         register int from = i + 1;
 2098 
 2099                         if (BPF_OP(p->code) == BPF_JA) {
 2100                                 if (from + p->k >= len)
 2101                                         return 0;
 2102                         }
 2103                         else if (from + p->jt >= len || from + p->jf >= len)
 2104                                 return 0;
 2105                 }
 2106                 /*
 2107                  * Check that memory operations use valid addresses.
 2108                  */
 2109                 if ((BPF_CLASS(p->code) == BPF_ST ||
 2110                      (BPF_CLASS(p->code) == BPF_LD && 
 2111                       (p->code & 0xe0) == BPF_MEM)) &&
 2112                     (p->k >= BPF_MEMWORDS || p->k < 0))
 2113                         return 0;
 2114                 /*
 2115                  * Check for constant division by 0.
 2116                  */
 2117                 if (p->code == (BPF_ALU|BPF_DIV|BPF_K) && p->k == 0)
 2118                         return 0;
 2119                 /*
 2120                  * Check for match instruction.
 2121                  * Only one match instruction per filter is allowed.
 2122                  */
 2123                 if (p->code == (BPF_RET|BPF_MATCH_IMM)) {
 2124                         if (*match != 0 ||
 2125                             p->jt == 0 ||
 2126                             p->jt > N_NET_HASH_KEYS)
 2127                                 return 0;
 2128                         i += p->jt;             /* skip keys */
 2129                         if (i + 1 > len)
 2130                                 return 0;
 2131 
 2132                         for (j = 1; j <= p->jt; j++) {
 2133                             if (p[j].code != (BPF_MISC|BPF_KEY))
 2134                                 return 0;
 2135                         }
 2136 
 2137                         *match = p;
 2138                 }
 2139         }
 2140         if (BPF_CLASS(f[len - 1].code) == BPF_RET)
 2141                 return (*match == 0) ? 1 : 2;
 2142         else
 2143                 return 0;
 2144 }
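#if 0   /* Illustrative usage, not part of the original source. */
       /*
        * Validating the hypothetical ip_only program sketched above
        * before installing it.  *match must start out 0; since
        * ip_only contains no BPF_MATCH_IMM instruction, a return
        * value of 1 (valid, no MATCH) is expected.
        */
        bpf_insn_t match = 0;
        if (bpf_validate((bpf_insn_t)ip_only, sizeof ip_only, &match) != 1)
                panic("bpf_validate: filter rejected");
#endif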
 2145 
 2146 boolean_t
 2147 bpf_eq (
 2148         register bpf_insn_t f1,
 2149         register bpf_insn_t f2,
 2150         register int bytes)
 2151 {
 2152         register int count;
 2153 
 2154         count = BPF_BYTES2LEN(bytes);
 2155         for (; count--; f1++, f2++) {
 2156                 if (!BPF_INSN_EQ(f1, f2)) {
 2157                         if ( f1->code == (BPF_MISC|BPF_KEY) &&
 2158                              f2->code == (BPF_MISC|BPF_KEY) )
 2159                                 continue;
 2160                         return FALSE;
 2161                 }
 2162         }
 2163         return TRUE;
 2164 }
 2165 
 2166 unsigned int
 2167 bpf_hash (
 2168         register int n,
 2169         register unsigned int *keys)
 2170 {
 2171         register unsigned int hval = 0;
 2172         
 2173         while (n--) {
 2174                 hval += *keys++;
 2175         }
 2176         return hval % NET_HASH_SIZE;
 2177 }
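       /*
        * Illustrative arithmetic, not part of the original source:
        * the hash is simply the sum of the key words modulo
        * NET_HASH_SIZE.  For keys { 0x0a000001, 0x0050 } the sum is
        * 0x0a000051, so if NET_HASH_SIZE were, say, 256, the entry
        * would land in bucket 0x51 (81).
        */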
 2178 
 2179 
 2180 boolean_t
 2181 bpf_match (
 2182         net_hash_header_t hash,
 2183         register int n_keys,
 2184         register unsigned int *keys,
 2185         net_hash_entry_t **hash_headpp,
 2186         net_hash_entry_t *entpp)
 2187 {
 2188         register net_hash_entry_t head, entp;
 2189         register int i;
 2190 
 2191         if (n_keys != hash->n_keys)
 2192                 return FALSE;
 2193 
 2194         *hash_headpp = &hash->table[bpf_hash(n_keys, keys)];
 2195         head = **hash_headpp;
 2196 
 2197         if (head == 0)
 2198                 return FALSE;
 2199 
 2200         HASH_ITERATE (head, entp)
 2201         {
 2202                 for (i = 0; i < n_keys; i++) {
 2203                         if (keys[i] != entp->keys[i])
 2204                                 break;
 2205                 }
 2206                 if (i == n_keys) {
 2207                         *entpp = entp;
 2208                         return TRUE;
 2209                 }
 2210         }
 2211         HASH_ITERATE_END (head, entp)
 2212         return FALSE;
 2213 }       
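       /*
        * Editorial note: bpf_match() is reached from the
        * BPF_RET|BPF_MATCH_IMM case of bpf_do_filter() above, which
        * passes the filter's scratch memory (mem[]) as the key
        * vector.  A filter therefore stores its extracted key words
        * (for example, a source address and a port number) into
        * mem[0..n_keys-1], and the packet is delivered to the
        * rcv_port of whichever hash entry holds equal keys.
        */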
 2214 
 2215 
 2216 /*
 2217  * Removes a hash entry (ENTP) from its queue (HEAD).
 2218  * If the reference count of the filter (HP) drops to zero and HP
 2219  * is not USED, HP is removed from ifp->if_rcv_port_list and freed.
 2220  */
 2221 
 2222 boolean_t
 2223 hash_ent_remove (
 2224     struct ifnet        *ifp,
 2225     net_hash_header_t   hp,
 2226     int                 used,
 2227     net_hash_entry_t    *head,
 2228     net_hash_entry_t    entp,
 2229     queue_entry_t       *dead_p)
 2230 {    
 2231         hp->ref_count--;
 2232 
 2233         if (*head == entp) {
 2234 
 2235                 if (queue_empty((queue_t) entp)) {
 2236                         *head = 0;
 2237                         ENQUEUE_DEAD(*dead_p, entp);
 2238                         if (hp->ref_count == 0 && !used) {
 2239                                 remqueue((queue_t) &ifp->if_rcv_port_list,
 2240                                          (queue_entry_t)hp);
 2241                                 hp->n_keys = 0;
 2242                                 return TRUE;
 2243                         }
 2244                         return FALSE;
 2245                 } else {
 2246                         *head = (net_hash_entry_t)queue_next((queue_t) entp);
 2247                 }
 2248         }
 2249 
 2250         remqueue((queue_t)*head, (queue_entry_t)entp);
 2251         ENQUEUE_DEAD(*dead_p, entp);
 2252         return FALSE;
 2253 }    
 2254 
 2255 int
 2256 net_add_q_info (
 2257         ipc_port_t      rcv_port)
 2258 {
 2259         mach_port_msgcount_t qlimit = 0;
 2260             
 2261         /*
 2262          * We use a new port, so increase net_queue_free_min
 2263          * and net_kmsg_max to allow for more queued messages.
 2264          */
 2265             
 2266         if (IP_VALID(rcv_port)) {
 2267                 ip_lock(rcv_port);
 2268                 if (ip_active(rcv_port))
 2269                         qlimit = rcv_port->ip_qlimit;
 2270                 ip_unlock(rcv_port);
 2271         }
 2272             
 2273         simple_lock(&net_kmsg_total_lock);
 2274         net_queue_free_min++;
 2275         net_kmsg_max += qlimit + 1;
 2276         simple_unlock(&net_kmsg_total_lock);
 2277 
 2278         return (int)qlimit;
 2279 }
 2280 
 2281 void
 2282 net_del_q_info (
 2283         int qlimit)
 2284 {
 2285         simple_lock(&net_kmsg_total_lock);
 2286         net_queue_free_min--;
 2287         net_kmsg_max -= qlimit + 1;
 2288         simple_unlock(&net_kmsg_total_lock);
 2289 }
 2290 
 2291 
 2292 /*
 2293  * net_free_dead_infp (dead_infp)
 2294  *      queue_entry_t dead_infp;        list of dead net_rcv_port_t.
 2295  *
 2296  * Deallocates dead net_rcv_port_t.
 2297  * No locks should be held when called.
 2298  */
 2299 void
 2300 net_free_dead_infp (
 2301         queue_entry_t dead_infp)
 2302 {
 2303         register net_rcv_port_t infp, nextfp;
 2304 
 2305         for (infp = (net_rcv_port_t) dead_infp; infp != 0; infp = nextfp)
 2306         {
 2307                 nextfp = (net_rcv_port_t) queue_next(&infp->chain);
 2308                 ipc_port_release_send(infp->rcv_port);
 2309                 net_del_q_info(infp->rcv_qlimit);
 2310                 zfree(net_rcv_zone, (vm_offset_t) infp);
 2311         }           
 2312 }
 2313     
 2314 /*
 2315  * net_free_dead_entp (dead_entp)
 2316  *      queue_entry_t dead_entp;        list of dead net_hash_entry_t.
 2317  *
 2318  * Deallocates dead net_hash_entry_t.
 2319  * No locks should be held when called.
 2320  */
 2321 void
 2322 net_free_dead_entp (
 2323         queue_entry_t dead_entp)
 2324 {
 2325         register net_hash_entry_t entp, nextentp;
 2326 
 2327         for (entp = (net_hash_entry_t)dead_entp; entp != 0; entp = nextentp)
 2328         {
 2329                 nextentp = (net_hash_entry_t) queue_next(&entp->chain);
 2330 
 2331                 ipc_port_release_send(entp->rcv_port);
 2332                 net_del_q_info(entp->rcv_qlimit);
 2333                 zfree(net_hash_entry_zone, (vm_offset_t) entp);
 2334         }
 2335 }
 2336 
