[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_fw2.c

Version: -  FREEBSD  -  FREEBSD7  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  OPENSOLARIS  -  minix-3-1-1  -  TRUSTEDBSD-SEBSD  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

  1 /*-
  2  * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
  3  *
  4  * Redistribution and use in source and binary forms, with or without
  5  * modification, are permitted provided that the following conditions
  6  * are met:
  7  * 1. Redistributions of source code must retain the above copyright
  8  *    notice, this list of conditions and the following disclaimer.
  9  * 2. Redistributions in binary form must reproduce the above copyright
 10  *    notice, this list of conditions and the following disclaimer in the
 11  *    documentation and/or other materials provided with the distribution.
 12  *
 13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 23  * SUCH DAMAGE.
 24  */
 25 
 26 #include <sys/cdefs.h>
 27 __FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.201 2008/12/02 21:37:28 bz Exp $");
 28 
 29 #define        DEB(x)
 30 #define        DDB(x) x
 31 
 32 /*
 33  * Implement IP packet firewall (new version)
 34  */
 35 
 36 #if !defined(KLD_MODULE)
 37 #include "opt_ipfw.h"
 38 #include "opt_ipdivert.h"
 39 #include "opt_ipdn.h"
 40 #include "opt_inet.h"
 41 #ifndef INET
 42 #error IPFIREWALL requires INET.
 43 #endif /* INET */
 44 #endif
 45 #include "opt_inet6.h"
 46 #include "opt_ipsec.h"
 47 #include "opt_mac.h"
 48 
 49 #include <sys/param.h>
 50 #include <sys/systm.h>
 51 #include <sys/condvar.h>
 52 #include <sys/eventhandler.h>
 53 #include <sys/malloc.h>
 54 #include <sys/mbuf.h>
 55 #include <sys/kernel.h>
 56 #include <sys/lock.h>
 57 #include <sys/jail.h>
 58 #include <sys/module.h>
 59 #include <sys/priv.h>
 60 #include <sys/proc.h>
 61 #include <sys/rwlock.h>
 62 #include <sys/socket.h>
 63 #include <sys/socketvar.h>
 64 #include <sys/sysctl.h>
 65 #include <sys/syslog.h>
 66 #include <sys/ucred.h>
 67 #include <sys/vimage.h>
 68 #include <net/if.h>
 69 #include <net/radix.h>
 70 #include <net/route.h>
 71 #include <net/pf_mtag.h>
 72 #include <net/vnet.h>
 73 
 74 #define IPFW_INTERNAL   /* Access to protected data structures in ip_fw.h. */
 75 
 76 #include <netinet/in.h>
 77 #include <netinet/in_systm.h>
 78 #include <netinet/in_var.h>
 79 #include <netinet/in_pcb.h>
 80 #include <netinet/ip.h>
 81 #include <netinet/ip_var.h>
 82 #include <netinet/ip_icmp.h>
 83 #include <netinet/ip_fw.h>
 84 #include <netinet/ip_divert.h>
 85 #include <netinet/ip_dummynet.h>
 86 #include <netinet/ip_carp.h>
 87 #include <netinet/pim.h>
 88 #include <netinet/tcp.h>
 89 #include <netinet/tcp_timer.h>
 90 #include <netinet/tcp_var.h>
 91 #include <netinet/tcpip.h>
 92 #include <netinet/udp.h>
 93 #include <netinet/udp_var.h>
 94 #include <netinet/sctp.h>
 95 #include <netinet/vinet.h>
 96 
 97 #include <netgraph/ng_ipfw.h>
 98 
 99 #include <altq/if_altq.h>
100 
101 #include <netinet/ip6.h>
102 #include <netinet/icmp6.h>
103 #ifdef INET6
104 #include <netinet6/scope6_var.h>
105 #endif
106 
107 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */
108 
109 #include <machine/in_cksum.h>   /* XXX for in_cksum */
110 
111 #include <security/mac/mac_framework.h>
112 
113 /*
114  * set_disable contains one bit per set value (0..31).
115  * If the bit is set, all rules with the corresponding set
116  * are disabled. Set RESVD_SET(31) is reserved for the default rule
117  * and rules that are not deleted by the flush command,
118  * and CANNOT be disabled.
119  * Rules in set RESVD_SET can only be deleted explicitly.
120  */
121 static u_int32_t set_disable;
122 
123 static int fw_verbose;
124 static int verbose_limit;
125 
126 static struct callout ipfw_timeout;
127 static uma_zone_t ipfw_dyn_rule_zone;
128 
129 /*
130  * Data structure to cache our ucred related
131  * information. This structure only gets used if
132  * the user specified UID/GID based constraints in
133  * a firewall rule.
134  */
struct ip_fw_ugid {
        gid_t           fw_groups[NGROUPS];     /* cached group ids */
        int             fw_ngroups;     /* presumably # of valid fw_groups[] entries -- confirm */
        uid_t           fw_uid;         /* cached user id */
        int             fw_prid;        /* presumably a jail (prison) id -- confirm against the code that fills it */
};
141 
142 /*
143  * list of rules for layer 3
144  */
145 struct ip_fw_chain layer3_chain;
146 
147 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
148 MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
149 #define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
150 ipfw_nat_t *ipfw_nat_ptr = NULL;
151 ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
152 ipfw_nat_cfg_t *ipfw_nat_del_ptr;
153 ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
154 ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
155 
/*
 * One entry of a lookup table: an IPv4 address/mask pair with an
 * associated 32-bit value.
 */
struct table_entry {
        struct radix_node       rn[2];  /* radix tree linkage; NOTE(review): presumably must stay first -- confirm */
        struct sockaddr_in      addr, mask;     /* address and netmask of the entry */
        u_int32_t               value;  /* value associated with the entry */
};
161 
162 static int fw_debug = 1;
163 static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
164 
165 extern int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
166 
167 #ifdef SYSCTL_NODE
168 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
169 SYSCTL_V_PROC(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, enable,
170     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, fw_enable, 0,
171     ipfw_chg_hook, "I", "Enable ipfw");
172 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, autoinc_step,
173     CTLFLAG_RW, autoinc_step, 0, "Rule number autincrement step");
174 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, one_pass,
175     CTLFLAG_RW | CTLFLAG_SECURE3, fw_one_pass, 0,
176     "Only do a single pass through ipfw when using dummynet(4)");
177 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
178     fw_debug, 0, "Enable printing of debug ip_fw statements");
179 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, verbose,
180     CTLFLAG_RW | CTLFLAG_SECURE3,
181     fw_verbose, 0, "Log matches to ipfw rules");
182 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
183     &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
184 SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
185     NULL, IPFW_DEFAULT_RULE, "The default/max possible rule number.");
186 SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD,
187     NULL, IPFW_TABLES_MAX, "The maximum number of tables.");
188 
189 /*
190  * Description of dynamic rules.
191  *
192  * Dynamic rules are stored in lists accessed through a hash table
193  * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can
194  * be modified through the sysctl variable dyn_buckets which is
195  * updated when the table becomes empty.
196  *
197  * XXX currently there is only one list, ipfw_dyn.
198  *
199  * When a packet is received, its address fields are first masked
200  * with the mask defined for the rule, then hashed, then matched
201  * against the entries in the corresponding list.
202  * Dynamic rules can be used for different purposes:
203  *  + stateful rules;
204  *  + enforcing limits on the number of sessions;
205  *  + in-kernel NAT (not implemented yet)
206  *
207  * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
208  * measured in seconds and depending on the flags.
209  *
210  * The total number of dynamic rules is stored in dyn_count.
211  * The max number of dynamic rules is dyn_max. When we reach
212  * the maximum number of rules we do not create anymore. This is
213  * done to avoid consuming too much memory, but also too much
214  * time when searching on each packet (ideally, we should try instead
215  * to put a limit on the length of the list on each bucket...).
216  *
217  * Each dynamic rule holds a pointer to the parent ipfw rule so
218  * we know what action to perform. Dynamic rules are removed when
219  * the parent rule is deleted. XXX we should make them survive.
220  *
221  * There are some limitations with dynamic rules -- we do not
222  * obey the 'randomized match', and we do not do multiple
223  * passes through the firewall. XXX check the latter!!!
224  */
225 static ipfw_dyn_rule **ipfw_dyn_v = NULL;
226 static u_int32_t dyn_buckets = 256; /* must be power of 2 */
227 static u_int32_t curr_dyn_buckets = 256; /* must be power of 2 */
228 
229 static struct mtx ipfw_dyn_mtx;         /* mutex guarding dynamic rules */
230 #define IPFW_DYN_LOCK_INIT() \
231         mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF)
232 #define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx)
233 #define IPFW_DYN_LOCK()         mtx_lock(&ipfw_dyn_mtx)
234 #define IPFW_DYN_UNLOCK()       mtx_unlock(&ipfw_dyn_mtx)
235 #define IPFW_DYN_LOCK_ASSERT()  mtx_assert(&ipfw_dyn_mtx, MA_OWNED)
236 
237 /*
238  * Timeouts for various events in handling dynamic rules.
239  */
240 static u_int32_t dyn_ack_lifetime = 300;
241 static u_int32_t dyn_syn_lifetime = 20;
242 static u_int32_t dyn_fin_lifetime = 1;
243 static u_int32_t dyn_rst_lifetime = 1;
244 static u_int32_t dyn_udp_lifetime = 10;
245 static u_int32_t dyn_short_lifetime = 5;
246 
247 /*
248  * Keepalives are sent if dyn_keepalive is set. They are sent every
249  * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
250  * seconds of lifetime of a rule.
251  * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
252  * than dyn_keepalive_period.
253  */
254 
255 static u_int32_t dyn_keepalive_interval = 20;
256 static u_int32_t dyn_keepalive_period = 5;
257 static u_int32_t dyn_keepalive = 1;     /* do send keepalives */
258 
259 static u_int32_t static_count;  /* # of static rules */
260 static u_int32_t static_len;    /* size in bytes of static rules */
261 static u_int32_t dyn_count;             /* # of dynamic rules */
262 static u_int32_t dyn_max = 4096;        /* max # of dynamic rules */
263 
264 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, dyn_buckets,
265     CTLFLAG_RW, dyn_buckets, 0, "Number of dyn. buckets");
266 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, curr_dyn_buckets,
267     CTLFLAG_RD, curr_dyn_buckets, 0, "Current Number of dyn. buckets");
268 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, dyn_count,
269     CTLFLAG_RD, dyn_count, 0, "Number of dyn. rules");
270 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, dyn_max,
271     CTLFLAG_RW, dyn_max, 0, "Max number of dyn. rules");
272 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, static_count,
273     CTLFLAG_RD, static_count, 0, "Number of static rules");
274 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime,
275     CTLFLAG_RW, dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
276 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime,
277     CTLFLAG_RW, dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
278 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime,
279     CTLFLAG_RW, dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin");
280 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime,
281     CTLFLAG_RW, dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst");
282 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime,
283     CTLFLAG_RW, dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP");
284 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, dyn_short_lifetime,
285     CTLFLAG_RW, dyn_short_lifetime, 0,
286     "Lifetime of dyn. rules for other situations");
287 SYSCTL_V_INT(V_NET, vnet_ipfw, _net_inet_ip_fw, OID_AUTO, dyn_keepalive,
288     CTLFLAG_RW, dyn_keepalive, 0, "Enable keepalives for dyn. rules");
289 
290 
291 #ifdef INET6
292 /*
293  * IPv6 specific variables
294  */
295 SYSCTL_DECL(_net_inet6_ip6);
296 
297 static struct sysctl_ctx_list ip6_fw_sysctl_ctx;
298 static struct sysctl_oid *ip6_fw_sysctl_tree;
299 #endif /* INET6 */
300 #endif /* SYSCTL_NODE */
301 
302 static int fw_deny_unknown_exthdrs = 1;
303 
304 
/*
 * L3HDR takes a pointer to an IPv4 header and returns a pointer of
 * type T * to whatever follows it: the header pointer is advanced by
 * ip_hl 32-bit words (the IP header length, options included).
 * Note that 'ip' is evaluated twice, so it must have no side effects.
 * The other macros just cast void * into the appropriate type.
 */
#define L3HDR(T, ip)    ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
#define TCP(p)          ((struct tcphdr *)(p))
#define SCTP(p)         ((struct sctphdr *)(p))
#define UDP(p)          ((struct udphdr *)(p))
#define ICMP(p)         ((struct icmphdr *)(p))
#define ICMP6(p)        ((struct icmp6_hdr *)(p))
315 
316 static __inline int
317 icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd)
318 {
319         int type = icmp->icmp_type;
320 
321         return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) );
322 }
323 
324 #define TT      ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
325     (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
326 
327 static int
328 is_icmp_query(struct icmphdr *icmp)
329 {
330         int type = icmp->icmp_type;
331 
332         return (type <= ICMP_MAXTYPE && (TT & (1<<type)) );
333 }
334 #undef TT
335 
336 /*
337  * The following checks use two arrays of 8 or 16 bits to store the
338  * bits that we want set or clear, respectively. They are in the
339  * low and high half of cmd->arg1 or cmd->d[0].
340  *
341  * We scan options and store the bits we find set. We succeed if
342  *
343  *      (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
344  *
345  * The code is sometimes optimized not to store additional variables.
346  */
347 
348 static int
349 flags_match(ipfw_insn *cmd, u_int8_t bits)
350 {
351         u_char want_clear;
352         bits = ~bits;
353 
354         if ( ((cmd->arg1 & 0xff) & bits) != 0)
355                 return 0; /* some bits we want set were clear */
356         want_clear = (cmd->arg1 >> 8) & 0xff;
357         if ( (want_clear & bits) != want_clear)
358                 return 0; /* some bits we want clear were set */
359         return 1;
360 }
361 
362 static int
363 ipopts_match(struct ip *ip, ipfw_insn *cmd)
364 {
365         int optlen, bits = 0;
366         u_char *cp = (u_char *)(ip + 1);
367         int x = (ip->ip_hl << 2) - sizeof (struct ip);
368 
369         for (; x > 0; x -= optlen, cp += optlen) {
370                 int opt = cp[IPOPT_OPTVAL];
371 
372                 if (opt == IPOPT_EOL)
373                         break;
374                 if (opt == IPOPT_NOP)
375                         optlen = 1;
376                 else {
377                         optlen = cp[IPOPT_OLEN];
378                         if (optlen <= 0 || optlen > x)
379                                 return 0; /* invalid or truncated */
380                 }
381                 switch (opt) {
382 
383                 default:
384                         break;
385 
386                 case IPOPT_LSRR:
387                         bits |= IP_FW_IPOPT_LSRR;
388                         break;
389 
390                 case IPOPT_SSRR:
391                         bits |= IP_FW_IPOPT_SSRR;
392                         break;
393 
394                 case IPOPT_RR:
395                         bits |= IP_FW_IPOPT_RR;
396                         break;
397 
398                 case IPOPT_TS:
399                         bits |= IP_FW_IPOPT_TS;
400                         break;
401                 }
402         }
403         return (flags_match(cmd, bits));
404 }
405 
406 static int
407 tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd)
408 {
409         int optlen, bits = 0;
410         u_char *cp = (u_char *)(tcp + 1);
411         int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
412 
413         for (; x > 0; x -= optlen, cp += optlen) {
414                 int opt = cp[0];
415                 if (opt == TCPOPT_EOL)
416                         break;
417                 if (opt == TCPOPT_NOP)
418                         optlen = 1;
419                 else {
420                         optlen = cp[1];
421                         if (optlen <= 0)
422                                 break;
423                 }
424 
425                 switch (opt) {
426 
427                 default:
428                         break;
429 
430                 case TCPOPT_MAXSEG:
431                         bits |= IP_FW_TCPOPT_MSS;
432                         break;
433 
434                 case TCPOPT_WINDOW:
435                         bits |= IP_FW_TCPOPT_WINDOW;
436                         break;
437 
438                 case TCPOPT_SACK_PERMITTED:
439                 case TCPOPT_SACK:
440                         bits |= IP_FW_TCPOPT_SACK;
441                         break;
442 
443                 case TCPOPT_TIMESTAMP:
444                         bits |= IP_FW_TCPOPT_TS;
445                         break;
446 
447                 }
448         }
449         return (flags_match(cmd, bits));
450 }
451 
452 static int
453 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
454 {
455         if (ifp == NULL)        /* no iface with this packet, match fails */
456                 return 0;
457         /* Check by name or by IP address */
458         if (cmd->name[0] != '\0') { /* match by name */
459                 /* Check name */
460                 if (cmd->p.glob) {
461                         if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
462                                 return(1);
463                 } else {
464                         if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
465                                 return(1);
466                 }
467         } else {
468                 struct ifaddr *ia;
469 
470                 /* XXX lock? */
471                 TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
472                         if (ia->ifa_addr->sa_family != AF_INET)
473                                 continue;
474                         if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
475                             (ia->ifa_addr))->sin_addr.s_addr)
476                                 return(1);      /* match */
477                 }
478         }
479         return(0);      /* no match, fail ... */
480 }
481 
482 /*
483  * The verify_path function checks if a route to the src exists and
484  * if it is reachable via ifp (when provided).
485  * 
486  * The 'verrevpath' option checks that the interface that an IP packet
487  * arrives on is the same interface that traffic destined for the
488  * packet's source address would be routed out of.  The 'versrcreach'
489  * option just checks that the source address is reachable via any route
490  * (except default) in the routing table.  These two are a measure to block
491  * forged packets.  This is also commonly known as "anti-spoofing" or Unicast
492  * Reverse Path Forwarding (Unicast RPF) in Cisco-ese. The name of the knobs
493  * is purposely reminiscent of the Cisco IOS command,
494  *
495  *   ip verify unicast reverse-path
496  *   ip verify unicast source reachable-via any
497  *
498  * which implements the same functionality. But note that syntax is
499  * misleading. The check may be performed on all IP packets whether unicast,
500  * multicast, or broadcast.
501  */
502 static int
503 verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
504 {
505         struct route ro;
506         struct sockaddr_in *dst;
507 
508         bzero(&ro, sizeof(ro));
509 
510         dst = (struct sockaddr_in *)&(ro.ro_dst);
511         dst->sin_family = AF_INET;
512         dst->sin_len = sizeof(*dst);
513         dst->sin_addr = src;
514         in_rtalloc_ign(&ro, RTF_CLONING, fib);
515 
516         if (ro.ro_rt == NULL)
517                 return 0;
518 
519         /*
520          * If ifp is provided, check for equality with rtentry.
521          * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
522          * in order to pass packets injected back by if_simloop():
523          * if useloopback == 1 routing entry (via lo0) for our own address
524          * may exist, so we need to handle routing assymetry.
525          */
526         if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
527                 RTFREE(ro.ro_rt);
528                 return 0;
529         }
530 
531         /* if no ifp provided, check if rtentry is not default route */
532         if (ifp == NULL &&
533              satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) {
534                 RTFREE(ro.ro_rt);
535                 return 0;
536         }
537 
538         /* or if this is a blackhole/reject route */
539         if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
540                 RTFREE(ro.ro_rt);
541                 return 0;
542         }
543 
544         /* found valid route */
545         RTFREE(ro.ro_rt);
546         return 1;
547 }
548 
549 #ifdef INET6
550 /*
551  * ipv6 specific rules here...
552  */
553 static __inline int
554 icmp6type_match (int type, ipfw_insn_u32 *cmd)
555 {
556         return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) );
557 }
558 
559 static int
560 flow6id_match( int curr_flow, ipfw_insn_u32 *cmd )
561 {
562         int i;
563         for (i=0; i <= cmd->o.arg1; ++i )
564                 if (curr_flow == cmd->d[i] )
565                         return 1;
566         return 0;
567 }
568 
569 /* support for IP6_*_ME opcodes */
570 static int
571 search_ip6_addr_net (struct in6_addr * ip6_addr)
572 {
573         INIT_VNET_NET(curvnet);
574         struct ifnet *mdc;
575         struct ifaddr *mdc2;
576         struct in6_ifaddr *fdm;
577         struct in6_addr copia;
578 
579         TAILQ_FOREACH(mdc, &V_ifnet, if_link)
580                 TAILQ_FOREACH(mdc2, &mdc->if_addrlist, ifa_list) {
581                         if (mdc2->ifa_addr->sa_family == AF_INET6) {
582                                 fdm = (struct in6_ifaddr *)mdc2;
583                                 copia = fdm->ia_addr.sin6_addr;
584                                 /* need for leaving scope_id in the sock_addr */
585                                 in6_clearscope(&copia);
586                                 if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia))
587                                         return 1;
588                         }
589                 }
590         return 0;
591 }
592 
593 static int
594 verify_path6(struct in6_addr *src, struct ifnet *ifp)
595 {
596         struct route_in6 ro;
597         struct sockaddr_in6 *dst;
598 
599         bzero(&ro, sizeof(ro));
600 
601         dst = (struct sockaddr_in6 * )&(ro.ro_dst);
602         dst->sin6_family = AF_INET6;
603         dst->sin6_len = sizeof(*dst);
604         dst->sin6_addr = *src;
605         /* XXX MRT 0 for ipv6 at this time */
606         rtalloc_ign((struct route *)&ro, RTF_CLONING);
607 
608         if (ro.ro_rt == NULL)
609                 return 0;
610 
611         /* 
612          * if ifp is provided, check for equality with rtentry
613          * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp,
614          * to support the case of sending packets to an address of our own.
615          * (where the former interface is the first argument of if_simloop()
616          *  (=ifp), the latter is lo0)
617          */
618         if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) {
619                 RTFREE(ro.ro_rt);
620                 return 0;
621         }
622 
623         /* if no ifp provided, check if rtentry is not default route */
624         if (ifp == NULL &&
625             IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) {
626                 RTFREE(ro.ro_rt);
627                 return 0;
628         }
629 
630         /* or if this is a blackhole/reject route */
631         if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
632                 RTFREE(ro.ro_rt);
633                 return 0;
634         }
635 
636         /* found valid route */
637         RTFREE(ro.ro_rt);
638         return 1;
639 
640 }
641 static __inline int
642 hash_packet6(struct ipfw_flow_id *id)
643 {
644         u_int32_t i;
645         i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
646             (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^
647             (id->src_ip6.__u6_addr.__u6_addr32[2]) ^
648             (id->src_ip6.__u6_addr.__u6_addr32[3]) ^
649             (id->dst_port) ^ (id->src_port);
650         return i;
651 }
652 
653 static int
654 is_icmp6_query(int icmp6_type)
655 {
656         if ((icmp6_type <= ICMP6_MAXTYPE) &&
657             (icmp6_type == ICMP6_ECHO_REQUEST ||
658             icmp6_type == ICMP6_MEMBERSHIP_QUERY ||
659             icmp6_type == ICMP6_WRUREQUEST ||
660             icmp6_type == ICMP6_FQDN_QUERY ||
661             icmp6_type == ICMP6_NI_QUERY))
662                 return (1);
663 
664         return (0);
665 }
666 
667 static void
668 send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
669 {
670         struct mbuf *m;
671 
672         m = args->m;
673         if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
674                 struct tcphdr *tcp;
675                 tcp_seq ack, seq;
676                 int flags;
677                 struct {
678                         struct ip6_hdr ip6;
679                         struct tcphdr th;
680                 } ti;
681                 tcp = (struct tcphdr *)((char *)ip6 + hlen);
682 
683                 if ((tcp->th_flags & TH_RST) != 0) {
684                         m_freem(m);
685                         args->m = NULL;
686                         return;
687                 }
688 
689                 ti.ip6 = *ip6;
690                 ti.th = *tcp;
691                 ti.th.th_seq = ntohl(ti.th.th_seq);
692                 ti.th.th_ack = ntohl(ti.th.th_ack);
693                 ti.ip6.ip6_nxt = IPPROTO_TCP;
694 
695                 if (ti.th.th_flags & TH_ACK) {
696                         ack = 0;
697                         seq = ti.th.th_ack;
698                         flags = TH_RST;
699                 } else {
700                         ack = ti.th.th_seq;
701                         if ((m->m_flags & M_PKTHDR) != 0) {
702                                 /*
703                                  * total new data to ACK is:
704                                  * total packet length,
705                                  * minus the header length,
706                                  * minus the tcp header length.
707                                  */
708                                 ack += m->m_pkthdr.len - hlen
709                                         - (ti.th.th_off << 2);
710                         } else if (ip6->ip6_plen) {
711                                 ack += ntohs(ip6->ip6_plen) + sizeof(*ip6) -
712                                     hlen - (ti.th.th_off << 2);
713                         } else {
714                                 m_freem(m);
715                                 return;
716                         }
717                         if (tcp->th_flags & TH_SYN)
718                                 ack++;
719                         seq = 0;
720                         flags = TH_RST|TH_ACK;
721                 }
722                 bcopy(&ti, ip6, sizeof(ti));
723                 /*
724                  * m is only used to recycle the mbuf
725                  * The data in it is never read so we don't need
726                  * to correct the offsets or anything
727                  */
728                 tcp_respond(NULL, ip6, tcp, m, ack, seq, flags);
729         } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
730 #if 0
731                 /*
732                  * Unlike above, the mbufs need to line up with the ip6 hdr,
733                  * as the contents are read. We need to m_adj() the
734                  * needed amount.
735                  * The mbuf will however be thrown away so we can adjust it.
736                  * Remember we did an m_pullup on it already so we
737                  * can make some assumptions about contiguousness.
738                  */
739                 if (args->L3offset)
740                         m_adj(m, args->L3offset);
741 #endif
742                 icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
743         } else
744                 m_freem(m);
745 
746         args->m = NULL;
747 }
748 
749 #endif /* INET6 */
750 
751 static u_int64_t norule_counter;        /* counter for ipfw_log(NULL...) */
752 
/*
 * Helpers producing the (buffer, size) argument pair of snprintf().
 * SNPARGS(buf, len) targets buf at offset len and passes the space left
 * after that offset, or 0 when len is already at or past the end of
 * buf.  SNP(buf) passes the whole buffer.  Both rely on sizeof(buf),
 * so buf must be an actual array, not a pointer.
 */
#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
#define SNP(buf) buf, sizeof(buf)
755 
756 /*
757  * We enter here when we have a rule with O_LOG.
758  * XXX this function alone takes about 2Kbytes of code!
759  */
760 static void
761 ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
762     struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
763     struct ip *ip)
764 {
765         INIT_VNET_IPFW(curvnet);
766         struct ether_header *eh = args->eh;
767         char *action;
768         int limit_reached = 0;
769         char action2[40], proto[128], fragment[32];
770 
771         fragment[0] = '\0';
772         proto[0] = '\0';
773 
774         if (f == NULL) {        /* bogus pkt */
775                 if (V_verbose_limit != 0 && V_norule_counter >= V_verbose_limit)
776                         return;
777                 V_norule_counter++;
778                 if (V_norule_counter == V_verbose_limit)
779                         limit_reached = V_verbose_limit;
780                 action = "Refuse";
781         } else {        /* O_LOG is the first action, find the real one */
782                 ipfw_insn *cmd = ACTION_PTR(f);
783                 ipfw_insn_log *l = (ipfw_insn_log *)cmd;
784 
785                 if (l->max_log != 0 && l->log_left == 0)
786                         return;
787                 l->log_left--;
788                 if (l->log_left == 0)
789                         limit_reached = l->max_log;
790                 cmd += F_LEN(cmd);      /* point to first action */
791                 if (cmd->opcode == O_ALTQ) {
792                         ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
793 
794                         snprintf(SNPARGS(action2, 0), "Altq %d",
795                                 altq->qid);
796                         cmd += F_LEN(cmd);
797                 }
798                 if (cmd->opcode == O_PROB)
799                         cmd += F_LEN(cmd);
800 
801                 if (cmd->opcode == O_TAG)
802                         cmd += F_LEN(cmd);
803 
804                 action = action2;
805                 switch (cmd->opcode) {
806                 case O_DENY:
807                         action = "Deny";
808                         break;
809 
810                 case O_REJECT:
811                         if (cmd->arg1==ICMP_REJECT_RST)
812                                 action = "Reset";
813                         else if (cmd->arg1==ICMP_UNREACH_HOST)
814                                 action = "Reject";
815                         else
816                                 snprintf(SNPARGS(action2, 0), "Unreach %d",
817                                         cmd->arg1);
818                         break;
819 
820                 case O_UNREACH6:
821                         if (cmd->arg1==ICMP6_UNREACH_RST)
822                                 action = "Reset";
823                         else
824                                 snprintf(SNPARGS(action2, 0), "Unreach %d",
825                                         cmd->arg1);
826                         break;
827 
828                 case O_ACCEPT:
829                         action = "Accept";
830                         break;
831                 case O_COUNT:
832                         action = "Count";
833                         break;
834                 case O_DIVERT:
835                         snprintf(SNPARGS(action2, 0), "Divert %d",
836                                 cmd->arg1);
837                         break;
838                 case O_TEE:
839                         snprintf(SNPARGS(action2, 0), "Tee %d",
840                                 cmd->arg1);
841                         break;
842                 case O_SETFIB:
843                         snprintf(SNPARGS(action2, 0), "SetFib %d",
844                                 cmd->arg1);
845                         break;
846                 case O_SKIPTO:
847                         snprintf(SNPARGS(action2, 0), "SkipTo %d",
848                                 cmd->arg1);
849                         break;
850                 case O_PIPE:
851                         snprintf(SNPARGS(action2, 0), "Pipe %d",
852                                 cmd->arg1);
853                         break;
854                 case O_QUEUE:
855                         snprintf(SNPARGS(action2, 0), "Queue %d",
856                                 cmd->arg1);
857                         break;
858                 case O_FORWARD_IP: {
859                         ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
860                         int len;
861                         struct in_addr dummyaddr;
862                         if (sa->sa.sin_addr.s_addr == INADDR_ANY)
863                                 dummyaddr.s_addr = htonl(tablearg);
864                         else
865                                 dummyaddr.s_addr = sa->sa.sin_addr.s_addr;
866 
867                         len = snprintf(SNPARGS(action2, 0), "Forward to %s",
868                                 inet_ntoa(dummyaddr));
869 
870                         if (sa->sa.sin_port)
871                                 snprintf(SNPARGS(action2, len), ":%d",
872                                     sa->sa.sin_port);
873                         }
874                         break;
875                 case O_NETGRAPH:
876                         snprintf(SNPARGS(action2, 0), "Netgraph %d",
877                                 cmd->arg1);
878                         break;
879                 case O_NGTEE:
880                         snprintf(SNPARGS(action2, 0), "Ngtee %d",
881                                 cmd->arg1);
882                         break;
883                 case O_NAT:
884                         action = "Nat";
885                         break;
886                 default:
887                         action = "UNKNOWN";
888                         break;
889                 }
890         }
891 
892         if (hlen == 0) {        /* non-ip */
893                 snprintf(SNPARGS(proto, 0), "MAC");
894 
895         } else {
896                 int len;
897                 char src[48], dst[48];
898                 struct icmphdr *icmp;
899                 struct tcphdr *tcp;
900                 struct udphdr *udp;
901 #ifdef INET6
902                 struct ip6_hdr *ip6 = NULL;
903                 struct icmp6_hdr *icmp6;
904 #endif
905                 src[0] = '\0';
906                 dst[0] = '\0';
907 #ifdef INET6
908                 if (IS_IP6_FLOW_ID(&(args->f_id))) {
909                         char ip6buf[INET6_ADDRSTRLEN];
910                         snprintf(src, sizeof(src), "[%s]",
911                             ip6_sprintf(ip6buf, &args->f_id.src_ip6));
912                         snprintf(dst, sizeof(dst), "[%s]",
913                             ip6_sprintf(ip6buf, &args->f_id.dst_ip6));
914 
915                         ip6 = (struct ip6_hdr *)ip;
916                         tcp = (struct tcphdr *)(((char *)ip) + hlen);
917                         udp = (struct udphdr *)(((char *)ip) + hlen);
918                 } else
919 #endif
920                 {
921                         tcp = L3HDR(struct tcphdr, ip);
922                         udp = L3HDR(struct udphdr, ip);
923 
924                         inet_ntoa_r(ip->ip_src, src);
925                         inet_ntoa_r(ip->ip_dst, dst);
926                 }
927 
928                 switch (args->f_id.proto) {
929                 case IPPROTO_TCP:
930                         len = snprintf(SNPARGS(proto, 0), "TCP %s", src);
931                         if (offset == 0)
932                                 snprintf(SNPARGS(proto, len), ":%d %s:%d",
933                                     ntohs(tcp->th_sport),
934                                     dst,
935                                     ntohs(tcp->th_dport));
936                         else
937                                 snprintf(SNPARGS(proto, len), " %s", dst);
938                         break;
939 
940                 case IPPROTO_UDP:
941                         len = snprintf(SNPARGS(proto, 0), "UDP %s", src);
942                         if (offset == 0)
943                                 snprintf(SNPARGS(proto, len), ":%d %s:%d",
944                                     ntohs(udp->uh_sport),
945                                     dst,
946                                     ntohs(udp->uh_dport));
947                         else
948                                 snprintf(SNPARGS(proto, len), " %s", dst);
949                         break;
950 
951                 case IPPROTO_ICMP:
952                         icmp = L3HDR(struct icmphdr, ip);
953                         if (offset == 0)
954                                 len = snprintf(SNPARGS(proto, 0),
955                                     "ICMP:%u.%u ",
956                                     icmp->icmp_type, icmp->icmp_code);
957                         else
958                                 len = snprintf(SNPARGS(proto, 0), "ICMP ");
959                         len += snprintf(SNPARGS(proto, len), "%s", src);
960                         snprintf(SNPARGS(proto, len), " %s", dst);
961                         break;
962 #ifdef INET6
963                 case IPPROTO_ICMPV6:
964                         icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen);
965                         if (offset == 0)
966                                 len = snprintf(SNPARGS(proto, 0),
967                                     "ICMPv6:%u.%u ",
968                                     icmp6->icmp6_type, icmp6->icmp6_code);
969                         else
970                                 len = snprintf(SNPARGS(proto, 0), "ICMPv6 ");
971                         len += snprintf(SNPARGS(proto, len), "%s", src);
972                         snprintf(SNPARGS(proto, len), " %s", dst);
973                         break;
974 #endif
975                 default:
976                         len = snprintf(SNPARGS(proto, 0), "P:%d %s",
977                             args->f_id.proto, src);
978                         snprintf(SNPARGS(proto, len), " %s", dst);
979                         break;
980                 }
981 
982 #ifdef INET6
983                 if (IS_IP6_FLOW_ID(&(args->f_id))) {
984                         if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
985                                 snprintf(SNPARGS(fragment, 0),
986                                     " (frag %08x:%d@%d%s)",
987                                     args->f_id.frag_id6,
988                                     ntohs(ip6->ip6_plen) - hlen,
989