The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netpfil/ipfilter/netinet/ip_state.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $FreeBSD$ */
    2 
    3 /*
    4  * Copyright (C) 2012 by Darren Reed.
    5  *
    6  * See the IPFILTER.LICENCE file for details on licencing.
    7  *
    8  * Copyright 2008 Sun Microsystems.
    9  *
   10  * $Id$
   11  */
   12 #if defined(KERNEL) || defined(_KERNEL)
   13 # undef KERNEL
   14 # undef _KERNEL
   15 # define        KERNEL  1
   16 # define        _KERNEL 1
   17 #endif
   18 #include <sys/errno.h>
   19 #include <sys/types.h>
   20 #include <sys/param.h>
   21 #include <sys/file.h>
   22 #if defined(_KERNEL) && defined(__FreeBSD__) && \
   23     !defined(KLD_MODULE)
   24 #include "opt_inet6.h"
   25 #endif
   26 #if !defined(_KERNEL) && !defined(__KERNEL__)
   27 # include <stdio.h>
   28 # include <stdlib.h>
   29 # include <string.h>
   30 # define _KERNEL
   31 # include <sys/uio.h>
   32 # undef _KERNEL
   33 #endif
   34 #if defined(_KERNEL) && defined(__FreeBSD__)
   35 # include <sys/filio.h>
   36 # include <sys/fcntl.h>
   37 #else
   38 # include <sys/ioctl.h>
   39 #endif
   40 #include <sys/time.h>
   41 # include <sys/protosw.h>
   42 #include <sys/socket.h>
   43 #if defined(_KERNEL)
   44 # include <sys/systm.h>
   45 # if !defined(__SVR4)
   46 #  include <sys/mbuf.h>
   47 # endif
   48 #endif
   49 #if defined(__SVR4)
   50 # include <sys/filio.h>
   51 # include <sys/byteorder.h>
   52 # ifdef _KERNEL
   53 #  include <sys/dditypes.h>
   54 # endif
   55 # include <sys/stream.h>
   56 # include <sys/kmem.h>
   57 #endif
   58 
   59 #include <net/if.h>
   60 #ifdef sun
   61 # include <net/af.h>
   62 #endif
   63 #include <netinet/in.h>
   64 #include <netinet/in_systm.h>
   65 #include <netinet/ip.h>
   66 #include <netinet/tcp.h>
   67 # include <netinet/tcp_fsm.h>
   68 #include <netinet/udp.h>
   69 #include <netinet/ip_icmp.h>
   70 #if !defined(_KERNEL)
   71 # include "ipf.h"
   72 #endif
   73 #include "netinet/ip_compat.h"
   74 #include "netinet/ip_fil.h"
   75 #include "netinet/ip_nat.h"
   76 #include "netinet/ip_frag.h"
   77 #include "netinet/ip_state.h"
   78 #include "netinet/ip_proxy.h"
   79 #include "netinet/ip_lookup.h"
   80 #include "netinet/ip_dstlist.h"
   81 #include "netinet/ip_sync.h"
   82 #ifdef  USE_INET6
   83 #include <netinet/icmp6.h>
   84 #endif
   85 #ifdef __FreeBSD__
   86 # include <sys/malloc.h>
   87 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
   88 #  include <sys/libkern.h>
   89 #  include <sys/systm.h>
   90 # endif
   91 #endif
   92 /* END OF INCLUDES */
   93 
   94 
   95 #if !defined(lint)      /* SCCS/RCS identification strings; compiled out when lint is defined */
   96 static const char sccsid[] = "@(#)ip_state.c    1.8 6/5/96 (C) 1993-2000 Darren Reed";
   97 static const char rcsid[] = "@(#)$Id$";
   98 #endif  /* !lint */
   99 
  100 
  101 static ipftuneable_t ipf_state_tuneables[] = {    /* template; copied per context by ipf_state_soft_create() */
  102         { { (void *)offsetof(ipf_state_softc_t, ipf_state_max) },       /* field offset, not a real pointer */
  103                 "state_max",            1,      0x7fffffff,
  104                 stsizeof(ipf_state_softc_t, ipf_state_max),
  105                 0,                      NULL,   NULL },
  106         { { (void *)offsetof(ipf_state_softc_t, ipf_state_size) },
  107                 "state_size",           1,      0x7fffffff,
  108                 stsizeof(ipf_state_softc_t, ipf_state_size),
  109                 0,                      NULL,   ipf_state_rehash },     /* NOTE(review): presumably called when state_size is set - confirm */
  110         { { (void *)offsetof(ipf_state_softc_t, ipf_state_lock) },
  111                 "state_lock",           0,      1,
  112                 stsizeof(ipf_state_softc_t, ipf_state_lock),
  113                 IPFT_RDONLY,            NULL,   NULL },                 /* the only entry carrying IPFT_RDONLY */
  114         { { (void *)offsetof(ipf_state_softc_t, ipf_state_maxbucket) },
  115                 "state_maxbucket",      1,      0x7fffffff,
  116                 stsizeof(ipf_state_softc_t, ipf_state_maxbucket),
  117                 0,                      NULL,   NULL },
  118         { { (void *)offsetof(ipf_state_softc_t, ipf_state_logging) },
  119                 "state_logging",0,      1,
  120                 stsizeof(ipf_state_softc_t, ipf_state_logging),
  121                 0,                      NULL,   NULL },
  122         { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_high) },
  123                 "state_wm_high",2,      100,
  124                 stsizeof(ipf_state_softc_t, ipf_state_wm_high),
  125                 0,                      NULL,   NULL },
  126         { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_low) },
  127                 "state_wm_low", 1,      99,
  128                 stsizeof(ipf_state_softc_t, ipf_state_wm_low),
  129                 0,                      NULL,   NULL },
  130         { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_freq) },
  131                 "state_wm_freq",2,      999999,
  132                 stsizeof(ipf_state_softc_t, ipf_state_wm_freq),
  133                 0,                      NULL,   NULL },
  134         { { NULL },             /* all-NULL/zero entry; presumably the end-of-table marker - confirm */
  135                 NULL,                   0,      0,
  136                 0,
  137                 0,      NULL, NULL }
  138 };
  139 
  140 #define SINCL(x)        ATOMIC_INCL(softs->x)   /* bump softs->x via ATOMIC_INCL; needs a local 'softs' */
  141 #define SBUMP(x)        (softs->x)++            /* plain post-increment of softs->x */
  142 #define SBUMPD(x, y)    do { (softs->x.y)++; DT(y); } while (0)        /* bump softs->x.y, then DT(y) */
  143 #define SBUMPDX(x, y, z)do { (softs->x.y)++; DT(z); } while (0)        /* as SBUMPD, but passes z to DT() */
  144 
  145 #ifdef  USE_INET6       /* prototypes for this file's static helpers follow */
  146 static ipstate_t *ipf_checkicmp6matchingstate(fr_info_t *);
  147 #endif  /* USE_INET6 */
  148 static int ipf_allowstateicmp(fr_info_t *, ipstate_t *, i6addr_t *);
  149 static ipstate_t *ipf_matchsrcdst(fr_info_t *, ipstate_t *, i6addr_t *,
  150                                       i6addr_t *, tcphdr_t *, u_32_t);
  151 static ipstate_t *ipf_checkicmpmatchingstate(fr_info_t *);
  152 static int ipf_state_flush_entry(ipf_main_softc_t *, void *);
  153 static ips_stat_t *ipf_state_stats(ipf_main_softc_t *);
  154 static int ipf_state_del(ipf_main_softc_t *, ipstate_t *, int);
  155 static int ipf_state_remove(ipf_main_softc_t *, caddr_t);
  156 static int ipf_state_match(ipstate_t *is1, ipstate_t *is2);
  157 static int ipf_state_matchaddresses(ipstate_t *is1, ipstate_t *is2);
  158 static int ipf_state_matchipv4addrs(ipstate_t *is1, ipstate_t *is2);
  159 static int ipf_state_matchipv6addrs(ipstate_t *is1, ipstate_t *is2);
  160 static int ipf_state_matchisps(ipstate_t *is1, ipstate_t *is2);
  161 static int ipf_state_matchports(udpinfo_t *is1, udpinfo_t *is2);
  162 static int ipf_state_matcharray(ipstate_t *, int *, u_long);
  163 static void ipf_ipsmove(ipf_state_softc_t *, ipstate_t *, u_int);
  164 static int ipf_state_tcp(ipf_main_softc_t *, ipf_state_softc_t *,
  165                               fr_info_t *, tcphdr_t *, ipstate_t *);
  166 static int ipf_tcpoptions(ipf_state_softc_t *, fr_info_t *,
  167                                tcphdr_t *, tcpdata_t *);
  168 static ipstate_t *ipf_state_clone(fr_info_t *, tcphdr_t *, ipstate_t *);
  169 static void ipf_fixinisn(fr_info_t *, ipstate_t *);
  170 static void ipf_fixoutisn(fr_info_t *, ipstate_t *);
  171 static void ipf_checknewisn(fr_info_t *, ipstate_t *);
  172 static int ipf_state_iter(ipf_main_softc_t *, ipftoken_t *,
  173                                ipfgeniter_t *, ipfobj_t *);
  174 static int ipf_state_gettable(ipf_main_softc_t *, ipf_state_softc_t *,
  175                                    char *);
  176 static  int ipf_state_tcpinwindow(struct fr_info *, struct tcpdata *,
  177                                        struct tcpdata *, tcphdr_t *, int);
  178
  179 static int ipf_state_getent(ipf_main_softc_t *, ipf_state_softc_t *,
  180                                  caddr_t);
  181 static int ipf_state_putent(ipf_main_softc_t *, ipf_state_softc_t *,
  182                                  caddr_t);
  183 
  184 #define ONE_DAY         IPF_TTLVAL(1 * 86400)   /* 1 day: 86400 seconds passed through IPF_TTLVAL */
  185 #define FIVE_DAYS       (5 * ONE_DAY)           /* 5 days, in the same units as ONE_DAY */
  186 #define DOUBLE_HASH(x)  (((x) + softs->ipf_state_seed[(x) % \
  187                          softs->ipf_state_size]) % softs->ipf_state_size)       /* needs a local 'softs'; x is evaluated twice */
  188 
  189 
  190 /* ------------------------------------------------------------------------ */
  191 /* Function:    ipf_state_main_load                                         */
  192 /* Returns:     int - always 0 (success)                                    */
  193 /* Parameters:  Nil                                                         */
  194 /*                                                                          */
  195 /* Load-time hook for the state code.  It performs no work and is kept only */
  196 /* as a placeholder so that the call flow elsewhere stays easy to follow.   */
  197 /* ------------------------------------------------------------------------ */
  198 int
  199 ipf_state_main_load(void)
  200 {
  201         return (0);
  202 }
  203 
  204 
  205 /* ------------------------------------------------------------------------ */
  206 /* Function:    ipf_state_main_unload                                       */
  207 /* Returns:     int - always 0 (success)                                    */
  208 /* Parameters:  Nil                                                         */
  209 /*                                                                          */
  210 /* Unload hook for the state code.  It performs no work and is kept only    */
  211 /* as a placeholder so that the call flow elsewhere stays easy to follow.   */
  212 /* ------------------------------------------------------------------------ */
  213 int
  214 ipf_state_main_unload(void)
  215 {
  216         return (0);
  217 }
  218 
  219 
  220 /* ------------------------------------------------------------------------ */
  221 /* Function:    ipf_state_soft_create                                       */
  222 /* Returns:     void *   - NULL = failure, else pointer to soft context     */
  223 /* Parameters:  softc(I) - pointer to soft context main structure           */
  224 /*                                                                          */
  225 /* Allocate and zero a state soft context, attach a linked copy of the      */
  226 /* tunables and apply the defaults; on any failure it is destroyed again.   */
  227 /* ------------------------------------------------------------------------ */
  228 void *
  229 ipf_state_soft_create(ipf_main_softc_t *softc)
  230 {
  231         ipf_state_softc_t *softs;
  232
  233         KMALLOC(softs, ipf_state_softc_t *);
  234         if (softs == NULL)
  235                 return (NULL);
  236
  237         bzero((char *)softs, sizeof(*softs));   /* every field starts as 0/NULL */
  238
  239         softs->ipf_state_tune = ipf_tune_array_copy(softs,
  240                                                     sizeof(ipf_state_tuneables),
  241                                                     ipf_state_tuneables);
  242         if (softs->ipf_state_tune == NULL) {
  243                 ipf_state_soft_destroy(softc, softs);   /* frees softs itself */
  244                 return (NULL);
  245         }
  246         if (ipf_tune_array_link(softc, softs->ipf_state_tune) == -1) {
  247                 ipf_state_soft_destroy(softc, softs);   /* frees the copy and softs */
  248                 return (NULL);
  249         }
  250
  251 #ifdef  IPFILTER_LOG
  252         softs->ipf_state_logging = 1;
  253 #else
  254         softs->ipf_state_logging = 0;
  255 #endif
  256         softs->ipf_state_size = IPSTATE_SIZE;
  257         softs->ipf_state_maxbucket = 0;         /* 0: ipf_state_soft_init() derives it */
  258         softs->ipf_state_wm_freq = IPF_TTLVAL(10);
  259         softs->ipf_state_max = IPSTATE_MAX;
  260         softs->ipf_state_wm_last = 0;
  261         softs->ipf_state_wm_high = 99;
  262         softs->ipf_state_wm_low = 90;
  263         softs->ipf_state_inited = 0;
  264         softs->ipf_state_lock = 0;
  265         softs->ipf_state_doflush = 0;
  266
  267         return (softs);
  268 }
  269 
  270 
  271 /* ------------------------------------------------------------------------ */
  272 /* Function:    ipf_state_soft_destroy                                      */
  273 /* Returns:     Nil                                                         */
  274 /* Parameters:  softc(I) - pointer to soft context main structure           */
  275 /*              arg(I)   - pointer to local context to use                  */
  276 /*                                                                          */
  277 /* Reverse of ipf_state_soft_create: detach and release the copied tunables */
  278 /* array, then release the state soft context itself.                       */
  279 /* ------------------------------------------------------------------------ */
  280 void
  281 ipf_state_soft_destroy(ipf_main_softc_t *softc, void *arg)
  282 {
  283         ipf_state_softc_t *softs = arg;
  284         ipftuneable_t *tune = softs->ipf_state_tune;
  285
  286         if (tune != NULL) {
  287                 ipf_tune_array_unlink(softc, tune);
  288                 KFREES(tune, sizeof(ipf_state_tuneables));
  289                 softs->ipf_state_tune = NULL;
  290         }
  291         KFREE(softs);
  292 }
  293 
  294 /* Allocate the seed table read by DOUBLE_HASH(): state_size u_long slots. */
  295 static void *
  296 ipf_state_seed_alloc(u_int state_size, u_int state_max)
  297 {
  298         u_long *seeds;
  299         u_int n;
  300
  301         KMALLOCS(seeds, u_long *, state_size * sizeof(*seeds));
  302         if (seeds == NULL)
  303                 return (NULL);
  304
  305         /* XXX - each seed should be a random number of sorts. */
  306         for (n = 0; n < state_size; n++) {
  307 #ifdef __FreeBSD__
  308                 seeds[n] = arc4random();
  309 #else
  310                 u_long v = ((u_long)seeds + n) * state_size;
  311
  312                 v = (v ^ 0xa5a55a5a) * (u_long)seeds;
  313                 v = (v ^ 0x5a5aa5a5) * state_max;
  314                 seeds[n] = v;
  315 #endif
  316         }
  317         return (seeds);
  318 }
  319 
  320 
  321 /* ------------------------------------------------------------------------ */
  322 /* Function:    ipf_state_soft_init                                         */
  323 /* Returns:     int      - 0 == success, -1 == failure                      */
  324 /* Parameters:  softc(I) - pointer to soft context main structure           */
  325 /*              arg(I)   - pointer to local context to use                  */
  326 /*                                                                          */
  327 /* Initialise the state soft context structure so it is ready for use:      */
  328 /* - allocate the hash table and the bucket length counters, zeroing both   */
  329 /* - build a secondary table of seeds for double hashing to make it more    */
  330 /*   difficult to attempt to attack the hash table itself (for DoS)         */
  331 /* - initialise and chain the timeout queues: a table for TCP, then pairs   */
  332 /*   of query/response for UDP and other IP protocols (typically the        */
  333 /*   reply queue has a shorter timeout than the query)                      */
  334 /* Allocations made before a failure are left for ipf_state_soft_fini().    */
  335 /* ------------------------------------------------------------------------ */
  336 int
  337 ipf_state_soft_init(ipf_main_softc_t *softc, void *arg)
  338 {
  339         ipf_state_softc_t *softs = arg;
  340         int i;
  341
  342         KMALLOCS(softs->ipf_state_table,
  343                  ipstate_t **, softs->ipf_state_size * sizeof(ipstate_t *));
  344         if (softs->ipf_state_table == NULL)
  345                 return (-1);
  346
  347         bzero((char *)softs->ipf_state_table,
  348               softs->ipf_state_size * sizeof(ipstate_t *));
  349
  350         softs->ipf_state_seed = ipf_state_seed_alloc(softs->ipf_state_size,
  351             softs->ipf_state_max);
  352         if (softs->ipf_state_seed == NULL)
  353                 return (-1);    /* -1 is the only documented failure value */
  354
  355         KMALLOCS(softs->ipf_state_stats.iss_bucketlen, u_int *,
  356                  softs->ipf_state_size * sizeof(u_int));
  357         if (softs->ipf_state_stats.iss_bucketlen == NULL)
  358                 return (-1);    /* -1 is the only documented failure value */
  359
  360         bzero((char *)softs->ipf_state_stats.iss_bucketlen,
  361               softs->ipf_state_size * sizeof(u_int));
  362
  363         if (softs->ipf_state_maxbucket == 0) {  /* unset: use twice the bit length of ipf_state_size */
  364                 for (i = softs->ipf_state_size; i > 0; i >>= 1)
  365                         softs->ipf_state_maxbucket++;
  366                 softs->ipf_state_maxbucket *= 2;
  367         }
  368         /* Initialise each timeout queue and link them into one chain. */
  369         ipf_sttab_init(softc, softs->ipf_state_tcptq);
  370         softs->ipf_state_stats.iss_tcptab = softs->ipf_state_tcptq;
  371         softs->ipf_state_tcptq[IPF_TCP_NSTATES - 1].ifq_next =
  372                                                 &softs->ipf_state_udptq;
  373
  374         IPFTQ_INIT(&softs->ipf_state_udptq, softc->ipf_udptimeout,
  375                    "ipftq udp tab");
  376         softs->ipf_state_udptq.ifq_next = &softs->ipf_state_udpacktq;
  377
  378         IPFTQ_INIT(&softs->ipf_state_udpacktq, softc->ipf_udpacktimeout,
  379                    "ipftq udpack tab");
  380         softs->ipf_state_udpacktq.ifq_next = &softs->ipf_state_icmptq;
  381
  382         IPFTQ_INIT(&softs->ipf_state_icmptq, softc->ipf_icmptimeout,
  383                    "ipftq icmp tab");
  384         softs->ipf_state_icmptq.ifq_next = &softs->ipf_state_icmpacktq;
  385
  386         IPFTQ_INIT(&softs->ipf_state_icmpacktq, softc->ipf_icmpacktimeout,
  387                   "ipftq icmpack tab");
  388         softs->ipf_state_icmpacktq.ifq_next = &softs->ipf_state_iptq;
  389
  390         IPFTQ_INIT(&softs->ipf_state_iptq, softc->ipf_iptimeout,
  391                    "ipftq iptimeout tab");
  392         softs->ipf_state_iptq.ifq_next = &softs->ipf_state_pending;
  393
  394         IPFTQ_INIT(&softs->ipf_state_pending, IPF_HZ_DIVIDE, "ipftq pending");
  395         softs->ipf_state_pending.ifq_next = &softs->ipf_state_deletetq;
  396
  397         IPFTQ_INIT(&softs->ipf_state_deletetq, 1, "ipftq delete");
  398         softs->ipf_state_deletetq.ifq_next = NULL;      /* end of the chain */
  399
  400         MUTEX_INIT(&softs->ipf_stinsert, "ipf state insert mutex");
  401
  402
  403         softs->ipf_state_wm_last = softc->ipf_ticks;
  404         softs->ipf_state_inited = 1;    /* tells ipf_state_soft_fini() to destroy the locks */
  405
  406         return (0);
  407 }
  408 
  409 
  410 /* ------------------------------------------------------------------------ */
  411 /* Function:    ipf_state_soft_fini                                         */
  412 /* Returns:     int      - always 0                                         */
  413 /* Parameters:  softc(I) - pointer to soft context main structure           */
  414 /*              arg(I)   - pointer to local context to use                  */
  415 /*                                                                          */
  416 /* Delete every state entry, drop user timeout queues, destroy the locks    */
  417 /* and free the tables so IPFilter can be unloaded or re-initialised.       */
  418 /* ------------------------------------------------------------------------ */
  419 int
  420 ipf_state_soft_fini(ipf_main_softc_t *softc, void *arg)
  421 {
  422         ipf_state_softc_t *softs = arg;
  423         ipftq_t *tq, *tqnext;
  424         ipstate_t *entry;
  425
  426         for (entry = softs->ipf_state_list; entry != NULL;
  427              entry = softs->ipf_state_list)
  428                 ipf_state_del(softc, entry, ISL_UNLOAD);
  429
  430         /*
  431          * Timeout queues belonging to proxies are left alone here even
  432          * though they sit on the state list: the proxies created them and
  433          * appr_unload runs after ipf_state_unload.  Giving proxy timeouts
  434          * their own list is hard to justify for this one complication.
  435          */
  436         for (tq = softs->ipf_state_usertq; tq != NULL; tq = tqnext) {
  437                 tqnext = tq->ifq_next;  /* saved first: tq may be freed below */
  438                 if (ipf_deletetimeoutqueue(tq) != 0)
  439                         continue;
  440                 ipf_freetimeoutqueue(softc, tq);
  441         }
  442
  443         softs->ipf_state_stats.iss_inuse = 0;
  444         softs->ipf_state_stats.iss_active = 0;
  445
  446         if (softs->ipf_state_inited == 1) {
  447                 softs->ipf_state_inited = 0;
  448                 ipf_sttab_destroy(softs->ipf_state_tcptq);
  449                 MUTEX_DESTROY(&softs->ipf_state_udptq.ifq_lock);
  450                 MUTEX_DESTROY(&softs->ipf_state_icmptq.ifq_lock);
  451                 MUTEX_DESTROY(&softs->ipf_state_udpacktq.ifq_lock);
  452                 MUTEX_DESTROY(&softs->ipf_state_icmpacktq.ifq_lock);
  453                 MUTEX_DESTROY(&softs->ipf_state_iptq.ifq_lock);
  454                 MUTEX_DESTROY(&softs->ipf_state_deletetq.ifq_lock);
  455                 MUTEX_DESTROY(&softs->ipf_state_pending.ifq_lock);
  456                 MUTEX_DESTROY(&softs->ipf_stinsert);
  457         }
  458
  459         if (softs->ipf_state_table != NULL) {
  460                 KFREES(softs->ipf_state_table,
  461                        softs->ipf_state_size * sizeof(*softs->ipf_state_table));
  462                 softs->ipf_state_table = NULL;
  463         }
  464
  465         if (softs->ipf_state_seed != NULL) {
  466                 KFREES(softs->ipf_state_seed,
  467                        softs->ipf_state_size * sizeof(*softs->ipf_state_seed));
  468                 softs->ipf_state_seed = NULL;
  469         }
  470
  471         if (softs->ipf_state_stats.iss_bucketlen != NULL) {
  472                 KFREES(softs->ipf_state_stats.iss_bucketlen,
  473                        softs->ipf_state_size * sizeof(u_int));
  474                 softs->ipf_state_stats.iss_bucketlen = NULL;
  475         }
  476
  477         return (0);
  478 }
  479 
  480 
  481 /* ------------------------------------------------------------------------ */
  482 /* Function:    ipf_state_setlock                                           */
  483 /* Returns:     Nil                                                         */
  484 /* Parameters:  arg(I) - pointer to local context to use                    */
  485 /*              tmp(I) - value to store in ipf_state_lock                   */
  486 /*                                                                          */
  487 /* Setter that lets code outside this file change ipf_state_lock directly.  */
  488 /* ------------------------------------------------------------------------ */
  489 void
  490 ipf_state_setlock(void *arg, int tmp)
  491 {
  492         ipf_state_softc_t *state_ctx = (ipf_state_softc_t *)arg;
  493
  494         state_ctx->ipf_state_lock = tmp;
  495 }
  496 
  497 
  498 /* ------------------------------------------------------------------------ */
  499 /* Function:    ipf_state_stats                                             */
  500 /* Returns:     ips_stat_t* - pointer to state stats structure              */
  501 /* Parameters:  softc(I) - pointer to soft context main structure           */
  502 /*                                                                          */
  503 /* Refresh the sizes, table/list pointers, tick count and log counters held */
  504 /* in the soft context's stats structure and return a pointer to it.        */
  505 /* ------------------------------------------------------------------------ */
  506 static ips_stat_t *
  507 ipf_state_stats(ipf_main_softc_t *softc)
  508 {
  509         ipf_state_softc_t *softs = softc->ipf_state_soft;
  510         ips_stat_t *issp = &softs->ipf_state_stats;
  511
  512         issp->iss_state_size = softs->ipf_state_size;
  513         issp->iss_state_max = softs->ipf_state_max;
  514         issp->iss_table = softs->ipf_state_table;
  515         issp->iss_list = softs->ipf_state_list;
  516         issp->iss_ticks = softc->ipf_ticks;
  517         /* Same guard and log unit as the other ipf_log_*() calls in this file. */
  518 #ifdef IPFILTER_LOG
  519         issp->iss_log_ok = ipf_log_logok(softc, IPL_LOGSTATE);
  520         issp->iss_log_fail = ipf_log_failures(softc, IPL_LOGSTATE);
  521 #else
  522         issp->iss_log_ok = 0;
  523         issp->iss_log_fail = 0;
  524 #endif
  525         return (issp);
  526 }
  527 
  528 /* ------------------------------------------------------------------------ */
  529 /* Function:    ipf_state_remove                                            */
  530 /* Returns:     int - 0 == entry deleted, EFAULT or ESRCH otherwise         */
  531 /* Parameters:  softc(I) - pointer to soft context main structure           */
  532 /*              data(I)  - pointer to state structure to delete from table  */
  533 /*                                                                          */
  534 /* Copy in the caller's state description and delete the first entry on the */
  535 /* state list whose is_p, is_v, is_src, is_dst and is_ps all match.         */
  536 /* ------------------------------------------------------------------------ */
  537 static int
  538 ipf_state_remove(ipf_main_softc_t *softc, caddr_t data)
  539 {
  540         ipf_state_softc_t *softs = softc->ipf_state_soft;
  541         ipstate_t want, *cur;
  542         int found = 0;
  543
  544         if (ipf_inobj(softc, data, NULL, &want, IPFOBJ_IPSTATE) != 0)
  545                 return (EFAULT);
  546
  547         WRITE_ENTER(&softc->ipf_state);
  548         for (cur = softs->ipf_state_list; cur != NULL; cur = cur->is_next) {
  549                 if (cur->is_p != want.is_p || cur->is_v != want.is_v ||
  550                     bcmp((caddr_t)&cur->is_src, (caddr_t)&want.is_src,
  551                          sizeof(want.is_src)) != 0 ||
  552                     bcmp((caddr_t)&cur->is_dst, (caddr_t)&want.is_dst,
  553                          sizeof(want.is_dst)) != 0 ||
  554                     bcmp((caddr_t)&cur->is_ps, (caddr_t)&want.is_ps,
  555                          sizeof(want.is_ps)) != 0)
  556                         continue;
  557                 ipf_state_del(softc, cur, ISL_REMOVE);
  558                 found = 1;      /* cur must not be touched after the delete */
  559                 break;
  560         }
  561         RWLOCK_EXIT(&softc->ipf_state);
  562         if (found)
  563                 return (0);
  564         IPFERROR(100001);
  565         return (ESRCH);
  566 }
  567 
  568 
  569 /* ------------------------------------------------------------------------ */
  570 /* Function:    ipf_state_ioctl                                             */
  571 /* Returns:     int - 0 == success, != 0 == failure                         */
  572 /* Parameters:  softc(I) - pointer to soft context main structure           */
  573 /*              data(I)  - pointer to ioctl data                            */
  574 /*              cmd(I)   - ioctl command integer                            */
  575 /*              mode(I)  - file mode bits used with open                    */
  576 /*              uid(I)   - uid of process making the ioctl call             */
  577 /*              ctx(I)   - pointer specific to context of the call          */
  578 /*                                                                          */
  579 /* Processes an ioctl call made to operate on the IP Filter state device.   */
  580 /* ------------------------------------------------------------------------ */
  581 int
  582 ipf_state_ioctl(ipf_main_softc_t *softc, caddr_t data, ioctlcmd_t cmd,
  583         int mode, int uid, void *ctx)
  584 {
  585         ipf_state_softc_t *softs = softc->ipf_state_soft;
  586         int arg, ret, error = 0;
  587         SPL_INT(s);
  588 
  589         switch (cmd)
  590         {
  591         /*
  592          * Delete an entry from the state table.
  593          */
  594         case SIOCDELST :
  595                 error = ipf_state_remove(softc, data);
  596                 break;
  597 
  598         /*
  599          * Flush the state table
  600          */
  601         case SIOCIPFFL :
  602                 error = BCOPYIN(data, &arg, sizeof(arg));
  603                 if (error != 0) {
  604                         IPFERROR(100002);
  605                         error = EFAULT;
  606 
  607                 } else {
  608                         WRITE_ENTER(&softc->ipf_state);
  609                         ret = ipf_state_flush(softc, arg, 4);
  610                         RWLOCK_EXIT(&softc->ipf_state);
  611 
  612                         error = BCOPYOUT(&ret, data, sizeof(ret));
  613                         if (error != 0) {
  614                                 IPFERROR(100003);
  615                                 error = EFAULT;
  616                         }
  617                 }
  618                 break;
  619 
  620 #ifdef  USE_INET6
  621         case SIOCIPFL6 :
  622                 error = BCOPYIN(data, &arg, sizeof(arg));
  623                 if (error != 0) {
  624                         IPFERROR(100004);
  625                         error = EFAULT;
  626 
  627                 } else {
  628                         WRITE_ENTER(&softc->ipf_state);
  629                         ret = ipf_state_flush(softc, arg, 6);
  630                         RWLOCK_EXIT(&softc->ipf_state);
  631 
  632                         error = BCOPYOUT(&ret, data, sizeof(ret));
  633                         if (error != 0) {
  634                                 IPFERROR(100005);
  635                                 error = EFAULT;
  636                         }
  637                 }
  638                 break;
  639 #endif
  640 
  641         case SIOCMATCHFLUSH :
  642                 WRITE_ENTER(&softc->ipf_state);
  643                 error = ipf_state_matchflush(softc, data);
  644                 RWLOCK_EXIT(&softc->ipf_state);
  645                 break;
  646 
  647 #ifdef  IPFILTER_LOG
  648         /*
  649          * Flush the state log.
  650          */
  651         case SIOCIPFFB :
  652                 if (!(mode & FWRITE)) {
  653                         IPFERROR(100008);
  654                         error = EPERM;
  655                 } else {
  656                         int tmp;
  657 
  658                         tmp = ipf_log_clear(softc, IPL_LOGSTATE);
  659                         error = BCOPYOUT(&tmp, data, sizeof(tmp));
  660                         if (error != 0) {
  661                                 IPFERROR(100009);
  662                                 error = EFAULT;
  663                         }
  664                 }
  665                 break;
  666 
  667         /*
  668          * Turn logging of state information on/off.
  669          */
  670         case SIOCSETLG :
  671                 if (!(mode & FWRITE)) {
  672                         IPFERROR(100010);
  673                         error = EPERM;
  674                 } else {
  675                         error = BCOPYIN(data, &softs->ipf_state_logging,
  676                                         sizeof(softs->ipf_state_logging));
  677                         if (error != 0) {
  678                                 IPFERROR(100011);
  679                                 error = EFAULT;
  680                         }
  681                 }
  682                 break;
  683 
  684         /*
  685          * Return the current state of logging.
  686          */
  687         case SIOCGETLG :
  688                 error = BCOPYOUT(&softs->ipf_state_logging, data,
  689                                  sizeof(softs->ipf_state_logging));
  690                 if (error != 0) {
  691                         IPFERROR(100012);
  692                         error = EFAULT;
  693                 }
  694                 break;
  695 
  696         /*
  697          * Return the number of bytes currently waiting to be read.
  698          */
  699         case FIONREAD :
  700                 arg = ipf_log_bytesused(softc, IPL_LOGSTATE);
  701                 error = BCOPYOUT(&arg, data, sizeof(arg));
  702                 if (error != 0) {
  703                         IPFERROR(100013);
  704                         error = EFAULT;
  705                 }
  706                 break;
  707 #endif
  708 
  709         /*
  710          * Get the current state statistics.
  711          */
  712         case SIOCGETFS :
  713                 error = ipf_outobj(softc, data, ipf_state_stats(softc),
  714                                    IPFOBJ_STATESTAT);
  715                 break;
  716 
  717         /*
  718          * Lock/Unlock the state table.  (Locking prevents any changes, which
  719          * means no packets match).
  720          */
  721         case SIOCSTLCK :
  722                 if (!(mode & FWRITE)) {
  723                         IPFERROR(100014);
  724                         error = EPERM;
  725                 } else {
  726                         error = ipf_lock(data, &softs->ipf_state_lock);
  727                 }
  728                 break;
  729 
  730         /*
  731          * Add an entry to the current state table.
  732          */
  733         case SIOCSTPUT :
  734                 if (!softs->ipf_state_lock || !(mode &FWRITE)) {
  735                         IPFERROR(100015);
  736                         error = EACCES;
  737                         break;
  738                 }
  739                 error = ipf_state_putent(softc, softs, data);
  740                 break;
  741 
  742         /*
  743          * Get a state table entry.
  744          */
  745         case SIOCSTGET :
  746                 if (!softs->ipf_state_lock) {
  747                         IPFERROR(100016);
  748                         error = EACCES;
  749                         break;
  750                 }
  751                 error = ipf_state_getent(softc, softs, data);
  752                 break;
  753 
  754         case SIOCGENITER :
  755             {
  756                 ipftoken_t *token;
  757                 ipfgeniter_t iter;
  758                 ipfobj_t obj;
  759 
  760                 error = ipf_inobj(softc, data, &obj, &iter, IPFOBJ_GENITER);
  761                 if (error != 0)
  762                         break;
  763 
  764                 SPL_SCHED(s);
  765                 token = ipf_token_find(softc, IPFGENITER_STATE, uid, ctx);
  766                 if (token != NULL) {
  767                         error = ipf_state_iter(softc, token, &iter, &obj);
  768                         WRITE_ENTER(&softc->ipf_tokens);
  769                         ipf_token_deref(softc, token);
  770                         RWLOCK_EXIT(&softc->ipf_tokens);
  771                 } else {
  772                         IPFERROR(100018);
  773                         error = ESRCH;
  774                 }
  775                 SPL_X(s);
  776                 break;
  777             }
  778 
  779         case SIOCGTABL :
  780                 error = ipf_state_gettable(softc, softs, data);
  781                 break;
  782 
  783         case SIOCIPFDELTOK :
  784                 error = BCOPYIN(data, &arg, sizeof(arg));
  785                 if (error != 0) {
  786                         IPFERROR(100019);
  787                         error = EFAULT;
  788                 } else {
  789                         SPL_SCHED(s);
  790                         error = ipf_token_del(softc, arg, uid, ctx);
  791                         SPL_X(s);
  792                 }
  793                 break;
  794 
  795         case SIOCGTQTAB :
  796                 error = ipf_outobj(softc, data, softs->ipf_state_tcptq,
  797                                    IPFOBJ_STATETQTAB);
  798                 break;
  799 
  800         default :
  801                 IPFERROR(100020);
  802                 error = EINVAL;
  803                 break;
  804         }
  805         return (error);
  806 }
  807 
  808 
  809 /* ------------------------------------------------------------------------ */
  810 /* Function:    ipf_state_getent                                            */
  811 /* Returns:     int - 0 == success, != 0 == failure                         */
  812 /* Parameters:  softc(I) - pointer to soft context main structure           */
  813 /*              softs(I) - pointer to state context structure               */
  814 /*              data(I)  - pointer to state structure to retrieve from table*/
  815 /*                                                                          */
  816 /* Copy out state information from the kernel to a user space process.  If  */
  817 /* there is a filter rule associated with the state entry, copy that out    */
  818 /* as well.  The entry to copy out is taken from the value of "ips_next" in */
  819 /* the struct passed in and if not null and not found in the list of current*/
  820 /* state entries, the retrieval fails.                                      */
  821 /* ------------------------------------------------------------------------ */
  822 static int
  823 ipf_state_getent(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
  824         caddr_t data)
  825 {
  826         ipstate_t *is, *isn;
  827         ipstate_save_t ips;
  828         int error;
  829 
  830         error = ipf_inobj(softc, data, NULL, &ips, IPFOBJ_STATESAVE);
  831         if (error)
  832                 return (EFAULT);
  833 
  834         READ_ENTER(&softc->ipf_state);
  835         isn = ips.ips_next;
  836         if (isn == NULL) {
  837                 isn = softs->ipf_state_list;
  838                 if (isn == NULL) {
  839                         if (ips.ips_next == NULL) {
  840                                 RWLOCK_EXIT(&softc->ipf_state);
  841                                 IPFERROR(100021);
  842                                 return (ENOENT);
  843                         }
  844                         return (0);
  845                 }
  846         } else {
  847                 /*
  848                  * Make sure the pointer we're copying from exists in the
  849                  * current list of entries.  Security precaution to prevent
  850                  * copying of random kernel data.
  851                  */
  852                 for (is = softs->ipf_state_list; is; is = is->is_next)
  853                         if (is == isn)
  854                                 break;
  855                 if (!is) {
  856                         RWLOCK_EXIT(&softc->ipf_state);
  857                         IPFERROR(100022);
  858                         return (ESRCH);
  859                 }
  860         }
  861         ips.ips_next = isn->is_next;
  862         bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
  863         ips.ips_rule = isn->is_rule;
  864         if (isn->is_rule != NULL)
  865                 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
  866                       sizeof(ips.ips_fr));
  867         RWLOCK_EXIT(&softc->ipf_state);
  868         error = ipf_outobj(softc, data, &ips, IPFOBJ_STATESAVE);
  869         return (error);
  870 }
  871 
  872 
  873 /* ------------------------------------------------------------------------ */
  874 /* Function:    ipf_state_putent                                            */
  875 /* Returns:     int - 0 == success, != 0 == failure                         */
  876 /* Parameters:  softc(I) - pointer to soft context main structure           */
  877 /*              softs(I) - pointer to state context structure               */
  878 /*              data(I)  - pointer to state information struct              */
  879 /*                                                                          */
  880 /* This function implements the SIOCSTPUT ioctl: insert a state entry into  */
  881 /* the state table.  If the state info. includes a pointer to a filter rule */
  882 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */
  883 /* output.                                                                  */
  884 /* ------------------------------------------------------------------------ */
  885 int
  886 ipf_state_putent(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
  887         caddr_t data)
  888 {
  889         ipstate_t *is, *isn;
  890         ipstate_save_t ips;
  891         int error, out, i;
  892         frentry_t *fr;
  893         char *name;
  894 
  895         error = ipf_inobj(softc, data, NULL, &ips, IPFOBJ_STATESAVE);
  896         if (error != 0)
  897                 return (error);
  898 
  899         KMALLOC(isn, ipstate_t *);
  900         if (isn == NULL) {
  901                 IPFERROR(100023);
  902                 return (ENOMEM);
  903         }
  904 
  905         bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
  906         bzero((char *)isn, offsetof(struct ipstate, is_pkts));
  907         isn->is_sti.tqe_pnext = NULL;
  908         isn->is_sti.tqe_next = NULL;
  909         isn->is_sti.tqe_ifq = NULL;
  910         isn->is_sti.tqe_parent = isn;
  911         isn->is_ifp[0] = NULL;
  912         isn->is_ifp[1] = NULL;
  913         isn->is_ifp[2] = NULL;
  914         isn->is_ifp[3] = NULL;
  915         isn->is_sync = NULL;
  916         fr = ips.ips_rule;
  917 
  918         if (fr == NULL) {
  919                 int inserr;
  920 
  921                 READ_ENTER(&softc->ipf_state);
  922                 inserr = ipf_state_insert(softc, isn, 0);
  923                 MUTEX_EXIT(&isn->is_lock);
  924                 RWLOCK_EXIT(&softc->ipf_state);
  925 
  926                 return (inserr);
  927         }
  928 
  929         if (isn->is_flags & SI_NEWFR) {
  930                 KMALLOC(fr, frentry_t *);
  931                 if (fr == NULL) {
  932                         KFREE(isn);
  933                         IPFERROR(100024);
  934                         return (ENOMEM);
  935                 }
  936                 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
  937                 out = fr->fr_flags & FR_OUTQUE ? 1 : 0;
  938                 isn->is_rule = fr;
  939                 ips.ips_is.is_rule = fr;
  940                 MUTEX_NUKE(&fr->fr_lock);
  941                 MUTEX_INIT(&fr->fr_lock, "state filter rule lock");
  942 
  943                 /*
  944                  * Look up all the interface names in the rule.
  945                  */
  946                 for (i = 0; i < FR_NUM(fr->fr_ifnames); i++) {
  947                         if (fr->fr_ifnames[i] == -1) {
  948                                 fr->fr_ifas[i] = NULL;
  949                                 continue;
  950                         }
  951                         name = FR_NAME(fr, fr_ifnames[i]);
  952                         fr->fr_ifas[i] = ipf_resolvenic(softc, name,
  953                                                         fr->fr_family);
  954                 }
  955 
  956                 for (i = 0; i < FR_NUM(isn->is_ifname); i++) {
  957                         name = isn->is_ifname[i];
  958                         isn->is_ifp[i] = ipf_resolvenic(softc, name,
  959                                                         isn->is_v);
  960                 }
  961 
  962                 fr->fr_ref = 0;
  963                 fr->fr_dsize = 0;
  964                 fr->fr_data = NULL;
  965                 fr->fr_type = FR_T_NONE;
  966 
  967                 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_tifs[0],
  968                                 fr->fr_family);
  969                 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_tifs[1],
  970                                 fr->fr_family);
  971                 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_dif,
  972                                 fr->fr_family);
  973 
  974                 /*
  975                  * send a copy back to userland of what we ended up
  976                  * to allow for verification.
  977                  */
  978                 error = ipf_outobj(softc, data, &ips, IPFOBJ_STATESAVE);
  979                 if (error != 0) {
  980                         KFREE(isn);
  981                         MUTEX_DESTROY(&fr->fr_lock);
  982                         KFREE(fr);
  983                         IPFERROR(100025);
  984                         return (EFAULT);
  985                 }
  986                 READ_ENTER(&softc->ipf_state);
  987                 error = ipf_state_insert(softc, isn, 0);
  988                 MUTEX_EXIT(&isn->is_lock);
  989                 RWLOCK_EXIT(&softc->ipf_state);
  990 
  991         } else {
  992                 READ_ENTER(&softc->ipf_state);
  993                 for (is = softs->ipf_state_list; is; is = is->is_next)
  994                         if (is->is_rule == fr) {
  995                                 error = ipf_state_insert(softc, isn, 0);
  996                                 MUTEX_EXIT(&isn->is_lock);
  997                                 break;
  998                         }
  999 
 1000                 if (is == NULL) {
 1001                         KFREE(isn);
 1002                         isn = NULL;
 1003                 }
 1004                 RWLOCK_EXIT(&softc->ipf_state);
 1005 
 1006                 if (isn == NULL) {
 1007                         IPFERROR(100033);
 1008                         error = ESRCH;
 1009                 }
 1010         }
 1011 
 1012         return (error);
 1013 }
 1014 
 1015 
 1016 /* ------------------------------------------------------------------------ */
 1017 /* Function:    ipf_state_insert                                            */
 1018 /* Returns:     int    - 0 == success, -1 == failure                        */
 1019 /* Parameters:  softc(I) - pointer to soft context main structure           */
 1020 /* Parameters:  is(I)    - pointer to state structure                       */
 1021 /*              rev(I) - flag indicating direction of packet                */
 1022 /*                                                                          */
 1023 /* Inserts a state structure into the hash table (for lookups) and the list */
 1024 /* of state entries (for enumeration).  Resolves all of the interface names */
 1025 /* to pointers and adjusts running stats for the hash table as appropriate. */
 1026 /*                                                                          */
 1027 /* This function can fail if the filter rule has had a population policy of */
 1028 /* IP addresses used with stateful filtering assigned to it.                */
 1029 /*                                                                          */
 1030 /* Locking: it is assumed that some kind of lock on ipf_state is held.      */
 1031 /*          Exits with is_lock initialised and held - *EVEN IF ERROR*.      */
 1032 /* ------------------------------------------------------------------------ */
 1033 int
 1034 ipf_state_insert(ipf_main_softc_t *softc, ipstate_t *is, int rev)
 1035 {
 1036         ipf_state_softc_t *softs = softc->ipf_state_soft;
 1037         frentry_t *fr;
 1038         u_int hv;
 1039         int i;
 1040 
 1041         /*
 1042          * Look up all the interface names in the state entry.
 1043          */
 1044         for (i = 0; i < FR_NUM(is->is_ifp); i++) {
 1045                 if (is->is_ifp[i] != NULL)
 1046                         continue;
 1047                 is->is_ifp[i] = ipf_resolvenic(softc, is->is_ifname[i],
 1048                                                is->is_v);
 1049         }
 1050 
 1051         /*
 1052          * If we could trust is_hv, then the modulus would not be needed,
 1053          * but when running with IPFILTER_SYNC, this stops bad values.
 1054          */
 1055         hv = is->is_hv % softs->ipf_state_size;
 1056         /* TRACE is, hv */
 1057         is->is_hv = hv;
 1058 
 1059         /*
 1060          * We need to get both of these locks...the first because it is
 1061          * possible that once the insert is complete another packet might
 1062          * come along, match the entry and want to update it.
 1063          */
 1064         MUTEX_INIT(&is->is_lock, "ipf state entry");
 1065         MUTEX_ENTER(&is->is_lock);
 1066         MUTEX_ENTER(&softs->ipf_stinsert);
 1067 
 1068         fr = is->is_rule;
 1069         if (fr != NULL) {
 1070                 if ((fr->fr_srctrack.ht_max_nodes != 0) &&
 1071                     (ipf_ht_node_add(softc, &fr->fr_srctrack,
 1072                                      is->is_family, &is->is_src) == -1)) {
 1073                         SBUMPD(ipf_state_stats, iss_max_track);
 1074                         MUTEX_EXIT(&softs->ipf_stinsert);
 1075                         return (-1);
 1076                 }
 1077 
 1078                 MUTEX_ENTER(&fr->fr_lock);
 1079                 fr->fr_ref++;
 1080                 MUTEX_EXIT(&fr->fr_lock);
 1081                 fr->fr_statecnt++;
 1082         }
 1083 
 1084         if (is->is_flags & (SI_WILDP|SI_WILDA)) {
 1085                 DT(iss_wild_plus_one);
 1086                 SINCL(ipf_state_stats.iss_wild);
 1087         }
 1088 
 1089         SBUMP(ipf_state_stats.iss_proto[is->is_p]);
 1090         SBUMP(ipf_state_stats.iss_active_proto[is->is_p]);
 1091 
 1092         /*
 1093          * add into list table.
 1094          */
 1095         if (softs->ipf_state_list != NULL)
 1096                 softs->ipf_state_list->is_pnext = &is->is_next;
 1097         is->is_pnext = &softs->ipf_state_list;
 1098         is->is_next = softs->ipf_state_list;
 1099         softs->ipf_state_list = is;
 1100 
 1101         if (softs->ipf_state_table[hv] != NULL)
 1102                 softs->ipf_state_table[hv]->is_phnext = &is->is_hnext;
 1103         else
 1104                 softs->ipf_state_stats.iss_inuse++;
 1105         is->is_phnext = softs->ipf_state_table + hv;
 1106         is->is_hnext = softs->ipf_state_table[hv];
 1107         softs->ipf_state_table[hv] = is;
 1108         softs->ipf_state_stats.iss_bucketlen[hv]++;
 1109         softs->ipf_state_stats.iss_active++;
 1110         MUTEX_EXIT(&softs->ipf_stinsert);
 1111 
 1112         ipf_state_setqueue(softc, is, rev);
 1113 
 1114         return (0);
 1115 }
 1116 
 1117 
 1118 /* ------------------------------------------------------------------------ */
 1119 /* Function:    ipf_state_matchipv4addrs                                    */
 1120 /* Returns:     int - 2 addresses match (strong match), 1 reverse match,    */
 1121 /*                    0 no match                                            */
 1122 /* Parameters:  is1, is2 pointers to states we are checking                 */
 1123 /*                                                                          */
 1124 /* Function matches IPv4 addresses it returns strong match for ICMP proto   */
 1125 /* even there is only reverse match                                         */
 1126 /* ------------------------------------------------------------------------ */
 1127 static int
 1128 ipf_state_matchipv4addrs(ipstate_t *is1, ipstate_t *is2)
 1129 {
 1130         int     rv;
 1131 
 1132         if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr)
 1133                 rv = 2;
 1134         else if (is1->is_saddr == is2->is_daddr &&
 1135             is1->is_daddr == is2->is_saddr) {
 1136                 /* force strong match for ICMP protocol */
 1137                 rv = (is1->is_p == IPPROTO_ICMP) ? 2 : 1;
 1138         }
 1139         else
 1140                 rv = 0;
 1141 
 1142         return (rv);
 1143 }
 1144 
 1145 
 1146 /* ------------------------------------------------------------------------ */
 1147 /* Function:    ipf_state_matchipv6addrs                                    */
 1148 /* Returns:     int - 2 addresses match (strong match), 1 reverse match,    */
 1149 /*                    0 no match                                            */
 1150 /* Parameters:  is1, is2 pointers to states we are checking                 */
 1151 /*                                                                          */
 1152 /* Function matches IPv6 addresses it returns strong match for ICMP proto   */
 1153 /* even there is only reverse match                                         */
 1154 /* ------------------------------------------------------------------------ */
 1155 static int
 1156 ipf_state_matchipv6addrs(ipstate_t *is1, ipstate_t *is2)
 1157 {
 1158         int     rv;
 1159 
 1160         if (IP6_EQ(&is1->is_src, &is2->is_src) &&
 1161             IP6_EQ(&is1->is_dst, &is2->is_dst))
 1162                 rv = 2;
 1163         else if (IP6_EQ(&is1->is_src, &is2->is_dst) &&
 1164             IP6_EQ(&is1->is_dst, &is2->is_src)) {
 1165                 /* force strong match for ICMPv6 protocol */
 1166                 rv = (is1->is_p == IPPROTO_ICMPV6) ? 2 : 1;
 1167         }
 1168         else
 1169                 rv = 0;
 1170 
 1171         return (rv);
 1172 }
 1173 
 1174 
 1175 /* ------------------------------------------------------------------------ */
 1176 /* Function:    ipf_state_matchaddresses                                    */
 1177 /* Returns:     int - 2 addresses match, 1 reverse match, zero no match     */
 1178 /* Parameters:  is1, is2 pointers to states we are checking                 */
 1179 /*                                                                          */
 1180 /* function retruns true if two pairs of addresses belong to single         */
 1181 /* connection. suppose there are two endpoints:                             */
 1182 /*      endpoint1 1.1.1.1                                                   */
 1183 /*      endpoint2 1.1.1.2                                                   */
 1184 /*                                                                          */
 1185 /* the state is established by packet flying from .1 to .2 so we see:       */
 1186 /*      is1->src = 1.1.1.1                                                  */
 1187 /*      is1->dst = 1.1.1.2                                                  */
 1188 /* now endpoint 1.1.1.2 sends answer                                        */
 1189 /* retreives is1 record created by first packat and compares it with is2    */
 1190 /* temporal record, is2 is initialized as follows:                          */
 1191 /*      is2->src = 1.1.1.2                                                  */
 1192 /*      is2->dst = 1.1.1.1                                                  */
 1193 /* in this case 1 will be returned                                          */
 1194 /*                                                                          */
 1195 /* the ipf_matchaddresses() assumes those two records to be same. of course */
 1196 /* the ipf_matchaddresses() also assume records are same in case you pass   */
 1197 /* identical arguments (i.e. ipf_matchaddress(is1, is1) would return 2      */
 1198 /* ------------------------------------------------------------------------ */
 1199 static int
 1200 ipf_state_matchaddresses(ipstate_t *is1, ipstate_t *is2)
 1201 {
 1202         int     rv;
 1203 
 1204         if (is1->is_v == 4) {
 1205                 rv = ipf_state_matchipv4addrs(is1, is2);
 1206         }
 1207         else {
 1208                 rv = ipf_state_matchipv6addrs(is1, is2);
 1209         }
 1210 
 1211         return (rv);
 1212 }
 1213 
 1214 
 1215 /* ------------------------------------------------------------------------ */
 1216 /* Function:    ipf_matchports                                              */
 1217 /* Returns:     int - 2 match, 1 rverse match, 0 no match                   */
 1218 /* Parameters:  ppairs1, ppairs - src, dst ports we want to match           */
 1219 /*                                                                          */
 1220 /* performs the same match for isps members as for addresses                */
 1221 /* ------------------------------------------------------------------------ */
 1222 static int
 1223 ipf_state_matchports(udpinfo_t *ppairs1, udpinfo_t *ppairs2)
 1224 {
 1225         int     rv;
 1226 
 1227         if (ppairs1->us_sport == ppairs2->us_sport &&
 1228             ppairs1->us_dport == ppairs2->us_dport)
 1229                 rv = 2;
 1230         else if (ppairs1->us_sport == ppairs2->us_dport &&
 1231                     ppairs1->us_dport == ppairs2->us_sport)
 1232                 rv = 1;
 1233         else
 1234                 rv = 0;
 1235 
 1236         return (rv);
 1237 }
 1238 
 1239 
 1240 /* ------------------------------------------------------------------------ */
 1241 /* Function:    ipf_matchisps                                               */
 1242 /* Returns:     int - nonzero if isps members match, 0 nomatch              */
 1243 /* Parameters:  is1, is2 - states we want to match                          */
 1244 /*                                                                          */
 1245 /* performs the same match for isps members as for addresses                */
 1246 /* ------------------------------------------------------------------------ */
 1247 static int
 1248 ipf_state_matchisps(ipstate_t *is1, ipstate_t *is2)
 1249 {
 1250         int     rv;
 1251 
 1252         if (is1->is_p == is2->is_p) {
 1253                 switch (is1->is_p)
 1254                 {
 1255                 case IPPROTO_TCP :
 1256                 case IPPROTO_UDP :
 1257                 case IPPROTO_GRE :
 1258                         /* greinfo_t can be also interprted as port pair */
 1259                         rv = ipf_state_matchports(&is1->is_ps.is_us,
 1260                                                   &is2->is_ps.is_us);
 1261                         break;
 1262 
 1263                 case IPPROTO_ICMP :
 1264                 case IPPROTO_ICMPV6 :
 1265                         /* force strong match for ICMP datagram. */
 1266                         if (bcmp(&is1->is_ps, &is2->is_ps,
 1267                                  sizeof(icmpinfo_t)) == 0)  {
 1268                                 rv = 2;
 1269                         } else {
 1270                                 rv = 0;
 1271                         }
 1272                         break;
 1273 
 1274                 default:
 1275                         rv = 0;
 1276                 }
 1277         } else {
 1278                 rv = 0;
 1279         }
 1280 
 1281         return (rv);
 1282 }
 1283 
 1284 
 1285 /* ------------------------------------------------------------------------ */
 1286 /* Function:    ipf_state_match                                             */
 1287 /* Returns:     int - nonzero match, zero no match                          */
 1288 /* Parameters:  is1, is2 - states we want to match                          */
 1289 /*                                                                          */
 1290 /* ------------------------------------------------------------------------ */
 1291 static int
 1292 ipf_state_match(ipstate_t *is1, ipstate_t *is2)
 1293 {
 1294         int     rv;
 1295         int     amatch;
 1296         int     pomatch;
 1297 
 1298         if (bcmp(&is1->is_pass, &is2->is_pass,
 1299                  offsetof(struct ipstate, is_authmsk) -
 1300                  offsetof(struct ipstate, is_pass)) == 0) {
 1301 
 1302                 pomatch = ipf_state_matchisps(is1, is2);
 1303                 amatch = ipf_state_matchaddresses(is1, is2);
 1304                 rv = (amatch != 0) && (amatch == pomatch);
 1305         } else {
 1306                 rv = 0;
 1307         }
 1308 
 1309         return (rv);
 1310 }
 1311 
 1312 /* ------------------------------------------------------------------------ */
 1313 /* Function:    ipf_state_add                                               */
 1314 /* Returns:     int - 0 = success, != 0 = failure                           */
 1315 /* Parameters:  softc(I)  - pointer to soft context main structure          */
 1316 /*              fin(I)    - pointer to packet information                   */
 1317 /*              stsave(O) - pointer to place to save pointer to created     */
 1318 /*                          state structure.                                */
 1319 /*              flags(I)  - flags to use when creating the structure        */
 1320 /*                                                                          */
 1321 /* Creates a new IP state structure from the packet information collected.  */
 1322 /* Inserts it into the state table and appends to the bottom of the active  */
 1323 /* list.  If the capacity of the table has reached the maximum allowed then */
 1324 /* the call will fail and a flush is scheduled for the next timeout call.   */
 1325 /*                                                                          */
 1326 /* NOTE: The use of stsave to point to nat_state will result in memory      */
 1327 /*       corruption.  It should only be used to point to objects that will  */
 1328 /*       either outlive this (not expired) or will deref the ip_state_t     */
 1329 /*       when they are deleted.                                             */
 1330 /* ------------------------------------------------------------------------ */
 1331 int
 1332 ipf_state_add(ipf_main_softc_t *softc, fr_info_t *fin, ipstate_t **stsave,
 1333         u_int flags)
 1334 {
 1335         ipf_state_softc_t *softs = softc->ipf_state_soft;
 1336         ipstate_t *is, ips;
 1337         struct icmp *ic;
 1338         u_int pass, hv;
 1339         frentry_t *fr;
 1340         tcphdr_t *tcp;
 1341         frdest_t *fdp;
 1342         int out;
 1343 
 1344         /*
 1345          * If a locally created packet is trying to egress but it
 1346          * does not match because of this lock, it is likely that
 1347         * the policy will block it and return network unreachable further
 1348          * up the stack. To mitigate this error, EAGAIN is returned instead,
 1349          * telling the IP stack to try sending this packet again later.
 1350          */
 1351         if (softs->ipf_state_lock) {
 1352                 SBUMPD(ipf_state_stats, iss_add_locked);
 1353                 fin->fin_error = EAGAIN;
 1354                 return (-1);
 1355         }
 1356 
 1357         if (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD)) {
 1358                 SBUMPD(ipf_state_stats, iss_add_bad);
 1359                 return (-1);
 1360         }
 1361 
 1362         if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) {
 1363                 SBUMPD(ipf_state_stats, iss_add_oow);
 1364                 return (-1);
 1365         }
 1366 
 1367         if ((softs->ipf_state_stats.iss_active * 100 / softs->ipf_state_max) >
 1368             softs->ipf_state_wm_high) {
 1369                 softs->ipf_state_doflush = 1;
 1370         }
 1371 
 1372         /*
 1373          * If a "keep state" rule has reached the maximum number of references
 1374          * to it, then schedule an automatic flush in case we can clear out
 1375          * some "dead old wood".  Note that because the lock isn't held on
 1376          * fr it is possible that we could overflow.  The cost of overflowing
 1377          * is being ignored here as the number by which it can overflow is
 1378          * a product of the number of simultaneous threads that could be
 1379          * executing in here, so a limit of 100 won't result in 200, but could
 1380          * result in 101 or 102.
 1381          */
 1382         fr = fin->fin_fr;
 1383         if (fr != NULL) {
 1384                 if ((softs->ipf_state_stats.iss_active >=
 1385                      softs->ipf_state_max) && (fr->fr_statemax == 0)) {
 1386                         SBUMPD(ipf_state_stats, iss_max);
 1387                         return (1);
 1388                 }
 1389                 if ((fr->fr_statemax != 0) &&
 1390                     (fr->fr_statecnt >= fr->fr_statemax)) {
 1391                         SBUMPD(ipf_state_stats, iss_max_ref);
 1392                         return (2);
 1393                 }
 1394         }
 1395 
 1396         is = &ips;
 1397         if (fr == NULL) {
 1398                 pass = softc->ipf_flags;
 1399                 is->is_tag = FR_NOLOGTAG;
 1400         } else {
 1401                 pass = fr->fr_flags;
 1402         }
 1403 
 1404         ic = NULL;
 1405         tcp = NULL;
 1406         out = fin->fin_out;
 1407         bzero((char *)is, sizeof(*is));
 1408         is->is_die = 1 + softc->ipf_ticks;
 1409         /*
 1410          * We want to check everything that is a property of this packet,
 1411          * but we don't (automatically) care about its fragment status as
 1412          * this may change.
 1413          */
 1414         is->is_pass = pass;
 1415         is->is_v = fin->fin_v;
 1416         is->is_sec = fin->fin_secmsk;
 1417         is->is_secmsk = 0xffff;
 1418         is->is_auth = fin->fin_auth;
 1419         is->is_authmsk = 0xffff;
 1420         is->is_family = fin->fin_family;
 1421         is->is_opt[0] = fin->fin_optmsk;
 1422         is->is_optmsk[0] = 0xffffffff;
 1423         if (is->is_v == 6) {
 1424                 is->is_opt[0] &= ~0x8;
 1425                 is->is_optmsk[0] &= ~0x8;
 1426         }
 1427 
 1428         /*
 1429          * Copy and calculate...
 1430          */
 1431         hv = (is->is_p = fin->fin_fi.fi_p);
 1432         is->is_src = fin->fin_fi.fi_src;
 1433         hv += is->is_saddr;
 1434         is->is_dst = fin->fin_fi.fi_dst;
 1435         hv += is->is_daddr;
 1436 #ifdef  USE_INET6
 1437         if (fin->fin_v == 6) {
 1438                 /*
 1439                  * For ICMPv6, we check to see if the destination address is
 1440                  * a multicast address.  If it is, do not include it in the
 1441                  * calculation of the hash because the correct reply will come
 1442                  * back from a real address, not a multicast address.
 1443                  */
 1444                 if ((is->is_p == IPPROTO_ICMPV6) &&
 1445                     IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) {
 1446                         /*
 1447                          * So you can do keep state with neighbour discovery.
 1448                          *
 1449                          * Here we could use the address from the neighbour
 1450                          * solicit message to put in the state structure and
 1451                          * we could use that without a wildcard flag too...
 1452                          */
 1453                         flags |= SI_W_DADDR;
 1454                         hv -= is->is_daddr;
 1455                 } else {
 1456                         hv += is->is_dst.i6[1];
 1457                         hv += is->is_dst.i6[2];
 1458                         hv += is->is_dst.i6[3];
 1459                 }
 1460                 hv += is->is_src.i6[1];
 1461                 hv += is->is_src.i6[2];
 1462                 hv += is->is_src.i6[3];
 1463         }
 1464 #endif
 1465         if ((fin->fin_v == 4) &&
 1466             (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
 1467                 flags |= SI_W_DADDR;
 1468                 hv -= is->is_daddr;
 1469         }
 1470 
 1471         switch (is->is_p)
 1472         {
 1473 #ifdef  USE_INET6
 1474         case IPPROTO_ICMPV6 :
 1475                 ic = fin->fin_dp;
 1476 
 1477                 switch (ic->icmp_type)
 1478                 {
 1479                 case ICMP6_ECHO_REQUEST :
 1480                         hv += (is->is_icmp.ici_id = ic->icmp_id);
 1481                         /*FALLTHROUGH*/
 1482                 case ICMP6_MEMBERSHIP_QUERY :
 1483                 case ND_ROUTER_SOLICIT :
 1484                 case ND_NEIGHBOR_SOLICIT :
 1485                 case ICMP6_NI_QUERY :
 1486                         is->is_icmp.ici_type = ic->icmp_type;
 1487                         break;
 1488                 default :
 1489                         SBUMPD(ipf_state_stats, iss_icmp6_notquery);
 1490                         return (-2);
 1491                 }
 1492                 break;
 1493 #endif
 1494         case IPPROTO_ICMP :
 1495                 ic = fin->fin_dp;
 1496 
 1497                 switch (ic->icmp_type)
 1498                 {
 1499                 case ICMP_ECHO :
 1500                 case ICMP_TSTAMP :
 1501                 case ICMP_IREQ :
 1502                 case ICMP_MASKREQ :
 1503                         is->is_icmp.ici_type = ic->icmp_type;
 1504                         hv += (is->is_icmp.ici_id = ic->icmp_id);
 1505                         break;
 1506                 default :
 1507                         SBUMPD(ipf_state_stats, iss_icmp_notquery);
 1508                         return (-3);
 1509                 }
 1510                 break;
 1511 
 1512 #if 0
 1513         case IPPROTO_GRE :
 1514                 gre = fin->fin_dp;
 1515 
 1516                 is->is_gre.gs_flags = gre->gr_flags;
 1517                 is->is_gre.gs_ptype = gre->gr_ptype;
 1518                 if (GRE_REV(is->is_gre.gs_flags) == 1) {
 1519                         is->is_call[0] = fin->fin_data[0];
 1520                         is->is_call[1] = fin->fin_data[1];
 1521                 }
 1522                 break;
 1523 #endif
 1524 
 1525         case IPPROTO_TCP :
 1526                 tcp = fin->fin_dp;
 1527 
 1528                 if (tcp->th_flags & TH_RST) {
 1529                         SBUMPD(ipf_state_stats, iss_tcp_rstadd);
 1530                         return (-4);
 1531                 }
 1532 
 1533                 /* TRACE is, flags, hv */
 1534 
 1535                 /*
 1536                  * The endian of the ports doesn't matter, but the ack and
 1537                  * sequence numbers do as we do mathematics on them later.
 1538                  */
 1539                 is->is_sport = htons(fin->fin_data[0]);
 1540                 is->is_dport = htons(fin->fin_data[1]);
 1541                 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
 1542                         hv += is->is_sport;
 1543                         hv += is->is_dport;
 1544                 }
 1545 
 1546                 /* TRACE is, flags, hv */
 1547 
 1548                 /*
 1549                  * If this is a real packet then initialise fields in the
 1550                  * state information structure from the TCP header information.
 1551                  */
 1552 
 1553                 is->is_maxdwin = 1;
 1554                 is->is_maxswin = ntohs(tcp->th_win);
 1555                 if (is->is_maxswin == 0)
 1556                         is->is_maxswin = 1;
 1557 
 1558                 if ((fin->fin_flx & FI_IGNORE) == 0) {
 1559                         is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
 1560                                       (TCP_OFF(tcp) << 2) +
 1561                                       ((tcp->th_flags & TH_SYN) ? 1 : 0) +
 1562                                       ((tcp->th_flags & TH_FIN) ? 1 : 0);
 1563                         is->is_maxsend = is->is_send;
 1564 
 1565                         /*
 1566                          * Window scale option is only present in
 1567                          * SYN/SYN-ACK packet.
 1568                          */
 1569                         if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) ==
 1570                             TH_SYN &&
 1571                             (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
 1572                                 if (ipf_tcpoptions(softs, fin, tcp,
 1573                                               &is->is_tcp.ts_data[0]) == -1) {
 1574                                         fin->fin_flx |= FI_BAD;
 1575                                         DT1(ipf_fi_bad_tcpoptions_th_fin_ack_ecnall, fr_info_t *, fin);
 1576                                 }
 1577                         }
 1578 
 1579                         if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) {
 1580                                 ipf_checknewisn(fin, is);
 1581                                 ipf_fixoutisn(fin, is);
 1582                         }
 1583 
 1584                         if ((tcp->th_flags & TH_OPENING) == TH_SYN)
 1585                                 flags |= IS_TCPFSM;
 1586                         else {
 1587                                 is->is_maxdwin = is->is_maxswin * 2;
 1588                                 is->is_dend = ntohl(tcp->th_ack);
 1589                                 is->is_maxdend = ntohl(tcp->th_ack);
 1590                                 is->is_maxdwin *= 2;
 1591                         }
 1592                 }
 1593 
 1594                 /*
 1595                  * If we're creating state for a starting connection, start
 1596                  * the timer on it as we'll never see an error if it fails
 1597                  * to connect.
 1598                  */
 1599                 break;
 1600 
 1601         case IPPROTO_UDP :
 1602                 tcp = fin->fin_dp;
 1603 
 1604                 is->is_sport = htons(fin->fin_data[0]);
 1605                 is->is_dport = htons(fin->fin_data[1]);
 1606                 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
 1607                         hv += tcp->th_dport;
 1608                         hv += tcp->th_sport;
 1609                 }
 1610                 break;
 1611 
 1612         default :
 1613                 break;
 1614         }
 1615         hv = DOUBLE_HASH(hv);
 1616         is->is_hv = hv;
 1617 
 1618         /*
 1619          * Look for identical state.
 1620          */
 1621         for (is = softs->ipf_state_table[hv % softs->ipf_state_size];
 1622              is != NULL; is = is->is_hnext) {
 1623                 if (ipf_state_match(&ips, is) == 1)
 1624                         break;
 1625         }
 1626         if (is != NULL) {
 1627                 SBUMPD(ipf_state_stats, iss_add_dup);
 1628                 return (3);
 1629         }
 1630 
 1631         if (softs->ipf_state_stats.iss_bucketlen[hv] >=
 1632             softs->ipf_state_maxbucket) {
 1633                 SBUMPD(ipf_state_stats, iss_bucket_full);
 1634                 return (4);
 1635         }
 1636 
 1637         /*
 1638          * No existing state; create new
 1639          */
 1640         KMALLOC(is, ipstate_t *);
 1641         if (is == NULL) {
 1642                 SBUMPD(ipf_state_stats, iss_nomem);
 1643                 return (5);
 1644         }
 1645         bcopy((char *)&ips, (char *)is, sizeof(*is));
 1646         is->is_flags = flags & IS_INHERITED;
 1647         is->is_rulen = fin->fin_rule;
 1648         is->is_rule = fr;
 1649 
 1650         /*
 1651          * Do not do the modulus here, it is done in ipf_state_insert().
 1652          */
 1653         if (fr != NULL) {
 1654                 ipftq_t *tq;
 1655 
 1656                 (void) strncpy(is->is_group, FR_NAME(fr, fr_group),
 1657                                FR_GROUPLEN);
 1658                 if (fr->fr_age[0] != 0) {
 1659                         tq = ipf_addtimeoutqueue(softc,
 1660                                                  &softs->ipf_state_usertq,
 1661                                                  fr->fr_age[0]);
 1662                         is->is_tqehead[0] = tq;
 1663                         is->is_sti.tqe_flags |= TQE_RULEBASED;
 1664                 }
 1665                 if (fr->fr_age[1] != 0) {
 1666                         tq = ipf_addtimeoutqueue(softc,
 1667                                                  &softs->ipf_state_usertq,
 1668                                                  fr->fr_age[1]);
 1669                         is->is_tqehead[1] = tq;
 1670                         is->is_sti.tqe_flags |= TQE_RULEBASED;
 1671                 }
 1672 
 1673                 is->is_tag = fr->fr_logtag;
 1674         }
 1675 
 1676         /*
 1677          * It may seem strange to set is_ref to 2, but if stsave is not NULL
 1678          * then a copy of the pointer is being stored somewhere else and in
 1679          * the end, it will expect to be able to do something with it.
 1680          */
 1681         is->is_me = stsave;
 1682         if (stsave != NULL) {
 1683                 *stsave = is;
 1684                 is->is_ref = 2;
 1685         } else {
 1686                 is->is_ref = 1;
 1687         }
 1688         is->is_pkts[0] = 0, is->is_bytes[0] = 0;
 1689         is->is_pkts[1] = 0, is->is_bytes[1] = 0;
 1690         is->is_pkts[2] = 0, is->is_bytes[2] = 0;
 1691         is->is_pkts[3] = 0, is->is_bytes[3] = 0;
 1692         if ((fin->fin_flx & FI_IGNORE) == 0) {
 1693                 is->is_pkts[out] = 1;
 1694                 fin->fin_pktnum = 1;
 1695                 is->is_bytes[out] = fin->fin_plen;
 1696                 is->is_flx[out][0] = fin->fin_flx & FI_CMP;
 1697                 is->is_flx[out][0] &= ~FI_OOW;
 1698         }
 1699 
 1700         if (pass & FR_STLOOSE)
 1701                 is->is_flags |= IS_LOOSE;
 1702 
 1703         if (pass & FR_STSTRICT)
 1704                 is->is_flags |= IS_STRICT;
 1705 
 1706         if (pass & FR_STATESYNC)
 1707                 is->is_flags |= IS_STATESYNC;
 1708 
 1709         if (pass & FR_LOGFIRST)
 1710                 is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
 1711 
 1712         READ_ENTER(&softc->ipf_state);
 1713 
 1714         if (ipf_state_insert(softc, is, fin->fin_rev) == -1) {
 1715                 RWLOCK_EXIT(&softc->ipf_state);
 1716                 /*
 1717                  * This is a bit more manual than it should be but
 1718                  * ipf_state_del cannot be called.
 1719                  */
 1720                 MUTEX_EXIT(&is->is_lock);
 1721                 MUTEX_DESTROY(&is->is_lock);
 1722                 if (is->is_tqehead[0] != NULL) {
 1723                         if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0)
 1724                                 ipf_freetimeoutqueue(softc, is->is_tqehead[0]);
 1725                         is->is_tqehead[0] = NULL;
 1726                 }
 1727                 if (is->is_tqehead[1] != NULL) {
 1728                         if (ipf_deletetimeoutqueue(is->is_tqehead[1]) == 0)
 1729                                 ipf_freetimeoutqueue(softc, is->is_tqehead[1]);
 1730                         is->is_tqehead[1] = NULL;
 1731                 }
 1732                 KFREE(is);
 1733                 return (-1);
 1734         }
 1735 
 1736         /*
 1737          * Filling in the interface name is after the insert so that an
 1738          * event (such as add/delete) of an interface that is referenced
 1739          * by this rule will see this state entry.
 1740          */
 1741         if (fr != NULL) {
 1742                 /*
 1743                  * The name '-' is special for network interfaces and causes
 1744                  * a NULL name to be present, always, allowing packets to
 1745                  * match it, regardless of their interface.
 1746                  */
 1747                 if ((fin->fin_ifp == NULL) ||
 1748                     (fr->fr_ifnames[out << 1] != -1 &&
 1749                      fr->fr_names[fr->fr_ifnames[out << 1] + 0] == '-' &&
 1750                      fr->fr_names[fr->fr_ifnames[out << 1] + 1] == '\0')) {
 1751                         is->is_ifp[out << 1] = fr->fr_ifas[0];
 1752                         strncpy(is->is_ifname[out << 1],
 1753                                 FR_NAME(fr, fr_ifnames[0]),
 1754                                 sizeof(fr->fr_ifnames[0]));
 1755                 } else {
 1756                         is->is_ifp[out << 1] = fin->fin_ifp;
 1757                         COPYIFNAME(fin->fin_v, fin->fin_ifp,
 1758                                    is->is_ifname[out << 1]);
 1759                 }
 1760 
 1761                 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1];
 1762                 if (fr->fr_ifnames[1] != -1) {
 1763                         strncpy(is->is_ifname[(out << 1) + 1],
 1764                                 FR_NAME(fr, fr_ifnames[1]),
 1765                                 sizeof(fr->fr_ifnames[1]));
 1766                 }
 1767 
 1768                 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2];
 1769                 if (fr->fr_ifnames[2] != -1) {
 1770                         strncpy(is->is_ifname[((1 - out) << 1)],
 1771                                 FR_NAME(fr, fr_ifnames[2]),
 1772                                 sizeof(fr->fr_ifnames[2]));
 1773                 }
 1774 
 1775                 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3];
 1776                 if (fr->fr_ifnames[3] != -1) {
 1777                         strncpy(is->is_ifname[((1 - out) << 1) + 1],
 1778                                 FR_NAME(fr, fr_ifnames[3]),
 1779                                 sizeof(fr->fr_ifnames[3]));
 1780                 }
 1781         } else {
 1782                 if (fin->fin_ifp != NULL) {
 1783                         is->is_ifp[out << 1] = fin->fin_ifp;
 1784                         COPYIFNAME(fin->fin_v, fin->fin_ifp,
 1785                                    is->is_ifname[out << 1]);
 1786                 }
 1787         }
 1788 
 1789         if (fin->fin_p == IPPROTO_TCP) {
 1790                 /*
 1791                 * If we're creating state for a starting connection, start the
 1792                 * timer on it as we'll never see an error if it fails to
 1793                 * connect.
 1794                 */
 1795                 (void) ipf_tcp_age(&is->is_sti, fin, softs->ipf_state_tcptq,
 1796                                    is->is_flags, 2);
 1797         }
 1798         MUTEX_EXIT(&is->is_lock);
 1799         if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0))
 1800                 is->is_sync = ipf_sync_new(softc, SMC_STATE, fin, is);
 1801         if (softs->ipf_state_logging)
 1802                 ipf_state_log(softc, is, ISL_NEW);
 1803 
 1804         RWLOCK_EXIT(&softc->ipf_state);
 1805 
 1806         fin->fin_flx |= FI_STATE;
 1807         if (fin->fin_flx & FI_FRAG)
 1808                 (void) ipf_frag_new(softc, fin, pass);
 1809 
 1810         fdp = &fr->fr_tifs[0];
 1811         if (fdp->fd_type == FRD_DSTLIST) {
 1812                 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
 1813                                         &is->is_tifs[0]);
 1814         } else {
 1815                 bcopy(fdp, &is->is_tifs[0], sizeof(*fdp));
 1816         }
 1817 
 1818         fdp = &fr->fr_tifs[1];
 1819         if (fdp->fd_type == FRD_DSTLIST) {
 1820                 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
 1821                                         &is->is_tifs[1]);
 1822         } else {
 1823                 bcopy(fdp, &is->is_tifs[1], sizeof(*fdp));
 1824         }
 1825         fin->fin_tif = &is->is_tifs[fin->fin_rev];
 1826 
 1827         fdp = &fr->fr_dif;
 1828         if (fdp->fd_type == FRD_DSTLIST) {
 1829                 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
 1830                                         &is->is_dif);
 1831         } else {
 1832                 bcopy(fdp, &is->is_dif, sizeof(*fdp));
 1833         }
 1834         fin->fin_dif = &is->is_dif;
 1835 
 1836         return (0);
 1837 }
 1838 
 1839 
 1840 /* ------------------------------------------------------------------------ */
 1841 /* Function:    ipf_tcpoptions                                              */
 1842 /* Returns:     int - 1 == packet matches state entry, 0 == it does not,    */
 1843 /*                   -1 == packet has bad TCP options data                  */
 1844 /* Parameters:  softs(I) - pointer to state context structure               */
 1845 /*              fin(I) - pointer to packet information                      */
 1846 /*              tcp(I) - pointer to TCP packet header                       */
 1847 /*              td(I)  - pointer to TCP data held as part of the state      */
 1848 /*                                                                          */
 1849 /* Look after the TCP header for any options and deal with those that are   */
 1850 /* present.  Record details about those that we recogise.                   */
 1851 /* ------------------------------------------------------------------------ */
 1852 static int
 1853 ipf_tcpoptions(ipf_state_softc_t *softs, fr_info_t *fin, tcphdr_t *tcp,
 1854         tcpdata_t *td)
 1855 {
 1856         int off, mlen, ol, i, len, retval;
 1857         char buf[64], *s, opt;
 1858         mb_t *m = NULL;
 1859 
 1860         len = (TCP_OFF(tcp) << 2);
 1861         if (fin->fin_dlen < len) {
 1862                 SBUMPD(ipf_state_stats, iss_tcp_toosmall);
 1863                 return (0);
 1864         }
 1865         len -= sizeof(*tcp);
 1866 
 1867         off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff;
 1868 
 1869         m = fin->fin_m;
 1870         mlen = MSGDSIZE(m) - off;
 1871         if (len > mlen) {
 1872                 len = mlen;
 1873                 retval = 0;
 1874         } else {
 1875                 retval = 1;
 1876         }
 1877 
 1878         COPYDATA(m, off, len, buf);
 1879 
 1880         for (s = buf; len > 0; ) {
 1881                 opt = *s;
 1882                 if (opt == TCPOPT_EOL)
 1883                         break;
 1884                 else if (opt == TCPOPT_NOP)
 1885                         ol = 1;
 1886                 else {
 1887                         if (len < 2)
 1888                                 break;
 1889                         ol = (int)*(s + 1);
 1890                         if (ol < 2 || ol > len)
 1891                                 break;
 1892 
 1893                         /*
 1894                          * Extract the TCP options we are interested in out of
 1895                          * the header and store them in the tcpdata struct.
 1896                          */
 1897                         switch (opt)
 1898                         {
 1899                         case TCPOPT_WINDOW :
 1900                                 if (ol == TCPOLEN_WINDOW) {
 1901                                         i = (int)*(s + 2);
 1902                                         if (i > TCP_WSCALE_MAX)
 1903                                                 i = TCP_WSCALE_MAX;
 1904                                         else if (i < 0)
 1905                                                 i = 0;
 1906                                         td->td_winscale = i;
 1907                                         td->td_winflags |= TCP_WSCALE_SEEN|
 1908                                                            TCP_WSCALE_FIRST;
 1909                                 } else
 1910                                         retval = -1;
 1911                                 break;
 1912                         case TCPOPT_MAXSEG :
 1913                                 /*
 1914                                  * So, if we wanted to set the TCP MAXSEG,
 1915                                  * it should be done here...
 1916                                  */
 1917                                 if (ol == TCPOLEN_MAXSEG) {
 1918                                         i = (int)*(s + 2);
 1919                                         i <<= 8;
 1920                                         i += (int)*(s + 3);
 1921                                         td->td_maxseg = i;
 1922                                 } else
 1923                                         retval = -1;
 1924                                 break;
 1925                         case TCPOPT_SACK_PERMITTED :
 1926                                 if (ol == TCPOLEN_SACK_PERMITTED)
 1927                                         td->td_winflags |= TCP_SACK_PERMIT;
 1928                                 else
 1929                                         retval = -1;
 1930                                 break;
 1931                         }
 1932                 }
 1933                 len -= ol;
 1934                 s += ol;
 1935         }
 1936         if (retval == -1) {
 1937                 SBUMPD(ipf_state_stats, iss_tcp_badopt);
 1938         }
 1939         return (retval);
 1940 }
 1941 
 1942 
 1943 /* ------------------------------------------------------------------------ */
 1944 /* Function:    ipf_state_tcp                                               */
 1945 /* Returns:     int - 1 == packet matches state entry, 0 == it does not     */
 1946 /* Parameters:  softc(I)  - pointer to soft context main structure          */
 1947 /*              softs(I) - pointer to state context structure               */
 1948 /*              fin(I)   - pointer to packet information                    */
 1949 /*              tcp(I)   - pointer to TCP packet header                     */
 1950 /*              is(I)  - pointer to master state structure                  */
 1951 /*                                                                          */
 1952 /* Check to see if a packet with TCP headers fits within the TCP window.    */
 1953 /* Change timeout depending on whether new packet is a SYN-ACK returning    */
 1954 /* for a SYN or a RST or FIN which indicate time to close up shop.          */
 1955 /* ------------------------------------------------------------------------ */
 1956 static int
 1957 ipf_state_tcp(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
 1958         fr_info_t *fin, tcphdr_t *tcp, ipstate_t *is)
 1959 {
 1960         tcpdata_t  *fdata, *tdata;
 1961         int source, ret, flags;
 1962 
 1963         source = !fin->fin_rev;
 1964         if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) &&
 1965             (ntohs(is->is_sport) != fin->fin_data[0]))
 1966                 source = 0;
 1967         fdata = &is->is_tcp.ts_data[!source];
 1968         tdata = &is->is_tcp.ts_data[source];
 1969 
 1970         MUTEX_ENTER(&is->is_lock);
 1971 
 1972         /*
 1973          * If a SYN packet is received for a connection that is on the way out
 1974          * but hasn't yet departed then advance this session along the way.
 1975          */
 1976         if ((tcp->th_flags & TH_OPENING) == TH_SYN) {
 1977                 if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) &&
 1978                     (is->is_state[1] > IPF_TCPS_ESTABLISHED)) {
 1979                         is->is_state[!source] = IPF_TCPS_CLOSED;
 1980                         ipf_movequeue(softc->ipf_ticks, &is->is_sti,
 1981                                       is->is_sti.tqe_ifq,
 1982                                       &softs->ipf_state_deletetq);
 1983                         MUTEX_EXIT(&is->is_lock);
 1984                         DT1(iss_tcp_closing, ipstate_t *, is);
 1985                         SBUMP(ipf_state_stats.iss_tcp_closing);
 1986                         return (0);
 1987                 }
 1988         }
 1989 
 1990         if (is->is_flags & IS_LOOSE)
 1991                 ret = 1;
 1992         else
 1993                 ret = ipf_state_tcpinwindow(fin, fdata, tdata, tcp,
 1994                                             is->is_flags);
 1995         if (ret > 0) {
 1996                 /*
 1997                  * Nearing end of connection, start timeout.
 1998                  */
 1999                 ret = ipf_tcp_age(&is->is_sti, fin, softs->ipf_state_tcptq,
 2000                                   is->is_flags, ret);
 2001                 if (ret == 0) {
 2002                         MUTEX_EXIT(&is->is_lock);
 2003                         DT2(iss_tcp_fsm, fr_info_t *, fin, ipstate_t *, is);
 2004                         SBUMP(ipf_state_stats.iss_tcp_fsm);
 2005                         return (0);
 2006                 }
 2007 
 2008                 if (softs->ipf_state_logging > 4)
 2009                         ipf_state_log(softc, is, ISL_STATECHANGE);
 2010 
 2011                 /*
 2012                  * set s0's as appropriate.  Use syn-ack packet as it
 2013                  * contains both pieces of required information.
 2014                  */
 2015                 /*
 2016                  * Window scale option is only present in SYN/SYN-ACK packet.
 2017                  * Compare with ~TH_FIN to mask out T/TCP setups.
 2018                  */
 2019                 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL);
 2020                 if (flags == (TH_SYN|TH_ACK)) {
 2021                         is->is_s0[source] = ntohl(tcp->th_ack);
 2022                         is->is_s0[!source] = ntohl(tcp->th_seq) + 1;
 2023                         if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
 2024                                 if (ipf_tcpoptions(softs, fin, tcp,
 2025                                                    fdata) == -1) {
 2026                                         fin->fin_flx |= FI_BAD;
 2027                                         DT1(ipf_fi_bad_winscale_syn_ack, fr_info_t *, fin);
 2028                                 }
 2029                         }
 2030                         if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
 2031                                 ipf_checknewisn(fin, is);
 2032                 } else if (flags == TH_SYN) {
 2033                         is->is_s0[source] = ntohl(tcp->th_seq) + 1;
 2034                         if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
 2035                                 if (ipf_tcpoptions(softs, fin, tcp,
 2036                                                    fdata) == -1) {
 2037                                         fin->fin_flx |= FI_BAD;
 2038                                         DT1(ipf_fi_bad_winscale_syn, fr_info_t *, fin);
 2039                                 }
 2040                         }
 2041 
 2042                         if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
 2043                                 ipf_checknewisn(fin, is);
 2044 
 2045                 }
 2046                 ret = 1;
 2047         } else {
 2048                 DT2(iss_tcp_oow, fr_info_t *, fin, ipstate_t *, is);
 2049                 SBUMP(ipf_state_stats.iss_tcp_oow);
 2050                 ret = 0;
 2051         }
 2052         MUTEX_EXIT(&is->is_lock);
 2053         return (ret);
 2054 }
 2055 
 2056 
 2057 /* ------------------------------------------------------------------------ */
 2058 /* Function:    ipf_checknewisn                                             */
 2059 /* Returns:     Nil                                                         */
 2060 /* Parameters:  fin(I)   - pointer to packet information                    */
 2061 /*              is(I/O)  - pointer to state structure (ISN fields updated)  */
 2062 /*                                                                          */
 2063 /* Check to see if this TCP connection is expecting and needs a new         */
 2064 /* sequence number for the direction of this packet (fin->fin_rev).  If so, */
      /* fetch one with ipf_newisn() and record, for that direction, the offset   */
      /* from the packet's sequence number (is_isninc) and the folded result of   */
      /* CALC_SUMD (is_sumd).  IS_ISNSYN/IS_ISNACK in is_flags mark a direction   */
      /* as done; while the flag is set this function does nothing for it.        */
 2065 /*                                                                          */
 2066 /* NOTE: This does not actually change the sequence numbers, it only gets a */
 2067 /* new one ready.                                                           */
 2068 /* ------------------------------------------------------------------------ */
 2069 static void
 2070 ipf_checknewisn(fr_info_t *fin, ipstate_t *is)
 2071 {
 2072         u_32_t sumd, old, new;
 2073         tcphdr_t *tcp;
 2074         int i;
 2075 
 2076         i = fin->fin_rev;       /* direction; indexes is_isninc[]/is_sumd[] */
 2077         tcp = fin->fin_dp;
 2078 
              /*
               * IS_ISNSYN guards direction 0 and IS_ISNACK direction 1; only do
               * the work if the flag for this packet's direction is not yet set.
               */
 2079         if (((i == 0) && !(is->is_flags & IS_ISNSYN)) ||
 2080             ((i == 1) && !(is->is_flags & IS_ISNACK))) {
                      /*
                       * Record how far the replacement ISN is from the sequence
                       * number carried in this packet, together with the value
                       * CALC_SUMD produces for the pair, with its upper 16 bits
                       * added onto its lower 16 bits.
                       * NOTE(review): CALC_SUMD presumably yields a checksum
                       * adjustment for the old/new pair - confirm at its
                       * definition.
                       */
 2081                 old = ntohl(tcp->th_seq);
 2082                 new = ipf_newisn(fin);
 2083                 is->is_isninc[i] = new - old;
 2084                 CALC_SUMD(old, new, sumd);
 2085                 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16);
 2086 
                      /* Mark this direction as done. */
 2087                 is->is_flags |= ((i == 0) ? IS_ISNSYN : IS_ISNACK);
 2088         }
 2089 }
 2090 
 2091 
 2092 /* ------------------------------------------------------------------------ */
 2093 /* Function:    ipf_state_tcpinwindow                                       */
 2094 /* Returns:     int - 1 == packet inside TCP "window", 0 == not inside.     */
 2095 /* Parameters:  fin(I)   - pointer to packet information                    */
 2096 /*              fdata(I) - pointer to tcp state information (forward)       */
 2097 /*              tdata(I) - pointer to tcp state information (reverse)       */
 2098 /*              tcp(I)   - pointer to TCP packet header                     */
      /*              flags(I) - state flags; IS_TCPFSM and IS_STRICT are checked */
 2099 /*                                                                          */
 2100 /* Given a packet has matched addresses and ports, check to see if it is    */
 2101 /* within the TCP data window.  In a show of generosity, allow packets that */
 2102 /* are within the window space behind the current sequence # as well.       */
      /*                                                                          */
      /* When the packet is accepted, the tracked end/maxend/maxwin values in     */
      /* fdata and tdata are updated from it.  When it is rejected, FI_OOW is set */
      /* in fin->fin_flx and a statistics counter is bumped.                      */
 2103 /* ------------------------------------------------------------------------ */
 2104 static int
 2105 ipf_state_tcpinwindow(fr_info_t *fin, tcpdata_t  *fdata, tcpdata_t *tdata,
 2106         tcphdr_t *tcp, int flags)
 2107 {
 2108         ipf_main_softc_t *softc = fin->fin_main_soft;
 2109         ipf_state_softc_t *softs = softc->ipf_state_soft;
 2110         tcp_seq seq, ack, end;
 2111         int ackskew, tcpflags;
 2112         u_32_t win, maxwin;
 2113         int dsize, inseq;
 2114 
 2115         /*
 2116          * Find difference between last checked packet and this packet.
 2117          */
 2118         tcpflags = tcp->th_flags;
 2119         seq = ntohl(tcp->th_seq);
 2120         ack = ntohl(tcp->th_ack);
              /*
               * The window advertised in a SYN is used as is; in any other
               * packet it is shifted left by fdata->td_winscale.
               */
 2121         if (tcpflags & TH_SYN)
 2122                 win = ntohs(tcp->th_win);
 2123         else
 2124                 win = ntohs(tcp->th_win) << fdata->td_winscale;
 2125 
 2126         /*
 2127          * A window of 0 produces undesirable behaviour from this function.
 2128          */
 2129         if (win == 0)
 2130                 win = 1;
 2131 
              /*
               * Sequence space used by this segment: fin_dlen less the TCP header
               * length (TCP_OFF << 2), plus one each for SYN and FIN.
               */
 2132         dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
 2133                 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0);
 2134 
 2135         /*
 2136          * if window scaling is present, the scaling is only allowed
 2137          * for windows not in the first SYN packet. In that packet the
 2138          * window is 65535 to specify the largest window possible
 2139          * for receivers not implementing the window scale option.
 2140          * Currently, we do not assume TTCP here. That means that
 2141          * if we see a second packet from a host (after the initial
 2142          * SYN), we can assume that the receiver of the SYN did
 2143          * already send back the SYN/ACK (and thus that we know if
 2144          * the receiver also does window scaling)
 2145          */
 2146         if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) {
 2147                 fdata->td_winflags &= ~TCP_WSCALE_FIRST;
 2148                 fdata->td_maxwin = win;
 2149         }
 2150 
 2151         end = seq + dsize;      /* sequence number just past this segment */
 2152 
              /*
               * Nothing recorded for this side yet (td_end == 0): seed td_end,
               * td_maxwin and td_maxend from this packet.  With IS_TCPFSM set
               * this is only done when all TH_OPENING flag bits are present.
               */
 2153         if ((fdata->td_end == 0) &&
 2154             (!(flags & IS_TCPFSM) ||
 2155              ((tcpflags & TH_OPENING) == TH_OPENING))) {
 2156                 /*
 2157                  * Must be a (outgoing) SYN-ACK in reply to a SYN.
 2158                  */
 2159                 fdata->td_end = end - 1;
 2160                 fdata->td_maxwin = 1;
 2161                 fdata->td_maxend = end + win;
 2162         }
 2163 
 2164         if (!(tcpflags & TH_ACK)) {  /* Pretend an ack was sent */
 2165                 ack = tdata->td_end;
 2166         } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
 2167                    (ack == 0)) {
 2168                 /* gross hack to get around certain broken tcp stacks */
 2169                 ack = tdata->td_end;
 2170         }
 2171 
              /*
               * ackskew > 0: this ACK is behind the other side's recorded td_end;
               * ackskew < 0: it is past it.
               */
 2172         maxwin = tdata->td_maxwin;
 2173         ackskew = tdata->td_end - ack;
 2174 
 2175         /*
 2176          * Strict sequencing only allows in-order delivery.
 2177          */
 2178         if ((flags & IS_STRICT) != 0) {
 2179                 if (seq != fdata->td_end) {
 2180                         DT2(iss_tcp_struct, tcpdata_t *, fdata, int, seq);
 2181                         SBUMP(ipf_state_stats.iss_tcp_strict);
 2182                         fin->fin_flx |= FI_OOW;
 2183                         return (0);
 2184                 }
 2185         }
 2186 
              /* Sequence comparisons done on the signed 32-bit difference. */
 2187 #define SEQ_GE(a,b)     ((int)((a) - (b)) >= 0)
 2188 #define SEQ_GT(a,b)     ((int)((a) - (b)) > 0)
 2189         inseq = 0;
              /*
               * Accept the packet outright when all of these hold:
               *  - the segment ends at or before fdata->td_maxend;
               *  - it starts no more than maxwin behind fdata->td_end;
               *  - its ACK is at most MAXACKWINDOW past tdata->td_end;
               *  - its ACK is at most (MAXACKWINDOW << td_winscale) behind it.
               * The else-if branches that follow add a few special cases.
               */
 2190         if ((SEQ_GE(fdata->td_maxend, end)) &&
 2191             (SEQ_GE(seq, fdata->td_end - maxwin)) &&
 2192 /* XXX what about big packets */
 2193 #define MAXACKWINDOW 66000
 2194             (-ackskew <= (MAXACKWINDOW)) &&
 2195             ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) {
 2196                 inseq = 1;
 2197         /*
 2198          * Microsoft Windows will send the next packet to the right of the
 2199          * window if SACK is in use.
 2200          */
 2201         } else if ((seq == fdata->td_maxend) && (ackskew == 0) &&
 2202             (fdata->td_winflags & TCP_SACK_PERMIT) &&
 2203             (tdata->td_winflags & TCP_SACK_PERMIT)) {
 2204                 DT2(iss_sinsack, tcpdata_t *, fdata, int, seq);
 2205                 SBUMP(ipf_state_stats.iss_winsack);
 2206                 inseq = 1;
 2207         /*
 2208          * Sometimes a TCP RST will be generated with only the ACK field
 2209          * set to non-zero.
 2210          */
 2211         } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) &&
 2212                    (ackskew >= -1) && (ackskew <= 1)) {
 2213                 inseq = 1;
 2214         } else if (!(flags & IS_TCPFSM)) {
 2215                 int i;  /* only read by the disabled (#if 0) code below */
 2216 
 2217                 i = (fin->fin_rev << 1) + fin->fin_out;
 2218 
 2219 #if 0
 2220                 if (is_pkts[i]0 == 0) {
 2221                         /*
 2222                          * Picking up a connection in the middle, the "next"
 2223                          * packet seen from a direction that is new should be
 2224                          * accepted, even if it appears out of sequence.
 2225                          */
 2226                         inseq = 1;
 2227                 } else
 2228 #endif
 2229                 if (!(fdata->td_winflags &
 2230                             (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) {
 2231                         /*
 2232                          * No TCPFSM and no window scaling, so make some
 2233                          * extra guesses.
 2234                          */
 2235                         if ((seq == fdata->td_maxend) && (ackskew == 0))
 2236                                 inseq = 1;
 2237                         else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin))
 2238                                 inseq = 1;
 2239                 }
 2240         }
 2241 
 2242         /* TRACE(inseq, fdata, tdata, seq, end, ack, ackskew, win, maxwin) */
 2243 
 2244         if (inseq) {
 2245                 /* if ackskew < 0 then this should be due to fragmented
 2246                  * packets. There is no way to know the length of the
 2247                  * total packet in advance.
 2248                  * We do know the total length from the fragment cache though.
 2249                  * Note however that there might be more sessions with
 2250                  * exactly the same source and destination parameters in the
 2251                  * state cache (and source and destination is the only stuff
 2252                  * that is saved in the fragment cache). Note further that
 2253                  * some TCP connections in the state cache are hashed with
 2254                  * sport and dport as well which makes it not worthwhile to
 2255                  * look for them.
 2256                  * Thus, when ackskew is negative but still seems to belong
 2257                  * to this session, we bump up the destinations end value.
 2258                  */
 2259                 if (ackskew < 0)
 2260                         tdata->td_end = ack;
 2261 
 2262                 /* update max window seen */
 2263                 if (fdata->td_maxwin < win)
 2264                         fdata->td_maxwin = win;
                      /* advance this side's end if the segment goes past it */
 2265                 if (SEQ_GT(end, fdata->td_end))
 2266                         fdata->td_end = end;
                      /* ack + win raises the other side's td_maxend if it is larger */
 2267                 if (SEQ_GE(ack + win, tdata->td_maxend))
 2268                         tdata->td_maxend = ack + win;
 2269                 return (1);
 2270         }
              /* Not accepted by any test above: flag the packet as out of window. */
 2271         SBUMP(ipf_state_stats.iss_oow);
 2272         fin->fin_flx |= FI_OOW;
 2273         return (0);
 2274 }
 2275 
 2276 
 2277 /* ------------------------------------------------------------------------ */
 2278 /* Function:    ipf_state_clone                                             */
 2279 /* Returns:     ipstate_t* - NULL == cloning failed,                        */
 2280 /*                           else pointer to new state structure            */
 2281 /* Parameters:  fin(I) - pointer to packet information                      */
 2282 /*              tcp(I) - pointer to TCP/UDP header                          */
 2283 /*              is(I)  - pointer to master state structure                  */
 2284 /*                                                                          */
 2285 /* Create a "duplicate" state table entry from the master.                  */
      /* The copy is inserted into the state table with its TCP sequence/window   */
      /* fields re-seeded from this packet, SI_CLONE cleared and SI_CLONED set.   */
      /* NULL is returned if the table is full, allocation fails or the insert    */
      /* fails.                                                                   */
 2286 /* ------------------------------------------------------------------------ */
 2287 static ipstate_t *
 2288 ipf_state_clone(fr_info_t *fin, tcphdr_t *tcp, ipstate_t *is)
 2289 {
 2290         ipf_main_softc_t *softc = fin->fin_main_soft;
 2291         ipf_state_softc_t *softs = softc->ipf_state_soft;
 2292         ipstate_t *clone;
 2293         u_32_t send;
 2294 
              /*
               * Refuse to clone once the active state count equals the
               * configured maximum, and set ipf_state_doflush.
               */
 2295         if (softs->ipf_state_stats.iss_active == softs->ipf_state_max) {
 2296                 SBUMPD(ipf_state_stats, iss_max);
 2297                 softs->ipf_state_doflush = 1;
 2298                 return (NULL);
 2299         }
 2300         KMALLOC(clone, ipstate_t *);
 2301         if (clone == NULL) {
 2302                 SBUMPD(ipf_state_stats, iss_clone_nomem);
 2303                 return (NULL);
 2304         }
              /* Start from a byte-for-byte copy of the master entry. */
 2305         bcopy((char *)is, (char *)clone, sizeof(*clone));
 2306 
              /*
               * NOTE(review): presumably this discards the lock contents that
               * were copied from the master - confirm MUTEX_NUKE semantics.
               */
 2307         MUTEX_NUKE(&clone->is_lock);
 2308         /*
 2309          * It has not yet been placed on any timeout queue, so make sure
 2310          * all of that data is zero'd out.
 2311          */
 2312         clone->is_sti.tqe_pnext = NULL;
 2313         clone->is_sti.tqe_next = NULL;
 2314         clone->is_sti.tqe_ifq = NULL;
 2315         clone->is_sti.tqe_parent = clone;
 2316 
 2317         clone->is_die = ONE_DAY + softc->ipf_ticks;
 2318         clone->is_state[0] = 0;
 2319         clone->is_state[1] = 0;
              /*
               * Sequence number just past this segment: th_seq plus fin_dlen
               * less the TCP header length, plus one each for SYN and FIN.
               */
 2320         send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
 2321                 ((tcp->th_flags & TH_SYN) ? 1 : 0) +
 2322                 ((tcp->th_flags & TH_FIN) ? 1 : 0);
 2323 
              /*
               * Seed the side this packet belongs to (the "d" fields when
               * fin_rev is 1, otherwise the "s" fields) from the packet and
               * reset the opposite side.  A zero advertised window is stored
               * as 1.
               */
 2324         if (fin->fin_rev == 1) {
 2325                 clone->is_dend = send;
 2326                 clone->is_maxdend = send;
 2327                 clone->is_send = 0;
 2328                 clone->is_maxswin = 1;
 2329                 clone->is_maxdwin = ntohs(tcp->th_win);
 2330                 if (clone->is_maxdwin == 0)
 2331                         clone->is_maxdwin = 1;
 2332         } else {
 2333                 clone->is_send = send;
 2334                 clone->is_maxsend = send;
 2335                 clone->is_dend = 0;
 2336                 clone->is_maxdwin = 1;
 2337                 clone->is_maxswin = ntohs(tcp->th_win);
 2338                 if (clone->is_maxswin == 0)
 2339                         clone->is_maxswin = 1;
 2340         }
 2341 
              /* The copy is itself a clone, not something to clone from. */
 2342         clone->is_flags &= ~SI_CLONE;
 2343         clone->is_flags |= SI_CLONED;
              /* If the insert fails the clone is freed and nothing is returned. */
 2344         if (ipf_state_insert(softc, clone, fin->fin_rev) == -1) {
 2345                 KFREE(clone);
 2346                 return (NULL);
 2347         }
 2348 
 2349         clone->is_ref = 1;
 2350         if (clone->is_p == IPPROTO_TCP) {
 2351                 (void) ipf_tcp_age(&clone->is_sti, fin, softs->ipf_state_tcptq,
 2352                                    clone->is_flags, 2);
 2353         }
              /*
               * NOTE(review): is_lock is not taken anywhere in this function;
               * presumably ipf_state_insert() returns with it held - confirm.
               */
 2354         MUTEX_EXIT(&clone->is_lock);
              /* The master's IS_STATESYNC flag decides whether to create a sync entry. */
 2355         if (is->is_flags & IS_STATESYNC)
 2356                 clone->is_sync = ipf_sync_new(softc, SMC_STATE, fin, clone);
 2357         DT2(iss_clone, ipstate_t *, is, ipstate_t *, clone);
 2358         SBUMP(ipf_state_stats.iss_cloned);
 2359         return (clone);
 2360 }
 2361 
 2362 
 2363 /* ------------------------------------------------------------------------ */
 2364 /* Function:    ipf_matchsrcdst                                             */
 2365 /* Returns:     ipstate_t* - NULL == no match,                              */
      /*                           else pointer to matching state (or its clone)  */
 2366 /* Parameters:  fin(I)   - pointer to packet information                    */
 2367 /*              is(I)    - pointer to state structure                       */
 2368 /*              src(I)   - pointer to source address                        */
 2369 /*              dst(I)   - pointer to destination address                   */
 2370 /*              tcp(I)   - pointer to TCP/UDP header                        */
 2371 /*              cmask(I) - mask of FI_* bits to check                       */
 2372 /*                                                                          */
 2373 /* Match a state table entry against an IP packet.  The logic below is that */
 2374 /* ret gets set to one if the match succeeds, else remains 0.  If it is     */
 2375 /* still 0 after the test, there is no match.                               */
      /*                                                                          */
      /* On a match, fin->fin_rev is set to the direction found and any wildcard  */
      /* address, port or interface fields in the entry are filled in from the    */
      /* packet.                                                                  */
 2376 /* ------------------------------------------------------------------------ */
 2377 static ipstate_t *
 2378 ipf_matchsrcdst(fr_info_t *fin, ipstate_t *is, i6addr_t *src, i6addr_t *dst,
 2379         tcphdr_t *tcp, u_32_t cmask)
 2380 {
 2381         ipf_main_softc_t *softc = fin->fin_main_soft;
 2382         ipf_state_softc_t *softs = softc->ipf_state_soft;
 2383         int ret = 0, rev, out, flags, flx = 0, idx;
 2384         u_short sp, dp;
 2385         u_32_t cflx;
 2386         void *ifp;
 2387 
 2388         /*
 2389          * If a connection is about to be deleted, no packets
 2390          * are allowed to match it.
 2391          */
 2392         if (is->is_sti.tqe_ifq == &softs->ipf_state_deletetq)
 2393                 return (NULL);
 2394 
              /*
               * First guess at the direction: reverse if the packet's destination
               * differs from the destination stored in the entry.
               */
 2395         rev = IP6_NEQ(&is->is_dst, dst);
 2396         ifp = fin->fin_ifp;
 2397         out = fin->fin_out;
 2398         flags = is->is_flags;
 2399         sp = 0;
 2400         dp = 0;
 2401 
              /* Ports are only considered when a TCP/UDP header was supplied. */
 2402         if (tcp != NULL) {
                      /* NOTE(review): one line uses htons(), the other ntohs(). */
 2403                 sp = htons(fin->fin_sport);
 2404                 dp = ntohs(fin->fin_dport);
 2405         }
              /*
               * Destination address matched; a mismatch on a port that is not
               * wildcarded still makes this a reverse-direction packet.
               */
 2406         if (!rev) {
 2407                 if (tcp != NULL) {
 2408                         if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
 2409                                 rev = 1;
 2410                         else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
 2411                                 rev = 1;
 2412                 }
 2413         }
 2414 
              /* Slot in is_ifp[]/is_ifname[] for this out/rev combination. */
 2415         idx = (out << 1) + rev;
 2416 
 2417         /*
 2418          * If the interface for this 'direction' is set, make sure it matches.
 2419          * An interface name that is not set matches any, as does a name of *.
 2420          */
 2421         if ((is->is_ifp[idx] == ifp) || (is->is_ifp[idx] == NULL &&
 2422             (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '-' ||
 2423              *is->is_ifname[idx] == '*')))
 2424                 ret = 1;
 2425 
 2426         if (ret == 0) {
 2427                 DT2(iss_lookup_badifp, fr_info_t *, fin, ipstate_t *, is);
 2428                 SBUMP(ipf_state_stats.iss_lookup_badifp);
 2429                 /* TRACE is, out, rev, idx */
 2430                 return (NULL);
 2431         }
 2432         ret = 0;        /* reused for the address/port test below */
 2433 
 2434         /*
 2435          * Match addresses and ports.
 2436          */
              /*
               * Forward: compare packet src/dst and sp/dp with the entry as
               * stored.  Reverse: compare them swapped.  A wildcard flag makes
               * the corresponding comparison pass regardless.
               */
 2437         if (rev == 0) {
 2438                 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
 2439                     (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
 2440                         if (tcp) {
 2441                                 if ((sp == is->is_sport || flags & SI_W_SPORT)
 2442                                     &&
 2443                                     (dp == is->is_dport || flags & SI_W_DPORT))
 2444                                         ret = 1;
 2445                         } else {
 2446                                 ret = 1;
 2447                         }
 2448                 }
 2449         } else {
 2450                 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
 2451                     (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
 2452                         if (tcp) {
 2453                                 if ((dp == is->is_sport || flags & SI_W_SPORT)
 2454                                     &&
 2455                                     (sp == is->is_dport || flags & SI_W_DPORT))
 2456                                         ret = 1;
 2457                         } else {
 2458                                 ret = 1;
 2459                         }
 2460                 }
 2461         }
 2462 
 2463         if (ret == 0) {
 2464                 SBUMP(ipf_state_stats.iss_lookup_badport);
 2465                 DT2(iss_lookup_badport, fr_info_t *, fin, ipstate_t *, is);
 2466                 /* TRACE rev, is, sp, dp, src, dst */
 2467                 return (NULL);
 2468         }
 2469 
 2470         /*
 2471          * Whether or not this should be here, is questionable, but the aim
 2472          * is to get this out of the main line.
 2473          */
              /*
               * With no TCP/UDP header, mask the port-wildcard and clone bits
               * out of the local copy of the flags (is->is_flags is untouched).
               */
 2474         if (tcp == NULL)
 2475                 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);
 2476 
 2477         /*
 2478          * Only one of the source or destination address can be flagged as a
 2479          * wildcard.  Fill in the missing address, if set.
 2480          * For IPv6, if the address being copied in is multicast, then
 2481          * don't reset the wild flag - multicast causes it to be set in the
 2482          * first place!
 2483          */
 2484         if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
 2485                 fr_ip_t *fi = &fin->fin_fi;
 2486 
 2487                 if ((flags & SI_W_SADDR) != 0) {
 2488                         if (rev == 0) {
 2489                                 is->is_src = fi->fi_src;
 2490                                 is->is_flags &= ~SI_W_SADDR;
 2491                         } else {
 2492                                 if (!(fin->fin_flx & (FI_MULTICAST|FI_MBCAST))){
 2493                                         is->is_src = fi->fi_dst;
 2494                                         is->is_flags &= ~SI_W_SADDR;
 2495                                 }
 2496                         }
 2497                 } else if ((flags & SI_W_DADDR) != 0) {
 2498                         if (rev == 0) {
 2499                                 if (!(fin->fin_flx & (FI_MULTICAST|FI_MBCAST))){
 2500                                         is->is_dst = fi->fi_dst;
 2501                                         is->is_flags &= ~SI_W_DADDR;
 2502                                 }
 2503                         } else {
 2504                                 is->is_dst = fi->fi_src;
 2505                                 is->is_flags &= ~SI_W_DADDR;
 2506                         }
 2507                 }
                      /* No SI_WILDA/SI_WILDP bits left: decrement iss_wild. */
 2508                 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
 2509                         ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
 2510                 }
 2511         }
 2512 
              /* Packet flags limited to cmask, and the flags saved for out/rev. */
 2513         flx = fin->fin_flx & cmask;
 2514         cflx = is->is_flx[out][rev];
 2515 
 2516         /*
 2517          * Match up any flags set from IP options.
 2518          */
 2519         if ((cflx && (flx != (cflx & cmask))) ||
 2520             ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
 2521             ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
 2522             ((fin->fin_auth & is->is_authmsk) != is->is_auth)) {
 2523                 SBUMPD(ipf_state_stats, iss_miss_mask);
 2524                 return (NULL);
 2525         }
 2526 
              /*
               * Packet carries FI_IGNORE: report the match now, without the
               * port, flag or interface updates further down.
               */
 2527         if ((fin->fin_flx & FI_IGNORE) != 0) {
 2528                 fin->fin_rev = rev;
 2529                 return (is);
 2530         }
 2531 
 2532         /*
 2533          * Only one of the source or destination port can be flagged as a
 2534          * wildcard.  When filling it in, fill in a copy of the matched entry
 2535          * if it has the cloning flag set.
 2536          */
 2537         if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
 2538                 if ((flags & SI_CLONE) != 0) {
 2539                         ipstate_t *clone;
 2540 
 2541                         clone = ipf_state_clone(fin, tcp, is);
 2542                         if (clone == NULL)
 2543                                 return (NULL);
                              /* From here on "is" refers to the clone. */
 2544                         is = clone;
 2545                 } else {
 2546                         ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
 2547                 }
 2548 
                      /*
                       * Fill in the wildcard port from the packet and seed the
                       * matching end/maxend pair from th_seq or th_ack, chosen
                       * by the direction.
                       */
 2549                 if ((flags & SI_W_SPORT) != 0) {
 2550                         if (rev == 0) {
 2551                                 is->is_sport = sp;
 2552                                 is->is_send = ntohl(tcp->th_seq);
 2553                         } else {
 2554                                 is->is_sport = dp;
 2555                                 is->is_send = ntohl(tcp->th_ack);
 2556                         }
 2557                         is->is_maxsend = is->is_send + 1;
 2558                 } else if ((flags & SI_W_DPORT) != 0) {
 2559                         if (rev == 0) {
 2560                                 is->is_dport = dp;
 2561                                 is->is_dend = ntohl(tcp->th_ack);
 2562                         } else {
 2563                                 is->is_dport = sp;
 2564                                 is->is_dend = ntohl(tcp->th_seq);
 2565                         }
 2566                         is->is_maxdend = is->is_dend + 1;
 2567                 }
 2568                 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
                      /*
                       * NOTE(review): "flags" was read before any cloning above,
                       * so SI_CLONED here reflects the entry that was passed in.
                       */
 2569                 if ((flags & SI_CLONED) && softs->ipf_state_logging)
 2570                         ipf_state_log(softc, is, ISL_CLONE);
 2571         }
 2572 
 2573         ret = -1;       /* NOTE(review): ret is not read again in this function */
 2574 
              /*
               * Nothing saved yet for this out/rev combination: remember this
               * packet's masked flags.  For the reverse direction with no
               * option mask set, also record the packet's option mask and
               * require an exact match from now on (bit 0x8 is left out when
               * is_v is 6).
               */
 2575         if (is->is_flx[out][rev] == 0) {
 2576                 is->is_flx[out][rev] = flx;
 2577                 if (rev == 1 && is->is_optmsk[1] == 0) {
 2578                         is->is_opt[1] = fin->fin_optmsk;
 2579                         is->is_optmsk[1] = 0xffffffff;
 2580                         if (is->is_v == 6) {
 2581                                 is->is_opt[1] &= ~0x8;
 2582                                 is->is_optmsk[1] &= ~0x8;
 2583                         }
 2584                 }
 2585         }
 2586 
 2587         /*
 2588          * Check if the interface name for this "direction" is set and if not,
 2589          * fill it in.
 2590          */
 2591         if (is->is_ifp[idx] == NULL &&
 2592             (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
 2593                 is->is_ifp[idx] = ifp;
 2594                 COPYIFNAME(fin->fin_v, ifp, is->is_ifname[idx]);
 2595         }
              /* Tell the caller which direction the packet matched in. */
 2596         fin->fin_rev = rev;
 2597         return (is);
 2598 }
 2599 
 2600 
 2601 /* ------------------------------------------------------------------------ */
 2602 /* Function:    ipf_checkicmpmatchingstate                                  */
 2603 /* Returns:     ipstate_t* - NULL == no match, else pointer to state entry  */
 2604 /* Parameters:  fin(I) - pointer to packet information                      */
 2605 /*                                                                          */
 2606 /* If we've got an ICMP error message, using the information stored in the  */
 2607 /* ICMP packet, look for a matching state table entry.                      */
 2608 /*                                                                          */
 2609 /* If we return NULL then no lock on ipf_state is held.                     */
 2610 /* If we return non-null then a read-lock on ipf_state is held.             */
 2611 /* ------------------------------------------------------------------------ */
 2612 static ipstate_t *
 2613 ipf_checkicmpmatchingstate(fr_info_t *fin)
 2614 {
 2615         ipf_main_softc_t *softc = fin->fin_main_soft;
 2616         ipf_state_softc_t *softs = softc->ipf_state_soft;
 2617         ipstate_t *is, **isp;
 2618         i6addr_t dst, src;
 2619         struct icmp *ic;
 2620         u_short savelen;
 2621         icmphdr_t *icmp;
 2622         fr_info_t ofin;
 2623         tcphdr_t *tcp;
 2624         int type, len;
 2625         u_char  pr;
 2626         ip_t *oip;
 2627         u_int hv;
 2628 
 2629         /*
 2630          * Does it at least have the return (basic) IP header ?
 2631          * Is it an actual recognised ICMP error type?
 2632          * Only a basic IP header (no options) should be with
 2633          * an ICMP error header.
 2634          */
 2635         if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
 2636             (fin->fin_plen < ICMPERR_MINPKTLEN) ||
 2637             !(fin->fin_flx & FI_ICMPERR)) {
 2638                 SBUMPD(ipf_state_stats, iss_icmp_bad);
 2639                 return (NULL);
 2640         }
 2641         ic = fin->fin_dp;
 2642         type = ic->icmp_type;
 2643 
 2644         oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
 2645         /*
 2646          * Check if at least the old IP header (with options) and
 2647          * 8 bytes of payload are present.
 2648          */
 2649         if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) {
 2650                 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_1);
 2651                 return (NULL);
 2652         }
 2653 
 2654         /*
 2655          * Sanity Checks.
 2656          */
 2657         len = fin->fin_dlen - ICMPERR_ICMPHLEN;
 2658         if ((len <= 0) || ((IP_HL(oip) << 2) > len)) {
 2659                 DT2(iss_icmp_len, fr_info_t *, fin, struct ip*, oip);
 2660                 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_1);
 2661                 return (NULL);
 2662         }
 2663 
 2664         /*
 2665          * Is the buffer big enough for all of it ?  It's the size of the IP
 2666          * header claimed in the encapsulated part which is of concern.  It
 2667          * may be too big to be in this buffer but not so big that it's
 2668          * outside the ICMP packet, leading to TCP deref's causing problems.
 2669          * This is possible because we don't know how big oip_hl is when we
 2670          * do the pullup early in ipf_check() and thus can't guarantee it is
 2671          * all here now.
 2672          */
 2673 #ifdef  _KERNEL
 2674         {
 2675         mb_t *m;
 2676 
 2677         m = fin->fin_m;
 2678 # if SOLARIS
 2679         if ((char *)oip + len > (char *)m->b_wptr) {
 2680                 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_2);
 2681                 return (NULL);
 2682         }
 2683 # else
 2684         if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) {
 2685                 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_3);
 2686                 return (NULL);
 2687         }
 2688 # endif
 2689         }
 2690 #endif
 2691 
 2692         bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
 2693 
 2694         /*
 2695          * in the IPv4 case we must zero the i6addr union otherwise
 2696          * the IP6_EQ and IP6_NEQ macros produce the wrong results because
 2697          * of the 'junk' in the unused part of the union
 2698          */
 2699         bzero((char *)&src, sizeof(src));
 2700         bzero((char *)&dst, sizeof(dst));
 2701 
 2702         /*
 2703          * we make an fin entry to be able to feed it to
 2704          * matchsrcdst.  Note that not all fields are necessary
 2705          * but this is the cleanest way. Note further we fill
 2706          * in fin_mp such that if someone uses it we'll get
 2707          * a kernel panic. ipf_matchsrcdst does not use this.
 2708          *
 2709          * watch out here, as ip is in host order and oip in network
 2710          * order. Any change we make must be undone afterwards, like
 2711          * oip->ip_len.
 2712          */
 2713         savelen = oip->ip_len;
 2714         oip->ip_len = htons(len);
 2715 
 2716         ofin.fin_flx = FI_NOCKSUM;
 2717         ofin.fin_v = 4;
 2718         ofin.fin_ip = oip;
 2719         ofin.fin_m = NULL;      /* if dereferenced, panic XXX */
 2720         ofin.fin_mp = NULL;     /* if dereferenced, panic XXX */
 2721         (void) ipf_makefrip(IP_HL(oip) << 2, oip, &ofin);
 2722         ofin.fin_ifp = fin->fin_ifp;
 2723         ofin.fin_out = !fin->fin_out;
 2724 
 2725         hv = (pr = oip->ip_p);
 2726         src.in4 = oip->ip_src;
 2727         hv += src.in4.s_addr;
 2728         dst.in4 = oip->ip_dst;
 2729         hv += dst.in4.s_addr;
 2730 
 2731         /*
 2732          * Reset the short and bad flag here because in ipf_matchsrcdst()
 2733          * the flags for the current packet (fin_flx) are compared against
 2734          * those for the existing session.
 2735          */
 2736         ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
 2737 
 2738         /*
 2739          * Put old values of ip_len back as we don't know
 2740          * if we have to forward the packet or process it again.
 2741          */
 2742         oip->ip_len = savelen;
 2743 
 2744         switch (oip->ip_p)
 2745         {
 2746         case IPPROTO_ICMP :
 2747                 /*
 2748                  * an ICMP error can only be generated as a result of an
 2749                  * ICMP query, not as the response on an ICMP error
 2750                  *
 2751                  * XXX theoretically ICMP_ECHOREP and the other reply's are
 2752                  * ICMP query's as well, but adding them here seems strange XXX
 2753                  */
 2754                 if ((ofin.fin_flx & FI_ICMPERR) != 0) {
 2755                         DT1(iss_icmp_icmperr, fr_info_t *, &ofin);
 2756                         SBUMP(ipf_state_stats.iss_icmp_icmperr);
 2757                         return (NULL);
 2758                 }
 2759 
 2760                 /*
 2761                  * perform a lookup of the ICMP packet in the state table
 2762                  */
 2763                 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
 2764                 hv += icmp->icmp_id;
 2765                 hv = DOUBLE_HASH(hv);
 2766 
 2767                 READ_ENTER(&softc->ipf_state);
 2768                 for (isp = &softs->ipf_state_table[hv];
 2769                      ((is = *isp) != NULL); ) {
 2770                         isp = &is->is_hnext;
 2771                         if ((is->is_p != pr) || (is->is_v != 4))
 2772                                 continue;
 2773                         if (is->is_pass & FR_NOICMPERR)
 2774                                 continue;
 2775 
 2776                         is = ipf_matchsrcdst(&ofin, is, &src, &dst,
 2777                                             NULL, FI_ICMPCMP);
 2778                         if ((is != NULL) && !ipf_allowstateicmp(fin, is, &src))
 2779                                 return (is);
 2780                 }
 2781                 RWLOCK_EXIT(&softc->ipf_state);
 2782                 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_1);
 2783                 return (NULL);
 2784         case IPPROTO_TCP :
 2785         case IPPROTO_UDP :
 2786                 break;
 2787         default :
 2788                 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_2);
 2789                 return (NULL);
 2790         }
 2791 
 2792         tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
 2793 
 2794         hv += tcp->th_dport;
 2795         hv += tcp->th_sport;
 2796         hv = DOUBLE_HASH(hv);
 2797 
 2798         READ_ENTER(&softc->ipf_state);
 2799         for (isp = &softs->ipf_state_table[hv]; ((is = *isp) != NULL); ) {
 2800                 isp = &is->is_hnext;
 2801                 /*
 2802                  * Only allow this icmp through if the
 2803                  * encapsulated packet was allowed through the
 2804                  * other way around. Note that the minimal amount
 2805                  * of info present does not allow for checking against
 2806                  * tcp internals such as seq and ack numbers.   Only the
 2807                  * ports are known to be present and can be even if the
 2808                  * short flag is set.
 2809                  */
 2810                 if ((is->is_p == pr) && (is->is_v == 4) &&
 2811                     (is = ipf_matchsrcdst(&ofin, is, &src, &dst,
 2812                                           tcp, FI_ICMPCMP))) {
 2813                         if (ipf_allowstateicmp(fin, is, &src) == 0)
 2814                                 return (is);
 2815                 }
 2816         }
 2817         RWLOCK_EXIT(&softc->ipf_state);
 2818         SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_3);
 2819         return (NULL);
 2820 }
 2821 
 2822 
 2823 /* ------------------------------------------------------------------------ */
 2824 /* Function:    ipf_allowstateicmp                                          */
 2825 /* Returns:     int - 1 = packet denied, 0 = packet allowed                 */
 2826 /* Parameters:  fin(I) - pointer to packet information                      */
 2827 /*              is(I)  - pointer to state table entry                       */
 2828 /*              src(I) - source address to check permission for             */
 2829 /*                                                                          */
 2830 /* For an ICMP packet that has so far matched a state table entry, check if */
 2831 /* there are any further refinements that might mean we want to block this  */
 2832 /* packet.  This code isn't specific to either IPv4 or IPv6.                */
      /*                                                                          */
      /* Note the sense of the return value: despite the function's name, callers */
      /* must treat zero as "allowed".  As side effects, fin->fin_rev may be      */
      /* overwritten and, when the packet is allowed, one slot of                 */
      /* is->is_icmppkts[] is incremented.                                        */
 2833 /* ------------------------------------------------------------------------ */
 2834 static int
 2835 ipf_allowstateicmp(fr_info_t *fin, ipstate_t *is, i6addr_t *src)
 2836 {
 2837         ipf_main_softc_t *softc = fin->fin_main_soft;
 2838         ipf_state_softc_t *softs = softc->ipf_state_soft;
 2839         frentry_t *savefr;
 2840         frentry_t *fr;
 2841         u_32_t ipass;
 2842         int backward;
 2843         int oi;
 2844         int i;
 2845 
              /*
               * If the rule attached to the state has an ICMP group, hand the
               * packet to ipf_scanlist() with fin_fr temporarily pointing at the
               * group's first rule (fg_start).  fin_fr is restored afterwards and
               * the packet is denied when the result satisfies FR_ISBLOCK().
               */
 2846         fr = is->is_rule;
 2847         if (fr != NULL && fr->fr_icmpgrp != NULL) {
 2848                 savefr = fin->fin_fr;
 2849                 fin->fin_fr = fr->fr_icmpgrp->fg_start;
 2850 
 2851                 ipass = ipf_scanlist(fin, softc->ipf_pass);
 2852                 fin->fin_fr = savefr;
 2853                 if (FR_ISBLOCK(ipass)) {
 2854                         SBUMPD(ipf_state_stats, iss_icmp_headblock);
 2855                         return (1);
 2856                 }
 2857         }
 2858 
 2859         /*
 2860          * i  : the index of this packet (the icmp unreachable)
 2861          * oi : the index of the original packet found in the
 2862          *      icmp header (i.e. the packet causing this icmp)
 2863          * backward : original packet was backward compared to
 2864          *            the state
              *
              * Both indices are built as (direction bit << 1) + in/out bit; the
              * two bits used for oi are the complements of those used for i.
              * fin->fin_rev is overwritten here with !backward.
 2865          */
 2866         backward = IP6_NEQ(&is->is_src, src);
 2867         fin->fin_rev = !backward;
 2868         i = (!backward << 1) + fin->fin_out;
 2869         oi = (backward << 1) + !fin->fin_out;
 2870 
              /* The state's pass flags forbid ICMP errors altogether. */
 2871         if (is->is_pass & FR_NOICMPERR) {
 2872                 SBUMPD(ipf_state_stats, iss_icmp_banned);
 2873                 return (1);
 2874         }
              /*
               * Refuse once more ICMP packets have been counted in slot i than
               * ordinary packets have been counted in slot oi.
               */
 2875         if (is->is_icmppkts[i] > is->is_pkts[oi]) {
 2876                 SBUMPD(ipf_state_stats, iss_icmp_toomany);
 2877                 return (1);
 2878         }
 2879 
              /* Allowed: record the hit and count this ICMP packet. */
 2880         DT2(iss_icmp_hits, fr_info_t *, fin, ipstate_t *, is);
 2881         SBUMP(ipf_state_stats.iss_icmp_hits);
 2882         is->is_icmppkts[i]++;
 2883 
 2884         /*
 2885          * we deliberately do not touch the timeouts
 2886          * for the accompanying state table entry.
 2887          * It remains to be seen if that is correct. XXX
 2888          */
 2889         return (0);
 2890 }
 2891 
 2892 
 2893 /* ------------------------------------------------------------------------ */
 2894 /* Function:    ipf_ipsmove                                                 */
 2895 /* Returns:     Nil                                                         */
      /* Parameters:  softs(I) - pointer to state context structure               */
 2896 /*              is(I)    - pointer to state table entry                     */
 2897 /*              hv(I)    - new hash value, before DOUBLE_HASH is applied    */
 2898 /* Write Locks: ipf_state                                                   */
 2899 /*                                                                          */
 2900 /* Move a state entry from one position in the hash table to another.       */
      /* The entry is unlinked from the bucket recorded in is->is_hv and pushed   */
      /* onto the head of bucket DOUBLE_HASH(hv); is->is_hv and the per-bucket    */
      /* statistics in softs->ipf_state_stats are updated to match.               */
 2901 /* ------------------------------------------------------------------------ */
 2902 static void
 2903 ipf_ipsmove(ipf_state_softc_t *softs, ipstate_t *is, u_int hv)
 2904 {
 2905         ipstate_t **isp;
 2906         u_int hvm;
 2907 
 2908         hvm = is->is_hv;
 2909 
 2910         /* TRACE is, is_hv, hvm */
 2911 
 2912         /*
 2913          * Remove the hash from the old location...
              * is_phnext points at whichever pointer currently references this
              * entry, so writing through it unlinks the entry.
 2914          */
 2915         isp = is->is_phnext;
 2916         if (is->is_hnext)
 2917                 is->is_hnext->is_phnext = isp;
 2918         *isp = is->is_hnext;
              /* iss_inuse drops when the old bucket has just become empty. */
 2919         if (softs->ipf_state_table[hvm] == NULL)
 2920                 softs->ipf_state_stats.iss_inuse--;
 2921         softs->ipf_state_stats.iss_bucketlen[hvm]--;
 2922 
 2923         /*
 2924          * ...and put the hash in the new one.
 2925          */
 2926         hvm = DOUBLE_HASH(hv);
 2927         is->is_hv = hvm;
 2928 
 2929         /* TRACE is, hv, is_hv, hvm */
 2930 
              /*
               * Insert at the head of the new bucket.  If the bucket already has
               * an entry, that entry's back pointer is redirected to our is_hnext;
               * otherwise the bucket was empty and iss_inuse goes up.
               */
 2931         isp = &softs->ipf_state_table[hvm];
 2932         if (*isp)
 2933                 (*isp)->is_phnext = &is->is_hnext;
 2934         else
 2935                 softs->ipf_state_stats.iss_inuse++;
 2936         softs->ipf_state_stats.iss_bucketlen[hvm]++;
 2937         is->is_phnext = isp;
 2938         is->is_hnext = *isp;
 2939         *isp = is;
 2940 }
 2941 
 2942 
 2943 /* ------------------------------------------------------------------------ */
 2944 /* Function:    ipf_state_lookup                                            */
 2945 /* Returns:     ipstate_t* - NULL == no matching state found,               */
 2946 /*                           else pointer to state information is returned  */
 2947 /* Parameters:  fin(I)  - pointer to packet information                     */
 2948 /*              tcp(I)  - pointer to TCP/UDP header.                        */
 2949 /*              ifqp(O) - pointer for storing tailq timeout                 */
 2950 /*                                                                          */
 2951 /* Search the state table for a matching entry to the packet described by   */
 2952 /* the contents of *fin. For certain protocols, when a match is found the   */
 2953 /* timeout queue is also selected and stored in ifqp if it is non-NULL.     */
      /* The tcp argument is ignored: it is reassigned from fin->fin_dp on entry. */
      /* For TCP, UDP and the default case ifqp is set to NULL, so no queue is    */
      /* reported back for those protocols.                                       */
 2954 /*                                                                          */
 2955 /* If we return NULL then no lock on ipf_state is held.                     */
 2956 /* If we return non-null then a read-lock on ipf_state is held.             */
 2957 /* ------------------------------------------------------------------------ */
 2958 ipstate_t *
 2959 ipf_state_lookup(fr_info_t *fin, tcphdr_t *tcp, ipftq_t **ifqp)
 2960 {
 2961         ipf_main_softc_t *softc = fin->fin_main_soft;
 2962         ipf_state_softc_t *softs = softc->ipf_state_soft;
 2963         u_int hv, hvm, pr, v, tryagain;
 2964         ipstate_t *is, **isp;
 2965         u_short dport, sport;
 2966         i6addr_t src, dst;
 2967         struct icmp *ic;
 2968         ipftq_t *ifq;
 2969         int oow;
 2970 
 2971         is = NULL;
 2972         ifq = NULL;
              /* The caller-supplied tcp pointer is discarded in favour of fin_dp. */
 2973         tcp = fin->fin_dp;
 2974         ic = (struct icmp *)tcp;
              /*
               * Start the hash from the protocol number plus the in4.s_addr view
               * of each address (for IPv6 presumably the first 32-bit word; the
               * remaining words i6[1..3] are added below).
               */
 2975         hv = (pr = fin->fin_fi.fi_p);
 2976         src = fin->fin_fi.fi_src;
 2977         dst = fin->fin_fi.fi_dst;
 2978         hv += src.in4.s_addr;
 2979         hv += dst.in4.s_addr;
 2980 
 2981         v = fin->fin_fi.fi_v;
 2982 #ifdef  USE_INET6
 2983         if (v == 6) {
 2984                 hv  += fin->fin_fi.fi_src.i6[1];
 2985                 hv  += fin->fin_fi.fi_src.i6[2];
 2986                 hv  += fin->fin_fi.fi_src.i6[3];
 2987 
                      /*
                       * ICMPv6 to a multicast destination: take the destination
                       * back out of the hash instead of adding the rest of it.
                       */
 2988                 if ((fin->fin_p == IPPROTO_ICMPV6) &&
 2989                     IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
 2990                         hv -= dst.in4.s_addr;
 2991                 } else {
 2992                         hv += fin->fin_fi.fi_dst.i6[1];
 2993                         hv += fin->fin_fi.fi_dst.i6[2];
 2994                         hv += fin->fin_fi.fi_dst.i6[3];
 2995                 }
 2996         }
 2997 #endif
              /*
               * IPv4 packets flagged multicast/broadcast: remove one address from
               * the hash, the source when inbound and the destination when
               * outbound.
               */
 2998         if ((v == 4) &&
 2999             (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
 3000                 if (fin->fin_out == 0) {
 3001                         hv -= src.in4.s_addr;
 3002                 } else {
 3003                         hv -= dst.in4.s_addr;
 3004                 }
 3005         }
 3006 
 3007         /* TRACE fin_saddr, fin_daddr, hv */
 3008 
 3009         /*
 3010          * Search the hash table for matching packet header info.
 3011          */
 3012         switch (pr)
 3013         {
 3014 #ifdef  USE_INET6
 3015         case IPPROTO_ICMPV6 :
 3016                 tryagain = 0;
                      /* Echo request/reply also hash on the ICMP id. */
 3017                 if (v == 6) {
 3018                         if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
 3019                             (ic->icmp_type == ICMP6_ECHO_REPLY)) {
 3020                                 hv += ic->icmp_id;
 3021                         }
 3022                 }
 3023                 READ_ENTER(&softc->ipf_state);
 3024 icmp6again:
 3025                 hvm = DOUBLE_HASH(hv);
 3026                 for (isp = &softs->ipf_state_table[hvm];
 3027                      ((is = *isp) != NULL); ) {
                              /* Advance first: "is" may be replaced below. */
 3028                         isp = &is->is_hnext;
 3029                         if ((is->is_p != pr) || (is->is_v != v))
 3030                                 continue;
 3031                         is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
 3032                         if (is != NULL &&
 3033                             ipf_matchicmpqueryreply(v, &is->is_icmp,
 3034                                                    ic, fin->fin_rev)) {
 3035                                 if (fin->fin_rev)
 3036                                         ifq = &softs->ipf_state_icmpacktq;
 3037                                 else
 3038                                         ifq = &softs->ipf_state_icmptq;
 3039                                 break;
 3040                         }
 3041                 }
 3042 
 3043                 if (is != NULL) {
                              /*
                               * Found on the retry pass (lock taken with
                               * WRITE_ENTER): unless the entry carries SI_W_DADDR,
                               * put the source words back into the hash, move the
                               * entry to that bucket and downgrade the lock.
                               * NOTE(review): when SI_W_DADDR is set on a retry
                               * match, MUTEX_DOWNGRADE is not called, so the lock
                               * is returned as acquired by WRITE_ENTER - confirm
                               * callers only rely on RWLOCK_EXIT.
                               */
 3044                         if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
 3045                                 hv += fin->fin_fi.fi_src.i6[0];
 3046                                 hv += fin->fin_fi.fi_src.i6[1];
 3047                                 hv += fin->fin_fi.fi_src.i6[2];
 3048                                 hv += fin->fin_fi.fi_src.i6[3];
 3049                                 ipf_ipsmove(softs, is, hv);
 3050                                 MUTEX_DOWNGRADE(&softc->ipf_state);
 3051                         }
 3052                         break;
 3053                 }
 3054                 RWLOCK_EXIT(&softc->ipf_state);
 3055 
 3056                 /*
 3057                  * No matching icmp state entry. Perhaps this is a
 3058                  * response to another state entry.
 3059                  *
 3060                  * XXX With some ICMP6 packets, the "other" address is already
 3061                  * in the packet, after the ICMP6 header, and this could be
 3062                  * used in place of the multicast address.  However, taking
 3063                  * advantage of this requires some significant code changes
 3064                  * to handle the specific types where that is the case.
 3065                  */
                      /*
                       * One retry only: drop all four source address words from the
                       * hash and search again, this time holding the lock taken
                       * with WRITE_ENTER so that ipf_ipsmove() may be called.
                       */
 3066                 if ((softs->ipf_state_stats.iss_wild != 0) &&
 3067                     ((fin->fin_flx & FI_NOWILD) == 0) &&
 3068                     (v == 6) && (tryagain == 0)) {
 3069                         hv -= fin->fin_fi.fi_src.i6[0];
 3070                         hv -= fin->fin_fi.fi_src.i6[1];
 3071                         hv -= fin->fin_fi.fi_src.i6[2];
 3072                         hv -= fin->fin_fi.fi_src.i6[3];
 3073                         tryagain = 1;
 3074                         WRITE_ENTER(&softc->ipf_state);
 3075                         goto icmp6again;
 3076                 }
 3077 
                      /*
                       * A non-NULL result is returned directly, bypassing the
                       * timeout queue selection at the end of this function.
                       * NOTE(review): lock state on return is whatever
                       * ipf_checkicmp6matchingstate() leaves - not visible here.
                       */
 3078                 is = ipf_checkicmp6matchingstate(fin);
 3079                 if (is != NULL)
 3080                         return (is);
 3081                 break;
 3082 #endif
 3083 
 3084         case IPPROTO_ICMP :
 3085                 if (v == 4) {
 3086                         hv += ic->icmp_id;
 3087                 }
 3088                 hv = DOUBLE_HASH(hv);
 3089                 READ_ENTER(&softc->ipf_state);
 3090                 for (isp = &softs->ipf_state_table[hv];
 3091                      ((is = *isp) != NULL); ) {
 3092                         isp = &is->is_hnext;
 3093                         if ((is->is_p != pr) || (is->is_v != v))
 3094                                 continue;
 3095                         is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
 3096                         if ((is != NULL) &&
 3097                             (ic->icmp_id == is->is_icmp.ici_id) &&
 3098                             ipf_matchicmpqueryreply(v, &is->is_icmp,
 3099                                                    ic, fin->fin_rev)) {
 3100                                 if (fin->fin_rev)
 3101                                         ifq = &softs->ipf_state_icmpacktq;
 3102                                 else
 3103                                         ifq = &softs->ipf_state_icmptq;
 3104                                 break;
 3105                         }
 3106                 }
                      /* Keep the lock only when an entry is being returned. */
 3107                 if (is == NULL) {
 3108                         RWLOCK_EXIT(&softc->ipf_state);
 3109                 }
 3110                 break;
 3111 
 3112         case IPPROTO_TCP :
 3113         case IPPROTO_UDP :
                      /* No timeout queue is reported back for TCP/UDP. */
 3114                 ifqp = NULL;
 3115                 sport = htons(fin->fin_data[0]);
 3116                 hv += sport;
 3117                 dport = htons(fin->fin_data[1]);
 3118                 hv += dport;
 3119                 oow = 0;
 3120                 tryagain = 0;
 3121                 READ_ENTER(&softc->ipf_state);
 3122 retry_tcpudp:
 3123                 hvm = DOUBLE_HASH(hv);
 3124 
 3125                 /* TRACE hv, hvm */
 3126 
 3127                 for (isp = &softs->ipf_state_table[hvm];
 3128                      ((is = *isp) != NULL); ) {
 3129                         isp = &is->is_hnext;
 3130                         if ((is->is_p != pr) || (is->is_v != v))
 3131                                 continue;
                              /*
                               * FI_OOW is cleared before each candidate; if
                               * ipf_state_tcp() rejects the candidate, whatever
                               * FI_OOW it left in fin_flx is remembered in oow.
                               */
 3132                         fin->fin_flx &= ~FI_OOW;
 3133                         is = ipf_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
 3134                         if (is != NULL) {
 3135                                 if (pr == IPPROTO_TCP) {
 3136                                         if (!ipf_state_tcp(softc, softs, fin,
 3137                                                            tcp, is)) {
 3138                                                 oow |= fin->fin_flx & FI_OOW;
 3139                                                 continue;
 3140                                         }
 3141                                 }
 3142                                 break;
 3143                         }
 3144                 }
 3145                 if (is != NULL) {
                              /*
                               * Found on a retry pass (lock taken with
                               * WRITE_ENTER): unless the entry is a clone or
                               * wildcard entry, add the ports to hv, move the
                               * entry to that bucket and downgrade the lock.
                               * NOTE(review): a retry match that has one of these
                               * flags returns without MUTEX_DOWNGRADE - confirm
                               * callers only rely on RWLOCK_EXIT.
                               */
 3146                         if (tryagain &&
 3147                             !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
 3148                                 hv += dport;
 3149                                 hv += sport;
 3150                                 ipf_ipsmove(softs, is, hv);
 3151                                 MUTEX_DOWNGRADE(&softc->ipf_state);
 3152                         }
 3153                         break;
 3154                 }
 3155                 RWLOCK_EXIT(&softc->ipf_state);
 3156 
                      /*
                       * Up to two further passes when wildcard state exists and
                       * the packet does not forbid it: first with the ports taken
                       * out of the hash, then with the hash rebuilt from the
                       * protocol, a single address and the ports.
                       */
 3157                 if ((softs->ipf_state_stats.iss_wild != 0) &&
 3158                     ((fin->fin_flx & FI_NOWILD) == 0)) {
 3159                         if (tryagain == 0) {
 3160                                 hv -= dport;
 3161                                 hv -= sport;
 3162                         } else if (tryagain == 1) {
 3163                                 hv = fin->fin_fi.fi_p;
 3164                                 /*
 3165                                  * If we try to pretend this is a reply to a
 3166                                  * multicast/broadcast packet then we need to
 3167                                  * exclude part of the address from the hash
 3168                                  * calculation.
 3169                                  */
 3170                                 if (fin->fin_out == 0) {
 3171                                         hv += src.in4.s_addr;
 3172                                 } else {
 3173                                         hv += dst.in4.s_addr;
 3174                                 }
 3175                                 hv += dport;
 3176                                 hv += sport;
 3177                         }
 3178                         tryagain++;
 3179                         if (tryagain <= 2) {
 3180                                 WRITE_ENTER(&softc->ipf_state);
 3181                                 goto retry_tcpudp;
 3182                         }
 3183                 }
                      /* Miss: hand any FI_OOW seen along the way back in fin_flx. */
 3184                 fin->fin_flx |= oow;
 3185                 break;
 3186 
 3187 #if 0
 3188         case IPPROTO_GRE :
 3189                 gre = fin->fin_dp;
 3190                 if (GRE_REV(gre->gr_flags) == 1) {
 3191                         hv += gre->gr_call;
 3192                 }
 3193                 /* FALLTHROUGH */
 3194 #endif
 3195         default :
                      /*
                       * NOTE(review): ifqp is cleared here, so the ifq chosen in
                       * the loop below is never stored for the caller.
                       */
 3196                 ifqp = NULL;
 3197                 hvm = DOUBLE_HASH(hv);
 3198                 READ_ENTER(&softc->ipf_state);
 3199                 for (isp = &softs->ipf_state_table[hvm];
 3200                      ((is = *isp) != NULL); ) {
 3201                         isp = &is->is_hnext;
 3202                         if ((is->is_p != pr) || (is->is_v != v))
 3203                                 continue;
 3204                         is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
 3205                         if (is != NULL) {
 3206                                 ifq = &softs->ipf_state_iptq;
 3207                                 break;
 3208                         }
 3209                 }
 3210                 if (is == NULL) {
 3211                         RWLOCK_EXIT(&softc->ipf_state);
 3212                 }
 3213                 break;
 3214         }
 3215 
              /*
               * On a match, a rule-based timeout queue for this direction (when
               * one is set) takes precedence over the protocol default chosen
               * above; the result is stored only if the caller still has a
               * non-NULL ifqp.
               */
 3216         if (is != NULL) {
 3217                 if (((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) &&
 3218                     (is->is_tqehead[fin->fin_rev] != NULL))
 3219                         ifq = is->is_tqehead[fin->fin_rev];
 3220                 if (ifq != NULL && ifqp != NULL)
 3221                         *ifqp = ifq;
 3222         } else {
 3223                 SBUMP(ipf_state_stats.iss_lookup_miss);
 3224         }
 3225         return (is);
 3226 }
 3227 
 3228 
 3229 /* ------------------------------------------------------------------------ */
 3230 /* Function:    ipf_state_check                                             */
 3231 /* Returns:     frentry_t* - NULL == search failed,                         */
 3232 /*                           else pointer to rule for matching state        */
 3233 /* Parameters:  fin(I)   - pointer to packet information                    */
 3234 /*              passp(O) - pointer to filtering result flags                */
 3235 /*                                                                          */
 3236 /* Check if a packet is associated with an entry in the state table.        */
      /* On a match the state's pass flags are stored in *passp, FI_STATE is set  */
      /* in fin->fin_flx, the per-direction packet and byte counters of the state */
      /* are updated and the ipf_state lock is released before returning.         */
 3237 /* ------------------------------------------------------------------------ */
 3238 frentry_t *
 3239 ipf_state_check(fr_info_t *fin, u_32_t *passp)
 3240 {
 3241         ipf_main_softc_t *softc = fin->fin_main_soft;
 3242         ipf_state_softc_t *softs = softc->ipf_state_soft;
 3243         ipftqent_t *tqe;
 3244         ipstate_t *is;
 3245         frentry_t *fr;
 3246         tcphdr_t *tcp;
 3247         ipftq_t *ifq;
 3248         u_int pass;
 3249         int inout;
 3250 
              /* Nothing to do while ipf_state_lock is set or the list is empty. */
 3251         if (softs->ipf_state_lock || (softs->ipf_state_list == NULL))
 3252                 return (NULL);
 3253 
              /* Short, bad or fragment-body packets are never matched to state. */
 3254         if (fin->fin_flx & (FI_SHORT|FI_FRAGBODY|FI_BAD)) {
 3255                 SBUMPD(ipf_state_stats, iss_check_bad);
 3256                 return (NULL);
 3257         }
 3258 
 3259         if ((fin->fin_flx & FI_TCPUDP) ||
 3260             (fin->fin_fi.fi_p == IPPROTO_ICMP)
 3261 #ifdef  USE_INET6
 3262             || (fin->fin_fi.fi_p == IPPROTO_ICMPV6)
 3263 #endif
 3264             )
 3265                 tcp = fin->fin_dp;
 3266         else
 3267                 tcp = NULL;
 3268 
 3269         ifq = NULL;
 3270         /*
 3271          * Search the hash table for matching packet header info.
 3272          */
 3273         is = ipf_state_lookup(fin, tcp, &ifq);
 3274 
              /*
               * Protocol-specific follow-up.  For ICMP and ICMPv6 a lookup miss
               * falls back to matching the packet against other state via the
               * ipf_checkicmp*matchingstate() helpers.  The default arm runs for
               * every protocol other than ICMP, ICMPv6 and TCP.
               */
 3275         switch (fin->fin_p)
 3276         {
 3277 #ifdef  USE_INET6
 3278         case IPPROTO_ICMPV6 :
 3279                 if (is != NULL)
 3280                         break;
 3281                 if (fin->fin_v == 6) {
 3282                         is = ipf_checkicmp6matchingstate(fin);
 3283                 }
 3284                 break;
 3285 #endif
 3286         case IPPROTO_ICMP :
 3287                 if (is != NULL)
 3288                         break;
 3289                 /*
 3290                  * No matching icmp state entry. Perhaps this is a
 3291                  * response to another state entry.
 3292                  */
 3293                 is = ipf_checkicmpmatchingstate(fin);
 3294                 break;
 3295 
 3296         case IPPROTO_TCP :
 3297                 if (is == NULL)
 3298                         break;
 3299 
                      /* Sequence/ack rewriting applies only with FR_NEWISN. */
 3300                 if (is->is_pass & FR_NEWISN) {
 3301                         if (fin->fin_out == 0)
 3302                                 ipf_fixinisn(fin, is);
 3303                         else if (fin->fin_out == 1)
 3304                                 ipf_fixoutisn(fin, is);
 3305                 }
 3306                 break;
 3307         default :
 3308                 if (fin->fin_rev)
 3309                         ifq = &softs->ipf_state_udpacktq;
 3310                 else
 3311                         ifq = &softs->ipf_state_udptq;
 3312                 break;
 3313         }
 3314         if (is == NULL) {
 3315                 SBUMP(ipf_state_stats.iss_check_miss);
 3316                 return (NULL);
 3317         }
 3318 
              /*
               * NOTE(review): is_rule may be NULL.  In that case this function
               * still completes the match (counters, *passp, FI_STATE) but
               * returns NULL - confirm callers distinguish this from a miss.
               */
 3319         fr = is->is_rule;
 3320         if (fr != NULL) {
                      /*
                       * Inbound packets for a rule with a NAT tag must carry a
                       * matching tag; otherwise drop the lock and report a miss.
                       */
 3321                 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
 3322                         if (fin->fin_nattag == NULL) {
 3323                                 RWLOCK_EXIT(&softc->ipf_state);
 3324                                 SBUMPD(ipf_state_stats, iss_check_notag);
 3325                                 return (NULL);
 3326                         }
 3327                         if (ipf_matchtag(&fr->fr_nattag, fin->fin_nattag)!=0) {
 3328                                 RWLOCK_EXIT(&softc->ipf_state);
 3329                                 SBUMPD(ipf_state_stats, iss_check_nattag);
 3330                                 return (NULL);
 3331                         }
 3332                 }
 3333                 (void) strncpy(fin->fin_group, FR_NAME(fr, fr_group),
 3334                                FR_GROUPLEN);
 3335                 fin->fin_icode = fr->fr_icode;
 3336         }
 3337 
 3338         fin->fin_rule = is->is_rulen;
 3339         fin->fin_fr = fr;
 3340 
 3341         /*
 3342          * If this packet is a fragment and the rule says to track fragments,
 3343          * then create a new fragment cache entry.
 3344          */
 3345         if (fin->fin_flx & FI_FRAG && FR_ISPASS(is->is_pass) &&
 3346            is->is_pass & FR_KEEPFRAG)
 3347                 (void) ipf_frag_new(softc, fin, is->is_pass);
 3348 
 3349         /*
 3350          * For TCP packets, ifq == NULL.  For all others, check if this new
 3351          * queue is different to the last one it was on and move it if so.
 3352          */
 3353         tqe = &is->is_sti;
 3354         if ((tqe->tqe_flags & TQE_RULEBASED) != 0)
 3355                 ifq = is->is_tqehead[fin->fin_rev];
 3356 
              /* Queue move and counter updates are done under the entry's lock. */
 3357         MUTEX_ENTER(&is->is_lock);
 3358 
 3359         if (ifq != NULL)
 3360                 ipf_movequeue(softc->ipf_ticks, tqe, tqe->tqe_ifq, ifq);
 3361 
              /* Counter slot: (fin_rev << 1) + fin_out. */
 3362         inout = (fin->fin_rev << 1) + fin->fin_out;
 3363         is->is_pkts[inout]++;
 3364         is->is_bytes[inout] += fin->fin_plen;
 3365         fin->fin_pktnum = is->is_pkts[inout] + is->is_icmppkts[inout];
 3366 
 3367         MUTEX_EXIT(&is->is_lock);
 3368 
 3369         pass = is->is_pass;
 3370 
 3371         if (is->is_flags & IS_STATESYNC)
 3372                 ipf_sync_update(softc, SMC_STATE, fin, is->is_sync);
 3373 
 3374         RWLOCK_EXIT(&softc->ipf_state);
 3375 
 3376         SBUMP(ipf_state_stats.iss_hits);
 3377 
 3378         fin->fin_dif = &is->is_dif;
 3379         fin->fin_tif = &is->is_tifs[fin->fin_rev];
 3380         fin->fin_flx |= FI_STATE;
              /* With FR_LOGFIRST set, strip both log flags from the result. */
 3381         if ((pass & FR_LOGFIRST) != 0)
 3382                 pass &= ~(FR_LOGFIRST|FR_LOG);
 3383         *passp = pass;
 3384         return (fr);
 3385 }
 3386 
 3387 
 3388 /* ------------------------------------------------------------------------ */
 3389 /* Function:    ipf_fixoutisn                                               */
 3390 /* Returns:     Nil                                                         */
 3391 /* Parameters:  fin(I) - pointer to packet information                      */
 3392 /*              is(I)  - pointer to master state structure                  */
 3393 /*                                                                          */
 3394 /* Called only for outbound packets, adjusts the sequence number and the    */
 3395 /* TCP checksum to match that change.                                       */
      /* The TCP header at fin->fin_dp is modified in place.  Nothing is changed  */
      /* unless fin->fin_cksum is below FI_CK_L4PART.                             */
 3396 /* ------------------------------------------------------------------------ */
 3397 static void
 3398 ipf_fixoutisn(fr_info_t *fin, ipstate_t *is)
 3399 {
 3400         tcphdr_t *tcp;
 3401         int rev;
 3402         u_32_t seq;
 3403 
 3404         tcp = fin->fin_dp;
 3405         rev = fin->fin_rev;
              /*
               * IS_ISNSYN with rev == 0: add is_isninc[0] to th_seq and patch the
               * checksum with is_sumd[0].
               */
 3406         if ((is->is_flags & IS_ISNSYN) != 0) {
 3407                 if ((rev == 0) && (fin->fin_cksum < FI_CK_L4PART)) {
 3408                         seq = ntohl(tcp->th_seq);
 3409                         seq += is->is_isninc[0];
 3410                         tcp->th_seq = htonl(seq);
 3411                         ipf_fix_outcksum(0, &tcp->th_sum, is->is_sumd[0], 0);
 3412                 }
 3413         }
              /*
               * IS_ISNACK with rev == 1: the same adjustment using slot 1 of
               * is_isninc[] and is_sumd[].
               */
 3414         if ((is->is_flags & IS_ISNACK) != 0) {
 3415                 if ((rev == 1) && (fin->fin_cksum < FI_CK_L4PART)) {
 3416                         seq = ntohl(tcp->th_seq);
 3417                         seq += is->is_isninc[1];
 3418                         tcp->th_seq = htonl(seq);
 3419                         ipf_fix_outcksum(0, &tcp->th_sum, is->is_sumd[1], 0);
 3420                 }
 3421         }
 3422 }
 3423 
 3424 
 3425 /* ------------------------------------------------------------------------ */
 3426 /* Function:    ipf_fixinisn                                                */
 3427 /* Returns:     Nil                                                         */
 3428 /* Parameters:  fin(I) - pointer to packet information                      */
 3429 /*              is(I)  - pointer to master state structure                  */
 3430 /*                                                                          */
 3431 /* Called only for inbound packets, adjusts the acknowledge number and the  */
 3432 /* TCP checksum to match that change.                                       */
      /* The TCP header at fin->fin_dp is modified in place.  Nothing is changed  */
      /* unless fin->fin_cksum is below FI_CK_L4PART.                             */
 3433 /* ------------------------------------------------------------------------ */
 3434 static void
 3435 ipf_fixinisn(fr_info_t *fin, ipstate_t *is)
 3436 {
 3437         tcphdr_t *tcp;
 3438         int rev;
 3439         u_32_t ack;
 3440 
 3441         tcp = fin->fin_dp;
 3442         rev = fin->fin_rev;
              /*
               * IS_ISNSYN with rev == 1: subtract is_isninc[0] from th_ack and
               * patch the checksum with is_sumd[0].
               */
 3443         if ((is->is_flags & IS_ISNSYN) != 0) {
 3444                 if ((rev == 1) && (fin->fin_cksum < FI_CK_L4PART)) {
 3445                         ack = ntohl(tcp->th_ack);
 3446                         ack -= is->is_isninc[0];
 3447                         tcp->th_ack = htonl(ack);
 3448                         ipf_fix_incksum(0, &tcp->th_sum, is->is_sumd[0], 0);
 3449                 }
 3450         }
              /*
               * IS_ISNACK with rev == 0: the same adjustment using slot 1 of
               * is_isninc[] and is_sumd[].
               */
 3451         if ((is->is_flags & IS_ISNACK) != 0) {
 3452                 if ((rev == 0) && (fin->fin_cksum < FI_CK_L4PART)) {
 3453                         ack = ntohl(tcp->th_ack);
 3454                         ack -= is->is_isninc[1];
 3455                         tcp->th_ack = htonl(ack);
 3456                         ipf_fix_incksum(0, &tcp->th_sum, is->is_sumd[1], 0);
 3457                 }
 3458         }
 3459 }
 3460 
 3461 
 3462 /* ------------------------------------------------------------------------ */
 3463 /* Function:    ipf_state_sync                                              */
 3464 /* Returns:     Nil                                                         */
 3465 /* Parameters:  softc(I) - pointer to soft context main structure           */
 3466 /*              ifp(I)   - pointer to interface                             */
 3467 /*                                                                          */
 3468 /* Walk through all state entries and if an interface pointer match is      */
 3469 /* found then look it up again, based on its name in case the pointer has   */
 3470 /* changed since last time.                                                 */
 3471 /*                                                                          */
 3472 /* If ifp is passed in as being non-null then we are only doing updates for */
 3473 /* existing, matching, uses of it.                                          */
 3474 /* ------------------------------------------------------------------------ */
 3475 void
 3476 ipf_state_sync(ipf_main_softc_t *softc, void *ifp)
 3477 {
 3478         ipf_state_softc_t *softs = softc->ipf_state_soft;
 3479         ipstate_t *is;
 3480         int i;
 3481 
 3482         if (softc->ipf_running <= 0)
 3483                 return;
 3484 
 3485         WRITE_ENTER(&softc->ipf_state);
 3486 
 3487         if (softc->ipf_running <= 0) {
 3488                 RWLOCK_EXIT(&softc->ipf_state);
 3489                 return;
 3490         }
 3491 
 3492         for (is = softs->ipf_state_list; is; is = is->is_next) {
 3493                 /*
 3494                  * Look up all the interface names in the state entry.
 3495                  */
 3496                 for (i = 0; i < FR_NUM(is->is_ifp); i++) {
 3497                         if (ifp == NULL || ifp == is->is_ifp[i])
 3498                                 is->is_ifp[i] = ipf_resolvenic(softc,
 3499                                                               is->is_ifname[i],
 3500                                                               is->is_v);
 3501                 }
 3502         }
 3503         RWLOCK_EXIT(&softc->ipf_state);
 3504 }
 3505 
 3506 
 3507 /* ------------------------------------------------------------------------ */
 3508 /* Function:    ipf_state_del                                               */
 3509 /* Returns:     int    - 0 = deleted, else refernce count on active struct  */
 3510 /* Parameters:  softc(I) - pointer to soft context main structure           */
 3511 /*              is(I)  - pointer to state structure to delete               */
 3512 /*              why(I) - if not 0, log reason why it was deleted            */
 3513 /* Write Locks: ipf_state                                                   */
 3514 /*                                                                          */
 3515 /* Deletes a state entry from the enumerated list as well as the hash table */
 3516 /* and timeout queue lists.  Make adjustments to hash table statistics and  */
 3517 /* global counters as required.                                             */
 3518 /* ------------------------------------------------------------------------ */
 3519 static int
 3520 ipf_state_del(ipf_main_softc_t *softc, ipstate_t *is, int why)
 3521 {
 3522         ipf_state_softc_t *softs = softc->ipf_state_soft;
 3523         int orphan = 1;
 3524         frentry_t *fr;
 3525 
 3526         /*
 3527          * Since we want to delete this, remove it from the state table,
 3528          * where it can be found & used, first.
 3529          */
 3530         if (is->is_phnext != NULL) {
 3531                 *is->is_phnext = is->is_hnext;
 3532                 if (is->is_hnext != NULL)
 3533                         is->is_hnext->is_phnext = is->is_phnext;
 3534                 if (softs->ipf_state_table[is->is_hv] == NULL)
 3535                         softs->ipf_state_stats.iss_inuse--;
 3536                 softs->ipf_state_stats.iss_bucketlen[is->is_hv]--;
 3537 
 3538                 is->is_phnext = NULL;
 3539                 is->is_hnext = NULL;
 3540                 orphan = 0;
 3541         }
 3542 
 3543         /*
 3544          * Because ipf_state_stats.iss_wild is a count of entries in the state
 3545          * table that have wildcard flags set, only decerement it once
 3546          * and do it here.
 3547          */
 3548         if (is->is_flags & (SI_WILDP|SI_WILDA)) {
 3549                 if (!(is->is_flags & SI_CLONED)) {
 3550                         ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
 3551                 }
 3552                 is->is_flags &= ~(SI_WILDP|SI_WILDA);
 3553         }
 3554 
 3555         /*
 3556          * Next, remove it from the timeout queue it is in.
 3557          */
 3558         if (is->is_sti.tqe_ifq != NULL)
 3559                 ipf_deletequeueentry(&is->is_sti);
 3560 
 3561         /*
 3562          * If it is still in use by something else, do not go any further,
 3563          * but note that at this point it is now an orphan.  How can this
 3564          * be?  ipf_state_flush() calls ipf_delete() directly because it wants
 3565          * to empty the table out and if something has a hold on a state
 3566          * entry (such as ipfstat), it'll do the deref path that'll bring
 3567          * us back here to do the real delete & free.
 3568          */
 3569         MUTEX_ENTER(&is->is_lock);
 3570         if (is->is_me != NULL) {
 3571                 *is->is_me = NULL;
 3572                 is->is_me = NULL;
 3573                 is->is_ref--;
 3574         }
 3575         is->is_ref--;
 3576         if (is->is_ref > 0) {
 3577                 int refs;
 3578 
 3579                 refs = is->is_ref;
 3580                 MUTEX_EXIT(&is->is_lock);
 3581                 if (!orphan)
 3582                         softs->ipf_state_stats.iss_orphan++;
 3583                 return (refs);
 3584         }
 3585 
 3586         fr = is->is_rule;
 3587         is->is_rule = NULL;
 3588         if (fr != NULL) {
 3589                 if (fr->fr_srctrack.ht_max_nodes != 0) {
 3590                         (void) ipf_ht_node_del(&fr->fr_srctrack,
 3591                                                is->is_family, &is->is_src);
 3592                 }
 3593         }
 3594 
 3595         ASSERT(is->is_ref == 0);
 3596         MUTEX_EXIT(&is->is_lock);
 3597 
 3598         if (is->is_tqehead[0] != NULL) {
 3599                 if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0)
 3600                         ipf_freetimeoutqueue(softc, is->is_tqehead[0]);
 3601         }
 3602         if (is->is_tqehead[1] != NULL) {
 3603                 if (ipf_deletetimeoutqueue(is->is_tqehead[1]) == 0)
 3604                         ipf_freetimeoutqueue(softc, is->is_tqehead[1]);
 3605         }
 3606 
 3607         if (is->is_sync)
 3608                 ipf_sync_del_state(softc->ipf_sync_soft, is->is_sync);
 3609 
 3610         /*
 3611          * Now remove it from the linked list of known states
 3612          */
 3613         if (is->is_pnext != NULL) {
 3614                 *is->is_pnext = is->is_next;
 3615 
 3616                 if (is->is_next != NULL)
 3617                         is->is_next->is_pnext = is->is_pnext;
 3618 
 3619                 is->is_pnext = NULL;
 3620                 is->is_next = NULL;
 3621         }
 3622 
 3623         if (softs->ipf_state_logging != 0 && why != 0)
 3624                 ipf_state_log(softc, is, why);
 3625 
 3626         if (is->is_p == IPPROTO_TCP)
 3627                 softs->ipf_state_stats.iss_fin++;
 3628         else
 3629                 softs->ipf_state_stats.iss_expire++;
 3630         if (orphan)
 3631                 softs->ipf_state_stats.iss_orphan--;
 3632 
 3633         if (fr != NULL) {
 3634                 fr->fr_statecnt--;
 3635                 (void) ipf_derefrule(softc, &fr);
 3636         }
 3637 
 3638         softs->ipf_state_stats.iss_active_proto[is->is_p]--;
 3639 
 3640         MUTEX_DESTROY(&is->is_lock);
 3641         KFREE(is);
 3642         softs->ipf_state_stats.iss_active--;
 3643 
 3644         return (0);
 3645 }
 3646 
 3647 
 3648 /* ------------------------------------------------------------------------ */
 3649 /* Function:    ipf_state_expire                                            */
 3650 /* Returns:     Nil                                                         */
 3651 /* Parameters:  softc(I) - pointer to soft context main structure           */
 3652 /*                                                                          */
 3653 /* Slowly expire held state for thingslike UDP and ICMP.  The algorithm     */
 3654 /* used here is to keep the queue sorted with the oldest things at the top  */
 3655 /* and the youngest at the bottom.  So if the top one doesn't need to be    */
 3656 /* expired then neither will any under it.                                  */
 3657 /* ------------------------------------------------------------------------ */
 3658 void
 3659 ipf_state_expire(ipf_main_softc_t *softc)
 3660 {
 3661         ipf_state_softc_t *softs = softc->ipf_state_soft;
 3662         ipftq_t *ifq, *ifqnext;
 3663         ipftqent_t *tqe, *tqn;
 3664         ipstate_t *is;
 3665         SPL_INT(s);
 3666 
 3667         SPL_NET(s);
 3668         WRITE_ENTER(&softc->ipf_state);
 3669         for (ifq = softs->ipf_state_tcptq; ifq != NULL; ifq = ifq->ifq_next)
 3670                 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
 3671                         if (tqe->tqe_die > softc->ipf_ticks)
 3672                                 break;
 3673                         tqn = tqe->tqe_next;
 3674                         is = tqe->tqe_parent;
 3675                         ipf_state_del(softc, is, ISL_EXPIRE);
 3676                 }
 3677 
 3678         for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
 3679                 ifqnext = ifq->ifq_next;
 3680 
 3681                 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
 3682                         if (tqe->tqe_die > softc->ipf_ticks)
 3683                                 break;
 3684                         tqn = tqe->tqe_next;
 3685                         is = tqe->tqe_parent;
 3686                         ipf_state_del(softc, is, ISL_EXPIRE);
 3687                 }
 3688         }
 3689 
 3690         for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
 3691                 ifqnext = ifq->ifq_next;
 3692 
 3693                 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
 3694                     (ifq->ifq_ref == 0)) {
 3695                         ipf_freetimeoutqueue(softc, ifq);
 3696                 }
 3697         }
 3698 
 3699         if (softs->ipf_state_doflush) {
 3700                 (void) ipf_state_flush(softc, 2, 0);
 3701                 softs->ipf_state_doflush = 0;
 3702                 softs->ipf_state_wm_last = softc->ipf_ticks;
 3703         }
 3704 
 3705         RWLOCK_EXIT(&softc->ipf_state);
 3706         SPL_X(s);
 3707 }
 3708 
 3709 
 3710 /* ------------------------------------------------------------------------ */
 3711 /* Function:    ipf_state_flush                                             */
 3712 /* Returns:     int - 0 == success, -1 == failure                           */
 3713 /* Parameters:  softc(I) - pointer to soft context main structure           */
 3714 /*              which(I) - which flush action to perform                    */
 3715 /*              proto(I) - which protocol to flush (0 == ALL)               */
 3716 /* Write Locks: ipf_state                                                   */
 3717 /*                                                                          */
 3718 /* Flush state tables.  Three actions currently defined:                    */
 3719 /* which == 0 : flush all state table entries                               */
 3720 /* which == 1 : flush TCP connections which have started to close but are   */
 3721 /*            stuck for some reason.                                        */
 3722 /* which == 2 : flush TCP connections which have been idle for a long time, */
 3723 /*            starting at > 4 days idle and working back in successive half-*/
 3724 /*            days to at most 12 hours old.  If this fails to free enough   */
 3725 /*            slots then work backwards in half hour slots to 30 minutes.   */
 3726 /*            If that too fails, then work backwards in 30 second intervals */
 3727 /*            for the last 30 minutes to at worst 30 seconds idle.          */
 3728 /* ------------------------------------------------------------------------ */
 3729 int
 3730 ipf_state_flush(ipf_main_softc_t *softc, int which, int proto)
 3731 {
 3732         ipf_state_softc_t *softs = softc->ipf_state_soft;
 3733         ipftqent_t *tqe, *tqn;
 3734         ipstate_t *is, **isp;
 3735         ipftq_t *ifq;
 3736         int removed;
 3737         SPL_INT(s);
 3738 
 3739         removed = 0;
 3740 
 3741         SPL_NET(s);
 3742 
 3743         switch (which)
 3744         {
 3745         case 0 :
 3746                 SBUMP(ipf_state_stats.iss_flush_all);
 3747                 /*
 3748                  * Style 0 flush removes everything...
 3749                  */
 3750                 for (isp = &softs->ipf_state_list; ((is = *isp) != NULL); ) {
 3751                         if ((proto != 0) && (is->is_v != proto)) {
 3752                                 isp = &is->is_next;
 3753                                 continue;
 3754                         }
 3755                         if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
 3756                                 removed++;
 3757                         else
 3758                                 isp = &is->is_next;
 3759                 }
 3760                 break;
 3761 
 3762         case 1 :
 3763                 SBUMP(ipf_state_stats.iss_flush_closing);
 3764                 /*
 3765                  * Since we're only interested in things that are closing,
 3766                  * we can start with the appropriate timeout queue.
 3767                  */
 3768                 for (ifq = softs->ipf_state_tcptq + IPF_TCPS_CLOSE_WAIT;
 3769                      ifq != NULL; ifq = ifq->ifq_next) {
 3770 
 3771                         for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
 3772                                 tqn = tqe->tqe_next;
 3773                                 is = tqe->tqe_parent;
 3774                                 if (is->is_p != IPPROTO_TCP)
 3775                                         break;
 3776                                 if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
 3777                                         removed++;
 3778                         }
 3779                 }
 3780 
 3781                 /*
 3782                  * Also need to look through the user defined queues.
 3783                  */
 3784                 for (ifq = softs->ipf_state_usertq; ifq != NULL;
 3785                      ifq = ifq->ifq_next) {
 3786                         for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
 3787                                 tqn = tqe->tqe_next;
 3788                                 is = tqe->tqe_parent;
 3789                                 if (is->is_p != IPPROTO_TCP)
 3790                                         continue;
 3791 
 3792                                 if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) &&
 3793                                     (is->is_state[1] > IPF_TCPS_ESTABLISHED)) {
 3794                                         if (ipf_state_del(softc, is,
 3795                                                           ISL_FLUSH) == 0)
 3796                                                 removed++;
 3797                                 }
 3798                         }
 3799                 }
 3800                 break;
 3801 
 3802         case 2 :
 3803                 break;
 3804 
 3805                 /*
 3806                  * Args 5-11 correspond to flushing those particular states
 3807                  * for TCP connections.
 3808                  */
 3809         case IPF_TCPS_CLOSE_WAIT :
 3810         case IPF_TCPS_FIN_WAIT_1 :
 3811         case IPF_TCPS_CLOSING :
 3812         case IPF_TCPS_LAST_ACK :
 3813         case IPF_TCPS_FIN_WAIT_2 :
 3814         case IPF_TCPS_TIME_WAIT :
 3815         case IPF_TCPS_CLOSED :
 3816                 SBUMP(ipf_state_stats.iss_flush_queue);
 3817                 tqn = softs->ipf_state_tcptq[which].ifq_head;
 3818                 while (tqn != NULL) {
 3819                         tqe = tqn;
 3820                         tqn = tqe->tqe_next;
 3821                         is = tqe->tqe_parent;
 3822                         if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
 3823                                 removed++;
 3824                 }
 3825                 break;
 3826 
 3827         default :
 3828                 if (which < 30)
 3829                         break;
 3830 
 3831                 SBUMP(ipf_state_stats.iss_flush_state);
 3832                 /*
 3833                  * Take a large arbitrary number to mean the number of seconds
 3834                  * for which which consider to be the maximum value we'll allow
 3835                  * the expiration to be.
 3836                  */
 3837                 which = IPF_TTLVAL(which);
 3838                 for (isp = &softs->ipf_state_list; ((is = *isp) != NULL); ) {
 3839                         if ((proto == 0) || (is->is_v == proto)) {
 3840                                 if (softc->ipf_ticks - is->is_touched > which) {
 3841                                         if (ipf_state_del(softc, is,
 3842                                                           ISL_FLUSH) == 0) {
 3843                                                 removed++;
 3844                                                 continue;
 3845                                         }
 3846                                 }
 3847                         }
 3848                         isp = &is->is_next;
 3849                 }
 3850                 break;
 3851         }
 3852 
 3853         if (which != 2) {
 3854                 SPL_X(s);
 3855                 return (removed);
 3856         }
 3857 
 3858         SBUMP(ipf_state_stats.iss_flush_timeout);
 3859         /*
 3860          * Asked to remove inactive entries because the table is full, try
 3861          * again, 3 times, if first attempt failed with a different criteria
 3862          * each time.  The order tried in must be in decreasing age.
 3863          * Another alternative is to implement random drop and drop N entries
 3864          * at random until N have been freed up.
 3865          */
 3866         if (softc->ipf_ticks - softs->ipf_state_wm_last >
 3867             softs->ipf_state_wm_freq) {
 3868                 removed = ipf_queueflush(softc, ipf_state_flush_entry,
 3869                                          softs->ipf_state_tcptq,
 3870                                          softs->ipf_state_usertq,
 3871                                          &softs->ipf_state_stats.iss_active,
 3872                                          softs->ipf_state_size,
 3873                                          softs->ipf_state_wm_low);
 3874                 softs->ipf_state_wm_last = softc->ipf_ticks;
 3875         }
 3876 
 3877         SPL_X(s);
 3878         return (removed);
 3879 }
 3880 
 3881 
 3882 /* ------------------------------------------------------------------------ */
 3883 /* Function:    ipf_state_flush_entry                                       */
 3884 /* Returns:     int - 0 = entry deleted, else not deleted                   */
 3885 /* Parameters:  softc(I) - pointer to soft context main structure           */
 3886 /*              entry(I)  - pointer to state structure to delete            */
 3887 /* Write Locks: ipf_state                                                   */
 3888 /*                                                                          */
 3889 /* This function is a stepping stone between ipf_queueflush() and           */
 3890 /* ipf_state_del().  It is used so we can provide a uniform interface via   */
 3891 /* the ipf_queueflush() function.                                           */
 3892 /* ------------------------------------------------------------------------ */
 3893 static int
 3894 ipf_state_flush_entry(ipf_main_softc_t *softc, void *entry)
 3895 {
 3896         return (ipf_state_del(softc, entry, ISL_FLUSH));
 3897 }
 3898 
 3899 
 3900 /* ------------------------------------------------------------------------ */
 3901 /* Function:    ipf_tcp_age                                                 */
 3902 /* Returns:     int - 1 == state transition made, 0 == no change (rejected) */
 3903 /* Parameters:  tqe(I)   - pointer to timeout queue information             */
 3904 /*              fin(I)   - pointer to packet information                    */
 3905 /*              tqtab(I) - TCP timeout queue table this is in               */
 3906 /*              flags(I) - flags from state/NAT entry                       */
 3907 /*              ok(I)    - can we advance state                             */
 3908 /*                                                                          */
 3909 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29:          */
 3910 /*                                                                          */
 3911 /* - (try to) base state transitions on real evidence only,                 */
 3912 /*   i.e. packets that are sent and have been received by ipfilter;         */
 3913 /*   diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used.       */
 3914 /*                                                                          */
 3915 /* - deal with half-closed connections correctly;                           */
 3916 /*                                                                          */
 3917 /* - store the state of the source in state[0] such that ipfstat            */
 3918 /*   displays the state as source/dest instead of dest/source; the calls    */
 3919 /*   to ipf_tcp_age have been changed accordingly.                          */
 3920 /*                                                                          */
 3921 /* Internal Parameters:                                                     */
 3922 /*                                                                          */
 3923 /*    state[0] = state of source (host that initiated connection)           */
 3924 /*    state[1] = state of dest   (host that accepted the connection)        */
 3925 /*                                                                          */
 3926 /*    dir == 0 : a packet from source to dest                               */
 3927 /*    dir == 1 : a packet from dest to source                               */
 3928 /*                                                                          */
 3929 /* A typical procession for a connection is as follows:                     */
 3930 /*                                                                          */
 3931 /* +--------------+-------------------+                                     */
 3932 /* | Side '0'     | Side '1'          |                                     */
 3933 /* +--------------+-------------------+                                     */
 3934 /* | 0 -> 1 (SYN) |                   |                                     */
 3935 /* |              | 0 -> 2 (SYN-ACK)  |                                     */
 3936 /* | 1 -> 3 (ACK) |                   |                                     */
 3937 /* |              | 2 -> 4 (ACK-PUSH) |                                     */
 3938 /* | 3 -> 4 (ACK) |                   |                                     */
 3939 /* |   ...        |   ...             |                                     */
 3940 /* |              | 4 -> 6 (FIN-ACK)  |                                     */
 3941 /* | 4 -> 5 (ACK) |                   |                                     */
 3942 /* |              | 6 -> 6 (ACK-PUSH) |                                     */
 3943 /* | 5 -> 5 (ACK) |                   |                                     */
 3944 /* | 5 -> 8 (FIN) |                   |                                     */
 3945 /* |              | 6 -> 10 (ACK)     |                                     */
 3946 /* +--------------+-------------------+                                     */
 3947 /*                                                                          */
 3948 /* Locking: it is assumed that the parent of the tqe structure is locked.   */
 3949 /* ------------------------------------------------------------------------ */
 3950 int
 3951 ipf_tcp_age(ipftqent_t *tqe, fr_info_t *fin, ipftq_t *tqtab, int flags, int ok)
 3952 {
 3953         ipf_main_softc_t *softc = fin->fin_main_soft;
 3954         int dlen, ostate, nstate, rval, dir;
 3955         u_char tcpflags;
 3956         tcphdr_t *tcp;
 3957 
 3958         tcp = fin->fin_dp;
 3959 
 3960         rval = 0;
 3961         dir = fin->fin_rev;
 3962         tcpflags = tcp->th_flags;
 3963         dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
 3964         ostate = tqe->tqe_state[1 - dir];
 3965         nstate = tqe->tqe_state[dir];
 3966 
 3967         if (tcpflags & TH_RST) {
 3968                 if (!(tcpflags & TH_PUSH) && !dlen)
 3969                         nstate = IPF_TCPS_CLOSED;
 3970                 else
 3971                         nstate = IPF_TCPS_CLOSE_WAIT;
 3972 
 3973                 if (ostate <= IPF_TCPS_ESTABLISHED) {
 3974                         tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT;
 3975                 }
 3976                 rval = 1;
 3977         } else {
 3978                 switch (nstate)
 3979                 {
 3980                 case IPF_TCPS_LISTEN: /* 0 */
 3981                         if ((tcpflags & TH_OPENING) == TH_OPENING) {
 3982                                 /*
 3983                                  * 'dir' received an S and sends SA in
 3984                                  * response, LISTEN -> SYN_RECEIVED
 3985                                  */
 3986                                 nstate = IPF_TCPS_SYN_RECEIVED;
 3987                                 rval = 1;
 3988                         } else if ((tcpflags & TH_OPENING) == TH_SYN) {
 3989                                 /* 'dir' sent S, LISTEN -> SYN_SENT */
 3990                                 nstate = IPF_TCPS_SYN_SENT;
 3991                                 rval = 1;
 3992                         }
 3993                         /*
 3994                          * the next piece of code makes it possible to get
 3995                          * already established connections into the state table
 3996                          * after a restart or reload of the filter rules; this
 3997                          * does not work when a strict 'flags S keep state' is
 3998                          * used for tcp connections of course
 3999                          */
 4000                         if (((flags & IS_TCPFSM) == 0) &&
 4001                             ((tcpflags & TH_ACKMASK) == TH_ACK)) {
 4002                                 /*
 4003                                  * we saw an A, guess 'dir' is in ESTABLISHED
 4004                                  * mode
 4005                                  */
 4006                                 switch (ostate)
 4007                                 {
 4008                                 case IPF_TCPS_LISTEN :
 4009                                 case IPF_TCPS_SYN_RECEIVED :
 4010                                         nstate = IPF_TCPS_HALF_ESTAB;
 4011                                         rval = 1;
 4012                                         break;
 4013                                 case IPF_TCPS_HALF_ESTAB :
 4014                                 case IPF_TCPS_ESTABLISHED :
 4015                                         nstate = IPF_TCPS_ESTABLISHED;
 4016                                         rval = 1;
 4017                                         break;
 4018                                 default :
 4019                                         break;
 4020                                 }
 4021                         }
 4022                         /*
 4023                          * TODO: besides regular ACK packets we can have other
 4024                          * packets as well; it is yet to be determined how we
 4025                          * should initialize the states in those cases
 4026                          */
 4027                         break;
 4028 
 4029                 case IPF_TCPS_SYN_SENT: /* 1 */
 4030                         if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
 4031                                 /*
 4032                                  * A retransmitted SYN packet.  We do not reset
 4033                                  * the timeout here to ipf_tcptimeout because a
 4034                                  * connection connect timeout does not renew
 4035                                  * after every packet that is sent.  We need to
 4036                                  * set rval so as to indicate the packet has
 4037                                  * passed the check for its flags being valid
 4038                                  * in the TCP FSM.  Setting rval to 2 has the
 4039                                  * result of not resetting the timeout.
 4040                                  */
 4041                                 rval = 2;
 4042                         } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
 4043                                    TH_ACK) {
 4044                                 /*
 4045                                  * we see an A from 'dir' which is in SYN_SENT
 4046                                  * state: 'dir' sent an A in response to an SA
 4047                                  * which it received, SYN_SENT -> ESTABLISHED
 4048                                  */
 4049                                 nstate = IPF_TCPS_ESTABLISHED;
 4050                                 rval = 1;
 4051                         } else if (tcpflags & TH_FIN) {
 4052                                 /*
 4053                                  * we see an F from 'dir' which is in SYN_SENT
 4054                                  * state and wants to close its side of the
 4055                                  * connection; SYN_SENT -> FIN_WAIT_1
 4056                                  */
 4057                                 nstate = IPF_TCPS_FIN_WAIT_1;
 4058                                 rval = 1;
 4059                         } else if ((tcpflags & TH_OPENING) == TH_OPENING) {
 4060                                 /*
 4061                                  * we see an SA from 'dir' which is already in
 4062                                  * SYN_SENT state, this means we have a
 4063                                  * simultaneous open; SYN_SENT -> SYN_RECEIVED
 4064                                  */
 4065                                 nstate = IPF_TCPS_SYN_RECEIVED;
 4066                                 rval = 1;
 4067                         }
 4068                         break;
 4069 
 4070                 case IPF_TCPS_SYN_RECEIVED: /* 2 */
 4071                         if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
 4072                                 /*
 4073                                  * we see an A from 'dir' which was in
 4074                                  * SYN_RECEIVED state so it must now be in
 4075                                  * established state, SYN_RECEIVED ->
 4076                                  * ESTABLISHED
 4077                                  */
 4078                                 nstate = IPF_TCPS_ESTABLISHED;
 4079                                 rval = 1;
 4080                         } else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
 4081                                    TH_OPENING) {
 4082                                 /*
 4083                                  * We see an SA from 'dir' which is already in
 4084                                  * SYN_RECEIVED state.
 4085                                  */
 4086                                 rval = 2;
 4087                         } else if (tcpflags & TH_FIN) {
 4088                                 /*
 4089                                  * we see an F from 'dir' which is in
 4090                                  * SYN_RECEIVED state and wants to close its
 4091                                  * side of the connection; SYN_RECEIVED ->
 4092                                  * FIN_WAIT_1
 4093                                  */
 4094                                 nstate = IPF_TCPS_FIN_WAIT_1;
 4095                                 rval = 1;
 4096                         }
 4097                         break;
 4098 
 4099                 case IPF_TCPS_HALF_ESTAB: /* 3 */
 4100                         if (tcpflags & TH_FIN) {
 4101                                 nstate = IPF_TCPS_FIN_WAIT_1;
 4102                                 rval = 1;
 4103                         } else if ((tcpflags & TH_ACKMASK) == TH_ACK) {
 4104                                 /*
 4105                                  * If we've picked up a connection in mid
 4106                                  * flight, we could be looking at a follow on
 4107                                  * packet from the same direction as the one
 4108                                  * that created this state.  Recognise it but
 4109                                  * do not advance the entire connection's
 4110                                  * state.
 4111                                  */
 4112                                 switch (ostate)
 4113                                 {
 4114                                 case IPF_TCPS_LISTEN :
 4115                                 case IPF_TCPS_SYN_SENT :
 4116                                 case IPF_TCPS_SYN_RECEIVED :
 4117                                         rval = 1;
 4118                                         break;
 4119                                 case IPF_TCPS_HALF_ESTAB :
 4120                                 case IPF_TCPS_ESTABLISHED :
 4121                                         nstate = IPF_TCPS_ESTABLISHED;
 4122                                         rval = 1;
 4123                                         break;
 4124                                 default :
 4125                                         break;
 4126                                 }
 4127                         }
 4128                         break;
 4129 
 4130                 case IPF_TCPS_ESTABLISHED: /* 4 */
 4131                         rval = 1;
 4132                         if (tcpflags & TH_FIN) {
 4133                                 /*
 4134                                  * 'dir' closed its side of the connection;
 4135                                  * this gives us a half-closed connection;
 4136                                  * ESTABLISHED -> FIN_WAIT_1
 4137                                  */
 4138                                 if (ostate == IPF_TCPS_FIN_WAIT_1) {
 4139                                         nstate = IPF_TCPS_CLOSING;
 4140                                 } else {
 4141                                         nstate = IPF_TCPS_FIN_WAIT_1;
 4142                                 }
 4143                         } else if (tcpflags & TH_ACK) {
 4144                                 /*
 4145                                  * an ACK, should we exclude other flags here?
 4146                                  */
 4147                                 if (ostate == IPF_TCPS_FIN_WAIT_1) {
 4148                                         /*
 4149                                          * We know the other side did an active
 4150                                          * close, so we are ACKing the recvd
 4151                                          * FIN packet (does the window matching
 4152                                          * code guarantee this?) and go into
 4153                                          * CLOSE_WAIT state; this gives us a
 4154                                          * half-closed connection
 4155                                          */
 4156                                         nstate = IPF_TCPS_CLOSE_WAIT;
 4157                                 } else if (ostate < IPF_TCPS_CLOSE_WAIT) {
 4158                                         /*
 4159                                          * still a fully established
 4160                                          * connection reset timeout
 4161                                          */
 4162                                         nstate = IPF_TCPS_ESTABLISHED;
 4163                                 }
 4164                         }
 4165                         break;
 4166 
 4167                 case IPF_TCPS_CLOSE_WAIT: /* 5 */
 4168                         rval = 1;
 4169                         if (tcpflags & TH_FIN) {
 4170                                 /*
 4171                                  * application closed and 'dir' sent a FIN,
 4172                                  * we're now going into LAST_ACK state
 4173                                  */
 4174                                 nstate = IPF_TCPS_LAST_ACK;
 4175                         } else {
 4176                                 /*
 4177                                  * we remain in CLOSE_WAIT because the other
 4178                                  * side has closed already and we did not
 4179                                  * close our side yet; reset timeout
 4180                                  */
 4181                                 nstate = IPF_TCPS_CLOSE_WAIT;
 4182                         }
 4183                         break;
 4184 
 4185                 case IPF_TCPS_FIN_WAIT_1: /* 6 */
 4186                         rval = 1;
 4187                         if ((tcpflags & TH_ACK) &&
 4188                             ostate > IPF_TCPS_CLOSE_WAIT) {
 4189                                 /*
 4190                                  * if the other side is not active anymore
 4191                                  * it has sent us a FIN packet that we are
 4192                                  * ack'ing now with an ACK; this means both
 4193                                  * sides have now closed the connection and
 4194                                  * we go into TIME_WAIT
 4195                                  */
 4196                                 /*
 4197                                  * XXX: how do we know we really are ACKing
 4198                                  * the FIN packet here? does the window code
 4199                                  * guarantee that?
 4200                                  */
 4201                                 nstate = IPF_TCPS_LAST_ACK;
 4202                         } else {
 4203                                 /*
 4204                                  * we closed our side of the connection
 4205                                  * already but the other side is still active
 4206                                  * (ESTABLISHED/CLOSE_WAIT); continue with
 4207                                  * this half-closed connection
 4208                                  */
 4209                                 nstate = IPF_TCPS_FIN_WAIT_1;
 4210                         }
 4211                         break;
 4212 
 4213                 case IPF_TCPS_CLOSING: /* 7 */
 4214                         if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) {
 4215                                 nstate = IPF_TCPS_TIME_WAIT;
 4216                         }
 4217                         rval = 1;
 4218                         break;
 4219 
 4220                 case IPF_TCPS_LAST_ACK: /* 8 */
 4221                         if (tcpflags & TH_ACK) {
 4222                                 rval = 1;
 4223                         }
 4224                         /*
 4225                          * we cannot detect when we go out of LAST_ACK state
 4226                          * to CLOSED because that is based on the reception
 4227                          * of ACK packets; ipfilter can only detect that a
 4228                          * packet has been sent by a host
 4229                          */
 4230                         break;
 4231 
 4232                 case IPF_TCPS_FIN_WAIT_2: /* 9 */
 4233                         /* NOT USED */
 4234                         break;
 4235 
 4236                 case IPF_TCPS_TIME_WAIT: /* 10 */
 4237                         /* we're in 2MSL timeout now */
 4238                         if (ostate == IPF_TCPS_LAST_ACK) {
 4239                                 nstate = IPF_TCPS_CLOSED;
 4240                                 rval = 1;
 4241                         } else {
 4242                                 rval = 2;
 4243                         }
 4244                         break;
 4245 
 4246                 case IPF_TCPS_CLOSED: /* 11 */
 4247                         rval = 2;
 4248                         break;
 4249 
 4250                 default :
 4251 #if !defined(_KERNEL)
 4252                         abort();
 4253 #endif
 4254                         break;
 4255                 }
 4256         }
 4257 
 4258         /*
 4259          * If rval == 2 then do not update the queue position, but treat the
 4260          * packet as being ok.
 4261          */
 4262         if (rval == 2)
 4263                 rval = 1;
 4264         else if (rval == 1) {
 4265                 if (ok)
 4266                         tqe->tqe_state[dir] = nstate;
 4267                 if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
 4268                         ipf_movequeue(softc->ipf_ticks, tqe, tqe->tqe_ifq,
 4269                                       tqtab + nstate);
 4270         }
 4271 
 4272         return (rval);
 4273 }
 4274 
 4275 
 4276 /* ------------------------------------------------------------------------ */
 4277 /* Function:    ipf_state_log                                               */
 4278 /* Returns:     Nil                                                         */
 4279 /* Parameters:  softc(I) - pointer to soft context main structure           */
 4280 /*              is(I)    - pointer to state structure                       */
 4281 /*              type(I)  - type of log entry to create                      */
 4282 /*                                                                          */
 4283 /* Creates a state table log entry using the state structure and type info. */
 4284 /* passed in.  Log packet/byte counts, source/destination address and other */
 4285 /* protocol specific information.                                           */
 4286 /* ------------------------------------------------------------------------ */
 4287 void
 4288 ipf_state_log(ipf_main_softc_t *softc, struct ipstate *is, u_int type)
 4289 {
 4290 #ifdef  IPFILTER_LOG
 4291         struct  ipslog  ipsl;
 4292         size_t sizes[1];
 4293         void *items[1];
 4294         int types[1];
 4295 
 4296         /*
 4297          * Copy information out of the ipstate_t structure and into the
 4298          * structure used for logging.
 4299          */
 4300         ipsl.isl_type = type;
 4301         ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0];
 4302         ipsl.isl_bytes[0] = is->is_bytes[0];
 4303         ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1];
 4304         ipsl.isl_bytes[1] = is->is_bytes[1];
 4305         ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2];
 4306         ipsl.isl_bytes[2] = is->is_bytes[2];
 4307         ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3];
 4308         ipsl.isl_bytes[3] = is->is_bytes[3];
 4309         ipsl.isl_src = is->is_src;
 4310         ipsl.isl_dst = is->is_dst;
 4311         ipsl.isl_p = is->is_p;
 4312         ipsl.isl_v = is->is_v;
 4313         ipsl.isl_flags = is->is_flags;
 4314         ipsl.isl_tag = is->is_tag;
 4315         ipsl.isl_rulen = is->is_rulen;
 4316         (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN);
 4317 
 4318         if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
 4319                 ipsl.isl_sport = is->is_sport;
 4320                 ipsl.isl_dport = is->is_dport;
 4321                 if (ipsl.isl_p == IPPROTO_TCP) {
 4322                         ipsl.isl_state[0] = is->is_state[0];
 4323                         ipsl.isl_state[1] = is->is_state[1];
 4324                 }
 4325         } else if (ipsl.isl_p == IPPROTO_ICMP) {
 4326                 ipsl.isl_itype = is->is_icmp.ici_type;
 4327         } else if (ipsl.isl_p == IPPROTO_ICMPV6) {
 4328                 ipsl.isl_itype = is->is_icmp.ici_type;
 4329         } else {
 4330                 ipsl.isl_ps.isl_filler[0] = 0;
 4331                 ipsl.isl_ps.isl_filler[1] = 0;
 4332         }
 4333 
 4334         items[0] = &ipsl;
 4335         sizes[0] = sizeof(ipsl);
 4336         types[0] = 0;
 4337 
 4338         (void) ipf_log_items(softc, IPL_LOGSTATE, NULL, items, sizes, types, 1);
 4339 #endif
 4340 }
 4341 
 4342 
 4343 #ifdef  USE_INET6
 4344 /* ------------------------------------------------------------------------ */
 4345 /* Function:    ipf_checkicmp6matchingstate                                 */
 4346 /* Returns:     ipstate_t* - NULL == no match found,                        */
 4347 /*                           else  pointer to matching state entry          */
 4348 /* Parameters:  fin(I) - pointer to packet information                      */
 4349 /* Locks:       NULL == no locks, else Read Lock on ipf_state               */
 4350 /*                                                                          */
 4351 /* If we've got an ICMPv6 error message, using the information stored in    */
 4352 /* the ICMPv6 packet, look for a matching state table entry.                */
 4353 /* ------------------------------------------------------------------------ */
 4354 static ipstate_t *
 4355 ipf_checkicmp6matchingstate(fr_info_t *fin)
 4356 {
 4357         ipf_main_softc_t *softc = fin->fin_main_soft;
 4358         ipf_state_softc_t *softs = softc->ipf_state_soft;
 4359         struct icmp6_hdr *ic6, *oic;
 4360         ipstate_t *is, **isp;
 4361         u_short sport, dport;
 4362         i6addr_t dst, src;
 4363         u_short savelen;
 4364         icmpinfo_t *ic;
 4365         fr_info_t ofin;
 4366         tcphdr_t *tcp;
 4367         ip6_t *oip6;
 4368         u_char pr;
 4369         u_int hv;
 4370         int type;
 4371 
 4372         /*
 4373          * Does it at least have the return (basic) IP header ?
 4374          * Is it an actual recognised ICMP error type?
 4375          * Only a basic IP header (no options) should be with
 4376          * an ICMP error header.
 4377          */
 4378         if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) ||
 4379             !(fin->fin_flx & FI_ICMPERR)) {
 4380                 SBUMPD(ipf_state_stats, iss_icmp_bad);
 4381                 return (NULL);
 4382         }
 4383 
 4384         ic6 = fin->fin_dp;
 4385         type = ic6->icmp6_type;
 4386 
 4387         oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
 4388         if (fin->fin_plen < sizeof(*oip6)) {
 4389                 SBUMPD(ipf_state_stats, iss_icmp_short);
 4390                 return (NULL);
 4391         }
 4392 
 4393         bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
 4394         ofin.fin_v = 6;
 4395         ofin.fin_ifp = fin->fin_ifp;
 4396         ofin.fin_out = !fin->fin_out;
 4397         ofin.fin_m = NULL;      /* if dereferenced, panic XXX */
 4398         ofin.fin_mp = NULL;     /* if dereferenced, panic XXX */
 4399 
 4400         /*
 4401          * We make a fin entry to be able to feed it to
 4402          * matchsrcdst. Note that not all fields are necessary
 4403          * but this is the cleanest way. Note further we fill
 4404          * in fin_mp such that if someone uses it we'll get
 4405          * a kernel panic. ipf_matchsrcdst does not use this.
 4406          *
 4407          * watch out here, as ip is in host order and oip6 in network
 4408          * order. Any change we make must be undone afterwards.
 4409          */
 4410         savelen = oip6->ip6_plen;
 4411         oip6->ip6_plen = htons(fin->fin_dlen - ICMPERR_ICMPHLEN);
 4412         ofin.fin_flx = FI_NOCKSUM;
 4413         ofin.fin_ip = (ip_t *)oip6;
 4414         (void) ipf_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
 4415         ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
 4416         oip6->ip6_plen = savelen;
 4417         pr = ofin.fin_p;
 4418 
 4419         /*
 4420          * an ICMP error can never generate an ICMP error in response.
 4421          */
 4422         if (ofin.fin_flx & FI_ICMPERR) {
 4423                 DT1(iss_icmp6_icmperr, fr_info_t *, &ofin);
 4424                 SBUMP(ipf_state_stats.iss_icmp6_icmperr);
 4425                 return (NULL);
 4426         }
 4427 
 4428         if (oip6->ip6_nxt == IPPROTO_ICMPV6) {
 4429                 oic = ofin.fin_dp;
 4430                 /*
 4431                  * an ICMP error can only be generated as a result of an
 4432                  * ICMP query, not as the response on an ICMP error
 4433                  *
 4434                  * XXX theoretically ICMP_ECHOREP and the other reply's are
 4435                  * ICMP query's as well, but adding them here seems strange XXX
 4436                  */
 4437                  if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) {
 4438                         DT1(iss_icmp6_notinfo, fr_info_t *, &ofin);
 4439                         SBUMP(ipf_state_stats.iss_icmp6_notinfo);
 4440                         return (NULL);
 4441                 }
 4442 
 4443                 /*
 4444                  * perform a lookup of the ICMP packet in the state table
 4445                  */
 4446                 hv = (pr = oip6->ip6_nxt);
 4447                 src.in6 = oip6->ip6_src;
 4448                 hv += src.in4.s_addr;
 4449                 dst.in6 = oip6->ip6_dst;
 4450                 hv += dst.in4.s_addr;
 4451                 hv += oic->icmp6_id;
 4452                 hv += oic->icmp6_seq;
 4453                 hv = DOUBLE_HASH(hv);
 4454 
 4455                 READ_ENTER(&softc->ipf_state);
 4456                 for (isp = &softs->ipf_state_table[hv];
 4457                      ((is = *isp) != NULL); ) {
 4458                         ic = &is->is_icmp;
 4459                         isp = &is->is_hnext;
 4460                         if ((is->is_p == pr) &&
 4461                             !(is->is_pass & FR_NOICMPERR) &&
 4462                             (oic->icmp6_id == ic->ici_id) &&
 4463                             (oic->icmp6_seq == ic->ici_seq) &&
 4464                             (is = ipf_matchsrcdst(&ofin, is, &src,
 4465                                                  &dst, NULL, FI_ICMPCMP))) {
 4466                                 /*
 4467                                  * in the state table ICMP query's are stored
 4468                                  * with the type of the corresponding ICMP
 4469                                  * response. Correct here
 4470                                  */
 4471                                 if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
 4472                                      (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
 4473                                      (ic->ici_type - 1 == oic->icmp6_type )) {
 4474                                         if (!ipf_allowstateicmp(fin, is, &src))
 4475                                                 return (is);
 4476                                 }
 4477                         }
 4478                 }
 4479                 RWLOCK_EXIT(&softc->ipf_state);
 4480                 SBUMPD(ipf_state_stats, iss_icmp6_miss);
 4481                 return (NULL);
 4482         }
 4483 
 4484         hv = (pr = oip6->ip6_nxt);
 4485         src.in6 = oip6->ip6_src;
 4486         hv += src.i6[0];
 4487         hv += src.i6[1];
 4488         hv += src.i6[2];
 4489         hv += src.i6[3];
 4490         dst.in6 = oip6->ip6_dst;
 4491         hv += dst.i6[0];
 4492         hv += dst.i6[1];
 4493         hv += dst.i6[2];
 4494         hv += dst.i6[3];
 4495 
 4496         tcp = NULL;
 4497 
 4498         switch (oip6->ip6_nxt)
 4499         {
 4500         case IPPROTO_TCP :
 4501         case IPPROTO_UDP :
 4502                 tcp = (tcphdr_t *)(oip6 + 1);
 4503                 dport = tcp->th_dport;
 4504                 sport = tcp->th_sport;
 4505                 hv += dport;
 4506                 hv += sport;
 4507                 break;
 4508 
 4509         case IPPROTO_ICMPV6 :
 4510                 oic = (struct icmp6_hdr *)(oip6 + 1);
 4511                 hv += oic->icmp6_id;
 4512                 hv += oic->icmp6_seq;
 4513                 break;
 4514 
 4515         default :
 4516                 break;
 4517         }
 4518 
 4519         hv = DOUBLE_HASH(hv);
 4520 
 4521         READ_ENTER(&softc->ipf_state);
 4522         for (isp = &softs->ipf_state_table[hv]; ((is = *isp) != NULL); ) {
 4523                 isp = &is->is_hnext;
 4524                 /*
 4525                  * Only allow this icmp though if the
 4526                  * encapsulated packet was allowed through the
 4527                  * other way around. Note that the minimal amount
 4528                  * of info present does not allow for checking against
 4529                  * tcp internals such as seq and ack numbers.
 4530                  */
 4531                 if ((is->is_p != pr) || (is->is_v != 6) ||
 4532                     (is->is_pass & FR_NOICMPERR))
 4533                         continue;
 4534                 is = ipf_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
 4535                 if ((is != NULL) && (ipf_allowstateicmp(fin, is, &src) == 0))
 4536                         return (is);
 4537         }
 4538         RWLOCK_EXIT(&softc->ipf_state);
 4539         SBUMPD(ipf_state_stats, iss_icmp_miss);
 4540         return (NULL);
 4541 }
 4542 #endif
 4543 
 4544 
 4545 /* ------------------------------------------------------------------------ */
 4546 /* Function:    ipf_sttab_init                                              */
 4547 /* Returns:     Nil                                                         */
 4548 /* Parameters:  softc(I) - pointer to soft context main structure           */
 4549 /*              tqp(I)   - pointer to an array of timeout queues for TCP    */
 4550 /*                                                                          */
 4551 /* Initialise the array of timeout queues for TCP.                          */
 4552 /* ------------------------------------------------------------------------ */
 4553 void
 4554 ipf_sttab_init(softc, tqp)
 4555         ipf_main_softc_t *softc;
 4556         ipftq_t *tqp;
 4557 {
 4558         int i;
 4559 
 4560         for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
 4561                 IPFTQ_INIT(&tqp[i], 0, "ipftq tcp tab");
 4562                 tqp[i].ifq_next = tqp + i + 1;
 4563         }
 4564         tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
 4565         tqp[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcpclosed;
 4566         tqp[IPF_TCPS_LISTEN].ifq_ttl = softc->ipf_tcptimeout;
 4567         tqp[IPF_TCPS_SYN_SENT].ifq_ttl = softc->ipf_tcpsynsent;
 4568         tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = softc->ipf_tcpsynrecv;
 4569         tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = softc->ipf_tcpidletimeout;
 4570         tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = softc->ipf_tcphalfclosed;
 4571         tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = softc->ipf_tcphalfclosed;
 4572         tqp[IPF_TCPS_CLOSING].ifq_ttl = softc->ipf_tcptimeout;
 4573         tqp[IPF_TCPS_LAST_ACK].ifq_ttl = softc->ipf_tcplastack;
 4574         tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = softc->ipf_tcpclosewait;
 4575         tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = softc->ipf_tcptimewait;
 4576         tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = softc->ipf_tcptimeout;
 4577 }
 4578 
 4579 
 4580 /* ------------------------------------------------------------------------ */
 4581 /* Function:    ipf_sttab_destroy                                           */
 4582 /* Returns:     Nil                                                         */
 4583 /* Parameters:  tqp(I) - pointer to an array of timeout queues for TCP      */
 4584 /*                                                                          */
 4585 /* Do whatever is necessary to "destroy" each of the entries in the array   */
 4586 /* of timeout queues for TCP.                                               */
 4587 /* ------------------------------------------------------------------------ */
 4588 void
 4589 ipf_sttab_destroy(ipftq_t *tqp)
 4590 {
 4591         int i;
 4592 
 4593         for (i = IPF_TCP_NSTATES - 1; i >= 0; i--)
 4594                 MUTEX_DESTROY(&tqp[i].ifq_lock);
 4595 }
 4596 
 4597 
 4598 /* ------------------------------------------------------------------------ */
 4599 /* Function:    ipf_state_deref                                             */
 4600 /* Returns:     Nil                                                         */
 4601 /* Parameters:  softc(I) - pointer to soft context main structure           */
 4602 /*              isp(I) - pointer to pointer to state table entry            */
 4603 /*                                                                          */
 4604 /* Decrement the reference counter for this state table entry and free it   */
 4605 /* if there are no more things using it.                                    */
 4606 /*                                                                          */
 4607 /* This function is only called when cleaning up after increasing is_ref by */
 4608 /* one earlier in the 'code path' so if is_ref is 1 when entering, we do    */
 4609 /* have an orphan, otherwise not.  However there is a possible race between */
 4610 /* the entry being deleted via flushing with an ioctl call (that calls the  */
 4611 /* delete function directly) and the tail end of packet processing so we    */
 4612 /* need to grab is_lock before doing the check to synchronise the two code  */
 4613 /* paths.                                                                   */
 4614 /*                                                                          */
 4615 /* When operating in userland (ipftest), we have no timers to clear a state */
 4616 /* entry.  Therefore, we make a few simple tests before deleting an entry   */
 4617 /* outright.  We compare states on each side looking for a combination of   */
 4618 /* TIME_WAIT (should really be FIN_WAIT_2?) and LAST_ACK.  Then we factor   */
 4619 /* in packet direction with the interface list to make sure we don't        */
 4620 /* prematurely delete an entry on a final inbound packet that's we're also  */
 4621 /* supposed to route elsewhere.                                             */
 4622 /*                                                                          */
 4623 /* Internal parameters:                                                     */
 4624 /*    state[0] = state of source (host that initiated connection)           */
 4625 /*    state[1] = state of dest   (host that accepted the connection)        */
 4626 /*                                                                          */
 4627 /*    dir == 0 : a packet from source to dest                               */
 4628 /*    dir == 1 : a packet from dest to source                               */
 4629 /* ------------------------------------------------------------------------ */
 4630 void
 4631 ipf_state_deref(ipf_main_softc_t *softc, ipstate_t **isp)
 4632 {
 4633         ipstate_t *is = *isp;
 4634 
 4635         is = *isp;
 4636         *isp = NULL;
 4637 
 4638         MUTEX_ENTER(&is->is_lock);
 4639         if (is->is_ref > 1) {
 4640                 is->is_ref--;
 4641                 MUTEX_EXIT(&is->is_lock);
 4642 #ifndef _KERNEL
 4643                 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) ||
 4644                     (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) {
 4645                         ipf_state_del(softc, is, ISL_EXPIRE);
 4646                 }
 4647 #endif
 4648                 return;
 4649         }
 4650         MUTEX_EXIT(&is->is_lock);
 4651 
 4652         WRITE_ENTER(&softc->ipf_state);
 4653         ipf_state_del(softc, is, ISL_ORPHAN);
 4654         RWLOCK_EXIT(&softc->ipf_state);
 4655 }
 4656 
 4657 
 4658 /* ------------------------------------------------------------------------ */
 4659 /* Function:    ipf_state_setqueue                                          */
 4660 /* Returns:     Nil                                                         */
 4661 /* Parameters:  softc(I) - pointer to soft context main structure           */
 4662 /*              is(I)    - pointer to state structure                       */
 4663 /*              rev(I)   - forward(0) or reverse(1) direction               */
 4664 /* Locks:       ipf_state (read or write)                                   */
 4665 /*                                                                          */
 4666 /* Put the state entry on its default queue entry, using rev as a helped in */
 4667 /* determining which queue it should be placed on.                          */
 4668 /* ------------------------------------------------------------------------ */
 4669 void
 4670 ipf_state_setqueue(ipf_main_softc_t *softc, ipstate_t *is, int rev)
 4671 {
 4672         ipf_state_softc_t *softs = softc->ipf_state_soft;
 4673         ipftq_t *oifq, *nifq;
 4674 
 4675         if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0)
 4676                 nifq = is->is_tqehead[rev];
 4677         else
 4678                 nifq = NULL;
 4679 
 4680         if (nifq == NULL) {
 4681                 switch (is->is_p)
 4682                 {
 4683 #ifdef USE_INET6
 4684                 case IPPROTO_ICMPV6 :
 4685                         if (rev == 1)
 4686                                 nifq = &softs->ipf_state_icmpacktq;
 4687                         else
 4688                                 nifq = &softs->ipf_state_icmptq;
 4689                         break;
 4690 #endif
 4691                 case IPPROTO_ICMP :
 4692                         if (rev == 1)
 4693                                 nifq = &softs->ipf_state_icmpacktq;
 4694                         else
 4695                                 nifq = &softs->ipf_state_icmptq;
 4696                         break;
 4697                 case IPPROTO_TCP :
 4698                         nifq = softs->ipf_state_tcptq + is->is_state[rev];
 4699                         break;
 4700 
 4701                 case IPPROTO_UDP :
 4702                         if (rev == 1)
 4703                                 nifq = &softs->ipf_state_udpacktq;
 4704                         else
 4705                                 nifq = &softs->ipf_state_udptq;
 4706                         break;
 4707 
 4708                 default :
 4709                         nifq = &softs->ipf_state_iptq;
 4710                         break;
 4711                 }
 4712         }
 4713 
 4714         oifq = is->is_sti.tqe_ifq;
 4715         /*
 4716          * If it's currently on a timeout queue, move it from one queue to
 4717          * another, else put it on the end of the newly determined queue.
 4718          */
 4719         if (oifq != NULL)
 4720                 ipf_movequeue(softc->ipf_ticks, &is->is_sti, oifq, nifq);
 4721         else
 4722                 ipf_queueappend(softc->ipf_ticks, &is->is_sti, nifq, is);
 4723         return;
 4724 }
 4725 
 4726 
 4727 /* ------------------------------------------------------------------------ */
 4728 /* Function:    ipf_state_iter                                              */
 4729 /* Returns:     int - 0 == success, else error                              */
 4730 /* Parameters:  softc(I) - pointer to main soft context                     */
 4731 /*              token(I) - pointer to ipftoken structure                    */
 4732 /*              itp(I)   - pointer to ipfgeniter structure                  */
 4733 /*              obj(I)   - pointer to data description structure            */
 4734 /*                                                                          */
 4735 /* This function handles the SIOCGENITER ioctl for the state tables and     */
 4736 /* walks through the list of entries in the state table list (softs->ipf_state_list.)    */
 4737 /* ------------------------------------------------------------------------ */
 4738 static int
 4739 ipf_state_iter(ipf_main_softc_t *softc, ipftoken_t *token, ipfgeniter_t *itp,
 4740         ipfobj_t *obj)
 4741 {
 4742         ipf_state_softc_t *softs = softc->ipf_state_soft;
 4743         ipstate_t *is, *next, zero;
 4744         int error;
 4745 
 4746         if (itp->igi_data == NULL) {
 4747                 IPFERROR(100026);
 4748                 return (EFAULT);
 4749         }
 4750 
 4751         if (itp->igi_nitems < 1) {
 4752                 IPFERROR(100027);
 4753                 return (ENOSPC);
 4754         }
 4755 
 4756         if (itp->igi_type != IPFGENITER_STATE) {
 4757                 IPFERROR(100028);
 4758                 return (EINVAL);
 4759         }
 4760 
 4761         is = token->ipt_data;
 4762         if (is == (void *)-1) {
 4763                 IPFERROR(100029);
 4764                 return (ESRCH);
 4765         }
 4766 
 4767         error = 0;
 4768         obj->ipfo_type = IPFOBJ_IPSTATE;
 4769         obj->ipfo_size = sizeof(ipstate_t);
 4770 
 4771         READ_ENTER(&softc->ipf_state);
 4772 
 4773         is = token->ipt_data;
 4774         if (is == NULL) {
 4775                 next = softs->ipf_state_list;
 4776         } else {
 4777                 next = is->is_next;
 4778         }
 4779 
 4780         /*
 4781          * If we find a state entry to use, bump its reference count so that
 4782          * it can be used for is_next when we come back.
 4783          */
 4784         if (next != NULL) {
 4785                 MUTEX_ENTER(&next->is_lock);
 4786                 next->is_ref++;
 4787                 MUTEX_EXIT(&next->is_lock);
 4788                 token->ipt_data = next;
 4789         } else {
 4790                 bzero(&zero, sizeof(zero));
 4791                 next = &zero;
 4792                 token->ipt_data = NULL;
 4793         }
 4794         if (next->is_next == NULL)
 4795                 ipf_token_mark_complete(token);
 4796 
 4797         RWLOCK_EXIT(&softc->ipf_state);
 4798 
 4799         obj->ipfo_ptr = itp->igi_data;
 4800         error = ipf_outobjk(softc, obj, next);
 4801         if (is != NULL)
 4802                 ipf_state_deref(softc, &is);
 4803 
 4804         return (error);
 4805 }
 4806 
 4807 
 4808 /* ------------------------------------------------------------------------ */
 4809 /* Function:    ipf_state_gettable                                          */
 4810 /* Returns:     int     - 0 = success, else error                           */
 4811 /* Parameters:  softc(I) - pointer to main soft context                     */
 4812 /*              softs(I) - pointer to state context structure               */
 4813 /*              data(I)  - pointer to ioctl data                             */
 4814 /*                                                                          */
 4815 /* This function handles ioctl requests for tables of state information.    */
 4816 /* At present the only table it deals with is the hash bucket statistics.   */
 4817 /* ------------------------------------------------------------------------ */
 4818 static int
 4819 ipf_state_gettable(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
 4820         char *data)
 4821 {
 4822         ipftable_t table;
 4823         int error;
 4824 
 4825         error = ipf_inobj(softc, data, NULL, &table, IPFOBJ_GTABLE);
 4826         if (error != 0)
 4827                 return (error);
 4828 
 4829         if (table.ita_type != IPFTABLE_BUCKETS) {
 4830                 IPFERROR(100031);
 4831                 return (EINVAL);
 4832         }
 4833 
 4834         error = COPYOUT(softs->ipf_state_stats.iss_bucketlen, table.ita_table,
 4835                         softs->ipf_state_size * sizeof(u_int));
 4836         if (error != 0) {
 4837                 IPFERROR(100032);
 4838                 error = EFAULT;
 4839         }
 4840         return (error);
 4841 }
 4842 
 4843 
 4844 /* ------------------------------------------------------------------------ */
 4845 /* Function:    ipf_state_setpending                                        */
 4846 /* Returns:     Nil                                                         */
 4847 /* Parameters:  softc(I) - pointer to main soft context                     */
 4848 /*              is(I)    - pointer to state structure                       */
 4849 /* Locks:       ipf_state (read or write)                                   */
 4850 /*                                                                          */
 4851 /* Put the state entry on to the pending queue - this queue has a very      */
 4852 /* short lifetime where items are put that can't be deleted straight away   */
 4853 /* because of locking issues but we want to delete them ASAP, anyway.       */
 4854 /* ------------------------------------------------------------------------ */
 4855 void
 4856 ipf_state_setpending(ipf_main_softc_t *softc, ipstate_t *is)
 4857 {
 4858         ipf_state_softc_t *softs = softc->ipf_state_soft;
 4859         ipftq_t *oifq;
 4860 
 4861         oifq = is->is_sti.tqe_ifq;
 4862         if (oifq != NULL)
 4863                 ipf_movequeue(softc->ipf_ticks, &is->is_sti, oifq,
 4864                               &softs->ipf_state_pending);
 4865         else
 4866                 ipf_queueappend(softc->ipf_ticks, &is->is_sti,
 4867                                 &softs->ipf_state_pending, is);
 4868 
 4869         MUTEX_ENTER(&is->is_lock);
 4870         if (is->is_me != NULL) {
 4871                 *is->is_me = NULL;
 4872                 is->is_me = NULL;
 4873                 is->is_ref--;
 4874         }
 4875         MUTEX_EXIT(&is->is_lock);
 4876 }
 4877 
 4878 
 4879 /* ------------------------------------------------------------------------ */
 4880 /* Function:    ipf_state_matchflush                                        */
 4881 /* Returns:     int     - 0 = success, else error                           */
 4882 /* Parameters:  softc(I) - pointer to main soft context                     */
 4883 /*              data(I)  - pointer to ioctl data holding the match array    */
 4884 /* Locks:       ipf_state (read or write)                                   */
 4885 /*                                                                          */
 4886 /* Flush all entries from the list of state entries that match the          */
 4887 /* properties in the array loaded.                                          */
 4888 /* ------------------------------------------------------------------------ */
 4889 int
 4890 ipf_state_matchflush(ipf_main_softc_t *softc, caddr_t data)
 4891 {
 4892         ipf_state_softc_t *softs = softc->ipf_state_soft;
 4893         int *array, flushed, error;
 4894         ipstate_t *state, *statenext;
 4895         ipfobj_t obj;
 4896 
 4897         error = ipf_matcharray_load(softc, data, &obj, &array);
 4898         if (error != 0)
 4899                 return (error);
 4900 
 4901         flushed = 0;
 4902 
 4903         for (state = softs->ipf_state_list; state != NULL; state = statenext) {
 4904                 statenext = state->is_next;
 4905                 if (ipf_state_matcharray(state, array, softc->ipf_ticks) == 0) {
 4906                         ipf_state_del(softc, state, ISL_FLUSH);
 4907                         flushed++;
 4908                 }
 4909         }
 4910 
 4911         obj.ipfo_retval = flushed;
 4912         error = BCOPYOUT(&obj, data, sizeof(obj));
 4913 
 4914         KFREES(array, array[0] * sizeof(*array));
 4915 
 4916         return (error);
 4917 }
 4918 
 4919 
 4920 /* ------------------------------------------------------------------------ */
 4921 /* Function:    ipf_state_matcharray                                        */
 4922 /* Returns:     int   - 0 = no match, 1 = match                             */
 4923 /* Parameters:  state(I) - pointer to state structure                       */
 4924 /*              array(I) - pointer to ipf matching expression               */
 4925 /*              ticks(I) - current value of ipfilter tick timer             */
 4926 /* Locks:       ipf_state (read or write)                                   */
 4927 /*                                                                          */
 4928 /* Compare a state entry with the match array passed in and return a value  */
 4929 /* to indicate whether or not the matching was successful.                  */
 4930 /* ------------------------------------------------------------------------ */
 4931 static int
 4932 ipf_state_matcharray(ipstate_t *state, int *array, u_long ticks)
 4933 {
 4934         int i, n, *x, rv, p;
 4935         ipfexp_t *e;
 4936 
 4937         rv = 0;
 4938         n = array[0];
 4939         x = array + 1;
 4940 
 4941         for (; n > 0; x += 3 + x[3], rv = 0) {
 4942                 e = (ipfexp_t *)x;
 4943                 n -= e->ipfe_size;
 4944                 if (x[0] == IPF_EXP_END)
 4945                         break;
 4946 
 4947                 /*
 4948                  * If we need to match the protocol and that doesn't match,
 4949                  * don't even both with the instruction array.
 4950                  */
 4951                 p = e->ipfe_cmd >> 16;
 4952                 if ((p != 0) && (p != state->is_p))
 4953                         break;
 4954 
 4955                 switch (e->ipfe_cmd)
 4956                 {
 4957                 case IPF_EXP_IP_PR :
 4958                         for (i = 0; !rv && i < e->ipfe_narg; i++) {
 4959                                 rv |= (state->is_p == e->ipfe_arg0[i]);
 4960                         }
 4961                         break;
 4962 
 4963                 case IPF_EXP_IP_SRCADDR :
 4964                         if (state->is_v != 4)
 4965                                 break;
 4966                         for (i = 0; !rv && i < e->ipfe_narg; i++) {
 4967                                 rv |= ((state->is_saddr &
 4968                                         e->ipfe_arg0[i * 2 + 1]) ==
 4969                                       e->ipfe_arg0[i * 2]);
 4970                         }
 4971                         break;
 4972 
 4973                 case IPF_EXP_IP_DSTADDR :
 4974                         if (state->is_v != 4)
 4975                                 break;
 4976                         for (i = 0; !rv && i < e->ipfe_narg; i++) {
 4977                                 rv |= ((state->is_daddr &
 4978                                         e->ipfe_arg0[i * 2 + 1]) ==
 4979                                        e->ipfe_arg0[i * 2]);
 4980                         }
 4981                         break;
 4982 
 4983                 case IPF_EXP_IP_ADDR :
 4984                         if (state->is_v != 4)
 4985                                 break;
 4986                         for (i = 0; !rv && i < e->ipfe_narg; i++) {
 4987                                 rv |= ((state->is_saddr &
 4988                                         e->ipfe_arg0[i * 2 + 1]) ==
 4989                                        e->ipfe_arg0[i * 2]) ||
 4990                                        ((state->is_daddr &
 4991                                         e->ipfe_arg0[i * 2 + 1]) ==
 4992                                        e->ipfe_arg0[i * 2]);
 4993                         }
 4994                         break;
 4995 
 4996 #ifdef USE_INET6
 4997                 case IPF_EXP_IP6_SRCADDR :
 4998                         if (state->is_v != 6)
 4999                                 break;
 5000                         for (i = 0; !rv && i < x[3]; i++) {
 5001                                 rv |= IP6_MASKEQ(&state->is_src.in6,
 5002                                                  &e->ipfe_arg0[i * 8 + 4],
 5003                                                  &e->ipfe_arg0[i * 8]);
 5004                         }
 5005                         break;
 5006 
 5007                 case IPF_EXP_IP6_DSTADDR :
 5008                         if (state->is_v != 6)
 5009                                 break;
 5010                         for (i = 0; !rv && i < x[3]; i++) {
 5011                                 rv |= IP6_MASKEQ(&state->is_dst.in6,
 5012                                                  &e->ipfe_arg0[i * 8 + 4],
 5013                                                  &e->ipfe_arg0[i * 8]);
 5014                         }
 5015                         break;
 5016 
 5017                 case IPF_EXP_IP6_ADDR :
 5018                         if (state->is_v != 6)
 5019                                 break;
 5020                         for (i = 0; !rv && i < x[3]; i++) {
 5021                                 rv |= IP6_MASKEQ(&state->is_src.in6,
 5022                                                  &e->ipfe_arg0[i * 8 + 4],
 5023                                                  &e->ipfe_arg0[i * 8]) ||
 5024                                       IP6_MASKEQ(&state->is_dst.in6,
 5025                                                  &e->ipfe_arg0[i * 8 + 4],
 5026                                                  &e->ipfe_arg0[i * 8]);
 5027                         }
 5028                         break;
 5029 #endif
 5030 
 5031                 case IPF_EXP_UDP_PORT :
 5032                 case IPF_EXP_TCP_PORT :
 5033                         for (i = 0; !rv && i < e->ipfe_narg; i++) {
 5034                                 rv |= (state->is_sport == e->ipfe_arg0[i]) ||
 5035                                       (state->is_dport == e->ipfe_arg0[i]);
 5036                         }
 5037                         break;
 5038 
 5039                 case IPF_EXP_UDP_SPORT :
 5040                 case IPF_EXP_TCP_SPORT :
 5041                         for (i = 0; !rv && i < e->ipfe_narg; i++) {
 5042                                 rv |= (state->is_sport == e->ipfe_arg0[i]);
 5043                         }
 5044                         break;
 5045 
 5046                 case IPF_EXP_UDP_DPORT :
 5047                 case IPF_EXP_TCP_DPORT :
 5048                         for (i = 0; !rv && i < e->ipfe_narg; i++) {
 5049                                 rv |= (state->is_dport == e->ipfe_arg0[i]);
 5050                         }
 5051                         break;
 5052 
 5053                 case IPF_EXP_TCP_STATE :
 5054                         for (i = 0; !rv && i < e->ipfe_narg; i++) {
 5055                                 rv |= (state->is_state[0] == e->ipfe_arg0[i]) ||
 5056                                       (state->is_state[1] == e->ipfe_arg0[i]);
 5057                         }
 5058                         break;
 5059 
 5060                 case IPF_EXP_IDLE_GT :
 5061                         rv |= (ticks - state->is_touched > e->ipfe_arg0[0]);
 5062                         break;
 5063                 }
 5064 
 5065                 /*
 5066                  * Factor in doing a negative match.
 5067                  */
 5068                 rv ^= e->ipfe_not;
 5069 
 5070                 if (rv == 0)
 5071                         break;
 5072         }
 5073 
 5074         return (rv);
 5075 }
 5076 
 5077 
 5078 /* ------------------------------------------------------------------------ */
 5079 /* Function:    ipf_state_settimeout                                        */
 5080 /* Returns:     int 0 = success, else failure                               */
 5081 /* Parameters:  softc(I)  - pointer to main soft context                    */
 5082 /*              t(I)      - pointer to tuneable being changed               */
 5083 /*              p(I)      - pointer to the new value                        */
 5084 /*                                                                          */
 5085 /* Sets a timeout value for one of the many timeout queues.  We find the    */
 5086 /* correct queue using a somewhat manual process of comparing the timeout   */
 5087 /* names for each specific value available and calling ipf_apply_timeout on */
 5088 /* that queue so that all of the items on it are updated accordingly.       */
 5089 /* ------------------------------------------------------------------------ */
 5090 int
 5091 ipf_state_settimeout(struct ipf_main_softc_s *softc, ipftuneable_t *t,
 5092         ipftuneval_t *p)
 5093 {
 5094         ipf_state_softc_t *softs = softc->ipf_state_soft;
 5095 
 5096         /*
 5097          * In case there is nothing to do...
 5098          */
 5099         if (*t->ipft_pint == p->ipftu_int)
 5100                 return (0);
 5101 
 5102         if (!strncmp(t->ipft_name, "tcp_", 4))
 5103                 return (ipf_settimeout_tcp(t, p, softs->ipf_state_tcptq));
 5104 
 5105         if (!strcmp(t->ipft_name, "udp_timeout")) {
 5106                 ipf_apply_timeout(&softs->ipf_state_udptq, p->ipftu_int);
 5107         } else if (!strcmp(t->ipft_name, "udp_ack_timeout")) {
 5108                 ipf_apply_timeout(&softs->ipf_state_udpacktq, p->ipftu_int);
 5109         } else if (!strcmp(t->ipft_name, "icmp_timeout")) {
 5110                 ipf_apply_timeout(&softs->ipf_state_icmptq, p->ipftu_int);
 5111         } else if (!strcmp(t->ipft_name, "icmp_ack_timeout")) {
 5112                 ipf_apply_timeout(&softs->ipf_state_icmpacktq, p->ipftu_int);
 5113         } else if (!strcmp(t->ipft_name, "ip_timeout")) {
 5114                 ipf_apply_timeout(&softs->ipf_state_iptq, p->ipftu_int);
 5115         } else {
 5116                 IPFERROR(100034);
 5117                 return (ESRCH);
 5118         }
 5119 
 5120         /*
 5121          * Update the tuneable being set.
 5122          */
 5123         *t->ipft_pint = p->ipftu_int;
 5124 
 5125         return (0);
 5126 }
 5127 
 5128 
 5129 /* ------------------------------------------------------------------------ */
 5130 /* Function:    ipf_state_rehash                                            */
 5131 /* Returns:     int 0 = success, else failure                               */
 5132 /* Parameters:  softc(I)  - pointer to main soft context                    */
 5133 /*              t(I)      - pointer to tuneable being changed               */
 5134 /*              p(I)      - pointer to the new value                        */
 5135 /*                                                                          */
 5136 /* To change the size of the state hash table at runtime, a new table has   */
 5137 /* to be allocated and then all of the existing entries put in it, bumping  */
 5138 /* up the bucketlength for it as we go along.                               */
 5139 /* ------------------------------------------------------------------------ */
 5140 int
 5141 ipf_state_rehash(ipf_main_softc_t *softc, ipftuneable_t *t, ipftuneval_t *p)
 5142 {
 5143         ipf_state_softc_t *softs = softc->ipf_state_soft;
 5144         ipstate_t **newtab, *is;
 5145         u_long *newseed;
 5146         u_int *bucketlens;
 5147         u_int maxbucket;
 5148         u_int newsize;
 5149         u_int hv;
 5150         int i;
 5151 
 5152         newsize = p->ipftu_int;
 5153         /*
 5154          * In case there is nothing to do...
 5155          */
 5156         if (newsize == softs->ipf_state_size)
 5157                 return (0);
 5158 
 5159         KMALLOCS(newtab, ipstate_t **, newsize * sizeof(ipstate_t *));
 5160         if (newtab == NULL) {
 5161                 IPFERROR(100035);
 5162                 return (ENOMEM);
 5163         }
 5164 
 5165         KMALLOCS(bucketlens, u_int *, newsize * sizeof(u_int));
 5166         if (bucketlens == NULL) {
 5167                 KFREES(newtab, newsize * sizeof(*softs->ipf_state_table));
 5168                 IPFERROR(100036);
 5169                 return (ENOMEM);
 5170         }
 5171 
 5172         newseed = ipf_state_seed_alloc(newsize, softs->ipf_state_max);
 5173         if (newseed == NULL) {
 5174                 KFREES(bucketlens, newsize * sizeof(*bucketlens));
 5175                 KFREES(newtab, newsize * sizeof(*newtab));
 5176                 IPFERROR(100037);
 5177                 return (ENOMEM);
 5178         }
 5179 
 5180         for (maxbucket = 0, i = newsize; i > 0; i >>= 1)
 5181                 maxbucket++;
 5182         maxbucket *= 2;
 5183 
 5184         bzero((char *)newtab, newsize * sizeof(ipstate_t *));
 5185         bzero((char *)bucketlens, newsize * sizeof(u_int));
 5186 
 5187         WRITE_ENTER(&softc->ipf_state);
 5188 
 5189         if (softs->ipf_state_table != NULL) {
 5190                 KFREES(softs->ipf_state_table,
 5191                        softs->ipf_state_size * sizeof(*softs->ipf_state_table));
 5192         }
 5193         softs->ipf_state_table = newtab;
 5194 
 5195         if (softs->ipf_state_seed != NULL) {
 5196                 KFREES(softs->ipf_state_seed,
 5197                        softs->ipf_state_size * sizeof(*softs->ipf_state_seed));
 5198         }
 5199         softs->ipf_state_seed = newseed;
 5200 
 5201         if (softs->ipf_state_stats.iss_bucketlen != NULL) {
 5202                 KFREES(softs->ipf_state_stats.iss_bucketlen,
 5203                        softs->ipf_state_size * sizeof(u_int));
 5204         }
 5205         softs->ipf_state_stats.iss_bucketlen = bucketlens;
 5206         softs->ipf_state_maxbucket = maxbucket;
 5207         softs->ipf_state_size = newsize;
 5208 
 5209         /*
 5210          * Walk through the entire list of state table entries and put them
 5211          * in the new state table, somewhere.  Because we have a new table,
 5212          * we need to restart the counter of how many chains are in use.
 5213          */
 5214         softs->ipf_state_stats.iss_inuse = 0;
 5215         for (is = softs->ipf_state_list; is != NULL; is = is->is_next) {
 5216                 is->is_hnext = NULL;
 5217                 is->is_phnext = NULL;
 5218                 hv = is->is_hv % softs->ipf_state_size;
 5219 
 5220                 if (softs->ipf_state_table[hv] != NULL)
 5221                         softs->ipf_state_table[hv]->is_phnext = &is->is_hnext;
 5222                 else
 5223                         softs->ipf_state_stats.iss_inuse++;
 5224                 is->is_phnext = softs->ipf_state_table + hv;
 5225                 is->is_hnext = softs->ipf_state_table[hv];
 5226                 softs->ipf_state_table[hv] = is;
 5227                 softs->ipf_state_stats.iss_bucketlen[hv]++;
 5228         }
 5229         RWLOCK_EXIT(&softc->ipf_state);
 5230 
 5231         return (0);
 5232 }
 5233 
 5234 
 5235 /* ------------------------------------------------------------------------ */
 5236 /* Function:    ipf_state_add_tq                                            */
 5237 /* Returns:     ipftq_t * - NULL = failure, else pointer to new timeout     */
 5238 /*                          queue                                           */
 5239 /* Parameters:  softc(I)  - pointer to main soft context                    */
 5240 /*              ttl(I)    - timeout value (ttl) for the new queue           */
 5241 /*                                                                          */
 5242 /* Request a pointer to a timeout queue that has a ttl as given by the      */
 5243 /* value being passed in.  The timeout queue is added to the list of those  */
 5244 /* used internally for stateful filtering.                                  */
 5245 /* ------------------------------------------------------------------------ */
 5246 ipftq_t *
 5247 ipf_state_add_tq(ipf_main_softc_t *softc, int ttl)
 5248 {
 5249         ipf_state_softc_t *softs = softc->ipf_state_soft;
 5250 
 5251         return (ipf_addtimeoutqueue(softc, &softs->ipf_state_usertq, ttl));
 5252 }
 5253 
 5254 
 5255 #ifndef _KERNEL
 5256 /*
 5257  * Display the list of active state table entries.
 5258  */
 5259 void
 5260 ipf_state_dump(ipf_main_softc_t *softc, void *arg)
 5261 {
 5262         ipf_state_softc_t *softs = arg;
 5263         ipstate_t *ips;
 5264 
 5265         printf("List of active state sessions:\n");
 5266         for (ips = softs->ipf_state_list; ips != NULL; )
 5267                 ips = printstate(ips, opts & (OPT_DEBUG|OPT_VERBOSE),
 5268                                  softc->ipf_ticks);
 5269 }
 5270 #endif

Cache object: dcfb914b7ab77d483f364e3ea108ee7e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.