FreeBSD/Linux Kernel Cross Reference
sys/net/pf.c

    1 /*      $OpenBSD: pf.c,v 1.1171 2023/01/22 23:05:51 yasuoka Exp $ */
    2 
    3 /*
    4  * Copyright (c) 2001 Daniel Hartmeier
    5  * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  *
   12  *    - Redistributions of source code must retain the above copyright
   13  *      notice, this list of conditions and the following disclaimer.
   14  *    - Redistributions in binary form must reproduce the above
   15  *      copyright notice, this list of conditions and the following
   16  *      disclaimer in the documentation and/or other materials provided
   17  *      with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30  * POSSIBILITY OF SUCH DAMAGE.
   31  *
   32  * Effort sponsored in part by the Defense Advanced Research Projects
   33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
   34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
   35  *
   36  */
   37 
   38 #include "bpfilter.h"
   39 #include "carp.h"
   40 #include "pflog.h"
   41 #include "pfsync.h"
   42 #include "pflow.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/mbuf.h>
   47 #include <sys/filio.h>
   48 #include <sys/socket.h>
   49 #include <sys/socketvar.h>
   50 #include <sys/kernel.h>
   51 #include <sys/time.h>
   52 #include <sys/pool.h>
   53 #include <sys/proc.h>
   54 #include <sys/rwlock.h>
   55 #include <sys/syslog.h>
   56 
   57 #include <crypto/sha2.h>
   58 
   59 #include <net/if.h>
   60 #include <net/if_var.h>
   61 #include <net/if_types.h>
   62 #include <net/route.h>
   63 #include <net/toeplitz.h>
   64 
   65 #include <netinet/in.h>
   66 #include <netinet/in_var.h>
   67 #include <netinet/ip.h>
   68 #include <netinet/in_pcb.h>
   69 #include <netinet/ip_var.h>
   70 #include <netinet/ip_icmp.h>
   71 #include <netinet/icmp_var.h>
   72 #include <netinet/tcp.h>
   73 #include <netinet/tcp_seq.h>
   74 #include <netinet/tcp_timer.h>
   75 #include <netinet/tcp_var.h>
   76 #include <netinet/tcp_fsm.h>
   77 #include <netinet/udp.h>
   78 #include <netinet/udp_var.h>
   79 #include <netinet/ip_divert.h>
   80 
   81 #ifdef INET6
   82 #include <netinet6/in6_var.h>
   83 #include <netinet/ip6.h>
   84 #include <netinet6/ip6_var.h>
   85 #include <netinet/icmp6.h>
   86 #include <netinet6/nd6.h>
   87 #include <netinet6/ip6_divert.h>
   88 #endif /* INET6 */
   89 
   90 #include <net/pfvar.h>
   91 #include <net/pfvar_priv.h>
   92 
   93 #if NPFLOG > 0
   94 #include <net/if_pflog.h>
   95 #endif  /* NPFLOG > 0 */
   96 
   97 #if NPFLOW > 0
   98 #include <net/if_pflow.h>
   99 #endif  /* NPFLOW > 0 */
  100 
  101 #if NPFSYNC > 0
  102 #include <net/if_pfsync.h>
  103 #else
  104 struct pfsync_deferral;
  105 #endif /* NPFSYNC > 0 */
  106 
  107 /*
  108  * Global variables
  109  */
  110 struct pf_state_tree     pf_statetbl;
  111 struct pf_queuehead      pf_queues[2];
  112 struct pf_queuehead     *pf_queues_active;
  113 struct pf_queuehead     *pf_queues_inactive;
  114 
  115 struct pf_status         pf_status;
  116 
  117 int                      pf_hdr_limit = 20;  /* arbitrary limit, tune in ddb */
  118 
  119 SHA2_CTX                 pf_tcp_secret_ctx;
  120 u_char                   pf_tcp_secret[16];
  121 int                      pf_tcp_secret_init;
  122 int                      pf_tcp_iss_off;
  123 
  124 int              pf_npurge;
  125 struct task      pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
  126 struct timeout   pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);
  127 
  128 enum pf_test_status {
  129         PF_TEST_FAIL = -1,
  130         PF_TEST_OK,
  131         PF_TEST_QUICK
  132 };
  133 
  134 struct pf_test_ctx {
  135         struct pf_pdesc          *pd;
  136         struct pf_rule_actions    act;
  137         u_int8_t                  icmpcode;
  138         u_int8_t                  icmptype;
  139         int                       icmp_dir;
  140         int                       state_icmp;
  141         int                       tag;
  142         u_short                   reason;
  143         struct pf_rule_item      *ri;
  144         struct pf_src_node       *sns[PF_SN_MAX];
  145         struct pf_rule_slist      rules;
  146         struct pf_rule           *nr;
  147         struct pf_rule          **rm;
  148         struct pf_rule           *a;
  149         struct pf_rule          **am;
  150         struct pf_ruleset       **rsm;
  151         struct pf_ruleset        *arsm;
  152         struct pf_ruleset        *aruleset;
  153         struct tcphdr            *th;
  154 };
  155 
  156 struct pool              pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
  157 struct pool              pf_state_pl, pf_state_key_pl, pf_state_item_pl;
  158 struct pool              pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;
  159 
  160 void                     pf_add_threshold(struct pf_threshold *);
  161 int                      pf_check_threshold(struct pf_threshold *);
  162 int                      pf_check_tcp_cksum(struct mbuf *, int, int,
  163                             sa_family_t);
  164 __inline void            pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
  165                             u_int8_t);
  166 void                     pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
  167                             const struct pf_addr *, sa_family_t, u_int8_t);
  168 int                      pf_modulate_sack(struct pf_pdesc *,
  169                             struct pf_state_peer *);
  170 int                      pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
  171                             u_int16_t *, u_int16_t *);
  172 int                      pf_change_icmp_af(struct mbuf *, int,
  173                             struct pf_pdesc *, struct pf_pdesc *,
  174                             struct pf_addr *, struct pf_addr *, sa_family_t,
  175                             sa_family_t);
  176 int                      pf_translate_a(struct pf_pdesc *, struct pf_addr *,
  177                             struct pf_addr *);
  178 void                     pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
  179                             u_int16_t *, struct pf_addr *, struct pf_addr *,
  180                             u_int16_t);
  181 int                      pf_translate_icmp_af(struct pf_pdesc*, int, void *);
  182 void                     pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
  183                             sa_family_t, struct pf_rule *, u_int);
  184 void                     pf_detach_state(struct pf_state *);
  185 struct pf_state_key     *pf_state_key_attach(struct pf_state_key *,
  186                              struct pf_state *, int);
  187 void                     pf_state_key_detach(struct pf_state *, int);
  188 u_int32_t                pf_tcp_iss(struct pf_pdesc *);
  189 void                     pf_rule_to_actions(struct pf_rule *,
  190                             struct pf_rule_actions *);
  191 int                      pf_test_rule(struct pf_pdesc *, struct pf_rule **,
  192                             struct pf_state **, struct pf_rule **,
  193                             struct pf_ruleset **, u_short *,
  194                             struct pfsync_deferral **);
  195 static __inline int      pf_create_state(struct pf_pdesc *, struct pf_rule *,
  196                             struct pf_rule *, struct pf_rule *,
  197                             struct pf_state_key **, struct pf_state_key **,
  198                             int *, struct pf_state **, int,
  199                             struct pf_rule_slist *, struct pf_rule_actions *,
  200                             struct pf_src_node **);
  201 static __inline int      pf_state_key_addr_setup(struct pf_pdesc *, void *,
  202                             int, struct pf_addr *, int, struct pf_addr *,
  203                             int, int);
  204 int                      pf_state_key_setup(struct pf_pdesc *, struct
  205                             pf_state_key **, struct pf_state_key **, int);
  206 int                      pf_tcp_track_full(struct pf_pdesc *,
  207                             struct pf_state **, u_short *, int *, int);
  208 int                      pf_tcp_track_sloppy(struct pf_pdesc *,
  209                             struct pf_state **, u_short *);
  210 static __inline int      pf_synproxy(struct pf_pdesc *, struct pf_state **,
  211                             u_short *);
  212 int                      pf_test_state(struct pf_pdesc *, struct pf_state **,
  213                             u_short *);
  214 int                      pf_icmp_state_lookup(struct pf_pdesc *,
  215                             struct pf_state_key_cmp *, struct pf_state **,
  216                             u_int16_t, u_int16_t, int, int *, int, int);
  217 int                      pf_test_state_icmp(struct pf_pdesc *,
  218                             struct pf_state **, u_short *);
  219 u_int16_t                pf_calc_mss(struct pf_addr *, sa_family_t, int,
  220                             u_int16_t);
  221 static __inline int      pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
  222                             sa_family_t, struct pf_src_node **);
  223 struct pf_divert        *pf_get_divert(struct mbuf *);
  224 int                      pf_walk_option(struct pf_pdesc *, struct ip *,
  225                             int, int, u_short *);
  226 int                      pf_walk_header(struct pf_pdesc *, struct ip *,
  227                             u_short *);
  228 int                      pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
  229                             int, int, u_short *);
  230 int                      pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
  231                             u_short *);
  232 void                     pf_print_state_parts(struct pf_state *,
  233                             struct pf_state_key *, struct pf_state_key *);
  234 int                      pf_addr_wrap_neq(struct pf_addr_wrap *,
  235                             struct pf_addr_wrap *);
  236 int                      pf_compare_state_keys(struct pf_state_key *,
  237                             struct pf_state_key *, struct pfi_kif *, u_int);
  238 u_int16_t                pf_pkt_hash(sa_family_t, uint8_t,
  239                              const struct pf_addr *, const struct pf_addr *,
  240                              uint16_t, uint16_t);
  241 int                      pf_find_state(struct pf_pdesc *,
  242                             struct pf_state_key_cmp *, struct pf_state **);
  243 int                      pf_src_connlimit(struct pf_state **);
  244 int                      pf_match_rcvif(struct mbuf *, struct pf_rule *);
  245 int                      pf_step_into_anchor(struct pf_test_ctx *,
  246                             struct pf_rule *);
  247 int                      pf_match_rule(struct pf_test_ctx *,
  248                             struct pf_ruleset *);
  249 void                     pf_counters_inc(int, struct pf_pdesc *,
  250                             struct pf_state *, struct pf_rule *,
  251                             struct pf_rule *);
  252 
  253 int                      pf_state_key_isvalid(struct pf_state_key *);
  254 struct pf_state_key     *pf_state_key_ref(struct pf_state_key *);
  255 void                     pf_state_key_unref(struct pf_state_key *);
  256 void                     pf_state_key_link_reverse(struct pf_state_key *,
  257                             struct pf_state_key *);
  258 void                     pf_state_key_unlink_reverse(struct pf_state_key *);
  259 void                     pf_state_key_link_inpcb(struct pf_state_key *,
  260                             struct inpcb *);
  261 void                     pf_state_key_unlink_inpcb(struct pf_state_key *);
  262 void                     pf_inpcb_unlink_state_key(struct inpcb *);
  263 void                     pf_pktenqueue_delayed(void *);
  264 int32_t                  pf_state_expires(const struct pf_state *, uint8_t);
  265 
  266 #if NPFLOG > 0
  267 void                     pf_log_matches(struct pf_pdesc *, struct pf_rule *,
  268                             struct pf_rule *, struct pf_ruleset *,
  269                             struct pf_rule_slist *);
  270 #endif  /* NPFLOG > 0 */
  271 
  272 extern struct pool pfr_ktable_pl;
  273 extern struct pool pfr_kentry_pl;
  274 
  275 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
  276         { &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT },
  277         { &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT },
  278         { &pf_frent_pl, PFFRAG_FRENT_HIWAT, PFFRAG_FRENT_HIWAT },
  279         { &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT },
  280         { &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT },
  281         { &pf_pktdelay_pl, PF_PKTDELAY_MAXPKTS, PF_PKTDELAY_MAXPKTS },
  282         { &pf_anchor_pl, PF_ANCHOR_HIWAT, PF_ANCHOR_HIWAT }
  283 };
  284 
  285 #define BOUND_IFACE(r, k) \
  286         ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all
  287 
  288 #define STATE_INC_COUNTERS(s)                                   \
  289         do {                                                    \
  290                 struct pf_rule_item *mrm;                       \
  291                 s->rule.ptr->states_cur++;                      \
  292                 s->rule.ptr->states_tot++;                      \
  293                 if (s->anchor.ptr != NULL) {                    \
  294                         s->anchor.ptr->states_cur++;            \
  295                         s->anchor.ptr->states_tot++;            \
  296                 }                                               \
  297                 SLIST_FOREACH(mrm, &s->match_rules, entry)      \
  298                         mrm->r->states_cur++;                   \
  299         } while (0)
  300 
  301 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
  302 static inline int pf_state_compare_key(const struct pf_state_key *,
  303         const struct pf_state_key *);
  304 static inline int pf_state_compare_id(const struct pf_state *,
  305         const struct pf_state *);
  306 #ifdef INET6
  307 static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
  308 static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
  309 #endif /* INET6 */
  310 static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t);
  311 
  312 struct pf_src_tree tree_src_tracking;
  313 
  314 struct pf_state_tree_id tree_id;
  315 struct pf_state_list pf_state_list = PF_STATE_LIST_INITIALIZER(pf_state_list);
  316 
  317 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
  318 RBT_GENERATE(pf_state_tree, pf_state_key, sk_entry, pf_state_compare_key);
  319 RBT_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);
  320 
  321 int
  322 pf_addr_compare(const struct pf_addr *a, const struct pf_addr *b,
  323     sa_family_t af)
  324 {
  325         switch (af) {
  326         case AF_INET:
  327                 if (a->addr32[0] > b->addr32[0])
  328                         return (1);
  329                 if (a->addr32[0] < b->addr32[0])
  330                         return (-1);
  331                 break;
  332 #ifdef INET6
  333         case AF_INET6:
  334                 if (a->addr32[3] > b->addr32[3])
  335                         return (1);
  336                 if (a->addr32[3] < b->addr32[3])
  337                         return (-1);
  338                 if (a->addr32[2] > b->addr32[2])
  339                         return (1);
  340                 if (a->addr32[2] < b->addr32[2])
  341                         return (-1);
  342                 if (a->addr32[1] > b->addr32[1])
  343                         return (1);
  344                 if (a->addr32[1] < b->addr32[1])
  345                         return (-1);
  346                 if (a->addr32[0] > b->addr32[0])
  347                         return (1);
  348                 if (a->addr32[0] < b->addr32[0])
  349                         return (-1);
  350                 break;
  351 #endif /* INET6 */
  352         }
  353         return (0);
  354 }
  355 
  356 static __inline int
  357 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
  358 {
  359         int     diff;
  360 
  361         if (a->rule.ptr > b->rule.ptr)
  362                 return (1);
  363         if (a->rule.ptr < b->rule.ptr)
  364                 return (-1);
  365         if ((diff = a->type - b->type) != 0)
  366                 return (diff);
  367         if ((diff = a->af - b->af) != 0)
  368                 return (diff);
  369         if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
  370                 return (diff);
  371         return (0);
  372 }
  373 
  374 static __inline void
  375 pf_set_protostate(struct pf_state *st, int which, u_int8_t newstate)
  376 {
  377         if (which == PF_PEER_DST || which == PF_PEER_BOTH)
  378                 st->dst.state = newstate;
  379         if (which == PF_PEER_DST)
  380                 return;
  381 
  382         if (st->src.state == newstate)
  383                 return;
  384         if (st->creatorid == pf_status.hostid &&
  385             st->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
  386             !(TCPS_HAVEESTABLISHED(st->src.state) ||
  387             st->src.state == TCPS_CLOSED) &&
  388             (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
  389                 pf_status.states_halfopen--;
  390 
  391         st->src.state = newstate;
  392 }
  393 
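/*
 * pf_set_protostate() above centralizes peer protocol-state changes so
 * that pf_status.states_halfopen stays consistent: for TCP states
 * created by this host (st->creatorid == pf_status.hostid) the counter
 * is decremented once the source peer leaves the half-open range, i.e.
 * becomes established or closed.
 */
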
  394 void
  395 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
  396 {
  397         switch (af) {
  398         case AF_INET:
  399                 dst->addr32[0] = src->addr32[0];
  400                 break;
  401 #ifdef INET6
  402         case AF_INET6:
  403                 dst->addr32[0] = src->addr32[0];
  404                 dst->addr32[1] = src->addr32[1];
  405                 dst->addr32[2] = src->addr32[2];
  406                 dst->addr32[3] = src->addr32[3];
  407                 break;
  408 #endif /* INET6 */
  409         default:
  410                 unhandled_af(af);
  411         }
  412 }
  413 
  414 void
  415 pf_init_threshold(struct pf_threshold *threshold,
  416     u_int32_t limit, u_int32_t seconds)
  417 {
  418         threshold->limit = limit * PF_THRESHOLD_MULT;
  419         threshold->seconds = seconds;
  420         threshold->count = 0;
  421         threshold->last = getuptime();
  422 }
  423 
  424 void
  425 pf_add_threshold(struct pf_threshold *threshold)
  426 {
  427         u_int32_t t = getuptime(), diff = t - threshold->last;
  428 
  429         if (diff >= threshold->seconds)
  430                 threshold->count = 0;
  431         else
  432                 threshold->count -= threshold->count * diff /
  433                     threshold->seconds;
  434         threshold->count += PF_THRESHOLD_MULT;
  435         threshold->last = t;
  436 }
  437 
  438 int
  439 pf_check_threshold(struct pf_threshold *threshold)
  440 {
  441         return (threshold->count > threshold->limit);
  442 }
  443 
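/*
 * The three threshold helpers above implement a simple decaying
 * counter: pf_init_threshold() scales the configured limit by
 * PF_THRESHOLD_MULT, pf_add_threshold() first ages the counter
 * linearly over the configured window (or zeroes it once a full window
 * has passed) and then charges PF_THRESHOLD_MULT for the new event,
 * and pf_check_threshold() trips once the counter exceeds the scaled
 * limit.  As an illustration, with a "15/5" connection-rate limit the
 * counter may briefly admit more than 15 events spread across the
 * 5 second window, since earlier events have partially decayed, but a
 * burst of 16 back-to-back connections exceeds 15 * PF_THRESHOLD_MULT
 * immediately.
 */
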
  444 void
  445 pf_state_list_insert(struct pf_state_list *pfs, struct pf_state *st)
  446 {
  447         /*
  448          * we can always put states on the end of the list.
  449          *
  450          * things reading the list should take a read lock, then
  451          * the mutex, get the head and tail pointers, release the
  452          * mutex, and then they can iterate between the head and tail.
  453          */
  454 
  455         pf_state_ref(st); /* get a ref for the list */
  456 
  457         mtx_enter(&pfs->pfs_mtx);
  458         TAILQ_INSERT_TAIL(&pfs->pfs_list, st, entry_list);
  459         mtx_leave(&pfs->pfs_mtx);
  460 }
  461 
  462 void
  463 pf_state_list_remove(struct pf_state_list *pfs, struct pf_state *st)
  464 {
  465         /* states can only be removed when the write lock is held */
  466         rw_assert_wrlock(&pfs->pfs_rwl);
  467 
  468         mtx_enter(&pfs->pfs_mtx);
  469         TAILQ_REMOVE(&pfs->pfs_list, st, entry_list);
  470         mtx_leave(&pfs->pfs_mtx);
  471 
  472         pf_state_unref(st); /* list no longer references the state */
  473 }
  474 
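/*
 * A minimal sketch of the reader side described in
 * pf_state_list_insert() above, using the pf_state_list members seen
 * in this file (pfs_rwl, pfs_mtx, pfs_list); the TAILQ head type name
 * passed to TAILQ_LAST() is assumed here:
 *
 *	rw_enter_read(&pf_state_list.pfs_rwl);
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	head = TAILQ_FIRST(&pf_state_list.pfs_list);
 *	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 *	... iterate from head towards tail with TAILQ_NEXT(); states
 *	seen this way cannot be freed, because removal requires the
 *	write lock on pfs_rwl ...
 *
 *	rw_exit_read(&pf_state_list.pfs_rwl);
 */
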
  475 int
  476 pf_src_connlimit(struct pf_state **stp)
  477 {
  478         int                      bad = 0;
  479         struct pf_src_node      *sn;
  480 
  481         if ((sn = pf_get_src_node((*stp), PF_SN_NONE)) == NULL)
  482                 return (0);
  483 
  484         sn->conn++;
  485         (*stp)->src.tcp_est = 1;
  486         pf_add_threshold(&sn->conn_rate);
  487 
  488         if ((*stp)->rule.ptr->max_src_conn &&
  489             (*stp)->rule.ptr->max_src_conn < sn->conn) {
  490                 pf_status.lcounters[LCNT_SRCCONN]++;
  491                 bad++;
  492         }
  493 
  494         if ((*stp)->rule.ptr->max_src_conn_rate.limit &&
  495             pf_check_threshold(&sn->conn_rate)) {
  496                 pf_status.lcounters[LCNT_SRCCONNRATE]++;
  497                 bad++;
  498         }
  499 
  500         if (!bad)
  501                 return (0);
  502 
  503         if ((*stp)->rule.ptr->overload_tbl) {
  504                 struct pfr_addr p;
  505                 u_int32_t       killed = 0;
  506 
  507                 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
  508                 if (pf_status.debug >= LOG_NOTICE) {
  509                         log(LOG_NOTICE,
  510                             "pf: pf_src_connlimit: blocking address ");
  511                         pf_print_host(&sn->addr, 0,
  512                             (*stp)->key[PF_SK_WIRE]->af);
  513                 }
  514 
  515                 memset(&p, 0, sizeof(p));
  516                 p.pfra_af = (*stp)->key[PF_SK_WIRE]->af;
  517                 switch ((*stp)->key[PF_SK_WIRE]->af) {
  518                 case AF_INET:
  519                         p.pfra_net = 32;
  520                         p.pfra_ip4addr = sn->addr.v4;
  521                         break;
  522 #ifdef INET6
  523                 case AF_INET6:
  524                         p.pfra_net = 128;
  525                         p.pfra_ip6addr = sn->addr.v6;
  526                         break;
  527 #endif /* INET6 */
  528                 }
  529 
  530                 pfr_insert_kentry((*stp)->rule.ptr->overload_tbl,
  531                     &p, gettime());
  532 
  533                 /* kill existing states if that's required. */
  534                 if ((*stp)->rule.ptr->flush) {
  535                         struct pf_state_key *sk;
  536                         struct pf_state *st;
  537 
  538                         pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
  539                         RBT_FOREACH(st, pf_state_tree_id, &tree_id) {
  540                                 sk = st->key[PF_SK_WIRE];
  541                                 /*
  542                                  * Kill states from this source.  (Only those
  543                                  * from the same rule if PF_FLUSH_GLOBAL is not
  544                                  * set)
  545                                  */
  546                                 if (sk->af ==
  547                                     (*stp)->key[PF_SK_WIRE]->af &&
  548                                     (((*stp)->direction == PF_OUT &&
  549                                     PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
  550                                     ((*stp)->direction == PF_IN &&
  551                                     PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
  552                                     ((*stp)->rule.ptr->flush &
  553                                     PF_FLUSH_GLOBAL ||
  554                                     (*stp)->rule.ptr == st->rule.ptr)) {
  555                                         st->timeout = PFTM_PURGE;
  556                                         pf_set_protostate(st, PF_PEER_BOTH,
  557                                             TCPS_CLOSED);
  558                                         killed++;
  559                                 }
  560                         }
  561                         if (pf_status.debug >= LOG_NOTICE)
  562                                 addlog(", %u states killed", killed);
  563                 }
  564                 if (pf_status.debug >= LOG_NOTICE)
  565                         addlog("\n");
  566         }
  567 
  568         /* kill this state */
  569         (*stp)->timeout = PFTM_PURGE;
  570         pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_CLOSED);
  571         return (1);
  572 }
  573 
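/*
 * pf_src_connlimit() above enforces the per-source limits a ruleset
 * can request through state options.  An illustrative rule (not taken
 * from this tree) that exercises both the overload table and the flush
 * path would be:
 *
 *	pass in proto tcp to port ssh keep state \
 *	    (max-src-conn 100, max-src-conn-rate 15/5, \
 *	    overload <bruteforce> flush global)
 *
 * Sources exceeding either limit are inserted into the <bruteforce>
 * table and, with "flush global", their existing states are marked for
 * purging regardless of which rule created them; without "global",
 * only states created by the same rule are flushed.
 */
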
  574 int
  575 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
  576     enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
  577     struct pf_addr *raddr, struct pfi_kif *kif)
  578 {
  579         struct pf_src_node      k;
  580 
  581         if (*sn == NULL) {
  582                 k.af = af;
  583                 k.type = type;
  584                 pf_addrcpy(&k.addr, src, af);
  585                 k.rule.ptr = rule;
  586                 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
  587                 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
  588         }
  589         if (*sn == NULL) {
  590                 if (!rule->max_src_nodes ||
  591                     rule->src_nodes < rule->max_src_nodes)
  592                         (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
  593                 else
  594                         pf_status.lcounters[LCNT_SRCNODES]++;
  595                 if ((*sn) == NULL)
  596                         return (-1);
  597 
  598                 pf_init_threshold(&(*sn)->conn_rate,
  599                     rule->max_src_conn_rate.limit,
  600                     rule->max_src_conn_rate.seconds);
  601 
  602                 (*sn)->type = type;
  603                 (*sn)->af = af;
  604                 (*sn)->rule.ptr = rule;
  605                 pf_addrcpy(&(*sn)->addr, src, af);
  606                 if (raddr)
  607                         pf_addrcpy(&(*sn)->raddr, raddr, af);
  608                 if (RB_INSERT(pf_src_tree,
  609                     &tree_src_tracking, *sn) != NULL) {
  610                         if (pf_status.debug >= LOG_NOTICE) {
  611                                 log(LOG_NOTICE,
  612                                     "pf: src_tree insert failed: ");
  613                                 pf_print_host(&(*sn)->addr, 0, af);
  614                                 addlog("\n");
  615                         }
  616                         pool_put(&pf_src_tree_pl, *sn);
  617                         return (-1);
  618                 }
  619                 (*sn)->creation = getuptime();
  620                 (*sn)->rule.ptr->src_nodes++;
  621                 if (kif != NULL) {
  622                         (*sn)->kif = kif;
  623                         pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
  624                 }
  625                 pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
  626                 pf_status.src_nodes++;
  627         } else {
  628                 if (rule->max_src_states &&
  629                     (*sn)->states >= rule->max_src_states) {
  630                         pf_status.lcounters[LCNT_SRCSTATES]++;
  631                         return (-1);
  632                 }
  633         }
  634         return (0);
  635 }
  636 
  637 void
  638 pf_remove_src_node(struct pf_src_node *sn)
  639 {
  640         if (sn->states > 0 || sn->expire > getuptime())
  641                 return;
  642 
  643         sn->rule.ptr->src_nodes--;
  644         if (sn->rule.ptr->states_cur == 0 &&
  645             sn->rule.ptr->src_nodes == 0)
  646                 pf_rm_rule(NULL, sn->rule.ptr);
  647         RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
  648         pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
  649         pf_status.src_nodes--;
  650         pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
  651         pool_put(&pf_src_tree_pl, sn);
  652 }
  653 
  654 struct pf_src_node *
  655 pf_get_src_node(struct pf_state *st, enum pf_sn_types type)
  656 {
  657         struct pf_sn_item       *sni;
  658 
  659         SLIST_FOREACH(sni, &st->src_nodes, next)
  660                 if (sni->sn->type == type)
  661                         return (sni->sn);
  662         return (NULL);
  663 }
  664 
  665 void
  666 pf_state_rm_src_node(struct pf_state *st, struct pf_src_node *sn)
  667 {
  668         struct pf_sn_item       *sni, *snin, *snip = NULL;
  669 
  670         for (sni = SLIST_FIRST(&st->src_nodes); sni; sni = snin) {
  671                 snin = SLIST_NEXT(sni, next);
  672                 if (sni->sn == sn) {
  673                         if (snip)
  674                                 SLIST_REMOVE_AFTER(snip, next);
  675                         else
  676                                 SLIST_REMOVE_HEAD(&st->src_nodes, next);
  677                         pool_put(&pf_sn_item_pl, sni);
  678                         sni = NULL;
  679                         sn->states--;
  680                 }
  681                 if (sni != NULL)
  682                         snip = sni;
  683         }
  684 }
  685 
  686 /* state table stuff */
  687 
  688 static inline int
  689 pf_state_compare_key(const struct pf_state_key *a,
  690     const struct pf_state_key *b)
  691 {
  692         int     diff;
  693 
  694         if ((diff = a->hash - b->hash) != 0)
  695                 return (diff);
  696         if ((diff = a->proto - b->proto) != 0)
  697                 return (diff);
  698         if ((diff = a->af - b->af) != 0)
  699                 return (diff);
  700         if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
  701                 return (diff);
  702         if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
  703                 return (diff);
  704         if ((diff = a->port[0] - b->port[0]) != 0)
  705                 return (diff);
  706         if ((diff = a->port[1] - b->port[1]) != 0)
  707                 return (diff);
  708         if ((diff = a->rdomain - b->rdomain) != 0)
  709                 return (diff);
  710         return (0);
  711 }
  712 
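/*
 * pf_state_compare_key() above compares the precomputed sk->hash
 * first, so in the common case keys that differ are separated by a
 * single integer comparison before any address bytes are examined; the
 * remaining fields only break ties between hash collisions.
 */
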
  713 static inline int
  714 pf_state_compare_id(const struct pf_state *a, const struct pf_state *b)
  715 {
  716         if (a->id > b->id)
  717                 return (1);
  718         if (a->id < b->id)
  719                 return (-1);
  720         if (a->creatorid > b->creatorid)
  721                 return (1);
  722         if (a->creatorid < b->creatorid)
  723                 return (-1);
  724 
  725         return (0);
  726 }
  727 
  728 /*
  729  * on failure, pf_state_key_attach() releases the pf_state_key
  730  * reference and returns NULL.
  731  */
  732 struct pf_state_key *
  733 pf_state_key_attach(struct pf_state_key *sk, struct pf_state *st, int idx)
  734 {
  735         struct pf_state_item    *si;
  736         struct pf_state_key     *cur;
  737         struct pf_state         *oldst = NULL;
  738 
  739         PF_ASSERT_LOCKED();
  740 
  741         KASSERT(st->key[idx] == NULL);
  742         sk->sk_removed = 0;
  743         cur = RBT_INSERT(pf_state_tree, &pf_statetbl, sk);
  744         if (cur != NULL) {
  745                 sk->sk_removed = 1;
  746                 /* key exists. check for same kif, if none, add to key */
  747                 TAILQ_FOREACH(si, &cur->sk_states, si_entry) {
  748                         struct pf_state *sist = si->si_st;
  749                         if (sist->kif == st->kif &&
  750                             ((sist->key[PF_SK_WIRE]->af == sk->af &&
  751                              sist->direction == st->direction) ||
  752                             (sist->key[PF_SK_WIRE]->af !=
  753                              sist->key[PF_SK_STACK]->af &&
  754                              sk->af == sist->key[PF_SK_STACK]->af &&
  755                              sist->direction != st->direction))) {
  756                                 int reuse = 0;
  757 
  758                                 if (sk->proto == IPPROTO_TCP &&
  759                                     sist->src.state >= TCPS_FIN_WAIT_2 &&
  760                                     sist->dst.state >= TCPS_FIN_WAIT_2)
  761                                         reuse = 1;
  762                                 if (pf_status.debug >= LOG_NOTICE) {
  763                                         log(LOG_NOTICE,
  764                                             "pf: %s key attach %s on %s: ",
  765                                             (idx == PF_SK_WIRE) ?
  766                                             "wire" : "stack",
  767                                             reuse ? "reuse" : "failed",
  768                                             st->kif->pfik_name);
  769                                         pf_print_state_parts(st,
  770                                             (idx == PF_SK_WIRE) ?  sk : NULL,
  771                                             (idx == PF_SK_STACK) ?  sk : NULL);
  772                                         addlog(", existing: ");
  773                                         pf_print_state_parts(sist,
  774                                             (idx == PF_SK_WIRE) ?  sk : NULL,
  775                                             (idx == PF_SK_STACK) ?  sk : NULL);
  776                                         addlog("\n");
  777                                 }
  778                                 if (reuse) {
  779                                         pf_set_protostate(sist, PF_PEER_BOTH,
  780                                             TCPS_CLOSED);
  781                                         /* remove late or sks can go away */
  782                                         oldst = sist;
  783                                 } else {
  784                                         pf_state_key_unref(sk);
  785                                         return (NULL);  /* collision! */
  786                                 }
  787                         }
  788                 }
  789 
  790                 /* reuse the existing state key */
  791                 pf_state_key_unref(sk);
  792                 sk = cur;
  793         }
  794 
  795         if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
  796                 if (TAILQ_EMPTY(&sk->sk_states)) {
  797                         KASSERT(cur == NULL);
  798                         RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
  799                         sk->sk_removed = 1;
  800                         pf_state_key_unref(sk);
  801                 }
  802 
  803                 return (NULL);
  804         }
  805 
  806         st->key[idx] = pf_state_key_ref(sk); /* give a ref to state */
  807         si->si_st = pf_state_ref(st);
  808 
  809         /* list is sorted, if-bound states before floating */
  810         if (st->kif == pfi_all)
  811                 TAILQ_INSERT_TAIL(&sk->sk_states, si, si_entry);
  812         else
  813                 TAILQ_INSERT_HEAD(&sk->sk_states, si, si_entry);
  814 
  815         if (oldst)
  816                 pf_remove_state(oldst);
  817 
  818         /* caller owns the pf_state ref, which owns a pf_state_key ref now */
  819         return (sk);
  820 }
  821 
  822 void
  823 pf_detach_state(struct pf_state *st)
  824 {
  825         KASSERT(st->key[PF_SK_WIRE] != NULL);
  826         pf_state_key_detach(st, PF_SK_WIRE);
  827 
  828         KASSERT(st->key[PF_SK_STACK] != NULL);
  829         if (st->key[PF_SK_STACK] != st->key[PF_SK_WIRE])
  830                 pf_state_key_detach(st, PF_SK_STACK);
  831 }
  832 
  833 void
  834 pf_state_key_detach(struct pf_state *st, int idx)
  835 {
  836         struct pf_state_item    *si;
  837         struct pf_state_key     *sk;
  838 
  839         PF_ASSERT_LOCKED();
  840 
  841         sk = st->key[idx];
  842         if (sk == NULL)
  843                 return;
  844 
  845         TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
  846                 if (si->si_st == st)
  847                         break;
  848         }
  849         if (si == NULL)
  850                 return;
  851 
  852         TAILQ_REMOVE(&sk->sk_states, si, si_entry);
  853         pool_put(&pf_state_item_pl, si);
  854 
  855         if (TAILQ_EMPTY(&sk->sk_states)) {
  856                 RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
  857                 sk->sk_removed = 1;
  858                 pf_state_key_unlink_reverse(sk);
  859                 pf_state_key_unlink_inpcb(sk);
  860                 pf_state_key_unref(sk);
  861         }
  862 
  863         pf_state_unref(st);
  864 }
  865 
  866 struct pf_state_key *
  867 pf_alloc_state_key(int pool_flags)
  868 {
  869         struct pf_state_key     *sk;
  870 
  871         if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
  872                 return (NULL);
  873 
  874         PF_REF_INIT(sk->sk_refcnt);
  875         TAILQ_INIT(&sk->sk_states);
  876         sk->sk_removed = 1;
  877 
  878         return (sk);
  879 }
  880 
  881 static __inline int
  882 pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
  883     struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
  884 {
  885         struct pf_state_key_cmp *key = arg;
  886 #ifdef INET6
  887         struct pf_addr *target;
  888 
  889         if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
  890                 goto copy;
  891 
  892         switch (pd->hdr.icmp6.icmp6_type) {
  893         case ND_NEIGHBOR_SOLICIT:
  894                 if (multi)
  895                         return (-1);
  896                 target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
  897                 daddr = target;
  898                 break;
  899         case ND_NEIGHBOR_ADVERT:
  900                 if (multi)
  901                         return (-1);
  902                 target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
  903                 saddr = target;
  904                 if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
  905                         key->addr[didx].addr32[0] = 0;
  906                         key->addr[didx].addr32[1] = 0;
  907                         key->addr[didx].addr32[2] = 0;
  908                         key->addr[didx].addr32[3] = 0;
  909                         daddr = NULL; /* overwritten */
  910                 }
  911                 break;
  912         default:
  913                 if (multi) {
  914                         key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
  915                         key->addr[sidx].addr32[1] = 0;
  916                         key->addr[sidx].addr32[2] = 0;
  917                         key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
  918                         saddr = NULL; /* overwritten */
  919                 }
  920         }
  921  copy:
  922 #endif  /* INET6 */
  923         if (saddr)
  924                 pf_addrcpy(&key->addr[sidx], saddr, af);
  925         if (daddr)
  926                 pf_addrcpy(&key->addr[didx], daddr, af);
  927 
  928         return (0);
  929 }
  930 
  931 int
  932 pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
  933     struct pf_state_key **sks, int rtableid)
  934 {
  935         /* if returning error we MUST pool_put state keys ourselves */
  936         struct pf_state_key *sk1, *sk2;
  937         u_int wrdom = pd->rdomain;
  938         int afto = pd->af != pd->naf;
  939 
  940         if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
  941                 return (ENOMEM);
  942 
  943         pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
  944             pd->af, 0);
  945         sk1->port[pd->sidx] = pd->osport;
  946         sk1->port[pd->didx] = pd->odport;
  947         sk1->proto = pd->proto;
  948         sk1->af = pd->af;
  949         sk1->rdomain = pd->rdomain;
  950         sk1->hash = pf_pkt_hash(sk1->af, sk1->proto,
  951             &sk1->addr[0], &sk1->addr[1], sk1->port[0], sk1->port[1]);
  952         if (rtableid >= 0)
  953                 wrdom = rtable_l2(rtableid);
  954 
  955         if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
  956             PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
  957             pd->nsport != pd->osport || pd->ndport != pd->odport ||
  958             wrdom != pd->rdomain || afto) {     /* NAT/NAT64 */
  959                 if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
  960                         pf_state_key_unref(sk1);
  961                         return (ENOMEM);
  962                 }
  963                 pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
  964                     &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
  965                     pd->naf, 0);
  966                 sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
  967                 sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
  968                 if (afto) {
  969                         switch (pd->proto) {
  970                         case IPPROTO_ICMP:
  971                                 sk2->proto = IPPROTO_ICMPV6;
  972                                 break;
  973                         case IPPROTO_ICMPV6:
  974                                 sk2->proto = IPPROTO_ICMP;
  975                                 break;
  976                         default:
  977                                 sk2->proto = pd->proto;
  978                         }
  979                 } else
  980                         sk2->proto = pd->proto;
  981                 sk2->af = pd->naf;
  982                 sk2->rdomain = wrdom;
  983                 sk2->hash = pf_pkt_hash(sk2->af, sk2->proto,
  984                     &sk2->addr[0], &sk2->addr[1], sk2->port[0], sk2->port[1]);
  985         } else
  986                 sk2 = pf_state_key_ref(sk1);
  987 
  988         if (pd->dir == PF_IN) {
  989                 *skw = sk1;
  990                 *sks = sk2;
  991         } else {
  992                 *sks = sk1;
  993                 *skw = sk2;
  994         }
  995 
  996         if (pf_status.debug >= LOG_DEBUG) {
  997                 log(LOG_DEBUG, "pf: key setup: ");
  998                 pf_print_state_parts(NULL, *skw, *sks);
  999                 addlog("\n");
 1000         }
 1001 
 1002         return (0);
 1003 }
 1004 
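/*
 * A worked example of pf_state_key_setup() for an inbound redirect,
 * say "rdr-to 10.0.0.5 port 8080" applied to a packet addressed to
 * 198.51.100.1:80 (addresses purely illustrative): sk1 carries the
 * untranslated tuple taken from the packet and sk2 the translated one,
 * and because pd->dir == PF_IN, sk1 becomes the wire key (*skw) and
 * sk2 the stack key (*sks).  When nothing is translated, sk2 is simply
 * a second reference to sk1, so both keys point at the same object.
 */
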
 1005 /*
 1006  * pf_state_insert() does the following:
 1007  * - links the pf_state up with pf_state_key(s).
 1008  * - inserts the pf_state_keys into pf_state_tree.
 1009  * - inserts the pf_state into the into pf_state_tree_id.
 1010  * - tells pfsync about the state.
 1011  *
 1012  * pf_state_insert() owns the references to the pf_state_key structs
 1013  * it is given. on failure to insert, these references are released.
 1014  * on success, the caller owns a pf_state reference that allows it
 1015  * to access the state keys.
 1016  */
 1017 
 1018 int
 1019 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skwp,
 1020     struct pf_state_key **sksp, struct pf_state *st)
 1021 {
 1022         struct pf_state_key *skw = *skwp;
 1023         struct pf_state_key *sks = *sksp;
 1024         int same = (skw == sks);
 1025 
 1026         PF_ASSERT_LOCKED();
 1027 
 1028         st->kif = kif;
 1029         PF_STATE_ENTER_WRITE();
 1030 
 1031         skw = pf_state_key_attach(skw, st, PF_SK_WIRE);
 1032         if (skw == NULL) {
 1033                 pf_state_key_unref(sks);
 1034                 PF_STATE_EXIT_WRITE();
 1035                 return (-1);
 1036         }
 1037 
 1038         if (same) {
 1039                 /* pf_state_key_attach might have swapped skw */
 1040                 pf_state_key_unref(sks);
 1041                 st->key[PF_SK_STACK] = sks = pf_state_key_ref(skw);
 1042         } else if (pf_state_key_attach(sks, st, PF_SK_STACK) == NULL) {
 1043                 pf_state_key_detach(st, PF_SK_WIRE);
 1044                 PF_STATE_EXIT_WRITE();
 1045                 return (-1);
 1046         }
 1047 
 1048         if (st->id == 0 && st->creatorid == 0) {
 1049                 st->id = htobe64(pf_status.stateid++);
 1050                 st->creatorid = pf_status.hostid;
 1051         }
 1052         if (RBT_INSERT(pf_state_tree_id, &tree_id, st) != NULL) {
 1053                 if (pf_status.debug >= LOG_NOTICE) {
 1054                         log(LOG_NOTICE, "pf: state insert failed: "
 1055                             "id: %016llx creatorid: %08x",
 1056                             betoh64(st->id), ntohl(st->creatorid));
 1057                         addlog("\n");
 1058                 }
 1059                 pf_detach_state(st);
 1060                 PF_STATE_EXIT_WRITE();
 1061                 return (-1);
 1062         }
 1063         pf_state_list_insert(&pf_state_list, st);
 1064         pf_status.fcounters[FCNT_STATE_INSERT]++;
 1065         pf_status.states++;
 1066         pfi_kif_ref(kif, PFI_KIF_REF_STATE);
 1067 #if NPFSYNC > 0
 1068         pfsync_insert_state(st);
 1069 #endif  /* NPFSYNC > 0 */
 1070         PF_STATE_EXIT_WRITE();
 1071 
 1072         *skwp = skw;
 1073         *sksp = sks;
 1074 
 1075         return (0);
 1076 }
 1077 
 1078 struct pf_state *
 1079 pf_find_state_byid(struct pf_state_cmp *key)
 1080 {
 1081         pf_status.fcounters[FCNT_STATE_SEARCH]++;
 1082 
 1083         return (RBT_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
 1084 }
 1085 
 1086 int
 1087 pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
 1088     struct pfi_kif *kif, u_int dir)
 1089 {
 1090         /* a (from hdr) and b (new) must be exact opposites of each other */
 1091         if (a->af == b->af && a->proto == b->proto &&
 1092             PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
 1093             PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
 1094             a->port[0] == b->port[1] &&
 1095             a->port[1] == b->port[0] && a->rdomain == b->rdomain)
 1096                 return (0);
 1097         else {
 1098                 /* mismatch. must not happen. */
 1099                 if (pf_status.debug >= LOG_ERR) {
 1100                         log(LOG_ERR,
 1101                             "pf: state key linking mismatch! dir=%s, "
 1102                             "if=%s, stored af=%u, a0: ",
 1103                             dir == PF_OUT ? "OUT" : "IN",
 1104                             kif->pfik_name, a->af);
 1105                         pf_print_host(&a->addr[0], a->port[0], a->af);
 1106                         addlog(", a1: ");
 1107                         pf_print_host(&a->addr[1], a->port[1], a->af);
 1108                         addlog(", proto=%u", a->proto);
 1109                         addlog(", found af=%u, a0: ", b->af);
 1110                         pf_print_host(&b->addr[0], b->port[0], b->af);
 1111                         addlog(", a1: ");
 1112                         pf_print_host(&b->addr[1], b->port[1], b->af);
 1113                         addlog(", proto=%u", b->proto);
 1114                         addlog("\n");
 1115                 }
 1116                 return (-1);
 1117         }
 1118 }
 1119 
 1120 int
 1121 pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
 1122     struct pf_state **stp)
 1123 {
 1124         struct pf_state_key     *sk, *pkt_sk, *inp_sk;
 1125         struct pf_state_item    *si;
 1126         struct pf_state         *st = NULL;
 1127 
 1128         pf_status.fcounters[FCNT_STATE_SEARCH]++;
 1129         if (pf_status.debug >= LOG_DEBUG) {
 1130                 log(LOG_DEBUG, "pf: key search, %s on %s: ",
 1131                     pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name);
 1132                 pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
 1133                 addlog("\n");
 1134         }
 1135 
 1136         inp_sk = NULL;
 1137         pkt_sk = NULL;
 1138         sk = NULL;
 1139         if (pd->dir == PF_OUT) {
 1140                 /* first if block deals with outbound forwarded packet */
 1141                 pkt_sk = pd->m->m_pkthdr.pf.statekey;
 1142 
 1143                 if (!pf_state_key_isvalid(pkt_sk)) {
 1144                         pf_mbuf_unlink_state_key(pd->m);
 1145                         pkt_sk = NULL;
 1146                 }
 1147 
 1148                 if (pkt_sk && pf_state_key_isvalid(pkt_sk->sk_reverse))
 1149                         sk = pkt_sk->sk_reverse;
 1150 
 1151                 if (pkt_sk == NULL) {
 1152                         /* here we deal with local outbound packet */
 1153                         if (pd->m->m_pkthdr.pf.inp != NULL) {
 1154                                 inp_sk = pd->m->m_pkthdr.pf.inp->inp_pf_sk;
 1155                                 if (pf_state_key_isvalid(inp_sk))
 1156                                         sk = inp_sk;
 1157                                 else
 1158                                         pf_inpcb_unlink_state_key(
 1159                                             pd->m->m_pkthdr.pf.inp);
 1160                         }
 1161                 }
 1162         }
 1163 
 1164         if (sk == NULL) {
 1165                 if ((sk = RBT_FIND(pf_state_tree, &pf_statetbl,
 1166                     (struct pf_state_key *)key)) == NULL)
 1167                         return (PF_DROP);
 1168                 if (pd->dir == PF_OUT && pkt_sk &&
 1169                     pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0)
 1170                         pf_state_key_link_reverse(sk, pkt_sk);
 1171                 else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp &&
 1172                     !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->sk_inp)
 1173                         pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp);
 1174         }
 1175 
 1176         /* remove firewall data from outbound packet */
 1177         if (pd->dir == PF_OUT)
 1178                 pf_pkt_addr_changed(pd->m);
 1179 
 1180         /* list is sorted, if-bound states before floating ones */
 1181         TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
 1182                 struct pf_state *sist = si->si_st;
 1183                 if (sist->timeout != PFTM_PURGE &&
 1184                     (sist->kif == pfi_all || sist->kif == pd->kif) &&
 1185                     ((sist->key[PF_SK_WIRE]->af == sist->key[PF_SK_STACK]->af &&
 1186                       sk == (pd->dir == PF_IN ? sist->key[PF_SK_WIRE] :
 1187                     sist->key[PF_SK_STACK])) ||
 1188                     (sist->key[PF_SK_WIRE]->af != sist->key[PF_SK_STACK]->af
 1189                     && pd->dir == PF_IN && (sk == sist->key[PF_SK_STACK] ||
 1190                     sk == sist->key[PF_SK_WIRE])))) {
 1191                         st = sist;
 1192                         break;
 1193                 }
 1194         }
 1195 
 1196         if (st == NULL)
 1197                 return (PF_DROP);
 1198         if (ISSET(st->state_flags, PFSTATE_INP_UNLINKED))
 1199                 return (PF_DROP);
 1200 
 1201         if (st->rule.ptr->pktrate.limit && pd->dir == st->direction) {
 1202                 pf_add_threshold(&st->rule.ptr->pktrate);
 1203                 if (pf_check_threshold(&st->rule.ptr->pktrate))
 1204                         return (PF_DROP);
 1205         }
 1206 
 1207         *stp = st;
 1208 
 1209         return (PF_MATCH);
 1210 }
 1211 
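/*
 * pf_find_state() above tries two shortcuts before the full tree
 * lookup: for forwarded outbound packets it follows the state key
 * cached in the mbuf header and its reverse link, and for locally
 * generated packets it follows the key linked from the socket's inpcb.
 * Only when neither shortcut yields a valid key does it fall back to
 * RBT_FIND() on pf_statetbl, after which it establishes the reverse or
 * inpcb link so later packets on the same flow can skip the tree.
 */
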
 1212 struct pf_state *
 1213 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
 1214 {
 1215         struct pf_state_key     *sk;
 1216         struct pf_state_item    *si, *ret = NULL;
 1217 
 1218         pf_status.fcounters[FCNT_STATE_SEARCH]++;
 1219 
 1220         sk = RBT_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);
 1221 
 1222         if (sk != NULL) {
 1223                 TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
 1224                         struct pf_state *sist = si->si_st;
 1225                         if (dir == PF_INOUT ||
 1226                             (sk == (dir == PF_IN ? sist->key[PF_SK_WIRE] :
 1227                             sist->key[PF_SK_STACK]))) {
 1228                                 if (more == NULL)
 1229                                         return (sist);
 1230 
 1231                                 if (ret)
 1232                                         (*more)++;
 1233                                 else
 1234                                         ret = si;
 1235                         }
 1236                 }
 1237         }
 1238         return (ret ? ret->si_st : NULL);
 1239 }
 1240 
 1241 void
 1242 pf_state_peer_hton(const struct pf_state_peer *s, struct pfsync_state_peer *d)
 1243 {
 1244         d->seqlo = htonl(s->seqlo);
 1245         d->seqhi = htonl(s->seqhi);
 1246         d->seqdiff = htonl(s->seqdiff);
 1247         d->max_win = htons(s->max_win);
 1248         d->mss = htons(s->mss);
 1249         d->state = s->state;
 1250         d->wscale = s->wscale;
 1251         if (s->scrub) {
 1252                 d->scrub.pfss_flags =
 1253                     htons(s->scrub->pfss_flags & PFSS_TIMESTAMP);
 1254                 d->scrub.pfss_ttl = (s)->scrub->pfss_ttl;
 1255                 d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);
 1256                 d->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID;
 1257         }
 1258 }
 1259 
 1260 void
 1261 pf_state_peer_ntoh(const struct pfsync_state_peer *s, struct pf_state_peer *d)
 1262 {
 1263         d->seqlo = ntohl(s->seqlo);
 1264         d->seqhi = ntohl(s->seqhi);
 1265         d->seqdiff = ntohl(s->seqdiff);
 1266         d->max_win = ntohs(s->max_win);
 1267         d->mss = ntohs(s->mss);
 1268         d->state = s->state;
 1269         d->wscale = s->wscale;
 1270         if (s->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID &&
 1271             d->scrub != NULL) {
 1272                 d->scrub->pfss_flags =
 1273                     ntohs(s->scrub.pfss_flags) & PFSS_TIMESTAMP;
 1274                 d->scrub->pfss_ttl = s->scrub.pfss_ttl;
 1275                 d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod);
 1276         }
 1277 }
 1278 
 1279 void
 1280 pf_state_export(struct pfsync_state *sp, struct pf_state *st)
 1281 {
 1282         int32_t expire;
 1283 
 1284         memset(sp, 0, sizeof(struct pfsync_state));
 1285 
 1286         /* copy from state key */
 1287         sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
 1288         sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
 1289         sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
 1290         sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
 1291         sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
 1292         sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
 1293         sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
 1294         sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
 1295         sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
 1296         sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
 1297         sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
 1298         sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
 1299         sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
 1300         sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
 1301         sp->proto = st->key[PF_SK_WIRE]->proto;
 1302         sp->af = st->key[PF_SK_WIRE]->af;
 1303 
 1304         /* copy from state */
 1305         strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
 1306         sp->rt = st->rt;
 1307         sp->rt_addr = st->rt_addr;
 1308         sp->creation = htonl(getuptime() - st->creation);
 1309         expire = pf_state_expires(st, st->timeout);
 1310         if (expire <= getuptime())
 1311                 sp->expire = htonl(0);
 1312         else
 1313                 sp->expire = htonl(expire - getuptime());
 1314 
 1315         sp->direction = st->direction;
 1316 #if NPFLOG > 0
 1317         sp->log = st->log;
 1318 #endif  /* NPFLOG > 0 */
 1319         sp->timeout = st->timeout;
 1320         sp->state_flags = htons(st->state_flags);
 1321         if (!SLIST_EMPTY(&st->src_nodes))
 1322                 sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
 1323 
 1324         sp->id = st->id;
 1325         sp->creatorid = st->creatorid;
 1326         pf_state_peer_hton(&st->src, &sp->src);
 1327         pf_state_peer_hton(&st->dst, &sp->dst);
 1328 
 1329         if (st->rule.ptr == NULL)
 1330                 sp->rule = htonl(-1);
 1331         else
 1332                 sp->rule = htonl(st->rule.ptr->nr);
 1333         if (st->anchor.ptr == NULL)
 1334                 sp->anchor = htonl(-1);
 1335         else
 1336                 sp->anchor = htonl(st->anchor.ptr->nr);
 1337         sp->nat_rule = htonl(-1);       /* left for compat, nat_rule is gone */
 1338 
 1339         pf_state_counter_hton(st->packets[0], sp->packets[0]);
 1340         pf_state_counter_hton(st->packets[1], sp->packets[1]);
 1341         pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
 1342         pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
 1343 
 1344         sp->max_mss = htons(st->max_mss);
 1345         sp->min_ttl = st->min_ttl;
 1346         sp->set_tos = st->set_tos;
 1347         sp->set_prio[0] = st->set_prio[0];
 1348         sp->set_prio[1] = st->set_prio[1];
 1349 }
 1350 
 1351 int
 1352 pf_state_alloc_scrub_memory(const struct pfsync_state_peer *s,
 1353     struct pf_state_peer *d)
 1354 {
 1355         if (s->scrub.scrub_flag && d->scrub == NULL)
 1356                 return (pf_normalize_tcp_alloc(d));
 1357 
 1358         return (0);
 1359 }
 1360 
 1361 #if NPFSYNC > 0
 1362 int
 1363 pf_state_import(const struct pfsync_state *sp, int flags)
 1364 {
 1365         struct pf_state *st = NULL;
 1366         struct pf_state_key *skw = NULL, *sks = NULL;
 1367         struct pf_rule *r = NULL;
 1368         struct pfi_kif  *kif;
 1369         int pool_flags;
 1370         int error = ENOMEM;
 1371         int n = 0;
 1372 
 1373         if (sp->creatorid == 0) {
 1374                 DPFPRINTF(LOG_NOTICE, "%s: invalid creator id: %08x", __func__,
 1375                     ntohl(sp->creatorid));
 1376                 return (EINVAL);
 1377         }
 1378 
 1379         if ((kif = pfi_kif_get(sp->ifname, NULL)) == NULL) {
 1380                 DPFPRINTF(LOG_NOTICE, "%s: unknown interface: %s", __func__,
 1381                     sp->ifname);
 1382                 if (flags & PFSYNC_SI_IOCTL)
 1383                         return (EINVAL);
 1384                 return (0);     /* skip this state */
 1385         }
 1386 
 1387         if (sp->af == 0)
 1388                 return (0);     /* skip this state */
 1389 
 1390         /*
 1391          * If the ruleset checksums match or the state is coming from the ioctl,
 1392          * it's safe to associate the state with the rule of that number.
 1393          */
 1394         if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
 1395             (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) &&
 1396             ntohl(sp->rule) < pf_main_ruleset.rules.active.rcount) {
 1397                 TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries)
 1398                         if (ntohl(sp->rule) == n++)
 1399                                 break;
 1400         } else
 1401                 r = &pf_default_rule;
 1402 
 1403         if ((r->max_states && r->states_cur >= r->max_states))
 1404                 goto cleanup;
 1405 
 1406         if (flags & PFSYNC_SI_IOCTL)
 1407                 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
 1408         else
 1409                 pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;
 1410 
 1411         if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
 1412                 goto cleanup;
 1413 
 1414         if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
 1415                 goto cleanup;
 1416 
 1417         if ((sp->key[PF_SK_WIRE].af &&
 1418             (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
 1419             PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
 1420             &sp->key[PF_SK_STACK].addr[0], sp->af) ||
 1421             PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
 1422             &sp->key[PF_SK_STACK].addr[1], sp->af) ||
 1423             sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
 1424             sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
 1425             sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
 1426                 if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
 1427                         goto cleanup;
 1428         } else
 1429                 sks = pf_state_key_ref(skw);
 1430 
 1431         /* allocate memory for scrub info */
 1432         if (pf_state_alloc_scrub_memory(&sp->src, &st->src) ||
 1433             pf_state_alloc_scrub_memory(&sp->dst, &st->dst))
 1434                 goto cleanup;
 1435 
 1436         /* copy to state key(s) */
 1437         skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
 1438         skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
 1439         skw->port[0] = sp->key[PF_SK_WIRE].port[0];
 1440         skw->port[1] = sp->key[PF_SK_WIRE].port[1];
 1441         skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
 1442         skw->proto = sp->proto;
 1443         if (!(skw->af = sp->key[PF_SK_WIRE].af))
 1444                 skw->af = sp->af;
 1445         skw->hash = pf_pkt_hash(skw->af, skw->proto,
 1446             &skw->addr[0], &skw->addr[1], skw->port[0], skw->port[1]);
 1447 
 1448         if (sks != skw) {
 1449                 sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
 1450                 sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
 1451                 sks->port[0] = sp->key[PF_SK_STACK].port[0];
 1452                 sks->port[1] = sp->key[PF_SK_STACK].port[1];
 1453                 sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
 1454                 if (!(sks->af = sp->key[PF_SK_STACK].af))
 1455                         sks->af = sp->af;
 1456                 if (sks->af != skw->af) {
 1457                         switch (sp->proto) {
 1458                         case IPPROTO_ICMP:
 1459                                 sks->proto = IPPROTO_ICMPV6;
 1460                                 break;
 1461                         case IPPROTO_ICMPV6:
 1462                                 sks->proto = IPPROTO_ICMP;
 1463                                 break;
 1464                         default:
 1465                                 sks->proto = sp->proto;
 1466                         }
 1467                 } else
 1468                         sks->proto = sp->proto;
 1469 
 1470                 if (((sks->af != AF_INET) && (sks->af != AF_INET6)) ||
 1471                     ((skw->af != AF_INET) && (skw->af != AF_INET6))) {
 1472                         error = EINVAL;
 1473                         goto cleanup;
 1474                 }
 1475 
 1476                 sks->hash = pf_pkt_hash(sks->af, sks->proto,
 1477                     &sks->addr[0], &sks->addr[1], sks->port[0], sks->port[1]);
 1478 
 1479         } else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) {
 1480                 error = EINVAL;
 1481                 goto cleanup;
 1482         }
 1483         st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
 1484         st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);
 1485 
 1486         /* copy to state */
 1487         st->rt_addr = sp->rt_addr;
 1488         st->rt = sp->rt;
 1489         st->creation = getuptime() - ntohl(sp->creation);
 1490         st->expire = getuptime();
 1491         if (ntohl(sp->expire)) {
 1492                 u_int32_t timeout;
 1493 
 1494                 timeout = r->timeout[sp->timeout];
 1495                 if (!timeout)
 1496                         timeout = pf_default_rule.timeout[sp->timeout];
 1497 
 1498                 /* sp->expire may have been adaptively scaled by export. */
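                      /*
                       * sp->expire carries the remaining lifetime as seen by
                       * the exporting host; back-dating st->expire by
                       * (timeout - remaining) makes pf_state_expires() report
                       * roughly the same remaining time here. For example, a
                       * base timeout of 86400s with 600s left places
                       * st->expire 85800s in the past.
                       */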
 1499                 st->expire -= timeout - ntohl(sp->expire);
 1500         }
 1501 
 1502         st->direction = sp->direction;
 1503         st->log = sp->log;
 1504         st->timeout = sp->timeout;
 1505         st->state_flags = ntohs(sp->state_flags);
 1506         st->max_mss = ntohs(sp->max_mss);
 1507         st->min_ttl = sp->min_ttl;
 1508         st->set_tos = sp->set_tos;
 1509         st->set_prio[0] = sp->set_prio[0];
 1510         st->set_prio[1] = sp->set_prio[1];
 1511 
 1512         st->id = sp->id;
 1513         st->creatorid = sp->creatorid;
 1514         pf_state_peer_ntoh(&sp->src, &st->src);
 1515         pf_state_peer_ntoh(&sp->dst, &st->dst);
 1516 
 1517         st->rule.ptr = r;
 1518         st->anchor.ptr = NULL;
 1519 
 1520         st->pfsync_time = getuptime();
 1521         st->sync_state = PFSYNC_S_NONE;
 1522 
 1523         PF_REF_INIT(st->refcnt);
 1524         mtx_init(&st->mtx, IPL_NET);
 1525 
 1526         /* XXX when we have anchors, use STATE_INC_COUNTERS */
 1527         r->states_cur++;
 1528         r->states_tot++;
 1529 
 1530 #if NPFSYNC > 0
 1531         if (!ISSET(flags, PFSYNC_SI_IOCTL))
 1532                 SET(st->state_flags, PFSTATE_NOSYNC);
 1533 #endif
 1534 
 1535         /*
 1536          * We just set the PFSTATE_NOSYNC bit, which prevents
 1537          * pfsync_insert_state() from inserting the state into pfsync.
 1538          */
 1539         if (pf_state_insert(kif, &skw, &sks, st) != 0) {
 1540                 /* XXX when we have anchors, use STATE_DEC_COUNTERS */
 1541                 r->states_cur--;
 1542                 error = EEXIST;
 1543                 goto cleanup_state;
 1544         }
 1545 
 1546 #if NPFSYNC > 0
 1547         if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
 1548                 CLR(st->state_flags, PFSTATE_NOSYNC);
 1549                 if (ISSET(st->state_flags, PFSTATE_ACK))
 1550                         pfsync_iack(st);
 1551         }
 1552         CLR(st->state_flags, PFSTATE_ACK);
 1553 #endif
 1554 
 1555         return (0);
 1556 
 1557  cleanup:
 1558         if (skw != NULL)
 1559                 pf_state_key_unref(skw);
 1560         if (sks != NULL)
 1561                 pf_state_key_unref(sks);
 1562 
 1563  cleanup_state: /* pf_state_insert frees the state keys */
 1564         if (st) {
 1565                 if (st->dst.scrub)
 1566                         pool_put(&pf_state_scrub_pl, st->dst.scrub);
 1567                 if (st->src.scrub)
 1568                         pool_put(&pf_state_scrub_pl, st->src.scrub);
 1569                 pool_put(&pf_state_pl, st);
 1570         }
 1571         return (error);
 1572 }
 1573 #endif /* NPFSYNC > 0 */
 1574 
 1575 /* END state table stuff */
 1576 
 1577 void
 1578 pf_purge_timeout(void *unused)
 1579 {
 1580         /* XXX move to systqmp to avoid KERNEL_LOCK */
 1581         task_add(systq, &pf_purge_task);
 1582 }
 1583 
 1584 void
 1585 pf_purge(void *xnloops)
 1586 {
 1587         int *nloops = xnloops;
 1588 
 1589         /*
 1590          * Process a fraction of the state table every second.
 1591          * Note:
 1592          *     PF_LOCK() is no longer needed here, because
 1593          *     pf_purge_expired_states() uses pf_state_lock to maintain
 1594          *     consistency.
 1595          */
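              /*
               * With the default 10 second PFTM_INTERVAL this walks roughly a
               * tenth of the state table per one-second run, so every state is
               * visited about once per interval; the src nodes and fragments
               * below are only purged once per full interval.
               */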
 1596         if (pf_default_rule.timeout[PFTM_INTERVAL] > 0)
 1597                 pf_purge_expired_states(1 + (pf_status.states
 1598                     / pf_default_rule.timeout[PFTM_INTERVAL]));
 1599 
 1600         NET_LOCK();
 1601 
 1602         PF_LOCK();
 1603         /* purge other expired types every PFTM_INTERVAL seconds */
 1604         if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL])
 1605                 pf_purge_expired_src_nodes();
 1606         PF_UNLOCK();
 1607 
 1608         /*
 1609          * Fragments don't require PF_LOCK(), they use their own lock.
 1610          */
 1611         if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
 1612                 pf_purge_expired_fragments();
 1613                 *nloops = 0;
 1614         }
 1615         NET_UNLOCK();
 1616 
 1617         timeout_add_sec(&pf_purge_to, 1);
 1618 }
 1619 
 1620 int32_t
 1621 pf_state_expires(const struct pf_state *st, uint8_t stimeout)
 1622 {
 1623         u_int32_t       timeout;
 1624         u_int32_t       start;
 1625         u_int32_t       end;
 1626         u_int32_t       states;
 1627 
 1628         /*
 1629          * pf_state_expires is used by the state purge task to
 1630          * decide if a state is a candidate for cleanup, and by the
 1631          * pfsync state export code to populate an expiry time.
 1632          *
 1633          * this function may be called by the state purge task while
 1634          * the state is being modified. avoid inconsistent reads of
 1635          * state->timeout by having the caller do the read (and any
 1636          * checks it needs to do on the same variable) and then pass
 1637          * their view of the timeout in here for this function to use.
 1638          * the only consequence of using a stale timeout value is
 1639          * that the state won't be a candidate for purging until the
 1640          * next pass of the purge task.
 1641          */
 1642 
 1643         /* handle all PFTM_* > PFTM_MAX here */
 1644         if (stimeout > PFTM_MAX)
 1645                 return (0);
 1646 
 1647         KASSERT(stimeout < PFTM_MAX);
 1648 
 1649         timeout = st->rule.ptr->timeout[stimeout];
 1650         if (!timeout)
 1651                 timeout = pf_default_rule.timeout[stimeout];
 1652 
 1653         start = st->rule.ptr->timeout[PFTM_ADAPTIVE_START];
 1654         if (start) {
 1655                 end = st->rule.ptr->timeout[PFTM_ADAPTIVE_END];
 1656                 states = st->rule.ptr->states_cur;
 1657         } else {
 1658                 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
 1659                 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
 1660                 states = pf_status.states;
 1661         }
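              /*
               * Adaptive scaling example: with adaptive.start 6000,
               * adaptive.end 12000 and a base timeout of 86400 seconds,
               * 9000 tracked states scale the timeout to
               * 86400 * (12000 - 9000) / (12000 - 6000) = 43200 seconds;
               * at or beyond adaptive.end the state expires immediately.
               */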
 1662         if (end && states > start && start < end) {
 1663                 if (states >= end)
 1664                         return (0);
 1665 
 1666                 timeout = (u_int64_t)timeout * (end - states) / (end - start);
 1667         }
 1668 
 1669         return (st->expire + timeout);
 1670 }
 1671 
 1672 void
 1673 pf_purge_expired_src_nodes(void)
 1674 {
 1675         struct pf_src_node              *cur, *next;
 1676 
 1677         PF_ASSERT_LOCKED();
 1678 
 1679         for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
 1680                 next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
 1681 
 1682                 if (cur->states == 0 && cur->expire <= getuptime()) {
 1683                         /* next already points past cur, computed above */
 1684                         pf_remove_src_node(cur);
 1685                 }
 1686         }
 1687 }
 1688 
 1689 void
 1690 pf_src_tree_remove_state(struct pf_state *st)
 1691 {
 1692         u_int32_t                timeout;
 1693         struct pf_sn_item       *sni;
 1694 
 1695         while ((sni = SLIST_FIRST(&st->src_nodes)) != NULL) {
 1696                 SLIST_REMOVE_HEAD(&st->src_nodes, next);
 1697                 if (st->src.tcp_est)
 1698                         --sni->sn->conn;
 1699                 if (--sni->sn->states == 0) {
 1700                         timeout = st->rule.ptr->timeout[PFTM_SRC_NODE];
 1701                         if (!timeout)
 1702                                 timeout =
 1703                                     pf_default_rule.timeout[PFTM_SRC_NODE];
 1704                         sni->sn->expire = getuptime() + timeout;
 1705                 }
 1706                 pool_put(&pf_sn_item_pl, sni);
 1707         }
 1708 }
 1709 
 1710 void
 1711 pf_remove_state(struct pf_state *st)
 1712 {
 1713         PF_ASSERT_LOCKED();
 1714 
 1715         /* handle load balancing related tasks */
 1716         pf_postprocess_addr(st);
 1717 
 1718         if (st->src.state == PF_TCPS_PROXY_DST) {
 1719                 pf_send_tcp(st->rule.ptr, st->key[PF_SK_WIRE]->af,
 1720                     &st->key[PF_SK_WIRE]->addr[1],
 1721                     &st->key[PF_SK_WIRE]->addr[0],
 1722                     st->key[PF_SK_WIRE]->port[1],
 1723                     st->key[PF_SK_WIRE]->port[0],
 1724                     st->src.seqhi, st->src.seqlo + 1,
 1725                     TH_RST|TH_ACK, 0, 0, 0, 1, st->tag,
 1726                     st->key[PF_SK_WIRE]->rdomain);
 1727         }
 1728         if (st->key[PF_SK_STACK]->proto == IPPROTO_TCP)
 1729                 pf_set_protostate(st, PF_PEER_BOTH, TCPS_CLOSED);
 1730 
 1731         RBT_REMOVE(pf_state_tree_id, &tree_id, st);
 1732 #if NPFLOW > 0
 1733         if (st->state_flags & PFSTATE_PFLOW)
 1734                 export_pflow(st);
 1735 #endif  /* NPFLOW > 0 */
 1736 #if NPFSYNC > 0
 1737         pfsync_delete_state(st);
 1738 #endif  /* NPFSYNC > 0 */
 1739         st->timeout = PFTM_UNLINKED;
 1740         pf_src_tree_remove_state(st);
 1741         pf_detach_state(st);
 1742 }
 1743 
 1744 void
 1745 pf_remove_divert_state(struct pf_state_key *sk)
 1746 {
 1747         struct pf_state_item    *si;
 1748 
 1749         PF_ASSERT_UNLOCKED();
 1750 
 1751         PF_LOCK();
 1752         PF_STATE_ENTER_WRITE();
 1753         TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
 1754                 struct pf_state *sist = si->si_st;
 1755                 if (sk == sist->key[PF_SK_STACK] && sist->rule.ptr &&
 1756                     (sist->rule.ptr->divert.type == PF_DIVERT_TO ||
 1757                      sist->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
 1758                         if (sist->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
 1759                             sist->key[PF_SK_WIRE] != sist->key[PF_SK_STACK]) {
 1760                                 /*
 1761                                  * If the local address is translated, keep
 1762                                  * the state for "tcp.closed" seconds to
 1763                                  * prevent its source port from being reused.
 1764                                  */
 1765                                 if (sist->src.state < TCPS_FIN_WAIT_2 ||
 1766                                     sist->dst.state < TCPS_FIN_WAIT_2) {
 1767                                         pf_set_protostate(sist, PF_PEER_BOTH,
 1768                                             TCPS_TIME_WAIT);
 1769                                         sist->timeout = PFTM_TCP_CLOSED;
 1770                                         sist->expire = getuptime();
 1771                                 }
 1772                                 sist->state_flags |= PFSTATE_INP_UNLINKED;
 1773                         } else
 1774                                 pf_remove_state(sist);
 1775                         break;
 1776                 }
 1777         }
 1778         PF_STATE_EXIT_WRITE();
 1779         PF_UNLOCK();
 1780 }
 1781 
 1782 void
 1783 pf_free_state(struct pf_state *st)
 1784 {
 1785         struct pf_rule_item *ri;
 1786 
 1787         PF_ASSERT_LOCKED();
 1788 
 1789 #if NPFSYNC > 0
 1790         if (pfsync_state_in_use(st))
 1791                 return;
 1792 #endif  /* NPFSYNC > 0 */
 1793         KASSERT(st->timeout == PFTM_UNLINKED);
 1794         if (--st->rule.ptr->states_cur == 0 &&
 1795             st->rule.ptr->src_nodes == 0)
 1796                 pf_rm_rule(NULL, st->rule.ptr);
 1797         if (st->anchor.ptr != NULL)
 1798                 if (--st->anchor.ptr->states_cur == 0)
 1799                         pf_rm_rule(NULL, st->anchor.ptr);
 1800         while ((ri = SLIST_FIRST(&st->match_rules))) {
 1801                 SLIST_REMOVE_HEAD(&st->match_rules, entry);
 1802                 if (--ri->r->states_cur == 0 &&
 1803                     ri->r->src_nodes == 0)
 1804                         pf_rm_rule(NULL, ri->r);
 1805                 pool_put(&pf_rule_item_pl, ri);
 1806         }
 1807         pf_normalize_tcp_cleanup(st);
 1808         pfi_kif_unref(st->kif, PFI_KIF_REF_STATE);
 1809         pf_state_list_remove(&pf_state_list, st);
 1810         if (st->tag)
 1811                 pf_tag_unref(st->tag);
 1812         pf_state_unref(st);
 1813         pf_status.fcounters[FCNT_STATE_REMOVALS]++;
 1814         pf_status.states--;
 1815 }
 1816 
 1817 void
 1818 pf_purge_expired_states(u_int32_t maxcheck)
 1819 {
 1820         /*
 1821          * this task/thread/context/whatever is the only thing that
 1822          * removes states from the pf_state_list, so the cur reference
 1823          * it holds between calls is guaranteed to still be in the
 1824          * list.
 1825          */
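              /*
               * The purge runs in two phases: expired or unlinked states are
               * first collected onto a local gc list while holding only the
               * pf_state_list read lock; the heavier locks (net lock, list
               * write lock, pf lock, state write lock) are taken afterwards,
               * and only if that list is non-empty, to unlink and free them.
               */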
 1826         static struct pf_state  *cur = NULL;
 1827 
 1828         struct pf_state         *head, *tail;
 1829         struct pf_state         *st;
 1830         SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl);
 1831         time_t                   now;
 1832 
 1833         PF_ASSERT_UNLOCKED();
 1834 
 1835         rw_enter_read(&pf_state_list.pfs_rwl);
 1836 
 1837         mtx_enter(&pf_state_list.pfs_mtx);
 1838         head = TAILQ_FIRST(&pf_state_list.pfs_list);
 1839         tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
 1840         mtx_leave(&pf_state_list.pfs_mtx);
 1841 
 1842         if (head == NULL) {
 1843                 /* the list is empty */
 1844                 rw_exit_read(&pf_state_list.pfs_rwl);
 1845                 return;
 1846         }
 1847 
 1848         /* (re)start at the front of the list */
 1849         if (cur == NULL)
 1850                 cur = head;
 1851 
 1852         now = getuptime();
 1853 
 1854         do {
 1855                 uint8_t stimeout = cur->timeout;
 1856 
 1857                 if ((stimeout == PFTM_UNLINKED) ||
 1858                     (pf_state_expires(cur, stimeout) <= now)) {
 1859                         st = pf_state_ref(cur);
 1860                         SLIST_INSERT_HEAD(&gcl, st, gc_list);
 1861                 }
 1862 
 1863                 /* don't iterate past the end of our view of the list */
 1864                 if (cur == tail) {
 1865                         cur = NULL;
 1866                         break;
 1867                 }
 1868 
 1869                 cur = TAILQ_NEXT(cur, entry_list);
 1870         } while (maxcheck--);
 1871 
 1872         rw_exit_read(&pf_state_list.pfs_rwl);
 1873 
 1874         if (SLIST_EMPTY(&gcl))
 1875                 return;
 1876 
 1877         NET_LOCK();
 1878         rw_enter_write(&pf_state_list.pfs_rwl);
 1879         PF_LOCK();
 1880         PF_STATE_ENTER_WRITE();
 1881         SLIST_FOREACH(st, &gcl, gc_list) {
 1882                 if (st->timeout != PFTM_UNLINKED)
 1883                         pf_remove_state(st);
 1884 
 1885                 pf_free_state(st);
 1886         }
 1887         PF_STATE_EXIT_WRITE();
 1888         PF_UNLOCK();
 1889         rw_exit_write(&pf_state_list.pfs_rwl);
 1890         NET_UNLOCK();
 1891 
 1892         while ((st = SLIST_FIRST(&gcl)) != NULL) {
 1893                 SLIST_REMOVE_HEAD(&gcl, gc_list);
 1894                 pf_state_unref(st);
 1895         }
 1896 }
 1897 
 1898 int
 1899 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw, int wait)
 1900 {
 1901         if (aw->type != PF_ADDR_TABLE)
 1902                 return (0);
 1903         if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, wait)) == NULL)
 1904                 return (1);
 1905         return (0);
 1906 }
 1907 
 1908 void
 1909 pf_tbladdr_remove(struct pf_addr_wrap *aw)
 1910 {
 1911         if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
 1912                 return;
 1913         pfr_detach_table(aw->p.tbl);
 1914         aw->p.tbl = NULL;
 1915 }
 1916 
 1917 void
 1918 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
 1919 {
 1920         struct pfr_ktable *kt = aw->p.tbl;
 1921 
 1922         if (aw->type != PF_ADDR_TABLE || kt == NULL)
 1923                 return;
 1924         if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
 1925                 kt = kt->pfrkt_root;
 1926         aw->p.tbl = NULL;
 1927         aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
 1928                 kt->pfrkt_cnt : -1;
 1929 }
 1930 
 1931 void
 1932 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
 1933 {
 1934         switch (af) {
 1935         case AF_INET: {
 1936                 u_int32_t a = ntohl(addr->addr32[0]);
 1937                 addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
 1938                     (a>>8)&255, a&255);
 1939                 if (p) {
 1940                         p = ntohs(p);
 1941                         addlog(":%u", p);
 1942                 }
 1943                 break;
 1944         }
 1945 #ifdef INET6
 1946         case AF_INET6: {
 1947                 u_int16_t b;
 1948                 u_int8_t i, curstart, curend, maxstart, maxend;
 1949                 curstart = curend = maxstart = maxend = 255;
 1950                 for (i = 0; i < 8; i++) {
 1951                         if (!addr->addr16[i]) {
 1952                                 if (curstart == 255)
 1953                                         curstart = i;
 1954                                 curend = i;
 1955                         } else {
 1956                                 if ((curend - curstart) >
 1957                                     (maxend - maxstart)) {
 1958                                         maxstart = curstart;
 1959                                         maxend = curend;
 1960                                 }
 1961                                 curstart = curend = 255;
 1962                         }
 1963                 }
 1964                 if ((curend - curstart) >
 1965                     (maxend - maxstart)) {
 1966                         maxstart = curstart;
 1967                         maxend = curend;
 1968                 }
 1969                 for (i = 0; i < 8; i++) {
 1970                         if (i >= maxstart && i <= maxend) {
 1971                                 if (i == 0)
 1972                                         addlog(":");
 1973                                 if (i == maxend)
 1974                                         addlog(":");
 1975                         } else {
 1976                                 b = ntohs(addr->addr16[i]);
 1977                                 addlog("%x", b);
 1978                                 if (i < 7)
 1979                                         addlog(":");
 1980                         }
 1981                 }
 1982                 if (p) {
 1983                         p = ntohs(p);
 1984                         addlog("[%u]", p);
 1985                 }
 1986                 break;
 1987         }
 1988 #endif /* INET6 */
 1989         }
 1990 }
 1991 
 1992 void
 1993 pf_print_state(struct pf_state *st)
 1994 {
 1995         pf_print_state_parts(st, NULL, NULL);
 1996 }
 1997 
 1998 void
 1999 pf_print_state_parts(struct pf_state *st,
 2000     struct pf_state_key *skwp, struct pf_state_key *sksp)
 2001 {
 2002         struct pf_state_key *skw, *sks;
 2003         u_int8_t proto, dir;
 2004 
 2005         /* Do our best to fill these, but they're skipped if NULL */
 2006         skw = skwp ? skwp : (st ? st->key[PF_SK_WIRE] : NULL);
 2007         sks = sksp ? sksp : (st ? st->key[PF_SK_STACK] : NULL);
 2008         proto = skw ? skw->proto : (sks ? sks->proto : 0);
 2009         dir = st ? st->direction : 0;
 2010 
 2011         switch (proto) {
 2012         case IPPROTO_IPV4:
 2013                 addlog("IPv4");
 2014                 break;
 2015         case IPPROTO_IPV6:
 2016                 addlog("IPv6");
 2017                 break;
 2018         case IPPROTO_TCP:
 2019                 addlog("TCP");
 2020                 break;
 2021         case IPPROTO_UDP:
 2022                 addlog("UDP");
 2023                 break;
 2024         case IPPROTO_ICMP:
 2025                 addlog("ICMP");
 2026                 break;
 2027         case IPPROTO_ICMPV6:
 2028                 addlog("ICMPv6");
 2029                 break;
 2030         default:
 2031                 addlog("%u", proto);
 2032                 break;
 2033         }
 2034         switch (dir) {
 2035         case PF_IN:
 2036                 addlog(" in");
 2037                 break;
 2038         case PF_OUT:
 2039                 addlog(" out");
 2040                 break;
 2041         }
 2042         if (skw) {
 2043                 addlog(" wire: (%d) ", skw->rdomain);
 2044                 pf_print_host(&skw->addr[0], skw->port[0], skw->af);
 2045                 addlog(" ");
 2046                 pf_print_host(&skw->addr[1], skw->port[1], skw->af);
 2047         }
 2048         if (sks) {
 2049                 addlog(" stack: (%d) ", sks->rdomain);
 2050                 if (sks != skw) {
 2051                         pf_print_host(&sks->addr[0], sks->port[0], sks->af);
 2052                         addlog(" ");
 2053                         pf_print_host(&sks->addr[1], sks->port[1], sks->af);
 2054                 } else
 2055                         addlog("-");
 2056         }
 2057         if (st) {
 2058                 if (proto == IPPROTO_TCP) {
 2059                         addlog(" [lo=%u high=%u win=%u modulator=%u",
 2060                             st->src.seqlo, st->src.seqhi,
 2061                             st->src.max_win, st->src.seqdiff);
 2062                         if (st->src.wscale && st->dst.wscale)
 2063                                 addlog(" wscale=%u",
 2064                                     st->src.wscale & PF_WSCALE_MASK);
 2065                         addlog("]");
 2066                         addlog(" [lo=%u high=%u win=%u modulator=%u",
 2067                             st->dst.seqlo, st->dst.seqhi,
 2068                             st->dst.max_win, st->dst.seqdiff);
 2069                         if (st->src.wscale && st->dst.wscale)
 2070                                 addlog(" wscale=%u",
 2071                                 st->dst.wscale & PF_WSCALE_MASK);
 2072                         addlog("]");
 2073                 }
 2074                 addlog(" %u:%u", st->src.state, st->dst.state);
 2075                 if (st->rule.ptr)
 2076                         addlog(" @%d", st->rule.ptr->nr);
 2077         }
 2078 }
 2079 
 2080 void
 2081 pf_print_flags(u_int8_t f)
 2082 {
 2083         if (f)
 2084                 addlog(" ");
 2085         if (f & TH_FIN)
 2086                 addlog("F");
 2087         if (f & TH_SYN)
 2088                 addlog("S");
 2089         if (f & TH_RST)
 2090                 addlog("R");
 2091         if (f & TH_PUSH)
 2092                 addlog("P");
 2093         if (f & TH_ACK)
 2094                 addlog("A");
 2095         if (f & TH_URG)
 2096                 addlog("U");
 2097         if (f & TH_ECE)
 2098                 addlog("E");
 2099         if (f & TH_CWR)
 2100                 addlog("W");
 2101 }
 2102 
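      /*
       * Skip steps let the rule evaluator jump over runs of consecutive
       * rules that share the same value for a given criterion (interface,
       * direction, rdomain, af, proto, addresses, ports): each rule's
       * skip[i] points at the next rule whose value for criterion i
       * differs. PF_SET_SKIP_STEPS(i) advances head[i] up to cur, setting
       * the skip[i] pointer of every rule in between to cur.
       */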
 2103 #define PF_SET_SKIP_STEPS(i)                                    \
 2104         do {                                                    \
 2105                 while (head[i] != cur) {                        \
 2106                         head[i]->skip[i].ptr = cur;             \
 2107                         head[i] = TAILQ_NEXT(head[i], entries); \
 2108                 }                                               \
 2109         } while (0)
 2110 
 2111 void
 2112 pf_calc_skip_steps(struct pf_rulequeue *rules)
 2113 {
 2114         struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
 2115         int i;
 2116 
 2117         cur = TAILQ_FIRST(rules);
 2118         prev = cur;
 2119         for (i = 0; i < PF_SKIP_COUNT; ++i)
 2120                 head[i] = cur;
 2121         while (cur != NULL) {
 2122                 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
 2123                         PF_SET_SKIP_STEPS(PF_SKIP_IFP);
 2124                 if (cur->direction != prev->direction)
 2125                         PF_SET_SKIP_STEPS(PF_SKIP_DIR);
 2126                 if (cur->onrdomain != prev->onrdomain ||
 2127                     cur->ifnot != prev->ifnot)
 2128                         PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
 2129                 if (cur->af != prev->af)
 2130                         PF_SET_SKIP_STEPS(PF_SKIP_AF);
 2131                 if (cur->proto != prev->proto)
 2132                         PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
 2133                 if (cur->src.neg != prev->src.neg ||
 2134                     pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
 2135                         PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
 2136                 if (cur->dst.neg != prev->dst.neg ||
 2137                     pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
 2138                         PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
 2139                 if (cur->src.port[0] != prev->src.port[0] ||
 2140                     cur->src.port[1] != prev->src.port[1] ||
 2141                     cur->src.port_op != prev->src.port_op)
 2142                         PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
 2143                 if (cur->dst.port[0] != prev->dst.port[0] ||
 2144                     cur->dst.port[1] != prev->dst.port[1] ||
 2145                     cur->dst.port_op != prev->dst.port_op)
 2146                         PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
 2147 
 2148                 prev = cur;
 2149                 cur = TAILQ_NEXT(cur, entries);
 2150         }
 2151         for (i = 0; i < PF_SKIP_COUNT; ++i)
 2152                 PF_SET_SKIP_STEPS(i);
 2153 }
 2154 
 2155 int
 2156 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
 2157 {
 2158         if (aw1->type != aw2->type)
 2159                 return (1);
 2160         switch (aw1->type) {
 2161         case PF_ADDR_ADDRMASK:
 2162         case PF_ADDR_RANGE:
 2163                 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
 2164                         return (1);
 2165                 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
 2166                         return (1);
 2167                 return (0);
 2168         case PF_ADDR_DYNIFTL:
 2169                 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
 2170         case PF_ADDR_NONE:
 2171         case PF_ADDR_NOROUTE:
 2172         case PF_ADDR_URPFFAILED:
 2173                 return (0);
 2174         case PF_ADDR_TABLE:
 2175                 return (aw1->p.tbl != aw2->p.tbl);
 2176         case PF_ADDR_RTLABEL:
 2177                 return (aw1->v.rtlabel != aw2->v.rtlabel);
 2178         default:
 2179                 addlog("invalid address type: %d\n", aw1->type);
 2180                 return (1);
 2181         }
 2182 }
 2183 
 2184 /* This algorithm computes 'a + b - c' in ones-complement using a trick to
 2185  * emulate at most one ones-complement subtraction. This thereby limits net
 2186  * carries/borrows to at most one, eliminating a reduction step and saving one
 2187  * each of +, >>, & and ~.
 2188  *
 2189  * def. x mod y = x - (x//y)*y for integer x,y
 2190  * def. sum = x mod 2^16
 2191  * def. accumulator = (x >> 16) mod 2^16
 2192  *
 2193  * The trick works as follows: subtracting exactly one u_int16_t from the
 2194  * u_int32_t x incurs at most one underflow, wrapping its upper 16 bits, the
 2195  * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the
 2196  * ones-complement borrow:
 2197  *
 2198  *  (sum + accumulator) mod 2^16
 2199  * =    { assume underflow: accumulator := 2^16 - 1 }
 2200  *  (sum + 2^16 - 1) mod 2^16
 2201  * =    { mod }
 2202  *  (sum - 1) mod 2^16
 2203  *
 2204  * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's
 2205  * other zero, not -1, that cannot occur: the 16-bit sum cannot underflow
 2206  * to zero as that requires subtraction of at least 2^16, which exceeds a
 2207  * single u_int16_t's range.
 2208  *
 2209  * We use the following theorem to derive the implementation:
 2210  *
 2211  * th. (x + (y mod z)) mod z  =  (x + y) mod z   (0)
 2212  * proof.
 2213  *     (x + (y mod z)) mod z
 2214  *    =  { def mod }
 2215  *     (x + y - (y//z)*z) mod z
 2216  *    =  { (a + b*c) mod c = a mod c }
 2217  *     (x + y) mod z                    [end of proof]
 2218  *
 2219  * ... and thereby obtain:
 2220  *
 2221  *  (sum + accumulator) mod 2^16
 2222  * =    { def. accumulator, def. sum }
 2223  *  (x mod 2^16 + (x >> 16) mod 2^16) mod 2^16
 2224  * =    { (0), twice }
 2225  *  (x + (x >> 16)) mod 2^16
 2226  * =    { x mod 2^n = x & (2^n - 1) }
 2227  *  (x + (x >> 16)) & 0xffff
 2228  *
 2229  * Note: this serves also as a reduction step for at most one add (as the
 2230  * trailing mod 2^16 prevents further reductions by destroying carries).
 2231  */
 2232 __inline void
 2233 pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
 2234     u_int8_t proto)
 2235 {
 2236         u_int32_t x;
 2237         const int udp = proto == IPPROTO_UDP;
 2238 
 2239         x = *cksum + was - now;
 2240         x = (x + (x >> 16)) & 0xffff;
 2241 
 2242         /* optimise: eliminate a branch when not udp */
 2243         if (udp && *cksum == 0x0000)
 2244                 return;
 2245         if (udp && x == 0x0000)
 2246                 x = 0xffff;
 2247 
 2248         *cksum = (u_int16_t)(x);
 2249 }
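      /*
       * Worked instance of the above: *cksum = 0x0010, was = 0x0001,
       * now = 0x0100 gives x = 0x0010 + 0x0001 - 0x0100 = 0xffffff11; the
       * borrow wraps the accumulator to 0xffff, and folding yields
       * (0xff11 + 0xffff) mod 2^16 = 0xff10, the same value as the pure
       * ones-complement sum 0x0010 +_1 0x0001 +_1 ~0x0100.
       */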
 2250 
 2251 #ifdef INET6
 2252 /* pre: coverage(cksum) is superset of coverage(covered_cksum) */
 2253 static __inline void
 2254 pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto)
 2255 {
 2256         pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto);
 2257 }
 2258 
 2259 /* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */
 2260 static __inline void
 2261 pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto)
 2262 {
 2263         pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto);
 2264 }
 2265 #endif /* INET6 */
 2266 
 2267 /* pre: *a is 16-bit aligned within its packet
 2268  *
 2269  * This algorithm emulates 16-bit ones-complement sums on a twos-complement
 2270  * machine by conserving ones-complement's otherwise discarded carries in the
 2271  * upper bits of x. These accumulated carries, when added back into the lower
 2272  * 16 bits over zero or more 'reduction' steps, complete the ones-complement sum.
 2273  *
 2274  * def. sum = x mod 2^16
 2275  * def. accumulator = (x >> 16)
 2276  *
 2277  * At most two reduction steps
 2278  *
 2279  *   x := sum + accumulator
 2280  * =    { def sum, def accumulator }
 2281  *   x := x mod 2^16 + (x >> 16)
 2282  * =    { x mod 2^n = x & (2^n - 1) }
 2283  *   x := (x & 0xffff) + (x >> 16)
 2284  *
 2285  * are necessary to incorporate the accumulated carries (at most one per add)
 2286  * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits.
 2287  *
 2288  * The function is also invariant over the endianness of the host. Why?
 2289  *
 2290  * Define the unary transpose operator ~ on a bitstring in python slice
 2291  * notation as lambda m: m[P:] + m[:P] , for some constant pivot P.
 2292  *
 2293  * th. ~ distributes over ones-complement addition, denoted by +_1, i.e.
 2294  *
 2295  *     ~m +_1 ~n  =  ~(m +_1 n)    (for all bitstrings m,n of equal length)
 2296  *
 2297  * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two
 2298  * 'half-adds'. Under ones-complement addition, each half-add carries to the
 2299  * other, so the sum of each half-add is unaffected by their relative
 2300  * order. Therefore:
 2301  *
 2302  *     ~m +_1 ~n
 2303  *   =    { half-adds invariant under transposition }
 2304  *     ~s
 2305  *   =    { substitute }
 2306  *     ~(m +_1 n)                   [end of proof]
 2307  *
 2308  * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine
 2309  * with the converse endian does not alter the result.
 2310  *
 2311  * proof.
 2312  *        { converse machine endian: load/store transposes, P := 8 }
 2313  *     ~(~m +_1 ~n)
 2314  *   =    { ~ over +_1 }
 2315  *     ~~m +_1 ~~n
 2316  *   =    { ~ is an involution }
 2317  *      m +_1 n                     [end of proof]
 2318  *
 2319  */
 2320 #define NEG(x) ((u_int16_t)~(x))
 2321 void
 2322 pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a,
 2323     const struct pf_addr *an, sa_family_t af, u_int8_t proto)
 2324 {
 2325         u_int32_t        x;
 2326         const u_int16_t *n = an->addr16;
 2327         const u_int16_t *o = a->addr16;
 2328         const int        udp = proto == IPPROTO_UDP;
 2329 
 2330         switch (af) {
 2331         case AF_INET:
 2332                 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]);
 2333                 break;
 2334 #ifdef INET6
 2335         case AF_INET6:
 2336                 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +
 2337                              o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +
 2338                              o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +
 2339                              o[6] + NEG(n[6]) + o[7] + NEG(n[7]);
 2340                 break;
 2341 #endif /* INET6 */
 2342         default:
 2343                 unhandled_af(af);
 2344         }
 2345 
 2346         x = (x & 0xffff) + (x >> 16);
 2347         x = (x & 0xffff) + (x >> 16);
 2348 
 2349         /* optimise: eliminate a branch when not udp */
 2350         if (udp && *cksum == 0x0000)
 2351                 return;
 2352         if (udp && x == 0x0000)
 2353                 x = 0xffff;
 2354 
 2355         *cksum = (u_int16_t)(x);
 2356 }
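      /*
       * The two reduction steps above suffice even in the widest case:
       * for AF_INET6, x accumulates *cksum plus 16 halfword terms, so
       * x <= 17 * 0xffff; the first fold brings x down to at most 0x1000f
       * and the second to below 2^16.
       */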
 2357 
 2358 int
 2359 pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
 2360 {
 2361         int     rewrite = 0;
 2362 
 2363         if (*f != v) {
 2364                 u_int16_t old = htons(hi ? (*f << 8) : *f);
 2365                 u_int16_t new = htons(hi ? ( v << 8) :  v);
 2366 
 2367                 pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
 2368                 *f = v;
 2369                 rewrite = 1;
 2370         }
 2371 
 2372         return (rewrite);
 2373 }
 2374 
 2375 /* pre: *f is 16-bit aligned within its packet */
 2376 int
 2377 pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v)
 2378 {
 2379         int     rewrite = 0;
 2380 
 2381         if (*f != v) {
 2382                 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
 2383                 *f = v;
 2384                 rewrite = 1;
 2385         }
 2386 
 2387         return (rewrite);
 2388 }
 2389 
 2390 int
 2391 pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
 2392 {
 2393         int             rewrite = 0;
 2394         u_int8_t       *fb = (u_int8_t*)f;
 2395         u_int8_t       *vb = (u_int8_t*)&v;
 2396 
 2397         if (hi && ALIGNED_POINTER(f, u_int16_t)) {
 2398                 return (pf_patch_16(pd, f, v)); /* optimise */
 2399         }
 2400 
 2401         rewrite += pf_patch_8(pd, fb++, *vb++, hi);
 2402         rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
 2403 
 2404         return (rewrite);
 2405 }
 2406 
 2407 /* pre: *f is 16-bit aligned within its packet */
 2408 /* pre: pd->proto != IPPROTO_UDP */
 2409 int
 2410 pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v)
 2411 {
 2412         int             rewrite = 0;
 2413         u_int16_t      *pc = pd->pcksum;
 2414         u_int8_t        proto = pd->proto;
 2415 
 2416         /* optimise: inline udp fixup code is unused; let compiler scrub it */
 2417         if (proto == IPPROTO_UDP)
 2418                 panic("%s: udp", __func__);
 2419 
 2420         /* optimise: skip *f != v guard; true for all use-cases */
 2421         pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto);
 2422         pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto);
 2423 
 2424         *f = v;
 2425         rewrite = 1;
 2426 
 2427         return (rewrite);
 2428 }
 2429 
 2430 int
 2431 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
 2432 {
 2433         int             rewrite = 0;
 2434         u_int8_t       *fb = (u_int8_t*)f;
 2435         u_int8_t       *vb = (u_int8_t*)&v;
 2436 
 2437         if (hi && ALIGNED_POINTER(f, u_int32_t)) {
 2438                 return (pf_patch_32(pd, f, v)); /* optimise */
 2439         }
 2440 
 2441         rewrite += pf_patch_8(pd, fb++, *vb++, hi);
 2442         rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
 2443         rewrite += pf_patch_8(pd, fb++, *vb++, hi);
 2444         rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
 2445 
 2446         return (rewrite);
 2447 }
 2448 
 2449 int
 2450 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir,
 2451     u_int16_t *virtual_id, u_int16_t *virtual_type)
 2452 {
 2453         /*
 2454          * ICMP types marked with PF_OUT are typically responses to
 2455          * PF_IN, and will match states in the opposite direction.
 2456          * PF_IN ICMP types need to match a state with that type.
 2457          */
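              /*
               * For example, an inbound ICMP_ECHO maps to virtual type
               * ICMP_ECHO and virtual id icmp_id with icmp_dir PF_IN and
               * creates/matches its own state, while the corresponding
               * ICMP_ECHOREPLY maps to the same virtual type/id with
               * icmp_dir PF_OUT and therefore matches that state from the
               * reply direction. Error types such as ICMP_UNREACH return 1:
               * they refer to some other connection's state.
               */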
 2458         *icmp_dir = PF_OUT;
 2459 
 2460         /* Queries (and responses) */
 2461         switch (pd->af) {
 2462         case AF_INET:
 2463                 switch (type) {
 2464                 case ICMP_ECHO:
 2465                         *icmp_dir = PF_IN;
 2466                         /* FALLTHROUGH */
 2467                 case ICMP_ECHOREPLY:
 2468                         *virtual_type = ICMP_ECHO;
 2469                         *virtual_id = pd->hdr.icmp.icmp_id;
 2470                         break;
 2471 
 2472                 case ICMP_TSTAMP:
 2473                         *icmp_dir = PF_IN;
 2474                         /* FALLTHROUGH */
 2475                 case ICMP_TSTAMPREPLY:
 2476                         *virtual_type = ICMP_TSTAMP;
 2477                         *virtual_id = pd->hdr.icmp.icmp_id;
 2478                         break;
 2479 
 2480                 case ICMP_IREQ:
 2481                         *icmp_dir = PF_IN;
 2482                         /* FALLTHROUGH */
 2483                 case ICMP_IREQREPLY:
 2484                         *virtual_type = ICMP_IREQ;
 2485                         *virtual_id = pd->hdr.icmp.icmp_id;
 2486                         break;
 2487 
 2488                 case ICMP_MASKREQ:
 2489                         *icmp_dir = PF_IN;
 2490                         /* FALLTHROUGH */
 2491                 case ICMP_MASKREPLY:
 2492                         *virtual_type = ICMP_MASKREQ;
 2493                         *virtual_id = pd->hdr.icmp.icmp_id;
 2494                         break;
 2495 
 2496                 case ICMP_IPV6_WHEREAREYOU:
 2497                         *icmp_dir = PF_IN;
 2498                         /* FALLTHROUGH */
 2499                 case ICMP_IPV6_IAMHERE:
 2500                         *virtual_type = ICMP_IPV6_WHEREAREYOU;
 2501                         *virtual_id = 0; /* Nothing sane to match on! */
 2502                         break;
 2503 
 2504                 case ICMP_MOBILE_REGREQUEST:
 2505                         *icmp_dir = PF_IN;
 2506                         /* FALLTHROUGH */
 2507                 case ICMP_MOBILE_REGREPLY:
 2508                         *virtual_type = ICMP_MOBILE_REGREQUEST;
 2509                         *virtual_id = 0; /* Nothing sane to match on! */
 2510                         break;
 2511 
 2512                 case ICMP_ROUTERSOLICIT:
 2513                         *icmp_dir = PF_IN;
 2514                         /* FALLTHROUGH */
 2515                 case ICMP_ROUTERADVERT:
 2516                         *virtual_type = ICMP_ROUTERSOLICIT;
 2517                         *virtual_id = 0; /* Nothing sane to match on! */
 2518                         break;
 2519 
 2520                 /* These ICMP types map to other connections */
 2521                 case ICMP_UNREACH:
 2522                 case ICMP_SOURCEQUENCH:
 2523                 case ICMP_REDIRECT:
 2524                 case ICMP_TIMXCEED:
 2525                 case ICMP_PARAMPROB:
 2526                         /* These will not be used, but set them anyway */
 2527                         *icmp_dir = PF_IN;
 2528                         *virtual_type = htons(type);
 2529                         *virtual_id = 0;
 2530                         return (1);  /* These types match to another state */
 2531 
 2532                 /*
 2533                  * All remaining ICMP types get their own states,
 2534                  * and will only match in one direction.
 2535                  */
 2536                 default:
 2537                         *icmp_dir = PF_IN;
 2538                         *virtual_type = type;
 2539                         *virtual_id = 0;
 2540                         break;
 2541                 }
 2542                 break;
 2543 #ifdef INET6
 2544         case AF_INET6:
 2545                 switch (type) {
 2546                 case ICMP6_ECHO_REQUEST:
 2547                         *icmp_dir = PF_IN;
 2548                         /* FALLTHROUGH */
 2549                 case ICMP6_ECHO_REPLY:
 2550                         *virtual_type = ICMP6_ECHO_REQUEST;
 2551                         *virtual_id = pd->hdr.icmp6.icmp6_id;
 2552                         break;
 2553 
 2554                 case MLD_LISTENER_QUERY:
 2555                 case MLD_LISTENER_REPORT: {
 2556                         struct mld_hdr *mld = &pd->hdr.mld;
 2557                         u_int32_t h;
 2558 
 2559                         /*
 2560                          * A Listener Report can be sent by clients
 2561                          * without an associated Listener Query.
 2562                          * In addition, when a Report is sent as a
 2563                          * reply to a Query, its source and destination
 2564                          * addresses differ.
 2565                          */
 2566                         *icmp_dir = PF_IN;
 2567                         *virtual_type = MLD_LISTENER_QUERY;
 2568                         /* generate fake id for these messages */
 2569                         h = mld->mld_addr.s6_addr32[0] ^
 2570                             mld->mld_addr.s6_addr32[1] ^
 2571                             mld->mld_addr.s6_addr32[2] ^
 2572                             mld->mld_addr.s6_addr32[3];
 2573                         *virtual_id = (h >> 16) ^ (h & 0xffff);
 2574                         break;
 2575                 }
 2576 
 2577                 /*
 2578                  * ICMP6_FQDN and ICMP6_NI query/reply are the same type as
 2579                  * ICMP6_WRU
 2580                  */
 2581                 case ICMP6_WRUREQUEST:
 2582                         *icmp_dir = PF_IN;
 2583                         /* FALLTHROUGH */
 2584                 case ICMP6_WRUREPLY:
 2585                         *virtual_type = ICMP6_WRUREQUEST;
 2586                         *virtual_id = 0; /* Nothing sane to match on! */
 2587                         break;
 2588 
 2589                 case MLD_MTRACE:
 2590                         *icmp_dir = PF_IN;
 2591                         /* FALLTHROUGH */
 2592                 case MLD_MTRACE_RESP:
 2593                         *virtual_type = MLD_MTRACE;
 2594                         *virtual_id = 0; /* Nothing sane to match on! */
 2595                         break;
 2596 
 2597                 case ND_NEIGHBOR_SOLICIT:
 2598                         *icmp_dir = PF_IN;
 2599                         /* FALLTHROUGH */
 2600                 case ND_NEIGHBOR_ADVERT: {
 2601                         struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns;
 2602                         u_int32_t h;
 2603 
 2604                         *virtual_type = ND_NEIGHBOR_SOLICIT;
 2605                         /* generate fake id for these messages */
 2606                         h = nd->nd_ns_target.s6_addr32[0] ^
 2607                             nd->nd_ns_target.s6_addr32[1] ^
 2608                             nd->nd_ns_target.s6_addr32[2] ^
 2609                             nd->nd_ns_target.s6_addr32[3];
 2610                         *virtual_id = (h >> 16) ^ (h & 0xffff);
 2611                         break;
 2612                 }
 2613 
 2614                 /*
 2615                  * These ICMP types map to other connections.
 2616                  * ND_REDIRECT can't be in this list because the triggering
 2617                  * packet header is optional.
 2618                  */
 2619                 case ICMP6_DST_UNREACH:
 2620                 case ICMP6_PACKET_TOO_BIG:
 2621                 case ICMP6_TIME_EXCEEDED:
 2622                 case ICMP6_PARAM_PROB:
 2623                         /* These will not be used, but set them anyway */
 2624                         *icmp_dir = PF_IN;
 2625                         *virtual_type = htons(type);
 2626                         *virtual_id = 0;
 2627                         return (1);  /* These types match to another state */
 2628                 /*
 2629                  * All remaining ICMP6 types get their own states,
 2630                  * and will only match in one direction.
 2631                  */
 2632                 default:
 2633                         *icmp_dir = PF_IN;
 2634                         *virtual_type = type;
 2635                         *virtual_id = 0;
 2636                         break;
 2637                 }
 2638                 break;
 2639 #endif /* INET6 */
 2640         }
 2641         *virtual_type = htons(*virtual_type);
 2642         return (0);  /* These types match to their own state */
 2643 }
 2644 
 2645 void
 2646 pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp,
 2647     struct pf_addr *oa, struct pf_addr *na, u_int16_t np)
 2648 {
 2649         /* note: we do not bother to fix up quoted checksums, if any */
 2650 
 2651         /* change quoted protocol port */
 2652         if (qp != NULL)
 2653                 pf_patch_16(pd, qp, np);
 2654 
 2655         /* change quoted ip address */
 2656         pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto);
 2657         pf_addrcpy(qa, na, pd->af);
 2658 
 2659         /* change network-header's ip address */
 2660         if (oa)
 2661                 pf_translate_a(pd, oa, na);
 2662 }
 2663 
 2664 /* pre: *a is 16-bit aligned within its packet */
 2665 /*      *a is a network header src/dst address */
 2666 int
 2667 pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an)
 2668 {
 2669         int     rewrite = 0;
 2670 
 2671         /* warning: !PF_ANEQ != PF_AEQ */
 2672         if (!PF_ANEQ(a, an, pd->af))
 2673                 return (0);
 2674 
 2675         /* fixup transport pseudo-header, if any */
 2676         switch (pd->proto) {
 2677         case IPPROTO_TCP:       /* FALLTHROUGH */
 2678         case IPPROTO_UDP:       /* FALLTHROUGH */
 2679         case IPPROTO_ICMPV6:
 2680                 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto);
 2681                 break;
 2682         default:
 2683                 break;  /* assume no pseudo-header */
 2684         }
 2685 
 2686         pf_addrcpy(a, an, pd->af);
 2687         rewrite = 1;
 2688 
 2689         return (rewrite);
 2690 }
 2691 
 2692 #ifdef INET6
 2693 /* pf_translate_af() may change pd->m, adjust local copies after calling */
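      /*
       * Replace the network header with one of the other address family:
       * uncover the stale pseudo-header in the transport checksum, strip
       * the old IP header, prepend a fresh one for pd->naf and cover the
       * new pseudo-header again.
       */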
 2694 int
 2695 pf_translate_af(struct pf_pdesc *pd)
 2696 {
 2697         static const struct pf_addr     zero;
 2698         struct ip                      *ip4;
 2699         struct ip6_hdr                 *ip6;
 2700         int                             copyback = 0;
 2701         u_int                           hlen, ohlen, dlen;
 2702         u_int16_t                      *pc;
 2703         u_int8_t                        af_proto, naf_proto;
 2704 
 2705         hlen = (pd->naf == AF_INET) ? sizeof(*ip4) : sizeof(*ip6);
 2706         ohlen = pd->off;
 2707         dlen = pd->tot_len - pd->off;
 2708         pc = pd->pcksum;
 2709 
 2710         af_proto = naf_proto = pd->proto;
 2711         if (naf_proto == IPPROTO_ICMP)
 2712                 af_proto = IPPROTO_ICMPV6;
 2713         if (naf_proto == IPPROTO_ICMPV6)
 2714                 af_proto = IPPROTO_ICMP;
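        /*
         * pd->proto already names the protocol used in the new header, so
         * naf_proto is the new-family protocol and af_proto its counterpart
         * under the original family (they differ only for ICMP vs. ICMPv6).
         */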
 2715 
 2716         /* uncover stale pseudo-header */
 2717         switch (af_proto) {
 2718         case IPPROTO_ICMPV6:
 2719                 /* optimise: unchanged for TCP/UDP */
 2720                 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto);
 2721                 pf_cksum_fixup(pc, htons(dlen),     0x0, af_proto);
 2722                                 /* FALLTHROUGH */
 2723         case IPPROTO_UDP:       /* FALLTHROUGH */
 2724         case IPPROTO_TCP:
 2725                 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto);
 2726                 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto);
 2727                 copyback = 1;
 2728                 break;
 2729         default:
 2730                 break;  /* assume no pseudo-header */
 2731         }
 2732 
 2733         /* replace the network header */
 2734         m_adj(pd->m, pd->off);
 2735         pd->src = NULL;
 2736         pd->dst = NULL;
 2737 
 2738         if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) {
 2739                 pd->m = NULL;
 2740                 return (-1);
 2741         }
 2742 
 2743         pd->off = hlen;
 2744         pd->tot_len += hlen - ohlen;
 2745 
 2746         switch (pd->naf) {
 2747         case AF_INET:
 2748                 ip4 = mtod(pd->m, struct ip *);
 2749                 memset(ip4, 0, hlen);
 2750                 ip4->ip_v   = IPVERSION;
 2751                 ip4->ip_hl  = hlen >> 2;
 2752                 ip4->ip_tos = pd->tos;
 2753                 ip4->ip_len = htons(hlen + dlen);
 2754                 ip4->ip_id  = htons(ip_randomid());
 2755                 ip4->ip_off = htons(IP_DF);
 2756                 ip4->ip_ttl = pd->ttl;
 2757                 ip4->ip_p   = pd->proto;
 2758                 ip4->ip_src = pd->nsaddr.v4;
 2759                 ip4->ip_dst = pd->ndaddr.v4;
 2760                 break;
 2761         case AF_INET6:
 2762                 ip6 = mtod(pd->m, struct ip6_hdr *);
 2763                 memset(ip6, 0, hlen);
 2764                 ip6->ip6_vfc  = IPV6_VERSION;
 2765                 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
 2766                 ip6->ip6_plen = htons(dlen);
 2767                 ip6->ip6_nxt  = pd->proto;
 2768                 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
 2769                         ip6->ip6_hlim = IPV6_DEFHLIM;
 2770                 else
 2771                         ip6->ip6_hlim = pd->ttl;
 2772                 ip6->ip6_src  = pd->nsaddr.v6;
 2773                 ip6->ip6_dst  = pd->ndaddr.v6;
 2774                 break;
 2775         default:
 2776                 unhandled_af(pd->naf);
 2777         }
 2778 
 2779         /* UDP over IPv6 must be checksummed per rfc2460 p27 */
 2780         if (naf_proto == IPPROTO_UDP && *pc == 0x0000 &&
 2781             pd->naf == AF_INET6) {
 2782                 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
 2783         }
 2784 
 2785         /* cover fresh pseudo-header */
 2786         switch (naf_proto) {
 2787         case IPPROTO_ICMPV6:
 2788                 /* optimise: unchanged for TCP/UDP */
 2789                 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto);
 2790                 pf_cksum_fixup(pc, 0x0, htons(dlen),      naf_proto);
 2791                                 /* FALLTHROUGH */
 2792         case IPPROTO_UDP:       /* FALLTHROUGH */
 2793         case IPPROTO_TCP:
 2794                 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto);
 2795                 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto);
 2796                 copyback = 1;
 2797                 break;
 2798         default:
 2799                 break;  /* assume no pseudo-header */
 2800         }
 2801 
 2802         /* flush pd->pcksum */
 2803         if (copyback)
 2804                 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
 2805 
 2806         return (0);
 2807 }
 2808 
 2809 int
 2810 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd,
 2811     struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
 2812     sa_family_t af, sa_family_t naf)
 2813 {
 2814         struct mbuf             *n = NULL;
 2815         struct ip               *ip4;
 2816         struct ip6_hdr          *ip6;
 2817         u_int                    hlen, ohlen, dlen;
 2818         int                      d;
 2819 
 2820         if (af == naf || (af != AF_INET && af != AF_INET6) ||
 2821             (naf != AF_INET && naf != AF_INET6))
 2822                 return (-1);
 2823 
 2824         /* split the mbuf chain on the quoted ip/ip6 header boundary */
 2825         if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL)
 2826                 return (-1);
 2827 
 2828         /* new quoted header */
 2829         hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
 2830         /* old quoted header */
 2831         ohlen = pd2->off - ipoff2;
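        /* ipoff2: quoted IP header, pd2->off: quoted transport header */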
 2832 
 2833         /* trim old quoted header */
 2834         pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto);
 2835         m_adj(n, ohlen);
 2836 
 2837         /* prepend a new, translated, quoted header */
 2838         if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL)
 2839                 return (-1);
 2840 
 2841         switch (naf) {
 2842         case AF_INET:
 2843                 ip4 = mtod(n, struct ip *);
 2844                 memset(ip4, 0, sizeof(*ip4));
 2845                 ip4->ip_v   = IPVERSION;
 2846                 ip4->ip_hl  = sizeof(*ip4) >> 2;
 2847                 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen);
 2848                 ip4->ip_id  = htons(ip_randomid());
 2849                 ip4->ip_off = htons(IP_DF);
 2850                 ip4->ip_ttl = pd2->ttl;
 2851                 if (pd2->proto == IPPROTO_ICMPV6)
 2852                         ip4->ip_p = IPPROTO_ICMP;
 2853                 else
 2854                         ip4->ip_p = pd2->proto;
 2855                 ip4->ip_src = src->v4;
 2856                 ip4->ip_dst = dst->v4;
 2857                 ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
 2858                 break;
 2859         case AF_INET6:
 2860                 ip6 = mtod(n, struct ip6_hdr *);
 2861                 memset(ip6, 0, sizeof(*ip6));
 2862                 ip6->ip6_vfc  = IPV6_VERSION;
 2863                 ip6->ip6_plen = htons(pd2->tot_len - ohlen);
 2864                 if (pd2->proto == IPPROTO_ICMP)
 2865                         ip6->ip6_nxt = IPPROTO_ICMPV6;
 2866                 else
 2867                         ip6->ip6_nxt = pd2->proto;
 2868                 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
 2869                         ip6->ip6_hlim = IPV6_DEFHLIM;
 2870                 else
 2871                         ip6->ip6_hlim = pd2->ttl;
 2872                 ip6->ip6_src  = src->v6;
 2873                 ip6->ip6_dst  = dst->v6;
 2874                 break;
 2875         }
 2876 
 2877         /* cover new quoted header */
 2878         /* optimise: any new AF_INET header of ours sums to zero */
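        /* (ip_sum was just filled in, so in_cksum() over it yields zero) */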
 2879         if (naf != AF_INET) {
 2880                 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto);
 2881         }
 2882 
 2883         /* reattach modified quoted packet to outer header */
 2884         {
 2885                 int nlen = n->m_pkthdr.len;
 2886                 m_cat(m, n);
 2887                 m->m_pkthdr.len += nlen;
 2888         }
 2889 
 2890         /* account for altered length */
 2891         d = hlen - ohlen;
 2892 
 2893         if (pd->proto == IPPROTO_ICMPV6) {
 2894                 /* fixup pseudo-header */
 2895                 dlen = pd->tot_len - pd->off;
 2896                 pf_cksum_fixup(pd->pcksum,
 2897                     htons(dlen), htons(dlen + d), pd->proto);
 2898         }
 2899 
 2900         pd->tot_len  += d;
 2901         pd2->tot_len += d;
 2902         pd2->off     += d;
 2903 
 2904         /* note: not bothering to update the network headers, as
 2905            these are due for rewrite by pf_translate_af() */
 2906 
 2907         return (0);
 2908 }
 2909 
 2910 
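      /*
       * Header field offsets used to translate the ICMP parameter-problem
       * pointer between the IPv4 and IPv6 header layouts.
       */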
 2911 #define PTR_IP(field)   (offsetof(struct ip, field))
 2912 #define PTR_IP6(field)  (offsetof(struct ip6_hdr, field))
 2913 
 2914 int
 2915 pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg)
 2916 {
 2917         struct icmp             *icmp4;
 2918         struct icmp6_hdr        *icmp6;
 2919         u_int32_t                mtu;
 2920         int32_t                  ptr = -1;
 2921         u_int8_t                 type;
 2922         u_int8_t                 code;
 2923 
 2924         switch (af) {
 2925         case AF_INET:
 2926                 icmp6 = arg;
 2927                 type  = icmp6->icmp6_type;
 2928                 code  = icmp6->icmp6_code;
 2929                 mtu   = ntohl(icmp6->icmp6_mtu);
 2930 
 2931                 switch (type) {
 2932                 case ICMP6_ECHO_REQUEST:
 2933                         type = ICMP_ECHO;
 2934                         break;
 2935                 case ICMP6_ECHO_REPLY:
 2936                         type = ICMP_ECHOREPLY;
 2937                         break;
 2938                 case ICMP6_DST_UNREACH:
 2939                         type = ICMP_UNREACH;
 2940                         switch (code) {
 2941                         case ICMP6_DST_UNREACH_NOROUTE:
 2942                         case ICMP6_DST_UNREACH_BEYONDSCOPE:
 2943                         case ICMP6_DST_UNREACH_ADDR:
 2944                                 code = ICMP_UNREACH_HOST;
 2945                                 break;
 2946                         case ICMP6_DST_UNREACH_ADMIN:
 2947                                 code = ICMP_UNREACH_HOST_PROHIB;
 2948                                 break;
 2949                         case ICMP6_DST_UNREACH_NOPORT:
 2950                                 code = ICMP_UNREACH_PORT;
 2951                                 break;
 2952                         default:
 2953                                 return (-1);
 2954                         }
 2955                         break;
 2956                 case ICMP6_PACKET_TOO_BIG:
 2957                         type = ICMP_UNREACH;
 2958                         code = ICMP_UNREACH_NEEDFRAG;
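                              /* adjust for the IPv6/IPv4 header size difference */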
 2959                         mtu -= 20;
 2960                         break;
 2961                 case ICMP6_TIME_EXCEEDED:
 2962                         type = ICMP_TIMXCEED;
 2963                         break;
 2964                 case ICMP6_PARAM_PROB:
 2965                         switch (code) {
 2966                         case ICMP6_PARAMPROB_HEADER:
 2967                                 type = ICMP_PARAMPROB;
 2968                                 code = ICMP_PARAMPROB_ERRATPTR;
 2969                                 ptr  = ntohl(icmp6->icmp6_pptr);
 2970 
 2971                                 if (ptr == PTR_IP6(ip6_vfc))
 2972                                         ; /* preserve */
 2973                                 else if (ptr == PTR_IP6(ip6_vfc) + 1)
 2974                                         ptr = PTR_IP(ip_tos);
 2975                                 else if (ptr == PTR_IP6(ip6_plen) ||
 2976                                     ptr == PTR_IP6(ip6_plen) + 1)
 2977                                         ptr = PTR_IP(ip_len);
 2978                                 else if (ptr == PTR_IP6(ip6_nxt))
 2979                                         ptr = PTR_IP(ip_p);
 2980                                 else if (ptr == PTR_IP6(ip6_hlim))
 2981                                         ptr = PTR_IP(ip_ttl);
 2982                                 else if (ptr >= PTR_IP6(ip6_src) &&
 2983                                     ptr < PTR_IP6(ip6_dst))
 2984                                         ptr = PTR_IP(ip_src);
 2985                                 else if (ptr >= PTR_IP6(ip6_dst) &&
 2986                                     ptr < sizeof(struct ip6_hdr))
 2987                                         ptr = PTR_IP(ip_dst);
 2988                                 else {
 2989                                         return (-1);
 2990                                 }
 2991                                 break;
 2992                         case ICMP6_PARAMPROB_NEXTHEADER:
 2993                                 type = ICMP_UNREACH;
 2994                                 code = ICMP_UNREACH_PROTOCOL;
 2995                                 break;
 2996                         default:
 2997                                 return (-1);
 2998                         }
 2999                         break;
 3000                 default:
 3001                         return (-1);
 3002                 }
 3003 
 3004                 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI);
 3005                 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO);
 3006 
 3007                         /* aligns well with an icmpv4 nextmtu */
 3008                 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu));
 3009 
 3010                         /* the icmpv4 pptr is a single byte: the most significant one */
 3011                 if (ptr >= 0)
 3012                         pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24));
 3013                 break;
 3014         case AF_INET6:
 3015                 icmp4 = arg;
 3016                 type  = icmp4->icmp_type;
 3017                 code  = icmp4->icmp_code;
 3018                 mtu   = ntohs(icmp4->icmp_nextmtu);
 3019 
 3020                 switch (type) {
 3021                 case ICMP_ECHO:
 3022                         type = ICMP6_ECHO_REQUEST;
 3023                         break;
 3024                 case ICMP_ECHOREPLY:
 3025                         type = ICMP6_ECHO_REPLY;
 3026                         break;
 3027                 case ICMP_UNREACH:
 3028                         type = ICMP6_DST_UNREACH;
 3029                         switch (code) {
 3030                         case ICMP_UNREACH_NET:
 3031                         case ICMP_UNREACH_HOST:
 3032                         case ICMP_UNREACH_NET_UNKNOWN:
 3033                         case ICMP_UNREACH_HOST_UNKNOWN:
 3034                         case ICMP_UNREACH_ISOLATED:
 3035                         case ICMP_UNREACH_TOSNET:
 3036                         case ICMP_UNREACH_TOSHOST:
 3037                                 code = ICMP6_DST_UNREACH_NOROUTE;
 3038                                 break;
 3039                         case ICMP_UNREACH_PORT:
 3040                                 code = ICMP6_DST_UNREACH_NOPORT;
 3041                                 break;
 3042                         case ICMP_UNREACH_NET_PROHIB:
 3043                         case ICMP_UNREACH_HOST_PROHIB:
 3044                         case ICMP_UNREACH_FILTER_PROHIB:
 3045                         case ICMP_UNREACH_PRECEDENCE_CUTOFF:
 3046                                 code = ICMP6_DST_UNREACH_ADMIN;
 3047                                 break;
 3048                         case ICMP_UNREACH_PROTOCOL:
 3049                                 type = ICMP6_PARAM_PROB;
 3050                                 code = ICMP6_PARAMPROB_NEXTHEADER;
 3051                                 ptr  = offsetof(struct ip6_hdr, ip6_nxt);
 3052                                 break;
 3053                         case ICMP_UNREACH_NEEDFRAG:
 3054                                 type = ICMP6_PACKET_TOO_BIG;
 3055                                 code = 0;
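                                      /* adjust for the IPv4/IPv6 header size difference */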
 3056                                 mtu += 20;
 3057                                 break;
 3058                         default:
 3059                                 return (-1);
 3060                         }
 3061                         break;
 3062                 case ICMP_TIMXCEED:
 3063                         type = ICMP6_TIME_EXCEEDED;
 3064                         break;
 3065                 case ICMP_PARAMPROB:
 3066                         type = ICMP6_PARAM_PROB;
 3067                         switch (code) {
 3068                         case ICMP_PARAMPROB_ERRATPTR:
 3069                                 code = ICMP6_PARAMPROB_HEADER;
 3070                                 break;
 3071                         case ICMP_PARAMPROB_LENGTH:
 3072                                 code = ICMP6_PARAMPROB_HEADER;
 3073                                 break;
 3074                         default:
 3075                                 return (-1);
 3076                         }
 3077 
 3078                         ptr = icmp4->icmp_pptr;
 3079                         if (ptr == 0 || ptr == PTR_IP(ip_tos))
 3080                                 ; /* preserve */
 3081                         else if (ptr == PTR_IP(ip_len) ||
 3082                             ptr == PTR_IP(ip_len) + 1)
 3083                                 ptr = PTR_IP6(ip6_plen);
 3084                         else if (ptr == PTR_IP(ip_ttl))
 3085                                 ptr = PTR_IP6(ip6_hlim);
 3086                         else if (ptr == PTR_IP(ip_p))
 3087                                 ptr = PTR_IP6(ip6_nxt);
 3088                         else if (ptr >= PTR_IP(ip_src) &&
 3089                             ptr < PTR_IP(ip_dst))
 3090                                 ptr = PTR_IP6(ip6_src);
 3091                         else if (ptr >= PTR_IP(ip_dst) &&
 3092                             ptr < sizeof(struct ip))
 3093                                 ptr = PTR_IP6(ip6_dst);
 3094                         else {
 3095                                 return (-1);
 3096                         }
 3097                         break;
 3098                 default:
 3099                         return (-1);
 3100                 }
 3101 
 3102                 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI);
 3103                 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO);
 3104                 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu));
 3105                 if (ptr >= 0)
 3106                         pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr));
 3107                 break;
 3108         }
 3109 
 3110         return (0);
 3111 }
 3112 #endif /* INET6 */
 3113 
 3114 /*
 3115  * Need to modulate the sequence numbers in the TCP SACK option
 3116  * (credits to Krzysztof Pfaff for report and patch)
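       * SACK blocks carry sequence numbers in the peer's space, so a
       * state's seqdiff modulator must be applied to them as well.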
 3117  */
 3118 int
 3119 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst)
 3120 {
 3121         struct sackblk   sack;
 3122         int              copyback = 0, i;
 3123         int              olen, optsoff;
 3124         u_int8_t         opts[MAX_TCPOPTLEN], *opt, *eoh;
 3125 
 3126         olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
 3127         optsoff = pd->off + sizeof(struct tcphdr);
 3128 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2)
 3129         if (olen < TCPOLEN_MINSACK ||
 3130             !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af))
 3131                 return (0);
 3132 
 3133         eoh = opts + olen;
 3134         opt = opts;
 3135         while ((opt = pf_find_tcpopt(opt, opts, olen,
 3136                     TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL)
 3137         {
 3138                 size_t safelen = MIN(opt[1], (eoh - opt));
 3139                 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) {
 3140                         size_t startoff = (opt + i) - opts;
 3141                         memcpy(&sack, &opt[i], sizeof(sack));
 3142                         pf_patch_32_unaligned(pd, &sack.start,
 3143                             htonl(ntohl(sack.start) - dst->seqdiff),
 3144                             PF_ALGNMNT(startoff));
 3145                         pf_patch_32_unaligned(pd, &sack.end,
 3146                             htonl(ntohl(sack.end) - dst->seqdiff),
 3147                             PF_ALGNMNT(startoff + sizeof(sack.start)));
 3148                         memcpy(&opt[i], &sack, sizeof(sack));
 3149                 }
 3150                 copyback = 1;
 3151                 opt += opt[1];
 3152         }
 3153 
 3154         if (copyback)
 3155                 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT);
 3156         return (copyback);
 3157 }
 3158 
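      /*
       * Build a stand-alone TCP segment (no payload) with optional MSS and
       * SACK-permitted options, for packets pf generates itself, e.g. RSTs
       * and the challenge ACK below.
       */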
 3159 struct mbuf *
 3160 pf_build_tcp(const struct pf_rule *r, sa_family_t af,
 3161     const struct pf_addr *saddr, const struct pf_addr *daddr,
 3162     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
 3163     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
 3164     u_int16_t rtag, u_int sack, u_int rdom)
 3165 {
 3166         struct mbuf     *m;
 3167         int              len, tlen;
 3168         struct ip       *h;
 3169 #ifdef INET6
 3170         struct ip6_hdr  *h6;
 3171 #endif /* INET6 */
 3172         struct tcphdr   *th;
 3173         char            *opt;
 3174 
 3175         /* maximum segment size tcp option */
 3176         tlen = sizeof(struct tcphdr);
 3177         if (mss)
 3178                 tlen += 4;
 3179         if (sack)
 3180                 tlen += 2;
 3181 
 3182         switch (af) {
 3183         case AF_INET:
 3184                 len = sizeof(struct ip) + tlen;
 3185                 break;
 3186 #ifdef INET6
 3187         case AF_INET6:
 3188                 len = sizeof(struct ip6_hdr) + tlen;
 3189                 break;
 3190 #endif /* INET6 */
 3191         default:
 3192                 unhandled_af(af);
 3193         }
 3194 
 3195         /* create outgoing mbuf */
 3196         m = m_gethdr(M_DONTWAIT, MT_HEADER);
 3197         if (m == NULL)
 3198                 return (NULL);
 3199         if (tag)
 3200                 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
 3201         m->m_pkthdr.pf.tag = rtag;
 3202         m->m_pkthdr.ph_rtableid = rdom;
 3203         if (r && (r->scrub_flags & PFSTATE_SETPRIO))
 3204                 m->m_pkthdr.pf.prio = r->set_prio[0];
 3205         if (r && r->qid)
 3206                 m->m_pkthdr.pf.qid = r->qid;
 3207         m->m_data += max_linkhdr;
 3208         m->m_pkthdr.len = m->m_len = len;
 3209         m->m_pkthdr.ph_ifidx = 0;
 3210         m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
 3211         memset(m->m_data, 0, len);
 3212         switch (af) {
 3213         case AF_INET:
 3214                 h = mtod(m, struct ip *);
 3215                 h->ip_p = IPPROTO_TCP;
 3216                 h->ip_len = htons(tlen);
 3217                 h->ip_v = 4;
 3218                 h->ip_hl = sizeof(*h) >> 2;
 3219                 h->ip_tos = IPTOS_LOWDELAY;
 3220                 h->ip_len = htons(len);
 3221                 h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
 3222                 h->ip_ttl = ttl ? ttl : ip_defttl;
 3223                 h->ip_sum = 0;
 3224                 h->ip_src.s_addr = saddr->v4.s_addr;
 3225                 h->ip_dst.s_addr = daddr->v4.s_addr;
 3226 
 3227                 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
 3228                 break;
 3229 #ifdef INET6
 3230         case AF_INET6:
 3231                 h6 = mtod(m, struct ip6_hdr *);
 3232                 h6->ip6_nxt = IPPROTO_TCP;
 3233                 h6->ip6_plen = htons(tlen);
 3234                 h6->ip6_vfc |= IPV6_VERSION;
 3235                 h6->ip6_hlim = IPV6_DEFHLIM;
 3236                 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
 3237                 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
 3238 
 3239                 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
 3240                 break;
 3241 #endif /* INET6 */
 3242         default:
 3243                 unhandled_af(af);
 3244         }
 3245 
 3246         /* TCP header */
 3247         th->th_sport = sport;
 3248         th->th_dport = dport;
 3249         th->th_seq = htonl(seq);
 3250         th->th_ack = htonl(ack);
 3251         th->th_off = tlen >> 2;
 3252         th->th_flags = flags;
 3253         th->th_win = htons(win);
 3254 
 3255         opt = (char *)(th + 1);
 3256         if (mss) {
 3257                 opt[0] = TCPOPT_MAXSEG;
 3258                 opt[1] = 4;
 3259                 mss = htons(mss);
 3260                 memcpy((opt + 2), &mss, 2);
 3261                 opt += 4;
 3262         }
 3263         if (sack) {
 3264                 opt[0] = TCPOPT_SACK_PERMITTED;
 3265                 opt[1] = 2;
 3266                 opt += 2;
 3267         }
 3268 
 3269         return (m);
 3270 }
 3271 
 3272 void
 3273 pf_send_tcp(const struct pf_rule *r, sa_family_t af,
 3274     const struct pf_addr *saddr, const struct pf_addr *daddr,
 3275     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
 3276     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
 3277     u_int16_t rtag, u_int rdom)
 3278 {
 3279         struct mbuf     *m;
 3280 
 3281         if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack,
 3282             flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL)
 3283                 return;
 3284 
 3285         switch (af) {
 3286         case AF_INET:
 3287                 ip_send(m);
 3288                 break;
 3289 #ifdef INET6
 3290         case AF_INET6:
 3291                 ip6_send(m);
 3292                 break;
 3293 #endif /* INET6 */
 3294         }
 3295 }
 3296 
 3297 static void
 3298 pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *st,
 3299     struct pf_state_peer *src, struct pf_state_peer *dst)
 3300 {
 3301         /*
 3302          * We are sending a challenge ACK in response to a SYN packet that
 3303          * matches an existing state (modulo the TCP window check), so the
 3304          * packet must be sent on behalf of the destination.
 3305          *
 3306          * We expect the sender either to remain silent or to send an RST,
 3307          * so that both the firewall and the remote peer can purge the dead
 3308          * state from memory.
 3309          */
 3310         pf_send_tcp(st->rule.ptr, pd->af, pd->dst, pd->src,
 3311             pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
 3312             src->seqlo, TH_ACK, 0, 0, st->rule.ptr->return_ttl, 1, 0,
 3313             pd->rdomain);
 3314 }
 3315 
 3316 void
 3317 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param,
 3318     sa_family_t af, struct pf_rule *r, u_int rdomain)
 3319 {
 3320         struct mbuf     *m0;
 3321 
 3322         if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
 3323                 return;
 3324 
 3325         m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
 3326         m0->m_pkthdr.ph_rtableid = rdomain;
 3327         if (r && (r->scrub_flags & PFSTATE_SETPRIO))
 3328                 m0->m_pkthdr.pf.prio = r->set_prio[0];
 3329         if (r && r->qid)
 3330                 m0->m_pkthdr.pf.qid = r->qid;
 3331 
 3332         switch (af) {
 3333         case AF_INET:
 3334                 icmp_error(m0, type, code, 0, param);
 3335                 break;
 3336 #ifdef INET6
 3337         case AF_INET6:
 3338                 icmp6_error(m0, type, code, param);
 3339                 break;
 3340 #endif /* INET6 */
 3341         }
 3342 }
 3343 
 3344 /*
 3345  * Return ((n == 0) == (a == b [with mask m])); i.e. with n != 0 this
 3346  * returns (a != b [with mask m]); n acts as a negation flag.
 3347  */
 3348 int
 3349 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
 3350     struct pf_addr *b, sa_family_t af)
 3351 {
 3352         switch (af) {
 3353         case AF_INET:
 3354                 if ((a->addr32[0] & m->addr32[0]) ==
 3355                     (b->addr32[0] & m->addr32[0]))
 3356                         return (n == 0);
 3357                 break;
 3358 #ifdef INET6
 3359         case AF_INET6:
 3360                 if (((a->addr32[0] & m->addr32[0]) ==
 3361                      (b->addr32[0] & m->addr32[0])) &&
 3362                     ((a->addr32[1] & m->addr32[1]) ==
 3363                      (b->addr32[1] & m->addr32[1])) &&
 3364                     ((a->addr32[2] & m->addr32[2]) ==
 3365                      (b->addr32[2] & m->addr32[2])) &&
 3366                     ((a->addr32[3] & m->addr32[3]) ==
 3367                      (b->addr32[3] & m->addr32[3])))
 3368                         return (n == 0);
 3369                 break;
 3370 #endif /* INET6 */
 3371         }
 3372 
 3373         return (n != 0);
 3374 }
 3375 
 3376 /*
 3377  * Return 1 if b <= a <= e, otherwise return 0.
 3378  */
 3379 int
 3380 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
 3381     struct pf_addr *a, sa_family_t af)
 3382 {
 3383         switch (af) {
 3384         case AF_INET:
 3385                 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
 3386                     (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
 3387                         return (0);
 3388                 break;
 3389 #ifdef INET6
 3390         case AF_INET6: {
 3391                 int     i;
 3392 
 3393                 /* check a >= b */
 3394                 for (i = 0; i < 4; ++i)
 3395                         if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
 3396                                 break;
 3397                         else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
 3398                                 return (0);
 3399                 /* check a <= e */
 3400                 for (i = 0; i < 4; ++i)
 3401                         if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
 3402                                 break;
 3403                         else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
 3404                                 return (0);
 3405                 break;
 3406         }
 3407 #endif /* INET6 */
 3408         }
 3409         return (1);
 3410 }
 3411 
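      /*
       * Generic operator match: compare p with a1 (and a2 for the range
       * operators) according to the PF_OP_* code; shared by the port, uid
       * and gid matchers below.
       */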
 3412 int
 3413 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
 3414 {
 3415         switch (op) {
 3416         case PF_OP_IRG:
 3417                 return ((p > a1) && (p < a2));
 3418         case PF_OP_XRG:
 3419                 return ((p < a1) || (p > a2));
 3420         case PF_OP_RRG:
 3421                 return ((p >= a1) && (p <= a2));
 3422         case PF_OP_EQ:
 3423                 return (p == a1);
 3424         case PF_OP_NE:
 3425                 return (p != a1);
 3426         case PF_OP_LT:
 3427                 return (p < a1);
 3428         case PF_OP_LE:
 3429                 return (p <= a1);
 3430         case PF_OP_GT:
 3431                 return (p > a1);
 3432         case PF_OP_GE:
 3433                 return (p >= a1);
 3434         }
 3435         return (0); /* never reached */
 3436 }
 3437 
 3438 int
 3439 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
 3440 {
 3441         return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
 3442 }
 3443 
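      /*
       * A uid/gid of -1 means the socket owner could not be determined;
       * only explicit equal/unequal tests may match in that case.
       */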
 3444 int
 3445 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
 3446 {
 3447         if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE)
 3448                 return (0);
 3449         return (pf_match(op, a1, a2, u));
 3450 }
 3451 
 3452 int
 3453 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
 3454 {
 3455         if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE)
 3456                 return (0);
 3457         return (pf_match(op, a1, a2, g));
 3458 }
 3459 
 3460 int
 3461 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
 3462 {
 3463         if (*tag == -1)
 3464                 *tag = m->m_pkthdr.pf.tag;
 3465 
 3466         return ((!r->match_tag_not && r->match_tag == *tag) ||
 3467             (r->match_tag_not && r->match_tag != *tag));
 3468 }
 3469 
 3470 int
 3471 pf_match_rcvif(struct mbuf *m, struct pf_rule *r)
 3472 {
 3473         struct ifnet *ifp;
 3474 #if NCARP > 0
 3475         struct ifnet *ifp0;
 3476 #endif
 3477         struct pfi_kif *kif;
 3478 
 3479         ifp = if_get(m->m_pkthdr.ph_ifidx);
 3480         if (ifp == NULL)
 3481                 return (0);
 3482 
 3483 #if NCARP > 0
 3484         if (ifp->if_type == IFT_CARP &&
 3485             (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) {
 3486                 kif = (struct pfi_kif *)ifp0->if_pf_kif;
 3487                 if_put(ifp0);
 3488         } else
 3489 #endif /* NCARP */
 3490                 kif = (struct pfi_kif *)ifp->if_pf_kif;
 3491 
 3492         if_put(ifp);
 3493 
 3494         if (kif == NULL) {
 3495                 DPFPRINTF(LOG_ERR,
 3496                     "%s: kif == NULL, @%d via %s", __func__,
 3497                     r->nr, r->rcv_ifname);
 3498                 return (0);
 3499         }
 3500 
 3501         return (pfi_kif_match(r->rcv_kif, kif));
 3502 }
 3503 
 3504 void
 3505 pf_tag_packet(struct mbuf *m, int tag, int rtableid)
 3506 {
 3507         if (tag > 0)
 3508                 m->m_pkthdr.pf.tag = tag;
 3509         if (rtableid >= 0)
 3510                 m->m_pkthdr.ph_rtableid = (u_int)rtableid;
 3511 }
 3512 
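      /*
       * The anchor stack lives in per-CPU memory; the sentinel slot at
       * index PF_ANCHOR_STACK_MAX holds the pointer to the current top
       * frame.
       */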
 3513 void
 3514 pf_anchor_stack_init(void)
 3515 {
 3516         struct pf_anchor_stackframe *stack;
 3517 
 3518         stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
 3519         stack[PF_ANCHOR_STACK_MAX].sf_stack_top = &stack[0];
 3520         cpumem_leave(pf_anchor_stack, stack);
 3521 }
 3522 
 3523 int
 3524 pf_anchor_stack_is_full(struct pf_anchor_stackframe *sf)
 3525 {
 3526         struct pf_anchor_stackframe *stack;
 3527         int rv;
 3528 
 3529         stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
 3530         rv = (sf == &stack[PF_ANCHOR_STACK_MAX]);
 3531         cpumem_leave(pf_anchor_stack, stack);
 3532 
 3533         return (rv);
 3534 }
 3535 
 3536 int
 3537 pf_anchor_stack_is_empty(struct pf_anchor_stackframe *sf)
 3538 {
 3539         struct pf_anchor_stackframe *stack;
 3540         int rv;
 3541 
 3542         stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
 3543         rv = (sf == &stack[0]);
 3544         cpumem_leave(pf_anchor_stack, stack);
 3545 
 3546         return (rv);
 3547 }
 3548 
 3549 struct pf_anchor_stackframe *
 3550 pf_anchor_stack_top(void)
 3551 {
 3552         struct pf_anchor_stackframe *stack;
 3553         struct pf_anchor_stackframe *top_sf;
 3554 
 3555         stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
 3556         top_sf = stack[PF_ANCHOR_STACK_MAX].sf_stack_top;
 3557         cpumem_leave(pf_anchor_stack, stack);
 3558 
 3559         return (top_sf);
 3560 }
 3561 
 3562 int
 3563 pf_anchor_stack_push(struct pf_ruleset *rs, struct pf_rule *r,
 3564     struct pf_anchor *child, int jump_target)
 3565 {
 3566         struct pf_anchor_stackframe *stack;
 3567         struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top();
 3568 
 3569         top_sf++;
 3570         if (pf_anchor_stack_is_full(top_sf))
 3571                 return (-1);
 3572 
 3573         top_sf->sf_rs = rs;
 3574         top_sf->sf_r = r;
 3575         top_sf->sf_child = child;
 3576         top_sf->sf_jump_target = jump_target;
 3577 
 3578         stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
 3579 
 3580         if ((top_sf <= &stack[0]) || (top_sf >= &stack[PF_ANCHOR_STACK_MAX]))
 3581                 panic("%s: top frame outside of anchor stack range", __func__);
 3582 
 3583         stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf;
 3584         cpumem_leave(pf_anchor_stack, stack);
 3585 
 3586         return (0);
 3587 }
 3588 
 3589 int
 3590 pf_anchor_stack_pop(struct pf_ruleset **rs, struct pf_rule **r,
 3591     struct pf_anchor **child, int *jump_target)
 3592 {
 3593         struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top();
 3594         struct pf_anchor_stackframe *stack;
 3595         int on_top;
 3596 
 3597         stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
 3598         if (pf_anchor_stack_is_empty(top_sf)) {
 3599                 on_top = -1;
 3600         } else {
 3601                 if ((top_sf <= &stack[0]) ||
 3602                     (top_sf >= &stack[PF_ANCHOR_STACK_MAX]))
 3603                         panic("%s: top frame outside of anchor stack range",
 3604                             __func__);
 3605 
 3606                 *rs = top_sf->sf_rs;
 3607                 *r = top_sf->sf_r;
 3608                 *child = top_sf->sf_child;
 3609                 *jump_target = top_sf->sf_jump_target;
 3610                 top_sf--;
 3611                 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf;
 3612                 on_top = 0;
 3613         }
 3614         cpumem_leave(pf_anchor_stack, stack);
 3615 
 3616         return (on_top);
 3617 }
 3618 
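      /*
       * Combine the network part of raddr with the host part of saddr:
       * naddr = (raddr & rmask) | (saddr & ~rmask).  For example, raddr
       * 10.0.0.0, rmask 255.255.255.0 and saddr 192.168.1.48 give 10.0.0.48.
       */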
 3619 void
 3620 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
 3621     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
 3622 {
 3623         switch (af) {
 3624         case AF_INET:
 3625                 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
 3626                 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
 3627                 break;
 3628 #ifdef INET6
 3629         case AF_INET6:
 3630                 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
 3631                 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
 3632                 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
 3633                 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
 3634                 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
 3635                 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
 3636                 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
 3637                 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
 3638                 break;
 3639 #endif /* INET6 */
 3640         default:
 3641                 unhandled_af(af);
 3642         }
 3643 }
 3644 
 3645 void
 3646 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
 3647 {
 3648         switch (af) {
 3649         case AF_INET:
 3650                 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
 3651                 break;
 3652 #ifdef INET6
 3653         case AF_INET6:
 3654                 if (addr->addr32[3] == 0xffffffff) {
 3655                         addr->addr32[3] = 0;
 3656                         if (addr->addr32[2] == 0xffffffff) {
 3657                                 addr->addr32[2] = 0;
 3658                                 if (addr->addr32[1] == 0xffffffff) {
 3659                                         addr->addr32[1] = 0;
 3660                                         addr->addr32[0] =
 3661                                             htonl(ntohl(addr->addr32[0]) + 1);
 3662                                 } else
 3663                                         addr->addr32[1] =
 3664                                             htonl(ntohl(addr->addr32[1]) + 1);
 3665                         } else
 3666                                 addr->addr32[2] =
 3667                                     htonl(ntohl(addr->addr32[2]) + 1);
 3668                 } else
 3669                         addr->addr32[3] =
 3670                             htonl(ntohl(addr->addr32[3]) + 1);
 3671                 break;
 3672 #endif /* INET6 */
 3673         default:
 3674                 unhandled_af(af);
 3675         }
 3676 }
 3677 
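      /*
       * Find the local socket for this packet and record its owner's
       * uid/gid/pid for user and group rules.  Returns 1 on success, -1 if
       * no matching socket (or an unsupported protocol) is found.
       */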
 3678 int
 3679 pf_socket_lookup(struct pf_pdesc *pd)
 3680 {
 3681         struct pf_addr          *saddr, *daddr;
 3682         u_int16_t                sport, dport;
 3683         struct inpcbtable       *tb;
 3684         struct inpcb            *inp;
 3685 
 3686         pd->lookup.uid = -1;
 3687         pd->lookup.gid = -1;
 3688         pd->lookup.pid = NO_PID;
 3689         switch (pd->virtual_proto) {
 3690         case IPPROTO_TCP:
 3691                 sport = pd->hdr.tcp.th_sport;
 3692                 dport = pd->hdr.tcp.th_dport;
 3693                 PF_ASSERT_LOCKED();
 3694                 NET_ASSERT_LOCKED();
 3695                 tb = &tcbtable;
 3696                 break;
 3697         case IPPROTO_UDP:
 3698                 sport = pd->hdr.udp.uh_sport;
 3699                 dport = pd->hdr.udp.uh_dport;
 3700                 PF_ASSERT_LOCKED();
 3701                 NET_ASSERT_LOCKED();
 3702                 tb = &udbtable;
 3703                 break;
 3704         default:
 3705                 return (-1);
 3706         }
 3707         if (pd->dir == PF_IN) {
 3708                 saddr = pd->src;
 3709                 daddr = pd->dst;
 3710         } else {
 3711                 u_int16_t       p;
 3712 
 3713                 p = sport;
 3714                 sport = dport;
 3715                 dport = p;
 3716                 saddr = pd->dst;
 3717                 daddr = pd->src;
 3718         }
 3719         switch (pd->af) {
 3720         case AF_INET:
 3721                 /*
 3722                  * Fails when rtable is changed while evaluating the ruleset
 3723                  * The socket looked up will not match the one hit in the end.
 3724                  */
 3725                 inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport,
 3726                     pd->rdomain);
 3727                 if (inp == NULL) {
 3728                         inp = in_pcblookup_listen(tb, daddr->v4, dport,
 3729                             NULL, pd->rdomain);
 3730                         if (inp == NULL)
 3731                                 return (-1);
 3732                 }
 3733                 break;
 3734 #ifdef INET6
 3735         case AF_INET6:
 3736                 inp = in6_pcblookup(tb, &saddr->v6, sport, &daddr->v6,
 3737                     dport, pd->rdomain);
 3738                 if (inp == NULL) {
 3739                         inp = in6_pcblookup_listen(tb, &daddr->v6, dport,
 3740                             NULL, pd->rdomain);
 3741                         if (inp == NULL)
 3742                                 return (-1);
 3743                 }
 3744                 break;
 3745 #endif /* INET6 */
 3746         default:
 3747                 unhandled_af(pd->af);
 3748         }
 3749         pd->lookup.uid = inp->inp_socket->so_euid;
 3750         pd->lookup.gid = inp->inp_socket->so_egid;
 3751         pd->lookup.pid = inp->inp_socket->so_cpid;
 3752         in_pcbunref(inp);
 3753         return (1);
 3754 }
 3755 
 3756 /* post: r  => (r[0] == type /\ r[1] >= min_typelen >= 2  "validity"
 3757  *                      /\ (eoh - r) >= min_typelen >= 2  "safety"  )
 3758  *
 3759  * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen
 3760  */
 3761 u_int8_t*
 3762 pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type,
 3763     u_int8_t min_typelen)
 3764 {
 3765         u_int8_t *eoh = opts + hlen;
 3766 
 3767         if (min_typelen < 2)
 3768                 return (NULL);
 3769 
 3770         while ((eoh - opt) >= min_typelen) {
 3771                 switch (*opt) {
 3772                 case TCPOPT_EOL:
 3773                         /* FALLTHROUGH - Work around the failure of some
 3774                            systems to NOP-pad their bzero'd option buffers,
 3775                            producing spurious EOLs */
 3776                 case TCPOPT_NOP:
 3777                         opt++;
 3778                         continue;
 3779                 default:
 3780                         if (opt[0] == type &&
 3781                             opt[1] >= min_typelen)
 3782                                 return (opt);
 3783                 }
 3784 
 3785                 opt += MAX(opt[1], 2); /* evade infinite loops */
 3786         }
 3787 
 3788         return (NULL);
 3789 }
 3790 
 3791 u_int8_t
 3792 pf_get_wscale(struct pf_pdesc *pd)
 3793 {
 3794         int              olen;
 3795         u_int8_t         opts[MAX_TCPOPTLEN], *opt;
 3796         u_int8_t         wscale = 0;
 3797 
 3798         olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
 3799         if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m,
 3800             pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
 3801                 return (0);
 3802 
 3803         opt = opts;
 3804         while ((opt = pf_find_tcpopt(opt, opts, olen,
 3805                     TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) {
 3806                 wscale = opt[2];
 3807                 wscale = MIN(wscale, TCP_MAX_WINSHIFT);
 3808                 wscale |= PF_WSCALE_FLAG;
 3809 
 3810                 opt += opt[1];
 3811         }
 3812 
 3813         return (wscale);
 3814 }
 3815 
 3816 u_int16_t
 3817 pf_get_mss(struct pf_pdesc *pd)
 3818 {
 3819         int              olen;
 3820         u_int8_t         opts[MAX_TCPOPTLEN], *opt;
 3821         u_int16_t        mss = tcp_mssdflt;
 3822 
 3823         olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
 3824         if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m,
 3825             pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af))
 3826                 return (0);
 3827 
 3828         opt = opts;
 3829         while ((opt = pf_find_tcpopt(opt, opts, olen,
 3830                     TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
 3831                         memcpy(&mss, (opt + 2), 2);
 3832                         mss = ntohs(mss);
 3833 
 3834                         opt += opt[1];
 3835         }
 3836         return (mss);
 3837 }
 3838 
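      /*
       * Derive an MSS from the MTU of the route towards addr, clamped to
       * the peer's offer and to at least 64, e.g. a 1500-byte MTU with
       * IPv4 yields 1500 - 20 - 20 = 1460.
       */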
 3839 u_int16_t
 3840 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
 3841 {
 3842         struct ifnet            *ifp;
 3843         struct sockaddr_in      *dst;
 3844 #ifdef INET6
 3845         struct sockaddr_in6     *dst6;
 3846 #endif /* INET6 */
 3847         struct rtentry          *rt = NULL;
 3848         struct sockaddr_storage  ss;
 3849         int                      hlen;
 3850         u_int16_t                mss = tcp_mssdflt;
 3851 
 3852         memset(&ss, 0, sizeof(ss));
 3853 
 3854         switch (af) {
 3855         case AF_INET:
 3856                 hlen = sizeof(struct ip);
 3857                 dst = (struct sockaddr_in *)&ss;
 3858                 dst->sin_family = AF_INET;
 3859                 dst->sin_len = sizeof(*dst);
 3860                 dst->sin_addr = addr->v4;
 3861                 rt = rtalloc(sintosa(dst), 0, rtableid);
 3862                 break;
 3863 #ifdef INET6
 3864         case AF_INET6:
 3865                 hlen = sizeof(struct ip6_hdr);
 3866                 dst6 = (struct sockaddr_in6 *)&ss;
 3867                 dst6->sin6_family = AF_INET6;
 3868                 dst6->sin6_len = sizeof(*dst6);
 3869                 dst6->sin6_addr = addr->v6;
 3870                 rt = rtalloc(sin6tosa(dst6), 0, rtableid);
 3871                 break;
 3872 #endif /* INET6 */
 3873         }
 3874 
 3875         if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) {
 3876                 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr);
 3877                 mss = max(tcp_mssdflt, mss);
 3878                 if_put(ifp);
 3879         }
 3880         rtfree(rt);
 3881         mss = min(mss, offer);
 3882         mss = max(mss, 64);             /* sanity - at least max opt space */
 3883         return (mss);
 3884 }
 3885 
 3886 static __inline int
 3887 pf_set_rt_ifp(struct pf_state *st, struct pf_addr *saddr, sa_family_t af,
 3888     struct pf_src_node **sns)
 3889 {
 3890         struct pf_rule *r = st->rule.ptr;
 3891         int     rv;
 3892 
 3893         if (!r->rt)
 3894                 return (0);
 3895 
 3896         rv = pf_map_addr(af, r, saddr, &st->rt_addr, NULL, sns,
 3897             &r->route, PF_SN_ROUTE);
 3898         if (rv == 0)
 3899                 st->rt = r->rt;
 3900 
 3901         return (rv);
 3902 }
 3903 
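      /*
       * Generate an initial sequence number by hashing the routing domain,
       * ports and addresses with a lazily initialised random secret
       * (SHA-512), plus a global offset bumped by 4096 on each call.
       */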
 3904 u_int32_t
 3905 pf_tcp_iss(struct pf_pdesc *pd)
 3906 {
 3907         SHA2_CTX ctx;
 3908         union {
 3909                 uint8_t bytes[SHA512_DIGEST_LENGTH];
 3910                 uint32_t words[1];
 3911         } digest;
 3912 
 3913         if (pf_tcp_secret_init == 0) {
 3914                 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret));
 3915                 SHA512Init(&pf_tcp_secret_ctx);
 3916                 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret,
 3917                     sizeof(pf_tcp_secret));
 3918                 pf_tcp_secret_init = 1;
 3919         }
 3920         ctx = pf_tcp_secret_ctx;
 3921 
 3922         SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain));
 3923         SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short));
 3924         SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short));
 3925         switch (pd->af) {
 3926         case AF_INET:
 3927                 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr));
 3928                 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr));
 3929                 break;
 3930 #ifdef INET6
 3931         case AF_INET6:
 3932                 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr));
 3933                 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr));
 3934                 break;
 3935 #endif /* INET6 */
 3936         }
 3937         SHA512Final(digest.bytes, &ctx);
 3938         pf_tcp_iss_off += 4096;
 3939         return (digest.words[0] + READ_ONCE(tcp_iss) + pf_tcp_iss_off);
 3940 }
 3941 
 3942 void
 3943 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a)
 3944 {
 3945         if (r->qid)
 3946                 a->qid = r->qid;
 3947         if (r->pqid)
 3948                 a->pqid = r->pqid;
 3949         if (r->rtableid >= 0)
 3950                 a->rtableid = r->rtableid;
 3951 #if NPFLOG > 0
 3952         a->log |= r->log;
 3953 #endif  /* NPFLOG > 0 */
 3954         if (r->scrub_flags & PFSTATE_SETTOS)
 3955                 a->set_tos = r->set_tos;
 3956         if (r->min_ttl)
 3957                 a->min_ttl = r->min_ttl;
 3958         if (r->max_mss)
 3959                 a->max_mss = r->max_mss;
 3960         a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID|
 3961             PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO));
 3962         if (r->scrub_flags & PFSTATE_SETPRIO) {
 3963                 a->set_prio[0] = r->set_prio[0];
 3964                 a->set_prio[1] = r->set_prio[1];
 3965         }
 3966         if (r->rule_flag & PFRULE_SETDELAY)
 3967                 a->delay = r->delay;
 3968 }
 3969 
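      /*
       * If test t indicates a mismatch, continue rule evaluation at a
       * (the next rule or a precomputed skip-step target); otherwise fall
       * through to the next test.
       */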
 3970 #define PF_TEST_ATTRIB(t, a)                    \
 3971         if (t) {                                \
 3972                 r = a;                          \
 3973                 continue;                       \
 3974         } else do {                             \
 3975         } while (0)
 3976 
 3977 enum pf_test_status
 3978 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
 3979 {
 3980         struct pf_rule *r;
 3981         struct pf_anchor *child = NULL;
 3982         int target;
 3983 
 3984         pf_anchor_stack_init();
 3985 enter_ruleset:
 3986         r = TAILQ_FIRST(ruleset->rules.active.ptr);
 3987         while (r != NULL) {
 3988                 PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED,
 3989                     TAILQ_NEXT(r, entries));
 3990                 r->evaluations++;
 3991                 PF_TEST_ATTRIB(
 3992                     (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot),
 3993                         r->skip[PF_SKIP_IFP].ptr);
 3994                 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir),
 3995                         r->skip[PF_SKIP_DIR].ptr);
 3996                 PF_TEST_ATTRIB((r->onrdomain >= 0  &&
 3997                     (r->onrdomain == ctx->pd->rdomain) == r->ifnot),
 3998                         r->skip[PF_SKIP_RDOM].ptr);
 3999                 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af),
 4000                         r->skip[PF_SKIP_AF].ptr);
 4001                 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto),
 4002                         r->skip[PF_SKIP_PROTO].ptr);
 4003                 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr,
 4004                     ctx->pd->naf, r->src.neg, ctx->pd->kif,
 4005                     ctx->act.rtableid)),
 4006                         r->skip[PF_SKIP_SRC_ADDR].ptr);
 4007                 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr,
 4008                     ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)),
 4009                         r->skip[PF_SKIP_DST_ADDR].ptr);
 4010 
 4011                 switch (ctx->pd->virtual_proto) {
 4012                 case PF_VPROTO_FRAGMENT:
 4013                         /* tcp/udp only. port_op always 0 in other cases */
 4014                         PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
 4015                                 TAILQ_NEXT(r, entries));
 4016                         PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP &&
 4017                             r->flagset),
 4018                                 TAILQ_NEXT(r, entries));
 4019                         /* icmp only. type/code always 0 in other cases */
 4020                         PF_TEST_ATTRIB((r->type || r->code),
 4021                                 TAILQ_NEXT(r, entries));
 4022                         /* tcp/udp only. {uid|gid}.op always 0 in other cases */
 4023                         PF_TEST_ATTRIB((r->gid.op || r->uid.op),
 4024                                 TAILQ_NEXT(r, entries));
 4025                         break;
 4026 
 4027                 case IPPROTO_TCP:
 4028                         PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) !=
 4029                             r->flags),
 4030                                 TAILQ_NEXT(r, entries));
 4031                         PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
 4032                             !pf_osfp_match(pf_osfp_fingerprint(ctx->pd),
 4033                             r->os_fingerprint)),
 4034                                 TAILQ_NEXT(r, entries));
 4035                         /* FALLTHROUGH */
 4036 
 4037                 case IPPROTO_UDP:
 4038                         /* tcp/udp only. port_op always 0 in other cases */
 4039                         PF_TEST_ATTRIB((r->src.port_op &&
 4040                             !pf_match_port(r->src.port_op, r->src.port[0],
 4041                             r->src.port[1], ctx->pd->nsport)),
 4042                                 r->skip[PF_SKIP_SRC_PORT].ptr);
 4043                         PF_TEST_ATTRIB((r->dst.port_op &&
 4044                             !pf_match_port(r->dst.port_op, r->dst.port[0],
 4045                             r->dst.port[1], ctx->pd->ndport)),
 4046                                 r->skip[PF_SKIP_DST_PORT].ptr);
 4047                         /* tcp/udp only. uid.op always 0 in other cases */
 4048                         PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done ||
 4049                             (ctx->pd->lookup.done =
 4050                             pf_socket_lookup(ctx->pd), 1)) &&
 4051                             !pf_match_uid(r->uid.op, r->uid.uid[0],
 4052                             r->uid.uid[1], ctx->pd->lookup.uid)),
 4053                                 TAILQ_NEXT(r, entries));
 4054                         /* tcp/udp only. gid.op always 0 in other cases */
 4055                         PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done ||
 4056                             (ctx->pd->lookup.done =
 4057                             pf_socket_lookup(ctx->pd), 1)) &&
 4058                             !pf_match_gid(r->gid.op, r->gid.gid[0],
 4059                             r->gid.gid[1], ctx->pd->lookup.gid)),
 4060                                 TAILQ_NEXT(r, entries));
 4061                         break;
 4062 
 4063                 case IPPROTO_ICMP:
 4064                 case IPPROTO_ICMPV6:
 4065                         /* icmp only. type always 0 in other cases */
 4066                         PF_TEST_ATTRIB((r->type &&
 4067                             r->type != ctx->icmptype + 1),
 4068                                 TAILQ_NEXT(r, entries));
  4069                         /* icmp only. code always 0 in other cases */
 4070                         PF_TEST_ATTRIB((r->code &&
 4071                             r->code != ctx->icmpcode + 1),
 4072                                 TAILQ_NEXT(r, entries));
 4073                         /* icmp only. don't create states on replies */
 4074                         PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
 4075                             (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
 4076                             ctx->icmp_dir != PF_IN),
 4077                                 TAILQ_NEXT(r, entries));
 4078                         break;
 4079 
 4080                 default:
 4081                         break;
 4082                 }
 4083 
 4084                 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
 4085                     ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
 4086                         TAILQ_NEXT(r, entries));
 4087                 PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
 4088                         TAILQ_NEXT(r, entries));
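                       /*
                        * r->prob is the match probability scaled so that UINT_MAX
                        * means 100%: skip the rule whenever the uniform draw from
                        * [1, UINT_MAX - 1] reaches or exceeds it.
                        */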
 4089                 PF_TEST_ATTRIB((r->prob &&
 4090                     r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
 4091                         TAILQ_NEXT(r, entries));
 4092                 PF_TEST_ATTRIB((r->match_tag &&
 4093                     !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
 4094                         TAILQ_NEXT(r, entries));
 4095                 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
 4096                     r->rcvifnot),
 4097                         TAILQ_NEXT(r, entries));
 4098                 PF_TEST_ATTRIB((r->prio &&
 4099                     (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
 4100                     ctx->pd->m->m_pkthdr.pf.prio),
 4101                         TAILQ_NEXT(r, entries));
 4102 
 4103                 /* must be last! */
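                       /*
                        * i.e. charge the packet-rate counter only for packets that
                        * matched every other criterion above; once max-pkt-rate is
                        * exceeded, pf_check_threshold() makes the rule stop matching.
                        */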
 4104                 if (r->pktrate.limit) {
 4105                         pf_add_threshold(&r->pktrate);
 4106                         PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
 4107                                 TAILQ_NEXT(r, entries));
 4108                 }
 4109 
  4110                 /* all criteria matched: the rule applies to this packet */
 4111                 if (r->tag)
 4112                         ctx->tag = r->tag;
 4113                 if (r->anchor == NULL) {
 4114 
 4115                         if (r->rule_flag & PFRULE_ONCE) {
 4116                                 u_int32_t       rule_flag;
 4117 
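                                       /*
                                        * Only one packet may claim a "once" rule: mark it
                                        * expired with a compare-and-swap.  If another CPU
                                        * (or an earlier packet) already expired it, move
                                        * on to the next rule.
                                        */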
 4118                                 rule_flag = r->rule_flag;
 4119                                 if (((rule_flag & PFRULE_EXPIRED) == 0) &&
 4120                                     atomic_cas_uint(&r->rule_flag, rule_flag,
 4121                                     rule_flag | PFRULE_EXPIRED) == rule_flag) {
 4122                                         r->exptime = gettime();
 4123                                 } else {
 4124                                         r = TAILQ_NEXT(r, entries);
 4125                                         continue;
 4126                                 }
 4127                         }
 4128 
 4129                         if (r->action == PF_MATCH) {
 4130                                 if ((ctx->ri = pool_get(&pf_rule_item_pl,
 4131                                     PR_NOWAIT)) == NULL) {
 4132                                         REASON_SET(&ctx->reason, PFRES_MEMORY);
 4133                                         return (PF_TEST_FAIL);
 4134                                 }
 4135                                 ctx->ri->r = r;
 4136                                 /* order is irrelevant */
 4137                                 SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
 4138                                 ctx->ri = NULL;
 4139                                 pf_rule_to_actions(r, &ctx->act);
 4140                                 if (r->rule_flag & PFRULE_AFTO)
 4141                                         ctx->pd->naf = r->naf;
 4142                                 if (pf_get_transaddr(r, ctx->pd, ctx->sns,
 4143                                     &ctx->nr) == -1) {
 4144                                         REASON_SET(&ctx->reason,
 4145                                             PFRES_TRANSLATE);
 4146                                         return (PF_TEST_FAIL);
 4147                                 }
 4148 #if NPFLOG > 0
 4149                                 if (r->log) {
 4150                                         REASON_SET(&ctx->reason, PFRES_MATCH);
 4151                                         pflog_packet(ctx->pd, ctx->reason, r,
 4152                                             ctx->a, ruleset, NULL);
 4153                                 }
 4154 #endif  /* NPFLOG > 0 */
 4155                         } else {
 4156                                 /*
 4157                                  * found matching r
 4158                                  */
 4159                                 *ctx->rm = r;
 4160                                 /*
  4161                                  * anchor, with its ruleset, to which r belongs
 4162                                  */
 4163                                 *ctx->am = ctx->a;
 4164                                 /*
  4165                                  * ruleset to which r belongs
 4166                                  */
 4167                                 *ctx->rsm = ruleset;
 4168                                 /*
  4169                                  * ruleset to which the anchor belongs.
 4170                                  */
 4171                                 ctx->arsm = ctx->aruleset;
 4172                         }
 4173 
 4174 #if NPFLOG > 0
 4175                         if (ctx->act.log & PF_LOG_MATCHES)
 4176                                 pf_log_matches(ctx->pd, r, ctx->a, ruleset,
 4177                                     &ctx->rules);
 4178 #endif  /* NPFLOG > 0 */
 4179 
 4180                         if (r->quick)
 4181                                 return (PF_TEST_QUICK);
 4182                 } else {
 4183                         ctx->a = r;
 4184                         ctx->aruleset = &r->anchor->ruleset;
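                               /*
                                * Descend into the anchor without recursion: remember
                                * where to resume (PF_NEXT_CHILD or PF_NEXT_RULE) on the
                                * anchor stack and restart the loop at enter_ruleset;
                                * pf_anchor_stack_pop() below jumps back to next_child or
                                * next_rule once the sub-ruleset has been evaluated.
                                */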
 4185                         if (r->anchor_wildcard) {
 4186                                 RB_FOREACH(child, pf_anchor_node,
 4187                                     &r->anchor->children) {
 4188                                         if (pf_anchor_stack_push(ruleset, r, child,
 4189                                             PF_NEXT_CHILD) != 0)
 4190                                                 return (PF_TEST_FAIL);
 4191 
 4192                                         ruleset = &child->ruleset;
 4193                                         goto enter_ruleset;
 4194 next_child:
 4195                                         continue;       /* with RB_FOREACH() */
 4196                                 }
 4197                         } else {
 4198                                 if (pf_anchor_stack_push(ruleset, r, child,
 4199                                     PF_NEXT_RULE) != 0)
 4200                                         return (PF_TEST_FAIL);
 4201 
 4202                                 ruleset = &r->anchor->ruleset;
 4203                                 child = NULL;
 4204                                 goto enter_ruleset;
 4205 next_rule:
 4206                                 ;
 4207                         }
 4208                 }
 4209                 r = TAILQ_NEXT(r, entries);
 4210         }
 4211 
 4212         if (pf_anchor_stack_pop(&ruleset, &r, &child, &target) == 0) {
 4213                 /* stop if any rule matched within quick anchors. */
 4214                 if (r->quick == PF_TEST_QUICK && *ctx->am == r)
 4215                         return (PF_TEST_QUICK);
 4216 
 4217                 switch (target) {
 4218                 case PF_NEXT_CHILD:
 4219                         goto next_child;
 4220                 case PF_NEXT_RULE:
 4221                         goto next_rule;
 4222                 default:
 4223                         panic("%s: unknown jump target", __func__);
 4224                 }
 4225         }
 4226 
 4227         return (PF_TEST_OK);
 4228 }
 4229 
 4230 int
 4231 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
 4232     struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason,
 4233     struct pfsync_deferral **pdeferral)
 4234 {
 4235         struct pf_rule          *r = NULL;
 4236         struct pf_rule          *a = NULL;
 4237         struct pf_ruleset       *ruleset = NULL;
 4238         struct pf_state_key     *skw = NULL, *sks = NULL;
 4239         int                      rewrite = 0;
 4240         u_int16_t                virtual_type, virtual_id;
 4241         int                      action = PF_DROP;
 4242         struct pf_test_ctx       ctx;
 4243         int                      rv;
 4244 
 4245         memset(&ctx, 0, sizeof(ctx));
 4246         ctx.pd = pd;
 4247         ctx.rm = rm;
 4248         ctx.am = am;
 4249         ctx.rsm = rsm;
 4250         ctx.th = &pd->hdr.tcp;
 4251         ctx.act.rtableid = pd->rdomain;
 4252         ctx.tag = -1;
 4253         SLIST_INIT(&ctx.rules);
 4254 
 4255         if (pd->dir == PF_IN && if_congested()) {
 4256                 REASON_SET(&ctx.reason, PFRES_CONGEST);
 4257                 return (PF_DROP);
 4258         }
 4259 
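               /*
                * pf_icmp_mapping() tells us whether this is the query or the
                * reply direction and yields a virtual id/type pair that stands
                * in for TCP/UDP ports in rule and state matching.
                */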
 4260         switch (pd->virtual_proto) {
 4261         case IPPROTO_ICMP:
 4262                 ctx.icmptype = pd->hdr.icmp.icmp_type;
 4263                 ctx.icmpcode = pd->hdr.icmp.icmp_code;
 4264                 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
 4265                     &ctx.icmp_dir, &virtual_id, &virtual_type);
 4266                 if (ctx.icmp_dir == PF_IN) {
 4267                         pd->osport = pd->nsport = virtual_id;
 4268                         pd->odport = pd->ndport = virtual_type;
 4269                 } else {
 4270                         pd->osport = pd->nsport = virtual_type;
 4271                         pd->odport = pd->ndport = virtual_id;
 4272                 }
 4273                 break;
 4274 #ifdef INET6
 4275         case IPPROTO_ICMPV6:
 4276                 ctx.icmptype = pd->hdr.icmp6.icmp6_type;
 4277                 ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
 4278                 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
 4279                     &ctx.icmp_dir, &virtual_id, &virtual_type);
 4280                 if (ctx.icmp_dir == PF_IN) {
 4281                         pd->osport = pd->nsport = virtual_id;
 4282                         pd->odport = pd->ndport = virtual_type;
 4283                 } else {
 4284                         pd->osport = pd->nsport = virtual_type;
 4285                         pd->odport = pd->ndport = virtual_id;
 4286                 }
 4287                 break;
 4288 #endif /* INET6 */
 4289         }
 4290 
 4291         ruleset = &pf_main_ruleset;
 4292         rv = pf_match_rule(&ctx, ruleset);
 4293         if (rv == PF_TEST_FAIL) {
 4294                 /*
 4295                  * Reason has been set in pf_match_rule() already.
 4296                  */
 4297                 goto cleanup;
 4298         }
 4299 
 4300         r = *ctx.rm;    /* matching rule */
 4301         a = *ctx.am;    /* rule that defines an anchor containing 'r' */
 4302         ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */
 4303         ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */
 4304 
 4305         /* apply actions for last matching pass/block rule */
 4306         pf_rule_to_actions(r, &ctx.act);
 4307         if (r->rule_flag & PFRULE_AFTO)
 4308                 pd->naf = r->naf;
 4309         if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) {
 4310                 REASON_SET(&ctx.reason, PFRES_TRANSLATE);
 4311                 goto cleanup;
 4312         }
 4313         REASON_SET(&ctx.reason, PFRES_MATCH);
 4314 
 4315 #if NPFLOG > 0
 4316         if (r->log)
 4317                 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL);
 4318         if (ctx.act.log & PF_LOG_MATCHES)
 4319                 pf_log_matches(pd, r, a, ruleset, &ctx.rules);
 4320 #endif  /* NPFLOG > 0 */
 4321 
 4322         if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
 4323             (r->action == PF_DROP) &&
 4324             ((r->rule_flag & PFRULE_RETURNRST) ||
 4325             (r->rule_flag & PFRULE_RETURNICMP) ||
 4326             (r->rule_flag & PFRULE_RETURN))) {
 4327                 if (pd->proto == IPPROTO_TCP &&
 4328                     ((r->rule_flag & PFRULE_RETURNRST) ||
 4329                     (r->rule_flag & PFRULE_RETURN)) &&
 4330                     !(ctx.th->th_flags & TH_RST)) {
 4331                         u_int32_t        ack =
 4332                             ntohl(ctx.th->th_seq) + pd->p_len;
 4333 
 4334                         if (pf_check_tcp_cksum(pd->m, pd->off,
 4335                             pd->tot_len - pd->off, pd->af))
 4336                                 REASON_SET(&ctx.reason, PFRES_PROTCKSUM);
 4337                         else {
 4338                                 if (ctx.th->th_flags & TH_SYN)
 4339                                         ack++;
 4340                                 if (ctx.th->th_flags & TH_FIN)
 4341                                         ack++;
 4342                                 pf_send_tcp(r, pd->af, pd->dst,
 4343                                     pd->src, ctx.th->th_dport,
 4344                                     ctx.th->th_sport, ntohl(ctx.th->th_ack),
 4345                                     ack, TH_RST|TH_ACK, 0, 0, r->return_ttl,
 4346                                     1, 0, pd->rdomain);
 4347                         }
 4348                 } else if ((pd->proto != IPPROTO_ICMP ||
 4349                     ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET &&
 4350                     r->return_icmp)
 4351                         pf_send_icmp(pd->m, r->return_icmp >> 8,
 4352                             r->return_icmp & 255, 0, pd->af, r, pd->rdomain);
 4353                 else if ((pd->proto != IPPROTO_ICMPV6 ||
 4354                     (ctx.icmptype >= ICMP6_ECHO_REQUEST &&
 4355                     ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
 4356                     r->return_icmp6)
 4357                         pf_send_icmp(pd->m, r->return_icmp6 >> 8,
 4358                             r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain);
 4359         }
 4360 
 4361         if (r->action == PF_DROP)
 4362                 goto cleanup;
 4363 
 4364         pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid);
 4365         if (ctx.act.rtableid >= 0 &&
 4366             rtable_l2(ctx.act.rtableid) != pd->rdomain)
 4367                 pd->destchg = 1;
 4368 
 4369         if (r->action == PF_PASS && pd->badopts != 0 && ! r->allow_opts) {
 4370                 REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
 4371 #if NPFLOG > 0
 4372                 pd->pflog |= PF_LOG_FORCE;
 4373 #endif  /* NPFLOG > 0 */
 4374                 DPFPRINTF(LOG_NOTICE, "dropping packet with "
 4375                     "ip/ipv6 options in pf_test_rule()");
 4376                 goto cleanup;
 4377         }
 4378 
 4379         action = PF_PASS;
 4380 
 4381         if (pd->virtual_proto != PF_VPROTO_FRAGMENT
 4382             && !ctx.state_icmp && r->keep_state) {
 4383 
 4384                 if (r->rule_flag & PFRULE_SRCTRACK &&
 4385                     pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
 4386                     pd->af, pd->src, NULL, NULL) != 0) {
 4387                         REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
 4388                         goto cleanup;
 4389                 }
 4390 
 4391                 if (r->max_states && (r->states_cur >= r->max_states)) {
 4392                         pf_status.lcounters[LCNT_STATES]++;
 4393                         REASON_SET(&ctx.reason, PFRES_MAXSTATES);
 4394                         goto cleanup;
 4395                 }
 4396 
 4397                 action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
 4398                     &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);
 4399 
 4400                 if (action != PF_PASS)
 4401                         goto cleanup;
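                       /*
                        * A translation rule changed the key: rewrite the packet
                        * toward the side it is heading for, the stack-side key
                        * (sks) for inbound packets, the wire-side key (skw) for
                        * outbound ones.
                        */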
 4402                 if (sks != skw) {
 4403                         struct pf_state_key     *sk;
 4404 
 4405                         if (pd->dir == PF_IN)
 4406                                 sk = sks;
 4407                         else
 4408                                 sk = skw;
 4409                         rewrite += pf_translate(pd,
 4410                             &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
 4411                             sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
 4412                             &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
 4413                             sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
 4414                             virtual_type, ctx.icmp_dir);
 4415                 }
 4416 
 4417 #ifdef INET6
 4418                 if (rewrite && skw->af != sks->af)
 4419                         action = PF_AFRT;
 4420 #endif /* INET6 */
 4421 
 4422         } else {
 4423                 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
 4424                         SLIST_REMOVE_HEAD(&ctx.rules, entry);
 4425                         pool_put(&pf_rule_item_pl, ctx.ri);
 4426                 }
 4427         }
 4428 
 4429         /* copy back packet headers if needed */
 4430         if (rewrite && pd->hdrlen) {
 4431                 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
 4432         }
 4433 
 4434 #if NPFSYNC > 0
 4435         if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
 4436             pd->dir == PF_OUT && pfsync_is_up()) {
 4437                 /*
  4438                  * We want the state created, but we don't
  4439                  * want to send the packet yet, in case a partner
  4440                  * firewall has to know about the state to allow
  4441                  * replies through it.
 4442                  */
 4443                 if (pfsync_defer(*sm, pd->m, pdeferral))
 4444                         return (PF_DEFER);
 4445         }
 4446 #endif  /* NPFSYNC > 0 */
 4447 
 4448         return (action);
 4449 
 4450 cleanup:
 4451         while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
 4452                 SLIST_REMOVE_HEAD(&ctx.rules, entry);
 4453                 pool_put(&pf_rule_item_pl, ctx.ri);
 4454         }
 4455 
 4456         return (action);
 4457 }
 4458 
 4459 static __inline int
 4460 pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
 4461     struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks,
 4462     int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules,
 4463     struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX])
 4464 {
 4465         struct pf_state         *st = NULL;
 4466         struct tcphdr           *th = &pd->hdr.tcp;
 4467         u_int16_t                mss = tcp_mssdflt;
 4468         u_short                  reason;
 4469         u_int                    i;
 4470 
 4471         st = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
 4472         if (st == NULL) {
 4473                 REASON_SET(&reason, PFRES_MEMORY);
 4474                 goto csfailed;
 4475         }
 4476         st->rule.ptr = r;
 4477         st->anchor.ptr = a;
 4478         st->natrule.ptr = nr;
 4479         if (r->allow_opts)
 4480                 st->state_flags |= PFSTATE_ALLOWOPTS;
 4481         if (r->rule_flag & PFRULE_STATESLOPPY)
 4482                 st->state_flags |= PFSTATE_SLOPPY;
 4483         if (r->rule_flag & PFRULE_PFLOW)
 4484                 st->state_flags |= PFSTATE_PFLOW;
 4485 #if NPFLOG > 0
 4486         st->log = act->log & PF_LOG_ALL;
 4487 #endif  /* NPFLOG > 0 */
 4488         st->qid = act->qid;
 4489         st->pqid = act->pqid;
 4490         st->rtableid[pd->didx] = act->rtableid;
 4491         st->rtableid[pd->sidx] = -1;    /* return traffic is routed normally */
 4492         st->min_ttl = act->min_ttl;
 4493         st->set_tos = act->set_tos;
 4494         st->max_mss = act->max_mss;
 4495         st->state_flags |= act->flags;
 4496 #if NPFSYNC > 0
 4497         st->sync_state = PFSYNC_S_NONE;
 4498 #endif  /* NPFSYNC > 0 */
 4499         st->set_prio[0] = act->set_prio[0];
 4500         st->set_prio[1] = act->set_prio[1];
 4501         st->delay = act->delay;
 4502         SLIST_INIT(&st->src_nodes);
 4503         /*
  4504          * Must initialize refcnt before pf_state_insert() gets called;
  4505          * pf_state_insert() grabs a reference for pfsync!
 4506          */
 4507         PF_REF_INIT(st->refcnt);
 4508         mtx_init(&st->mtx, IPL_NET);
 4509 
 4510         switch (pd->proto) {
 4511         case IPPROTO_TCP:
 4512                 st->src.seqlo = ntohl(th->th_seq);
 4513                 st->src.seqhi = st->src.seqlo + pd->p_len + 1;
 4514                 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
 4515                     r->keep_state == PF_STATE_MODULATE) {
 4516                         /* Generate sequence number modulator */
 4517                         st->src.seqdiff = pf_tcp_iss(pd) - st->src.seqlo;
 4518                         if (st->src.seqdiff == 0)
 4519                                 st->src.seqdiff = 1;
 4520                         pf_patch_32(pd, &th->th_seq,
 4521                             htonl(st->src.seqlo + st->src.seqdiff));
 4522                         *rewrite = 1;
 4523                 } else
 4524                         st->src.seqdiff = 0;
 4525                 if (th->th_flags & TH_SYN) {
 4526                         st->src.seqhi++;
 4527                         st->src.wscale = pf_get_wscale(pd);
 4528                 }
 4529                 st->src.max_win = MAX(ntohs(th->th_win), 1);
 4530                 if (st->src.wscale & PF_WSCALE_MASK) {
 4531                         /* Remove scale factor from initial window */
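                               /*
                                * i.e. max_win = ceil(win / 2^wscale); the window checks
                                * in pf_tcp_track_full() scale it back up with "<< wscale".
                                */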
 4532                         int win = st->src.max_win;
 4533                         win += 1 << (st->src.wscale & PF_WSCALE_MASK);
 4534                         st->src.max_win = (win - 1) >>
 4535                             (st->src.wscale & PF_WSCALE_MASK);
 4536                 }
 4537                 if (th->th_flags & TH_FIN)
 4538                         st->src.seqhi++;
 4539                 st->dst.seqhi = 1;
 4540                 st->dst.max_win = 1;
 4541                 pf_set_protostate(st, PF_PEER_SRC, TCPS_SYN_SENT);
 4542                 pf_set_protostate(st, PF_PEER_DST, TCPS_CLOSED);
 4543                 st->timeout = PFTM_TCP_FIRST_PACKET;
 4544                 pf_status.states_halfopen++;
 4545                 break;
 4546         case IPPROTO_UDP:
 4547                 pf_set_protostate(st, PF_PEER_SRC, PFUDPS_SINGLE);
 4548                 pf_set_protostate(st, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
 4549                 st->timeout = PFTM_UDP_FIRST_PACKET;
 4550                 break;
 4551         case IPPROTO_ICMP:
 4552 #ifdef INET6
 4553         case IPPROTO_ICMPV6:
 4554 #endif  /* INET6 */
 4555                 st->timeout = PFTM_ICMP_FIRST_PACKET;
 4556                 break;
 4557         default:
 4558                 pf_set_protostate(st, PF_PEER_SRC, PFOTHERS_SINGLE);
 4559                 pf_set_protostate(st, PF_PEER_DST, PFOTHERS_NO_TRAFFIC);
 4560                 st->timeout = PFTM_OTHER_FIRST_PACKET;
 4561         }
 4562 
 4563         st->creation = getuptime();
 4564         st->expire = getuptime();
 4565 
 4566         if (pd->proto == IPPROTO_TCP) {
 4567                 if (st->state_flags & PFSTATE_SCRUB_TCP &&
 4568                     pf_normalize_tcp_init(pd, &st->src)) {
 4569                         REASON_SET(&reason, PFRES_MEMORY);
 4570                         goto csfailed;
 4571                 }
 4572                 if (st->state_flags & PFSTATE_SCRUB_TCP && st->src.scrub &&
 4573                     pf_normalize_tcp_stateful(pd, &reason, st,
 4574                     &st->src, &st->dst, rewrite)) {
 4575                         /* This really shouldn't happen!!! */
 4576                         DPFPRINTF(LOG_ERR,
 4577                             "%s: tcp normalize failed on first pkt", __func__);
 4578                         goto csfailed;
 4579                 }
 4580         }
 4581         st->direction = pd->dir;
 4582 
 4583         if (pf_state_key_setup(pd, skw, sks, act->rtableid)) {
 4584                 REASON_SET(&reason, PFRES_MEMORY);
 4585                 goto csfailed;
 4586         }
 4587 
 4588         if (pf_set_rt_ifp(st, pd->src, (*skw)->af, sns) != 0) {
 4589                 REASON_SET(&reason, PFRES_NOROUTE);
 4590                 goto csfailed;
 4591         }
 4592 
 4593         for (i = 0; i < PF_SN_MAX; i++)
 4594                 if (sns[i] != NULL) {
 4595                         struct pf_sn_item       *sni;
 4596 
 4597                         sni = pool_get(&pf_sn_item_pl, PR_NOWAIT);
 4598                         if (sni == NULL) {
 4599                                 REASON_SET(&reason, PFRES_MEMORY);
 4600                                 goto csfailed;
 4601                         }
 4602                         sni->sn = sns[i];
 4603                         SLIST_INSERT_HEAD(&st->src_nodes, sni, next);
 4604                         sni->sn->states++;
 4605                 }
 4606 
 4607         if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, st)) {
 4608                 *sks = *skw = NULL;
 4609                 REASON_SET(&reason, PFRES_STATEINS);
 4610                 goto csfailed;
 4611         } else
 4612                 *sm = st;
 4613 
 4614         /*
  4615          * Make the state responsible for the rules it binds here.
 4616          */
 4617         memcpy(&st->match_rules, rules, sizeof(st->match_rules));
 4618         memset(rules, 0, sizeof(*rules));
 4619         STATE_INC_COUNTERS(st);
 4620 
 4621         if (tag > 0) {
 4622                 pf_tag_ref(tag);
 4623                 st->tag = tag;
 4624         }
 4625         if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
 4626             TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) {
 4627                 int rtid = pd->rdomain;
 4628                 if (act->rtableid >= 0)
 4629                         rtid = act->rtableid;
 4630                 pf_set_protostate(st, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
 4631                 st->src.seqhi = arc4random();
 4632                 /* Find mss option */
 4633                 mss = pf_get_mss(pd);
 4634                 mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
 4635                 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
 4636                 st->src.mss = mss;
 4637                 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
 4638                     th->th_sport, st->src.seqhi, ntohl(th->th_seq) + 1,
 4639                     TH_SYN|TH_ACK, 0, st->src.mss, 0, 1, 0, pd->rdomain);
 4640                 REASON_SET(&reason, PFRES_SYNPROXY);
 4641                 return (PF_SYNPROXY_DROP);
 4642         }
 4643 
 4644         return (PF_PASS);
 4645 
 4646 csfailed:
 4647         if (st) {
 4648                 pf_normalize_tcp_cleanup(st);   /* safe even w/o init */
 4649                 pf_src_tree_remove_state(st);
 4650                 pool_put(&pf_state_pl, st);
 4651         }
 4652 
 4653         for (i = 0; i < PF_SN_MAX; i++)
 4654                 if (sns[i] != NULL)
 4655                         pf_remove_src_node(sns[i]);
 4656 
 4657         return (PF_DROP);
 4658 }
 4659 
 4660 int
 4661 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
 4662     struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
 4663     int icmp_dir)
 4664 {
 4665         int     rewrite = 0;
 4666         int     afto = pd->af != pd->naf;
 4667 
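               /*
                * Patch the headers toward (saddr, sport, daddr, dport) and
                * return the number of fields changed.  When translating between
                * address families (afto) only ports and ICMP ids are patched
                * here; the addresses themselves are not touched (see the !afto
                * check at the end), as the af-translation path handles them.
                */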
 4668         if (afto || PF_ANEQ(daddr, pd->dst, pd->af))
 4669                 pd->destchg = 1;
 4670 
 4671         switch (pd->proto) {
 4672         case IPPROTO_TCP:       /* FALLTHROUGH */
 4673         case IPPROTO_UDP:
 4674                 rewrite += pf_patch_16(pd, pd->sport, sport);
 4675                 rewrite += pf_patch_16(pd, pd->dport, dport);
 4676                 break;
 4677 
 4678         case IPPROTO_ICMP:
 4679                 if (pd->af != AF_INET)
 4680                         return (0);
 4681 
 4682 #ifdef INET6
 4683                 if (afto) {
 4684                         if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp))
 4685                                 return (0);
 4686                         pd->proto = IPPROTO_ICMPV6;
 4687                         rewrite = 1;
 4688                 }
 4689 #endif /* INET6 */
 4690                 if (virtual_type == htons(ICMP_ECHO)) {
 4691                         u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
 4692                         rewrite += pf_patch_16(pd,
 4693                             &pd->hdr.icmp.icmp_id, icmpid);
 4694                 }
 4695                 break;
 4696 
 4697 #ifdef INET6
 4698         case IPPROTO_ICMPV6:
 4699                 if (pd->af != AF_INET6)
 4700                         return (0);
 4701 
 4702                 if (afto) {
 4703                         if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6))
 4704                                 return (0);
 4705                         pd->proto = IPPROTO_ICMP;
 4706                         rewrite = 1;
 4707                 }
 4708                 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) {
 4709                         u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
 4710                         rewrite += pf_patch_16(pd,
 4711                             &pd->hdr.icmp6.icmp6_id, icmpid);
 4712                 }
 4713                 break;
 4714 #endif /* INET6 */
 4715         }
 4716 
 4717         if (!afto) {
 4718                 rewrite += pf_translate_a(pd, pd->src, saddr);
 4719                 rewrite += pf_translate_a(pd, pd->dst, daddr);
 4720         }
 4721 
 4722         return (rewrite);
 4723 }
 4724 
 4725 int
 4726 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason,
 4727     int *copyback, int reverse)
 4728 {
 4729         struct tcphdr           *th = &pd->hdr.tcp;
 4730         struct pf_state_peer    *src, *dst;
 4731         u_int16_t                win = ntohs(th->th_win);
 4732         u_int32_t                ack, end, data_end, seq, orig_seq;
 4733         u_int8_t                 sws, dws, psrc, pdst;
 4734         int                      ackskew;
 4735 
 4736         if ((pd->dir == (*stp)->direction && !reverse) ||
 4737             (pd->dir != (*stp)->direction && reverse)) {
 4738                 src = &(*stp)->src;
 4739                 dst = &(*stp)->dst;
 4740                 psrc = PF_PEER_SRC;
 4741                 pdst = PF_PEER_DST;
 4742         } else {
 4743                 src = &(*stp)->dst;
 4744                 dst = &(*stp)->src;
 4745                 psrc = PF_PEER_DST;
 4746                 pdst = PF_PEER_SRC;
 4747         }
 4748 
 4749         if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
 4750                 sws = src->wscale & PF_WSCALE_MASK;
 4751                 dws = dst->wscale & PF_WSCALE_MASK;
 4752         } else
 4753                 sws = dws = 0;
 4754 
 4755         /*
 4756          * Sequence tracking algorithm from Guido van Rooij's paper:
 4757          *   http://www.madison-gurkha.com/publications/tcp_filtering/
 4758          *      tcp_filtering.ps
 4759          */
 4760 
 4761         orig_seq = seq = ntohl(th->th_seq);
 4762         if (src->seqlo == 0) {
 4763                 /* First packet from this end. Set its state */
 4764 
 4765                 if (((*stp)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
 4766                     src->scrub == NULL) {
 4767                         if (pf_normalize_tcp_init(pd, src)) {
 4768                                 REASON_SET(reason, PFRES_MEMORY);
 4769                                 return (PF_DROP);
 4770                         }
 4771                 }
 4772 
 4773                 /* Deferred generation of sequence number modulator */
 4774                 if (dst->seqdiff && !src->seqdiff) {
 4775                         /* use random iss for the TCP server */
 4776                         while ((src->seqdiff = arc4random() - seq) == 0)
 4777                                 continue;
 4778                         ack = ntohl(th->th_ack) - dst->seqdiff;
 4779                         pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
 4780                         pf_patch_32(pd, &th->th_ack, htonl(ack));
 4781                         *copyback = 1;
 4782                 } else {
 4783                         ack = ntohl(th->th_ack);
 4784                 }
 4785 
 4786                 end = seq + pd->p_len;
 4787                 if (th->th_flags & TH_SYN) {
 4788                         end++;
 4789                         if (dst->wscale & PF_WSCALE_FLAG) {
 4790                                 src->wscale = pf_get_wscale(pd);
 4791                                 if (src->wscale & PF_WSCALE_FLAG) {
 4792                                         /* Remove scale factor from initial
 4793                                          * window */
 4794                                         sws = src->wscale & PF_WSCALE_MASK;
 4795                                         win = ((u_int32_t)win + (1 << sws) - 1)
 4796                                             >> sws;
 4797                                         dws = dst->wscale & PF_WSCALE_MASK;
 4798                                 } else {
 4799                                         /* fixup other window */
 4800                                         dst->max_win = MIN(TCP_MAXWIN,
 4801                                             (u_int32_t)dst->max_win <<
 4802                                             (dst->wscale & PF_WSCALE_MASK));
 4803                                         /* in case of a retrans SYN|ACK */
 4804                                         dst->wscale = 0;
 4805                                 }
 4806                         }
 4807                 }
 4808                 data_end = end;
 4809                 if (th->th_flags & TH_FIN)
 4810                         end++;
 4811 
 4812                 src->seqlo = seq;
 4813                 if (src->state < TCPS_SYN_SENT)
 4814                         pf_set_protostate(*stp, psrc, TCPS_SYN_SENT);
 4815 
 4816                 /*
 4817                  * May need to slide the window (seqhi may have been set by
 4818                  * the crappy stack check or if we picked up the connection
 4819                  * after establishment)
 4820                  */
 4821                 if (src->seqhi == 1 ||
 4822                     SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
 4823                         src->seqhi = end + MAX(1, dst->max_win << dws);
 4824                 if (win > src->max_win)
 4825                         src->max_win = win;
 4826 
 4827         } else {
 4828                 ack = ntohl(th->th_ack) - dst->seqdiff;
 4829                 if (src->seqdiff) {
 4830                         /* Modulate sequence numbers */
 4831                         pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
 4832                         pf_patch_32(pd, &th->th_ack, htonl(ack));
 4833                         *copyback = 1;
 4834                 }
 4835                 end = seq + pd->p_len;
 4836                 if (th->th_flags & TH_SYN)
 4837                         end++;
 4838                 data_end = end;
 4839                 if (th->th_flags & TH_FIN)
 4840                         end++;
 4841         }
 4842 
 4843         if ((th->th_flags & TH_ACK) == 0) {
 4844                 /* Let it pass through the ack skew check */
 4845                 ack = dst->seqlo;
 4846         } else if ((ack == 0 &&
 4847             (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
 4848             /* broken tcp stacks do not set ack */
 4849             (dst->state < TCPS_SYN_SENT)) {
 4850                 /*
 4851                  * Many stacks (ours included) will set the ACK number in an
 4852                  * FIN|ACK if the SYN times out -- no sequence to ACK.
 4853                  */
 4854                 ack = dst->seqlo;
 4855         }
 4856 
 4857         if (seq == end) {
  4858                 /* Ease sequencing restrictions on packets carrying no data */
 4859                 seq = src->seqlo;
 4860                 data_end = end = seq;
 4861         }
 4862 
 4863         ackskew = dst->seqlo - ack;
 4864 
 4865 
 4866         /*
 4867          * Need to demodulate the sequence numbers in any TCP SACK options
 4868          * (Selective ACK). We could optionally validate the SACK values
 4869          * against the current ACK window, either forwards or backwards, but
 4870          * I'm not confident that SACK has been implemented properly
  4871          * everywhere. It wouldn't surprise me if several stacks accidentally
 4872          * SACK too far backwards of previously ACKed data. There really aren't
 4873          * any security implications of bad SACKing unless the target stack
 4874          * doesn't validate the option length correctly. Someone trying to
 4875          * spoof into a TCP connection won't bother blindly sending SACK
 4876          * options anyway.
 4877          */
 4878         if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
 4879                 if (pf_modulate_sack(pd, dst))
 4880                         *copyback = 1;
 4881         }
 4882 
 4883 
 4884 #define MAXACKWINDOW (0xffff + 1500)    /* 1500 is an arbitrary fudge factor */
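               /*
                * The four checks below, plus the two looser ones in the next
                * branch, are what the "BAD state" log further down reports as
                * failure codes 1-6.
                */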
 4885         if (SEQ_GEQ(src->seqhi, data_end) &&
 4886             /* Last octet inside other's window space */
 4887             SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
 4888             /* Retrans: not more than one window back */
 4889             (ackskew >= -MAXACKWINDOW) &&
 4890             /* Acking not more than one reassembled fragment backwards */
 4891             (ackskew <= (MAXACKWINDOW << sws)) &&
 4892             /* Acking not more than one window forward */
 4893             ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
 4894             (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) {
 4895             /* Require an exact/+1 sequence match on resets when possible */
 4896 
 4897                 if (dst->scrub || src->scrub) {
 4898                         if (pf_normalize_tcp_stateful(pd, reason, *stp, src,
 4899                             dst, copyback))
 4900                                 return (PF_DROP);
 4901                 }
 4902 
 4903                 /* update max window */
 4904                 if (src->max_win < win)
 4905                         src->max_win = win;
 4906                 /* synchronize sequencing */
 4907                 if (SEQ_GT(end, src->seqlo))
 4908                         src->seqlo = end;
 4909                 /* slide the window of what the other end can send */
 4910                 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
 4911                         dst->seqhi = ack + MAX((win << sws), 1);
 4912 
 4913                 /* update states */
 4914                 if (th->th_flags & TH_SYN)
 4915                         if (src->state < TCPS_SYN_SENT)
 4916                                 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT);
 4917                 if (th->th_flags & TH_FIN)
 4918                         if (src->state < TCPS_CLOSING)
 4919                                 pf_set_protostate(*stp, psrc, TCPS_CLOSING);
 4920                 if (th->th_flags & TH_ACK) {
 4921                         if (dst->state == TCPS_SYN_SENT) {
 4922                                 pf_set_protostate(*stp, pdst,
 4923                                     TCPS_ESTABLISHED);
 4924                                 if (src->state == TCPS_ESTABLISHED &&
 4925                                     !SLIST_EMPTY(&(*stp)->src_nodes) &&
 4926                                     pf_src_connlimit(stp)) {
 4927                                         REASON_SET(reason, PFRES_SRCLIMIT);
 4928                                         return (PF_DROP);
 4929                                 }
 4930                         } else if (dst->state == TCPS_CLOSING)
 4931                                 pf_set_protostate(*stp, pdst,
 4932                                     TCPS_FIN_WAIT_2);
 4933                 }
 4934                 if (th->th_flags & TH_RST)
 4935                         pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT);
 4936 
 4937                 /* update expire time */
 4938                 (*stp)->expire = getuptime();
 4939                 if (src->state >= TCPS_FIN_WAIT_2 &&
 4940                     dst->state >= TCPS_FIN_WAIT_2)
 4941                         (*stp)->timeout = PFTM_TCP_CLOSED;
 4942                 else if (src->state >= TCPS_CLOSING &&
 4943                     dst->state >= TCPS_CLOSING)
 4944                         (*stp)->timeout = PFTM_TCP_FIN_WAIT;
 4945                 else if (src->state < TCPS_ESTABLISHED ||
 4946                     dst->state < TCPS_ESTABLISHED)
 4947                         (*stp)->timeout = PFTM_TCP_OPENING;
 4948                 else if (src->state >= TCPS_CLOSING ||
 4949                     dst->state >= TCPS_CLOSING)
 4950                         (*stp)->timeout = PFTM_TCP_CLOSING;
 4951                 else
 4952                         (*stp)->timeout = PFTM_TCP_ESTABLISHED;
 4953 
 4954                 /* Fall through to PASS packet */
 4955         } else if ((dst->state < TCPS_SYN_SENT ||
 4956                 dst->state >= TCPS_FIN_WAIT_2 ||
 4957                 src->state >= TCPS_FIN_WAIT_2) &&
 4958             SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) &&
 4959             /* Within a window forward of the originating packet */
 4960             SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
 4961             /* Within a window backward of the originating packet */
 4962 
 4963                 /*
 4964                  * This currently handles three situations:
 4965                  *  1) Stupid stacks will shotgun SYNs before their peer
 4966                  *     replies.
 4967                  *  2) When PF catches an already established stream (the
 4968                  *     firewall rebooted, the state table was flushed, routes
 4969                  *     changed...)
 4970                  *  3) Packets get funky immediately after the connection
 4971                  *     closes (this should catch Solaris spurious ACK|FINs
 4972                  *     that web servers like to spew after a close)
 4973                  *
 4974                  * This must be a little more careful than the above code
 4975                  * since packet floods will also be caught here. We don't
 4976                  * update the TTL here to mitigate the damage of a packet
 4977                  * flood and so the same code can handle awkward establishment
 4978                  * and a loosened connection close.
 4979                  * In the establishment case, a correct peer response will
 4980                  * validate the connection, go through the normal state code
 4981                  * and keep updating the state TTL.
 4982                  */
 4983 
 4984                 if (pf_status.debug >= LOG_NOTICE) {
 4985                         log(LOG_NOTICE, "pf: loose state match: ");
 4986                         pf_print_state(*stp);
 4987                         pf_print_flags(th->th_flags);
 4988                         addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
 4989                             "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
 4990                             pd->p_len, ackskew, (*stp)->packets[0],
 4991                             (*stp)->packets[1],
 4992                             pd->dir == PF_IN ? "in" : "out",
 4993                             pd->dir == (*stp)->direction ? "fwd" : "rev");
 4994                 }
 4995 
 4996                 if (dst->scrub || src->scrub) {
 4997                         if (pf_normalize_tcp_stateful(pd, reason, *stp, src,
 4998                             dst, copyback))
 4999                                 return (PF_DROP);
 5000                 }
 5001 
 5002                 /* update max window */
 5003                 if (src->max_win < win)
 5004                         src->max_win = win;
 5005                 /* synchronize sequencing */
 5006                 if (SEQ_GT(end, src->seqlo))
 5007                         src->seqlo = end;
 5008                 /* slide the window of what the other end can send */
 5009                 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
 5010                         dst->seqhi = ack + MAX((win << sws), 1);
 5011 
 5012                 /*
 5013                  * Cannot set dst->seqhi here since this could be a shotgunned
 5014                  * SYN and not an already established connection.
 5015                  */
 5016                 if (th->th_flags & TH_FIN)
 5017                         if (src->state < TCPS_CLOSING)
 5018                                 pf_set_protostate(*stp, psrc, TCPS_CLOSING);
 5019                 if (th->th_flags & TH_RST)
 5020                         pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT);
 5021 
 5022                 /* Fall through to PASS packet */
 5023         } else {
 5024                 if ((*stp)->dst.state == TCPS_SYN_SENT &&
 5025                     (*stp)->src.state == TCPS_SYN_SENT) {
 5026                         /* Send RST for state mismatches during handshake */
 5027                         if (!(th->th_flags & TH_RST))
 5028                                 pf_send_tcp((*stp)->rule.ptr, pd->af,
 5029                                     pd->dst, pd->src, th->th_dport,
 5030                                     th->th_sport, ntohl(th->th_ack), 0,
 5031                                     TH_RST, 0, 0,
 5032                                     (*stp)->rule.ptr->return_ttl, 1, 0,
 5033                                     pd->rdomain);
 5034                         src->seqlo = 0;
 5035                         src->seqhi = 1;
 5036                         src->max_win = 1;
 5037                 } else if (pf_status.debug >= LOG_NOTICE) {
 5038                         log(LOG_NOTICE, "pf: BAD state: ");
 5039                         pf_print_state(*stp);
 5040                         pf_print_flags(th->th_flags);
 5041                         addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
 5042                             "pkts=%llu:%llu dir=%s,%s\n",
 5043                             seq, orig_seq, ack, pd->p_len, ackskew,
 5044                             (*stp)->packets[0], (*stp)->packets[1],
 5045                             pd->dir == PF_IN ? "in" : "out",
 5046                             pd->dir == (*stp)->direction ? "fwd" : "rev");
 5047                         addlog("pf: State failure on: %c %c %c %c | %c %c\n",
 5048                             SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
 5049                             SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
 5050                             ' ': '2',
 5051                             (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
 5052                             (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
 5053                             SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?
 5054                             ' ' :'5',
 5055                             SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
 5056                 }
 5057                 REASON_SET(reason, PFRES_BADSTATE);
 5058                 return (PF_DROP);
 5059         }
 5060 
 5061         return (PF_PASS);
 5062 }
 5063 
 5064 int
 5065 pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **stp,
 5066     u_short *reason)
 5067 {
 5068         struct tcphdr           *th = &pd->hdr.tcp;
 5069         struct pf_state_peer    *src, *dst;
 5070         u_int8_t                 psrc, pdst;
 5071 
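               /*
                * Sloppy tracking drives the state machine from the TCP flags
                * alone; none of the sequence/window checks performed by
                * pf_tcp_track_full() are applied here.
                */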
 5072         if (pd->dir == (*stp)->direction) {
 5073                 src = &(*stp)->src;
 5074                 dst = &(*stp)->dst;
 5075                 psrc = PF_PEER_SRC;
 5076                 pdst = PF_PEER_DST;
 5077         } else {
 5078                 src = &(*stp)->dst;
 5079                 dst = &(*stp)->src;
 5080                 psrc = PF_PEER_DST;
 5081                 pdst = PF_PEER_SRC;
 5082         }
 5083 
 5084         if (th->th_flags & TH_SYN)
 5085                 if (src->state < TCPS_SYN_SENT)
 5086                         pf_set_protostate(*stp, psrc, TCPS_SYN_SENT);
 5087         if (th->th_flags & TH_FIN)
 5088                 if (src->state < TCPS_CLOSING)
 5089                         pf_set_protostate(*stp, psrc, TCPS_CLOSING);
 5090         if (th->th_flags & TH_ACK) {
 5091                 if (dst->state == TCPS_SYN_SENT) {
 5092                         pf_set_protostate(*stp, pdst, TCPS_ESTABLISHED);
 5093                         if (src->state == TCPS_ESTABLISHED &&
 5094                             !SLIST_EMPTY(&(*stp)->src_nodes) &&
 5095                             pf_src_connlimit(stp)) {
 5096                                 REASON_SET(reason, PFRES_SRCLIMIT);
 5097                                 return (PF_DROP);
 5098                         }
 5099                 } else if (dst->state == TCPS_CLOSING) {
 5100                         pf_set_protostate(*stp, pdst, TCPS_FIN_WAIT_2);
 5101                 } else if (src->state == TCPS_SYN_SENT &&
 5102                     dst->state < TCPS_SYN_SENT) {
 5103                         /*
 5104                          * Handle a special sloppy case where we only see one
  5105                          * half of the connection. If there is an ACK after
 5106                          * the initial SYN without ever seeing a packet from
 5107                          * the destination, set the connection to established.
 5108                          */
 5109                         pf_set_protostate(*stp, PF_PEER_BOTH,
 5110                             TCPS_ESTABLISHED);
 5111                         if (!SLIST_EMPTY(&(*stp)->src_nodes) &&
 5112                             pf_src_connlimit(stp)) {
 5113                                 REASON_SET(reason, PFRES_SRCLIMIT);
 5114                                 return (PF_DROP);
 5115                         }
 5116                 } else if (src->state == TCPS_CLOSING &&
 5117                     dst->state == TCPS_ESTABLISHED &&
 5118                     dst->seqlo == 0) {
 5119                         /*
 5120                          * Handle the closing of half connections where we
 5121                          * don't see the full bidirectional FIN/ACK+ACK
 5122                          * handshake.
 5123                          */
 5124                         pf_set_protostate(*stp, pdst, TCPS_CLOSING);
 5125                 }
 5126         }
 5127         if (th->th_flags & TH_RST)
 5128                 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT);
 5129 
 5130         /* update expire time */
 5131         (*stp)->expire = getuptime();
 5132         if (src->state >= TCPS_FIN_WAIT_2 &&
 5133             dst->state >= TCPS_FIN_WAIT_2)
 5134                 (*stp)->timeout = PFTM_TCP_CLOSED;
 5135         else if (src->state >= TCPS_CLOSING &&
 5136             dst->state >= TCPS_CLOSING)
 5137                 (*stp)->timeout = PFTM_TCP_FIN_WAIT;
 5138         else if (src->state < TCPS_ESTABLISHED ||
 5139             dst->state < TCPS_ESTABLISHED)
 5140                 (*stp)->timeout = PFTM_TCP_OPENING;
 5141         else if (src->state >= TCPS_CLOSING ||
 5142             dst->state >= TCPS_CLOSING)
 5143                 (*stp)->timeout = PFTM_TCP_CLOSING;
 5144         else
 5145                 (*stp)->timeout = PFTM_TCP_ESTABLISHED;
 5146 
 5147         return (PF_PASS);
 5148 }
 5149 
 5150 static __inline int
 5151 pf_synproxy(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason)
 5152 {
 5153         struct pf_state_key     *sk = (*stp)->key[pd->didx];
 5154 
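               /*
                * Synproxy: first complete the three-way handshake with the
                * client ourselves (PF_TCPS_PROXY_SRC), then open the real
                * connection to the server (PF_TCPS_PROXY_DST) and splice the
                * two sides together via the recorded seqdiff offsets.
                */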
 5155         if ((*stp)->src.state == PF_TCPS_PROXY_SRC) {
 5156                 struct tcphdr   *th = &pd->hdr.tcp;
 5157 
 5158                 if (pd->dir != (*stp)->direction) {
 5159                         REASON_SET(reason, PFRES_SYNPROXY);
 5160                         return (PF_SYNPROXY_DROP);
 5161                 }
 5162                 if (th->th_flags & TH_SYN) {
 5163                         if (ntohl(th->th_seq) != (*stp)->src.seqlo) {
 5164                                 REASON_SET(reason, PFRES_SYNPROXY);
 5165                                 return (PF_DROP);
 5166                         }
 5167                         pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst,
 5168                             pd->src, th->th_dport, th->th_sport,
 5169                             (*stp)->src.seqhi, ntohl(th->th_seq) + 1,
 5170                             TH_SYN|TH_ACK, 0, (*stp)->src.mss, 0, 1,
 5171                             0, pd->rdomain);
 5172                         REASON_SET(reason, PFRES_SYNPROXY);
 5173                         return (PF_SYNPROXY_DROP);
 5174                 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
 5175                     (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) ||
 5176                     (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) {
 5177                         REASON_SET(reason, PFRES_SYNPROXY);
 5178                         return (PF_DROP);
 5179                 } else if (!SLIST_EMPTY(&(*stp)->src_nodes) &&
 5180                     pf_src_connlimit(stp)) {
 5181                         REASON_SET(reason, PFRES_SRCLIMIT);
 5182                         return (PF_DROP);
 5183                 } else
 5184                         pf_set_protostate(*stp, PF_PEER_SRC,
 5185                             PF_TCPS_PROXY_DST);
 5186         }
 5187         if ((*stp)->src.state == PF_TCPS_PROXY_DST) {
 5188                 struct tcphdr   *th = &pd->hdr.tcp;
 5189 
 5190                 if (pd->dir == (*stp)->direction) {
 5191                         if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
 5192                             (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) ||
 5193                             (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) {
 5194                                 REASON_SET(reason, PFRES_SYNPROXY);
 5195                                 return (PF_DROP);
 5196                         }
 5197                         (*stp)->src.max_win = MAX(ntohs(th->th_win), 1);
 5198                         if ((*stp)->dst.seqhi == 1)
 5199                                 (*stp)->dst.seqhi = arc4random();
 5200                         pf_send_tcp((*stp)->rule.ptr, pd->af,
 5201                             &sk->addr[pd->sidx], &sk->addr[pd->didx],
 5202                             sk->port[pd->sidx], sk->port[pd->didx],
 5203                             (*stp)->dst.seqhi, 0, TH_SYN, 0,
 5204                             (*stp)->src.mss, 0, 0, (*stp)->tag,
 5205                             sk->rdomain);
 5206                         REASON_SET(reason, PFRES_SYNPROXY);
 5207                         return (PF_SYNPROXY_DROP);
 5208                 } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
 5209                     (TH_SYN|TH_ACK)) ||
 5210                     (ntohl(th->th_ack) != (*stp)->dst.seqhi + 1)) {
 5211                         REASON_SET(reason, PFRES_SYNPROXY);
 5212                         return (PF_DROP);
 5213                 } else {
 5214                         (*stp)->dst.max_win = MAX(ntohs(th->th_win), 1);
 5215                         (*stp)->dst.seqlo = ntohl(th->th_seq);
 5216                         pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst,
 5217                             pd->src, th->th_dport, th->th_sport,
 5218                             ntohl(th->th_ack), ntohl(th->th_seq) + 1,
 5219                             TH_ACK, (*stp)->src.max_win, 0, 0, 0,
 5220                             (*stp)->tag, pd->rdomain);
 5221                         pf_send_tcp((*stp)->rule.ptr, pd->af,
 5222                             &sk->addr[pd->sidx], &sk->addr[pd->didx],
 5223                             sk->port[pd->sidx], sk->port[pd->didx],
 5224                             (*stp)->src.seqhi + 1, (*stp)->src.seqlo + 1,
 5225                             TH_ACK, (*stp)->dst.max_win, 0, 0, 1,
 5226                             0, sk->rdomain);
 5227                         (*stp)->src.seqdiff = (*stp)->dst.seqhi -
 5228                             (*stp)->src.seqlo;
 5229                         (*stp)->dst.seqdiff = (*stp)->src.seqhi -
 5230                             (*stp)->dst.seqlo;
 5231                         (*stp)->src.seqhi = (*stp)->src.seqlo +
 5232                             (*stp)->dst.max_win;
 5233                         (*stp)->dst.seqhi = (*stp)->dst.seqlo +
 5234                             (*stp)->src.max_win;
 5235                         (*stp)->src.wscale = (*stp)->dst.wscale = 0;
 5236                         pf_set_protostate(*stp, PF_PEER_BOTH,
 5237                             TCPS_ESTABLISHED);
 5238                         REASON_SET(reason, PFRES_SYNPROXY);
 5239                         return (PF_SYNPROXY_DROP);
 5240                 }
 5241         }
 5242         return (PF_PASS);
 5243 }
 5244 
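      /*
       * Match a packet against an existing state: update the peers'
       * protocol state and timeouts and, if the state carries a
       * translation (NAT or af-to), rewrite addresses and ports.
       */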
 5245 int
 5246 pf_test_state(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason)
 5247 {
 5248         int                      copyback = 0;
 5249         struct pf_state_peer    *src, *dst;
 5250         int                      action;
 5251         struct inpcb            *inp = pd->m->m_pkthdr.pf.inp;
 5252         u_int8_t                 psrc, pdst;
 5253 
 5254         action = PF_PASS;
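              /* Orient the peers: src is always the side that sent this packet. */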
 5255         if (pd->dir == (*stp)->direction) {
 5256                 src = &(*stp)->src;
 5257                 dst = &(*stp)->dst;
 5258                 psrc = PF_PEER_SRC;
 5259                 pdst = PF_PEER_DST;
 5260         } else {
 5261                 src = &(*stp)->dst;
 5262                 dst = &(*stp)->src;
 5263                 psrc = PF_PEER_DST;
 5264                 pdst = PF_PEER_SRC;
 5265         }
 5266 
 5267         switch (pd->virtual_proto) {
 5268         case IPPROTO_TCP:
 5269                 if ((action = pf_synproxy(pd, stp, reason)) != PF_PASS)
 5270                         return (action);
 5271                 if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) {
 5272 
 5273                         if (dst->state >= TCPS_FIN_WAIT_2 &&
 5274                             src->state >= TCPS_FIN_WAIT_2) {
 5275                                 if (pf_status.debug >= LOG_NOTICE) {
 5276                                         log(LOG_NOTICE, "pf: state reuse ");
 5277                                         pf_print_state(*stp);
 5278                                         pf_print_flags(pd->hdr.tcp.th_flags);
 5279                                         addlog("\n");
 5280                                 }
 5281                                 /* XXX make sure it's the same direction ?? */
 5282                                 (*stp)->timeout = PFTM_PURGE;
 5283                                 pf_state_unref(*stp);
 5284                                 *stp = NULL;
 5285                                 pf_mbuf_link_inpcb(pd->m, inp);
 5286                                 return (PF_DROP);
 5287                         } else if (dst->state >= TCPS_ESTABLISHED &&
 5288                             src->state >= TCPS_ESTABLISHED) {
 5289                                 /*
 5290                                  * A SYN matches an existing state?
 5291                                  * This typically happens when the sender
 5292                                  * reboots after a sudden panic. Certain
 5293                                  * protocols (NFSv3) always use the same port
 5294                                  * numbers. A challenge ACK lets all parties
 5295                                  * (firewall and peers) get back in sync.
 5296                                  */
 5297                                 pf_send_challenge_ack(pd, *stp, src, dst);
 5298                                 return (PF_DROP);
 5299                         }
 5300                 }
 5301 
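                      /*
                       * Sloppy states skip full sequence and window tracking;
                       * otherwise every segment is validated against the
                       * recorded window by pf_tcp_track_full().
                       */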
 5302                 if ((*stp)->state_flags & PFSTATE_SLOPPY) {
 5303                         if (pf_tcp_track_sloppy(pd, stp, reason) == PF_DROP)
 5304                                 return (PF_DROP);
 5305                 } else {
 5306                         if (pf_tcp_track_full(pd, stp, reason, &copyback,
 5307                             PF_REVERSED_KEY((*stp)->key, pd->af)) == PF_DROP)
 5308                                 return (PF_DROP);
 5309                 }
 5310                 break;
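              /*
               * UDP and other protocols use simple pseudo-states: SINGLE
               * after the first packet, MULTIPLE once a reply has been
               * seen, which selects the longer timeout.
               */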
 5311         case IPPROTO_UDP:
 5312                 /* update states */
 5313                 if (src->state < PFUDPS_SINGLE)
 5314                         pf_set_protostate(*stp, psrc, PFUDPS_SINGLE);
 5315                 if (dst->state == PFUDPS_SINGLE)
 5316                         pf_set_protostate(*stp, pdst, PFUDPS_MULTIPLE);
 5317 
 5318                 /* update expire time */
 5319                 (*stp)->expire = getuptime();
 5320                 if (src->state == PFUDPS_MULTIPLE &&
 5321                     dst->state == PFUDPS_MULTIPLE)
 5322                         (*stp)->timeout = PFTM_UDP_MULTIPLE;
 5323                 else
 5324                         (*stp)->timeout = PFTM_UDP_SINGLE;
 5325                 break;
 5326         default:
 5327                 /* update states */
 5328                 if (src->state < PFOTHERS_SINGLE)
 5329                         pf_set_protostate(*stp, psrc, PFOTHERS_SINGLE);
 5330                 if (dst->state == PFOTHERS_SINGLE)
 5331                         pf_set_protostate(*stp, pdst, PFOTHERS_MULTIPLE);
 5332 
 5333                 /* update expire time */
 5334                 (*stp)->expire = getuptime();
 5335                 if (src->state == PFOTHERS_MULTIPLE &&
 5336                     dst->state == PFOTHERS_MULTIPLE)
 5337                         (*stp)->timeout = PFTM_OTHER_MULTIPLE;
 5338                 else
 5339                         (*stp)->timeout = PFTM_OTHER_SINGLE;
 5340                 break;
 5341         }
 5342 
 5343         /* translate source/destination address, if necessary */
 5344         if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) {
 5345                 struct pf_state_key     *nk;
 5346                 int                      afto, sidx, didx;
 5347 
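                      /*
                       * nk is the state key for the other side of the
                       * translation.  If its address family differs (af-to),
                       * record the new addresses and return PF_AFRT so the
                       * caller finishes the translation in the new family.
                       */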
 5348                 if (PF_REVERSED_KEY((*stp)->key, pd->af))
 5349                         nk = (*stp)->key[pd->sidx];
 5350                 else
 5351                         nk = (*stp)->key[pd->didx];
 5352 
 5353                 afto = pd->af != nk->af;
 5354                 sidx = afto ? pd->didx : pd->sidx;
 5355                 didx = afto ? pd->sidx : pd->didx;
 5356 
 5357 #ifdef INET6
 5358                 if (afto) {
 5359                         pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af);
 5360                         pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af);
 5361                         pd->naf = nk->af;
 5362                         action = PF_AFRT;
 5363                 }
 5364 #endif /* INET6 */
 5365 
 5366                 if (!afto)
 5367                         pf_translate_a(pd, pd->src, &nk->addr[sidx]);
 5368 
 5369                 if (pd->sport != NULL)
 5370                         pf_patch_16(pd, pd->sport, nk->port[sidx]);
 5371 
 5372                 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
 5373                     pd->rdomain != nk->rdomain)
 5374                         pd->destchg = 1;
 5375 
 5376                 if (!afto)
 5377                         pf_translate_a(pd, pd->dst, &nk->addr[didx]);
 5378 
 5379                 if (pd->dport != NULL)
 5380                         pf_patch_16(pd, pd->dport, nk->port[didx]);
 5381 
 5382                 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
 5383                 copyback = 1;
 5384         }
 5385 
 5386         if (copyback && pd->hdrlen > 0) {
 5387                 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
 5388         }
 5389 
 5390         return (action);
 5391 }
 5392 
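      /*
       * Look up the state for an ICMP query/reply: the icmp id and virtual
       * type stand in for the ports in the state key.  Returns -1 when a
       * usable state was found and tracking should continue; any value
       * >= 0 is a final verdict for the caller.
       */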
 5393 int
 5394 pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
 5395     struct pf_state **stp, u_int16_t icmpid, u_int16_t type,
 5396     int icmp_dir, int *iidx, int multi, int inner)
 5397 {
 5398         int direction, action;
 5399 
 5400         key->af = pd->af;
 5401         key->proto = pd->proto;
 5402         key->rdomain = pd->rdomain;
 5403         if (icmp_dir == PF_IN) {
 5404                 *iidx = pd->sidx;
 5405                 key->port[pd->sidx] = icmpid;
 5406                 key->port[pd->didx] = type;
 5407         } else {
 5408                 *iidx = pd->didx;
 5409                 key->port[pd->sidx] = type;
 5410                 key->port[pd->didx] = icmpid;
 5411         }
 5412 
 5413         if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx,
 5414             pd->dst, pd->af, multi))
 5415                 return (PF_DROP);
 5416 
 5417         key->hash = pf_pkt_hash(key->af, key->proto,
 5418             &key->addr[0], &key->addr[1], 0, 0);
 5419 
 5420         action = pf_find_state(pd, key, stp);
 5421         if (action != PF_MATCH)
 5422                 return (action);
 5423 
 5424         if ((*stp)->state_flags & PFSTATE_SLOPPY)
 5425                 return (-1);
 5426 
 5427         /* Is this ICMP message flowing in right direction? */
 5428         if ((*stp)->key[PF_SK_WIRE]->af != (*stp)->key[PF_SK_STACK]->af)
 5429                 direction = (pd->af == (*stp)->key[PF_SK_WIRE]->af) ?
 5430                     PF_IN : PF_OUT;
 5431         else
 5432                 direction = (*stp)->direction;
 5433         if ((((!inner && direction == pd->dir) ||
 5434             (inner && direction != pd->dir)) ?
 5435             PF_IN : PF_OUT) != icmp_dir) {
 5436                 if (pf_status.debug >= LOG_NOTICE) {
 5437                         log(LOG_NOTICE,
 5438                             "pf: icmp type %d in wrong direction (%d): ",
 5439                             ntohs(type), icmp_dir);
 5440                         pf_print_state(*stp);
 5441                         addlog("\n");
 5442                 }
 5443                 return (PF_DROP);
 5444         }
 5445         return (-1);
 5446 }
 5447 
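      /*
       * Handle ICMP/ICMPv6 against existing states.  Queries and replies
       * match an ICMP state directly; error messages are matched against
       * the state of the packet quoted in their payload.
       */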
 5448 int
 5449 pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **stp,
 5450     u_short *reason)
 5451 {
 5452         u_int16_t        virtual_id, virtual_type;
 5453         u_int8_t         icmptype, icmpcode;
 5454         int              icmp_dir, iidx, ret, copyback = 0;
 5455 
 5456         struct pf_state_key_cmp key;
 5457 
 5458         switch (pd->proto) {
 5459         case IPPROTO_ICMP:
 5460                 icmptype = pd->hdr.icmp.icmp_type;
 5461                 icmpcode = pd->hdr.icmp.icmp_code;
 5462                 break;
 5463 #ifdef INET6
 5464         case IPPROTO_ICMPV6:
 5465                 icmptype = pd->hdr.icmp6.icmp6_type;
 5466                 icmpcode = pd->hdr.icmp6.icmp6_code;
 5467                 break;
 5468 #endif /* INET6 */
 5469         default:
 5470                 panic("unhandled proto %d", pd->proto);
 5471         }
 5472 
 5473         if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id,
 5474             &virtual_type) == 0) {
 5475                 /*
 5476                  * ICMP query/reply message not related to a TCP/UDP packet.
 5477                  * Search for an ICMP state.
 5478                  */
 5479                 ret = pf_icmp_state_lookup(pd, &key, stp,
 5480                     virtual_id, virtual_type, icmp_dir, &iidx,
 5481                     0, 0);
 5482                 /* IPv6? try matching a multicast address */
 5483                 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT)
 5484                         ret = pf_icmp_state_lookup(pd, &key, stp, virtual_id,
 5485                             virtual_type, icmp_dir, &iidx, 1, 0);
 5486                 if (ret >= 0)
 5487                         return (ret);
 5488 
 5489                 (*stp)->expire = getuptime();
 5490                 (*stp)->timeout = PFTM_ICMP_ERROR_REPLY;
 5491 
 5492                 /* translate source/destination address, if necessary */
 5493                 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) {
 5494                         struct pf_state_key     *nk;
 5495                         int                      afto, sidx, didx;
 5496 
 5497                         if (PF_REVERSED_KEY((*stp)->key, pd->af))
 5498                                 nk = (*stp)->key[pd->sidx];
 5499                         else
 5500                                 nk = (*stp)->key[pd->didx];
 5501 
 5502                         afto = pd->af != nk->af;
 5503                         sidx = afto ? pd->didx : pd->sidx;
 5504                         didx = afto ? pd->sidx : pd->didx;
 5505                         iidx = afto ? !iidx : iidx;
 5506 #ifdef  INET6
 5507                         if (afto) {
 5508                                 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
 5509                                     nk->af);
 5510                                 pf_addrcpy(&pd->ndaddr, &nk->addr[didx],
 5511                                     nk->af);
 5512                                 pd->naf = nk->af;
 5513                         }
 5514 #endif /* INET6 */
 5515                         if (!afto) {
 5516                                 pf_translate_a(pd, pd->src, &nk->addr[sidx]);
 5517                                 pf_translate_a(pd, pd->dst, &nk->addr[didx]);
 5518                         }
 5519 
 5520                         if (pd->rdomain != nk->rdomain)
 5521                                 pd->destchg = 1;
 5522                         if (!afto && PF_ANEQ(pd->dst,
 5523                                 &nk->addr[didx], pd->af))
 5524                                 pd->destchg = 1;
 5525                         pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
 5526 
 5527                         switch (pd->af) {
 5528                         case AF_INET:
 5529 #ifdef INET6
 5530                                 if (afto) {
 5531                                         if (pf_translate_icmp_af(pd, AF_INET6,
 5532                                             &pd->hdr.icmp))
 5533                                                 return (PF_DROP);
 5534                                         pd->proto = IPPROTO_ICMPV6;
 5535                                 }
 5536 #endif /* INET6 */
 5537                                 pf_patch_16(pd,
 5538                                     &pd->hdr.icmp.icmp_id, nk->port[iidx]);
 5539 
 5540                                 m_copyback(pd->m, pd->off, ICMP_MINLEN,
 5541                                     &pd->hdr.icmp, M_NOWAIT);
 5542                                 copyback = 1;
 5543                                 break;
 5544 #ifdef INET6
 5545                         case AF_INET6:
 5546                                 if (afto) {
 5547                                         if (pf_translate_icmp_af(pd, AF_INET,
 5548                                             &pd->hdr.icmp6))
 5549                                                 return (PF_DROP);
 5550                                         pd->proto = IPPROTO_ICMP;
 5551                                 }
 5552 
 5553                                 pf_patch_16(pd,
 5554                                     &pd->hdr.icmp6.icmp6_id, nk->port[iidx]);
 5555 
 5556                                 m_copyback(pd->m, pd->off,
 5557                                     sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
 5558                                     M_NOWAIT);
 5559                                 copyback = 1;
 5560                                 break;
 5561 #endif /* INET6 */
 5562                         }
 5563 #ifdef  INET6
 5564                         if (afto)
 5565                                 return (PF_AFRT);
 5566 #endif /* INET6 */
 5567                 }
 5568         } else {
 5569                 /*
 5570                  * ICMP error message in response to a TCP/UDP packet.
 5571                  * Extract the inner TCP/UDP header and search for that state.
 5572                  */
 5573                 struct pf_pdesc  pd2;
 5574                 struct ip        h2;
 5575 #ifdef INET6
 5576                 struct ip6_hdr   h2_6;
 5577 #endif /* INET6 */
 5578                 int              ipoff2;
 5579 
 5580                 /* Initialize the pd2 fields common to both packets from pd. */
 5581                 memset(&pd2, 0, sizeof(pd2));
 5582                 pd2.af = pd->af;
 5583                 pd2.dir = pd->dir;
 5584                 pd2.kif = pd->kif;
 5585                 pd2.m = pd->m;
 5586                 pd2.rdomain = pd->rdomain;
 5587                 /* Payload packet is from the opposite direction. */
 5588                 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0;
 5589                 pd2.didx = (pd2.dir == PF_IN) ? 0 : 1;
 5590                 switch (pd->af) {
 5591                 case AF_INET:
 5592                         /* offset of h2 in mbuf chain */
 5593                         ipoff2 = pd->off + ICMP_MINLEN;
 5594 
 5595                         if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2),
 5596                             NULL, reason, pd2.af)) {
 5597                                 DPFPRINTF(LOG_NOTICE,
 5598                                     "ICMP error message too short (ip)");
 5599                                 return (PF_DROP);
 5600                         }
 5601                         /*
 5602                          * ICMP error messages don't refer to non-first
 5603                          * fragments
 5604                          */
 5605                         if (h2.ip_off & htons(IP_OFFMASK)) {
 5606                                 REASON_SET(reason, PFRES_FRAG);
 5607                                 return (PF_DROP);
 5608                         }
 5609 
 5610                         /* offset of protocol header that follows h2 */
 5611                         pd2.off = ipoff2;
 5612                         if (pf_walk_header(&pd2, &h2, reason) != PF_PASS)
 5613                                 return (PF_DROP);
 5614 
 5615                         pd2.tot_len = ntohs(h2.ip_len);
 5616                         pd2.src = (struct pf_addr *)&h2.ip_src;
 5617                         pd2.dst = (struct pf_addr *)&h2.ip_dst;
 5618                         break;
 5619 #ifdef INET6
 5620                 case AF_INET6:
 5621                         ipoff2 = pd->off + sizeof(struct icmp6_hdr);
 5622 
 5623                         if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6),
 5624                             NULL, reason, pd2.af)) {
 5625                                 DPFPRINTF(LOG_NOTICE,
 5626                                     "ICMP error message too short (ip6)");
 5627                                 return (PF_DROP);
 5628                         }
 5629 
 5630                         pd2.off = ipoff2;
 5631                         if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS)
 5632                                 return (PF_DROP);
 5633 
 5634                         pd2.tot_len = ntohs(h2_6.ip6_plen) +
 5635                             sizeof(struct ip6_hdr);
 5636                         pd2.src = (struct pf_addr *)&h2_6.ip6_src;
 5637                         pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
 5638                         break;
 5639 #endif /* INET6 */
 5640                 default:
 5641                         unhandled_af(pd->af);
 5642                 }
 5643 
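                      /*
                       * An ICMP error travels back toward the source of the
                       * packet that triggered it, so the outer destination
                       * must match the quoted (inner) source address.
                       */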
 5644                 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
 5645                         if (pf_status.debug >= LOG_NOTICE) {
 5646                                 log(LOG_NOTICE,
 5647                                     "pf: BAD ICMP %d:%d outer dst: ",
 5648                                     icmptype, icmpcode);
 5649                                 pf_print_host(pd->src, 0, pd->af);
 5650                                 addlog(" -> ");
 5651                                 pf_print_host(pd->dst, 0, pd->af);
 5652                                 addlog(" inner src: ");
 5653                                 pf_print_host(pd2.src, 0, pd2.af);
 5654                                 addlog(" -> ");
 5655                                 pf_print_host(pd2.dst, 0, pd2.af);
 5656                                 addlog("\n");
 5657                         }
 5658                         REASON_SET(reason, PFRES_BADSTATE);
 5659                         return (PF_DROP);
 5660                 }
 5661 
 5662                 switch (pd2.proto) {
 5663                 case IPPROTO_TCP: {
 5664                         struct tcphdr           *th = &pd2.hdr.tcp;
 5665                         u_int32_t                seq;
 5666                         struct pf_state_peer    *src, *dst;
 5667                         u_int8_t                 dws;
 5668                         int                      action;
 5669 
 5670                         /*
 5671                          * Only the first 8 bytes of the TCP header are
 5672                          * guaranteed to be present. Don't access any field
 5673                          * after th_seq; an ackskew test is not possible.
 5674                          */
 5675                         if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, NULL, reason,
 5676                             pd2.af)) {
 5677                                 DPFPRINTF(LOG_NOTICE,
 5678                                     "ICMP error message too short (tcp)");
 5679                                 return (PF_DROP);
 5680                         }
 5681 
 5682                         key.af = pd2.af;
 5683                         key.proto = IPPROTO_TCP;
 5684                         key.rdomain = pd2.rdomain;
 5685                         pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
 5686                         pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
 5687                         key.port[pd2.sidx] = th->th_sport;
 5688                         key.port[pd2.didx] = th->th_dport;
 5689                         key.hash = pf_pkt_hash(pd2.af, pd2.proto,
 5690                             pd2.src, pd2.dst, th->th_sport, th->th_dport);
 5691 
 5692                         action = pf_find_state(&pd2, &key, stp);
 5693                         if (action != PF_MATCH)
 5694                                 return (action);
 5695 
 5696                         if (pd2.dir == (*stp)->direction) {
 5697                                 if (PF_REVERSED_KEY((*stp)->key, pd->af)) {
 5698                                         src = &(*stp)->src;
 5699                                         dst = &(*stp)->dst;
 5700                                 } else {
 5701                                         src = &(*stp)->dst;
 5702                                         dst = &(*stp)->src;
 5703                                 }
 5704                         } else {
 5705                                 if (PF_REVERSED_KEY((*stp)->key, pd->af)) {
 5706                                         src = &(*stp)->dst;
 5707                                         dst = &(*stp)->src;
 5708                                 } else {
 5709                                         src = &(*stp)->src;
 5710                                         dst = &(*stp)->dst;
 5711                                 }
 5712                         }
 5713 
 5714                         if (src->wscale && dst->wscale)
 5715                                 dws = dst->wscale & PF_WSCALE_MASK;
 5716                         else
 5717                                 dws = 0;
 5718 
 5719                         /* Demodulate sequence number */
 5720                         seq = ntohl(th->th_seq) - src->seqdiff;
 5721                         if (src->seqdiff) {
 5722                                 pf_patch_32(pd, &th->th_seq, htonl(seq));
 5723                                 copyback = 1;
 5724                         }
 5725 
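                              /*
                               * Unless the state is sloppy, the quoted sequence
                               * number must fall within the tracked window; this
                               * rejects blindly forged ICMP errors.
                               */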
 5726                         if (!((*stp)->state_flags & PFSTATE_SLOPPY) &&
 5727                             (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq,
 5728                             src->seqlo - (dst->max_win << dws)))) {
 5729                                 if (pf_status.debug >= LOG_NOTICE) {
 5730                                         log(LOG_NOTICE,
 5731                                             "pf: BAD ICMP %d:%d ",
 5732                                             icmptype, icmpcode);
 5733                                         pf_print_host(pd->src, 0, pd->af);
 5734                                         addlog(" -> ");
 5735                                         pf_print_host(pd->dst, 0, pd->af);
 5736                                         addlog(" state: ");
 5737                                         pf_print_state(*stp);
 5738                                         addlog(" seq=%u\n", seq);
 5739                                 }
 5740                                 REASON_SET(reason, PFRES_BADSTATE);
 5741                                 return (PF_DROP);
 5742                         } else {
 5743                                 if (pf_status.debug >= LOG_DEBUG) {
 5744                                         log(LOG_DEBUG,
 5745                                             "pf: OK ICMP %d:%d ",
 5746                                             icmptype, icmpcode);
 5747                                         pf_print_host(pd->src, 0, pd->af);
 5748                                         addlog(" -> ");
 5749                                         pf_print_host(pd->dst, 0, pd->af);
 5750                                         addlog(" state: ");
 5751                                         pf_print_state(*stp);
 5752                                         addlog(" seq=%u\n", seq);
 5753                                 }
 5754                         }
 5755 
 5756                         /* translate source/destination address, if necessary */
 5757                         if ((*stp)->key[PF_SK_WIRE] !=
 5758                             (*stp)->key[PF_SK_STACK]) {
 5759                                 struct pf_state_key     *nk;
 5760                                 int                      afto, sidx, didx;
 5761 
 5762                                 if (PF_REVERSED_KEY((*stp)->key, pd->af))
 5763                                         nk = (*stp)->key[pd->sidx];
 5764                                 else
 5765                                         nk = (*stp)->key[pd->didx];
 5766 
 5767                                 afto = pd->af != nk->af;
 5768                                 sidx = afto ? pd2.didx : pd2.sidx;
 5769                                 didx = afto ? pd2.sidx : pd2.didx;
 5770 
 5771 #ifdef INET6
 5772                                 if (afto) {
 5773                                         if (pf_translate_icmp_af(pd, nk->af,
 5774                                             &pd->hdr.icmp))
 5775                                                 return (PF_DROP);
 5776                                         m_copyback(pd->m, pd->off,
 5777                                             sizeof(struct icmp6_hdr),
 5778                                             &pd->hdr.icmp6, M_NOWAIT);
 5779                                         if (pf_change_icmp_af(pd->m, ipoff2,
 5780                                             pd, &pd2, &nk->addr[sidx],
 5781                                             &nk->addr[didx], pd->af, nk->af))
 5782                                                 return (PF_DROP);
 5783                                         if (nk->af == AF_INET)
 5784                                                 pd->proto = IPPROTO_ICMP;
 5785                                         else
 5786                                                 pd->proto = IPPROTO_ICMPV6;
 5787                                         pd->m->m_pkthdr.ph_rtableid =
 5788                                             nk->rdomain;
 5789                                         pd->destchg = 1;
 5790                                         pf_addrcpy(&pd->nsaddr,
 5791                                             &nk->addr[pd2.sidx], nk->af);
 5792                                         pf_addrcpy(&pd->ndaddr,
 5793                                             &nk->addr[pd2.didx], nk->af);
 5794                                         pd->naf = nk->af;
 5795 
 5796                                         pf_patch_16(pd,
 5797                                             &th->th_sport, nk->port[sidx]);
 5798                                         pf_patch_16(pd,
 5799                                             &th->th_dport, nk->port[didx]);
 5800 
 5801                                         m_copyback(pd2.m, pd2.off, 8, th,
 5802                                             M_NOWAIT);
 5803                                         return (PF_AFRT);
 5804                                 }
 5805 #endif  /* INET6 */
 5806                                 if (PF_ANEQ(pd2.src,
 5807                                     &nk->addr[pd2.sidx], pd2.af) ||
 5808                                     nk->port[pd2.sidx] != th->th_sport)
 5809                                         pf_translate_icmp(pd, pd2.src,
 5810                                             &th->th_sport, pd->dst,
 5811                                             &nk->addr[pd2.sidx],
 5812                                             nk->port[pd2.sidx]);
 5813 
 5814                                 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
 5815                                     pd2.af) || pd2.rdomain != nk->rdomain)
 5816                                         pd->destchg = 1;
 5817                                 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
 5818 
 5819                                 if (PF_ANEQ(pd2.dst,
 5820                                     &nk->addr[pd2.didx], pd2.af) ||
 5821                                     nk->port[pd2.didx] != th->th_dport)
 5822                                         pf_translate_icmp(pd, pd2.dst,
 5823                                             &th->th_dport, pd->src,
 5824                                             &nk->addr[pd2.didx],
 5825                                             nk->port[pd2.didx]);
 5826                                 copyback = 1;
 5827                         }
 5828 
 5829                         if (copyback) {
 5830                                 switch (pd2.af) {
 5831                                 case AF_INET:
 5832                                         m_copyback(pd->m, pd->off, ICMP_MINLEN,
 5833                                             &pd->hdr.icmp, M_NOWAIT);
 5834                                         m_copyback(pd2.m, ipoff2, sizeof(h2),
 5835                                             &h2, M_NOWAIT);
 5836                                         break;
 5837 #ifdef INET6
 5838                                 case AF_INET6:
 5839                                         m_copyback(pd->m, pd->off,
 5840                                             sizeof(struct icmp6_hdr),
 5841                                             &pd->hdr.icmp6, M_NOWAIT);
 5842                                         m_copyback(pd2.m, ipoff2, sizeof(h2_6),
 5843                                             &h2_6, M_NOWAIT);
 5844                                         break;
 5845 #endif /* INET6 */
 5846                                 }
 5847                                 m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT);
 5848                         }
 5849                         break;
 5850                 }
 5851                 case IPPROTO_UDP: {
 5852                         struct udphdr   *uh = &pd2.hdr.udp;
 5853                         int              action;
 5854 
 5855                         if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh),
 5856                             NULL, reason, pd2.af)) {
 5857                                 DPFPRINTF(LOG_NOTICE,
 5858                                     "ICMP error message too short (udp)");
 5859                                 return (PF_DROP);
 5860                         }
 5861 
 5862                         key.af = pd2.af;
 5863                         key.proto = IPPROTO_UDP;
 5864                         key.rdomain = pd2.rdomain;
 5865                         pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
 5866                         pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
 5867                         key.port[pd2.sidx] = uh->uh_sport;
 5868                         key.port[pd2.didx] = uh->uh_dport;
 5869                         key.hash = pf_pkt_hash(pd2.af, pd2.proto,
 5870                             pd2.src, pd2.dst, uh->uh_sport, uh->uh_dport);
 5871 
 5872                         action = pf_find_state(&pd2, &key, stp);
 5873                         if (action != PF_MATCH)
 5874                                 return (action);
 5875 
 5876                         /* translate source/destination address, if necessary */
 5877                         if ((*stp)->key[PF_SK_WIRE] !=
 5878                             (*stp)->key[PF_SK_STACK]) {
 5879                                 struct pf_state_key     *nk;
 5880                                 int                      afto, sidx, didx;
 5881 
 5882                                 if (PF_REVERSED_KEY((*stp)->key, pd->af))
 5883                                         nk = (*stp)->key[pd->sidx];
 5884                                 else
 5885                                         nk = (*stp)->key[pd->didx];
 5886 
 5887                                 afto = pd->af != nk->af;
 5888                                 sidx = afto ? pd2.didx : pd2.sidx;
 5889                                 didx = afto ? pd2.sidx : pd2.didx;
 5890 
 5891 #ifdef INET6
 5892                                 if (afto) {
 5893                                         if (pf_translate_icmp_af(pd, nk->af,
 5894                                             &pd->hdr.icmp))
 5895                                                 return (PF_DROP);
 5896                                         m_copyback(pd->m, pd->off,
 5897                                             sizeof(struct icmp6_hdr),
 5898                                             &pd->hdr.icmp6, M_NOWAIT);
 5899                                         if (pf_change_icmp_af(pd->m, ipoff2,
 5900                                             pd, &pd2, &nk->addr[sidx],
 5901                                             &nk->addr[didx], pd->af, nk->af))
 5902                                                 return (PF_DROP);
 5903                                         if (nk->af == AF_INET)
 5904                                                 pd->proto = IPPROTO_ICMP;
 5905                                         else
 5906                                                 pd->proto = IPPROTO_ICMPV6;
 5907                                         pd->m->m_pkthdr.ph_rtableid =
 5908                                             nk->rdomain;
 5909                                         pd->destchg = 1;
 5910                                         pf_addrcpy(&pd->nsaddr,
 5911                                             &nk->addr[pd2.sidx], nk->af);
 5912                                         pf_addrcpy(&pd->ndaddr,
 5913                                             &nk->addr[pd2.didx], nk->af);
 5914                                         pd->naf = nk->af;
 5915 
 5916                                         pf_patch_16(pd,
 5917                                             &uh->uh_sport, nk->port[sidx]);
 5918                                         pf_patch_16(pd,
 5919                                             &uh->uh_dport, nk->port[didx]);
 5920 
 5921                                         m_copyback(pd2.m, pd2.off, sizeof(*uh),
 5922                                             uh, M_NOWAIT);
 5923                                         return (PF_AFRT);
 5924                                 }
 5925 #endif /* INET6 */
 5926 
 5927                                 if (PF_ANEQ(pd2.src,
 5928                                     &nk->addr[pd2.sidx], pd2.af) ||
 5929                                     nk->port[pd2.sidx] != uh->uh_sport)
 5930                                         pf_translate_icmp(pd, pd2.src,
 5931                                             &uh->uh_sport, pd->dst,
 5932                                             &nk->addr[pd2.sidx],
 5933                                             nk->port[pd2.sidx]);
 5934 
 5935                                 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
 5936                                     pd2.af) || pd2.rdomain != nk->rdomain)
 5937                                         pd->destchg = 1;
 5938                                 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
 5939 
 5940                                 if (PF_ANEQ(pd2.dst,
 5941                                     &nk->addr[pd2.didx], pd2.af) ||
 5942                                     nk->port[pd2.didx] != uh->uh_dport)
 5943                                         pf_translate_icmp(pd, pd2.dst,
 5944                                             &uh->uh_dport, pd->src,
 5945                                             &nk->addr[pd2.didx],
 5946                                             nk->port[pd2.didx]);
 5947 
 5948                                 switch (pd2.af) {
 5949                                 case AF_INET:
 5950                                         m_copyback(pd->m, pd->off, ICMP_MINLEN,
 5951                                             &pd->hdr.icmp, M_NOWAIT);
 5952                                         m_copyback(pd2.m, ipoff2, sizeof(h2),
 5953                                             &h2, M_NOWAIT);
 5954                                         break;
 5955 #ifdef INET6
 5956                                 case AF_INET6:
 5957                                         m_copyback(pd->m, pd->off,
 5958                                             sizeof(struct icmp6_hdr),
 5959                                             &pd->hdr.icmp6, M_NOWAIT);
 5960                                         m_copyback(pd2.m, ipoff2, sizeof(h2_6),
 5961                                             &h2_6, M_NOWAIT);
 5962                                         break;
 5963 #endif /* INET6 */
 5964                                 }
 5965                                 /* Avoid recomputing the quoted UDP checksum.
 5966                                  * Note: a zero udp6 checksum is invalid per
 5967                                  * RFC 2460 p. 27, but presumably nothing cares here. */
 5968                                 pf_patch_16(pd, &uh->uh_sum, 0);
 5969                                 m_copyback(pd2.m, pd2.off, sizeof(*uh), uh,
 5970                                     M_NOWAIT);
 5971                                 copyback = 1;
 5972                         }
 5973                         break;
 5974                 }
 5975                 case IPPROTO_ICMP: {
 5976                         struct icmp     *iih = &pd2.hdr.icmp;
 5977 
 5978                         if (pd2.af != AF_INET) {
 5979                                 REASON_SET(reason, PFRES_NORM);
 5980                                 return (PF_DROP);
 5981                         }
 5982 
 5983                         if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN,
 5984                             NULL, reason, pd2.af)) {
 5985                                 DPFPRINTF(LOG_NOTICE,
 5986                                     "ICMP error message too short (icmp)");
 5987                                 return (PF_DROP);
 5988                         }
 5989 
 5990                         pf_icmp_mapping(&pd2, iih->icmp_type,
 5991                             &icmp_dir, &virtual_id, &virtual_type);
 5992 
 5993                         ret = pf_icmp_state_lookup(&pd2, &key, stp,
 5994                             virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
 5995                         if (ret >= 0)
 5996                                 return (ret);
 5997 
 5998                         /* translate source/destination address, if necessary */
 5999                         if ((*stp)->key[PF_SK_WIRE] !=
 6000                             (*stp)->key[PF_SK_STACK]) {
 6001                                 struct pf_state_key     *nk;
 6002                                 int                      afto, sidx, didx;
 6003 
 6004                                 if (PF_REVERSED_KEY((*stp)->key, pd->af))
 6005                                         nk = (*stp)->key[pd->sidx];
 6006                                 else
 6007                                         nk = (*stp)->key[pd->didx];
 6008 
 6009                                 afto = pd->af != nk->af;
 6010                                 sidx = afto ? pd2.didx : pd2.sidx;
 6011                                 didx = afto ? pd2.sidx : pd2.didx;
 6012                                 iidx = afto ? !iidx : iidx;
 6013 
 6014 #ifdef INET6
 6015                                 if (afto) {
 6016                                         if (nk->af != AF_INET6)
 6017                                                 return (PF_DROP);
 6018                                         if (pf_translate_icmp_af(pd, nk->af,
 6019                                             &pd->hdr.icmp))
 6020                                                 return (PF_DROP);
 6021                                         m_copyback(pd->m, pd->off,
 6022                                             sizeof(struct icmp6_hdr),
 6023                                             &pd->hdr.icmp6, M_NOWAIT);
 6024                                         if (pf_change_icmp_af(pd->m, ipoff2,
 6025                                             pd, &pd2, &nk->addr[sidx],
 6026                                             &nk->addr[didx], pd->af, nk->af))
 6027                                                 return (PF_DROP);
 6028                                         pd->proto = IPPROTO_ICMPV6;
 6029                                         if (pf_translate_icmp_af(pd,
 6030                                                 nk->af, iih))
 6031                                                 return (PF_DROP);
 6032                                         if (virtual_type == htons(ICMP_ECHO))
 6033                                                 pf_patch_16(pd, &iih->icmp_id,
 6034                                                     nk->port[iidx]);
 6035                                         m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
 6036                                             iih, M_NOWAIT);
 6037                                         pd->m->m_pkthdr.ph_rtableid =
 6038                                             nk->rdomain;
 6039                                         pd->destchg = 1;
 6040                                         pf_addrcpy(&pd->nsaddr,
 6041                                             &nk->addr[pd2.sidx], nk->af);
 6042                                         pf_addrcpy(&pd->ndaddr,
 6043                                             &nk->addr[pd2.didx], nk->af);
 6044                                         pd->naf = nk->af;
 6045                                         return (PF_AFRT);
 6046                                 }
 6047 #endif /* INET6 */
 6048 
 6049                                 if (PF_ANEQ(pd2.src,
 6050                                     &nk->addr[pd2.sidx], pd2.af) ||
 6051                                     (virtual_type == htons(ICMP_ECHO) &&
 6052                                     nk->port[iidx] != iih->icmp_id))
 6053                                         pf_translate_icmp(pd, pd2.src,
 6054                                             (virtual_type == htons(ICMP_ECHO)) ?
 6055                                             &iih->icmp_id : NULL,
 6056                                             pd->dst, &nk->addr[pd2.sidx],
 6057                                             (virtual_type == htons(ICMP_ECHO)) ?
 6058                                             nk->port[iidx] : 0);
 6059 
 6060                                 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
 6061                                     pd2.af) || pd2.rdomain != nk->rdomain)
 6062                                         pd->destchg = 1;
 6063                                 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
 6064 
 6065                                 if (PF_ANEQ(pd2.dst,
 6066                                     &nk->addr[pd2.didx], pd2.af))
 6067                                         pf_translate_icmp(pd, pd2.dst, NULL,
 6068                                             pd->src, &nk->addr[pd2.didx], 0);
 6069 
 6070                                 m_copyback(pd->m, pd->off, ICMP_MINLEN,
 6071                                     &pd->hdr.icmp, M_NOWAIT);
 6072                                 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2,
 6073                                     M_NOWAIT);
 6074                                 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih,
 6075                                     M_NOWAIT);
 6076                                 copyback = 1;
 6077                         }
 6078                         break;
 6079                 }
 6080 #ifdef INET6
 6081                 case IPPROTO_ICMPV6: {
 6082                         struct icmp6_hdr        *iih = &pd2.hdr.icmp6;
 6083 
 6084                         if (pd2.af != AF_INET6) {
 6085                                 REASON_SET(reason, PFRES_NORM);
 6086                                 return (PF_DROP);
 6087                         }
 6088 
 6089                         if (!pf_pull_hdr(pd2.m, pd2.off, iih,
 6090                             sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
 6091                                 DPFPRINTF(LOG_NOTICE,
 6092                                     "ICMP error message too short (icmp6)");
 6093                                 return (PF_DROP);
 6094                         }
 6095 
 6096                         pf_icmp_mapping(&pd2, iih->icmp6_type,
 6097                             &icmp_dir, &virtual_id, &virtual_type);
 6098                         ret = pf_icmp_state_lookup(&pd2, &key, stp,
 6099                             virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
 6100                         /* IPv6? try matching a multicast address */
 6101                         if (ret == PF_DROP && pd2.af == AF_INET6 &&
 6102                             icmp_dir == PF_OUT)
 6103                                 ret = pf_icmp_state_lookup(&pd2, &key, stp,
 6104                                     virtual_id, virtual_type, icmp_dir, &iidx,
 6105                                     1, 1);
 6106                         if (ret >= 0)
 6107                                 return (ret);
 6108 
 6109                         /* translate source/destination address, if necessary */
 6110                         if ((*stp)->key[PF_SK_WIRE] !=
 6111                             (*stp)->key[PF_SK_STACK]) {
 6112                                 struct pf_state_key     *nk;
 6113                                 int                      afto, sidx, didx;
 6114 
 6115                                 if (PF_REVERSED_KEY((*stp)->key, pd->af))
 6116                                         nk = (*stp)->key[pd->sidx];
 6117                                 else
 6118                                         nk = (*stp)->key[pd->didx];
 6119 
 6120                                 afto = pd->af != nk->af;
 6121                                 sidx = afto ? pd2.didx : pd2.sidx;
 6122                                 didx = afto ? pd2.sidx : pd2.didx;
 6123                                 iidx = afto ? !iidx : iidx;
 6124 
 6125                                 if (afto) {
 6126                                         if (nk->af != AF_INET)
 6127                                                 return (PF_DROP);
 6128                                         if (pf_translate_icmp_af(pd, nk->af,
 6129                                             &pd->hdr.icmp))
 6130                                                 return (PF_DROP);
 6131                                         m_copyback(pd->m, pd->off,
 6132                                             sizeof(struct icmp6_hdr),
 6133                                             &pd->hdr.icmp6, M_NOWAIT);
 6134                                         if (pf_change_icmp_af(pd->m, ipoff2,
 6135                                             pd, &pd2, &nk->addr[sidx],
 6136                                             &nk->addr[didx], pd->af, nk->af))
 6137                                                 return (PF_DROP);
 6138                                         pd->proto = IPPROTO_ICMP;
 6139                                         if (pf_translate_icmp_af(pd,
 6140                                                 nk->af, iih))
 6141                                                 return (PF_DROP);
 6142                                         if (virtual_type ==
 6143                                             htons(ICMP6_ECHO_REQUEST))
 6144                                                 pf_patch_16(pd, &iih->icmp6_id,
 6145                                                     nk->port[iidx]);
 6146                                         m_copyback(pd2.m, pd2.off,
 6147                                             sizeof(struct icmp6_hdr), iih,
 6148                                             M_NOWAIT);
 6149                                         pd->m->m_pkthdr.ph_rtableid =
 6150                                             nk->rdomain;
 6151                                         pd->destchg = 1;
 6152                                         pf_addrcpy(&pd->nsaddr,
 6153                                             &nk->addr[pd2.sidx], nk->af);
 6154                                         pf_addrcpy(&pd->ndaddr,
 6155                                             &nk->addr[pd2.didx], nk->af);
 6156                                         pd->naf = nk->af;
 6157                                         return (PF_AFRT);
 6158                                 }
 6159 
 6160                                 if (PF_ANEQ(pd2.src,
 6161                                     &nk->addr[pd2.sidx], pd2.af) ||
 6162                                     ((virtual_type ==
 6163                                     htons(ICMP6_ECHO_REQUEST)) &&
 6164                                     nk->port[pd2.sidx] != iih->icmp6_id))
 6165                                         pf_translate_icmp(pd, pd2.src,
 6166                                             (virtual_type ==
 6167                                             htons(ICMP6_ECHO_REQUEST))
 6168                                             ? &iih->icmp6_id : NULL,
 6169                                             pd->dst, &nk->addr[pd2.sidx],
 6170                                             (virtual_type ==
 6171                                             htons(ICMP6_ECHO_REQUEST))
 6172                                             ? nk->port[iidx] : 0);
 6173 
 6174                                 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
 6175                                     pd2.af) || pd2.rdomain != nk->rdomain)
 6176                                         pd->destchg = 1;
 6177                                 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
 6178 
 6179                                 if (PF_ANEQ(pd2.dst,
 6180                                     &nk->addr[pd2.didx], pd2.af))
 6181                                         pf_translate_icmp(pd, pd2.dst, NULL,
 6182                                             pd->src, &nk->addr[pd2.didx], 0);
 6183 
 6184                                 m_copyback(pd->m, pd->off,
 6185                                     sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
 6186                                     M_NOWAIT);
 6187                                 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6,
 6188                                     M_NOWAIT);
 6189                                 m_copyback(pd2.m, pd2.off,
 6190                                     sizeof(struct icmp6_hdr), iih, M_NOWAIT);
 6191                                 copyback = 1;
 6192                         }
 6193                         break;
 6194                 }
 6195 #endif /* INET6 */
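                      /*
                       * Errors quoting any other protocol are matched on
                       * addresses alone; the port fields of the key stay zero.
                       */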
 6196                 default: {
 6197                         int     action;
 6198 
 6199                         key.af = pd2.af;
 6200                         key.proto = pd2.proto;
 6201                         key.rdomain = pd2.rdomain;
 6202                         pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
 6203                         pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
 6204                         key.port[0] = key.port[1] = 0;
 6205                         key.hash = pf_pkt_hash(pd2.af, pd2.proto,
 6206                             pd2.src, pd2.dst, 0, 0);
 6207 
 6208                         action = pf_find_state(&pd2, &key, stp);
 6209                         if (action != PF_MATCH)
 6210                                 return (action);
 6211 
 6212                         /* translate source/destination address, if necessary */
 6213                         if ((*stp)->key[PF_SK_WIRE] !=
 6214                             (*stp)->key[PF_SK_STACK]) {
 6215                                 struct pf_state_key *nk =
 6216                                     (*stp)->key[pd->didx];
 6217 
 6218                                 if (PF_ANEQ(pd2.src,
 6219                                     &nk->addr[pd2.sidx], pd2.af))
 6220                                         pf_translate_icmp(pd, pd2.src, NULL,
 6221                                             pd->dst, &nk->addr[pd2.sidx], 0);
 6222 
 6223                                 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
 6224                                     pd2.af) || pd2.rdomain != nk->rdomain)
 6225                                         pd->destchg = 1;
 6226                                 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
 6227 
 6228                                 if (PF_ANEQ(pd2.dst,
 6229                                     &nk->addr[pd2.didx], pd2.af))
 6230                                         pf_translate_icmp(pd, pd2.dst, NULL,
 6231                                             pd->src, &nk->addr[pd2.didx], 0);
 6232 
 6233                                 switch (pd2.af) {
 6234                                 case AF_INET:
 6235                                         m_copyback(pd->m, pd->off, ICMP_MINLEN,
 6236                                             &pd->hdr.icmp, M_NOWAIT);
 6237                                         m_copyback(pd2.m, ipoff2, sizeof(h2),
 6238                                             &h2, M_NOWAIT);
 6239                                         break;
 6240 #ifdef INET6
 6241                                 case AF_INET6:
 6242                                         m_copyback(pd->m, pd->off,
 6243                                             sizeof(struct icmp6_hdr),
 6244                                             &pd->hdr.icmp6, M_NOWAIT);
 6245                                         m_copyback(pd2.m, ipoff2, sizeof(h2_6),
 6246                                             &h2_6, M_NOWAIT);
 6247                                         break;
 6248 #endif /* INET6 */
 6249                                 }
 6250                                 copyback = 1;
 6251                         }
 6252                         break;
 6253                 }
 6254                 }
 6255         }
 6256         if (copyback) {
 6257                 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
 6258         }
 6259 
 6260         return (PF_PASS);
 6261 }
 6262 
 6263 /*
 6264  * off is measured from the start of the mbuf chain; the IP or IPv6
 6265  * header must sit at the start of the mbuf chain.
 6266  */
 6267 void *
 6268 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
 6269     u_short *actionp, u_short *reasonp, sa_family_t af)
 6270 {
 6271         int iplen = 0;
 6272 
 6273         switch (af) {
 6274         case AF_INET: {
 6275                 struct ip       *h = mtod(m, struct ip *);
 6276                 u_int16_t        fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
 6277 
 6278                 if (fragoff) {
 6279                         if (fragoff >= len)
 6280                                 ACTION_SET(actionp, PF_PASS);
 6281                         else {
 6282                                 ACTION_SET(actionp, PF_DROP);
 6283                                 REASON_SET(reasonp, PFRES_FRAG);
 6284                         }
 6285                         return (NULL);
 6286                 }
 6287                 iplen = ntohs(h->ip_len);
 6288                 break;
 6289         }
 6290 #ifdef INET6
 6291         case AF_INET6: {
 6292                 struct ip6_hdr  *h = mtod(m, struct ip6_hdr *);
 6293 
 6294                 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
 6295                 break;
 6296         }
 6297 #endif /* INET6 */
 6298         }
 6299         if (m->m_pkthdr.len < off + len || iplen < off + len) {
 6300                 ACTION_SET(actionp, PF_DROP);
 6301                 REASON_SET(reasonp, PFRES_SHORT);
 6302                 return (NULL);
 6303         }
 6304         m_copydata(m, off, len, p);
 6305         return (p);
 6306 }
 6307 
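      /*
       * A minimal usage sketch (assuming a filled-in struct pf_pdesc *pd as
       * built by pf_setup_pdesc() below): pull a fixed-size protocol header
       * into a local buffer and treat a NULL return as a drop.
       */
      #if 0
              struct tcphdr    th;
              u_short          reason = 0;

              if (!pf_pull_hdr(pd->m, pd->off, &th, sizeof(th),
                  NULL, &reason, pd->af))
                      return (PF_DROP);
      #endif
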
 6308 int
 6309 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
 6310     int rtableid)
 6311 {
 6312         struct sockaddr_storage  ss;
 6313         struct sockaddr_in      *dst;
 6314         int                      ret = 1;
 6315         int                      check_mpath;
 6316 #ifdef INET6
 6317         struct sockaddr_in6     *dst6;
 6318 #endif  /* INET6 */
 6319         struct rtentry          *rt = NULL;
 6320 
 6321         check_mpath = 0;
 6322         memset(&ss, 0, sizeof(ss));
 6323         switch (af) {
 6324         case AF_INET:
 6325                 dst = (struct sockaddr_in *)&ss;
 6326                 dst->sin_family = AF_INET;
 6327                 dst->sin_len = sizeof(*dst);
 6328                 dst->sin_addr = addr->v4;
 6329                 if (ipmultipath)
 6330                         check_mpath = 1;
 6331                 break;
 6332 #ifdef INET6
 6333         case AF_INET6:
 6334                 /*
 6335                  * Skip check for addresses with embedded interface scope,
 6336                  * as they would always match anyway.
 6337                  */
 6338                 if (IN6_IS_SCOPE_EMBED(&addr->v6))
 6339                         goto out;
 6340                 dst6 = (struct sockaddr_in6 *)&ss;
 6341                 dst6->sin6_family = AF_INET6;
 6342                 dst6->sin6_len = sizeof(*dst6);
 6343                 dst6->sin6_addr = addr->v6;
 6344                 if (ip6_multipath)
 6345                         check_mpath = 1;
 6346                 break;
 6347 #endif /* INET6 */
 6348         }
 6349 
 6350         /* Skip checks for ipsec interfaces */
 6351         if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
 6352                 goto out;
 6353 
 6354         rt = rtalloc(sstosa(&ss), 0, rtableid);
 6355         if (rt != NULL) {
 6356                 /* No interface given, this is a no-route check */
 6357                 if (kif == NULL)
 6358                         goto out;
 6359 
 6360                 if (kif->pfik_ifp == NULL) {
 6361                         ret = 0;
 6362                         goto out;
 6363                 }
 6364 
 6365                 /* Perform uRPF check if passed input interface */
 6366                 ret = 0;
 6367                 do {
 6368                         if (rt->rt_ifidx == kif->pfik_ifp->if_index) {
 6369                                 ret = 1;
 6370 #if NCARP > 0
 6371                         } else {
 6372                                 struct ifnet    *ifp;
 6373 
 6374                                 ifp = if_get(rt->rt_ifidx);
 6375                                 if (ifp != NULL && ifp->if_type == IFT_CARP &&
 6376                                     ifp->if_carpdevidx ==
 6377                                     kif->pfik_ifp->if_index)
 6378                                         ret = 1;
 6379                                 if_put(ifp);
 6380 #endif /* NCARP */
 6381                         }
 6382 
 6383                         rt = rtable_iterate(rt);
 6384                 } while (check_mpath == 1 && rt != NULL && ret == 0);
 6385         } else
 6386                 ret = 0;
 6387 out:
 6388         rtfree(rt);
 6389         return (ret);
 6390 }
 6391 
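      /*
       * In short (leaving aside the enc(4) and IPv6 link-local shortcuts
       * above): pf_routable() returns 1 when a route to addr exists in
       * rtableid and, if an input interface was given via kif, when that
       * route (or one of its multipath siblings, or a carp(4) interface
       * whose parent is kif) points back at kif; otherwise it returns 0.
       * A hypothetical reverse-path check on an inbound packet could look
       * like this sketch (pd is assumed to be a filled-in pf_pdesc):
       */
      #if 0
              if (pd->dir == PF_IN &&
                  !pf_routable(pd->src, pd->af, pd->kif, pd->rdomain))
                      return (PF_DROP);       /* source fails reverse path */
      #endif
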
 6392 int
 6393 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw,
 6394     int rtableid)
 6395 {
 6396         struct sockaddr_storage  ss;
 6397         struct sockaddr_in      *dst;
 6398 #ifdef INET6
 6399         struct sockaddr_in6     *dst6;
 6400 #endif  /* INET6 */
 6401         struct rtentry          *rt;
 6402         int                      ret = 0;
 6403 
 6404         memset(&ss, 0, sizeof(ss));
 6405         switch (af) {
 6406         case AF_INET:
 6407                 dst = (struct sockaddr_in *)&ss;
 6408                 dst->sin_family = AF_INET;
 6409                 dst->sin_len = sizeof(*dst);
 6410                 dst->sin_addr = addr->v4;
 6411                 break;
 6412 #ifdef INET6
 6413         case AF_INET6:
 6414                 dst6 = (struct sockaddr_in6 *)&ss;
 6415                 dst6->sin6_family = AF_INET6;
 6416                 dst6->sin6_len = sizeof(*dst6);
 6417                 dst6->sin6_addr = addr->v6;
 6418                 break;
 6419 #endif /* INET6 */
 6420         }
 6421 
 6422         rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid);
 6423         if (rt != NULL) {
 6424                 if (rt->rt_labelid == aw->v.rtlabel)
 6425                         ret = 1;
 6426                 rtfree(rt);
 6427         }
 6428 
 6429         return (ret);
 6430 }
 6431 
 6432 /* pf_route() may change pd->m; adjust local copies after calling */
 6433 void
 6434 pf_route(struct pf_pdesc *pd, struct pf_state *st)
 6435 {
 6436         struct mbuf             *m0;
 6437         struct mbuf_list         fml;
 6438         struct sockaddr_in      *dst, sin;
 6439         struct rtentry          *rt = NULL;
 6440         struct ip               *ip;
 6441         struct ifnet            *ifp = NULL;
 6442         int                      error = 0;
 6443         unsigned int             rtableid;
 6444 
 6445         if (pd->m->m_pkthdr.pf.routed++ > 3) {
 6446                 m_freem(pd->m);
 6447                 pd->m = NULL;
 6448                 return;
 6449         }
 6450 
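              /*
               * dup-to duplicates the packet and leaves pd->m untouched;
               * route-to and reply-to take the packet over (pd->m is set to
               * NULL below).  The equality test in the else branch restricts
               * route-to to packets flowing in the state's direction and
               * reply-to to packets flowing the other way; anything else
               * stays on the normal output path.
               */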
 6451         if (st->rt == PF_DUPTO) {
 6452                 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
 6453                         return;
 6454         } else {
 6455                 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
 6456                         return;
 6457                 m0 = pd->m;
 6458                 pd->m = NULL;
 6459         }
 6460 
 6461         if (m0->m_len < sizeof(struct ip)) {
 6462                 DPFPRINTF(LOG_ERR,
 6463                     "%s: m0->m_len < sizeof(struct ip)", __func__);
 6464                 goto bad;
 6465         }
 6466 
 6467         ip = mtod(m0, struct ip *);
 6468 
 6469         if (pd->dir == PF_IN) {
 6470                 if (ip->ip_ttl <= IPTTLDEC) {
 6471                         if (st->rt != PF_DUPTO) {
 6472                                 pf_send_icmp(m0, ICMP_TIMXCEED,
 6473                                     ICMP_TIMXCEED_INTRANS, 0,
 6474                                     pd->af, st->rule.ptr, pd->rdomain);
 6475                         }
 6476                         goto bad;
 6477                 }
 6478                 ip->ip_ttl -= IPTTLDEC;
 6479         }
 6480 
 6481         memset(&sin, 0, sizeof(sin));
 6482         dst = &sin;
 6483         dst->sin_family = AF_INET;
 6484         dst->sin_len = sizeof(*dst);
 6485         dst->sin_addr = st->rt_addr.v4;
 6486         rtableid = m0->m_pkthdr.ph_rtableid;
 6487 
 6488         rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid);
 6489         if (!rtisvalid(rt)) {
 6490                 if (st->rt != PF_DUPTO) {
 6491                         pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST,
 6492                             0, pd->af, st->rule.ptr, pd->rdomain);
 6493                 }
 6494                 ipstat_inc(ips_noroute);
 6495                 goto bad;
 6496         }
 6497 
 6498         ifp = if_get(rt->rt_ifidx);
 6499         if (ifp == NULL)
 6500                 goto bad;
 6501 
 6502         /* A locally generated packet may have an invalid source address. */
 6503         if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
 6504             (ifp->if_flags & IFF_LOOPBACK) == 0)
 6505                 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;
 6506 
 6507         if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
 6508                 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
 6509                         goto bad;
 6510                 else if (m0 == NULL)
 6511                         goto done;
 6512                 if (m0->m_len < sizeof(struct ip)) {
 6513                         DPFPRINTF(LOG_ERR,
 6514                             "%s: m0->m_len < sizeof(struct ip)", __func__);
 6515                         goto bad;
 6516                 }
 6517                 ip = mtod(m0, struct ip *);
 6518         }
 6519 
 6520         in_proto_cksum_out(m0, ifp);
 6521 
 6522         if (ntohs(ip->ip_len) <= ifp->if_mtu) {
 6523                 ip->ip_sum = 0;
 6524                 if (ifp->if_capabilities & IFCAP_CSUM_IPv4)
 6525                         m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
 6526                 else {
 6527                         ipstat_inc(ips_outswcsum);
 6528                         ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
 6529                 }
 6530                 error = ifp->if_output(ifp, m0, sintosa(dst), rt);
 6531                 goto done;
 6532         }
 6533 
 6534         /*
 6535          * Too large for interface; fragment if possible.
 6536          * Must be able to put at least 8 bytes per fragment.
 6537          */
 6538         if (ip->ip_off & htons(IP_DF)) {
 6539                 ipstat_inc(ips_cantfrag);
 6540                 if (st->rt != PF_DUPTO)
 6541                         pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
 6542                             ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain);
 6543                 goto bad;
 6544         }
 6545 
 6546         error = ip_fragment(m0, &fml, ifp, ifp->if_mtu);
 6547         if (error)
 6548                 goto done;
 6549 
 6550         while ((m0 = ml_dequeue(&fml)) != NULL) {
 6551                 error = ifp->if_output(ifp, m0, sintosa(dst), rt);
 6552                 if (error)
 6553                         break;
 6554         }
 6555         if (error)
 6556                 ml_purge(&fml);
 6557         else
 6558                 ipstat_inc(ips_fragmented);
 6559 
 6560 done:
 6561         if_put(ifp);
 6562         rtfree(rt);
 6563         return;
 6564 
 6565 bad:
 6566         m_freem(m0);
 6567         goto done;
 6568 }
 6569 
 6570 #ifdef INET6
 6571 /* pf_route6() may change pd->m; adjust local copies after calling */
 6572 void
 6573 pf_route6(struct pf_pdesc *pd, struct pf_state *st)
 6574 {
 6575         struct mbuf             *m0;
 6576         struct sockaddr_in6     *dst, sin6;
 6577         struct rtentry          *rt = NULL;
 6578         struct ip6_hdr          *ip6;
 6579         struct ifnet            *ifp = NULL;
 6580         struct m_tag            *mtag;
 6581         unsigned int             rtableid;
 6582 
 6583         if (pd->m->m_pkthdr.pf.routed++ > 3) {
 6584                 m_freem(pd->m);
 6585                 pd->m = NULL;
 6586                 return;
 6587         }
 6588 
 6589         if (st->rt == PF_DUPTO) {
 6590                 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
 6591                         return;
 6592         } else {
 6593                 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
 6594                         return;
 6595                 m0 = pd->m;
 6596                 pd->m = NULL;
 6597         }
 6598 
 6599         if (m0->m_len < sizeof(struct ip6_hdr)) {
 6600                 DPFPRINTF(LOG_ERR,
 6601                     "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
 6602                 goto bad;
 6603         }
 6604         ip6 = mtod(m0, struct ip6_hdr *);
 6605 
 6606         if (pd->dir == PF_IN) {
 6607                 if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
 6608                         if (st->rt != PF_DUPTO) {
 6609                                 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
 6610                                     ICMP6_TIME_EXCEED_TRANSIT, 0,
 6611                                     pd->af, st->rule.ptr, pd->rdomain);
 6612                         }
 6613                         goto bad;
 6614                 }
 6615                 ip6->ip6_hlim -= IPV6_HLIMDEC;
 6616         }
 6617 
 6618         memset(&sin6, 0, sizeof(sin6));
 6619         dst = &sin6;
 6620         dst->sin6_family = AF_INET6;
 6621         dst->sin6_len = sizeof(*dst);
 6622         dst->sin6_addr = st->rt_addr.v6;
 6623         rtableid = m0->m_pkthdr.ph_rtableid;
 6624 
 6625         rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0],
 6626             rtableid);
 6627         if (!rtisvalid(rt)) {
 6628                 if (st->rt != PF_DUPTO) {
 6629                         pf_send_icmp(m0, ICMP6_DST_UNREACH,
 6630                             ICMP6_DST_UNREACH_NOROUTE, 0,
 6631                             pd->af, st->rule.ptr, pd->rdomain);
 6632                 }
 6633                 ip6stat_inc(ip6s_noroute);
 6634                 goto bad;
 6635         }
 6636 
 6637         ifp = if_get(rt->rt_ifidx);
 6638         if (ifp == NULL)
 6639                 goto bad;
 6640 
 6641         /* A locally generated packet may have an invalid source address. */
 6642         if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
 6643             (ifp->if_flags & IFF_LOOPBACK) == 0)
 6644                 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
 6645 
 6646         if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
 6647                 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
 6648                         goto bad;
 6649                 else if (m0 == NULL)
 6650                         goto done;
 6651                 if (m0->m_len < sizeof(struct ip6_hdr)) {
 6652                         DPFPRINTF(LOG_ERR,
 6653                             "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
 6654                         goto bad;
 6655                 }
 6656         }
 6657 
 6658         in6_proto_cksum_out(m0, ifp);
 6659 
 6660         /*
 6661          * If packet has been reassembled by PF earlier, we have to
 6662          * use pf_refragment6() here to turn it back to fragments.
 6663          */
 6664         if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) {
 6665                 (void) pf_refragment6(&m0, mtag, dst, ifp, rt);
 6666         } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
 6667                 ifp->if_output(ifp, m0, sin6tosa(dst), rt);
 6668         } else {
 6669                 ip6stat_inc(ip6s_cantfrag);
 6670                 if (st->rt != PF_DUPTO)
 6671                         pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
 6672                             ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain);
 6673                 goto bad;
 6674         }
 6675 
 6676 done:
 6677         if_put(ifp);
 6678         rtfree(rt);
 6679         return;
 6680 
 6681 bad:
 6682         m_freem(m0);
 6683         goto done;
 6684 }
 6685 #endif /* INET6 */
 6686 
 6687 /*
 6688  * Check the TCP checksum and set the corresponding mbuf flag.
 6689  *   off is the offset where the protocol header starts
 6690  *   len is the total length of the protocol header plus payload
 6691  * Returns 0 when the checksum is valid, otherwise 1.
 6692  * If the _OUT flag is set the checksum is not done yet; consider these ok.
 6693  */
 6694 int
 6695 pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af)
 6696 {
 6697         u_int16_t sum;
 6698 
 6699         if (m->m_pkthdr.csum_flags &
 6700             (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) {
 6701                 return (0);
 6702         }
 6703         if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD ||
 6704             off < sizeof(struct ip) ||
 6705             m->m_pkthdr.len < off + len) {
 6706                 return (1);
 6707         }
 6708 
 6709         /* need to do it in software */
 6710         tcpstat_inc(tcps_inswcsum);
 6711 
 6712         switch (af) {
 6713         case AF_INET:
 6714                 if (m->m_len < sizeof(struct ip))
 6715                         return (1);
 6716 
 6717                 sum = in4_cksum(m, IPPROTO_TCP, off, len);
 6718                 break;
 6719 #ifdef INET6
 6720         case AF_INET6:
 6721                 if (m->m_len < sizeof(struct ip6_hdr))
 6722                         return (1);
 6723 
 6724                 sum = in6_cksum(m, IPPROTO_TCP, off, len);
 6725                 break;
 6726 #endif /* INET6 */
 6727         default:
 6728                 unhandled_af(af);
 6729         }
 6730         if (sum) {
 6731                 tcpstat_inc(tcps_rcvbadsum);
 6732                 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD;
 6733                 return (1);
 6734         }
 6735 
 6736         m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
 6737         return (0);
 6738 }
 6739 
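      /*
       * A caller might validate the checksum of a parsed TCP packet roughly
       * as in this sketch (pd is assumed to be a filled-in pf_pdesc, so that
       * pd->tot_len - pd->off covers the TCP header plus payload):
       */
      #if 0
              if (pf_check_tcp_cksum(pd->m, pd->off, pd->tot_len - pd->off,
                  pd->af))
                      return (PF_DROP);       /* bad or unverifiable checksum */
      #endif
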
 6740 struct pf_divert *
 6741 pf_find_divert(struct mbuf *m)
 6742 {
 6743         struct m_tag    *mtag;
 6744 
 6745         if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
 6746                 return (NULL);
 6747 
 6748         return ((struct pf_divert *)(mtag + 1));
 6749 }
 6750 
 6751 struct pf_divert *
 6752 pf_get_divert(struct mbuf *m)
 6753 {
 6754         struct m_tag    *mtag;
 6755 
 6756         if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
 6757                 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
 6758                     M_NOWAIT);
 6759                 if (mtag == NULL)
 6760                         return (NULL);
 6761                 memset(mtag + 1, 0, sizeof(struct pf_divert));
 6762                 m_tag_prepend(m, mtag);
 6763         }
 6764 
 6765         return ((struct pf_divert *)(mtag + 1));
 6766 }
 6767 
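      /*
       * pf_find_divert() only looks up an existing PACKET_TAG_PF_DIVERT tag
       * and returns NULL when there is none; pf_get_divert() additionally
       * allocates and prepends a zeroed tag on demand and can still return
       * NULL if the M_NOWAIT allocation fails.
       */
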
 6768 int
 6769 pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end,
 6770     u_short *reason)
 6771 {
 6772         uint8_t type, length, opts[15 * 4 - sizeof(struct ip)];
 6773 
 6774         /* IP header in payload of ICMP packet may be too short */
 6775         if (pd->m->m_pkthdr.len < end) {
 6776                 DPFPRINTF(LOG_NOTICE, "IP option too short");
 6777                 REASON_SET(reason, PFRES_SHORT);
 6778                 return (PF_DROP);
 6779         }
 6780 
 6781         KASSERT(end - off <= sizeof(opts));
 6782         m_copydata(pd->m, off, end - off, opts);
 6783         end -= off;
 6784         off = 0;
 6785 
 6786         while (off < end) {
 6787                 type = opts[off];
 6788                 if (type == IPOPT_EOL)
 6789                         break;
 6790                 if (type == IPOPT_NOP) {
 6791                         off++;
 6792                         continue;
 6793                 }
 6794                 if (off + 2 > end) {
 6795                         DPFPRINTF(LOG_NOTICE, "IP length opt");
 6796                         REASON_SET(reason, PFRES_IPOPTIONS);
 6797                         return (PF_DROP);
 6798                 }
 6799                 length = opts[off + 1];
 6800                 if (length < 2) {
 6801                         DPFPRINTF(LOG_NOTICE, "IP short opt");
 6802                         REASON_SET(reason, PFRES_IPOPTIONS);
 6803                         return (PF_DROP);
 6804                 }
 6805                 if (off + length > end) {
 6806                         DPFPRINTF(LOG_NOTICE, "IP long opt");
 6807                         REASON_SET(reason, PFRES_IPOPTIONS);
 6808                         return (PF_DROP);
 6809                 }
 6810                 switch (type) {
 6811                 case IPOPT_RA:
 6812                         SET(pd->badopts, PF_OPT_ROUTER_ALERT);
 6813                         break;
 6814                 default:
 6815                         SET(pd->badopts, PF_OPT_OTHER);
 6816                         break;
 6817                 }
 6818                 off += length;
 6819         }
 6820 
 6821         return (PF_PASS);
 6822 }
 6823 
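      /*
       * pf_walk_option() classifies IPv4 options into pd->badopts: router
       * alert is tracked separately as PF_OPT_ROUTER_ALERT (and cleared
       * again for valid IGMP packets in pf_walk_header() below); everything
       * else is lumped into PF_OPT_OTHER.
       */
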
 6824 int
 6825 pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
 6826 {
 6827         struct ip6_ext           ext;
 6828         u_int32_t                hlen, end;
 6829         int                      hdr_cnt;
 6830 
 6831         hlen = h->ip_hl << 2;
 6832         if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) {
 6833                 REASON_SET(reason, PFRES_SHORT);
 6834                 return (PF_DROP);
 6835         }
 6836         if (hlen != sizeof(struct ip)) {
 6837                 if (pf_walk_option(pd, h, pd->off + sizeof(struct ip),
 6838                     pd->off + hlen, reason) != PF_PASS)
 6839                         return (PF_DROP);
 6840                 /* header options which contain only padding are fishy */
 6841                 if (pd->badopts == 0)
 6842                         SET(pd->badopts, PF_OPT_OTHER);
 6843         }
 6844         end = pd->off + ntohs(h->ip_len);
 6845         pd->off += hlen;
 6846         pd->proto = h->ip_p;
 6847         /* IGMP packets have router alert options; allow them */
 6848         if (pd->proto == IPPROTO_IGMP) {
 6849                 /* According to RFC 1112, the TTL must be set to 1. */
 6850                 if ((h->ip_ttl != 1) || !IN_MULTICAST(h->ip_dst.s_addr)) {
 6851                         DPFPRINTF(LOG_NOTICE, "Invalid IGMP");
 6852                         REASON_SET(reason, PFRES_IPOPTIONS);
 6853                         return (PF_DROP);
 6854                 }
 6855                 CLR(pd->badopts, PF_OPT_ROUTER_ALERT);
 6856         }
 6857         /* stop walking over non-initial fragments */
 6858         if ((h->ip_off & htons(IP_OFFMASK)) != 0)
 6859                 return (PF_PASS);
 6860 
 6861         for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
 6862                 switch (pd->proto) {
 6863                 case IPPROTO_AH:
 6864                         /* fragments may be short */
 6865                         if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 &&
 6866                             end < pd->off + sizeof(ext))
 6867                                 return (PF_PASS);
 6868                         if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
 6869                             NULL, reason, AF_INET)) {
 6870                                 DPFPRINTF(LOG_NOTICE, "IP short exthdr");
 6871                                 return (PF_DROP);
 6872                         }
 6873                         pd->off += (ext.ip6e_len + 2) * 4;
 6874                         pd->proto = ext.ip6e_nxt;
 6875                         break;
 6876                 default:
 6877                         return (PF_PASS);
 6878                 }
 6879         }
 6880         DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit");
 6881         REASON_SET(reason, PFRES_IPOPTIONS);
 6882         return (PF_DROP);
 6883 }
 6884 
 6885 #ifdef INET6
 6886 int
 6887 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
 6888     u_short *reason)
 6889 {
 6890         struct ip6_opt           opt;
 6891         struct ip6_opt_jumbo     jumbo;
 6892 
 6893         while (off < end) {
 6894                 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
 6895                     sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) {
 6896                         DPFPRINTF(LOG_NOTICE, "IPv6 short opt type");
 6897                         return (PF_DROP);
 6898                 }
 6899                 if (opt.ip6o_type == IP6OPT_PAD1) {
 6900                         off++;
 6901                         continue;
 6902                 }
 6903                 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
 6904                     NULL, reason, AF_INET6)) {
 6905                         DPFPRINTF(LOG_NOTICE, "IPv6 short opt");
 6906                         return (PF_DROP);
 6907                 }
 6908                 if (off + sizeof(opt) + opt.ip6o_len > end) {
 6909                         DPFPRINTF(LOG_NOTICE, "IPv6 long opt");
 6910                         REASON_SET(reason, PFRES_IPOPTIONS);
 6911                         return (PF_DROP);
 6912                 }
 6913                 switch (opt.ip6o_type) {
 6914                 case IP6OPT_PADN:
 6915                         break;
 6916                 case IP6OPT_JUMBO:
 6917                         SET(pd->badopts, PF_OPT_JUMBO);
 6918                         if (pd->jumbolen != 0) {
 6919                                 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo");
 6920                                 REASON_SET(reason, PFRES_IPOPTIONS);
 6921                                 return (PF_DROP);
 6922                         }
 6923                         if (ntohs(h->ip6_plen) != 0) {
 6924                                 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen");
 6925                                 REASON_SET(reason, PFRES_IPOPTIONS);
 6926                                 return (PF_DROP);
 6927                         }
 6928                         if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
 6929                             NULL, reason, AF_INET6)) {
 6930                                 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo");
 6931                                 return (PF_DROP);
 6932                         }
 6933                         memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
 6934                             sizeof(pd->jumbolen));
 6935                         pd->jumbolen = ntohl(pd->jumbolen);
 6936                         if (pd->jumbolen < IPV6_MAXPACKET) {
 6937                                 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen");
 6938                                 REASON_SET(reason, PFRES_IPOPTIONS);
 6939                                 return (PF_DROP);
 6940                         }
 6941                         break;
 6942                 case IP6OPT_ROUTER_ALERT:
 6943                         SET(pd->badopts, PF_OPT_ROUTER_ALERT);
 6944                         break;
 6945                 default:
 6946                         SET(pd->badopts, PF_OPT_OTHER);
 6947                         break;
 6948                 }
 6949                 off += sizeof(opt) + opt.ip6o_len;
 6950         }
 6951 
 6952         return (PF_PASS);
 6953 }
 6954 
 6955 int
 6956 pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
 6957 {
 6958         struct ip6_frag          frag;
 6959         struct ip6_ext           ext;
 6960         struct icmp6_hdr         icmp6;
 6961         struct ip6_rthdr         rthdr;
 6962         u_int32_t                end;
 6963         int                      hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;
 6964 
 6965         pd->off += sizeof(struct ip6_hdr);
 6966         end = pd->off + ntohs(h->ip6_plen);
 6967         pd->fragoff = pd->extoff = pd->jumbolen = 0;
 6968         pd->proto = h->ip6_nxt;
 6969 
 6970         for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
 6971                 switch (pd->proto) {
 6972                 case IPPROTO_ROUTING:
 6973                 case IPPROTO_DSTOPTS:
 6974                         SET(pd->badopts, PF_OPT_OTHER);
 6975                         break;
 6976                 case IPPROTO_HOPOPTS:
 6977                         if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
 6978                             NULL, reason, AF_INET6)) {
 6979                                 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
 6980                                 return (PF_DROP);
 6981                         }
 6982                         if (pf_walk_option6(pd, h, pd->off + sizeof(ext),
 6983                             pd->off + (ext.ip6e_len + 1) * 8, reason)
 6984                             != PF_PASS)
 6985                                 return (PF_DROP);
 6986                         /* option header which contains only padding is fishy */
 6987                         if (pd->badopts == 0)
 6988                                 SET(pd->badopts, PF_OPT_OTHER);
 6989                         break;
 6990                 }
 6991                 switch (pd->proto) {
 6992                 case IPPROTO_FRAGMENT:
 6993                         if (fraghdr_cnt++) {
 6994                                 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment");
 6995                                 REASON_SET(reason, PFRES_FRAG);
 6996                                 return (PF_DROP);
 6997                         }
 6998                         /* jumbo payload packets cannot be fragmented */
 6999                         if (pd->jumbolen != 0) {
 7000                                 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo");
 7001                                 REASON_SET(reason, PFRES_FRAG);
 7002                                 return (PF_DROP);
 7003                         }
 7004                         if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
 7005                             NULL, reason, AF_INET6)) {
 7006                                 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment");
 7007                                 return (PF_DROP);
 7008                         }
 7009                         /* stop walking over non-initial fragments */
 7010                         if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
 7011                                 pd->fragoff = pd->off;
 7012                                 return (PF_PASS);
 7013                         }
 7014                         /* RFC 6946: reassemble only non-atomic fragments */
 7015                         if (frag.ip6f_offlg & IP6F_MORE_FRAG)
 7016                                 pd->fragoff = pd->off;
 7017                         pd->off += sizeof(frag);
 7018                         pd->proto = frag.ip6f_nxt;
 7019                         break;
 7020                 case IPPROTO_ROUTING:
 7021                         if (rthdr_cnt++) {
 7022                                 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr");
 7023                                 REASON_SET(reason, PFRES_IPOPTIONS);
 7024                                 return (PF_DROP);
 7025                         }
 7026                         /* fragments may be short */
 7027                         if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
 7028                                 pd->off = pd->fragoff;
 7029                                 pd->proto = IPPROTO_FRAGMENT;
 7030                                 return (PF_PASS);
 7031                         }
 7032                         if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
 7033                             NULL, reason, AF_INET6)) {
 7034                                 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr");
 7035                                 return (PF_DROP);
 7036                         }
 7037                         if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
 7038                                 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0");
 7039                                 REASON_SET(reason, PFRES_IPOPTIONS);
 7040                                 return (PF_DROP);
 7041                         }
 7042                         /* FALLTHROUGH */
 7043                 case IPPROTO_HOPOPTS:
 7044                         /* RFC2460 4.1:  Hop-by-Hop only after IPv6 header */
 7045                         if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) {
 7046                                 DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first");
 7047                                 REASON_SET(reason, PFRES_IPOPTIONS);
 7048                                 return (PF_DROP);
 7049                         }
 7050                         /* FALLTHROUGH */
 7051                 case IPPROTO_AH:
 7052                 case IPPROTO_DSTOPTS:
 7053                         /* fragments may be short */
 7054                         if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
 7055                                 pd->off = pd->fragoff;
 7056                                 pd->proto = IPPROTO_FRAGMENT;
 7057                                 return (PF_PASS);
 7058                         }
 7059                         if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
 7060                             NULL, reason, AF_INET6)) {
 7061                                 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
 7062                                 return (PF_DROP);
 7063                         }
 7064                         /* reassembly needs the ext header before the frag */
 7065                         if (pd->fragoff == 0)
 7066                                 pd->extoff = pd->off;
 7067                         if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 &&
 7068                             ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) {
 7069                                 DPFPRINTF(LOG_NOTICE, "IPv6 missing jumbo");
 7070                                 REASON_SET(reason, PFRES_IPOPTIONS);
 7071                                 return (PF_DROP);
 7072                         }
 7073                         if (pd->proto == IPPROTO_AH)
 7074                                 pd->off += (ext.ip6e_len + 2) * 4;
 7075                         else
 7076                                 pd->off += (ext.ip6e_len + 1) * 8;
 7077                         pd->proto = ext.ip6e_nxt;
 7078                         break;
 7079                 case IPPROTO_ICMPV6:
 7080                         /* fragments may be short; if so, ignore the inner header */
 7081                         if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) {
 7082                                 pd->off = pd->fragoff;
 7083                                 pd->proto = IPPROTO_FRAGMENT;
 7084                                 return (PF_PASS);
 7085                         }
 7086                         if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6),
 7087                             NULL, reason, AF_INET6)) {
 7088                                 DPFPRINTF(LOG_NOTICE, "IPv6 short icmp6hdr");
 7089                                 return (PF_DROP);
 7090                         }
 7091                         /* ICMP multicast packets have router alert options */
 7092                         switch (icmp6.icmp6_type) {
 7093                         case MLD_LISTENER_QUERY:
 7094                         case MLD_LISTENER_REPORT:
 7095                         case MLD_LISTENER_DONE:
 7096                         case MLDV2_LISTENER_REPORT:
 7097                                 /*
 7098                                  * According to RFC 2710, all MLD messages are
 7099                                  * sent with a hop-limit (ttl) of 1 and a
 7100                                  * link-local source address.  If either is
 7101                                  * missing, the MLD message is invalid and
 7102                                  * should be discarded.
 7103                                  */
 7104                                 if ((h->ip6_hlim != 1) ||
 7105                                     !IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) {
 7106                                         DPFPRINTF(LOG_NOTICE, "Invalid MLD");
 7107                                         REASON_SET(reason, PFRES_IPOPTIONS);
 7108                                         return (PF_DROP);
 7109                                 }
 7110                                 CLR(pd->badopts, PF_OPT_ROUTER_ALERT);
 7111                                 break;
 7112                         }
 7113                         return (PF_PASS);
 7114                 case IPPROTO_TCP:
 7115                 case IPPROTO_UDP:
 7116                         /* fragments may be short; if so, ignore the inner header */
 7117                         if (pd->fragoff != 0 && end < pd->off +
 7118                             (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
 7119                             pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
 7120                             sizeof(struct icmp6_hdr))) {
 7121                                 pd->off = pd->fragoff;
 7122                                 pd->proto = IPPROTO_FRAGMENT;
 7123                         }
 7124                         /* FALLTHROUGH */
 7125                 default:
 7126                         return (PF_PASS);
 7127                 }
 7128         }
 7129         DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit");
 7130         REASON_SET(reason, PFRES_IPOPTIONS);
 7131         return (PF_DROP);
 7132 }
 7133 #endif /* INET6 */
 7134 
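      /*
       * After a successful pf_walk_header6() run, pd->proto holds the final
       * next-header value and pd->off points at that header; pd->extoff
       * remembers the offset of the last extension header seen before any
       * fragment header, pd->fragoff is non-zero for real (non-atomic)
       * fragments and pd->jumbolen carries a parsed jumbo payload length,
       * if any.
       */
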
 7135 u_int16_t
 7136 pf_pkt_hash(sa_family_t af, uint8_t proto,
 7137     const struct pf_addr *src, const struct pf_addr *dst,
 7138     uint16_t sport, uint16_t dport)
 7139 {
 7140         uint32_t hash;
 7141 
 7142         hash = src->addr32[0] ^ dst->addr32[0];
 7143 #ifdef INET6
 7144         if (af == AF_INET6) {
 7145                 hash ^= src->addr32[1] ^ dst->addr32[1];
 7146                 hash ^= src->addr32[2] ^ dst->addr32[2];
 7147                 hash ^= src->addr32[3] ^ dst->addr32[3];
 7148         }
 7149 #endif
 7150 
 7151         switch (proto) {
 7152         case IPPROTO_TCP:
 7153         case IPPROTO_UDP:
 7154                 hash ^= sport ^ dport;
 7155                 break;
 7156         }
 7157 
 7158         return stoeplitz_n32(hash);
 7159 }
 7160 
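      /*
       * Because the addresses (and, for TCP/UDP, the ports) are folded
       * together with XOR before the stoeplitz_n32() step, the hash is
       * direction independent: both halves of a connection map to the same
       * value.  Sketch (src, dst, sp and dp are placeholders for one flow):
       */
      #if 0
              KASSERT(pf_pkt_hash(AF_INET, IPPROTO_TCP, src, dst, sp, dp) ==
                  pf_pkt_hash(AF_INET, IPPROTO_TCP, dst, src, dp, sp));
      #endif
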
 7161 int
 7162 pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir,
 7163     struct pfi_kif *kif, struct mbuf *m, u_short *reason)
 7164 {
 7165         memset(pd, 0, sizeof(*pd));
 7166         pd->dir = dir;
 7167         pd->kif = kif;          /* kif is NULL when called by pflog */
 7168         pd->m = m;
 7169         pd->sidx = (dir == PF_IN) ? 0 : 1;
 7170         pd->didx = (dir == PF_IN) ? 1 : 0;
 7171         pd->af = pd->naf = af;
 7172         pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid);
 7173 
 7174         switch (pd->af) {
 7175         case AF_INET: {
 7176                 struct ip       *h;
 7177 
 7178                 /* Check for illegal packets */
 7179                 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) {
 7180                         REASON_SET(reason, PFRES_SHORT);
 7181                         return (PF_DROP);
 7182                 }
 7183 
 7184                 h = mtod(pd->m, struct ip *);
 7185                 if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) {
 7186                         REASON_SET(reason, PFRES_SHORT);
 7187                         return (PF_DROP);
 7188                 }
 7189 
 7190                 if (pf_walk_header(pd, h, reason) != PF_PASS)
 7191                         return (PF_DROP);
 7192 
 7193                 pd->src = (struct pf_addr *)&h->ip_src;
 7194                 pd->dst = (struct pf_addr *)&h->ip_dst;
 7195                 pd->tot_len = ntohs(h->ip_len);
 7196                 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
 7197                 pd->ttl = h->ip_ttl;
 7198                 pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ?
 7199                      PF_VPROTO_FRAGMENT : pd->proto;
 7200 
 7201                 break;
 7202         }
 7203 #ifdef INET6
 7204         case AF_INET6: {
 7205                 struct ip6_hdr  *h;
 7206 
 7207                 /* Check for illegal packets */
 7208                 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) {
 7209                         REASON_SET(reason, PFRES_SHORT);
 7210                         return (PF_DROP);
 7211                 }
 7212 
 7213                 h = mtod(pd->m, struct ip6_hdr *);
 7214                 if (pd->m->m_pkthdr.len <
 7215                     sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) {
 7216                         REASON_SET(reason, PFRES_SHORT);
 7217                         return (PF_DROP);
 7218                 }
 7219 
 7220                 if (pf_walk_header6(pd, h, reason) != PF_PASS)
 7221                         return (PF_DROP);
 7222 
 7223 #if 1
 7224                 /*
 7225                  * We do not support jumbograms yet.  If we keep going, a zero
 7226                  * ip6_plen will do something bad, so drop the packet for now.
 7227                  */
 7228                 if (pd->jumbolen != 0) {
 7229                         REASON_SET(reason, PFRES_NORM);
 7230                         return (PF_DROP);
 7231                 }
 7232 #endif  /* 1 */
 7233 
 7234                 pd->src = (struct pf_addr *)&h->ip6_src;
 7235                 pd->dst = (struct pf_addr *)&h->ip6_dst;
 7236                 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
 7237                 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20;
 7238                 pd->ttl = h->ip6_hlim;
 7239                 pd->virtual_proto = (pd->fragoff != 0) ?
 7240                         PF_VPROTO_FRAGMENT : pd->proto;
 7241 
 7242                 break;
 7243         }
 7244 #endif /* INET6 */
 7245         default:
 7246                 panic("pf_setup_pdesc called with illegal af %u", pd->af);
 7247 
 7248         }
 7249 
 7250         pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
 7251         pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
 7252 
 7253         switch (pd->virtual_proto) {
 7254         case IPPROTO_TCP: {
 7255                 struct tcphdr   *th = &pd->hdr.tcp;
 7256 
 7257                 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
 7258                     NULL, reason, pd->af))
 7259                         return (PF_DROP);
 7260                 pd->hdrlen = sizeof(*th);
 7261                 if (th->th_dport == 0 ||
 7262                     pd->off + (th->th_off << 2) > pd->tot_len ||
 7263                     (th->th_off << 2) < sizeof(struct tcphdr)) {
 7264                         REASON_SET(reason, PFRES_SHORT);
 7265                         return (PF_DROP);
 7266                 }
 7267                 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2);
 7268                 pd->sport = &th->th_sport;
 7269                 pd->dport = &th->th_dport;
 7270                 pd->pcksum = &th->th_sum;
 7271                 break;
 7272         }
 7273         case IPPROTO_UDP: {
 7274                 struct udphdr   *uh = &pd->hdr.udp;
 7275 
 7276                 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
 7277                     NULL, reason, pd->af))
 7278                         return (PF_DROP);
 7279                 pd->hdrlen = sizeof(*uh);
 7280                 if (uh->uh_dport == 0 ||
 7281                     pd->off + ntohs(uh->uh_ulen) > pd->tot_len ||
 7282                     ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
 7283                         REASON_SET(reason, PFRES_SHORT);
 7284                         return (PF_DROP);
 7285                 }
 7286                 pd->sport = &uh->uh_sport;
 7287                 pd->dport = &uh->uh_dport;
 7288                 pd->pcksum = &uh->uh_sum;
 7289                 break;
 7290         }
 7291         case IPPROTO_ICMP: {
 7292                 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
 7293                     NULL, reason, pd->af))
 7294                         return (PF_DROP);
 7295                 pd->hdrlen = ICMP_MINLEN;
 7296                 if (pd->off + pd->hdrlen > pd->tot_len) {
 7297                         REASON_SET(reason, PFRES_SHORT);
 7298                         return (PF_DROP);
 7299                 }
 7300                 pd->pcksum = &pd->hdr.icmp.icmp_cksum;
 7301                 break;
 7302         }
 7303 #ifdef INET6
 7304         case IPPROTO_ICMPV6: {
 7305                 size_t  icmp_hlen = sizeof(struct icmp6_hdr);
 7306 
 7307                 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
 7308                     NULL, reason, pd->af))
 7309                         return (PF_DROP);
 7310                 /* ICMP types whose headers we look further into to match state */
 7311                 switch (pd->hdr.icmp6.icmp6_type) {
 7312                 case MLD_LISTENER_QUERY:
 7313                 case MLD_LISTENER_REPORT:
 7314                         icmp_hlen = sizeof(struct mld_hdr);
 7315                         break;
 7316                 case ND_NEIGHBOR_SOLICIT:
 7317                 case ND_NEIGHBOR_ADVERT:
 7318                         icmp_hlen = sizeof(struct nd_neighbor_solicit);
 7319                         /* FALLTHROUGH */
 7320                 case ND_ROUTER_SOLICIT:
 7321                 case ND_ROUTER_ADVERT:
 7322                 case ND_REDIRECT:
 7323                         if (pd->ttl != 255) {
 7324                                 REASON_SET(reason, PFRES_NORM);
 7325                                 return (PF_DROP);
 7326                         }
 7327                         break;
 7328                 }
 7329                 if (icmp_hlen > sizeof(struct icmp6_hdr) &&
 7330                     !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
 7331                     NULL, reason, pd->af))
 7332                         return (PF_DROP);
 7333                 pd->hdrlen = icmp_hlen;
 7334                 if (pd->off + pd->hdrlen > pd->tot_len) {
 7335                         REASON_SET(reason, PFRES_SHORT);
 7336                         return (PF_DROP);
 7337                 }
 7338                 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum;
 7339                 break;
 7340         }
 7341 #endif  /* INET6 */
 7342         }
 7343 
 7344         if (pd->sport)
 7345                 pd->osport = pd->nsport = *pd->sport;
 7346         if (pd->dport)
 7347                 pd->odport = pd->ndport = *pd->dport;
 7348 
 7349         pd->hash = pf_pkt_hash(pd->af, pd->proto,
 7350             pd->src, pd->dst, pd->osport, pd->odport);
 7351 
 7352         return (PF_PASS);
 7353 }
 7354 
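      /*
       * pf_setup_pdesc() turns a raw mbuf into a filled-in pf_pdesc: it
       * walks the IP or IPv6 header chain, records the transport protocol,
       * header offsets, addresses and ports, and precomputes the flow hash
       * that later ends up in the state key lookup.  pf_test() below calls
       * it once per packet and a second time after reassembly.
       */
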
 7355 void
 7356 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *st,
 7357     struct pf_rule *r, struct pf_rule *a)
 7358 {
 7359         int dirndx;
 7360         pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT]
 7361             [action != PF_PASS] += pd->tot_len;
 7362         pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT]
 7363             [action != PF_PASS]++;
 7364 
 7365         if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) {
 7366                 dirndx = (pd->dir == PF_OUT);
 7367                 r->packets[dirndx]++;
 7368                 r->bytes[dirndx] += pd->tot_len;
 7369                 if (a != NULL) {
 7370                         a->packets[dirndx]++;
 7371                         a->bytes[dirndx] += pd->tot_len;
 7372                 }
 7373                 if (st != NULL) {
 7374                         struct pf_rule_item     *ri;
 7375                         struct pf_sn_item       *sni;
 7376 
 7377                         SLIST_FOREACH(sni, &st->src_nodes, next) {
 7378                                 sni->sn->packets[dirndx]++;
 7379                                 sni->sn->bytes[dirndx] += pd->tot_len;
 7380                         }
 7381                         dirndx = (pd->dir == st->direction) ? 0 : 1;
 7382                         st->packets[dirndx]++;
 7383                         st->bytes[dirndx] += pd->tot_len;
 7384 
 7385                         SLIST_FOREACH(ri, &st->match_rules, entry) {
 7386                                 ri->r->packets[dirndx]++;
 7387                                 ri->r->bytes[dirndx] += pd->tot_len;
 7388 
 7389                                 if (ri->r->src.addr.type == PF_ADDR_TABLE)
 7390                                         pfr_update_stats(ri->r->src.addr.p.tbl,
 7391                                             &st->key[(st->direction == PF_IN)]->
 7392                                                 addr[(st->direction == PF_OUT)],
 7393                                             pd, ri->r->action, ri->r->src.neg);
 7394                                 if (ri->r->dst.addr.type == PF_ADDR_TABLE)
 7395                                         pfr_update_stats(ri->r->dst.addr.p.tbl,
 7396                                             &st->key[(st->direction == PF_IN)]->
 7397                                                 addr[(st->direction == PF_IN)],
 7398                                             pd, ri->r->action, ri->r->dst.neg);
 7399                         }
 7400                 }
 7401                 if (r->src.addr.type == PF_ADDR_TABLE)
 7402                         pfr_update_stats(r->src.addr.p.tbl,
 7403                             (st == NULL) ? pd->src :
 7404                             &st->key[(st->direction == PF_IN)]->
 7405                                 addr[(st->direction == PF_OUT)],
 7406                             pd, r->action, r->src.neg);
 7407                 if (r->dst.addr.type == PF_ADDR_TABLE)
 7408                         pfr_update_stats(r->dst.addr.p.tbl,
 7409                             (st == NULL) ? pd->dst :
 7410                             &st->key[(st->direction == PF_IN)]->
 7411                                 addr[(st->direction == PF_IN)],
 7412                             pd, r->action, r->dst.neg);
 7413         }
 7414 }
 7415 
 7416 int
 7417 pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
 7418 {
 7419 #if NCARP > 0
 7420         struct ifnet            *ifp0;
 7421 #endif
 7422         struct pfi_kif          *kif;
 7423         u_short                  action, reason = 0;
 7424         struct pf_rule          *a = NULL, *r = &pf_default_rule;
 7425         struct pf_state         *st = NULL;
 7426         struct pf_state_key_cmp  key;
 7427         struct pf_ruleset       *ruleset = NULL;
 7428         struct pf_pdesc          pd;
 7429         int                      dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
 7430         u_int32_t                qid, pqid = 0;
 7431         int                      have_pf_lock = 0;
 7432         struct pfsync_deferral  *deferral = NULL;
 7433 
 7434         if (!pf_status.running)
 7435                 return (PF_PASS);
 7436 
 7437 #if NCARP > 0
 7438         if (ifp->if_type == IFT_CARP &&
 7439                 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) {
 7440                 kif = (struct pfi_kif *)ifp0->if_pf_kif;
 7441                 if_put(ifp0);
 7442         } else
 7443 #endif /* NCARP */
 7444                 kif = (struct pfi_kif *)ifp->if_pf_kif;
 7445 
 7446         if (kif == NULL) {
 7447                 DPFPRINTF(LOG_ERR,
 7448                     "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname);
 7449                 return (PF_DROP);
 7450         }
 7451         if (kif->pfik_flags & PFI_IFLAG_SKIP)
 7452                 return (PF_PASS);
 7453 
 7454 #ifdef DIAGNOSTIC
 7455         if (((*m0)->m_flags & M_PKTHDR) == 0)
 7456                 panic("non-M_PKTHDR is passed to pf_test");
 7457 #endif /* DIAGNOSTIC */
 7458 
 7459         if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED)
 7460                 return (PF_PASS);
 7461 
 7462         if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) {
 7463                 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET;
 7464                 return (PF_PASS);
 7465         }
 7466 
 7467         if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) {
 7468                 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED;
 7469                 return (PF_PASS);
 7470         }
 7471 
 7472         action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason);
 7473         if (action != PF_PASS) {
 7474 #if NPFLOG > 0
 7475                 pd.pflog |= PF_LOG_FORCE;
 7476 #endif  /* NPFLOG > 0 */
 7477                 goto done;
 7478         }
 7479 
 7480         /* packet normalization and reassembly */
 7481         switch (pd.af) {
 7482         case AF_INET:
 7483                 action = pf_normalize_ip(&pd, &reason);
 7484                 break;
 7485 #ifdef INET6
 7486         case AF_INET6:
 7487                 action = pf_normalize_ip6(&pd, &reason);
 7488                 break;
 7489 #endif  /* INET6 */
 7490         }
 7491         *m0 = pd.m;
 7492         /* if packet sits in reassembly queue, return without error */
 7493         if (pd.m == NULL)
 7494                 return PF_PASS;
 7495 
 7496         if (action != PF_PASS) {
 7497 #if NPFLOG > 0
 7498                 pd.pflog |= PF_LOG_FORCE;
 7499 #endif  /* NPFLOG > 0 */
 7500                 goto done;
 7501         }
 7502 
 7503         /* if packet has been reassembled, update packet description */
 7504         if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) {
 7505                 action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason);
 7506                 if (action != PF_PASS) {
 7507 #if NPFLOG > 0
 7508                         pd.pflog |= PF_LOG_FORCE;
 7509 #endif  /* NPFLOG > 0 */
 7510                         goto done;
 7511                 }
 7512         }
 7513         pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;
 7514 
 7515         /*
 7516          * Avoid pcb-lookups from the forwarding path.  They should never
 7517          * match and would cause MP locking problems.
 7518          */
 7519         if (fwdir == PF_FWD) {
 7520                 pd.lookup.done = -1;
 7521                 pd.lookup.uid = -1;
 7522                 pd.lookup.gid = -1;
 7523                 pd.lookup.pid = NO_PID;
 7524         }
 7525 
 7526         switch (pd.virtual_proto) {
 7527 
 7528         case PF_VPROTO_FRAGMENT: {
 7529                 /*
 7530                  * handle fragments that aren't reassembled by
 7531                  * normalization
 7532                  */
 7533                 PF_LOCK();
 7534                 have_pf_lock = 1;
 7535                 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, &reason,
 7536                     &deferral);
 7537                 st = pf_state_ref(st);
 7538                 if (action != PF_PASS)
 7539                         REASON_SET(&reason, PFRES_FRAG);
 7540                 break;
 7541         }
 7542 
 7543         case IPPROTO_ICMP: {
 7544                 if (pd.af != AF_INET) {
 7545                         action = PF_DROP;
 7546                         REASON_SET(&reason, PFRES_NORM);
 7547                         DPFPRINTF(LOG_NOTICE,
 7548                             "dropping IPv6 packet with ICMPv4 payload");
 7549                         break;
 7550                 }
 7551                 PF_STATE_ENTER_READ();
 7552                 action = pf_test_state_icmp(&pd, &st, &reason);
 7553                 st = pf_state_ref(st);
 7554                 PF_STATE_EXIT_READ();
 7555                 if (action == PF_PASS || action == PF_AFRT) {
 7556 #if NPFSYNC > 0
 7557                         pfsync_update_state(st);
 7558 #endif /* NPFSYNC > 0 */
 7559                         r = st->rule.ptr;
 7560                         a = st->anchor.ptr;
 7561 #if NPFLOG > 0
 7562                         pd.pflog |= st->log;
 7563 #endif  /* NPFLOG > 0 */
 7564                 } else if (st == NULL) {
 7565                         PF_LOCK();
 7566                         have_pf_lock = 1;
 7567                         action = pf_test_rule(&pd, &r, &st, &a, &ruleset,
 7568                             &reason, &deferral);
 7569                         st = pf_state_ref(st);
 7570                 }
 7571                 break;
 7572         }
 7573 
 7574 #ifdef INET6
 7575         case IPPROTO_ICMPV6: {
 7576                 if (pd.af != AF_INET6) {
 7577                         action = PF_DROP;
 7578                         REASON_SET(&reason, PFRES_NORM);
 7579                         DPFPRINTF(LOG_NOTICE,
 7580                             "dropping IPv4 packet with ICMPv6 payload");
 7581                         break;
 7582                 }
 7583                 PF_STATE_ENTER_READ();
 7584                 action = pf_test_state_icmp(&pd, &st, &reason);
 7585                 st = pf_state_ref(st);
 7586                 PF_STATE_EXIT_READ();
 7587                 if (action == PF_PASS || action == PF_AFRT) {
 7588 #if NPFSYNC > 0
 7589                         pfsync_update_state(st);
 7590 #endif /* NPFSYNC > 0 */
 7591                         r = st->rule.ptr;
 7592                         a = st->anchor.ptr;
 7593 #if NPFLOG > 0
 7594                         pd.pflog |= st->log;
 7595 #endif  /* NPFLOG > 0 */
 7596                 } else if (st == NULL) {
 7597                         PF_LOCK();
 7598                         have_pf_lock = 1;
 7599                         action = pf_test_rule(&pd, &r, &st, &a, &ruleset,
 7600                             &reason, &deferral);
 7601                         st = pf_state_ref(st);
 7602                 }
 7603                 break;
 7604         }
 7605 #endif /* INET6 */
 7606 
 7607         default:
 7608                 if (pd.virtual_proto == IPPROTO_TCP) {
 7609                         if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags &
 7610                             (TH_SYN|TH_ACK)) == TH_SYN &&
 7611                             pf_synflood_check(&pd)) {
 7612                                 PF_LOCK();
 7613                                 have_pf_lock = 1;
 7614                                 pf_syncookie_send(&pd);
 7615                                 action = PF_DROP;
 7616                                 break;
 7617                         }
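                              /* Empty ACK segments may be mapped to the priority queue below. */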
 7618                         if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0)
 7619                                 pqid = 1;
 7620                         action = pf_normalize_tcp(&pd);
 7621                         if (action == PF_DROP)
 7622                                 break;
 7623                 }
 7624 
 7625                 key.af = pd.af;
 7626                 key.proto = pd.virtual_proto;
 7627                 key.rdomain = pd.rdomain;
 7628                 pf_addrcpy(&key.addr[pd.sidx], pd.src, key.af);
 7629                 pf_addrcpy(&key.addr[pd.didx], pd.dst, key.af);
 7630                 key.port[pd.sidx] = pd.osport;
 7631                 key.port[pd.didx] = pd.odport;
 7632                 key.hash = pd.hash;
 7633 
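                      /* Probe the state table for an existing state matching this key. */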
 7634                 PF_STATE_ENTER_READ();
 7635                 action = pf_find_state(&pd, &key, &st);
 7636                 st = pf_state_ref(st);
 7637                 PF_STATE_EXIT_READ();
 7638 
 7639                 /* check for syncookies on a TCP ACK when there is no active state */
 7640                 if (pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP &&
 7641                     (st == NULL || (st->src.state >= TCPS_FIN_WAIT_2 &&
 7642                     st->dst.state >= TCPS_FIN_WAIT_2)) &&
 7643                     (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK &&
 7644                     pf_syncookie_validate(&pd)) {
 7645                         struct mbuf     *msyn = pf_syncookie_recreate_syn(&pd);
 7646                         if (msyn) {
 7647                                 action = pf_test(af, fwdir, ifp, &msyn);
 7648                                 m_freem(msyn);
 7649                                 if (action == PF_PASS || action == PF_AFRT) {
 7650                                         PF_STATE_ENTER_READ();
 7651                                         pf_state_unref(st);
 7652                                         action = pf_find_state(&pd, &key, &st);
 7653                                         st = pf_state_ref(st);
 7654                                         PF_STATE_EXIT_READ();
 7655                                         if (st == NULL)
 7656                                                 return (PF_DROP);
 7657                                         st->src.seqhi = st->dst.seqhi =
 7658                                             ntohl(pd.hdr.tcp.th_ack) - 1;
 7659                                         st->src.seqlo =
 7660                                             ntohl(pd.hdr.tcp.th_seq) - 1;
 7661                                         pf_set_protostate(st, PF_PEER_SRC,
 7662                                             PF_TCPS_PROXY_DST);
 7663                                 }
 7664                         } else
 7665                                 action = PF_DROP;
 7666                 }
 7667 
 7668                 if (action == PF_MATCH)
 7669                         action = pf_test_state(&pd, &st, &reason);
 7670 
 7671                 if (action == PF_PASS || action == PF_AFRT) {
 7672 #if NPFSYNC > 0
 7673                         pfsync_update_state(st);
 7674 #endif /* NPFSYNC > 0 */
 7675                         r = st->rule.ptr;
 7676                         a = st->anchor.ptr;
 7677 #if NPFLOG > 0
 7678                         pd.pflog |= st->log;
 7679 #endif  /* NPFLOG > 0 */
 7680                 } else if (st == NULL) {
 7681                         PF_LOCK();
 7682                         have_pf_lock = 1;
 7683                         action = pf_test_rule(&pd, &r, &st, &a, &ruleset,
 7684                             &reason, &deferral);
 7685                         st = pf_state_ref(st);
 7686                 }
 7687 
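                      /* Clamp the TCP MSS when the state or the matching rule requests it. */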
 7688                 if (pd.virtual_proto == IPPROTO_TCP) {
 7689                         if (st) {
 7690                                 if (st->max_mss)
 7691                                         pf_normalize_mss(&pd, st->max_mss);
 7692                         } else if (r->max_mss)
 7693                                 pf_normalize_mss(&pd, r->max_mss);
 7694                 }
 7695 
 7696                 break;
 7697         }
 7698 
 7699         if (have_pf_lock != 0)
 7700                 PF_UNLOCK();
 7701 
 7702         /*
 7703          * At the moment, we rely on NET_LOCK() to prevent removal of items
 7704          * we've collected above ('r', 'anchor' and 'ruleset').  They'll have
 7705          * to be refcounted when NET_LOCK() is gone.
 7706          */
 7707 
 7708 done:
 7709         if (action != PF_DROP) {
 7710                 if (st) {
 7711                         /* The non-state case is handled in pf_test_rule() */
 7712                         if (action == PF_PASS && pd.badopts != 0 &&
 7713                             !(st->state_flags & PFSTATE_ALLOWOPTS)) {
 7714                                 action = PF_DROP;
 7715                                 REASON_SET(&reason, PFRES_IPOPTIONS);
 7716 #if NPFLOG > 0
 7717                                 pd.pflog |= PF_LOG_FORCE;
 7718 #endif  /* NPFLOG > 0 */
 7719                                 DPFPRINTF(LOG_NOTICE, "dropping packet with "
 7720                                     "ip/ipv6 options in pf_test()");
 7721                         }
 7722 
 7723                         pf_scrub(pd.m, st->state_flags, pd.af, st->min_ttl,
 7724                             st->set_tos);
 7725                         pf_tag_packet(pd.m, st->tag, st->rtableid[pd.didx]);
 7726                         if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
 7727                                 qid = st->pqid;
 7728                                 if (st->state_flags & PFSTATE_SETPRIO) {
 7729                                         pd.m->m_pkthdr.pf.prio =
 7730                                             st->set_prio[1];
 7731                                 }
 7732                         } else {
 7733                                 qid = st->qid;
 7734                                 if (st->state_flags & PFSTATE_SETPRIO) {
 7735                                         pd.m->m_pkthdr.pf.prio =
 7736                                             st->set_prio[0];
 7737                                 }
 7738                         }
 7739                         pd.m->m_pkthdr.pf.delay = st->delay;
 7740                 } else {
 7741                         pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl,
 7742                             r->set_tos);
 7743                         if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
 7744                                 qid = r->pqid;
 7745                                 if (r->scrub_flags & PFSTATE_SETPRIO)
 7746                                         pd.m->m_pkthdr.pf.prio = r->set_prio[1];
 7747                         } else {
 7748                                 qid = r->qid;
 7749                                 if (r->scrub_flags & PFSTATE_SETPRIO)
 7750                                         pd.m->m_pkthdr.pf.prio = r->set_prio[0];
 7751                         }
 7752                         pd.m->m_pkthdr.pf.delay = r->delay;
 7753                 }
 7754         }
 7755 
 7756         if (action == PF_PASS && qid)
 7757                 pd.m->m_pkthdr.pf.qid = qid;
 7758         if (pd.dir == PF_IN && st && st->key[PF_SK_STACK])
 7759                 pf_mbuf_link_state_key(pd.m, st->key[PF_SK_STACK]);
 7760         if (pd.dir == PF_OUT &&
 7761             pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk &&
 7762             st && st->key[PF_SK_STACK] && !st->key[PF_SK_STACK]->sk_inp)
 7763                 pf_state_key_link_inpcb(st->key[PF_SK_STACK],
 7764                     pd.m->m_pkthdr.pf.inp);
 7765 
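               /*
                * If the stack has not set a flow id yet, derive one from the
                * wire-side state key hash so all packets of this state share
                * the same flow id.
                */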
 7766         if (st != NULL && !ISSET(pd.m->m_pkthdr.csum_flags, M_FLOWID)) {
 7767                 pd.m->m_pkthdr.ph_flowid = st->key[PF_SK_WIRE]->hash;
 7768                 SET(pd.m->m_pkthdr.csum_flags, M_FLOWID);
 7769         }
 7770 
 7771         /*
 7772          * Connections redirected to loopback should not match sockets
 7773          * bound specifically to loopback, for security reasons;
 7774          * see in_pcblookup_listen().
 7775          */
 7776         if (pd.destchg)
 7777                 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >>
 7778                     IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) ||
 7779                     (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)))
 7780                         pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
 7781         /* We need to redo the route lookup on outgoing routes. */
 7782         if (pd.destchg && pd.dir == PF_OUT)
 7783                 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE;
 7784 
 7785         if (pd.dir == PF_IN && action == PF_PASS &&
 7786             (r->divert.type == PF_DIVERT_TO ||
 7787             r->divert.type == PF_DIVERT_REPLY)) {
 7788                 struct pf_divert *divert;
 7789 
 7790                 if ((divert = pf_get_divert(pd.m))) {
 7791                         pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
 7792                         divert->addr = r->divert.addr;
 7793                         divert->port = r->divert.port;
 7794                         divert->rdomain = pd.rdomain;
 7795                         divert->type = r->divert.type;
 7796                 }
 7797         }
 7798 
 7799         if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET)
 7800                 action = PF_DIVERT;
 7801 
 7802 #if NPFLOG > 0
 7803         if (pd.pflog) {
 7804                 struct pf_rule_item     *ri;
 7805 
 7806                 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL)
 7807                         pflog_packet(&pd, reason, r, a, ruleset, NULL);
 7808                 if (st) {
 7809                         SLIST_FOREACH(ri, &st->match_rules, entry)
 7810                                 if (ri->r->log & PF_LOG_ALL)
 7811                                         pflog_packet(&pd, reason, ri->r, a,
 7812                                             ruleset, NULL);
 7813                 }
 7814         }
 7815 #endif  /* NPFLOG > 0 */
 7816 
 7817         pf_counters_inc(action, &pd, st, r, a);
 7818 
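              /* Dispatch the packet according to the final verdict. */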
 7819         switch (action) {
 7820         case PF_SYNPROXY_DROP:
 7821                 m_freem(pd.m);
 7822                 /* FALLTHROUGH */
 7823         case PF_DEFER:
 7824 #if NPFSYNC > 0
 7825                 /*
 7826                  * We no longer hold PF_LOCK() here, so we can dispatch
 7827                  * deferral if we are asked to do so.
 7828                  */
 7829                 if (deferral != NULL)
 7830                         pfsync_undefer(deferral, 0);
 7831 #endif  /* NPFSYNC > 0 */
 7832                 pd.m = NULL;
 7833                 action = PF_PASS;
 7834                 break;
 7835         case PF_DIVERT:
 7836                 switch (pd.af) {
 7837                 case AF_INET:
 7838                         divert_packet(pd.m, pd.dir, r->divert.port);
 7839                         pd.m = NULL;
 7840                         break;
 7841 #ifdef INET6
 7842                 case AF_INET6:
 7843                         divert6_packet(pd.m, pd.dir, r->divert.port);
 7844                         pd.m = NULL;
 7845                         break;
 7846 #endif /* INET6 */
 7847                 }
 7848                 action = PF_PASS;
 7849                 break;
 7850 #ifdef INET6
 7851         case PF_AFRT:
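                      /*
                       * Address-family translation (af-to): convert the packet
                       * to the other family and hand it to that family's
                       * forwarding (inbound) or output (outbound) path below.
                       */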
 7852                 if (pf_translate_af(&pd)) {
 7853                         action = PF_DROP;
 7854                         break;
 7855                 }
 7856                 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
 7857                 switch (pd.naf) {
 7858                 case AF_INET:
 7859                         if (pd.dir == PF_IN) {
 7860                                 if (ipforwarding == 0) {
 7861                                         ipstat_inc(ips_cantforward);
 7862                                         action = PF_DROP;
 7863                                         break;
 7864                                 }
 7865                                 ip_forward(pd.m, ifp, NULL, 1);
 7866                         } else
 7867                                 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0);
 7868                         break;
 7869                 case AF_INET6:
 7870                         if (pd.dir == PF_IN) {
 7871                                 if (ip6_forwarding == 0) {
 7872                                         ip6stat_inc(ip6s_cantforward);
 7873                                         action = PF_DROP;
 7874                                         break;
 7875                                 }
 7876                                 ip6_forward(pd.m, NULL, 1);
 7877                         } else
 7878                                 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL);
 7879                         break;
 7880                 }
 7881                 if (action != PF_DROP) {
 7882                         pd.m = NULL;
 7883                         action = PF_PASS;
 7884                 }
 7885                 break;
 7886 #endif /* INET6 */
 7887         case PF_DROP:
 7888                 m_freem(pd.m);
 7889                 pd.m = NULL;
 7890                 break;
 7891         default:
 7892                 if (st && st->rt) {
 7893                         switch (pd.af) {
 7894                         case AF_INET:
 7895                                 pf_route(&pd, st);
 7896                                 break;
 7897 #ifdef INET6
 7898                         case AF_INET6:
 7899                                 pf_route6(&pd, st);
 7900                                 break;
 7901 #endif /* INET6 */
 7902                         }
 7903                 }
 7904                 break;
 7905         }
 7906 
 7907 #ifdef INET6
 7908         /* if reassembled packet passed, create new fragments */
 7909         if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD &&
 7910             pd.af == AF_INET6) {
 7911                 struct m_tag    *mtag;
 7912 
 7913                 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)))
 7914                         action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL);
 7915         }
 7916 #endif  /* INET6 */
 7917         if (st && action != PF_DROP) {
 7918                 if (!st->if_index_in && dir == PF_IN)
 7919                         st->if_index_in = ifp->if_index;
 7920                 else if (!st->if_index_out && dir == PF_OUT)
 7921                         st->if_index_out = ifp->if_index;
 7922         }
 7923 
 7924         *m0 = pd.m;
 7925 
 7926         pf_state_unref(st);
 7927 
 7928         return (action);
 7929 }
 7930 
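      /*
       * Report whether a packet is known to be destined to a local socket:
       * returns 1 if it was diverted by pf or its state key is linked to a
       * PCB, -1 if pf cannot tell.
       */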
 7931 int
 7932 pf_ouraddr(struct mbuf *m)
 7933 {
 7934         struct pf_state_key     *sk;
 7935 
 7936         if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED)
 7937                 return (1);
 7938 
 7939         sk = m->m_pkthdr.pf.statekey;
 7940         if (sk != NULL) {
 7941                 if (sk->sk_inp != NULL)
 7942                         return (1);
 7943         }
 7944 
 7945         return (-1);
 7946 }
 7947 
 7948 /*
 7949  * Must be called whenever any addressing information, such as
 7950  * address, port, or protocol, has changed.
 7951  */
 7952 void
 7953 pf_pkt_addr_changed(struct mbuf *m)
 7954 {
 7955         pf_mbuf_unlink_state_key(m);
 7956         pf_mbuf_unlink_inpcb(m);
 7957 }
 7958 
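      /*
       * Return the PCB cached on the mbuf's state key if that key is still
       * valid; the caller gets its own reference via in_pcbref().
       */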
 7959 struct inpcb *
 7960 pf_inp_lookup(struct mbuf *m)
 7961 {
 7962         struct inpcb *inp = NULL;
 7963         struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
 7964 
 7965         if (!pf_state_key_isvalid(sk))
 7966                 pf_mbuf_unlink_state_key(m);
 7967         else
 7968                 inp = m->m_pkthdr.pf.statekey->sk_inp;
 7969 
 7970         if (inp && inp->inp_pf_sk)
 7971                 KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk);
 7972 
 7973         in_pcbref(inp);
 7974         return (inp);
 7975 }
 7976 
 7977 void
 7978 pf_inp_link(struct mbuf *m, struct inpcb *inp)
 7979 {
 7980         struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
 7981 
 7982         if (!pf_state_key_isvalid(sk)) {
 7983                 pf_mbuf_unlink_state_key(m);
 7984                 return;
 7985         }
 7986 
 7987         /*
 7988          * We don't need to grab the PF lock here.  In the worst case we
 7989          * link the inp to a state that is just being marked as deleted
 7990          * by another thread.
 7991          */
 7992         if (inp && !sk->sk_inp && !inp->inp_pf_sk)
 7993                 pf_state_key_link_inpcb(sk, inp);
 7994 
 7995         /* The statekey has done its job of finding the inp; it is no longer needed. */
 7996         pf_mbuf_unlink_state_key(m);
 7997 }
 7998 
 7999 void
 8000 pf_inp_unlink(struct inpcb *inp)
 8001 {
 8002         pf_inpcb_unlink_state_key(inp);
 8003 }
 8004 
 8005 void
 8006 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev)
 8007 {
 8008         struct pf_state_key *old_reverse;
 8009 
 8010         old_reverse = atomic_cas_ptr(&sk->sk_reverse, NULL, skrev);
 8011         if (old_reverse != NULL)
 8012                 KASSERT(old_reverse == skrev);
 8013         else {
 8014                 pf_state_key_ref(skrev);
 8015 
 8016                 /*
 8017                  * NOTE: if sk == skrev, the KASSERT() below holds true.  We
 8018                  * still want to grab a reference in that case, because
 8019                  * pf_state_key_unlink_reverse() does not check whether the
 8020                  * keys are identical.
 8021                  */
 8022                 old_reverse = atomic_cas_ptr(&skrev->sk_reverse, NULL, sk);
 8023                 if (old_reverse != NULL)
 8024                         KASSERT(old_reverse == sk);
 8025 
 8026                 pf_state_key_ref(sk);
 8027         }
 8028 }
 8029 
 8030 #if NPFLOG > 0
 8031 void
 8032 pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am,
 8033     struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules)
 8034 {
 8035         struct pf_rule_item     *ri;
 8036 
 8037         /* if this is the log(matches) rule, the packet has been logged already */
 8038         if (rm->log & PF_LOG_MATCHES)
 8039                 return;
 8040 
 8041         SLIST_FOREACH(ri, matchrules, entry)
 8042                 if (ri->r->log & PF_LOG_MATCHES)
 8043                         pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r);
 8044 }
 8045 #endif  /* NPFLOG > 0 */
 8046 
 8047 struct pf_state_key *
 8048 pf_state_key_ref(struct pf_state_key *sk)
 8049 {
 8050         if (sk != NULL)
 8051                 PF_REF_TAKE(sk->sk_refcnt);
 8052 
 8053         return (sk);
 8054 }
 8055 
 8056 void
 8057 pf_state_key_unref(struct pf_state_key *sk)
 8058 {
 8059         if (PF_REF_RELE(sk->sk_refcnt)) {
 8060                 /* state key must be removed from tree */
 8061                 KASSERT(!pf_state_key_isvalid(sk));
 8062                 /* state key must be unlinked from reverse key */
 8063                 KASSERT(sk->sk_reverse == NULL);
 8064                 /* state key must be unlinked from socket */
 8065                 KASSERT(sk->sk_inp == NULL);
 8066                 pool_put(&pf_state_key_pl, sk);
 8067         }
 8068 }
 8069 
 8070 int
 8071 pf_state_key_isvalid(struct pf_state_key *sk)
 8072 {
 8073         return ((sk != NULL) && (sk->sk_removed == 0));
 8074 }
 8075 
 8076 void
 8077 pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk)
 8078 {
 8079         KASSERT(m->m_pkthdr.pf.statekey == NULL);
 8080         m->m_pkthdr.pf.statekey = pf_state_key_ref(sk);
 8081 }
 8082 
 8083 void
 8084 pf_mbuf_unlink_state_key(struct mbuf *m)
 8085 {
 8086         struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
 8087 
 8088         if (sk != NULL) {
 8089                 m->m_pkthdr.pf.statekey = NULL;
 8090                 pf_state_key_unref(sk);
 8091         }
 8092 }
 8093 
 8094 void
 8095 pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp)
 8096 {
 8097         KASSERT(m->m_pkthdr.pf.inp == NULL);
 8098         m->m_pkthdr.pf.inp = in_pcbref(inp);
 8099 }
 8100 
 8101 void
 8102 pf_mbuf_unlink_inpcb(struct mbuf *m)
 8103 {
 8104         struct inpcb *inp = m->m_pkthdr.pf.inp;
 8105 
 8106         if (inp != NULL) {
 8107                 m->m_pkthdr.pf.inp = NULL;
 8108                 in_pcbunref(inp);
 8109         }
 8110 }
 8111 
 8112 void
 8113 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp)
 8114 {
 8115         KASSERT(sk->sk_inp == NULL);
 8116         sk->sk_inp = in_pcbref(inp);
 8117         KASSERT(inp->inp_pf_sk == NULL);
 8118         inp->inp_pf_sk = pf_state_key_ref(sk);
 8119 }
 8120 
 8121 void
 8122 pf_inpcb_unlink_state_key(struct inpcb *inp)
 8123 {
 8124         struct pf_state_key *sk = inp->inp_pf_sk;
 8125 
 8126         if (sk != NULL) {
 8127                 KASSERT(sk->sk_inp == inp);
 8128                 sk->sk_inp = NULL;
 8129                 inp->inp_pf_sk = NULL;
 8130                 pf_state_key_unref(sk);
 8131                 in_pcbunref(inp);
 8132         }
 8133 }
 8134 
 8135 void
 8136 pf_state_key_unlink_inpcb(struct pf_state_key *sk)
 8137 {
 8138         struct inpcb *inp = sk->sk_inp;
 8139 
 8140         if (inp != NULL) {
 8141                 KASSERT(inp->inp_pf_sk == sk);
 8142                 sk->sk_inp = NULL;
 8143                 inp->inp_pf_sk = NULL;
 8144                 pf_state_key_unref(sk);
 8145                 in_pcbunref(inp);
 8146         }
 8147 }
 8148 
 8149 void
 8150 pf_state_key_unlink_reverse(struct pf_state_key *sk)
 8151 {
 8152         struct pf_state_key *skrev = sk->sk_reverse;
 8153 
 8154         /* Note that sk and skrev may be equal; in that case we unref twice. */
 8155         if (skrev != NULL) {
 8156                 KASSERT(skrev->sk_reverse == sk);
 8157                 sk->sk_reverse = NULL;
 8158                 skrev->sk_reverse = NULL;
 8159                 pf_state_key_unref(skrev);
 8160                 pf_state_key_unref(sk);
 8161         }
 8162 }
 8163 
 8164 struct pf_state *
 8165 pf_state_ref(struct pf_state *st)
 8166 {
 8167         if (st != NULL)
 8168                 PF_REF_TAKE(st->refcnt);
 8169         return (st);
 8170 }
 8171 
 8172 void
 8173 pf_state_unref(struct pf_state *st)
 8174 {
 8175         if ((st != NULL) && PF_REF_RELE(st->refcnt)) {
 8176                 /* never inserted or removed */
 8177 #if NPFSYNC > 0
 8178                 KASSERT((TAILQ_NEXT(st, sync_list) == NULL) ||
 8179                     ((TAILQ_NEXT(st, sync_list) == _Q_INVALID) &&
 8180                     (st->sync_state == PFSYNC_S_NONE)));
 8181 #endif  /* NPFSYNC */
 8182                 KASSERT((TAILQ_NEXT(st, entry_list) == NULL) ||
 8183                     (TAILQ_NEXT(st, entry_list) == _Q_INVALID));
 8184 
 8185                 pf_state_key_unref(st->key[PF_SK_WIRE]);
 8186                 pf_state_key_unref(st->key[PF_SK_STACK]);
 8187 
 8188                 pool_put(&pf_state_pl, st);
 8189         }
 8190 }
 8191 
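      /*
       * Packet delay: hold the mbuf on a timeout for pf.delay milliseconds;
       * pf_pktenqueue_delayed() then re-enqueues it on the original interface,
       * or frees it if that interface has gone away.
       */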
 8192 int
 8193 pf_delay_pkt(struct mbuf *m, u_int ifidx)
 8194 {
 8195         struct pf_pktdelay      *pdy;
 8196 
 8197         if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) {
 8198                 m_freem(m);
 8199                 return (ENOBUFS);
 8200         }
 8201         pdy->ifidx = ifidx;
 8202         pdy->m = m;
 8203         timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy);
 8204         timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay);
 8205         m->m_pkthdr.pf.delay = 0;
 8206         return (0);
 8207 }
 8208 
 8209 void
 8210 pf_pktenqueue_delayed(void *arg)
 8211 {
 8212         struct pf_pktdelay      *pdy = arg;
 8213         struct ifnet            *ifp;
 8214 
 8215         ifp = if_get(pdy->ifidx);
 8216         if (ifp != NULL) {
 8217                 if_enqueue(ifp, pdy->m);
 8218                 if_put(ifp);
 8219         } else
 8220                 m_freem(pdy->m);
 8221 
 8222         pool_put(&pf_pktdelay_pl, pdy);
 8223 }
