FreeBSD/Linux Kernel Cross Reference
sys/net/if_pfsync.c


    1 /*      $OpenBSD: if_pfsync.c,v 1.312 2023/01/04 10:31:55 dlg Exp $     */
    2 
    3 /*
    4  * Copyright (c) 2002 Michael Shalayeff
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
   20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
   26  * THE POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
   31  *
   32  * Permission to use, copy, modify, and distribute this software for any
   33  * purpose with or without fee is hereby granted, provided that the above
   34  * copyright notice and this permission notice appear in all copies.
   35  *
   36  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   37  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   38  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   39  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   41  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   42  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   43  */
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/time.h>
   48 #include <sys/malloc.h>
   49 #include <sys/mbuf.h>
   50 #include <sys/socket.h>
   51 #include <sys/ioctl.h>
   52 #include <sys/timeout.h>
   53 #include <sys/kernel.h>
   54 #include <sys/sysctl.h>
   55 #include <sys/pool.h>
   56 #include <sys/syslog.h>
   57 
   58 #include <net/if.h>
   59 #include <net/if_types.h>
   60 #include <net/bpf.h>
   61 #include <net/netisr.h>
   62 
   63 #include <netinet/in.h>
   64 #include <netinet/if_ether.h>
   65 #include <netinet/ip.h>
   66 #include <netinet/in_var.h>
   67 #include <netinet/ip_var.h>
   68 #include <netinet/ip_ipsp.h>
   69 #include <netinet/ip_icmp.h>
   70 #include <netinet/icmp6.h>
   71 #include <netinet/tcp.h>
   72 #include <netinet/tcp_seq.h>
   73 #include <netinet/tcp_fsm.h>
   74 #include <netinet/udp.h>
   75 
   76 #ifdef INET6
   77 #include <netinet6/in6_var.h>
   78 #include <netinet/ip6.h>
   79 #include <netinet6/ip6_var.h>
   80 #include <netinet6/nd6.h>
   81 #endif /* INET6 */
   82 
   83 #include "carp.h"
   84 #if NCARP > 0
   85 #include <netinet/ip_carp.h>
   86 #endif
   87 
   88 #define PF_DEBUGNAME    "pfsync: "
   89 #include <net/pfvar.h>
   90 #include <net/pfvar_priv.h>
   91 #include <net/if_pfsync.h>
   92 
   93 #include "bpfilter.h"
   94 #include "pfsync.h"
   95 
   96 #define PFSYNC_DEFER_NSEC 20000000ULL
   97 
   98 #define PFSYNC_MINPKT ( \
   99         sizeof(struct ip) + \
  100         sizeof(struct pfsync_header))
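       /*
        * A sketch of the wire format handled in this file (net/if_pfsync.h
        * has the authoritative definitions): each pfsync packet is an IP
        * datagram of protocol IPPROTO_PFSYNC laid out as
        *
        *      struct ip                IP header, TTL fixed at PFSYNC_DFLTTL
        *      struct pfsync_header     version, total length, pf checksum
        *      struct pfsync_subheader  action, message len in 32-bit words,
        *                               message count
        *      message * count          possibly followed by further
        *                               subheader/message runs
        *
        * PFSYNC_MINPKT is thus the size of a packet carrying no messages;
        * sc_len is reset to it whenever the pending data is snapshotted.
        */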
  101 
  102 int     pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
  103             struct pfsync_state_peer *);
  104 
  105 int     pfsync_in_clr(caddr_t, int, int, int);
  106 int     pfsync_in_iack(caddr_t, int, int, int);
  107 int     pfsync_in_upd_c(caddr_t, int, int, int);
  108 int     pfsync_in_ureq(caddr_t, int, int, int);
  109 int     pfsync_in_del(caddr_t, int, int, int);
  110 int     pfsync_in_del_c(caddr_t, int, int, int);
  111 int     pfsync_in_bus(caddr_t, int, int, int);
  112 int     pfsync_in_tdb(caddr_t, int, int, int);
  113 int     pfsync_in_ins(caddr_t, int, int, int);
  114 int     pfsync_in_upd(caddr_t, int, int, int);
  115 int     pfsync_in_eof(caddr_t, int, int, int);
  116 
  117 int     pfsync_in_error(caddr_t, int, int, int);
  118 
  119 void    pfsync_update_state_locked(struct pf_state *);
  120 
  121 const struct {
  122         int     (*in)(caddr_t, int, int, int);
  123         size_t  len;
  124 } pfsync_acts[] = {
  125         /* PFSYNC_ACT_CLR */
  126         { pfsync_in_clr,        sizeof(struct pfsync_clr) },
   127         /* PFSYNC_ACT_OINS */
  128         { pfsync_in_error,      0 },
  129         /* PFSYNC_ACT_INS_ACK */
  130         { pfsync_in_iack,       sizeof(struct pfsync_ins_ack) },
  131         /* PFSYNC_ACT_OUPD */
  132         { pfsync_in_error,      0 },
  133         /* PFSYNC_ACT_UPD_C */
  134         { pfsync_in_upd_c,      sizeof(struct pfsync_upd_c) },
  135         /* PFSYNC_ACT_UPD_REQ */
  136         { pfsync_in_ureq,       sizeof(struct pfsync_upd_req) },
  137         /* PFSYNC_ACT_DEL */
  138         { pfsync_in_del,        sizeof(struct pfsync_state) },
  139         /* PFSYNC_ACT_DEL_C */
  140         { pfsync_in_del_c,      sizeof(struct pfsync_del_c) },
  141         /* PFSYNC_ACT_INS_F */
  142         { pfsync_in_error,      0 },
  143         /* PFSYNC_ACT_DEL_F */
  144         { pfsync_in_error,      0 },
  145         /* PFSYNC_ACT_BUS */
  146         { pfsync_in_bus,        sizeof(struct pfsync_bus) },
  147         /* PFSYNC_ACT_OTDB */
  148         { pfsync_in_error,      0 },
  149         /* PFSYNC_ACT_EOF */
   150         { pfsync_in_eof,        0 },
  151         /* PFSYNC_ACT_INS */
  152         { pfsync_in_ins,        sizeof(struct pfsync_state) },
  153         /* PFSYNC_ACT_UPD */
  154         { pfsync_in_upd,        sizeof(struct pfsync_state) },
  155         /* PFSYNC_ACT_TDB */
  156         { pfsync_in_tdb,        sizeof(struct pfsync_tdb) },
  157 };
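       /*
        * pfsync_input() indexes this table directly with subh.action, so
        * the rows must stay in PFSYNC_ACT_* order.  The PFSYNC_ACT_O*
        * entries are message formats from the previous protocol revision:
        * routing them to pfsync_in_error() counts them as pfsyncs_badact
        * and aborts the packet rather than misparsing old-format messages.
        */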
  158 
  159 struct pfsync_q {
  160         void            (*write)(struct pf_state *, void *);
  161         size_t          len;
  162         u_int8_t        action;
  163 };
  164 
  165 /* we have one of these for every PFSYNC_S_ */
  166 void    pfsync_out_state(struct pf_state *, void *);
  167 void    pfsync_out_iack(struct pf_state *, void *);
  168 void    pfsync_out_upd_c(struct pf_state *, void *);
  169 void    pfsync_out_del(struct pf_state *, void *);
  170 
  171 struct pfsync_q pfsync_qs[] = {
  172         { pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
  173         { pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
  174         { pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
  175         { pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
  176         { pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
  177 };
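       /*
        * These rows are indexed by the PFSYNC_S_* value kept in each
        * state's sync_state field, so they must match that enumeration's
        * order; pfsync_grab_snapshot() drains the corresponding sc_qs[]
        * queues in index order, which is what puts acks and compressed
        * messages ahead of full state messages within a packet.
        */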
  178 
  179 void    pfsync_q_ins(struct pf_state *, int);
  180 void    pfsync_q_del(struct pf_state *);
  181 
  182 struct pfsync_upd_req_item {
  183         TAILQ_ENTRY(pfsync_upd_req_item)        ur_entry;
  184         TAILQ_ENTRY(pfsync_upd_req_item)        ur_snap;
  185         struct pfsync_upd_req                   ur_msg;
  186 };
  187 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
  188 
  189 struct pfsync_deferral {
  190         TAILQ_ENTRY(pfsync_deferral)             pd_entry;
  191         struct pf_state                         *pd_st;
  192         struct mbuf                             *pd_m;
  193         uint64_t                                 pd_deadline;
  194 };
  195 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
  196 
  197 #define PFSYNC_PLSIZE   MAX(sizeof(struct pfsync_upd_req_item), \
  198                             sizeof(struct pfsync_deferral))
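       /*
        * sc_pool (see pfsync_clone_create() below) hands out both update
        * request items and deferrals, so its item size must cover the
        * larger of the two structures.
        */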
  199 
  200 void    pfsync_out_tdb(struct tdb *, void *);
  201 
  202 struct pfsync_softc {
  203         struct ifnet             sc_if;
  204         unsigned int             sc_sync_ifidx;
  205 
  206         struct pool              sc_pool;
  207 
  208         struct ip_moptions       sc_imo;
  209 
  210         struct in_addr           sc_sync_peer;
  211         u_int8_t                 sc_maxupdates;
  212 
  213         struct ip                sc_template;
  214 
  215         struct pf_state_queue    sc_qs[PFSYNC_S_COUNT];
  216         struct mutex             sc_st_mtx;
  217         size_t                   sc_len;
  218 
  219         struct pfsync_upd_reqs   sc_upd_req_list;
  220         struct mutex             sc_upd_req_mtx;
  221 
  222         int                      sc_initial_bulk;
  223         int                      sc_link_demoted;
  224 
  225         int                      sc_defer;
  226         struct pfsync_deferrals  sc_deferrals;
  227         u_int                    sc_deferred;
  228         struct mutex             sc_deferrals_mtx;
  229         struct timeout           sc_deferrals_tmo;
  230 
  231         void                    *sc_plus;
  232         size_t                   sc_pluslen;
  233 
  234         u_int32_t                sc_ureq_sent;
  235         int                      sc_bulk_tries;
  236         struct timeout           sc_bulkfail_tmo;
  237 
  238         u_int32_t                sc_ureq_received;
  239         struct pf_state         *sc_bulk_next;
  240         struct pf_state         *sc_bulk_last;
  241         struct timeout           sc_bulk_tmo;
  242 
  243         TAILQ_HEAD(, tdb)        sc_tdb_q;
  244         struct mutex             sc_tdb_mtx;
  245 
  246         struct task              sc_ltask;
  247         struct task              sc_dtask;
  248 
  249         struct timeout           sc_tmo;
  250 };
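       /*
        * A note on the mutexes above: each pending-work list has its own
        * lock (sc_st_mtx for the sc_qs[] state queues, sc_upd_req_mtx and
        * sc_tdb_mtx for the request and tdb lists, sc_deferrals_mtx for
        * deferrals), and pfsync_grab_snapshot() takes the first three
        * together just long enough to detach all pending work in one step.
        */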
  251 
  252 struct pfsync_snapshot {
  253         struct pfsync_softc     *sn_sc;
  254         struct pf_state_queue    sn_qs[PFSYNC_S_COUNT];
  255         struct pfsync_upd_reqs   sn_upd_req_list;
  256         TAILQ_HEAD(, tdb)        sn_tdb_q;
  257         size_t                   sn_len;
  258         void                    *sn_plus;
  259         size_t                   sn_pluslen;
  260 };
  261 
  262 struct pfsync_softc     *pfsyncif = NULL;
  263 struct cpumem           *pfsynccounters;
  264 
  265 void    pfsyncattach(int);
  266 int     pfsync_clone_create(struct if_clone *, int);
  267 int     pfsync_clone_destroy(struct ifnet *);
  268 void    pfsync_update_net_tdb(struct pfsync_tdb *);
  269 int     pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
  270             struct rtentry *);
  271 int     pfsyncioctl(struct ifnet *, u_long, caddr_t);
  272 void    pfsyncstart(struct ifqueue *);
  273 void    pfsync_syncdev_state(void *);
  274 void    pfsync_ifdetach(void *);
  275 
  276 void    pfsync_deferred(struct pf_state *, int);
  277 void    pfsync_undefer(struct pfsync_deferral *, int);
  278 void    pfsync_deferrals_tmo(void *);
  279 
  280 void    pfsync_cancel_full_update(struct pfsync_softc *);
  281 void    pfsync_request_full_update(struct pfsync_softc *);
  282 void    pfsync_request_update(u_int32_t, u_int64_t);
  283 void    pfsync_update_state_req(struct pf_state *);
  284 
  285 void    pfsync_drop(struct pfsync_softc *);
  286 void    pfsync_sendout(void);
  287 void    pfsync_send_plus(void *, size_t);
  288 void    pfsync_timeout(void *);
  289 void    pfsync_tdb_timeout(void *);
  290 
  291 void    pfsync_bulk_start(void);
  292 void    pfsync_bulk_status(u_int8_t);
  293 void    pfsync_bulk_update(void *);
  294 void    pfsync_bulk_fail(void *);
  295 
  296 void    pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *);
  297 void    pfsync_drop_snapshot(struct pfsync_snapshot *);
  298 
  299 void    pfsync_send_dispatch(void *);
  300 void    pfsync_send_pkt(struct mbuf *);
  301 
  302 static struct mbuf_queue        pfsync_mq;
  303 static struct task      pfsync_task =
  304     TASK_INITIALIZER(pfsync_send_dispatch, &pfsync_mq);
  305 
  306 #define PFSYNC_MAX_BULKTRIES    12
  307 int     pfsync_sync_ok;
  308 
  309 struct if_clone pfsync_cloner =
  310     IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
  311 
  312 void
  313 pfsyncattach(int npfsync)
  314 {
  315         if_clone_attach(&pfsync_cloner);
  316         pfsynccounters = counters_alloc(pfsyncs_ncounters);
  317         mq_init(&pfsync_mq, 4096, IPL_MPFLOOR);
  318 }
  319 
  320 int
  321 pfsync_clone_create(struct if_clone *ifc, int unit)
  322 {
  323         struct pfsync_softc *sc;
  324         struct ifnet *ifp;
  325         int q;
  326 
  327         if (unit != 0)
  328                 return (EINVAL);
  329 
  330         pfsync_sync_ok = 1;
  331 
  332         sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO);
  333         for (q = 0; q < PFSYNC_S_COUNT; q++)
  334                 TAILQ_INIT(&sc->sc_qs[q]);
  335         mtx_init(&sc->sc_st_mtx, IPL_MPFLOOR);
  336 
  337         pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_MPFLOOR, 0, "pfsync",
  338             NULL);
  339         TAILQ_INIT(&sc->sc_upd_req_list);
  340         mtx_init(&sc->sc_upd_req_mtx, IPL_MPFLOOR);
  341         TAILQ_INIT(&sc->sc_deferrals);
  342         mtx_init(&sc->sc_deferrals_mtx, IPL_MPFLOOR);
  343         timeout_set_proc(&sc->sc_deferrals_tmo, pfsync_deferrals_tmo, sc);
  344         task_set(&sc->sc_ltask, pfsync_syncdev_state, sc);
  345         task_set(&sc->sc_dtask, pfsync_ifdetach, sc);
  346         sc->sc_deferred = 0;
  347 
  348         TAILQ_INIT(&sc->sc_tdb_q);
  349         mtx_init(&sc->sc_tdb_mtx, IPL_MPFLOOR);
  350 
  351         sc->sc_len = PFSYNC_MINPKT;
  352         sc->sc_maxupdates = 128;
  353 
  354         sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
  355             sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
  356         sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
  357 
  358         ifp = &sc->sc_if;
  359         snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
  360         ifp->if_softc = sc;
  361         ifp->if_ioctl = pfsyncioctl;
  362         ifp->if_output = pfsyncoutput;
  363         ifp->if_qstart = pfsyncstart;
  364         ifp->if_type = IFT_PFSYNC;
  365         ifp->if_hdrlen = sizeof(struct pfsync_header);
  366         ifp->if_mtu = ETHERMTU;
  367         ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
  368         timeout_set_proc(&sc->sc_tmo, pfsync_timeout, NULL);
  369         timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, NULL);
  370         timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, NULL);
  371 
  372         if_attach(ifp);
  373         if_alloc_sadl(ifp);
  374 
  375 #if NCARP > 0
  376         if_addgroup(ifp, "carp");
  377 #endif
  378 
  379 #if NBPFILTER > 0
  380         bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
  381 #endif
  382 
  383         pfsyncif = sc;
  384 
  385         return (0);
  386 }
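       /*
        * For reference, a typical configuration of the cloned interface
        * (syntax per ifconfig(8) and pfsync(4); the device and address
        * here are only examples):
        *
        *      # ifconfig pfsync0 syncdev em1 maxupd 128 up
        *      # ifconfig pfsync0 syncpeer 192.0.2.2 defer
        *
        * which arrives in pfsyncioctl() below as SIOCSETPFSYNC.  Note the
        * unit check above: only pfsync0 can ever exist.
        */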
  387 
  388 int
  389 pfsync_clone_destroy(struct ifnet *ifp)
  390 {
  391         struct pfsync_softc *sc = ifp->if_softc;
  392         struct ifnet *ifp0;
  393         struct pfsync_deferral *pd;
  394         struct pfsync_deferrals  deferrals;
  395 
  396         NET_LOCK();
  397 
  398 #if NCARP > 0
  399         if (!pfsync_sync_ok)
  400                 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
  401         if (sc->sc_link_demoted)
  402                 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
  403 #endif
  404         if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) {
  405                 if_linkstatehook_del(ifp0, &sc->sc_ltask);
  406                 if_detachhook_del(ifp0, &sc->sc_dtask);
  407         }
  408         if_put(ifp0);
  409 
  410         /* XXXSMP breaks atomicity */
  411         NET_UNLOCK();
  412         if_detach(ifp);
  413         NET_LOCK();
  414 
  415         pfsync_drop(sc);
  416 
  417         if (sc->sc_deferred > 0) {
  418                 TAILQ_INIT(&deferrals);
  419                 mtx_enter(&sc->sc_deferrals_mtx);
  420                 TAILQ_CONCAT(&deferrals, &sc->sc_deferrals, pd_entry);
  421                 sc->sc_deferred = 0;
  422                 mtx_leave(&sc->sc_deferrals_mtx);
  423 
  424                 while ((pd = TAILQ_FIRST(&deferrals)) != NULL) {
  425                         TAILQ_REMOVE(&deferrals, pd, pd_entry);
  426                         pfsync_undefer(pd, 0);
  427                 }
  428         }
  429 
  430         pfsyncif = NULL;
  431         timeout_del(&sc->sc_bulkfail_tmo);
  432         timeout_del(&sc->sc_bulk_tmo);
  433         timeout_del(&sc->sc_tmo);
  434 
  435         NET_UNLOCK();
  436 
  437         pool_destroy(&sc->sc_pool);
  438         free(sc->sc_imo.imo_membership, M_IPMOPTS,
  439             sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *));
  440         free(sc, M_DEVBUF, sizeof(*sc));
  441 
  442         return (0);
  443 }
  444 
  445 /*
   446  * Start output on the pfsync interface: packets never leave via the ifqueue, so purge it.
  447  */
  448 void
  449 pfsyncstart(struct ifqueue *ifq)
  450 {
  451         ifq_purge(ifq);
  452 }
  453 
  454 void
  455 pfsync_syncdev_state(void *arg)
  456 {
  457         struct pfsync_softc *sc = arg;
  458         struct ifnet *ifp;
  459 
  460         if ((sc->sc_if.if_flags & IFF_UP) == 0)
  461                 return;
  462         if ((ifp = if_get(sc->sc_sync_ifidx)) == NULL)
  463                 return;
  464 
  465         if (ifp->if_link_state == LINK_STATE_DOWN) {
  466                 sc->sc_if.if_flags &= ~IFF_RUNNING;
  467                 if (!sc->sc_link_demoted) {
  468 #if NCARP > 0
  469                         carp_group_demote_adj(&sc->sc_if, 1,
  470                             "pfsync link state down");
  471 #endif
  472                         sc->sc_link_demoted = 1;
  473                 }
  474 
  475                 /* drop everything */
  476                 timeout_del(&sc->sc_tmo);
  477                 pfsync_drop(sc);
  478 
  479                 pfsync_cancel_full_update(sc);
  480         } else if (sc->sc_link_demoted) {
  481                 sc->sc_if.if_flags |= IFF_RUNNING;
  482 
  483                 pfsync_request_full_update(sc);
  484         }
  485 
  486         if_put(ifp);
  487 }
  488 
  489 void
  490 pfsync_ifdetach(void *arg)
  491 {
  492         struct pfsync_softc *sc = arg;
  493         struct ifnet *ifp;
  494 
  495         if ((ifp = if_get(sc->sc_sync_ifidx)) != NULL) {
  496                 if_linkstatehook_del(ifp, &sc->sc_ltask);
  497                 if_detachhook_del(ifp, &sc->sc_dtask);
  498         }
  499         if_put(ifp);
  500 
  501         sc->sc_sync_ifidx = 0;
  502 }
  503 
  504 int
  505 pfsync_input(struct mbuf **mp, int *offp, int proto, int af)
  506 {
  507         struct mbuf *n, *m = *mp;
  508         struct pfsync_softc *sc = pfsyncif;
  509         struct ip *ip = mtod(m, struct ip *);
  510         struct pfsync_header *ph;
  511         struct pfsync_subheader subh;
  512         int offset, noff, len, count, mlen, flags = 0;
  513         int e;
  514 
  515         NET_ASSERT_LOCKED();
  516 
  517         pfsyncstat_inc(pfsyncs_ipackets);
  518 
  519         /* verify that we have a sync interface configured */
  520         if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
  521             sc->sc_sync_ifidx == 0 || !pf_status.running)
  522                 goto done;
  523 
  524         /* verify that the packet came in on the right interface */
  525         if (sc->sc_sync_ifidx != m->m_pkthdr.ph_ifidx) {
  526                 pfsyncstat_inc(pfsyncs_badif);
  527                 goto done;
  528         }
  529 
  530         sc->sc_if.if_ipackets++;
  531         sc->sc_if.if_ibytes += m->m_pkthdr.len;
  532 
   533         /* verify the IP TTL is 255, i.e. the packet cannot have been forwarded */
  534         if (ip->ip_ttl != PFSYNC_DFLTTL) {
  535                 pfsyncstat_inc(pfsyncs_badttl);
  536                 goto done;
  537         }
  538 
  539         offset = ip->ip_hl << 2;
  540         n = m_pulldown(m, offset, sizeof(*ph), &noff);
  541         if (n == NULL) {
  542                 pfsyncstat_inc(pfsyncs_hdrops);
  543                 return IPPROTO_DONE;
  544         }
  545         ph = (struct pfsync_header *)(n->m_data + noff);
  546 
  547         /* verify the version */
  548         if (ph->version != PFSYNC_VERSION) {
  549                 pfsyncstat_inc(pfsyncs_badver);
  550                 goto done;
  551         }
  552         len = ntohs(ph->len) + offset;
  553         if (m->m_pkthdr.len < len) {
  554                 pfsyncstat_inc(pfsyncs_badlen);
  555                 goto done;
  556         }
  557 
  558         if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
  559                 flags = PFSYNC_SI_CKSUM;
  560 
  561         offset += sizeof(*ph);
  562         while (offset <= len - sizeof(subh)) {
  563                 m_copydata(m, offset, sizeof(subh), &subh);
  564                 offset += sizeof(subh);
  565 
  566                 mlen = subh.len << 2;
  567                 count = ntohs(subh.count);
  568 
  569                 if (subh.action >= PFSYNC_ACT_MAX ||
  570                     subh.action >= nitems(pfsync_acts) ||
  571                     mlen < pfsync_acts[subh.action].len) {
  572                         /*
   573                          * subheaders are always followed by at least
   574                          * one message, so if the peer is new enough to
   575                          * tell us how big its messages are then we
   576                          * know enough to skip them.
  577                          */
  578                         if (count > 0 && mlen > 0) {
  579                                 offset += count * mlen;
  580                                 continue;
  581                         }
  582                         pfsyncstat_inc(pfsyncs_badact);
  583                         goto done;
  584                 }
  585 
  586                 n = m_pulldown(m, offset, mlen * count, &noff);
  587                 if (n == NULL) {
  588                         pfsyncstat_inc(pfsyncs_badlen);
  589                         return IPPROTO_DONE;
  590                 }
  591 
  592                 e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count,
  593                     flags);
  594                 if (e != 0)
  595                         goto done;
  596 
  597                 offset += mlen * count;
  598         }
  599 
  600 done:
  601         m_freem(m);
  602         return IPPROTO_DONE;
  603 }
  604 
  605 int
  606 pfsync_in_clr(caddr_t buf, int len, int count, int flags)
  607 {
  608         struct pfsync_clr *clr;
  609         struct pf_state *st, *nexts;
  610         struct pfi_kif *kif;
  611         u_int32_t creatorid;
  612         int i;
  613 
  614         PF_LOCK();
  615         for (i = 0; i < count; i++) {
   616                 clr = (struct pfsync_clr *)(buf + len * i);
  617                 kif = NULL;
  618                 creatorid = clr->creatorid;
  619                 if (strlen(clr->ifname) &&
  620                     (kif = pfi_kif_find(clr->ifname)) == NULL)
  621                         continue;
  622 
  623                 PF_STATE_ENTER_WRITE();
  624                 RBT_FOREACH_SAFE(st, pf_state_tree_id, &tree_id, nexts) {
  625                         if (st->creatorid == creatorid &&
  626                             ((kif && st->kif == kif) || !kif)) {
  627                                 SET(st->state_flags, PFSTATE_NOSYNC);
  628                                 pf_remove_state(st);
  629                         }
  630                 }
  631                 PF_STATE_EXIT_WRITE();
  632         }
  633         PF_UNLOCK();
  634 
  635         return (0);
  636 }
  637 
  638 int
  639 pfsync_in_ins(caddr_t buf, int len, int count, int flags)
  640 {
  641         struct pfsync_state *sp;
  642         sa_family_t af1, af2;
  643         int i;
  644 
  645         PF_LOCK();
  646         for (i = 0; i < count; i++) {
  647                 sp = (struct pfsync_state *)(buf + len * i);
  648                 af1 = sp->key[0].af;
  649                 af2 = sp->key[1].af;
  650 
  651                 /* check for invalid values */
  652                 if (sp->timeout >= PFTM_MAX ||
  653                     sp->src.state > PF_TCPS_PROXY_DST ||
  654                     sp->dst.state > PF_TCPS_PROXY_DST ||
  655                     sp->direction > PF_OUT ||
  656                     (((af1 || af2) &&
  657                      ((af1 != AF_INET && af1 != AF_INET6) ||
  658                       (af2 != AF_INET && af2 != AF_INET6))) ||
  659                     (sp->af != AF_INET && sp->af != AF_INET6))) {
  660                         DPFPRINTF(LOG_NOTICE,
  661                             "pfsync_input: PFSYNC5_ACT_INS: invalid value");
  662                         pfsyncstat_inc(pfsyncs_badval);
  663                         continue;
  664                 }
  665 
  666                 if (pf_state_import(sp, flags) == ENOMEM) {
  667                         /* drop out, but process the rest of the actions */
  668                         break;
  669                 }
  670         }
  671         PF_UNLOCK();
  672 
  673         return (0);
  674 }
  675 
  676 int
  677 pfsync_in_iack(caddr_t buf, int len, int count, int flags)
  678 {
  679         struct pfsync_ins_ack *ia;
  680         struct pf_state_cmp id_key;
  681         struct pf_state *st;
  682         int i;
  683 
  684         for (i = 0; i < count; i++) {
  685                 ia = (struct pfsync_ins_ack *)(buf + len * i);
  686 
  687                 id_key.id = ia->id;
  688                 id_key.creatorid = ia->creatorid;
  689 
  690                 PF_STATE_ENTER_READ();
  691                 st = pf_find_state_byid(&id_key);
  692                 pf_state_ref(st);
  693                 PF_STATE_EXIT_READ();
  694                 if (st == NULL)
  695                         continue;
  696 
  697                 if (ISSET(st->state_flags, PFSTATE_ACK))
  698                         pfsync_deferred(st, 0);
  699 
  700                 pf_state_unref(st);
  701         }
  702 
  703         return (0);
  704 }
  705 
  706 int
  707 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
  708     struct pfsync_state_peer *dst)
  709 {
  710         int sync = 0;
  711 
  712         /*
  713          * The state should never go backwards except
  714          * for syn-proxy states.  Neither should the
  715          * sequence window slide backwards.
  716          */
  717         if ((st->src.state > src->state &&
  718             (st->src.state < PF_TCPS_PROXY_SRC ||
  719             src->state >= PF_TCPS_PROXY_SRC)) ||
  720 
  721             (st->src.state == src->state &&
  722             SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
  723                 sync++;
  724         else
  725                 pf_state_peer_ntoh(src, &st->src);
  726 
  727         if ((st->dst.state > dst->state) ||
  728 
  729             (st->dst.state >= TCPS_SYN_SENT &&
  730             SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
  731                 sync++;
  732         else
  733                 pf_state_peer_ntoh(dst, &st->dst);
  734 
  735         return (sync);
  736 }
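       /*
        * The return value counts stale directions: 0 means the peer's
        * view was merged in full, 1 or 2 mean our copy is newer in one or
        * both directions.  Callers treat sync < 2 as "the peer's timeout
        * and scrub data are still worth applying" and any nonzero value
        * as a cue to advertise our fresher state back to the peer.
        */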
  737 
  738 int
  739 pfsync_in_upd(caddr_t buf, int len, int count, int flags)
  740 {
  741         struct pfsync_state *sp;
  742         struct pf_state_cmp id_key;
  743         struct pf_state *st;
  744         int sync, error;
  745         int i;
  746 
  747         for (i = 0; i < count; i++) {
  748                 sp = (struct pfsync_state *)(buf + len * i);
  749 
  750                 /* check for invalid values */
  751                 if (sp->timeout >= PFTM_MAX ||
  752                     sp->src.state > PF_TCPS_PROXY_DST ||
  753                     sp->dst.state > PF_TCPS_PROXY_DST) {
  754                         DPFPRINTF(LOG_NOTICE,
  755                             "pfsync_input: PFSYNC_ACT_UPD: invalid value");
  756                         pfsyncstat_inc(pfsyncs_badval);
  757                         continue;
  758                 }
  759 
  760                 id_key.id = sp->id;
  761                 id_key.creatorid = sp->creatorid;
  762 
  763                 PF_STATE_ENTER_READ();
  764                 st = pf_find_state_byid(&id_key);
  765                 pf_state_ref(st);
  766                 PF_STATE_EXIT_READ();
  767                 if (st == NULL) {
  768                         /* insert the update */
  769                         PF_LOCK();
  770                         error = pf_state_import(sp, flags);
  771                         if (error)
  772                                 pfsyncstat_inc(pfsyncs_badstate);
  773                         PF_UNLOCK();
  774                         continue;
  775                 }
  776 
  777                 if (ISSET(st->state_flags, PFSTATE_ACK))
  778                         pfsync_deferred(st, 1);
  779 
  780                 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
  781                         sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
  782                 else {
  783                         sync = 0;
  784 
  785                         /*
   786                          * Non-TCP protocol state machines always go
  787                          * forwards
  788                          */
  789                         if (st->src.state > sp->src.state)
  790                                 sync++;
  791                         else
  792                                 pf_state_peer_ntoh(&sp->src, &st->src);
  793 
  794                         if (st->dst.state > sp->dst.state)
  795                                 sync++;
  796                         else
  797                                 pf_state_peer_ntoh(&sp->dst, &st->dst);
  798                 }
  799 
  800                 if (sync < 2) {
  801                         pf_state_alloc_scrub_memory(&sp->dst, &st->dst);
  802                         pf_state_peer_ntoh(&sp->dst, &st->dst);
  803                         st->expire = getuptime();
  804                         st->timeout = sp->timeout;
  805                 }
  806                 st->pfsync_time = getuptime();
  807 
  808                 if (sync) {
  809                         pfsyncstat_inc(pfsyncs_stale);
  810 
  811                         pfsync_update_state(st);
  812                         schednetisr(NETISR_PFSYNC);
  813                 }
  814 
  815                 pf_state_unref(st);
  816         }
  817 
  818         return (0);
  819 }
  820 
  821 int
  822 pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
  823 {
  824         struct pfsync_upd_c *up;
  825         struct pf_state_cmp id_key;
  826         struct pf_state *st;
  827 
  828         int sync;
  829 
  830         int i;
  831 
  832         for (i = 0; i < count; i++) {
  833                 up = (struct pfsync_upd_c *)(buf + len * i);
  834 
  835                 /* check for invalid values */
  836                 if (up->timeout >= PFTM_MAX ||
  837                     up->src.state > PF_TCPS_PROXY_DST ||
  838                     up->dst.state > PF_TCPS_PROXY_DST) {
  839                         DPFPRINTF(LOG_NOTICE,
  840                             "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
  841                         pfsyncstat_inc(pfsyncs_badval);
  842                         continue;
  843                 }
  844 
  845                 id_key.id = up->id;
  846                 id_key.creatorid = up->creatorid;
  847 
  848                 PF_STATE_ENTER_READ();
  849                 st = pf_find_state_byid(&id_key);
  850                 pf_state_ref(st);
  851                 PF_STATE_EXIT_READ();
  852                 if (st == NULL) {
  853                         /* We don't have this state. Ask for it. */
  854                         pfsync_request_update(id_key.creatorid, id_key.id);
  855                         continue;
  856                 }
  857 
  858                 if (ISSET(st->state_flags, PFSTATE_ACK))
  859                         pfsync_deferred(st, 1);
  860 
  861                 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
  862                         sync = pfsync_upd_tcp(st, &up->src, &up->dst);
  863                 else {
  864                         sync = 0;
  865                         /*
   866                          * Non-TCP protocol state machines always go
  867                          * forwards
  868                          */
  869                         if (st->src.state > up->src.state)
  870                                 sync++;
  871                         else
  872                                 pf_state_peer_ntoh(&up->src, &st->src);
  873 
  874                         if (st->dst.state > up->dst.state)
  875                                 sync++;
  876                         else
  877                                 pf_state_peer_ntoh(&up->dst, &st->dst);
  878                 }
  879                 if (sync < 2) {
  880                         pf_state_alloc_scrub_memory(&up->dst, &st->dst);
  881                         pf_state_peer_ntoh(&up->dst, &st->dst);
  882                         st->expire = getuptime();
  883                         st->timeout = up->timeout;
  884                 }
  885                 st->pfsync_time = getuptime();
  886 
  887                 if (sync) {
  888                         pfsyncstat_inc(pfsyncs_stale);
  889 
  890                         pfsync_update_state(st);
  891                         schednetisr(NETISR_PFSYNC);
  892                 }
  893 
  894                 pf_state_unref(st);
  895         }
  896 
  897         return (0);
  898 }
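       /*
        * Note the asymmetry with pfsync_in_upd() above: a "compressed"
        * update names the state only by (creatorid, id) and carries no
        * keys or addresses, so an unknown state cannot be created from
        * it; pfsync_request_update() asks the peer for the full version
        * instead.
        */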
  899 
  900 int
  901 pfsync_in_ureq(caddr_t buf, int len, int count, int flags)
  902 {
  903         struct pfsync_upd_req *ur;
  904         int i;
  905 
  906         struct pf_state_cmp id_key;
  907         struct pf_state *st;
  908 
  909         for (i = 0; i < count; i++) {
  910                 ur = (struct pfsync_upd_req *)(buf + len * i);
  911 
  912                 id_key.id = ur->id;
  913                 id_key.creatorid = ur->creatorid;
  914 
  915                 if (id_key.id == 0 && id_key.creatorid == 0)
  916                         pfsync_bulk_start();
  917                 else {
  918                         PF_STATE_ENTER_READ();
  919                         st = pf_find_state_byid(&id_key);
  920                         pf_state_ref(st);
  921                         PF_STATE_EXIT_READ();
  922                         if (st == NULL) {
  923                                 pfsyncstat_inc(pfsyncs_badstate);
  924                                 continue;
  925                         }
  926                         if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
  927                                 pf_state_unref(st);
  928                                 continue;
  929                         }
  930 
  931                         pfsync_update_state_req(st);
  932                         pf_state_unref(st);
  933                 }
  934         }
  935 
  936         return (0);
  937 }
  938 
  939 int
  940 pfsync_in_del(caddr_t buf, int len, int count, int flags)
  941 {
  942         struct pfsync_state *sp;
  943         struct pf_state_cmp id_key;
  944         struct pf_state *st;
  945         int i;
  946 
  947         PF_STATE_ENTER_WRITE();
  948         for (i = 0; i < count; i++) {
  949                 sp = (struct pfsync_state *)(buf + len * i);
  950 
  951                 id_key.id = sp->id;
  952                 id_key.creatorid = sp->creatorid;
  953 
  954                 st = pf_find_state_byid(&id_key);
  955                 if (st == NULL) {
  956                         pfsyncstat_inc(pfsyncs_badstate);
  957                         continue;
  958                 }
  959                 SET(st->state_flags, PFSTATE_NOSYNC);
  960                 pf_remove_state(st);
  961         }
  962         PF_STATE_EXIT_WRITE();
  963 
  964         return (0);
  965 }
  966 
  967 int
  968 pfsync_in_del_c(caddr_t buf, int len, int count, int flags)
  969 {
  970         struct pfsync_del_c *sp;
  971         struct pf_state_cmp id_key;
  972         struct pf_state *st;
  973         int i;
  974 
  975         PF_LOCK();
  976         PF_STATE_ENTER_WRITE();
  977         for (i = 0; i < count; i++) {
  978                 sp = (struct pfsync_del_c *)(buf + len * i);
  979 
  980                 id_key.id = sp->id;
  981                 id_key.creatorid = sp->creatorid;
  982 
  983                 st = pf_find_state_byid(&id_key);
  984                 if (st == NULL) {
  985                         pfsyncstat_inc(pfsyncs_badstate);
  986                         continue;
  987                 }
  988 
  989                 SET(st->state_flags, PFSTATE_NOSYNC);
  990                 pf_remove_state(st);
  991         }
  992         PF_STATE_EXIT_WRITE();
  993         PF_UNLOCK();
  994 
  995         return (0);
  996 }
  997 
  998 int
  999 pfsync_in_bus(caddr_t buf, int len, int count, int flags)
 1000 {
 1001         struct pfsync_softc *sc = pfsyncif;
 1002         struct pfsync_bus *bus;
 1003 
 1004         /* If we're not waiting for a bulk update, who cares. */
 1005         if (sc->sc_ureq_sent == 0)
 1006                 return (0);
 1007 
 1008         bus = (struct pfsync_bus *)buf;
 1009 
 1010         switch (bus->status) {
 1011         case PFSYNC_BUS_START:
 1012                 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
 1013                     pf_pool_limits[PF_LIMIT_STATES].limit /
 1014                     ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
 1015                     sizeof(struct pfsync_state)));
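                       /*
                        * This timeout is in ticks: a 4 second grace period
                        * plus one tick per packet's worth of states, where
                        * (if_mtu - PFSYNC_MINPKT) / sizeof(struct
                        * pfsync_state) is how many full state entries fit
                        * in one packet, i.e. it budgets for a peer that
                        * manages roughly one bulk packet per tick.
                        */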
 1016                 DPFPRINTF(LOG_INFO, "received bulk update start");
 1017                 break;
 1018 
 1019         case PFSYNC_BUS_END:
 1020                 if (getuptime() - ntohl(bus->endtime) >=
 1021                     sc->sc_ureq_sent) {
 1022                         /* that's it, we're happy */
 1023                         sc->sc_ureq_sent = 0;
 1024                         sc->sc_bulk_tries = 0;
 1025                         timeout_del(&sc->sc_bulkfail_tmo);
 1026 #if NCARP > 0
 1027                         if (!pfsync_sync_ok)
 1028                                 carp_group_demote_adj(&sc->sc_if, -1,
 1029                                     sc->sc_link_demoted ?
 1030                                     "pfsync link state up" :
 1031                                     "pfsync bulk done");
 1032                         if (sc->sc_initial_bulk) {
 1033                                 carp_group_demote_adj(&sc->sc_if, -32,
 1034                                     "pfsync init");
 1035                                 sc->sc_initial_bulk = 0;
 1036                         }
 1037 #endif
 1038                         pfsync_sync_ok = 1;
 1039                         sc->sc_link_demoted = 0;
 1040                         DPFPRINTF(LOG_INFO, "received valid bulk update end");
 1041                 } else {
 1042                         DPFPRINTF(LOG_WARNING, "received invalid "
 1043                             "bulk update end: bad timestamp");
 1044                 }
 1045                 break;
 1046         }
 1047 
 1048         return (0);
 1049 }
 1050 
 1051 int
 1052 pfsync_in_tdb(caddr_t buf, int len, int count, int flags)
 1053 {
 1054 #if defined(IPSEC)
 1055         struct pfsync_tdb *tp;
 1056         int i;
 1057 
 1058         for (i = 0; i < count; i++) {
 1059                 tp = (struct pfsync_tdb *)(buf + len * i);
 1060                 pfsync_update_net_tdb(tp);
 1061         }
 1062 #endif
 1063 
 1064         return (0);
 1065 }
 1066 
 1067 #if defined(IPSEC)
 1068 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
 1069 void
 1070 pfsync_update_net_tdb(struct pfsync_tdb *pt)
 1071 {
 1072         struct tdb              *tdb;
 1073 
 1074         NET_ASSERT_LOCKED();
 1075 
 1076         /* check for invalid values */
 1077         if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
 1078             (pt->dst.sa.sa_family != AF_INET &&
 1079              pt->dst.sa.sa_family != AF_INET6))
 1080                 goto bad;
 1081 
 1082         tdb = gettdb(ntohs(pt->rdomain), pt->spi,
 1083             (union sockaddr_union *)&pt->dst, pt->sproto);
 1084         if (tdb) {
 1085                 pt->rpl = betoh64(pt->rpl);
 1086                 pt->cur_bytes = betoh64(pt->cur_bytes);
 1087 
 1088                 /* Neither replay nor byte counter should ever decrease. */
 1089                 if (pt->rpl < tdb->tdb_rpl ||
 1090                     pt->cur_bytes < tdb->tdb_cur_bytes) {
 1091                         tdb_unref(tdb);
 1092                         goto bad;
 1093                 }
 1094 
 1095                 tdb->tdb_rpl = pt->rpl;
 1096                 tdb->tdb_cur_bytes = pt->cur_bytes;
 1097                 tdb_unref(tdb);
 1098         }
 1099         return;
 1100 
 1101  bad:
 1102         DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
 1103             "invalid value");
 1104         pfsyncstat_inc(pfsyncs_badstate);
 1105         return;
 1106 }
 1107 #endif
 1108 
 1109 
 1110 int
 1111 pfsync_in_eof(caddr_t buf, int len, int count, int flags)
 1112 {
 1113         if (len > 0 || count > 0)
 1114                 pfsyncstat_inc(pfsyncs_badact);
 1115 
 1116         /* we're done. let the caller return */
 1117         return (1);
 1118 }
 1119 
 1120 int
 1121 pfsync_in_error(caddr_t buf, int len, int count, int flags)
 1122 {
 1123         pfsyncstat_inc(pfsyncs_badact);
 1124         return (-1);
 1125 }
 1126 
 1127 int
 1128 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
 1129         struct rtentry *rt)
 1130 {
 1131         m_freem(m);     /* drop packet */
 1132         return (EAFNOSUPPORT);
 1133 }
 1134 
 1135 int
 1136 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 1137 {
 1138         struct proc *p = curproc;
 1139         struct pfsync_softc *sc = ifp->if_softc;
 1140         struct ifreq *ifr = (struct ifreq *)data;
 1141         struct ip_moptions *imo = &sc->sc_imo;
 1142         struct pfsyncreq pfsyncr;
 1143         struct ifnet *ifp0, *sifp;
 1144         struct ip *ip;
 1145         int error;
 1146 
 1147         switch (cmd) {
 1148         case SIOCSIFFLAGS:
 1149                 if ((ifp->if_flags & IFF_RUNNING) == 0 &&
 1150                     (ifp->if_flags & IFF_UP)) {
 1151                         ifp->if_flags |= IFF_RUNNING;
 1152 
 1153 #if NCARP > 0
 1154                         sc->sc_initial_bulk = 1;
 1155                         carp_group_demote_adj(&sc->sc_if, 32, "pfsync init");
 1156 #endif
 1157 
 1158                         pfsync_request_full_update(sc);
 1159                 }
 1160                 if ((ifp->if_flags & IFF_RUNNING) &&
 1161                     (ifp->if_flags & IFF_UP) == 0) {
 1162                         ifp->if_flags &= ~IFF_RUNNING;
 1163 
 1164                         /* drop everything */
 1165                         timeout_del(&sc->sc_tmo);
 1166                         pfsync_drop(sc);
 1167 
 1168                         pfsync_cancel_full_update(sc);
 1169                 }
 1170                 break;
 1171         case SIOCSIFMTU:
 1172                 if ((ifp0 = if_get(sc->sc_sync_ifidx)) == NULL)
 1173                         return (EINVAL);
 1174                 error = 0;
 1175                 if (ifr->ifr_mtu <= PFSYNC_MINPKT ||
 1176                     ifr->ifr_mtu > ifp0->if_mtu) {
 1177                         error = EINVAL;
 1178                 }
 1179                 if_put(ifp0);
 1180                 if (error)
 1181                         return error;
 1182                 if (ifr->ifr_mtu < ifp->if_mtu)
 1183                         pfsync_sendout();
 1184                 ifp->if_mtu = ifr->ifr_mtu;
 1185                 break;
 1186         case SIOCGETPFSYNC:
 1187                 bzero(&pfsyncr, sizeof(pfsyncr));
 1188                 if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) {
 1189                         strlcpy(pfsyncr.pfsyncr_syncdev,
 1190                             ifp0->if_xname, IFNAMSIZ);
 1191                 }
 1192                 if_put(ifp0);
 1193                 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
 1194                 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
 1195                 pfsyncr.pfsyncr_defer = sc->sc_defer;
 1196                 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
 1197 
 1198         case SIOCSETPFSYNC:
 1199                 if ((error = suser(p)) != 0)
 1200                         return (error);
 1201                 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
 1202                         return (error);
 1203 
 1204                 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
 1205                         sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
 1206                 else
 1207                         sc->sc_sync_peer.s_addr =
 1208                             pfsyncr.pfsyncr_syncpeer.s_addr;
 1209 
 1210                 if (pfsyncr.pfsyncr_maxupdates > 255)
 1211                         return (EINVAL);
 1212                 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
 1213 
 1214                 sc->sc_defer = pfsyncr.pfsyncr_defer;
 1215 
 1216                 if (pfsyncr.pfsyncr_syncdev[0] == 0) {
 1217                         if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) {
 1218                                 if_linkstatehook_del(ifp0, &sc->sc_ltask);
 1219                                 if_detachhook_del(ifp0, &sc->sc_dtask);
 1220                         }
 1221                         if_put(ifp0);
 1222                         sc->sc_sync_ifidx = 0;
 1223                         if (imo->imo_num_memberships > 0) {
 1224                                 in_delmulti(imo->imo_membership[
 1225                                     --imo->imo_num_memberships]);
 1226                                 imo->imo_ifidx = 0;
 1227                         }
 1228                         break;
 1229                 }
 1230 
 1231                 if ((sifp = if_unit(pfsyncr.pfsyncr_syncdev)) == NULL)
 1232                         return (EINVAL);
 1233 
 1234                 ifp0 = if_get(sc->sc_sync_ifidx);
 1235 
 1236                 if (sifp->if_mtu < sc->sc_if.if_mtu || (ifp0 != NULL &&
 1237                     sifp->if_mtu < ifp0->if_mtu) ||
 1238                     sifp->if_mtu < MCLBYTES - sizeof(struct ip))
 1239                         pfsync_sendout();
 1240 
 1241                 if (ifp0) {
 1242                         if_linkstatehook_del(ifp0, &sc->sc_ltask);
 1243                         if_detachhook_del(ifp0, &sc->sc_dtask);
 1244                 }
 1245                 if_put(ifp0);
 1246                 sc->sc_sync_ifidx = sifp->if_index;
 1247 
 1248                 if (imo->imo_num_memberships > 0) {
 1249                         in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
 1250                         imo->imo_ifidx = 0;
 1251                 }
 1252 
 1253                 if (sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
 1254                         struct in_addr addr;
 1255 
 1256                         if (!(sifp->if_flags & IFF_MULTICAST)) {
 1257                                 sc->sc_sync_ifidx = 0;
 1258                                 if_put(sifp);
 1259                                 return (EADDRNOTAVAIL);
 1260                         }
 1261 
 1262                         addr.s_addr = INADDR_PFSYNC_GROUP;
 1263 
 1264                         if ((imo->imo_membership[0] =
 1265                             in_addmulti(&addr, sifp)) == NULL) {
 1266                                 sc->sc_sync_ifidx = 0;
 1267                                 if_put(sifp);
 1268                                 return (ENOBUFS);
 1269                         }
 1270                         imo->imo_num_memberships++;
 1271                         imo->imo_ifidx = sc->sc_sync_ifidx;
 1272                         imo->imo_ttl = PFSYNC_DFLTTL;
 1273                         imo->imo_loop = 0;
 1274                 }
 1275 
 1276                 ip = &sc->sc_template;
 1277                 bzero(ip, sizeof(*ip));
 1278                 ip->ip_v = IPVERSION;
 1279                 ip->ip_hl = sizeof(sc->sc_template) >> 2;
 1280                 ip->ip_tos = IPTOS_LOWDELAY;
 1281                 /* len and id are set later */
 1282                 ip->ip_off = htons(IP_DF);
 1283                 ip->ip_ttl = PFSYNC_DFLTTL;
 1284                 ip->ip_p = IPPROTO_PFSYNC;
 1285                 ip->ip_src.s_addr = INADDR_ANY;
 1286                 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
 1287 
 1288                 if_linkstatehook_add(sifp, &sc->sc_ltask);
 1289                 if_detachhook_add(sifp, &sc->sc_dtask);
 1290                 if_put(sifp);
 1291 
 1292                 pfsync_request_full_update(sc);
 1293 
 1294                 break;
 1295 
 1296         default:
 1297                 return (ENOTTY);
 1298         }
 1299 
 1300         return (0);
 1301 }
 1302 
 1303 void
 1304 pfsync_out_state(struct pf_state *st, void *buf)
 1305 {
 1306         struct pfsync_state *sp = buf;
 1307 
 1308         pf_state_export(sp, st);
 1309 }
 1310 
 1311 void
 1312 pfsync_out_iack(struct pf_state *st, void *buf)
 1313 {
 1314         struct pfsync_ins_ack *iack = buf;
 1315 
 1316         iack->id = st->id;
 1317         iack->creatorid = st->creatorid;
 1318 }
 1319 
 1320 void
 1321 pfsync_out_upd_c(struct pf_state *st, void *buf)
 1322 {
 1323         struct pfsync_upd_c *up = buf;
 1324 
 1325         bzero(up, sizeof(*up));
 1326         up->id = st->id;
 1327         pf_state_peer_hton(&st->src, &up->src);
 1328         pf_state_peer_hton(&st->dst, &up->dst);
 1329         up->creatorid = st->creatorid;
 1330         up->timeout = st->timeout;
 1331 }
 1332 
 1333 void
 1334 pfsync_out_del(struct pf_state *st, void *buf)
 1335 {
 1336         struct pfsync_del_c *dp = buf;
 1337 
 1338         dp->id = st->id;
 1339         dp->creatorid = st->creatorid;
 1340 
 1341         SET(st->state_flags, PFSTATE_NOSYNC);
 1342 }
 1343 
 1344 void
 1345 pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc)
 1346 {
 1347         int q;
 1348         struct pf_state *st;
 1349         struct pfsync_upd_req_item *ur;
 1350 #if defined(IPSEC)
 1351         struct tdb *tdb;
 1352 #endif
 1353 
 1354         sn->sn_sc = sc;
 1355 
 1356         mtx_enter(&sc->sc_st_mtx);
 1357         mtx_enter(&sc->sc_upd_req_mtx);
 1358         mtx_enter(&sc->sc_tdb_mtx);
 1359 
 1360         for (q = 0; q < PFSYNC_S_COUNT; q++) {
 1361                 TAILQ_INIT(&sn->sn_qs[q]);
 1362 
 1363                 while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) {
 1364                         TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
 1365                         if (st->snapped == 0) {
 1366                                 TAILQ_INSERT_TAIL(&sn->sn_qs[q], st, sync_snap);
 1367                                 st->snapped = 1;
 1368                         } else {
 1369                                 /*
 1370                                  * item is on snapshot list already, so we can
 1371                                  * skip it now.
 1372                                  */
 1373                                 pf_state_unref(st);
 1374                         }
 1375                 }
 1376         }
 1377 
 1378         TAILQ_INIT(&sn->sn_upd_req_list);
 1379         while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
 1380                 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
 1381                 TAILQ_INSERT_TAIL(&sn->sn_upd_req_list, ur, ur_snap);
 1382         }
 1383 
 1384         TAILQ_INIT(&sn->sn_tdb_q);
 1385 #if defined(IPSEC)
 1386         while ((tdb = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) {
 1387                 TAILQ_REMOVE(&sc->sc_tdb_q, tdb, tdb_sync_entry);
 1388                 TAILQ_INSERT_TAIL(&sn->sn_tdb_q, tdb, tdb_sync_snap);
 1389 
 1390                 mtx_enter(&tdb->tdb_mtx);
 1391                 KASSERT(!ISSET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED));
 1392                 SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED);
 1393                 mtx_leave(&tdb->tdb_mtx);
 1394         }
 1395 #endif
 1396 
 1397         sn->sn_len = sc->sc_len;
 1398         sc->sc_len = PFSYNC_MINPKT;
 1399 
 1400         sn->sn_plus = sc->sc_plus;
 1401         sc->sc_plus = NULL;
 1402         sn->sn_pluslen = sc->sc_pluslen;
 1403         sc->sc_pluslen = 0;
 1404 
 1405         mtx_leave(&sc->sc_tdb_mtx);
 1406         mtx_leave(&sc->sc_upd_req_mtx);
 1407         mtx_leave(&sc->sc_st_mtx);
 1408 }
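       /*
        * This detach-under-lock pattern keeps packet construction itself
        * lock-free; pfsync_sendout() below does, in outline,
        *
        *      struct pfsync_snapshot sn;
        *
        *      pfsync_grab_snapshot(&sn, sc);  (everything moves onto sn)
        *      (build and transmit the packet from sn, no mutexes held)
        *
        * and the softc's accounting has already been reset to an empty
        * packet by the time the mutexes are released.
        */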
 1409 
 1410 void
 1411 pfsync_drop_snapshot(struct pfsync_snapshot *sn)
 1412 {
 1413         struct pf_state *st;
 1414         struct pfsync_upd_req_item *ur;
 1415 #if defined(IPSEC)
 1416         struct tdb *t;
 1417 #endif
 1418         int q;
 1419 
 1420         for (q = 0; q < PFSYNC_S_COUNT; q++) {
 1421                 if (TAILQ_EMPTY(&sn->sn_qs[q]))
 1422                         continue;
 1423 
 1424                 while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) {
 1425                         KASSERT(st->sync_state == q);
 1426                         KASSERT(st->snapped == 1);
 1427                         TAILQ_REMOVE(&sn->sn_qs[q], st, sync_snap);
 1428                         st->sync_state = PFSYNC_S_NONE;
 1429                         st->snapped = 0;
 1430                         pf_state_unref(st);
 1431                 }
 1432         }
 1433 
 1434         while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) {
 1435                 TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_snap);
 1436                 pool_put(&sn->sn_sc->sc_pool, ur);
 1437         }
 1438 
 1439 #if defined(IPSEC)
 1440         while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) {
 1441                 TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_snap);
 1442                 mtx_enter(&t->tdb_mtx);
 1443                 KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED));
 1444                 CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED);
 1445                 CLR(t->tdb_flags, TDBF_PFSYNC);
 1446                 mtx_leave(&t->tdb_mtx);
 1447         }
 1448 #endif
 1449 }
 1450 
 1451 int
 1452 pfsync_is_snapshot_empty(struct pfsync_snapshot *sn)
 1453 {
 1454         int     q;
 1455 
 1456         for (q = 0; q < PFSYNC_S_COUNT; q++)
 1457                 if (!TAILQ_EMPTY(&sn->sn_qs[q]))
 1458                         return (0);
 1459 
 1460         if (!TAILQ_EMPTY(&sn->sn_upd_req_list))
 1461                 return (0);
 1462 
 1463         if (!TAILQ_EMPTY(&sn->sn_tdb_q))
 1464                 return (0);
 1465 
 1466         return (sn->sn_plus == NULL);
 1467 }
 1468 
 1469 void
 1470 pfsync_drop(struct pfsync_softc *sc)
 1471 {
 1472         struct pfsync_snapshot  sn;
 1473 
 1474         pfsync_grab_snapshot(&sn, sc);
 1475         pfsync_drop_snapshot(&sn);
 1476 }
 1477 
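      /*
       * Task handler: dequeue pending pfsync packets and hand them to
       * ip_output() under the net lock, counting successes and errors.
       */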
 1478 void
 1479 pfsync_send_dispatch(void *xmq)
 1480 {
 1481         struct mbuf_queue *mq = xmq;
 1482         struct pfsync_softc *sc;
 1483         struct mbuf *m;
 1484         struct mbuf_list ml;
 1485         int error;
 1486 
 1487         mq_delist(mq, &ml);
 1488         if (ml_empty(&ml))
 1489                 return;
 1490 
 1491         NET_LOCK();
 1492         sc = pfsyncif;
 1493         if (sc == NULL) {
 1494                 ml_purge(&ml);
 1495                 goto done;
 1496         }
 1497 
 1498         while ((m = ml_dequeue(&ml)) != NULL) {
 1499                 if ((error = ip_output(m, NULL, NULL, IP_RAWOUTPUT,
 1500                     &sc->sc_imo, NULL, 0)) == 0)
 1501                         pfsyncstat_inc(pfsyncs_opackets);
 1502                 else {
 1503                         DPFPRINTF(LOG_DEBUG,
 1504                             "ip_output() @ %s failed (%d)\n", __func__, error);
 1505                         pfsyncstat_inc(pfsyncs_oerrors);
 1506                 }
 1507         }
 1508 done:
 1509         NET_UNLOCK();
 1510 }
 1511 
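      /*
       * Queue a finished pfsync packet for transmission and kick the
       * send task; if the queue is full the packet is dropped and
       * counted as an output error.
       */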
 1512 void
 1513 pfsync_send_pkt(struct mbuf *m)
 1514 {
 1515         if (mq_enqueue(&pfsync_mq, m) != 0) {
 1516                 pfsyncstat_inc(pfsyncs_oerrors);
 1517                 DPFPRINTF(LOG_DEBUG, "mq_enqueue() @ %s failed, queue full\n",
 1518                     __func__);
 1519         } else
 1520                 task_add(net_tq(0), &pfsync_task);
 1521 }
 1522 
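      /*
       * Turn the current queue contents into a single pfsync packet:
       * grab a snapshot, build the IP and pfsync headers, append the
       * update requests, any "plus" region, the tdbs, and the state
       * queues as subheader sections, then hand the mbuf to
       * pfsync_send_pkt().
       */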
 1523 void
 1524 pfsync_sendout(void)
 1525 {
 1526         struct pfsync_snapshot sn;
 1527         struct pfsync_softc *sc = pfsyncif;
 1528 #if NBPFILTER > 0
 1529         struct ifnet *ifp = &sc->sc_if;
 1530 #endif
 1531         struct mbuf *m;
 1532         struct ip *ip;
 1533         struct pfsync_header *ph;
 1534         struct pfsync_subheader *subh;
 1535         struct pf_state *st;
 1536         struct pfsync_upd_req_item *ur;
 1537         int offset;
 1538         int q, count = 0;
 1539 
 1540         if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
 1541                 return;
 1542 
 1543         if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
 1544 #if NBPFILTER > 0
 1545             (ifp->if_bpf == NULL && sc->sc_sync_ifidx == 0)) {
 1546 #else
 1547             sc->sc_sync_ifidx == 0) {
 1548 #endif
 1549                 pfsync_drop(sc);
 1550                 return;
 1551         }
 1552 
 1553         pfsync_grab_snapshot(&sn, sc);
 1554 
 1555         /*
 1556          * The check below is sufficient to prevent us from sending empty
 1557          * packets, but it does not stop us from sending short packets.
 1558          */
 1559         if (pfsync_is_snapshot_empty(&sn))
 1560                 return;
 1561 
 1562         MGETHDR(m, M_DONTWAIT, MT_DATA);
 1563         if (m == NULL) {
 1564                 sc->sc_if.if_oerrors++;
 1565                 pfsyncstat_inc(pfsyncs_onomem);
 1566                 pfsync_drop_snapshot(&sn);
 1567                 return;
 1568         }
 1569 
 1570         if (max_linkhdr + sn.sn_len > MHLEN) {
 1571                 MCLGETL(m, M_DONTWAIT, max_linkhdr + sn.sn_len);
 1572                 if (!ISSET(m->m_flags, M_EXT)) {
 1573                         m_free(m);
 1574                         sc->sc_if.if_oerrors++;
 1575                         pfsyncstat_inc(pfsyncs_onomem);
 1576                         pfsync_drop_snapshot(&sn);
 1577                         return;
 1578                 }
 1579         }
 1580         m->m_data += max_linkhdr;
 1581         m->m_len = m->m_pkthdr.len = sn.sn_len;
 1582 
 1583         /* build the ip header */
 1584         ip = mtod(m, struct ip *);
 1585         bcopy(&sc->sc_template, ip, sizeof(*ip));
 1586         offset = sizeof(*ip);
 1587 
 1588         ip->ip_len = htons(m->m_pkthdr.len);
 1589         ip->ip_id = htons(ip_randomid());
 1590 
 1591         /* build the pfsync header */
 1592         ph = (struct pfsync_header *)(m->m_data + offset);
 1593         bzero(ph, sizeof(*ph));
 1594         offset += sizeof(*ph);
 1595 
 1596         ph->version = PFSYNC_VERSION;
 1597         ph->len = htons(sn.sn_len - sizeof(*ip));
 1598         bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
 1599 
 1600         if (!TAILQ_EMPTY(&sn.sn_upd_req_list)) {
 1601                 subh = (struct pfsync_subheader *)(m->m_data + offset);
 1602                 offset += sizeof(*subh);
 1603 
 1604                 count = 0;
 1605                 while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) {
 1606                         TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_snap);
 1607 
 1608                         bcopy(&ur->ur_msg, m->m_data + offset,
 1609                             sizeof(ur->ur_msg));
 1610                         offset += sizeof(ur->ur_msg);
 1611 
 1612                         pool_put(&sc->sc_pool, ur);
 1613 
 1614                         count++;
 1615                 }
 1616 
 1617                 bzero(subh, sizeof(*subh));
 1618                 subh->len = sizeof(ur->ur_msg) >> 2;
 1619                 subh->action = PFSYNC_ACT_UPD_REQ;
 1620                 subh->count = htons(count);
 1621         }
 1622 
 1623         /* has someone built a custom region for us to add? */
 1624         if (sn.sn_plus != NULL) {
 1625                 bcopy(sn.sn_plus, m->m_data + offset, sn.sn_pluslen);
 1626                 offset += sn.sn_pluslen;
 1627                 sn.sn_plus = NULL;      /* XXX memory leak ? */
 1628         }
 1629 
 1630 #if defined(IPSEC)
 1631         if (!TAILQ_EMPTY(&sn.sn_tdb_q)) {
 1632                 struct tdb *t;
 1633 
 1634                 subh = (struct pfsync_subheader *)(m->m_data + offset);
 1635                 offset += sizeof(*subh);
 1636 
 1637                 count = 0;
 1638                 while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) {
 1639                         TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_snap);
 1640                         pfsync_out_tdb(t, m->m_data + offset);
 1641                         offset += sizeof(struct pfsync_tdb);
 1642                         mtx_enter(&t->tdb_mtx);
 1643                         KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED));
 1644                         CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED);
 1645                         CLR(t->tdb_flags, TDBF_PFSYNC);
 1646                         mtx_leave(&t->tdb_mtx);
 1647                         tdb_unref(t);
 1648                         count++;
 1649                 }
 1650 
 1651                 bzero(subh, sizeof(*subh));
 1652                 subh->action = PFSYNC_ACT_TDB;
 1653                 subh->len = sizeof(struct pfsync_tdb) >> 2;
 1654                 subh->count = htons(count);
 1655         }
 1656 #endif
 1657 
 1658         /* walk the queues */
 1659         for (q = 0; q < PFSYNC_S_COUNT; q++) {
 1660                 if (TAILQ_EMPTY(&sn.sn_qs[q]))
 1661                         continue;
 1662 
 1663                 subh = (struct pfsync_subheader *)(m->m_data + offset);
 1664                 offset += sizeof(*subh);
 1665 
 1666                 count = 0;
 1667                 while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) {
 1668                         TAILQ_REMOVE(&sn.sn_qs[q], st, sync_snap);
 1669                         KASSERT(st->sync_state == q);
 1670                         KASSERT(st->snapped == 1);
 1671                         st->sync_state = PFSYNC_S_NONE;
 1672                         st->snapped = 0;
 1673                         pfsync_qs[q].write(st, m->m_data + offset);
 1674                         offset += pfsync_qs[q].len;
 1675 
 1676                         pf_state_unref(st);
 1677                         count++;
 1678                 }
 1679 
 1680                 bzero(subh, sizeof(*subh));
 1681                 subh->action = pfsync_qs[q].action;
 1682                 subh->len = pfsync_qs[q].len >> 2;
 1683                 subh->count = htons(count);
 1684         }
 1685 
 1686         /* we're done, let's put it on the wire */
 1687 #if NBPFILTER > 0
 1688         if (ifp->if_bpf) {
 1689                 m->m_data += sizeof(*ip);
 1690                 m->m_len = m->m_pkthdr.len = sn.sn_len - sizeof(*ip);
 1691                 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
 1692                 m->m_data -= sizeof(*ip);
 1693                 m->m_len = m->m_pkthdr.len = sn.sn_len;
 1694         }
 1695 
 1696         if (sc->sc_sync_ifidx == 0) {
 1697                 sc->sc_len = PFSYNC_MINPKT;
 1698                 m_freem(m);
 1699                 return;
 1700         }
 1701 #endif
 1702 
 1703         sc->sc_if.if_opackets++;
 1704         sc->sc_if.if_obytes += m->m_pkthdr.len;
 1705 
 1706         m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
 1707 
 1708         pfsync_send_pkt(m);
 1709 }
 1710 
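      /*
       * A new state has been created by pf; schedule it for insertion
       * on the peer unless the rule or the state itself is marked as
       * not to be synced.
       */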
 1711 void
 1712 pfsync_insert_state(struct pf_state *st)
 1713 {
 1714         struct pfsync_softc *sc = pfsyncif;
 1715 
 1716         NET_ASSERT_LOCKED();
 1717 
 1718         if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
 1719             st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
 1720                 SET(st->state_flags, PFSTATE_NOSYNC);
 1721                 return;
 1722         }
 1723 
 1724         if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
 1725             ISSET(st->state_flags, PFSTATE_NOSYNC))
 1726                 return;
 1727 
 1728         KASSERT(st->sync_state == PFSYNC_S_NONE);
 1729 
 1730         if (sc->sc_len == PFSYNC_MINPKT)
 1731                 timeout_add_sec(&sc->sc_tmo, 1);
 1732 
 1733         pfsync_q_ins(st, PFSYNC_S_INS);
 1734 
 1735         st->sync_updates = 0;
 1736 }
 1737 
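      /*
       * Defer transmission of the initial packet for a new state until
       * the peer has acknowledged the state (or the deferral times
       * out). Returns 1 if the packet was deferred, 0 if the caller
       * should transmit it immediately.
       */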
 1738 int
 1739 pfsync_defer(struct pf_state *st, struct mbuf *m, struct pfsync_deferral **ppd)
 1740 {
 1741         struct pfsync_softc *sc = pfsyncif;
 1742         struct pfsync_deferral *pd;
 1743         unsigned int sched;
 1744 
 1745         NET_ASSERT_LOCKED();
 1746 
 1747         if (!sc->sc_defer ||
 1748             ISSET(st->state_flags, PFSTATE_NOSYNC) ||
 1749             m->m_flags & (M_BCAST|M_MCAST))
 1750                 return (0);
 1751 
 1752         pd = pool_get(&sc->sc_pool, M_NOWAIT);
 1753         if (pd == NULL)
 1754                 return (0);
 1755 
 1756         /*
 1757          * The deferral queue is growing faster than the timeout can
 1758          * consume it, so we ask the packet (the caller) to help the
 1759          * timer by dispatching one deferral for us.
 1760          *
 1761          * We would like to call pfsync_undefer() here. Unfortunately
 1762          * we can't, because pfsync_undefer() calls ip_output(), which
 1763          * in turn calls pf_test(), which would then attempt to grab
 1764          * the PF_LOCK() we currently hold.
 1765          */
 1766         if (sc->sc_deferred >= 128) {
 1767                 mtx_enter(&sc->sc_deferrals_mtx);
 1768                 *ppd = TAILQ_FIRST(&sc->sc_deferrals);
 1769                 if (*ppd != NULL) {
 1770                         TAILQ_REMOVE(&sc->sc_deferrals, *ppd, pd_entry);
 1771                         sc->sc_deferred--;
 1772                 }
 1773                 mtx_leave(&sc->sc_deferrals_mtx);
 1774         } else
 1775                 *ppd = NULL;
 1776 
 1777         m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
 1778         SET(st->state_flags, PFSTATE_ACK);
 1779 
 1780         pd->pd_st = pf_state_ref(st);
 1781         pd->pd_m = m;
 1782 
 1783         pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC;
 1784 
 1785         mtx_enter(&sc->sc_deferrals_mtx);
 1786         sched = TAILQ_EMPTY(&sc->sc_deferrals);
 1787 
 1788         TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
 1789         sc->sc_deferred++;
 1790         mtx_leave(&sc->sc_deferrals_mtx);
 1791 
 1792         if (sched)
 1793                 timeout_add_nsec(&sc->sc_deferrals_tmo, PFSYNC_DEFER_NSEC);
 1794 
 1795         schednetisr(NETISR_PFSYNC);
 1796 
 1797         return (1);
 1798 }
 1799 
 1800 void
 1801 pfsync_undefer_notify(struct pfsync_deferral *pd)
 1802 {
 1803         struct pf_pdesc pdesc;
 1804         struct pf_state *st = pd->pd_st;
 1805 
 1806         /*
 1807          * pf_remove_state removes the state keys and sets st->timeout
 1808          * to PFTM_UNLINKED. this is done under NET_LOCK which should
 1809          * be held here, so we can use PFTM_UNLINKED as a test for
 1810          * whether the state keys are set for the address family
 1811          * lookup.
 1812          */
 1813 
 1814         if (st->timeout == PFTM_UNLINKED)
 1815                 return;
 1816 
 1817         if (st->rt == PF_ROUTETO) {
 1818                 if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af,
 1819                     st->direction, st->kif, pd->pd_m, NULL) != PF_PASS)
 1820                         return;
 1821                 switch (st->key[PF_SK_WIRE]->af) {
 1822                 case AF_INET:
 1823                         pf_route(&pdesc, st);
 1824                         break;
 1825 #ifdef INET6
 1826                 case AF_INET6:
 1827                         pf_route6(&pdesc, st);
 1828                         break;
 1829 #endif /* INET6 */
 1830                 default:
 1831                         unhandled_af(st->key[PF_SK_WIRE]->af);
 1832                 }
 1833                 pd->pd_m = pdesc.m;
 1834         } else {
 1835                 switch (st->key[PF_SK_WIRE]->af) {
 1836                 case AF_INET:
 1837                         ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0);
 1838                         break;
 1839 #ifdef INET6
 1840                 case AF_INET6:
 1841                         ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL);
 1842                         break;
 1843 #endif /* INET6 */
 1844                 default:
 1845                         unhandled_af(st->key[PF_SK_WIRE]->af);
 1846                 }
 1847 
 1848                 pd->pd_m = NULL;
 1849         }
 1850 }
 1851 
 1852 void
 1853 pfsync_free_deferral(struct pfsync_deferral *pd)
 1854 {
 1855         struct pfsync_softc *sc = pfsyncif;
 1856 
 1857         pf_state_unref(pd->pd_st);
 1858         m_freem(pd->pd_m);
 1859         pool_put(&sc->sc_pool, pd);
 1860 }
 1861 
 1862 void
 1863 pfsync_undefer(struct pfsync_deferral *pd, int drop)
 1864 {
 1865         struct pfsync_softc *sc = pfsyncif;
 1866 
 1867         NET_ASSERT_LOCKED();
 1868 
 1869         if (sc == NULL)
 1870                 return;
 1871 
 1872         CLR(pd->pd_st->state_flags, PFSTATE_ACK);
 1873         if (!drop)
 1874                 pfsync_undefer_notify(pd);
 1875 
 1876         pfsync_free_deferral(pd);
 1877 }
 1878 
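      /*
       * Timeout handler for the deferral queue: collect every deferral
       * whose deadline has passed, re-arm the timeout for the first
       * one that has not, and then undefer the expired entries under
       * the net lock.
       */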
 1879 void
 1880 pfsync_deferrals_tmo(void *arg)
 1881 {
 1882         struct pfsync_softc *sc = arg;
 1883         struct pfsync_deferral *pd;
 1884         uint64_t now, nsec = 0;
 1885         struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds);
 1886 
 1887         now = getnsecuptime();
 1888 
 1889         mtx_enter(&sc->sc_deferrals_mtx);
 1890         for (;;) {
 1891                 pd = TAILQ_FIRST(&sc->sc_deferrals);
 1892                 if (pd == NULL)
 1893                         break;
 1894 
 1895                 if (now < pd->pd_deadline) {
 1896                         nsec = pd->pd_deadline - now;
 1897                         break;
 1898                 }
 1899 
 1900                 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
 1901                 sc->sc_deferred--;
 1902                 TAILQ_INSERT_TAIL(&pds, pd, pd_entry);
 1903         }
 1904         mtx_leave(&sc->sc_deferrals_mtx);
 1905 
 1906         if (nsec > 0) {
 1907                 /* we were looking at a pd, but it wasn't old enough */
 1908                 timeout_add_nsec(&sc->sc_deferrals_tmo, nsec);
 1909         }
 1910 
 1911         if (TAILQ_EMPTY(&pds))
 1912                 return;
 1913 
 1914         NET_LOCK();
 1915         while ((pd = TAILQ_FIRST(&pds)) != NULL) {
 1916                 TAILQ_REMOVE(&pds, pd, pd_entry);
 1917 
 1918                 pfsync_undefer(pd, 0);
 1919         }
 1920         NET_UNLOCK();
 1921 }
 1922 
 1923 void
 1924 pfsync_deferred(struct pf_state *st, int drop)
 1925 {
 1926         struct pfsync_softc *sc = pfsyncif;
 1927         struct pfsync_deferral *pd;
 1928 
 1929         NET_ASSERT_LOCKED();
 1930 
 1931         mtx_enter(&sc->sc_deferrals_mtx);
 1932         TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
 1933                 if (pd->pd_st == st) {
 1934                         TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
 1935                         sc->sc_deferred--;
 1936                         break;
 1937                 }
 1938         }
 1939         mtx_leave(&sc->sc_deferrals_mtx);
 1940 
 1941         if (pd != NULL)
 1942                 pfsync_undefer(pd, drop);
 1943 }
 1944 
 1945 void
 1946 pfsync_update_state(struct pf_state *st)
 1947 {
 1948         struct pfsync_softc *sc = pfsyncif;
 1949         int sync = 0;
 1950 
 1951         NET_ASSERT_LOCKED();
 1952 
 1953         if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
 1954                 return;
 1955 
 1956         if (ISSET(st->state_flags, PFSTATE_ACK))
 1957                 pfsync_deferred(st, 0);
 1958         if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
 1959                 if (st->sync_state != PFSYNC_S_NONE)
 1960                         pfsync_q_del(st);
 1961                 return;
 1962         }
 1963 
 1964         if (sc->sc_len == PFSYNC_MINPKT)
 1965                 timeout_add_sec(&sc->sc_tmo, 1);
 1966 
 1967         switch (st->sync_state) {
 1968         case PFSYNC_S_UPD_C:
 1969         case PFSYNC_S_UPD:
 1970         case PFSYNC_S_INS:
 1971                 /* we're already handling it */
 1972 
 1973                 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
 1974                         st->sync_updates++;
 1975                         if (st->sync_updates >= sc->sc_maxupdates)
 1976                                 sync = 1;
 1977                 }
 1978                 break;
 1979 
 1980         case PFSYNC_S_IACK:
 1981                 pfsync_q_del(st);
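                      /* FALLTHROUGH */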
 1982         case PFSYNC_S_NONE:
 1983                 pfsync_q_ins(st, PFSYNC_S_UPD_C);
 1984                 st->sync_updates = 0;
 1985                 break;
 1986 
 1987         default:
 1988                 panic("pfsync_update_state: unexpected sync state %d",
 1989                     st->sync_state);
 1990         }
 1991 
 1992         if (sync || (getuptime() - st->pfsync_time) < 2)
 1993                 schednetisr(NETISR_PFSYNC);
 1994 }
 1995 
 1996 void
 1997 pfsync_cancel_full_update(struct pfsync_softc *sc)
 1998 {
 1999         if (timeout_pending(&sc->sc_bulkfail_tmo) ||
 2000             timeout_pending(&sc->sc_bulk_tmo)) {
 2001 #if NCARP > 0
 2002                 if (!pfsync_sync_ok)
 2003                         carp_group_demote_adj(&sc->sc_if, -1,
 2004                             "pfsync bulk cancelled");
 2005                 if (sc->sc_initial_bulk) {
 2006                         carp_group_demote_adj(&sc->sc_if, -32,
 2007                             "pfsync init");
 2008                         sc->sc_initial_bulk = 0;
 2009                 }
 2010 #endif
 2011                 pfsync_sync_ok = 1;
 2012                 DPFPRINTF(LOG_INFO, "cancelling bulk update");
 2013         }
 2014         timeout_del(&sc->sc_bulkfail_tmo);
 2015         timeout_del(&sc->sc_bulk_tmo);
 2016         sc->sc_bulk_next = NULL;
 2017         sc->sc_bulk_last = NULL;
 2018         sc->sc_ureq_sent = 0;
 2019         sc->sc_bulk_tries = 0;
 2020 }
 2021 
 2022 void
 2023 pfsync_request_full_update(struct pfsync_softc *sc)
 2024 {
 2025         if (sc->sc_sync_ifidx != 0 && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
 2026                 /* Request a full state table update. */
 2027                 sc->sc_ureq_sent = getuptime();
 2028 #if NCARP > 0
 2029                 if (!sc->sc_link_demoted && pfsync_sync_ok)
 2030                         carp_group_demote_adj(&sc->sc_if, 1,
 2031                             "pfsync bulk start");
 2032 #endif
 2033                 pfsync_sync_ok = 0;
 2034                 DPFPRINTF(LOG_INFO, "requesting bulk update");
 2035                 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
 2036                     pf_pool_limits[PF_LIMIT_STATES].limit /
 2037                     ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
 2038                     sizeof(struct pfsync_state)));
 2039                 pfsync_request_update(0, 0);
 2040         }
 2041 }
 2042 
 2043 void
 2044 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
 2045 {
 2046         struct pfsync_softc *sc = pfsyncif;
 2047         struct pfsync_upd_req_item *item;
 2048         size_t nlen, sclen;
 2049         int retry;
 2050 
 2051         /*
 2052          * this code does nothing to prevent multiple update requests
 2053          * for the same state from being generated.
 2054          */
 2055 
 2056         item = pool_get(&sc->sc_pool, PR_NOWAIT);
 2057         if (item == NULL) {
 2058                 /* XXX stats */
 2059                 return;
 2060         }
 2061 
 2062         item->ur_msg.id = id;
 2063         item->ur_msg.creatorid = creatorid;
 2064 
 2065         for (;;) {
 2066                 mtx_enter(&sc->sc_upd_req_mtx);
 2067 
 2068                 nlen = sizeof(struct pfsync_upd_req);
 2069                 if (TAILQ_EMPTY(&sc->sc_upd_req_list))
 2070                         nlen += sizeof(struct pfsync_subheader);
 2071 
 2072                 sclen = atomic_add_long_nv(&sc->sc_len, nlen);
 2073                 retry = (sclen > sc->sc_if.if_mtu);
 2074                 if (retry)
 2075                         atomic_sub_long(&sc->sc_len, nlen);
 2076                 else
 2077                         TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
 2078 
 2079                 mtx_leave(&sc->sc_upd_req_mtx);
 2080 
 2081                 if (!retry)
 2082                         break;
 2083 
 2084                 pfsync_sendout();
 2085         }
 2086 
 2087         schednetisr(NETISR_PFSYNC);
 2088 }
 2089 
 2090 void
 2091 pfsync_update_state_req(struct pf_state *st)
 2092 {
 2093         struct pfsync_softc *sc = pfsyncif;
 2094 
 2095         if (sc == NULL)
 2096                 panic("pfsync_update_state_req: nonexistent instance");
 2097 
 2098         if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
 2099                 if (st->sync_state != PFSYNC_S_NONE)
 2100                         pfsync_q_del(st);
 2101                 return;
 2102         }
 2103 
 2104         switch (st->sync_state) {
 2105         case PFSYNC_S_UPD_C:
 2106         case PFSYNC_S_IACK:
 2107                 pfsync_q_del(st);
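                      /* FALLTHROUGH */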
 2108         case PFSYNC_S_NONE:
 2109                 pfsync_q_ins(st, PFSYNC_S_UPD);
 2110                 schednetisr(NETISR_PFSYNC);
 2111                 return;
 2112 
 2113         case PFSYNC_S_INS:
 2114         case PFSYNC_S_UPD:
 2115         case PFSYNC_S_DEL:
 2116                 /* we're already handling it */
 2117                 return;
 2118 
 2119         default:
 2120                 panic("pfsync_update_state_req: unexpected sync state %d",
 2121                     st->sync_state);
 2122         }
 2123 }
 2124 
 2125 void
 2126 pfsync_delete_state(struct pf_state *st)
 2127 {
 2128         struct pfsync_softc *sc = pfsyncif;
 2129 
 2130         NET_ASSERT_LOCKED();
 2131 
 2132         if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
 2133                 return;
 2134 
 2135         if (ISSET(st->state_flags, PFSTATE_ACK))
 2136                 pfsync_deferred(st, 1);
 2137         if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
 2138                 if (st->sync_state != PFSYNC_S_NONE)
 2139                         pfsync_q_del(st);
 2140                 return;
 2141         }
 2142 
 2143         if (sc->sc_len == PFSYNC_MINPKT)
 2144                 timeout_add_sec(&sc->sc_tmo, 1);
 2145 
 2146         switch (st->sync_state) {
 2147         case PFSYNC_S_INS:
 2148                 /* we never got to tell the world so just forget about it */
 2149                 pfsync_q_del(st);
 2150                 return;
 2151 
 2152         case PFSYNC_S_UPD_C:
 2153         case PFSYNC_S_UPD:
 2154         case PFSYNC_S_IACK:
 2155                 pfsync_q_del(st);
 2156                 /*
 2157                  * FALLTHROUGH to putting it on the del list
 2158                  * Note on reference count bookkeeping:
 2159                  *      pfsync_q_del() drops reference for queue
 2160                  *      ownership. But the st entry survives, because
 2161                  *      our caller still holds a reference.
 2162                  */
 2163 
 2164         case PFSYNC_S_NONE:
 2165                 /*
 2166                  * We either fall through here, or there is no reference to
 2167                  * st owned by pfsync queues at this point.
 2168                  *
 2169                  * Calling pfsync_q_ins() puts st to del queue. The pfsync_q_ins()
 2170                  * grabs a reference for delete queue.
 2171                  */
 2172                 pfsync_q_ins(st, PFSYNC_S_DEL);
 2173                 return;
 2174 
 2175         default:
 2176                 panic("pfsync_delete_state: unexpected sync state %d",
 2177                     st->sync_state);
 2178         }
 2179 }
 2180 
 2181 void
 2182 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
 2183 {
 2184         struct pfsync_softc *sc = pfsyncif;
 2185         struct {
 2186                 struct pfsync_subheader subh;
 2187                 struct pfsync_clr clr;
 2188         } __packed r;
 2189 
 2190         NET_ASSERT_LOCKED();
 2191 
 2192         if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
 2193                 return;
 2194 
 2195         bzero(&r, sizeof(r));
 2196 
 2197         r.subh.action = PFSYNC_ACT_CLR;
 2198         r.subh.len = sizeof(struct pfsync_clr) >> 2;
 2199         r.subh.count = htons(1);
 2200 
 2201         strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
 2202         r.clr.creatorid = creatorid;
 2203 
 2204         pfsync_send_plus(&r, sizeof(r));
 2205 }
 2206 
 2207 void
 2208 pfsync_iack(struct pf_state *st)
 2209 {
 2210         pfsync_q_ins(st, PFSYNC_S_IACK);
 2211         schednetisr(NETISR_PFSYNC);
 2212 }
 2213 
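      /*
       * Put a state onto one of the pfsync queues, accounting for the
       * extra packet length and flushing the current packet first if
       * the addition would exceed the interface MTU. A reference to
       * the state is held while it sits on the queue.
       */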
 2214 void
 2215 pfsync_q_ins(struct pf_state *st, int q)
 2216 {
 2217         struct pfsync_softc *sc = pfsyncif;
 2218         size_t nlen, sclen;
 2219 
 2220         if (sc->sc_len < PFSYNC_MINPKT)
 2221                 panic("pfsync pkt len is too low %zd", sc->sc_len);
 2222         do {
 2223                 mtx_enter(&sc->sc_st_mtx);
 2224 
 2225                 /*
 2226                  * Either two threads are trying to update the same
 2227                  * state, or the state is currently being processed
 2228                  * (it is on a snapshot queue).
 2229                  */
 2230                 if (st->sync_state != PFSYNC_S_NONE) {
 2231                         mtx_leave(&sc->sc_st_mtx);
 2232                         break;
 2233                 }
 2234 
 2235                 nlen = pfsync_qs[q].len;
 2236 
 2237                 if (TAILQ_EMPTY(&sc->sc_qs[q]))
 2238                         nlen += sizeof(struct pfsync_subheader);
 2239 
 2240                 sclen = atomic_add_long_nv(&sc->sc_len, nlen);
 2241                 if (sclen > sc->sc_if.if_mtu) {
 2242                         atomic_sub_long(&sc->sc_len, nlen);
 2243                         mtx_leave(&sc->sc_st_mtx);
 2244                         pfsync_sendout();
 2245                         continue;
 2246                 }
 2247 
 2248                 pf_state_ref(st);
 2249 
 2250                 TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
 2251                 st->sync_state = q;
 2252                 mtx_leave(&sc->sc_st_mtx);
 2253         } while (0);
 2254 }
 2255 
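      /*
       * Take a state off its pfsync queue and give back the queue's
       * reference, unless the state has already been snapped, in which
       * case it is owned by a snapshot and will be released there.
       */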
 2256 void
 2257 pfsync_q_del(struct pf_state *st)
 2258 {
 2259         struct pfsync_softc *sc = pfsyncif;
 2260         int q;
 2261 
 2262         KASSERT(st->sync_state != PFSYNC_S_NONE);
 2263 
 2264         mtx_enter(&sc->sc_st_mtx);
 2265         q = st->sync_state;
 2266         /*
 2267          * Re-check under the mutex: if the state is already snapped,
 2268          * just bail out; we came too late and the state is currently
 2269          * being processed/dispatched to the peer.
 2270          */
 2271         if ((q == PFSYNC_S_NONE) || (st->snapped)) {
 2272                 mtx_leave(&sc->sc_st_mtx);
 2273                 return;
 2274         }
 2275         atomic_sub_long(&sc->sc_len, pfsync_qs[q].len);
 2276         TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
 2277         if (TAILQ_EMPTY(&sc->sc_qs[q]))
 2278                 atomic_sub_long(&sc->sc_len, sizeof (struct pfsync_subheader));
 2279         st->sync_state = PFSYNC_S_NONE;
 2280         mtx_leave(&sc->sc_st_mtx);
 2281 
 2282         pf_state_unref(st);
 2283 }
 2284 
 2285 #if defined(IPSEC)
 2286 void
 2287 pfsync_update_tdb(struct tdb *t, int output)
 2288 {
 2289         struct pfsync_softc *sc = pfsyncif;
 2290         size_t nlen, sclen;
 2291 
 2292         if (sc == NULL)
 2293                 return;
 2294 
 2295         if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
 2296                 do {
 2297                         mtx_enter(&sc->sc_tdb_mtx);
 2298                         nlen = sizeof(struct pfsync_tdb);
 2299 
 2300                         mtx_enter(&t->tdb_mtx);
 2301                         if (ISSET(t->tdb_flags, TDBF_PFSYNC)) {
 2302                                 /* we've lost the race; nothing for us to do */
 2303                                 mtx_leave(&t->tdb_mtx);
 2304                                 mtx_leave(&sc->sc_tdb_mtx);
 2305                                 break;
 2306                         }
 2307 
 2308                         if (TAILQ_EMPTY(&sc->sc_tdb_q))
 2309                                 nlen += sizeof(struct pfsync_subheader);
 2310 
 2311                         sclen = atomic_add_long_nv(&sc->sc_len, nlen);
 2312                         if (sclen > sc->sc_if.if_mtu) {
 2313                                 atomic_sub_long(&sc->sc_len, nlen);
 2314                                 mtx_leave(&t->tdb_mtx);
 2315                                 mtx_leave(&sc->sc_tdb_mtx);
 2316                                 pfsync_sendout();
 2317                                 continue;
 2318                         }
 2319 
 2320                         TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
 2321                         tdb_ref(t);
 2322                         SET(t->tdb_flags, TDBF_PFSYNC);
 2323                         mtx_leave(&t->tdb_mtx);
 2324 
 2325                         mtx_leave(&sc->sc_tdb_mtx);
 2326                         t->tdb_updates = 0;
 2327                 } while (0);
 2328         } else {
 2329                 if (++t->tdb_updates >= sc->sc_maxupdates)
 2330                         schednetisr(NETISR_PFSYNC);
 2331         }
 2332 
 2333         mtx_enter(&t->tdb_mtx);
 2334         if (output)
 2335                 SET(t->tdb_flags, TDBF_PFSYNC_RPL);
 2336         else
 2337                 CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
 2338         mtx_leave(&t->tdb_mtx);
 2339 }
 2340 #endif
 2341 
 2342 #if defined(IPSEC)
 2343 void
 2344 pfsync_delete_tdb(struct tdb *t)
 2345 {
 2346         struct pfsync_softc *sc = pfsyncif;
 2347         size_t nlen;
 2348 
 2349         if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
 2350                 return;
 2351 
 2352         mtx_enter(&sc->sc_tdb_mtx);
 2353 
 2354         /*
 2355          * If the tdb entry is currently being processed (found in a
 2356          * snapshot), it cannot be deleted; we simply came too late.
 2357          */
 2358         if (ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)) {
 2359                 mtx_leave(&sc->sc_tdb_mtx);
 2360                 return;
 2361         }
 2362 
 2363         TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
 2364 
 2365         mtx_enter(&t->tdb_mtx);
 2366         CLR(t->tdb_flags, TDBF_PFSYNC);
 2367         mtx_leave(&t->tdb_mtx);
 2368 
 2369         nlen = sizeof(struct pfsync_tdb);
 2370         if (TAILQ_EMPTY(&sc->sc_tdb_q))
 2371                 nlen += sizeof(struct pfsync_subheader);
 2372         atomic_sub_long(&sc->sc_len, nlen);
 2373 
 2374         mtx_leave(&sc->sc_tdb_mtx);
 2375 
 2376         tdb_unref(t);
 2377 }
 2378 #endif
 2379 
 2380 void
 2381 pfsync_out_tdb(struct tdb *t, void *buf)
 2382 {
 2383         struct pfsync_tdb *ut = buf;
 2384 
 2385         bzero(ut, sizeof(*ut));
 2386         ut->spi = t->tdb_spi;
 2387         bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
 2388         /*
 2389          * When a failover happens, the master's rpl is probably above
 2390          * what we see here (we may be up to a second late), so
 2391          * increase it a bit for outbound tdbs to manage most such
 2392          * situations.
 2393          *
 2394          * For now, just add an offset that is likely to be larger
 2395          * than the number of packets we can see in one second. The RFC
 2396          * just says the next packet must have a higher seq value.
 2397          *
 2398          * XXX What is a good algorithm for this? We could use
 2399          * a rate-determined increase, but to know it, we would have
 2400          * to extend struct tdb.
 2401          * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
 2402          * will soon be replaced anyway. For now, just don't handle
 2403          * this edge case.
 2404          */
 2405 #define RPL_INCR 16384
 2406         ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
 2407             RPL_INCR : 0));
 2408         ut->cur_bytes = htobe64(t->tdb_cur_bytes);
 2409         ut->sproto = t->tdb_sproto;
 2410         ut->rdomain = htons(t->tdb_rdomain);
 2411 }
 2412 
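      /*
       * A peer has asked for a full state table update: remember where
       * the bulk walk starts and ends, announce the start of the bulk
       * transfer, and schedule the first batch.
       */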
 2413 void
 2414 pfsync_bulk_start(void)
 2415 {
 2416         struct pfsync_softc *sc = pfsyncif;
 2417 
 2418         NET_ASSERT_LOCKED();
 2419 
 2420         /*
 2421          * pf gc via pfsync_state_in_use reads sc_bulk_next and
 2422          * sc_bulk_last while exclusively holding the pf_state_list
 2423          * rwlock. make sure it can't race with us setting these
 2424          * pointers. they basically act as hazards, and borrow the
 2425          * list's state reference count.
 2426          */
 2427         rw_enter_read(&pf_state_list.pfs_rwl);
 2428 
 2429         /* get a consistent view of the list pointers */
 2430         mtx_enter(&pf_state_list.pfs_mtx);
 2431         if (sc->sc_bulk_next == NULL)
 2432                 sc->sc_bulk_next = TAILQ_FIRST(&pf_state_list.pfs_list);
 2433 
 2434         sc->sc_bulk_last = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
 2435         mtx_leave(&pf_state_list.pfs_mtx);
 2436 
 2437         rw_exit_read(&pf_state_list.pfs_rwl);
 2438 
 2439         DPFPRINTF(LOG_INFO, "received bulk update request");
 2440 
 2441         if (sc->sc_bulk_last == NULL)
 2442                 pfsync_bulk_status(PFSYNC_BUS_END);
 2443         else {
 2444                 sc->sc_ureq_received = getuptime();
 2445 
 2446                 pfsync_bulk_status(PFSYNC_BUS_START);
 2447                 timeout_add(&sc->sc_bulk_tmo, 0);
 2448         }
 2449 }
 2450 
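      /*
       * Send one batch of the bulk update: walk the state list from
       * sc_bulk_next, queueing update requests until the packet fills
       * up or the end of the list is reached, then either reschedule
       * ourselves or signal the end of the bulk transfer.
       */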
 2451 void
 2452 pfsync_bulk_update(void *arg)
 2453 {
 2454         struct pfsync_softc *sc;
 2455         struct pf_state *st;
 2456         int i = 0;
 2457 
 2458         NET_LOCK();
 2459         sc = pfsyncif;
 2460         if (sc == NULL)
 2461                 goto out;
 2462 
 2463         rw_enter_read(&pf_state_list.pfs_rwl);
 2464         st = sc->sc_bulk_next;
 2465         sc->sc_bulk_next = NULL;
 2466 
 2467         for (;;) {
 2468                 if (st->sync_state == PFSYNC_S_NONE &&
 2469                     st->timeout < PFTM_MAX &&
 2470                     st->pfsync_time <= sc->sc_ureq_received) {
 2471                         pfsync_update_state_req(st);
 2472                         i++;
 2473                 }
 2474 
 2475                 st = TAILQ_NEXT(st, entry_list);
 2476                 if ((st == NULL) || (st == sc->sc_bulk_last)) {
 2477                         /* we're done */
 2478                         sc->sc_bulk_last = NULL;
 2479                         pfsync_bulk_status(PFSYNC_BUS_END);
 2480                         break;
 2481                 }
 2482 
 2483                 if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) <
 2484                     sizeof(struct pfsync_state)) {
 2485                         /* we've filled a packet */
 2486                         sc->sc_bulk_next = st;
 2487                         timeout_add(&sc->sc_bulk_tmo, 1);
 2488                         break;
 2489                 }
 2490         }
 2491 
 2492         rw_exit_read(&pf_state_list.pfs_rwl);
 2493  out:
 2494         NET_UNLOCK();
 2495 }
 2496 
 2497 void
 2498 pfsync_bulk_status(u_int8_t status)
 2499 {
 2500         struct {
 2501                 struct pfsync_subheader subh;
 2502                 struct pfsync_bus bus;
 2503         } __packed r;
 2504 
 2505         struct pfsync_softc *sc = pfsyncif;
 2506 
 2507         bzero(&r, sizeof(r));
 2508 
 2509         r.subh.action = PFSYNC_ACT_BUS;
 2510         r.subh.len = sizeof(struct pfsync_bus) >> 2;
 2511         r.subh.count = htons(1);
 2512 
 2513         r.bus.creatorid = pf_status.hostid;
 2514         r.bus.endtime = htonl(getuptime() - sc->sc_ureq_received);
 2515         r.bus.status = status;
 2516 
 2517         pfsync_send_plus(&r, sizeof(r));
 2518 }
 2519 
 2520 void
 2521 pfsync_bulk_fail(void *arg)
 2522 {
 2523         struct pfsync_softc *sc;
 2524 
 2525         NET_LOCK();
 2526         sc = pfsyncif;
 2527         if (sc == NULL)
 2528                 goto out;
 2529         if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
 2530                 /* Try again */
 2531                 timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
 2532                 pfsync_request_update(0, 0);
 2533         } else {
 2534                 /* Pretend like the transfer was ok */
 2535                 sc->sc_ureq_sent = 0;
 2536                 sc->sc_bulk_tries = 0;
 2537 #if NCARP > 0
 2538                 if (!pfsync_sync_ok)
 2539                         carp_group_demote_adj(&sc->sc_if, -1,
 2540                             sc->sc_link_demoted ?
 2541                             "pfsync link state up" :
 2542                             "pfsync bulk fail");
 2543                 if (sc->sc_initial_bulk) {
 2544                         carp_group_demote_adj(&sc->sc_if, -32,
 2545                             "pfsync init");
 2546                         sc->sc_initial_bulk = 0;
 2547                 }
 2548 #endif
 2549                 pfsync_sync_ok = 1;
 2550                 sc->sc_link_demoted = 0;
 2551                 DPFPRINTF(LOG_ERR, "failed to receive bulk update");
 2552         }
 2553  out:
 2554         NET_UNLOCK();
 2555 }
 2556 
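      /*
       * Attach a caller-built region (e.g. a clear or bulk status
       * message) to the next packet and flush it immediately, sending
       * the current packet first if the region would not fit.
       */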
 2557 void
 2558 pfsync_send_plus(void *plus, size_t pluslen)
 2559 {
 2560         struct pfsync_softc *sc = pfsyncif;
 2561 
 2562         if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
 2563                 pfsync_sendout();
 2564 
 2565         sc->sc_plus = plus;
 2566         sc->sc_pluslen = pluslen;
 2567         atomic_add_long(&sc->sc_len, pluslen);
 2568 
 2569         pfsync_sendout();
 2570 }
 2571 
 2572 int
 2573 pfsync_is_up(void)
 2574 {
 2575         struct pfsync_softc *sc = pfsyncif;
 2576 
 2577         if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
 2578                 return (0);
 2579 
 2580         return (1);
 2581 }
 2582 
 2583 int
 2584 pfsync_state_in_use(struct pf_state *st)
 2585 {
 2586         struct pfsync_softc *sc = pfsyncif;
 2587 
 2588         if (sc == NULL)
 2589                 return (0);
 2590 
 2591         rw_assert_wrlock(&pf_state_list.pfs_rwl);
 2592 
 2593         if (st->sync_state != PFSYNC_S_NONE ||
 2594             st == sc->sc_bulk_next ||
 2595             st == sc->sc_bulk_last)
 2596                 return (1);
 2597 
 2598         return (0);
 2599 }
 2600 
 2601 void
 2602 pfsync_timeout(void *arg)
 2603 {
 2604         NET_LOCK();
 2605         pfsync_sendout();
 2606         NET_UNLOCK();
 2607 }
 2608 
 2609 /* this is a softnet/netisr handler */
 2610 void
 2611 pfsyncintr(void)
 2612 {
 2613         pfsync_sendout();
 2614 }
 2615 
 2616 int
 2617 pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp)
 2618 {
 2619         struct pfsyncstats pfsyncstat;
 2620 
 2621         CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t)));
 2622         memset(&pfsyncstat, 0, sizeof pfsyncstat);
 2623         counters_read(pfsynccounters, (uint64_t *)&pfsyncstat,
 2624             pfsyncs_ncounters);
 2625         return (sysctl_rdstruct(oldp, oldlenp, newp,
 2626             &pfsyncstat, sizeof(pfsyncstat)));
 2627 }
 2628 
 2629 int
 2630 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
 2631     size_t newlen)
 2632 {
 2633         /* All sysctl names at this level are terminal. */
 2634         if (namelen != 1)
 2635                 return (ENOTDIR);
 2636 
 2637         switch (name[0]) {
 2638         case PFSYNCCTL_STATS:
 2639                 return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp));
 2640         default:
 2641                 return (ENOPROTOOPT);
 2642         }
 2643 }
