The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_carp.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2002 Michael Shalayeff.
    5  * Copyright (c) 2003 Ryan McBride.
    6  * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
    7  * All rights reserved.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   21  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
   22  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   24  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
   28  * THE POSSIBILITY OF SUCH DAMAGE.
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include "opt_bpf.h"
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/devctl.h>
   41 #include <sys/jail.h>
   42 #include <sys/kernel.h>
   43 #include <sys/limits.h>
   44 #include <sys/malloc.h>
   45 #include <sys/mbuf.h>
   46 #include <sys/module.h>
   47 #include <sys/priv.h>
   48 #include <sys/proc.h>
   49 #include <sys/socket.h>
   50 #include <sys/sockio.h>
   51 #include <sys/sysctl.h>
   52 #include <sys/syslog.h>
   53 #include <sys/taskqueue.h>
   54 #include <sys/counter.h>
   55 
   56 #include <net/ethernet.h>
   57 #include <net/if.h>
   58 #include <net/if_var.h>
   59 #include <net/if_dl.h>
   60 #include <net/if_llatbl.h>
   61 #include <net/if_types.h>
   62 #include <net/route.h>
   63 #include <net/vnet.h>
   64 
   65 #if defined(INET) || defined(INET6)
   66 #include <netinet/in.h>
   67 #include <netinet/in_var.h>
   68 #include <netinet/ip_carp.h>
   69 #include <netinet/ip.h>
   70 #include <machine/in_cksum.h>
   71 #endif
   72 #ifdef INET
   73 #include <netinet/ip_var.h>
   74 #include <netinet/if_ether.h>
   75 #endif
   76 
   77 #ifdef INET6
   78 #include <netinet/icmp6.h>
   79 #include <netinet/ip6.h>
   80 #include <netinet6/in6_var.h>
   81 #include <netinet6/ip6_var.h>
   82 #include <netinet6/scope6_var.h>
   83 #include <netinet6/nd6.h>
   84 #endif
   85 
   86 #include <crypto/sha1.h>
   87 
/*
 * File-local malloc(9) type M_CARP, with short name "CARP" and
 * description "CARP addresses".
 */
static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
   89 
/*
 * Per-vhid CARP state.  One softc exists for each virtual host id
 * configured on a parent interface; it is linked both on the parent's
 * carp_if list (sc_list) and on the global list (sc_next).
 */
struct carp_softc {
        struct ifnet            *sc_carpdev;    /* Pointer to parent ifnet. */
        struct ifaddr           **sc_ifas;      /* Our ifaddrs. */
        struct sockaddr_dl      sc_addr;        /* Our link level address. */
        struct callout          sc_ad_tmo;      /* Advertising timeout. */
#ifdef INET
        struct callout          sc_md_tmo;      /* Master down timeout. */
#endif
#ifdef INET6
        struct callout          sc_md6_tmo;     /* XXX: Master down timeout. */
#endif
        /* Softc lock; see CARP_LOCK() and friends. */
        struct mtx              sc_mtx;

        /* Virtual host id; only the low 8 bits enter the HMAC. */
        int                     sc_vhid;
        /* Advertisement skew, in 1/256ths of a second (see carp_input_c()). */
        int                     sc_advskew;
        /* Advertisement base interval, in seconds (see carp_input_c()). */
        int                     sc_advbase;

        /*
         * sc_naddrs + sc_naddrs6 bounds the walk over sc_ifas in
         * CARP_FOREACH_IFA().  Presumably these are the IPv4 and IPv6
         * address counts respectively, and sc_ifasiz the allocated size
         * of sc_ifas -- confirm against the code that grows the array.
         */
        int                     sc_naddrs;
        int                     sc_naddrs6;
        int                     sc_ifasiz;
        /* Protocol state; drives the switch in carp_input_c(). */
        enum { INIT = 0, BACKUP, MASTER }       sc_state;
        int                     sc_suppress;
        int                     sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS  3
        int                     sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS 3

        /*
         * sc_counter is the 64-bit advertisement counter.  While
         * sc_init_counter is non-zero, carp_prepare_ad() picks a random
         * counter instead of incrementing; carp_input_c() clears the flag
         * and adopts the counter of an authenticated advertisement.
         */
        int                     sc_init_counter;
        uint64_t                sc_counter;

        /* authentication */
#define CARP_HMAC_PAD   64
        /* Shared key. */
        unsigned char sc_key[CARP_KEY_LEN];
        /* HMAC pad; carp_hmac_prepare() leaves the outer pad (opad) here. */
        unsigned char sc_pad[CARP_HMAC_PAD];
        /* Precomputed prefix of the inner hash (see carp_hmac_prepare()). */
        SHA1_CTX sc_sha1;

        TAILQ_ENTRY(carp_softc) sc_list;        /* On the carp_if list. */
        LIST_ENTRY(carp_softc)  sc_next;        /* On the global list. */
};
  129 
/*
 * Per-interface CARP state, reached through ifp->if_carp.  It anchors the
 * list of softcs (one per vhid) configured on that interface.
 */
struct carp_if {
#ifdef INET
        int     cif_naddrs;
#endif
#ifdef INET6
        int     cif_naddrs6;
#endif
        /* Softcs on this interface, linked through sc_list. */
        TAILQ_HEAD(, carp_softc) cif_vrs;
#ifdef INET
        struct ip_moptions       cif_imo;
#endif
#ifdef INET6
        struct ip6_moptions      cif_im6o;
#endif
        struct ifnet    *cif_ifp;
        /* Protects cif_vrs traversal; see CIF_LOCK(). */
        struct mtx      cif_mtx;
        uint32_t        cif_flags;
        /* NOTE(review): presumably set while CARP holds the ifnet promiscuous. */
#define CIF_PROMISC     0x00000001
};
  149 
  150 /*
  151  * Brief design of carp(4).
  152  *
  153  * Any carp-capable ifnet may have a list of carp softcs hanging off
  154  * its ifp->if_carp pointer. Each softc represents one unique virtual
  155  * host id, or vhid. The softc has a back pointer to the ifnet. All
  156  * softcs are joined in a global list, which has quite limited use.
  157  *
  158  * Any interface address that takes part in CARP negotiation has a
  159  * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
  160  * AF_INET or AF_INET6 address.
  161  *
 * Although one could follow the softc's back pointer to the ifnet and
 * traverse its ifp->if_addrhead queue to find all interface addresses
 * involved in CARP, we keep a growable array of ifaddr pointers. This
 * lets us avoid taking the IF_ADDR_LOCK() in the many traversals that
 * call into the network stack, thus avoiding lock order reversals (LORs).
  167  *
  168  * Locking:
  169  *
  170  * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
  171  * callout-driven events and ioctl()s.
  172  *
  173  * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
  174  * To traverse the global list we use the mutex carp_mtx.
  175  *
  176  * Known issues with locking:
  177  *
  178  * - Sending ad, we put the pointer to the softc in an mtag, and no reference
  179  *   counting is done on the softc.
  180  * - On module unload we may race (?) with packet processing thread
  181  *   dereferencing our function pointers.
  182  */
  183 
/*
 * Per-VNET tunables.  Each variable is accessed through its V_ alias and
 * is exported under net.inet.carp by the sysctl declarations in this file.
 */

/* Accept incoming CARP packets; when zero the input routines drop them. */
VNET_DEFINE_STATIC(int, carp_allow) = 1;
#define V_carp_allow    VNET(carp_allow)

/* Set DSCP in outgoing CARP packets. */
VNET_DEFINE_STATIC(int, carp_dscp) = 56;
#define V_carp_dscp     VNET(carp_dscp)

/* Preempt slower nodes (consulted in the BACKUP case of carp_input_c()). */
VNET_DEFINE_STATIC(int, carp_preempt) = 0;
#define V_carp_preempt  VNET(carp_preempt)

/* Log level: CARP_LOG() logs when > 0, CARP_DEBUG() when > 1. */
VNET_DEFINE_STATIC(int, carp_log) = 1;
#define V_carp_log      VNET(carp_log)

/* Global advskew demotion; added to sc_advskew by DEMOTE_ADVSKEW(). */
VNET_DEFINE_STATIC(int, carp_demotion) = 0;
#define V_carp_demotion VNET(carp_demotion)

/* Send error demotion factor. */
VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
#define V_carp_senderr_adj      VNET(carp_senderr_adj)

/* Iface down demotion factor. */
VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW;
#define V_carp_ifdown_adj       VNET(carp_ifdown_adj)
  211 
/*
 * Handlers for the sysctls that need more than a plain integer store:
 * allow, dscp and demotion.
 */
static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);
static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);
static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);

/* The net.inet.carp sysctl tree.  All leaves are per-VNET and read/write. */
SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "CARP");
/* net.inet.carp.allow -- handled by carp_allow_sysctl(). */
SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, carp_allow_sysctl, "I",
    "Accept incoming CARP packets");
/* net.inet.carp.dscp -- handled by carp_dscp_sysctl(). */
SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, carp_dscp_sysctl, "I",
    "DSCP value for carp packets");
/* net.inet.carp.preempt -- stored directly in carp_preempt. */
SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
/* net.inet.carp.log -- stored directly in carp_log. */
SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(carp_log), 0, "CARP log level");
/* net.inet.carp.demotion -- handled by carp_demote_adj_sysctl(). */
SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, carp_demote_adj_sysctl, "I",
    "Adjust demotion factor (skew of advskew)");
/* net.inet.carp.senderr_demotion_factor -- stored in carp_senderr_adj. */
SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
    CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
/* net.inet.carp.ifdown_demotion_factor -- stored in carp_ifdown_adj. */
SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor,
    CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(carp_ifdown_adj), 0,
    "Interface down demotion factor adjustment");
  241 
/*
 * Per-VNET, per-CPU CARP statistics (struct carpstats), with their
 * initialisation and teardown hooks.
 */
VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
VNET_PCPUSTAT_SYSINIT(carpstats);
VNET_PCPUSTAT_SYSUNINIT(carpstats);

/*
 * Add `val' to the statistic `name'.  The counter is selected by dividing
 * the field's offset by sizeof(uint64_t), so every member of struct
 * carpstats is expected to be a 64-bit counter.
 */
#define CARPSTATS_ADD(name, val)        \
    counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
        sizeof(uint64_t)], (val))
/* Increment the statistic `name' by one. */
#define CARPSTATS_INC(name)             CARPSTATS_ADD(name, 1)

/* Export the statistics as net.inet.carp.stats. */
SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
    carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
  253 
/*
 * Softc lock (sc_mtx): a default mutex named "carp_softc".
 * CARP_LOCK_ASSERT() asserts that the current thread owns it.
 */
#define CARP_LOCK_INIT(sc)      mtx_init(&(sc)->sc_mtx, "carp_softc",   \
        NULL, MTX_DEF)
#define CARP_LOCK_DESTROY(sc)   mtx_destroy(&(sc)->sc_mtx)
#define CARP_LOCK_ASSERT(sc)    mtx_assert(&(sc)->sc_mtx, MA_OWNED)
#define CARP_LOCK(sc)           mtx_lock(&(sc)->sc_mtx)
#define CARP_UNLOCK(sc)         mtx_unlock(&(sc)->sc_mtx)
/*
 * Per-interface lock (cif_mtx): a default mutex named "carp_if".
 */
#define CIF_LOCK_INIT(cif)      mtx_init(&(cif)->cif_mtx, "carp_if",   \
        NULL, MTX_DEF)
#define CIF_LOCK_DESTROY(cif)   mtx_destroy(&(cif)->cif_mtx)
#define CIF_LOCK_ASSERT(cif)    mtx_assert(&(cif)->cif_mtx, MA_OWNED)
#define CIF_LOCK(cif)           mtx_lock(&(cif)->cif_mtx)
#define CIF_UNLOCK(cif)         mtx_unlock(&(cif)->cif_mtx)
/*
 * Release a carp_if if it no longer has any softcs.  The lock is taken
 * first; if cif_vrs is empty carp_free_if() is called with the lock still
 * held (NOTE(review): carp_free_if() is therefore expected to dispose of
 * the lock itself -- confirm), otherwise the lock is simply dropped.
 */
#define CIF_FREE(cif)   do {                            \
                CIF_LOCK(cif);                          \
                if (TAILQ_EMPTY(&(cif)->cif_vrs))       \
                        carp_free_if(cif);              \
                else                                    \
                        CIF_UNLOCK(cif);                \
} while (0)
  273 
/*
 * Log at LOG_INFO with a "carp: " prefix when the log level is above 0.
 * The first argument must be a string literal, because the prefix is
 * attached by literal concatenation.
 */
#define CARP_LOG(...)   do {                            \
        if (V_carp_log > 0)                             \
                log(LOG_INFO, "carp: " __VA_ARGS__);    \
} while (0)

/* Log at LOG_DEBUG (no prefix) when the log level is above 1. */
#define CARP_DEBUG(...) do {                            \
        if (V_carp_log > 1)                             \
                log(LOG_DEBUG, __VA_ARGS__);            \
} while (0)
  283 
  284 #define IFNET_FOREACH_IFA(ifp, ifa)                                     \
  285         CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \
  286                 if ((ifa)->ifa_carp != NULL)
  287 
  288 #define CARP_FOREACH_IFA(sc, ifa)                                       \
  289         CARP_LOCK_ASSERT(sc);                                           \
  290         for (int _i = 0;                                                \
  291                 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&              \
  292                 ((ifa) = sc->sc_ifas[_i]) != NULL;                      \
  293                 ++_i)
  294 
  295 #define IFNET_FOREACH_CARP(ifp, sc)                                     \
  296         KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) ||                    \
  297             sx_xlocked(&carp_sx), ("cif_vrs not locked"));              \
  298         TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
  299 
/*
 * Effective advertisement skew of a softc: its configured sc_advskew plus
 * the global demotion counter, clamped to the range [0, CARP_MAXSKEW].
 * Note that `sc' is evaluated more than once.
 */
#define DEMOTE_ADVSKEW(sc)                                      \
    (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ?      \
    CARP_MAXSKEW :                                              \
        (((sc)->sc_advskew + V_carp_demotion < 0) ?             \
        0 : ((sc)->sc_advskew + V_carp_demotion)))
  305 
/* Forward declarations of file-local functions. */
static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
static struct carp_softc
                *carp_alloc(struct ifnet *);
static void     carp_destroy(struct carp_softc *);
static struct carp_if
                *carp_alloc_if(struct ifnet *);
static void     carp_free_if(struct carp_if *);
static void     carp_set_state(struct carp_softc *, int, const char* reason);
static void     carp_sc_state(struct carp_softc *);
static void     carp_setrun(struct carp_softc *, sa_family_t);
static void     carp_master_down(void *);
static void     carp_master_down_locked(struct carp_softc *,
                    const char* reason);
static void     carp_send_ad(void *);
static void     carp_send_ad_locked(struct carp_softc *);
static void     carp_addroute(struct carp_softc *);
static void     carp_ifa_addroute(struct ifaddr *);
static void     carp_delroute(struct carp_softc *);
static void     carp_ifa_delroute(struct ifaddr *);
static void     carp_send_ad_all(void *, int);
static void     carp_demote_adj(int, char *);
  327 
/* Global list of all softcs, linked through sc_next. */
static LIST_HEAD(, carp_softc) carp_list;
/* Mutex used to traverse the global list. */
static struct mtx carp_mtx;
/* Sx lock; holding it exclusively permits walking an ifnet's softc list. */
static struct sx carp_sx;
/* Task that runs carp_send_ad_all() with a NULL context argument. */
static struct task carp_sendall_task =
    TASK_INITIALIZER(0, carp_send_ad_all, NULL);
  333 
  334 static void
  335 carp_hmac_prepare(struct carp_softc *sc)
  336 {
  337         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
  338         uint8_t vhid = sc->sc_vhid & 0xff;
  339         struct ifaddr *ifa;
  340         int i, found;
  341 #ifdef INET
  342         struct in_addr last, cur, in;
  343 #endif
  344 #ifdef INET6
  345         struct in6_addr last6, cur6, in6;
  346 #endif
  347 
  348         CARP_LOCK_ASSERT(sc);
  349 
  350         /* Compute ipad from key. */
  351         bzero(sc->sc_pad, sizeof(sc->sc_pad));
  352         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
  353         for (i = 0; i < sizeof(sc->sc_pad); i++)
  354                 sc->sc_pad[i] ^= 0x36;
  355 
  356         /* Precompute first part of inner hash. */
  357         SHA1Init(&sc->sc_sha1);
  358         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
  359         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
  360         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
  361         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
  362 #ifdef INET
  363         cur.s_addr = 0;
  364         do {
  365                 found = 0;
  366                 last = cur;
  367                 cur.s_addr = 0xffffffff;
  368                 CARP_FOREACH_IFA(sc, ifa) {
  369                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
  370                         if (ifa->ifa_addr->sa_family == AF_INET &&
  371                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
  372                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
  373                                 cur.s_addr = in.s_addr;
  374                                 found++;
  375                         }
  376                 }
  377                 if (found)
  378                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
  379         } while (found);
  380 #endif /* INET */
  381 #ifdef INET6
  382         memset(&cur6, 0, sizeof(cur6));
  383         do {
  384                 found = 0;
  385                 last6 = cur6;
  386                 memset(&cur6, 0xff, sizeof(cur6));
  387                 CARP_FOREACH_IFA(sc, ifa) {
  388                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
  389                         if (IN6_IS_SCOPE_EMBED(&in6))
  390                                 in6.s6_addr16[1] = 0;
  391                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
  392                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
  393                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
  394                                 cur6 = in6;
  395                                 found++;
  396                         }
  397                 }
  398                 if (found)
  399                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
  400         } while (found);
  401 #endif /* INET6 */
  402 
  403         /* convert ipad to opad */
  404         for (i = 0; i < sizeof(sc->sc_pad); i++)
  405                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
  406 }
  407 
  408 static void
  409 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
  410     unsigned char md[20])
  411 {
  412         SHA1_CTX sha1ctx;
  413 
  414         CARP_LOCK_ASSERT(sc);
  415 
  416         /* fetch first half of inner hash */
  417         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
  418 
  419         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
  420         SHA1Final(md, &sha1ctx);
  421 
  422         /* outer hash */
  423         SHA1Init(&sha1ctx);
  424         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
  425         SHA1Update(&sha1ctx, md, 20);
  426         SHA1Final(md, &sha1ctx);
  427 }
  428 
  429 static int
  430 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
  431     unsigned char md[20])
  432 {
  433         unsigned char md2[20];
  434 
  435         CARP_LOCK_ASSERT(sc);
  436 
  437         carp_hmac_generate(sc, counter, md2);
  438 
  439         return (bcmp(md, md2, sizeof(md2)));
  440 }
  441 
  442 /*
  443  * process input packet.
  444  * we have rearranged checks order compared to the rfc,
  445  * but it seems more efficient this way or not possible otherwise.
  446  */
  447 #ifdef INET
  448 static int
  449 carp_input(struct mbuf **mp, int *offp, int proto)
  450 {
  451         struct mbuf *m = *mp;
  452         struct ip *ip = mtod(m, struct ip *);
  453         struct carp_header *ch;
  454         int iplen, len;
  455 
  456         iplen = *offp;
  457         *mp = NULL;
  458 
  459         CARPSTATS_INC(carps_ipackets);
  460 
  461         if (!V_carp_allow) {
  462                 m_freem(m);
  463                 return (IPPROTO_DONE);
  464         }
  465 
  466         /* verify that the IP TTL is 255.  */
  467         if (ip->ip_ttl != CARP_DFLTTL) {
  468                 CARPSTATS_INC(carps_badttl);
  469                 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
  470                     ip->ip_ttl,
  471                     m->m_pkthdr.rcvif->if_xname);
  472                 m_freem(m);
  473                 return (IPPROTO_DONE);
  474         }
  475 
  476         iplen = ip->ip_hl << 2;
  477 
  478         if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
  479                 CARPSTATS_INC(carps_badlen);
  480                 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
  481                     "on %s\n", __func__, m->m_len - sizeof(struct ip),
  482                     m->m_pkthdr.rcvif->if_xname);
  483                 m_freem(m);
  484                 return (IPPROTO_DONE);
  485         }
  486 
  487         if (iplen + sizeof(*ch) < m->m_len) {
  488                 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
  489                         CARPSTATS_INC(carps_hdrops);
  490                         CARP_DEBUG("%s: pullup failed\n", __func__);
  491                         return (IPPROTO_DONE);
  492                 }
  493                 ip = mtod(m, struct ip *);
  494         }
  495         ch = (struct carp_header *)((char *)ip + iplen);
  496 
  497         /*
  498          * verify that the received packet length is
  499          * equal to the CARP header
  500          */
  501         len = iplen + sizeof(*ch);
  502         if (len > m->m_pkthdr.len) {
  503                 CARPSTATS_INC(carps_badlen);
  504                 CARP_DEBUG("%s: packet too short %d on %s\n", __func__,
  505                     m->m_pkthdr.len,
  506                     m->m_pkthdr.rcvif->if_xname);
  507                 m_freem(m);
  508                 return (IPPROTO_DONE);
  509         }
  510 
  511         if ((m = m_pullup(m, len)) == NULL) {
  512                 CARPSTATS_INC(carps_hdrops);
  513                 return (IPPROTO_DONE);
  514         }
  515         ip = mtod(m, struct ip *);
  516         ch = (struct carp_header *)((char *)ip + iplen);
  517 
  518         /* verify the CARP checksum */
  519         m->m_data += iplen;
  520         if (in_cksum(m, len - iplen)) {
  521                 CARPSTATS_INC(carps_badsum);
  522                 CARP_DEBUG("%s: checksum failed on %s\n", __func__,
  523                     m->m_pkthdr.rcvif->if_xname);
  524                 m_freem(m);
  525                 return (IPPROTO_DONE);
  526         }
  527         m->m_data -= iplen;
  528 
  529         carp_input_c(m, ch, AF_INET);
  530         return (IPPROTO_DONE);
  531 }
  532 #endif
  533 
  534 #ifdef INET6
  535 static int
  536 carp6_input(struct mbuf **mp, int *offp, int proto)
  537 {
  538         struct mbuf *m = *mp;
  539         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
  540         struct carp_header *ch;
  541         u_int len;
  542 
  543         CARPSTATS_INC(carps_ipackets6);
  544 
  545         if (!V_carp_allow) {
  546                 m_freem(m);
  547                 return (IPPROTO_DONE);
  548         }
  549 
  550         /* check if received on a valid carp interface */
  551         if (m->m_pkthdr.rcvif->if_carp == NULL) {
  552                 CARPSTATS_INC(carps_badif);
  553                 CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
  554                     __func__, m->m_pkthdr.rcvif->if_xname);
  555                 m_freem(m);
  556                 return (IPPROTO_DONE);
  557         }
  558 
  559         /* verify that the IP TTL is 255 */
  560         if (ip6->ip6_hlim != CARP_DFLTTL) {
  561                 CARPSTATS_INC(carps_badttl);
  562                 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
  563                     ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname);
  564                 m_freem(m);
  565                 return (IPPROTO_DONE);
  566         }
  567 
  568         /* verify that we have a complete carp packet */
  569         if (m->m_len < *offp + sizeof(*ch)) {
  570                 len = m->m_len;
  571                 m = m_pullup(m, *offp + sizeof(*ch));
  572                 if (m == NULL) {
  573                         CARPSTATS_INC(carps_badlen);
  574                         CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
  575                         return (IPPROTO_DONE);
  576                 }
  577         }
  578         ch = (struct carp_header *)(mtod(m, char *) + *offp);
  579 
  580         /* verify the CARP checksum */
  581         m->m_data += *offp;
  582         if (in_cksum(m, sizeof(*ch))) {
  583                 CARPSTATS_INC(carps_badsum);
  584                 CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
  585                     m->m_pkthdr.rcvif->if_xname);
  586                 m_freem(m);
  587                 return (IPPROTO_DONE);
  588         }
  589         m->m_data -= *offp;
  590 
  591         carp_input_c(m, ch, AF_INET6);
  592         return (IPPROTO_DONE);
  593 }
  594 #endif /* INET6 */
  595 
  596 /*
  597  * This routine should not be necessary at all, but some switches
  598  * (VMWare ESX vswitches) can echo our own packets back at us,
  599  * and we must ignore them or they will cause us to drop out of
  600  * MASTER mode.
  601  *
  602  * We cannot catch all cases of network loops.  Instead, what we
  603  * do here is catch any packet that arrives with a carp header
  604  * with a VHID of 0, that comes from an address that is our own.
  605  * These packets are by definition "from us" (even if they are from
  606  * a misconfigured host that is pretending to be us).
  607  *
  608  * The VHID test is outside this mini-function.
  609  */
  610 static int
  611 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
  612 {
  613 #ifdef INET
  614         struct ip *ip4;
  615         struct in_addr in4;
  616 #endif
  617 #ifdef INET6
  618         struct ip6_hdr *ip6;
  619         struct in6_addr in6;
  620 #endif
  621 
  622         switch (af) {
  623 #ifdef INET
  624         case AF_INET:
  625                 ip4 = mtod(m, struct ip *);
  626                 in4 = ifatoia(ifa)->ia_addr.sin_addr;
  627                 return (in4.s_addr == ip4->ip_src.s_addr);
  628 #endif
  629 #ifdef INET6
  630         case AF_INET6:
  631                 ip6 = mtod(m, struct ip6_hdr *);
  632                 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
  633                 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0);
  634 #endif
  635         default:
  636                 break;
  637         }
  638         return (0);
  639 }
  640 
/*
 * Address-family independent part of advertisement processing.
 *
 * m    received packet; it is freed on every path.
 * ch   CARP header inside `m', already length- and checksum-verified by
 *      the caller.
 * af   address family the packet arrived with.
 *
 * The packet is matched to a softc by vhid, its version and HMAC are
 * checked, the softc adopts the sender's counter, and the state machine
 * then reacts to the advertised interval.  Must be called inside the
 * network epoch.
 */
static void
carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
{
        struct ifnet *ifp = m->m_pkthdr.rcvif;
        struct ifaddr *ifa, *match;
        struct carp_softc *sc;
        uint64_t tmp_counter;
        struct timeval sc_tv, ch_tv;
        int error;

        NET_EPOCH_ASSERT();

        /*
         * Verify that the VHID is valid on the receiving interface.
         *
         * There should be just one match.  If there are none
         * the VHID is not valid and we drop the packet.  If
         * there are multiple VHID matches, take just the first
         * one, for compatibility with previous code.  While we're
         * scanning, check for obvious loops in the network topology
         * (these should never happen, and as noted above, we may
         * miss real loops; this is just a double-check).
         */
        error = 0;
        match = NULL;
        IFNET_FOREACH_IFA(ifp, ifa) {
                if (match == NULL && ifa->ifa_carp != NULL &&
                    ifa->ifa_addr->sa_family == af &&
                    ifa->ifa_carp->sc_vhid == ch->carp_vhid)
                        match = ifa;
                if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af))
                        error = ELOOP;
        }
        /* A detected loop overrides any vhid match. */
        ifa = error ? NULL : match;
        /* Hold the address until the softc lock has been taken. */
        if (ifa != NULL)
                ifa_ref(ifa);

        if (ifa == NULL) {
                if (error == ELOOP) {
                        CARP_DEBUG("dropping looped packet on interface %s\n",
                            ifp->if_xname);
                        CARPSTATS_INC(carps_badif);     /* ??? */
                } else {
                        CARPSTATS_INC(carps_badvhid);
                }
                m_freem(m);
                return;
        }

        /* verify the CARP version. */
        if (ch->carp_version != CARP_VERSION) {
                CARPSTATS_INC(carps_badver);
                CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
                    ch->carp_version);
                ifa_free(ifa);
                m_freem(m);
                return;
        }

        /*
         * From here on only the softc is used, so the address reference
         * is dropped as soon as the softc lock is held.
         */
        sc = ifa->ifa_carp;
        CARP_LOCK(sc);
        ifa_free(ifa);

        if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
                CARPSTATS_INC(carps_badauth);
                CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
                    sc->sc_vhid, ifp->if_xname);
                goto out;
        }

        /* Reassemble the 64-bit counter from its two network-order words. */
        tmp_counter = ntohl(ch->carp_counter[0]);
        tmp_counter = tmp_counter<<32;
        tmp_counter += ntohl(ch->carp_counter[1]);

        /* XXX Replay protection goes here */

        /* Adopt the sender's counter and stop picking random ones. */
        sc->sc_init_counter = 0;
        sc->sc_counter = tmp_counter;

        /*
         * Express both advertisement intervals as timevals: advbase is
         * in seconds and advskew in 1/256ths of a second.  Our own skew
         * includes the global demotion.
         */
        sc_tv.tv_sec = sc->sc_advbase;
        sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
        ch_tv.tv_sec = ch->carp_advbase;
        ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;

        switch (sc->sc_state) {
        case INIT:
                break;
        case MASTER:
                /*
                 * If we receive an advertisement from a master who's going to
                 * be more frequent than us, go into BACKUP state.
                 * An equal interval also makes us step down.
                 */
                if (timevalcmp(&sc_tv, &ch_tv, >) ||
                    timevalcmp(&sc_tv, &ch_tv, ==)) {
                        callout_stop(&sc->sc_ad_tmo);
                        carp_set_state(sc, BACKUP,
                            "more frequent advertisement received");
                        carp_setrun(sc, 0);
                        carp_delroute(sc);
                }
                break;
        case BACKUP:
                /*
                 * If we're pre-empting masters who advertise slower than us,
                 * and this one claims to be slower, treat him as down.
                 */
                if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
                        carp_master_down_locked(sc,
                            "preempting a slower master");
                        break;
                }

                /*
                 * If the master is going to advertise at such a low
                 * frequency that he's guaranteed to time out, we might as
                 * well just treat him as timed out now.  The threshold is
                 * three times our advbase; tv_usec keeps our skew.
                 */
                sc_tv.tv_sec = sc->sc_advbase * 3;
                if (timevalcmp(&sc_tv, &ch_tv, <)) {
                        carp_master_down_locked(sc, "master will time out");
                        break;
                }

                /*
                 * Otherwise, we reset the counter and wait for the next
                 * advertisement.
                 */
                carp_setrun(sc, af);
                break;
        }

out:
        CARP_UNLOCK(sc);
        m_freem(m);
}
  776 
  777 static int
  778 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
  779 {
  780         struct m_tag *mtag;
  781 
  782         if (sc->sc_init_counter) {
  783                 /* this could also be seconds since unix epoch */
  784                 sc->sc_counter = arc4random();
  785                 sc->sc_counter = sc->sc_counter << 32;
  786                 sc->sc_counter += arc4random();
  787         } else
  788                 sc->sc_counter++;
  789 
  790         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
  791         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
  792 
  793         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
  794 
  795         /* Tag packet for carp_output */
  796         if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
  797             M_NOWAIT)) == NULL) {
  798                 m_freem(m);
  799                 CARPSTATS_INC(carps_onomem);
  800                 return (ENOMEM);
  801         }
  802         bcopy(&sc, mtag + 1, sizeof(sc));
  803         m_tag_prepend(m, mtag);
  804 
  805         return (0);
  806 }
  807 
  808 /*
  809  * To avoid LORs and possible recursions this function shouldn't
  810  * be called directly, but scheduled via taskqueue.
  811  */
  812 static void
  813 carp_send_ad_all(void *ctx __unused, int pending __unused)
  814 {
  815         struct carp_softc *sc;
  816         struct epoch_tracker et;
  817 
  818         NET_EPOCH_ENTER(et);
  819         mtx_lock(&carp_mtx);
  820         LIST_FOREACH(sc, &carp_list, sc_next)
  821                 if (sc->sc_state == MASTER) {
  822                         CARP_LOCK(sc);
  823                         CURVNET_SET(sc->sc_carpdev->if_vnet);
  824                         carp_send_ad_locked(sc);
  825                         CURVNET_RESTORE();
  826                         CARP_UNLOCK(sc);
  827                 }
  828         mtx_unlock(&carp_mtx);
  829         NET_EPOCH_EXIT(et);
  830 }
  831 
  832 /* Send a periodic advertisement, executed in callout context. */
  833 static void
  834 carp_send_ad(void *v)
  835 {
  836         struct carp_softc *sc = v;
  837         struct epoch_tracker et;
  838 
  839         NET_EPOCH_ENTER(et);
  840         CARP_LOCK_ASSERT(sc);
  841         CURVNET_SET(sc->sc_carpdev->if_vnet);
  842         carp_send_ad_locked(sc);
  843         CURVNET_RESTORE();
  844         CARP_UNLOCK(sc);
  845         NET_EPOCH_EXIT(et);
  846 }
  847 
  848 static void
  849 carp_send_ad_error(struct carp_softc *sc, int error)
  850 {
  851 
  852         /*
  853          * We track errors and successfull sends with this logic:
  854          * - Any error resets success counter to 0.
  855          * - MAX_ERRORS triggers demotion.
  856          * - MIN_SUCCESS successes resets error counter to 0.
  857          * - MIN_SUCCESS reverts demotion, if it was triggered before.
  858          */
  859         if (error) {
  860                 if (sc->sc_sendad_errors < INT_MAX)
  861                         sc->sc_sendad_errors++;
  862                 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
  863                         static const char fmt[] = "send error %d on %s";
  864                         char msg[sizeof(fmt) + IFNAMSIZ];
  865 
  866                         sprintf(msg, fmt, error, sc->sc_carpdev->if_xname);
  867                         carp_demote_adj(V_carp_senderr_adj, msg);
  868                 }
  869                 sc->sc_sendad_success = 0;
  870         } else if (sc->sc_sendad_errors > 0) {
  871                 if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
  872                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
  873                                 static const char fmt[] = "send ok on %s";
  874                                 char msg[sizeof(fmt) + IFNAMSIZ];
  875 
  876                                 sprintf(msg, fmt, sc->sc_carpdev->if_xname);
  877                                 carp_demote_adj(-V_carp_senderr_adj, msg);
  878                         }
  879                         sc->sc_sendad_errors = 0;
  880                 }
  881         }
  882 }
  883 
  884 /*
  885  * Pick the best ifaddr on the given ifp for sending CARP
  886  * advertisements.
  887  *
  888  * "Best" here is defined by ifa_preferred().  This function is much
  889  * much like ifaof_ifpforaddr() except that we just use ifa_preferred().
  890  *
  891  * (This could be simplified to return the actual address, except that
  892  * it has a different format in AF_INET and AF_INET6.)
  893  */
  894 static struct ifaddr *
  895 carp_best_ifa(int af, struct ifnet *ifp)
  896 {
  897         struct ifaddr *ifa, *best;
  898 
  899         NET_EPOCH_ASSERT();
  900 
  901         if (af >= AF_MAX)
  902                 return (NULL);
  903         best = NULL;
  904         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  905                 if (ifa->ifa_addr->sa_family == af &&
  906                     (best == NULL || ifa_preferred(best, ifa)))
  907                         best = ifa;
  908         }
  909         if (best != NULL)
  910                 ifa_ref(best);
  911         return (best);
  912 }
  913 
  914 static void
  915 carp_send_ad_locked(struct carp_softc *sc)
  916 {
  917         struct carp_header ch;
  918         struct timeval tv;
  919         struct ifaddr *ifa;
  920         struct carp_header *ch_ptr;
  921         struct mbuf *m;
  922         int len, advskew;
  923 
  924         NET_EPOCH_ASSERT();
  925         CARP_LOCK_ASSERT(sc);
  926 
  927         advskew = DEMOTE_ADVSKEW(sc);
  928         tv.tv_sec = sc->sc_advbase;
  929         tv.tv_usec = advskew * 1000000 / 256;
  930 
  931         ch.carp_version = CARP_VERSION;
  932         ch.carp_type = CARP_ADVERTISEMENT;
  933         ch.carp_vhid = sc->sc_vhid;
  934         ch.carp_advbase = sc->sc_advbase;
  935         ch.carp_advskew = advskew;
  936         ch.carp_authlen = 7;    /* XXX DEFINE */
  937         ch.carp_pad1 = 0;       /* must be zero */
  938         ch.carp_cksum = 0;
  939 
  940         /* XXXGL: OpenBSD picks first ifaddr with needed family. */
  941 
  942 #ifdef INET
  943         if (sc->sc_naddrs) {
  944                 struct ip *ip;
  945 
  946                 m = m_gethdr(M_NOWAIT, MT_DATA);
  947                 if (m == NULL) {
  948                         CARPSTATS_INC(carps_onomem);
  949                         goto resched;
  950                 }
  951                 len = sizeof(*ip) + sizeof(ch);
  952                 m->m_pkthdr.len = len;
  953                 m->m_pkthdr.rcvif = NULL;
  954                 m->m_len = len;
  955                 M_ALIGN(m, m->m_len);
  956                 m->m_flags |= M_MCAST;
  957                 ip = mtod(m, struct ip *);
  958                 ip->ip_v = IPVERSION;
  959                 ip->ip_hl = sizeof(*ip) >> 2;
  960                 ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
  961                 ip->ip_len = htons(len);
  962                 ip->ip_off = htons(IP_DF);
  963                 ip->ip_ttl = CARP_DFLTTL;
  964                 ip->ip_p = IPPROTO_CARP;
  965                 ip->ip_sum = 0;
  966                 ip_fillid(ip);
  967 
  968                 ifa = carp_best_ifa(AF_INET, sc->sc_carpdev);
  969                 if (ifa != NULL) {
  970                         ip->ip_src.s_addr =
  971                             ifatoia(ifa)->ia_addr.sin_addr.s_addr;
  972                         ifa_free(ifa);
  973                 } else
  974                         ip->ip_src.s_addr = 0;
  975                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
  976 
  977                 ch_ptr = (struct carp_header *)(&ip[1]);
  978                 bcopy(&ch, ch_ptr, sizeof(ch));
  979                 if (carp_prepare_ad(m, sc, ch_ptr))
  980                         goto resched;
  981 
  982                 m->m_data += sizeof(*ip);
  983                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
  984                 m->m_data -= sizeof(*ip);
  985 
  986                 CARPSTATS_INC(carps_opackets);
  987 
  988                 carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
  989                     &sc->sc_carpdev->if_carp->cif_imo, NULL));
  990         }
  991 #endif /* INET */
  992 #ifdef INET6
  993         if (sc->sc_naddrs6) {
  994                 struct ip6_hdr *ip6;
  995 
  996                 m = m_gethdr(M_NOWAIT, MT_DATA);
  997                 if (m == NULL) {
  998                         CARPSTATS_INC(carps_onomem);
  999                         goto resched;
 1000                 }
 1001                 len = sizeof(*ip6) + sizeof(ch);
 1002                 m->m_pkthdr.len = len;
 1003                 m->m_pkthdr.rcvif = NULL;
 1004                 m->m_len = len;
 1005                 M_ALIGN(m, m->m_len);
 1006                 m->m_flags |= M_MCAST;
 1007                 ip6 = mtod(m, struct ip6_hdr *);
 1008                 bzero(ip6, sizeof(*ip6));
 1009                 ip6->ip6_vfc |= IPV6_VERSION;
 1010                 /* Traffic class isn't defined in ip6 struct instead
 1011                  * it gets offset into flowid field */
 1012                 ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
 1013                     IPTOS_DSCP_OFFSET));
 1014                 ip6->ip6_hlim = CARP_DFLTTL;
 1015                 ip6->ip6_nxt = IPPROTO_CARP;
 1016 
 1017                 /* set the source address */
 1018                 ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev);
 1019                 if (ifa != NULL) {
 1020                         bcopy(IFA_IN6(ifa), &ip6->ip6_src,
 1021                             sizeof(struct in6_addr));
 1022                         ifa_free(ifa);
 1023                 } else
 1024                         /* This should never happen with IPv6. */
 1025                         bzero(&ip6->ip6_src, sizeof(struct in6_addr));
 1026 
 1027                 /* Set the multicast destination. */
 1028                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
 1029                 ip6->ip6_dst.s6_addr8[15] = 0x12;
 1030                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
 1031                         m_freem(m);
 1032                         CARP_DEBUG("%s: in6_setscope failed\n", __func__);
 1033                         goto resched;
 1034                 }
 1035 
 1036                 ch_ptr = (struct carp_header *)(&ip6[1]);
 1037                 bcopy(&ch, ch_ptr, sizeof(ch));
 1038                 if (carp_prepare_ad(m, sc, ch_ptr))
 1039                         goto resched;
 1040 
 1041                 m->m_data += sizeof(*ip6);
 1042                 ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
 1043                 m->m_data -= sizeof(*ip6);
 1044 
 1045                 CARPSTATS_INC(carps_opackets6);
 1046 
 1047                 carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
 1048                     &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
 1049         }
 1050 #endif /* INET6 */
 1051 
 1052 resched:
 1053         callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
 1054 }
 1055 
 1056 static void
 1057 carp_addroute(struct carp_softc *sc)
 1058 {
 1059         struct ifaddr *ifa;
 1060 
 1061         CARP_FOREACH_IFA(sc, ifa)
 1062                 carp_ifa_addroute(ifa);
 1063 }
 1064 
 1065 static void
 1066 carp_ifa_addroute(struct ifaddr *ifa)
 1067 {
 1068 
 1069         switch (ifa->ifa_addr->sa_family) {
 1070 #ifdef INET
 1071         case AF_INET:
 1072                 in_addprefix(ifatoia(ifa));
 1073                 ifa_add_loopback_route(ifa,
 1074                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
 1075                 break;
 1076 #endif
 1077 #ifdef INET6
 1078         case AF_INET6:
 1079                 ifa_add_loopback_route(ifa,
 1080                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
 1081                 nd6_add_ifa_lle(ifatoia6(ifa));
 1082                 break;
 1083 #endif
 1084         }
 1085 }
 1086 
 1087 static void
 1088 carp_delroute(struct carp_softc *sc)
 1089 {
 1090         struct ifaddr *ifa;
 1091 
 1092         CARP_FOREACH_IFA(sc, ifa)
 1093                 carp_ifa_delroute(ifa);
 1094 }
 1095 
 1096 static void
 1097 carp_ifa_delroute(struct ifaddr *ifa)
 1098 {
 1099 
 1100         switch (ifa->ifa_addr->sa_family) {
 1101 #ifdef INET
 1102         case AF_INET:
 1103                 ifa_del_loopback_route(ifa,
 1104                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
 1105                 in_scrubprefix(ifatoia(ifa), LLE_STATIC);
 1106                 break;
 1107 #endif
 1108 #ifdef INET6
 1109         case AF_INET6:
 1110                 ifa_del_loopback_route(ifa,
 1111                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
 1112                 nd6_rem_ifa_lle(ifatoia6(ifa), 1);
 1113                 break;
 1114 #endif
 1115         }
 1116 }
 1117 
 1118 int
 1119 carp_master(struct ifaddr *ifa)
 1120 {
 1121         struct carp_softc *sc = ifa->ifa_carp;
 1122 
 1123         return (sc->sc_state == MASTER);
 1124 }
 1125 
 1126 #ifdef INET
 1127 /*
 1128  * Broadcast a gratuitous ARP request containing
 1129  * the virtual router MAC address for each IP address
 1130  * associated with the virtual router.
 1131  */
 1132 static void
 1133 carp_send_arp(struct carp_softc *sc)
 1134 {
 1135         struct ifaddr *ifa;
 1136         struct in_addr addr;
 1137 
 1138         NET_EPOCH_ASSERT();
 1139 
 1140         CARP_FOREACH_IFA(sc, ifa) {
 1141                 if (ifa->ifa_addr->sa_family != AF_INET)
 1142                         continue;
 1143                 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
 1144                 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr));
 1145         }
 1146 }
 1147 
 1148 int
 1149 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
 1150 {
 1151         struct carp_softc *sc = ifa->ifa_carp;
 1152 
 1153         if (sc->sc_state == MASTER) {
 1154                 *enaddr = LLADDR(&sc->sc_addr);
 1155                 return (1);
 1156         }
 1157 
 1158         return (0);
 1159 }
 1160 #endif
 1161 
 1162 #ifdef INET6
 1163 static void
 1164 carp_send_na(struct carp_softc *sc)
 1165 {
 1166         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 1167         struct ifaddr *ifa;
 1168         struct in6_addr *in6;
 1169 
 1170         CARP_FOREACH_IFA(sc, ifa) {
 1171                 if (ifa->ifa_addr->sa_family != AF_INET6)
 1172                         continue;
 1173 
 1174                 in6 = IFA_IN6(ifa);
 1175                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
 1176                     ND_NA_FLAG_OVERRIDE, 1, NULL);
 1177                 DELAY(1000);    /* XXX */
 1178         }
 1179 }
 1180 
 1181 /*
 1182  * Returns ifa in case it's a carp address and it is MASTER, or if the address
 1183  * matches and is not a carp address.  Returns NULL otherwise.
 1184  */
 1185 struct ifaddr *
 1186 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
 1187 {
 1188         struct ifaddr *ifa;
 1189 
 1190         NET_EPOCH_ASSERT();
 1191 
 1192         ifa = NULL;
 1193         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 1194                 if (ifa->ifa_addr->sa_family != AF_INET6)
 1195                         continue;
 1196                 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
 1197                         continue;
 1198                 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
 1199                         ifa = NULL;
 1200                 else
 1201                         ifa_ref(ifa);
 1202                 break;
 1203         }
 1204 
 1205         return (ifa);
 1206 }
 1207 
 1208 char *
 1209 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
 1210 {
 1211         struct ifaddr *ifa;
 1212 
 1213         NET_EPOCH_ASSERT();
 1214 
 1215         IFNET_FOREACH_IFA(ifp, ifa)
 1216                 if (ifa->ifa_addr->sa_family == AF_INET6 &&
 1217                     IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
 1218                         struct carp_softc *sc = ifa->ifa_carp;
 1219                         struct m_tag *mtag;
 1220 
 1221                         mtag = m_tag_get(PACKET_TAG_CARP,
 1222                             sizeof(struct carp_softc *), M_NOWAIT);
 1223                         if (mtag == NULL)
 1224                                 /* Better a bit than nothing. */
 1225                                 return (LLADDR(&sc->sc_addr));
 1226 
 1227                         bcopy(&sc, mtag + 1, sizeof(sc));
 1228                         m_tag_prepend(m, mtag);
 1229 
 1230                         return (LLADDR(&sc->sc_addr));
 1231                 }
 1232 
 1233         return (NULL);
 1234 }
 1235 #endif /* INET6 */
 1236 
 1237 int
 1238 carp_forus(struct ifnet *ifp, u_char *dhost)
 1239 {
 1240         struct carp_softc *sc;
 1241         uint8_t *ena = dhost;
 1242 
 1243         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
 1244                 return (0);
 1245 
 1246         CIF_LOCK(ifp->if_carp);
 1247         IFNET_FOREACH_CARP(ifp, sc) {
 1248                 /*
 1249                  * CARP_LOCK() is not here, since would protect nothing, but
 1250                  * cause deadlock with if_bridge, calling this under its lock.
 1251                  */
 1252                 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
 1253                     ETHER_ADDR_LEN)) {
 1254                         CIF_UNLOCK(ifp->if_carp);
 1255                         return (1);
 1256                 }
 1257         }
 1258         CIF_UNLOCK(ifp->if_carp);
 1259 
 1260         return (0);
 1261 }
 1262 
 1263 /* Master down timeout event, executed in callout context. */
 1264 static void
 1265 carp_master_down(void *v)
 1266 {
 1267         struct carp_softc *sc = v;
 1268         struct epoch_tracker et;
 1269 
 1270         NET_EPOCH_ENTER(et);
 1271         CARP_LOCK_ASSERT(sc);
 1272 
 1273         CURVNET_SET(sc->sc_carpdev->if_vnet);
 1274         if (sc->sc_state == BACKUP) {
 1275                 carp_master_down_locked(sc, "master timed out");
 1276         }
 1277         CURVNET_RESTORE();
 1278 
 1279         CARP_UNLOCK(sc);
 1280         NET_EPOCH_EXIT(et);
 1281 }
 1282 
 1283 static void
 1284 carp_master_down_locked(struct carp_softc *sc, const char *reason)
 1285 {
 1286 
 1287         NET_EPOCH_ASSERT();
 1288         CARP_LOCK_ASSERT(sc);
 1289 
 1290         switch (sc->sc_state) {
 1291         case BACKUP:
 1292                 carp_set_state(sc, MASTER, reason);
 1293                 carp_send_ad_locked(sc);
 1294 #ifdef INET
 1295                 carp_send_arp(sc);
 1296 #endif
 1297 #ifdef INET6
 1298                 carp_send_na(sc);
 1299 #endif
 1300                 carp_setrun(sc, 0);
 1301                 carp_addroute(sc);
 1302                 break;
 1303         case INIT:
 1304         case MASTER:
 1305 #ifdef INVARIANTS
 1306                 panic("carp: VHID %u@%s: master_down event in %s state\n",
 1307                     sc->sc_vhid,
 1308                     sc->sc_carpdev->if_xname,
 1309                     sc->sc_state ? "MASTER" : "INIT");
 1310 #endif
 1311                 break;
 1312         }
 1313 }
 1314 
 1315 /*
 1316  * When in backup state, af indicates whether to reset the master down timer
 1317  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
 1318  */
 1319 static void
 1320 carp_setrun(struct carp_softc *sc, sa_family_t af)
 1321 {
 1322         struct timeval tv;
 1323 
 1324         CARP_LOCK_ASSERT(sc);
 1325 
 1326         if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
 1327             sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
 1328             (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) ||
 1329             !V_carp_allow)
 1330                 return;
 1331 
 1332         switch (sc->sc_state) {
 1333         case INIT:
 1334                 carp_set_state(sc, BACKUP, "initialization complete");
 1335                 carp_setrun(sc, 0);
 1336                 break;
 1337         case BACKUP:
 1338                 callout_stop(&sc->sc_ad_tmo);
 1339                 tv.tv_sec = 3 * sc->sc_advbase;
 1340                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 1341                 switch (af) {
 1342 #ifdef INET
 1343                 case AF_INET:
 1344                         callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 1345                             carp_master_down, sc);
 1346                         break;
 1347 #endif
 1348 #ifdef INET6
 1349                 case AF_INET6:
 1350                         callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 1351                             carp_master_down, sc);
 1352                         break;
 1353 #endif
 1354                 default:
 1355 #ifdef INET
 1356                         if (sc->sc_naddrs)
 1357                                 callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 1358                                     carp_master_down, sc);
 1359 #endif
 1360 #ifdef INET6
 1361                         if (sc->sc_naddrs6)
 1362                                 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 1363                                     carp_master_down, sc);
 1364 #endif
 1365                         break;
 1366                 }
 1367                 break;
 1368         case MASTER:
 1369                 tv.tv_sec = sc->sc_advbase;
 1370                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 1371                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 1372                     carp_send_ad, sc);
 1373                 break;
 1374         }
 1375 }
 1376 
 1377 /*
 1378  * Setup multicast structures.
 1379  */
 1380 static int
 1381 carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
 1382 {
 1383         struct ifnet *ifp = cif->cif_ifp;
 1384         int error = 0;
 1385 
 1386         switch (sa) {
 1387 #ifdef INET
 1388         case AF_INET:
 1389             {
 1390                 struct ip_moptions *imo = &cif->cif_imo;
 1391                 struct in_mfilter *imf;
 1392                 struct in_addr addr;
 1393 
 1394                 if (ip_mfilter_first(&imo->imo_head) != NULL)
 1395                         return (0);
 1396 
 1397                 imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
 1398                 ip_mfilter_init(&imo->imo_head);
 1399                 imo->imo_multicast_vif = -1;
 1400 
 1401                 addr.s_addr = htonl(INADDR_CARP_GROUP);
 1402                 if ((error = in_joingroup(ifp, &addr, NULL,
 1403                     &imf->imf_inm)) != 0) {
 1404                         ip_mfilter_free(imf);
 1405                         break;
 1406                 }
 1407 
 1408                 ip_mfilter_insert(&imo->imo_head, imf);
 1409                 imo->imo_multicast_ifp = ifp;
 1410                 imo->imo_multicast_ttl = CARP_DFLTTL;
 1411                 imo->imo_multicast_loop = 0;
 1412                 break;
 1413            }
 1414 #endif
 1415 #ifdef INET6
 1416         case AF_INET6:
 1417             {
 1418                 struct ip6_moptions *im6o = &cif->cif_im6o;
 1419                 struct in6_mfilter *im6f[2];
 1420                 struct in6_addr in6;
 1421 
 1422                 if (ip6_mfilter_first(&im6o->im6o_head))
 1423                         return (0);
 1424 
 1425                 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
 1426                 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
 1427 
 1428                 ip6_mfilter_init(&im6o->im6o_head);
 1429                 im6o->im6o_multicast_hlim = CARP_DFLTTL;
 1430                 im6o->im6o_multicast_ifp = ifp;
 1431 
 1432                 /* Join IPv6 CARP multicast group. */
 1433                 bzero(&in6, sizeof(in6));
 1434                 in6.s6_addr16[0] = htons(0xff02);
 1435                 in6.s6_addr8[15] = 0x12;
 1436                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
 1437                         ip6_mfilter_free(im6f[0]);
 1438                         ip6_mfilter_free(im6f[1]);
 1439                         break;
 1440                 }
 1441                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) {
 1442                         ip6_mfilter_free(im6f[0]);
 1443                         ip6_mfilter_free(im6f[1]);
 1444                         break;
 1445                 }
 1446 
 1447                 /* Join solicited multicast address. */
 1448                 bzero(&in6, sizeof(in6));
 1449                 in6.s6_addr16[0] = htons(0xff02);
 1450                 in6.s6_addr32[1] = 0;
 1451                 in6.s6_addr32[2] = htonl(1);
 1452                 in6.s6_addr32[3] = 0;
 1453                 in6.s6_addr8[12] = 0xff;
 1454 
 1455                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
 1456                         ip6_mfilter_free(im6f[0]);
 1457                         ip6_mfilter_free(im6f[1]);
 1458                         break;
 1459                 }
 1460 
 1461                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) {
 1462                         in6_leavegroup(im6f[0]->im6f_in6m, NULL);
 1463                         ip6_mfilter_free(im6f[0]);
 1464                         ip6_mfilter_free(im6f[1]);
 1465                         break;
 1466                 }
 1467                 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]);
 1468                 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]);
 1469                 break;
 1470             }
 1471 #endif
 1472         }
 1473 
 1474         return (error);
 1475 }
 1476 
 1477 /*
 1478  * Free multicast structures.
 1479  */
 1480 static void
 1481 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
 1482 {
 1483 #ifdef INET
 1484         struct ip_moptions *imo = &cif->cif_imo;
 1485         struct in_mfilter *imf;
 1486 #endif
 1487 #ifdef INET6
 1488         struct ip6_moptions *im6o = &cif->cif_im6o;
 1489         struct in6_mfilter *im6f;
 1490 #endif
 1491         sx_assert(&carp_sx, SA_XLOCKED);
 1492 
 1493         switch (sa) {
 1494 #ifdef INET
 1495         case AF_INET:
 1496                 if (cif->cif_naddrs != 0)
 1497                         break;
 1498 
 1499                 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
 1500                         ip_mfilter_remove(&imo->imo_head, imf);
 1501                         in_leavegroup(imf->imf_inm, NULL);
 1502                         ip_mfilter_free(imf);
 1503                 }
 1504                 break;
 1505 #endif
 1506 #ifdef INET6
 1507         case AF_INET6:
 1508                 if (cif->cif_naddrs6 != 0)
 1509                         break;
 1510 
 1511                 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) {
 1512                         ip6_mfilter_remove(&im6o->im6o_head, im6f);
 1513                         in6_leavegroup(im6f->im6f_in6m, NULL);
 1514                         ip6_mfilter_free(im6f);
 1515                 }
 1516                 break;
 1517 #endif
 1518         }
 1519 }
 1520 
 1521 int
 1522 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
 1523 {
 1524         struct m_tag *mtag;
 1525         struct carp_softc *sc;
 1526 
 1527         if (!sa)
 1528                 return (0);
 1529 
 1530         switch (sa->sa_family) {
 1531 #ifdef INET
 1532         case AF_INET:
 1533                 break;
 1534 #endif
 1535 #ifdef INET6
 1536         case AF_INET6:
 1537                 break;
 1538 #endif
 1539         default:
 1540                 return (0);
 1541         }
 1542 
 1543         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
 1544         if (mtag == NULL)
 1545                 return (0);
 1546 
 1547         bcopy(mtag + 1, &sc, sizeof(sc));
 1548 
 1549         /* Set the source MAC address to the Virtual Router MAC Address. */
 1550         switch (ifp->if_type) {
 1551         case IFT_ETHER:
 1552         case IFT_BRIDGE:
 1553         case IFT_L2VLAN: {
 1554                         struct ether_header *eh;
 1555 
 1556                         eh = mtod(m, struct ether_header *);
 1557                         eh->ether_shost[0] = 0;
 1558                         eh->ether_shost[1] = 0;
 1559                         eh->ether_shost[2] = 0x5e;
 1560                         eh->ether_shost[3] = 0;
 1561                         eh->ether_shost[4] = 1;
 1562                         eh->ether_shost[5] = sc->sc_vhid;
 1563                 }
 1564                 break;
 1565         default:
 1566                 printf("%s: carp is not supported for the %d interface type\n",
 1567                     ifp->if_xname, ifp->if_type);
 1568                 return (EOPNOTSUPP);
 1569         }
 1570 
 1571         return (0);
 1572 }
 1573 
 1574 static struct carp_softc*
 1575 carp_alloc(struct ifnet *ifp)
 1576 {
 1577         struct carp_softc *sc;
 1578         struct carp_if *cif;
 1579 
 1580         sx_assert(&carp_sx, SA_XLOCKED);
 1581 
 1582         if ((cif = ifp->if_carp) == NULL)
 1583                 cif = carp_alloc_if(ifp);
 1584 
 1585         sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
 1586 
 1587         sc->sc_advbase = CARP_DFLTINTV;
 1588         sc->sc_vhid = -1;       /* required setting */
 1589         sc->sc_init_counter = 1;
 1590         sc->sc_state = INIT;
 1591 
 1592         sc->sc_ifasiz = sizeof(struct ifaddr *);
 1593         sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
 1594         sc->sc_carpdev = ifp;
 1595 
 1596         CARP_LOCK_INIT(sc);
 1597 #ifdef INET
 1598         callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 1599 #endif
 1600 #ifdef INET6
 1601         callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 1602 #endif
 1603         callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 1604 
 1605         CIF_LOCK(cif);
 1606         TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
 1607         CIF_UNLOCK(cif);
 1608 
 1609         mtx_lock(&carp_mtx);
 1610         LIST_INSERT_HEAD(&carp_list, sc, sc_next);
 1611         mtx_unlock(&carp_mtx);
 1612 
 1613         return (sc);
 1614 }
 1615 
 1616 static void
 1617 carp_grow_ifas(struct carp_softc *sc)
 1618 {
 1619         struct ifaddr **new;
 1620 
 1621         new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO);
 1622         CARP_LOCK(sc);
 1623         bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
 1624         free(sc->sc_ifas, M_CARP);
 1625         sc->sc_ifas = new;
 1626         sc->sc_ifasiz *= 2;
 1627         CARP_UNLOCK(sc);
 1628 }
 1629 
 1630 static void
 1631 carp_destroy(struct carp_softc *sc)
 1632 {
 1633         struct ifnet *ifp = sc->sc_carpdev;
 1634         struct carp_if *cif = ifp->if_carp;
 1635 
 1636         sx_assert(&carp_sx, SA_XLOCKED);
 1637 
 1638         if (sc->sc_suppress)
 1639                 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
 1640         CARP_UNLOCK(sc);
 1641 
 1642         CIF_LOCK(cif);
 1643         TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
 1644         CIF_UNLOCK(cif);
 1645 
 1646         mtx_lock(&carp_mtx);
 1647         LIST_REMOVE(sc, sc_next);
 1648         mtx_unlock(&carp_mtx);
 1649 
 1650         callout_drain(&sc->sc_ad_tmo);
 1651 #ifdef INET
 1652         callout_drain(&sc->sc_md_tmo);
 1653 #endif
 1654 #ifdef INET6
 1655         callout_drain(&sc->sc_md6_tmo);
 1656 #endif
 1657         CARP_LOCK_DESTROY(sc);
 1658 
 1659         free(sc->sc_ifas, M_CARP);
 1660         free(sc, M_CARP);
 1661 }
 1662 
 1663 static struct carp_if*
 1664 carp_alloc_if(struct ifnet *ifp)
 1665 {
 1666         struct carp_if *cif;
 1667         int error;
 1668 
 1669         cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
 1670 
 1671         if ((error = ifpromisc(ifp, 1)) != 0)
 1672                 printf("%s: ifpromisc(%s) failed: %d\n",
 1673                     __func__, ifp->if_xname, error);
 1674         else
 1675                 cif->cif_flags |= CIF_PROMISC;
 1676 
 1677         CIF_LOCK_INIT(cif);
 1678         cif->cif_ifp = ifp;
 1679         TAILQ_INIT(&cif->cif_vrs);
 1680 
 1681         IF_ADDR_WLOCK(ifp);
 1682         ifp->if_carp = cif;
 1683         if_ref(ifp);
 1684         IF_ADDR_WUNLOCK(ifp);
 1685 
 1686         return (cif);
 1687 }
 1688 
 1689 static void
 1690 carp_free_if(struct carp_if *cif)
 1691 {
 1692         struct ifnet *ifp = cif->cif_ifp;
 1693 
 1694         CIF_LOCK_ASSERT(cif);
 1695         KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
 1696             __func__));
 1697 
 1698         IF_ADDR_WLOCK(ifp);
 1699         ifp->if_carp = NULL;
 1700         IF_ADDR_WUNLOCK(ifp);
 1701 
 1702         CIF_LOCK_DESTROY(cif);
 1703 
 1704         if (cif->cif_flags & CIF_PROMISC)
 1705                 ifpromisc(ifp, 0);
 1706         if_rele(ifp);
 1707 
 1708         free(cif, M_CARP);
 1709 }
 1710 
 1711 static void
 1712 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
 1713 {
 1714 
 1715         CARP_LOCK(sc);
 1716         carpr->carpr_state = sc->sc_state;
 1717         carpr->carpr_vhid = sc->sc_vhid;
 1718         carpr->carpr_advbase = sc->sc_advbase;
 1719         carpr->carpr_advskew = sc->sc_advskew;
 1720         if (priv)
 1721                 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
 1722         else
 1723                 bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
 1724         CARP_UNLOCK(sc);
 1725 }
 1726 
 1727 int
 1728 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
 1729 {
 1730         struct carpreq carpr;
 1731         struct ifnet *ifp;
 1732         struct carp_softc *sc = NULL;
 1733         int error = 0, locked = 0;
 1734 
 1735         if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr)))
 1736                 return (error);
 1737 
 1738         ifp = ifunit_ref(ifr->ifr_name);
 1739         if (ifp == NULL)
 1740                 return (ENXIO);
 1741 
 1742         switch (ifp->if_type) {
 1743         case IFT_ETHER:
 1744         case IFT_L2VLAN:
 1745         case IFT_BRIDGE:
 1746                 break;
 1747         default:
 1748                 error = EOPNOTSUPP;
 1749                 goto out;
 1750         }
 1751 
 1752         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 1753                 error = EADDRNOTAVAIL;
 1754                 goto out;
 1755         }
 1756 
 1757         sx_xlock(&carp_sx);
 1758         switch (cmd) {
 1759         case SIOCSVH:
 1760                 if ((error = priv_check(td, PRIV_NETINET_CARP)))
 1761                         break;
 1762                 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
 1763                     carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
 1764                         error = EINVAL;
 1765                         break;
 1766                 }
 1767 
 1768                 if (ifp->if_carp) {
 1769                         IFNET_FOREACH_CARP(ifp, sc)
 1770                                 if (sc->sc_vhid == carpr.carpr_vhid)
 1771                                         break;
 1772                 }
 1773                 if (sc == NULL) {
 1774                         sc = carp_alloc(ifp);
 1775                         CARP_LOCK(sc);
 1776                         sc->sc_vhid = carpr.carpr_vhid;
 1777                         LLADDR(&sc->sc_addr)[0] = 0;
 1778                         LLADDR(&sc->sc_addr)[1] = 0;
 1779                         LLADDR(&sc->sc_addr)[2] = 0x5e;
 1780                         LLADDR(&sc->sc_addr)[3] = 0;
 1781                         LLADDR(&sc->sc_addr)[4] = 1;
 1782                         LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
 1783                 } else
 1784                         CARP_LOCK(sc);
 1785                 locked = 1;
 1786                 if (carpr.carpr_advbase > 0) {
 1787                         if (carpr.carpr_advbase > 255 ||
 1788                             carpr.carpr_advbase < CARP_DFLTINTV) {
 1789                                 error = EINVAL;
 1790                                 break;
 1791                         }
 1792                         sc->sc_advbase = carpr.carpr_advbase;
 1793                 }
 1794                 if (carpr.carpr_advskew >= 255) {
 1795                         error = EINVAL;
 1796                         break;
 1797                 }
 1798                 sc->sc_advskew = carpr.carpr_advskew;
 1799                 if (carpr.carpr_key[0] != '\0') {
 1800                         bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
 1801                         carp_hmac_prepare(sc);
 1802                 }
 1803                 if (sc->sc_state != INIT &&
 1804                     carpr.carpr_state != sc->sc_state) {
 1805                         switch (carpr.carpr_state) {
 1806                         case BACKUP:
 1807                                 callout_stop(&sc->sc_ad_tmo);
 1808                                 carp_set_state(sc, BACKUP,
 1809                                     "user requested via ifconfig");
 1810                                 carp_setrun(sc, 0);
 1811                                 carp_delroute(sc);
 1812                                 break;
 1813                         case MASTER:
 1814                                 carp_master_down_locked(sc,
 1815                                     "user requested via ifconfig");
 1816                                 break;
 1817                         default:
 1818                                 break;
 1819                         }
 1820                 }
 1821                 break;
 1822 
 1823         case SIOCGVH:
 1824             {
 1825                 int priveleged;
 1826 
 1827                 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
 1828                         error = EINVAL;
 1829                         break;
 1830                 }
 1831                 if (carpr.carpr_count < 1) {
 1832                         error = EMSGSIZE;
 1833                         break;
 1834                 }
 1835                 if (ifp->if_carp == NULL) {
 1836                         error = ENOENT;
 1837                         break;
 1838                 }
 1839 
 1840                 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
 1841                 if (carpr.carpr_vhid != 0) {
 1842                         IFNET_FOREACH_CARP(ifp, sc)
 1843                                 if (sc->sc_vhid == carpr.carpr_vhid)
 1844                                         break;
 1845                         if (sc == NULL) {
 1846                                 error = ENOENT;
 1847                                 break;
 1848                         }
 1849                         carp_carprcp(&carpr, sc, priveleged);
 1850                         error = copyout(&carpr, ifr_data_get_ptr(ifr),
 1851                             sizeof(carpr));
 1852                 } else  {
 1853                         int i, count;
 1854 
 1855                         count = 0;
 1856                         IFNET_FOREACH_CARP(ifp, sc)
 1857                                 count++;
 1858 
 1859                         if (count > carpr.carpr_count) {
 1860                                 CIF_UNLOCK(ifp->if_carp);
 1861                                 error = EMSGSIZE;
 1862                                 break;
 1863                         }
 1864 
 1865                         i = 0;
 1866                         IFNET_FOREACH_CARP(ifp, sc) {
 1867                                 carp_carprcp(&carpr, sc, priveleged);
 1868                                 carpr.carpr_count = count;
 1869                                 error = copyout(&carpr,
 1870                                     (char *)ifr_data_get_ptr(ifr) +
 1871                                     (i * sizeof(carpr)), sizeof(carpr));
 1872                                 if (error) {
 1873                                         CIF_UNLOCK(ifp->if_carp);
 1874                                         break;
 1875                                 }
 1876                                 i++;
 1877                         }
 1878                 }
 1879                 break;
 1880             }
 1881         default:
 1882                 error = EINVAL;
 1883         }
 1884         sx_xunlock(&carp_sx);
 1885 
 1886 out:
 1887         if (locked)
 1888                 CARP_UNLOCK(sc);
 1889         if_rele(ifp);
 1890 
 1891         return (error);
 1892 }
 1893 
 1894 static int
 1895 carp_get_vhid(struct ifaddr *ifa)
 1896 {
 1897 
 1898         if (ifa == NULL || ifa->ifa_carp == NULL)
 1899                 return (0);
 1900 
 1901         return (ifa->ifa_carp->sc_vhid);
 1902 }
 1903 
 1904 int
 1905 carp_attach(struct ifaddr *ifa, int vhid)
 1906 {
 1907         struct ifnet *ifp = ifa->ifa_ifp;
 1908         struct carp_if *cif = ifp->if_carp;
 1909         struct carp_softc *sc;
 1910         int index, error;
 1911 
 1912         KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa));
 1913 
 1914         switch (ifa->ifa_addr->sa_family) {
 1915 #ifdef INET
 1916         case AF_INET:
 1917 #endif
 1918 #ifdef INET6
 1919         case AF_INET6:
 1920 #endif
 1921                 break;
 1922         default:
 1923                 return (EPROTOTYPE);
 1924         }
 1925 
 1926         sx_xlock(&carp_sx);
 1927         if (ifp->if_carp == NULL) {
 1928                 sx_xunlock(&carp_sx);
 1929                 return (ENOPROTOOPT);
 1930         }
 1931 
 1932         IFNET_FOREACH_CARP(ifp, sc)
 1933                 if (sc->sc_vhid == vhid)
 1934                         break;
 1935         if (sc == NULL) {
 1936                 sx_xunlock(&carp_sx);
 1937                 return (ENOENT);
 1938         }
 1939 
 1940         error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
 1941         if (error) {
 1942                 CIF_FREE(cif);
 1943                 sx_xunlock(&carp_sx);
 1944                 return (error);
 1945         }
 1946 
 1947         index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
 1948         if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
 1949                 carp_grow_ifas(sc);
 1950 
 1951         switch (ifa->ifa_addr->sa_family) {
 1952 #ifdef INET
 1953         case AF_INET:
 1954                 cif->cif_naddrs++;
 1955                 sc->sc_naddrs++;
 1956                 break;
 1957 #endif
 1958 #ifdef INET6
 1959         case AF_INET6:
 1960                 cif->cif_naddrs6++;
 1961                 sc->sc_naddrs6++;
 1962                 break;
 1963 #endif
 1964         }
 1965 
 1966         ifa_ref(ifa);
 1967 
 1968         CARP_LOCK(sc);
 1969         sc->sc_ifas[index - 1] = ifa;
 1970         ifa->ifa_carp = sc;
 1971         carp_hmac_prepare(sc);
 1972         carp_sc_state(sc);
 1973         CARP_UNLOCK(sc);
 1974 
 1975         sx_xunlock(&carp_sx);
 1976 
 1977         return (0);
 1978 }
 1979 
 1980 void
 1981 carp_detach(struct ifaddr *ifa, bool keep_cif)
 1982 {
 1983         struct ifnet *ifp = ifa->ifa_ifp;
 1984         struct carp_if *cif = ifp->if_carp;
 1985         struct carp_softc *sc = ifa->ifa_carp;
 1986         int i, index;
 1987 
 1988         KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
 1989 
 1990         sx_xlock(&carp_sx);
 1991 
 1992         CARP_LOCK(sc);
 1993         /* Shift array. */
 1994         index = sc->sc_naddrs + sc->sc_naddrs6;
 1995         for (i = 0; i < index; i++)
 1996                 if (sc->sc_ifas[i] == ifa)
 1997                         break;
 1998         KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
 1999         for (; i < index - 1; i++)
 2000                 sc->sc_ifas[i] = sc->sc_ifas[i+1];
 2001         sc->sc_ifas[index - 1] = NULL;
 2002 
 2003         switch (ifa->ifa_addr->sa_family) {
 2004 #ifdef INET
 2005         case AF_INET:
 2006                 cif->cif_naddrs--;
 2007                 sc->sc_naddrs--;
 2008                 break;
 2009 #endif
 2010 #ifdef INET6
 2011         case AF_INET6:
 2012                 cif->cif_naddrs6--;
 2013                 sc->sc_naddrs6--;
 2014                 break;
 2015 #endif
 2016         }
 2017 
 2018         carp_ifa_delroute(ifa);
 2019         carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
 2020 
 2021         ifa->ifa_carp = NULL;
 2022         ifa_free(ifa);
 2023 
 2024         carp_hmac_prepare(sc);
 2025         carp_sc_state(sc);
 2026 
 2027         if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)
 2028                 carp_destroy(sc);
 2029         else
 2030                 CARP_UNLOCK(sc);
 2031 
 2032         if (!keep_cif)
 2033                 CIF_FREE(cif);
 2034 
 2035         sx_xunlock(&carp_sx);
 2036 }
 2037 
 2038 static void
 2039 carp_set_state(struct carp_softc *sc, int state, const char *reason)
 2040 {
 2041 
 2042         CARP_LOCK_ASSERT(sc);
 2043 
 2044         if (sc->sc_state != state) {
 2045                 const char *carp_states[] = { CARP_STATES };
 2046                 char subsys[IFNAMSIZ+5];
 2047 
 2048                 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
 2049                     sc->sc_carpdev->if_xname);
 2050 
 2051                 CARP_LOG("%s: %s -> %s (%s)\n", subsys,
 2052                     carp_states[sc->sc_state], carp_states[state], reason);
 2053 
 2054                 sc->sc_state = state;
 2055 
 2056                 devctl_notify("CARP", subsys, carp_states[state], NULL);
 2057         }
 2058 }
 2059 
 2060 static void
 2061 carp_linkstate(struct ifnet *ifp)
 2062 {
 2063         struct carp_softc *sc;
 2064 
 2065         CIF_LOCK(ifp->if_carp);
 2066         IFNET_FOREACH_CARP(ifp, sc) {
 2067                 CARP_LOCK(sc);
 2068                 carp_sc_state(sc);
 2069                 CARP_UNLOCK(sc);
 2070         }
 2071         CIF_UNLOCK(ifp->if_carp);
 2072 }
 2073 
 2074 static void
 2075 carp_sc_state(struct carp_softc *sc)
 2076 {
 2077 
 2078         CARP_LOCK_ASSERT(sc);
 2079 
 2080         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
 2081             !(sc->sc_carpdev->if_flags & IFF_UP) ||
 2082             !V_carp_allow) {
 2083                 callout_stop(&sc->sc_ad_tmo);
 2084 #ifdef INET
 2085                 callout_stop(&sc->sc_md_tmo);
 2086 #endif
 2087 #ifdef INET6
 2088                 callout_stop(&sc->sc_md6_tmo);
 2089 #endif
 2090                 carp_set_state(sc, INIT, "hardware interface down");
 2091                 carp_setrun(sc, 0);
 2092                 if (!sc->sc_suppress)
 2093                         carp_demote_adj(V_carp_ifdown_adj, "interface down");
 2094                 sc->sc_suppress = 1;
 2095         } else {
 2096                 carp_set_state(sc, INIT, "hardware interface up");
 2097                 carp_setrun(sc, 0);
 2098                 if (sc->sc_suppress)
 2099                         carp_demote_adj(-V_carp_ifdown_adj, "interface up");
 2100                 sc->sc_suppress = 0;
 2101         }
 2102 }
 2103 
 2104 static void
 2105 carp_demote_adj(int adj, char *reason)
 2106 {
 2107         atomic_add_int(&V_carp_demotion, adj);
 2108         CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
 2109         taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
 2110 }
 2111 
 2112 static int
 2113 carp_allow_sysctl(SYSCTL_HANDLER_ARGS)
 2114 {
 2115         int new, error;
 2116         struct carp_softc *sc;
 2117 
 2118         new = V_carp_allow;
 2119         error = sysctl_handle_int(oidp, &new, 0, req);
 2120         if (error || !req->newptr)
 2121                 return (error);
 2122 
 2123         if (V_carp_allow != new) {
 2124                 V_carp_allow = new;
 2125 
 2126                 mtx_lock(&carp_mtx);
 2127                 LIST_FOREACH(sc, &carp_list, sc_next) {
 2128                         CARP_LOCK(sc);
 2129                         if (curvnet == sc->sc_carpdev->if_vnet)
 2130                                 carp_sc_state(sc);
 2131                         CARP_UNLOCK(sc);
 2132                 }
 2133                 mtx_unlock(&carp_mtx);
 2134         }
 2135 
 2136         return (0);
 2137 }
 2138 
 2139 static int
 2140 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS)
 2141 {
 2142         int new, error;
 2143 
 2144         new = V_carp_dscp;
 2145         error = sysctl_handle_int(oidp, &new, 0, req);
 2146         if (error || !req->newptr)
 2147                 return (error);
 2148 
 2149         if (new < 0 || new > 63)
 2150                 return (EINVAL);
 2151 
 2152         V_carp_dscp = new;
 2153 
 2154         return (0);
 2155 }
 2156 
 2157 static int
 2158 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
 2159 {
 2160         int new, error;
 2161 
 2162         new = V_carp_demotion;
 2163         error = sysctl_handle_int(oidp, &new, 0, req);
 2164         if (error || !req->newptr)
 2165                 return (error);
 2166 
 2167         carp_demote_adj(new, "sysctl");
 2168 
 2169         return (0);
 2170 }
 2171 
 2172 static void
 2173 carp_mod_cleanup(void)
 2174 {
 2175 
 2176 #ifdef INET
 2177         (void)ipproto_unregister(IPPROTO_CARP);
 2178         carp_iamatch_p = NULL;
 2179 #endif
 2180 #ifdef INET6
 2181         (void)ip6proto_unregister(IPPROTO_CARP);
 2182         carp_iamatch6_p = NULL;
 2183         carp_macmatch6_p = NULL;
 2184 #endif
 2185         carp_ioctl_p = NULL;
 2186         carp_attach_p = NULL;
 2187         carp_detach_p = NULL;
 2188         carp_get_vhid_p = NULL;
 2189         carp_linkstate_p = NULL;
 2190         carp_forus_p = NULL;
 2191         carp_output_p = NULL;
 2192         carp_demote_adj_p = NULL;
 2193         carp_master_p = NULL;
 2194         mtx_unlock(&carp_mtx);
 2195         taskqueue_drain(taskqueue_swi, &carp_sendall_task);
 2196         mtx_destroy(&carp_mtx);
 2197         sx_destroy(&carp_sx);
 2198 }
 2199 
 2200 static int
 2201 carp_mod_load(void)
 2202 {
 2203         int err;
 2204 
 2205         mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
 2206         sx_init(&carp_sx, "carp_sx");
 2207         LIST_INIT(&carp_list);
 2208         carp_get_vhid_p = carp_get_vhid;
 2209         carp_forus_p = carp_forus;
 2210         carp_output_p = carp_output;
 2211         carp_linkstate_p = carp_linkstate;
 2212         carp_ioctl_p = carp_ioctl;
 2213         carp_attach_p = carp_attach;
 2214         carp_detach_p = carp_detach;
 2215         carp_demote_adj_p = carp_demote_adj;
 2216         carp_master_p = carp_master;
 2217 #ifdef INET6
 2218         carp_iamatch6_p = carp_iamatch6;
 2219         carp_macmatch6_p = carp_macmatch6;
 2220         err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL);
 2221         if (err) {
 2222                 printf("carp: error %d registering with INET6\n", err);
 2223                 carp_mod_cleanup();
 2224                 return (err);
 2225         }
 2226 #endif
 2227 #ifdef INET
 2228         carp_iamatch_p = carp_iamatch;
 2229         err = ipproto_register(IPPROTO_CARP, carp_input, NULL);
 2230         if (err) {
 2231                 printf("carp: error %d registering with INET\n", err);
 2232                 carp_mod_cleanup();
 2233                 return (err);
 2234         }
 2235 #endif
 2236         return (0);
 2237 }
 2238 
 2239 static int
 2240 carp_modevent(module_t mod, int type, void *data)
 2241 {
 2242         switch (type) {
 2243         case MOD_LOAD:
 2244                 return carp_mod_load();
 2245                 /* NOTREACHED */
 2246         case MOD_UNLOAD:
 2247                 mtx_lock(&carp_mtx);
 2248                 if (LIST_EMPTY(&carp_list))
 2249                         carp_mod_cleanup();
 2250                 else {
 2251                         mtx_unlock(&carp_mtx);
 2252                         return (EBUSY);
 2253                 }
 2254                 break;
 2255 
 2256         default:
 2257                 return (EINVAL);
 2258         }
 2259 
 2260         return (0);
 2261 }
 2262 
 2263 static moduledata_t carp_mod = {
 2264         "carp",
 2265         carp_modevent,
 2266         0
 2267 };
 2268 
 2269 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);

Cache object: 8d483b5275b3f44298d4ef5775b7a3eb


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.