The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_carp.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2002 Michael Shalayeff.
    3  * Copyright (c) 2003 Ryan McBride.
    4  * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
   20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
   26  * THE POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD: releng/11.2/sys/netinet/ip_carp.c 332288 2018-04-08 16:54:07Z brooks $");
   31 
   32 #include "opt_bpf.h"
   33 #include "opt_inet.h"
   34 #include "opt_inet6.h"
   35 
   36 #include <sys/param.h>
   37 #include <sys/systm.h>
   38 #include <sys/bus.h>
   39 #include <sys/jail.h>
   40 #include <sys/kernel.h>
   41 #include <sys/limits.h>
   42 #include <sys/malloc.h>
   43 #include <sys/mbuf.h>
   44 #include <sys/module.h>
   45 #include <sys/priv.h>
   46 #include <sys/proc.h>
   47 #include <sys/protosw.h>
   48 #include <sys/socket.h>
   49 #include <sys/sockio.h>
   50 #include <sys/sysctl.h>
   51 #include <sys/syslog.h>
   52 #include <sys/taskqueue.h>
   53 #include <sys/counter.h>
   54 
   55 #include <net/ethernet.h>
   56 #include <net/fddi.h>
   57 #include <net/if.h>
   58 #include <net/if_var.h>
   59 #include <net/if_dl.h>
   60 #include <net/if_llatbl.h>
   61 #include <net/if_types.h>
   62 #include <net/iso88025.h>
   63 #include <net/route.h>
   64 #include <net/vnet.h>
   65 
   66 #if defined(INET) || defined(INET6)
   67 #include <netinet/in.h>
   68 #include <netinet/in_var.h>
   69 #include <netinet/ip_carp.h>
   70 #include <netinet/ip.h>
   71 #include <machine/in_cksum.h>
   72 #endif
   73 #ifdef INET
   74 #include <netinet/ip_var.h>
   75 #include <netinet/if_ether.h>
   76 #endif
   77 
   78 #ifdef INET6
   79 #include <netinet/icmp6.h>
   80 #include <netinet/ip6.h>
   81 #include <netinet6/in6_var.h>
   82 #include <netinet6/ip6_var.h>
   83 #include <netinet6/scope6_var.h>
   84 #include <netinet6/nd6.h>
   85 #endif
   86 
   87 #include <crypto/sha1.h>
   88 
   89 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
   90 
      /*
       * Per-vhid CARP state.  As the design notes in this file describe, each
       * softc represents one virtual host id and points back to its parent ifnet.
       */
   91 struct carp_softc {
   92         struct ifnet            *sc_carpdev;    /* Pointer to parent ifnet. */
   93         struct ifaddr           **sc_ifas;      /* Our ifaddrs. */
   94         struct sockaddr_dl      sc_addr;        /* Our link level address. */
   95         struct callout          sc_ad_tmo;      /* Advertising timeout. */
   96 #ifdef INET
   97         struct callout          sc_md_tmo;      /* Master down timeout. */
   98 #endif
   99 #ifdef INET6
  100         struct callout          sc_md6_tmo;     /* XXX: Master down timeout. */
  101 #endif
  102         struct mtx              sc_mtx;         /* Softc lock, see CARP_LOCK(). */
  103 
  104         int                     sc_vhid;        /* Virtual host id. */
  105         int                     sc_advskew;     /* Advertisement skew, 1/256 s units. */
  106         int                     sc_advbase;     /* Advertisement base, seconds. */
  107 
              /*
               * CARP_FOREACH_IFA() walks the first sc_naddrs + sc_naddrs6
               * slots of sc_ifas.
               */
  108         int                     sc_naddrs;
  109         int                     sc_naddrs6;
  110         int                     sc_ifasiz;      /* presumably capacity of sc_ifas -- TODO confirm */
  111         enum { INIT = 0, BACKUP, MASTER }       sc_state;
  112         int                     sc_suppress;
              /* Send errors counted by carp_send_ad_error(). */
  113         int                     sc_sendad_errors;
  114 #define CARP_SENDAD_MAX_ERRORS  3
              /* Successful sends counted by carp_send_ad_error(). */
  115         int                     sc_sendad_success;
  116 #define CARP_SENDAD_MIN_SUCCESS 3
  117 
              /*
               * When sc_init_counter is non-zero carp_prepare_ad() draws a
               * random sc_counter; otherwise it increments sc_counter.
               * carp_input_c() clears sc_init_counter and stores the counter of
               * an authenticated advertisement.
               */
  118         int                     sc_init_counter;
  119         uint64_t                sc_counter;
  120 
  121         /* authentication */
  122 #define CARP_HMAC_PAD   64
  123         unsigned char sc_key[CARP_KEY_LEN];     /* HMAC key. */
  124         unsigned char sc_pad[CARP_HMAC_PAD];    /* Key pad; left as opad by carp_hmac_prepare(). */
  125         SHA1_CTX sc_sha1;                       /* Precomputed start of the inner hash. */
  126 
  127         TAILQ_ENTRY(carp_softc) sc_list;        /* On the carp_if list. */
  128         LIST_ENTRY(carp_softc)  sc_next;        /* On the global list. */
  129 };
  130 
  131 struct carp_if {
              /*
               * Per-ifnet CARP state, reached through ifp->if_carp.  It heads
               * the list of softcs configured on that interface.
               */
  132 #ifdef INET
  133         int     cif_naddrs;
  134 #endif
  135 #ifdef INET6
  136         int     cif_naddrs6;
  137 #endif
  138         TAILQ_HEAD(, carp_softc) cif_vrs;       /* Softcs, linked via sc_list. */
  139 #ifdef INET
  140         struct ip_moptions       cif_imo;       /* IPv4 multicast options. */
  141 #endif
  142 #ifdef INET6
  143         struct ip6_moptions      cif_im6o;      /* IPv6 multicast options. */
  144 #endif
  145         struct ifnet    *cif_ifp;
  146         struct mtx      cif_mtx;                /* See CIF_LOCK(); guards cif_vrs traversal. */
  147         uint32_t        cif_flags;
  148 #define CIF_PROMISC     0x00000001
  149 };
  150 
  151 #define CARP_INET       0
  152 #define CARP_INET6      1
      /*
       * NOTE(review): presumably indexed by CARP_INET/CARP_INET6, with -1
       * meaning "not registered"; the users are outside this excerpt -- confirm.
       */
  153 static int proto_reg[] = {-1, -1};
  154 
  155 /*
  156  * Brief design of carp(4).
  157  *
  158  * Any carp-capable ifnet may have a list of carp softcs hanging off
  159  * its ifp->if_carp pointer. Each softc represents one unique virtual
  160  * host id, or vhid. The softc has a back pointer to the ifnet. All
  161  * softcs are joined in a global list, which has quite limited use.
  162  *
  163  * Any interface address that takes part in CARP negotiation has a
  164  * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
  165  * AF_INET or AF_INET6 address.
  166  *
  167  * Although, one can get the softc's backpointer to ifnet and traverse
  168  * through its ifp->if_addrhead queue to find all interface addresses
  169  * involved in CARP, we keep a growable array of ifaddr pointers. This
  170  * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
  171  * do calls into the network stack, thus avoiding LORs.
  172  *
  173  * Locking:
  174  *
  175  * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
  176  * callout-driven events and ioctl()s.
  177  *
  178  * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
  179  * To traverse the global list we use the mutex carp_mtx.
  180  *
  181  * Known issues with locking:
  182  *
  183  * - Sending ad, we put the pointer to the softc in an mtag, and no reference
  184  *   counting is done on the softc.
  185  * - On module unload we may race (?) with packet processing thread
  186  *   dereferencing our function pointers.
  187  */
  188 
  189 /* Accept incoming CARP packets; when zero the input routines drop them. */
  190 static VNET_DEFINE(int, carp_allow) = 1;
  191 #define V_carp_allow    VNET(carp_allow)
  192 
  193 /* Preempt slower nodes: a BACKUP takes over from a slower-advertising master. */
  194 static VNET_DEFINE(int, carp_preempt) = 0;
  195 #define V_carp_preempt  VNET(carp_preempt)
  196 
  197 /* Log level: > 0 enables CARP_LOG(), > 1 additionally enables CARP_DEBUG(). */
  198 static VNET_DEFINE(int, carp_log) = 1;
  199 #define V_carp_log      VNET(carp_log)
  200 
  201 /* Global advskew demotion, added to sc_advskew by DEMOTE_ADVSKEW(). */
  202 static VNET_DEFINE(int, carp_demotion) = 0;
  203 #define V_carp_demotion VNET(carp_demotion)
  204 
  205 /* Send error demotion factor, applied by carp_send_ad_error(). */
  206 static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW;
  207 #define V_carp_senderr_adj      VNET(carp_senderr_adj)
  208 
  209 /* Iface down demotion factor. */
  210 static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW;
  211 #define V_carp_ifdown_adj       VNET(carp_ifdown_adj)
  212 
  213 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
  214 
      /* The net.inet.carp sysctl tree; each knob below is per-VNET. */
  215 SYSCTL_NODE(_net_inet, IPPROTO_CARP,    carp,   CTLFLAG_RW, 0,  "CARP");
  216 SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_VNET | CTLFLAG_RW,
  217     &VNET_NAME(carp_allow), 0, "Accept incoming CARP packets");
  218 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
  219     &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
  220 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
  221     &VNET_NAME(carp_log), 0, "CARP log level");
      /* "demotion" goes through a handler instead of exposing the int directly. */
  222 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
  223     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
  224     0, 0, carp_demote_adj_sysctl, "I",
  225     "Adjust demotion factor (skew of advskew)");
  226 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
  227     CTLFLAG_VNET | CTLFLAG_RW,
  228     &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
  229 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor,
  230     CTLFLAG_VNET | CTLFLAG_RW,
  231     &VNET_NAME(carp_ifdown_adj), 0,
  232     "Interface down demotion factor adjustment");
  233 
  234 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
  235 VNET_PCPUSTAT_SYSINIT(carpstats);
  236 VNET_PCPUSTAT_SYSUNINIT(carpstats);
  237 
      /*
       * Add val to the counter backing field "name" of struct carpstats.
       * The counter is selected by the field's offset divided by
       * sizeof(uint64_t), which assumes every field of struct carpstats is a
       * uint64_t -- confirm against netinet/ip_carp.h.
       */
  238 #define CARPSTATS_ADD(name, val)        \
  239     counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
  240         sizeof(uint64_t)], (val))
  241 #define CARPSTATS_INC(name)             CARPSTATS_ADD(name, 1)
  242 
  243 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
  244     carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
  245 
  246 #define CARP_LOCK_INIT(sc)      mtx_init(&(sc)->sc_mtx, "carp_softc",   \
  247         NULL, MTX_DEF)
      /* Wrappers around the per-softc mutex sc_mtx. */
  248 #define CARP_LOCK_DESTROY(sc)   mtx_destroy(&(sc)->sc_mtx)
  249 #define CARP_LOCK_ASSERT(sc)    mtx_assert(&(sc)->sc_mtx, MA_OWNED)
  250 #define CARP_LOCK(sc)           mtx_lock(&(sc)->sc_mtx)
  251 #define CARP_UNLOCK(sc)         mtx_unlock(&(sc)->sc_mtx)
      /* Wrappers around the per-interface mutex cif_mtx. */
  252 #define CIF_LOCK_INIT(cif)      mtx_init(&(cif)->cif_mtx, "carp_if",   \
  253         NULL, MTX_DEF)
  254 #define CIF_LOCK_DESTROY(cif)   mtx_destroy(&(cif)->cif_mtx)
  255 #define CIF_LOCK_ASSERT(cif)    mtx_assert(&(cif)->cif_mtx, MA_OWNED)
  256 #define CIF_LOCK(cif)           mtx_lock(&(cif)->cif_mtx)
  257 #define CIF_UNLOCK(cif)         mtx_unlock(&(cif)->cif_mtx)
      /*
       * Release cif if no softc is left on its cif_vrs list.  The lock is
       * taken for the check; when the list is empty carp_free_if() is called
       * with the lock still held (presumably it disposes of the lock too --
       * confirm), otherwise the lock is simply dropped.
       */
  258 #define CIF_FREE(cif)   do {                            \
  259                 CIF_LOCK(cif);                          \
  260                 if (TAILQ_EMPTY(&(cif)->cif_vrs))       \
  261                         carp_free_if(cif);              \
  262                 else                                    \
  263                         CIF_UNLOCK(cif);                \
  264 } while (0)
  265 
      /* Log at LOG_INFO with a "carp: " prefix when the log level is above 0. */
  266 #define CARP_LOG(...)   do {                            \
  267         if (V_carp_log > 0)                             \
  268                 log(LOG_INFO, "carp: " __VA_ARGS__);    \
  269 } while (0)
  270 
      /* Log at LOG_DEBUG (no prefix added) when the log level is above 1. */
  271 #define CARP_DEBUG(...) do {                            \
  272         if (V_carp_log > 1)                             \
  273                 log(LOG_DEBUG, __VA_ARGS__);            \
  274 } while (0)
  275 
  /*
   * Walk the addresses on ifp->if_addrhead that have a CARP softc attached
   * (ifa_carp != NULL), after asserting the interface address lock.
   *
   * NOTE: this macro and the two iterators below expand to an assertion
   * statement followed by a loop header, so they must not be used as the
   * unbraced body of an if/else or of another loop.
   */
  #define IFNET_FOREACH_IFA(ifp, ifa)                                     \
          IF_ADDR_LOCK_ASSERT(ifp);                                       \
          TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link)             \
                  if ((ifa)->ifa_carp != NULL)

  /*
   * Walk the ifaddrs recorded in sc->sc_ifas.  The softc lock must be held.
   * Iteration covers the first sc_naddrs + sc_naddrs6 slots and stops early
   * at the first NULL slot.  The loop index _i is declared by the macro.
   */
  #define CARP_FOREACH_IFA(sc, ifa)                                       \
          CARP_LOCK_ASSERT(sc);                                           \
          for (int _i = 0;                                                \
                  _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&              \
                  ((ifa) = (sc)->sc_ifas[_i]) != NULL;                    \
                  ++_i)

  /*
   * Walk the softcs configured on ifp.  Either the interface's cif_mtx or
   * the global carp_sx (exclusively) must be held.
   */
  #define IFNET_FOREACH_CARP(ifp, sc)                                     \
          KASSERT(mtx_owned(&(ifp)->if_carp->cif_mtx) ||                  \
              sx_xlocked(&carp_sx), ("cif_vrs not locked"));              \
          TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)

  /*
   * Effective advertisement skew of sc: its configured skew plus the global
   * demotion counter, clamped to CARP_MAXSKEW.
   */
  #define DEMOTE_ADVSKEW(sc)                                      \
      (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ?      \
      CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion))
  296 
  297 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
      /* Forward declarations of file-local functions. */
  298 static struct carp_softc
  299                 *carp_alloc(struct ifnet *);
  300 static void     carp_destroy(struct carp_softc *);
  301 static struct carp_if
  302                 *carp_alloc_if(struct ifnet *);
  303 static void     carp_free_if(struct carp_if *);
  304 static void     carp_set_state(struct carp_softc *, int, const char* reason);
  305 static void     carp_sc_state(struct carp_softc *);
  306 static void     carp_setrun(struct carp_softc *, sa_family_t);
  307 static void     carp_master_down(void *);
  308 static void     carp_master_down_locked(struct carp_softc *,
  309                     const char* reason);
  310 static void     carp_send_ad(void *);
  311 static void     carp_send_ad_locked(struct carp_softc *);
  312 static void     carp_addroute(struct carp_softc *);
  313 static void     carp_ifa_addroute(struct ifaddr *);
  314 static void     carp_delroute(struct carp_softc *);
  315 static void     carp_ifa_delroute(struct ifaddr *);
  316 static void     carp_send_ad_all(void *, int);
  317 static void     carp_demote_adj(int, char *);
  318 
      /* Global list of all softcs (linked via sc_next); traversed under carp_mtx. */
  319 static LIST_HEAD(, carp_softc) carp_list;
  320 static struct mtx carp_mtx;
      /* Alternative to CIF_LOCK() for traversing an interface's softc list. */
  321 static struct sx carp_sx;
      /* Task whose handler is carp_send_ad_all(). */
  322 static struct task carp_sendall_task =
  323     TASK_INITIALIZER(0, carp_send_ad_all, NULL);
  324 
  325 static void
  326 carp_hmac_prepare(struct carp_softc *sc)
  327 {
  328         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
  329         uint8_t vhid = sc->sc_vhid & 0xff;
  330         struct ifaddr *ifa;
  331         int i, found;
  332 #ifdef INET
  333         struct in_addr last, cur, in;
  334 #endif
  335 #ifdef INET6
  336         struct in6_addr last6, cur6, in6;
  337 #endif
  338 
  339         CARP_LOCK_ASSERT(sc);
  340 
  341         /* Compute ipad from key. */
  342         bzero(sc->sc_pad, sizeof(sc->sc_pad));
  343         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
  344         for (i = 0; i < sizeof(sc->sc_pad); i++)
  345                 sc->sc_pad[i] ^= 0x36;
  346 
  347         /* Precompute first part of inner hash. */
  348         SHA1Init(&sc->sc_sha1);
  349         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
  350         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
  351         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
  352         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
  353 #ifdef INET
  354         cur.s_addr = 0;
  355         do {
  356                 found = 0;
  357                 last = cur;
  358                 cur.s_addr = 0xffffffff;
  359                 CARP_FOREACH_IFA(sc, ifa) {
  360                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
  361                         if (ifa->ifa_addr->sa_family == AF_INET &&
  362                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
  363                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
  364                                 cur.s_addr = in.s_addr;
  365                                 found++;
  366                         }
  367                 }
  368                 if (found)
  369                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
  370         } while (found);
  371 #endif /* INET */
  372 #ifdef INET6
  373         memset(&cur6, 0, sizeof(cur6));
  374         do {
  375                 found = 0;
  376                 last6 = cur6;
  377                 memset(&cur6, 0xff, sizeof(cur6));
  378                 CARP_FOREACH_IFA(sc, ifa) {
  379                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
  380                         if (IN6_IS_SCOPE_EMBED(&in6))
  381                                 in6.s6_addr16[1] = 0;
  382                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
  383                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
  384                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
  385                                 cur6 = in6;
  386                                 found++;
  387                         }
  388                 }
  389                 if (found)
  390                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
  391         } while (found);
  392 #endif /* INET6 */
  393 
  394         /* convert ipad to opad */
  395         for (i = 0; i < sizeof(sc->sc_pad); i++)
  396                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
  397 }
  398 
  399 static void
  400 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
  401     unsigned char md[20])
  402 {
  403         SHA1_CTX sha1ctx;
  404 
  405         CARP_LOCK_ASSERT(sc);
  406 
  407         /* fetch first half of inner hash */
  408         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
  409 
  410         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
  411         SHA1Final(md, &sha1ctx);
  412 
  413         /* outer hash */
  414         SHA1Init(&sha1ctx);
  415         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
  416         SHA1Update(&sha1ctx, md, 20);
  417         SHA1Final(md, &sha1ctx);
  418 }
  419 
  420 static int
  421 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
  422     unsigned char md[20])
  423 {
  424         unsigned char md2[20];
  425 
  426         CARP_LOCK_ASSERT(sc);
  427 
  428         carp_hmac_generate(sc, counter, md2);
  429 
  430         return (bcmp(md, md2, sizeof(md2)));
  431 }
  432 
  /*
   * Process an incoming IPv4 CARP packet.
   * We have rearranged the order of the checks compared to the RFC,
   * but it seems more efficient this way or not possible otherwise.
   *
   * Takes ownership of *mp (which is set to NULL) and always returns
   * IPPROTO_DONE.  Packets are dropped when CARP input is disabled, when the
   * TTL is not CARP_DFLTTL, when they are too short for a CARP header, or
   * when the CARP checksum fails; everything else is handed to
   * carp_input_c().
   */
  #ifdef INET
  int
  carp_input(struct mbuf **mp, int *offp, int proto)
  {
          struct mbuf *m = *mp;
          struct ip *ip = mtod(m, struct ip *);
          struct carp_header *ch;
          int iplen, len;

          *mp = NULL;

          CARPSTATS_INC(carps_ipackets);

          if (!V_carp_allow) {
                  m_freem(m);
                  return (IPPROTO_DONE);
          }

          /* verify that the IP TTL is 255.  */
          if (ip->ip_ttl != CARP_DFLTTL) {
                  CARPSTATS_INC(carps_badttl);
                  CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
                      ip->ip_ttl,
                      m->m_pkthdr.rcvif->if_xname);
                  m_freem(m);
                  return (IPPROTO_DONE);
          }

          /* The header length comes from the packet, not from *offp. */
          iplen = ip->ip_hl << 2;
          len = iplen + sizeof(*ch);

          /* verify that the packet is long enough to hold a CARP header */
          if (m->m_pkthdr.len < len) {
                  CARPSTATS_INC(carps_badlen);
                  CARP_DEBUG("%s: received len %d < sizeof(struct carp_header) "
                      "on %s\n", __func__, m->m_pkthdr.len - iplen,
                      m->m_pkthdr.rcvif->if_xname);
                  m_freem(m);
                  return (IPPROTO_DONE);
          }

          /*
           * Make the IP and CARP headers contiguous, but only when the
           * first mbuf does not already hold them.
           */
          if (m->m_len < len && (m = m_pullup(m, len)) == NULL) {
                  CARPSTATS_INC(carps_hdrops);
                  CARP_DEBUG("%s: pullup failed\n", __func__);
                  return (IPPROTO_DONE);
          }
          ip = mtod(m, struct ip *);
          ch = (struct carp_header *)((char *)ip + iplen);

          /*
           * verify the CARP checksum; m_data is temporarily advanced past
           * the IP header so that only the CARP header is summed.
           */
          m->m_data += iplen;
          if (in_cksum(m, len - iplen)) {
                  CARPSTATS_INC(carps_badsum);
                  CARP_DEBUG("%s: checksum failed on %s\n", __func__,
                      m->m_pkthdr.rcvif->if_xname);
                  m_freem(m);
                  return (IPPROTO_DONE);
          }
          m->m_data -= iplen;

          carp_input_c(m, ch, AF_INET);
          return (IPPROTO_DONE);
  }
  #endif
  524 
  #ifdef INET6
  /*
   * Process an incoming IPv6 CARP packet.
   *
   * Always returns IPPROTO_DONE.  The packet is dropped when CARP input is
   * disabled, when the receiving interface has no CARP state, when the hop
   * limit is not CARP_DFLTTL, when the CARP header cannot be obtained, or
   * when the CARP checksum fails.  Otherwise it is passed to carp_input_c().
   */
  int
  carp6_input(struct mbuf **mp, int *offp, int proto)
  {
          struct mbuf *m = *mp;
          struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
          struct carp_header *ch;
          u_int mlen;

          CARPSTATS_INC(carps_ipackets6);

          /* CARP input administratively disabled. */
          if (!V_carp_allow) {
                  m_freem(m);
                  return (IPPROTO_DONE);
          }

          /* The receiving interface must have CARP configured. */
          if (m->m_pkthdr.rcvif->if_carp == NULL) {
                  CARPSTATS_INC(carps_badif);
                  CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
                      __func__, m->m_pkthdr.rcvif->if_xname);
                  m_freem(m);
                  return (IPPROTO_DONE);
          }

          /* The hop limit must be 255. */
          if (ip6->ip6_hlim != CARP_DFLTTL) {
                  CARPSTATS_INC(carps_badttl);
                  CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
                      ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname);
                  m_freem(m);
                  return (IPPROTO_DONE);
          }

          /*
           * Locate the complete CARP header.  The first mbuf's length is
           * saved beforehand, for the log message only.
           * NOTE(review): the failure path does not free m, presumably
           * because IP6_EXTHDR_GET() has already done so -- confirm.
           */
          mlen = m->m_len;
          IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
          if (ch == NULL) {
                  CARPSTATS_INC(carps_badlen);
                  CARP_DEBUG("%s: packet size %u too small\n", __func__, mlen);
                  return (IPPROTO_DONE);
          }

          /*
           * Checksum the CARP header with m_data temporarily advanced to
           * the CARP offset.
           */
          m->m_data += *offp;
          if (in_cksum(m, sizeof(*ch))) {
                  CARPSTATS_INC(carps_badsum);
                  CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
                      m->m_pkthdr.rcvif->if_xname);
                  m_freem(m);
                  return (IPPROTO_DONE);
          }
          m->m_data -= *offp;

          carp_input_c(m, ch, AF_INET6);
          return (IPPROTO_DONE);
  }
  #endif /* INET6 */
  584 
  585 static void
  586 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
  587 {
              /*
               * Address-family independent part of advertisement input.
               *
               * Finds the softc whose vhid matches ch->carp_vhid among the CARP
               * addresses of family af on the receiving interface, checks the
               * CARP version and the HMAC, records the sender's counter and then
               * runs the INIT/MASTER/BACKUP state machine.  The mbuf is consumed
               * on every path.
               */
  588         struct ifnet *ifp = m->m_pkthdr.rcvif;
  589         struct ifaddr *ifa;
  590         struct carp_softc *sc;
  591         uint64_t tmp_counter;
  592         struct timeval sc_tv, ch_tv;
  593 
  594         /* verify that the VHID is valid on the receiving interface */
  595         IF_ADDR_RLOCK(ifp);
  596         IFNET_FOREACH_IFA(ifp, ifa)
  597                 if (ifa->ifa_addr->sa_family == af &&
  598                     ifa->ifa_carp->sc_vhid == ch->carp_vhid) {
                              /* Take a reference before the address lock is dropped. */
  599                         ifa_ref(ifa);
  600                         break;
  601                 }
  602         IF_ADDR_RUNLOCK(ifp);
  603 
              /* The loop ran to completion: no address carries this vhid. */
  604         if (ifa == NULL) {
  605                 CARPSTATS_INC(carps_badvhid);
  606                 m_freem(m);
  607                 return;
  608         }
  609 
  610         /* verify the CARP version. */
  611         if (ch->carp_version != CARP_VERSION) {
  612                 CARPSTATS_INC(carps_badver);
  613                 CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
  614                     ch->carp_version);
  615                 ifa_free(ifa);
  616                 m_freem(m);
  617                 return;
  618         }
  619 
              /* Lock the softc first, then drop the ifaddr reference. */
  620         sc = ifa->ifa_carp;
  621         CARP_LOCK(sc);
  622         ifa_free(ifa);
  623 
  624         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
  625                 CARPSTATS_INC(carps_badauth);
  626                 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
  627                     sc->sc_vhid, ifp->if_xname);
  628                 goto out;
  629         }
  630 
              /* Rebuild the 64-bit counter from its two network-order halves. */
  631         tmp_counter = ntohl(ch->carp_counter[0]);
  632         tmp_counter = tmp_counter<<32;
  633         tmp_counter += ntohl(ch->carp_counter[1]);
  634 
  635         /* XXX Replay protection goes here */
  636 
  637         sc->sc_init_counter = 0;
  638         sc->sc_counter = tmp_counter;
  639 
              /*
               * Express our (demoted) and the sender's advertisement intervals
               * as timevals: advbase seconds plus advskew/256 of a second.
               */
  640         sc_tv.tv_sec = sc->sc_advbase;
  641         sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
  642         ch_tv.tv_sec = ch->carp_advbase;
  643         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
  644 
  645         switch (sc->sc_state) {
  646         case INIT:
  647                 break;
  648         case MASTER:
  649                 /*
  650                  * If we receive an advertisement from a master who's going to
  651                  * be more frequent than us, go into BACKUP state.
  652                  */
                      /* Note that an equal interval also makes us step down. */
  653                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
  654                     timevalcmp(&sc_tv, &ch_tv, ==)) {
  655                         callout_stop(&sc->sc_ad_tmo);
  656                         carp_set_state(sc, BACKUP,
  657                             "more frequent advertisement received");
  658                         carp_setrun(sc, 0);
  659                         carp_delroute(sc);
  660                 }
  661                 break;
  662         case BACKUP:
  663                 /*
  664                  * If we're pre-empting masters who advertise slower than us,
  665                  * and this one claims to be slower, treat him as down.
  666                  */
  667                 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
  668                         carp_master_down_locked(sc,
  669                             "preempting a slower master");
  670                         break;
  671                 }
  672 
  673                 /*
  674                  *  If the master is going to advertise at such a low frequency
  675                  *  that he's guaranteed to time out, we might as well just
  676                  *  treat him as timed out now.
  677                  */
  678                 sc_tv.tv_sec = sc->sc_advbase * 3;
  679                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
  680                         carp_master_down_locked(sc, "master will time out");
  681                         break;
  682                 }
  683 
  684                 /*
  685                  * Otherwise, we reset the counter and wait for the next
  686                  * advertisement.
  687                  */
  688                 carp_setrun(sc, af);
  689                 break;
  690         }
  691 
  692 out:
  693         CARP_UNLOCK(sc);
  694         m_freem(m);
  695 }
  696 
  697 static int
  698 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
  699 {
  700         struct m_tag *mtag;
  701 
  702         if (sc->sc_init_counter) {
  703                 /* this could also be seconds since unix epoch */
  704                 sc->sc_counter = arc4random();
  705                 sc->sc_counter = sc->sc_counter << 32;
  706                 sc->sc_counter += arc4random();
  707         } else
  708                 sc->sc_counter++;
  709 
  710         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
  711         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
  712 
  713         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
  714 
  715         /* Tag packet for carp_output */
  716         if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
  717             M_NOWAIT)) == NULL) {
  718                 m_freem(m);
  719                 CARPSTATS_INC(carps_onomem);
  720                 return (ENOMEM);
  721         }
  722         bcopy(&sc, mtag + 1, sizeof(sc));
  723         m_tag_prepend(m, mtag);
  724 
  725         return (0);
  726 }
  727 
  728 /*
  729  * To avoid LORs and possible recursions this function shouldn't
  730  * be called directly, but scheduled via taskqueue.
  731  */
  732 static void
  733 carp_send_ad_all(void *ctx __unused, int pending __unused)
  734 {
  735         struct carp_softc *sc;
  736 
  737         mtx_lock(&carp_mtx);
  738         LIST_FOREACH(sc, &carp_list, sc_next)
  739                 if (sc->sc_state == MASTER) {
  740                         CARP_LOCK(sc);
  741                         CURVNET_SET(sc->sc_carpdev->if_vnet);
  742                         carp_send_ad_locked(sc);
  743                         CURVNET_RESTORE();
  744                         CARP_UNLOCK(sc);
  745                 }
  746         mtx_unlock(&carp_mtx);
  747 }
  748 
  749 /* Send a periodic advertisement, executed in callout context. */
  750 static void
  751 carp_send_ad(void *v)
  752 {
  753         struct carp_softc *sc = v;
  754 
  755         CARP_LOCK_ASSERT(sc);
  756         CURVNET_SET(sc->sc_carpdev->if_vnet);
  757         carp_send_ad_locked(sc);
  758         CURVNET_RESTORE();
  759         CARP_UNLOCK(sc);
  760 }
  761 
  762 static void
  763 carp_send_ad_error(struct carp_softc *sc, int error)
  764 {
  765 
  766         if (error) {
  767                 if (sc->sc_sendad_errors < INT_MAX)
  768                         sc->sc_sendad_errors++;
  769                 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
  770                         static const char fmt[] = "send error %d on %s";
  771                         char msg[sizeof(fmt) + IFNAMSIZ];
  772 
  773                         sprintf(msg, fmt, error, sc->sc_carpdev->if_xname);
  774                         carp_demote_adj(V_carp_senderr_adj, msg);
  775                 }
  776                 sc->sc_sendad_success = 0;
  777         } else {
  778                 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS &&
  779                     ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
  780                         static const char fmt[] = "send ok on %s";
  781                         char msg[sizeof(fmt) + IFNAMSIZ];
  782 
  783                         sprintf(msg, fmt, sc->sc_carpdev->if_xname);
  784                         carp_demote_adj(-V_carp_senderr_adj, msg);
  785                         sc->sc_sendad_errors = 0;
  786                 } else
  787                         sc->sc_sendad_errors = 0;
  788         }
  789 }
  790 
/*
 * Build and transmit one CARP advertisement per enabled address family for
 * the virtual router sc, then re-arm the advertisement callout.
 *
 * The softc lock must be held.  Every failure path jumps to "resched", so
 * the callout is always re-armed; note that a failure while building the
 * IPv4 packet also skips the IPv6 packet for this round.
 */
static void
carp_send_ad_locked(struct carp_softc *sc)
{
        struct carp_header ch;
        struct timeval tv;
        struct sockaddr sa;
        struct ifaddr *ifa;
        struct carp_header *ch_ptr;
        struct mbuf *m;
        int len, advskew;

        CARP_LOCK_ASSERT(sc);

        /* Next advertisement is due in advbase seconds plus advskew/256 s. */
        advskew = DEMOTE_ADVSKEW(sc);
        tv.tv_sec = sc->sc_advbase;
        tv.tv_usec = advskew * 1000000 / 256;

        /* Header template shared by the IPv4 and IPv6 packets. */
        ch.carp_version = CARP_VERSION;
        ch.carp_type = CARP_ADVERTISEMENT;
        ch.carp_vhid = sc->sc_vhid;
        ch.carp_advbase = sc->sc_advbase;
        ch.carp_advskew = advskew;
        ch.carp_authlen = 7;    /* XXX DEFINE */
        ch.carp_pad1 = 0;       /* must be zero */
        ch.carp_cksum = 0;

        /* XXXGL: OpenBSD picks first ifaddr with needed family. */

#ifdef INET
        if (sc->sc_naddrs) {
                struct ip *ip;

                m = m_gethdr(M_NOWAIT, MT_DATA);
                if (m == NULL) {
                        CARPSTATS_INC(carps_onomem);
                        goto resched;
                }
                /* Single mbuf holding the IP header followed by the CARP header. */
                len = sizeof(*ip) + sizeof(ch);
                m->m_pkthdr.len = len;
                m->m_pkthdr.rcvif = NULL;
                m->m_len = len;
                M_ALIGN(m, m->m_len);
                m->m_flags |= M_MCAST;
                ip = mtod(m, struct ip *);
                ip->ip_v = IPVERSION;
                ip->ip_hl = sizeof(*ip) >> 2;
                ip->ip_tos = IPTOS_LOWDELAY;
                ip->ip_len = htons(len);
                ip->ip_off = htons(IP_DF);
                ip->ip_ttl = CARP_DFLTTL;
                ip->ip_p = IPPROTO_CARP;
                ip->ip_sum = 0;
                ip_fillid(ip);

                /*
                 * Source address: taken from the ifaddr returned for a
                 * zeroed AF_INET sockaddr on the parent interface, or
                 * 0.0.0.0 when none is returned.
                 */
                bzero(&sa, sizeof(sa));
                sa.sa_family = AF_INET;
                ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
                if (ifa != NULL) {
                        ip->ip_src.s_addr =
                            ifatoia(ifa)->ia_addr.sin_addr.s_addr;
                        ifa_free(ifa);
                } else
                        ip->ip_src.s_addr = 0;
                ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);

                ch_ptr = (struct carp_header *)(&ip[1]);
                bcopy(&ch, ch_ptr, sizeof(ch));
                /*
                 * NOTE(review): m is not freed here on failure, so
                 * carp_prepare_ad() is assumed to dispose of it -- confirm.
                 */
                if (carp_prepare_ad(m, sc, ch_ptr))
                        goto resched;

                /*
                 * Checksum covers the CARP header only: temporarily step
                 * over the IP header, then restore the data pointer.
                 */
                m->m_data += sizeof(*ip);
                ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
                m->m_data -= sizeof(*ip);

                CARPSTATS_INC(carps_opackets);

                /* The output result feeds the send-error accounting. */
                carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
                    &sc->sc_carpdev->if_carp->cif_imo, NULL));
        }
#endif /* INET */
#ifdef INET6
        if (sc->sc_naddrs6) {
                struct ip6_hdr *ip6;

                m = m_gethdr(M_NOWAIT, MT_DATA);
                if (m == NULL) {
                        CARPSTATS_INC(carps_onomem);
                        goto resched;
                }
                /* Single mbuf holding the IPv6 header followed by the CARP header. */
                len = sizeof(*ip6) + sizeof(ch);
                m->m_pkthdr.len = len;
                m->m_pkthdr.rcvif = NULL;
                m->m_len = len;
                M_ALIGN(m, m->m_len);
                m->m_flags |= M_MCAST;
                ip6 = mtod(m, struct ip6_hdr *);
                bzero(ip6, sizeof(*ip6));
                ip6->ip6_vfc |= IPV6_VERSION;
                ip6->ip6_hlim = CARP_DFLTTL;
                ip6->ip6_nxt = IPPROTO_CARP;
                bzero(&sa, sizeof(sa));

                /* set the source address */
                sa.sa_family = AF_INET6;
                ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
                if (ifa != NULL) {
                        bcopy(IFA_IN6(ifa), &ip6->ip6_src,
                            sizeof(struct in6_addr));
                        ifa_free(ifa);
                } else
                        /* This should never happen with IPv6. */
                        bzero(&ip6->ip6_src, sizeof(struct in6_addr));

                /* Set the multicast destination. */
                ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
                ip6->ip6_dst.s6_addr8[15] = 0x12;
                if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
                        m_freem(m);
                        CARP_DEBUG("%s: in6_setscope failed\n", __func__);
                        goto resched;
                }

                ch_ptr = (struct carp_header *)(&ip6[1]);
                bcopy(&ch, ch_ptr, sizeof(ch));
                /*
                 * NOTE(review): as in the IPv4 path, carp_prepare_ad() is
                 * assumed to dispose of m on failure -- confirm.
                 */
                if (carp_prepare_ad(m, sc, ch_ptr))
                        goto resched;

                /* Checksum the CARP header only, as in the IPv4 path. */
                m->m_data += sizeof(*ip6);
                ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
                m->m_data -= sizeof(*ip6);

                CARPSTATS_INC(carps_opackets6);

                carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
                    &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
        }
#endif /* INET6 */

resched:
        /* Always re-arm the timer, whether or not anything was sent. */
        callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
}
  932 
  933 static void
  934 carp_addroute(struct carp_softc *sc)
  935 {
  936         struct ifaddr *ifa;
  937 
  938         CARP_FOREACH_IFA(sc, ifa)
  939                 carp_ifa_addroute(ifa);
  940 }
  941 
  942 static void
  943 carp_ifa_addroute(struct ifaddr *ifa)
  944 {
  945 
  946         switch (ifa->ifa_addr->sa_family) {
  947 #ifdef INET
  948         case AF_INET:
  949                 in_addprefix(ifatoia(ifa), RTF_UP);
  950                 ifa_add_loopback_route(ifa,
  951                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
  952                 break;
  953 #endif
  954 #ifdef INET6
  955         case AF_INET6:
  956                 ifa_add_loopback_route(ifa,
  957                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
  958                 nd6_add_ifa_lle(ifatoia6(ifa));
  959                 break;
  960 #endif
  961         }
  962 }
  963 
  964 static void
  965 carp_delroute(struct carp_softc *sc)
  966 {
  967         struct ifaddr *ifa;
  968 
  969         CARP_FOREACH_IFA(sc, ifa)
  970                 carp_ifa_delroute(ifa);
  971 }
  972 
  973 static void
  974 carp_ifa_delroute(struct ifaddr *ifa)
  975 {
  976 
  977         switch (ifa->ifa_addr->sa_family) {
  978 #ifdef INET
  979         case AF_INET:
  980                 ifa_del_loopback_route(ifa,
  981                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
  982                 in_scrubprefix(ifatoia(ifa), LLE_STATIC);
  983                 break;
  984 #endif
  985 #ifdef INET6
  986         case AF_INET6:
  987                 ifa_del_loopback_route(ifa,
  988                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
  989                 nd6_rem_ifa_lle(ifatoia6(ifa), 1);
  990                 break;
  991 #endif
  992         }
  993 }
  994 
  995 int
  996 carp_master(struct ifaddr *ifa)
  997 {
  998         struct carp_softc *sc = ifa->ifa_carp;
  999 
 1000         return (sc->sc_state == MASTER);
 1001 }
 1002 
 1003 #ifdef INET
 1004 /*
 1005  * Broadcast a gratuitous ARP request containing
 1006  * the virtual router MAC address for each IP address
 1007  * associated with the virtual router.
 1008  */
 1009 static void
 1010 carp_send_arp(struct carp_softc *sc)
 1011 {
 1012         struct ifaddr *ifa;
 1013         struct in_addr addr;
 1014 
 1015         CARP_FOREACH_IFA(sc, ifa) {
 1016                 if (ifa->ifa_addr->sa_family != AF_INET)
 1017                         continue;
 1018                 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
 1019                 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr));
 1020         }
 1021 }
 1022 
 1023 int
 1024 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
 1025 {
 1026         struct carp_softc *sc = ifa->ifa_carp;
 1027 
 1028         if (sc->sc_state == MASTER) {
 1029                 *enaddr = LLADDR(&sc->sc_addr);
 1030                 return (1);
 1031         }
 1032 
 1033         return (0);
 1034 }
 1035 #endif
 1036 
 1037 #ifdef INET6
 1038 static void
 1039 carp_send_na(struct carp_softc *sc)
 1040 {
 1041         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 1042         struct ifaddr *ifa;
 1043         struct in6_addr *in6;
 1044 
 1045         CARP_FOREACH_IFA(sc, ifa) {
 1046                 if (ifa->ifa_addr->sa_family != AF_INET6)
 1047                         continue;
 1048 
 1049                 in6 = IFA_IN6(ifa);
 1050                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
 1051                     ND_NA_FLAG_OVERRIDE, 1, NULL);
 1052                 DELAY(1000);    /* XXX */
 1053         }
 1054 }
 1055 
 1056 /*
 1057  * Returns ifa in case it's a carp address and it is MASTER, or if the address
 1058  * matches and is not a carp address.  Returns NULL otherwise.
 1059  */
 1060 struct ifaddr *
 1061 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
 1062 {
 1063         struct ifaddr *ifa;
 1064 
 1065         ifa = NULL;
 1066         IF_ADDR_RLOCK(ifp);
 1067         TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 1068                 if (ifa->ifa_addr->sa_family != AF_INET6)
 1069                         continue;
 1070                 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
 1071                         continue;
 1072                 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
 1073                         ifa = NULL;
 1074                 else
 1075                         ifa_ref(ifa);
 1076                 break;
 1077         }
 1078         IF_ADDR_RUNLOCK(ifp);
 1079 
 1080         return (ifa);
 1081 }
 1082 
 1083 caddr_t
 1084 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
 1085 {
 1086         struct ifaddr *ifa;
 1087 
 1088         IF_ADDR_RLOCK(ifp);
 1089         IFNET_FOREACH_IFA(ifp, ifa)
 1090                 if (ifa->ifa_addr->sa_family == AF_INET6 &&
 1091                     IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
 1092                         struct carp_softc *sc = ifa->ifa_carp;
 1093                         struct m_tag *mtag;
 1094 
 1095                         IF_ADDR_RUNLOCK(ifp);
 1096 
 1097                         mtag = m_tag_get(PACKET_TAG_CARP,
 1098                             sizeof(struct carp_softc *), M_NOWAIT);
 1099                         if (mtag == NULL)
 1100                                 /* Better a bit than nothing. */
 1101                                 return (LLADDR(&sc->sc_addr));
 1102 
 1103                         bcopy(&sc, mtag + 1, sizeof(sc));
 1104                         m_tag_prepend(m, mtag);
 1105 
 1106                         return (LLADDR(&sc->sc_addr));
 1107                 }
 1108         IF_ADDR_RUNLOCK(ifp);
 1109 
 1110         return (NULL);
 1111 }
 1112 #endif /* INET6 */
 1113 
 1114 int
 1115 carp_forus(struct ifnet *ifp, u_char *dhost)
 1116 {
 1117         struct carp_softc *sc;
 1118         uint8_t *ena = dhost;
 1119 
 1120         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
 1121                 return (0);
 1122 
 1123         CIF_LOCK(ifp->if_carp);
 1124         IFNET_FOREACH_CARP(ifp, sc) {
 1125                 CARP_LOCK(sc);
 1126                 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
 1127                     ETHER_ADDR_LEN)) {
 1128                         CARP_UNLOCK(sc);
 1129                         CIF_UNLOCK(ifp->if_carp);
 1130                         return (1);
 1131                 }
 1132                 CARP_UNLOCK(sc);
 1133         }
 1134         CIF_UNLOCK(ifp->if_carp);
 1135 
 1136         return (0);
 1137 }
 1138 
 1139 /* Master down timeout event, executed in callout context. */
 1140 static void
 1141 carp_master_down(void *v)
 1142 {
 1143         struct carp_softc *sc = v;
 1144 
 1145         CARP_LOCK_ASSERT(sc);
 1146 
 1147         CURVNET_SET(sc->sc_carpdev->if_vnet);
 1148         if (sc->sc_state == BACKUP) {
 1149                 carp_master_down_locked(sc, "master timed out");
 1150         }
 1151         CURVNET_RESTORE();
 1152 
 1153         CARP_UNLOCK(sc);
 1154 }
 1155 
 1156 static void
 1157 carp_master_down_locked(struct carp_softc *sc, const char *reason)
 1158 {
 1159 
 1160         CARP_LOCK_ASSERT(sc);
 1161 
 1162         switch (sc->sc_state) {
 1163         case BACKUP:
 1164                 carp_set_state(sc, MASTER, reason);
 1165                 carp_send_ad_locked(sc);
 1166 #ifdef INET
 1167                 carp_send_arp(sc);
 1168 #endif
 1169 #ifdef INET6
 1170                 carp_send_na(sc);
 1171 #endif
 1172                 carp_setrun(sc, 0);
 1173                 carp_addroute(sc);
 1174                 break;
 1175         case INIT:
 1176         case MASTER:
 1177 #ifdef INVARIANTS
 1178                 panic("carp: VHID %u@%s: master_down event in %s state\n",
 1179                     sc->sc_vhid,
 1180                     sc->sc_carpdev->if_xname,
 1181                     sc->sc_state ? "MASTER" : "INIT");
 1182 #endif
 1183                 break;
 1184         }
 1185 }
 1186 
 1187 /*
 1188  * When in backup state, af indicates whether to reset the master down timer
 1189  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
 1190  */
 1191 static void
 1192 carp_setrun(struct carp_softc *sc, sa_family_t af)
 1193 {
 1194         struct timeval tv;
 1195 
 1196         CARP_LOCK_ASSERT(sc);
 1197 
 1198         if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
 1199             sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
 1200             (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0))
 1201                 return;
 1202 
 1203         switch (sc->sc_state) {
 1204         case INIT:
 1205                 carp_set_state(sc, BACKUP, "initialization complete");
 1206                 carp_setrun(sc, 0);
 1207                 break;
 1208         case BACKUP:
 1209                 callout_stop(&sc->sc_ad_tmo);
 1210                 tv.tv_sec = 3 * sc->sc_advbase;
 1211                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 1212                 switch (af) {
 1213 #ifdef INET
 1214                 case AF_INET:
 1215                         callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 1216                             carp_master_down, sc);
 1217                         break;
 1218 #endif
 1219 #ifdef INET6
 1220                 case AF_INET6:
 1221                         callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 1222                             carp_master_down, sc);
 1223                         break;
 1224 #endif
 1225                 default:
 1226 #ifdef INET
 1227                         if (sc->sc_naddrs)
 1228                                 callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 1229                                     carp_master_down, sc);
 1230 #endif
 1231 #ifdef INET6
 1232                         if (sc->sc_naddrs6)
 1233                                 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 1234                                     carp_master_down, sc);
 1235 #endif
 1236                         break;
 1237                 }
 1238                 break;
 1239         case MASTER:
 1240                 tv.tv_sec = sc->sc_advbase;
 1241                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 1242                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 1243                     carp_send_ad, sc);
 1244                 break;
 1245         }
 1246 }
 1247 
 1248 /*
 1249  * Setup multicast structures.
 1250  */
 1251 static int
 1252 carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
 1253 {
 1254         struct ifnet *ifp = cif->cif_ifp;
 1255         int error = 0;
 1256 
 1257         switch (sa) {
 1258 #ifdef INET
 1259         case AF_INET:
 1260             {
 1261                 struct ip_moptions *imo = &cif->cif_imo;
 1262                 struct in_addr addr;
 1263 
 1264                 if (imo->imo_membership)
 1265                         return (0);
 1266 
 1267                 imo->imo_membership = (struct in_multi **)malloc(
 1268                     (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
 1269                     M_WAITOK);
 1270                 imo->imo_mfilters = NULL;
 1271                 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
 1272                 imo->imo_multicast_vif = -1;
 1273 
 1274                 addr.s_addr = htonl(INADDR_CARP_GROUP);
 1275                 if ((error = in_joingroup(ifp, &addr, NULL,
 1276                     &imo->imo_membership[0])) != 0) {
 1277                         free(imo->imo_membership, M_CARP);
 1278                         break;
 1279                 }
 1280                 imo->imo_num_memberships++;
 1281                 imo->imo_multicast_ifp = ifp;
 1282                 imo->imo_multicast_ttl = CARP_DFLTTL;
 1283                 imo->imo_multicast_loop = 0;
 1284                 break;
 1285            }
 1286 #endif
 1287 #ifdef INET6
 1288         case AF_INET6:
 1289             {
 1290                 struct ip6_moptions *im6o = &cif->cif_im6o;
 1291                 struct in6_addr in6;
 1292                 struct in6_multi *in6m;
 1293 
 1294                 if (im6o->im6o_membership)
 1295                         return (0);
 1296 
 1297                 im6o->im6o_membership = (struct in6_multi **)malloc(
 1298                     (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP,
 1299                     M_ZERO | M_WAITOK);
 1300                 im6o->im6o_mfilters = NULL;
 1301                 im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS;
 1302                 im6o->im6o_multicast_hlim = CARP_DFLTTL;
 1303                 im6o->im6o_multicast_ifp = ifp;
 1304 
 1305                 /* Join IPv6 CARP multicast group. */
 1306                 bzero(&in6, sizeof(in6));
 1307                 in6.s6_addr16[0] = htons(0xff02);
 1308                 in6.s6_addr8[15] = 0x12;
 1309                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
 1310                         free(im6o->im6o_membership, M_CARP);
 1311                         break;
 1312                 }
 1313                 in6m = NULL;
 1314                 if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
 1315                         free(im6o->im6o_membership, M_CARP);
 1316                         break;
 1317                 }
 1318                 im6o->im6o_membership[0] = in6m;
 1319                 im6o->im6o_num_memberships++;
 1320 
 1321                 /* Join solicited multicast address. */
 1322                 bzero(&in6, sizeof(in6));
 1323                 in6.s6_addr16[0] = htons(0xff02);
 1324                 in6.s6_addr32[1] = 0;
 1325                 in6.s6_addr32[2] = htonl(1);
 1326                 in6.s6_addr32[3] = 0;
 1327                 in6.s6_addr8[12] = 0xff;
 1328                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
 1329                         in6_mc_leave(im6o->im6o_membership[0], NULL);
 1330                         free(im6o->im6o_membership, M_CARP);
 1331                         break;
 1332                 }
 1333                 in6m = NULL;
 1334                 if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) {
 1335                         in6_mc_leave(im6o->im6o_membership[0], NULL);
 1336                         free(im6o->im6o_membership, M_CARP);
 1337                         break;
 1338                 }
 1339                 im6o->im6o_membership[1] = in6m;
 1340                 im6o->im6o_num_memberships++;
 1341                 break;
 1342             }
 1343 #endif
 1344         }
 1345 
 1346         return (error);
 1347 }
 1348 
 1349 /*
 1350  * Free multicast structures.
 1351  */
 1352 static void
 1353 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
 1354 {
 1355 
 1356         sx_assert(&carp_sx, SA_XLOCKED);
 1357 
 1358         switch (sa) {
 1359 #ifdef INET
 1360         case AF_INET:
 1361                 if (cif->cif_naddrs == 0) {
 1362                         struct ip_moptions *imo = &cif->cif_imo;
 1363 
 1364                         in_leavegroup(imo->imo_membership[0], NULL);
 1365                         KASSERT(imo->imo_mfilters == NULL,
 1366                             ("%s: imo_mfilters != NULL", __func__));
 1367                         free(imo->imo_membership, M_CARP);
 1368                         imo->imo_membership = NULL;
 1369 
 1370                 }
 1371                 break;
 1372 #endif
 1373 #ifdef INET6
 1374         case AF_INET6:
 1375                 if (cif->cif_naddrs6 == 0) {
 1376                         struct ip6_moptions *im6o = &cif->cif_im6o;
 1377 
 1378                         in6_mc_leave(im6o->im6o_membership[0], NULL);
 1379                         in6_mc_leave(im6o->im6o_membership[1], NULL);
 1380                         KASSERT(im6o->im6o_mfilters == NULL,
 1381                             ("%s: im6o_mfilters != NULL", __func__));
 1382                         free(im6o->im6o_membership, M_CARP);
 1383                         im6o->im6o_membership = NULL;
 1384                 }
 1385                 break;
 1386 #endif
 1387         }
 1388 }
 1389 
 1390 int
 1391 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
 1392 {
 1393         struct m_tag *mtag;
 1394         struct carp_softc *sc;
 1395 
 1396         if (!sa)
 1397                 return (0);
 1398 
 1399         switch (sa->sa_family) {
 1400 #ifdef INET
 1401         case AF_INET:
 1402                 break;
 1403 #endif
 1404 #ifdef INET6
 1405         case AF_INET6:
 1406                 break;
 1407 #endif
 1408         default:
 1409                 return (0);
 1410         }
 1411 
 1412         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
 1413         if (mtag == NULL)
 1414                 return (0);
 1415 
 1416         bcopy(mtag + 1, &sc, sizeof(sc));
 1417 
 1418         /* Set the source MAC address to the Virtual Router MAC Address. */
 1419         switch (ifp->if_type) {
 1420         case IFT_ETHER:
 1421         case IFT_BRIDGE:
 1422         case IFT_L2VLAN: {
 1423                         struct ether_header *eh;
 1424 
 1425                         eh = mtod(m, struct ether_header *);
 1426                         eh->ether_shost[0] = 0;
 1427                         eh->ether_shost[1] = 0;
 1428                         eh->ether_shost[2] = 0x5e;
 1429                         eh->ether_shost[3] = 0;
 1430                         eh->ether_shost[4] = 1;
 1431                         eh->ether_shost[5] = sc->sc_vhid;
 1432                 }
 1433                 break;
 1434         case IFT_FDDI: {
 1435                         struct fddi_header *fh;
 1436 
 1437                         fh = mtod(m, struct fddi_header *);
 1438                         fh->fddi_shost[0] = 0;
 1439                         fh->fddi_shost[1] = 0;
 1440                         fh->fddi_shost[2] = 0x5e;
 1441                         fh->fddi_shost[3] = 0;
 1442                         fh->fddi_shost[4] = 1;
 1443                         fh->fddi_shost[5] = sc->sc_vhid;
 1444                 }
 1445                 break;
 1446         case IFT_ISO88025: {
 1447                         struct iso88025_header *th;
 1448                         th = mtod(m, struct iso88025_header *);
 1449                         th->iso88025_shost[0] = 3;
 1450                         th->iso88025_shost[1] = 0;
 1451                         th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
 1452                         th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
 1453                         th->iso88025_shost[4] = 0;
 1454                         th->iso88025_shost[5] = 0;
 1455                 }
 1456                 break;
 1457         default:
 1458                 printf("%s: carp is not supported for the %d interface type\n",
 1459                     ifp->if_xname, ifp->if_type);
 1460                 return (EOPNOTSUPP);
 1461         }
 1462 
 1463         return (0);
 1464 }
 1465 
 1466 static struct carp_softc*
 1467 carp_alloc(struct ifnet *ifp)
 1468 {
 1469         struct carp_softc *sc;
 1470         struct carp_if *cif;
 1471 
 1472         sx_assert(&carp_sx, SA_XLOCKED);
 1473 
 1474         if ((cif = ifp->if_carp) == NULL)
 1475                 cif = carp_alloc_if(ifp);
 1476 
 1477         sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
 1478 
 1479         sc->sc_advbase = CARP_DFLTINTV;
 1480         sc->sc_vhid = -1;       /* required setting */
 1481         sc->sc_init_counter = 1;
 1482         sc->sc_state = INIT;
 1483 
 1484         sc->sc_ifasiz = sizeof(struct ifaddr *);
 1485         sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
 1486         sc->sc_carpdev = ifp;
 1487 
 1488         CARP_LOCK_INIT(sc);
 1489 #ifdef INET
 1490         callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 1491 #endif
 1492 #ifdef INET6
 1493         callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 1494 #endif
 1495         callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 1496 
 1497         CIF_LOCK(cif);
 1498         TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
 1499         CIF_UNLOCK(cif);
 1500 
 1501         mtx_lock(&carp_mtx);
 1502         LIST_INSERT_HEAD(&carp_list, sc, sc_next);
 1503         mtx_unlock(&carp_mtx);
 1504 
 1505         return (sc);
 1506 }
 1507 
 1508 static void
 1509 carp_grow_ifas(struct carp_softc *sc)
 1510 {
 1511         struct ifaddr **new;
 1512 
 1513         new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO);
 1514         CARP_LOCK(sc);
 1515         bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
 1516         free(sc->sc_ifas, M_CARP);
 1517         sc->sc_ifas = new;
 1518         sc->sc_ifasiz *= 2;
 1519         CARP_UNLOCK(sc);
 1520 }
 1521 
 1522 static void
 1523 carp_destroy(struct carp_softc *sc)
 1524 {
 1525         struct ifnet *ifp = sc->sc_carpdev;
 1526         struct carp_if *cif = ifp->if_carp;
 1527 
 1528         sx_assert(&carp_sx, SA_XLOCKED);
 1529 
 1530         if (sc->sc_suppress)
 1531                 carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
 1532         CARP_UNLOCK(sc);
 1533 
 1534         CIF_LOCK(cif);
 1535         TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
 1536         CIF_UNLOCK(cif);
 1537 
 1538         mtx_lock(&carp_mtx);
 1539         LIST_REMOVE(sc, sc_next);
 1540         mtx_unlock(&carp_mtx);
 1541 
 1542         callout_drain(&sc->sc_ad_tmo);
 1543 #ifdef INET
 1544         callout_drain(&sc->sc_md_tmo);
 1545 #endif
 1546 #ifdef INET6
 1547         callout_drain(&sc->sc_md6_tmo);
 1548 #endif
 1549         CARP_LOCK_DESTROY(sc);
 1550 
 1551         free(sc->sc_ifas, M_CARP);
 1552         free(sc, M_CARP);
 1553 }
 1554 
 1555 static struct carp_if*
 1556 carp_alloc_if(struct ifnet *ifp)
 1557 {
 1558         struct carp_if *cif;
 1559         int error;
 1560 
 1561         cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
 1562 
 1563         if ((error = ifpromisc(ifp, 1)) != 0)
 1564                 printf("%s: ifpromisc(%s) failed: %d\n",
 1565                     __func__, ifp->if_xname, error);
 1566         else
 1567                 cif->cif_flags |= CIF_PROMISC;
 1568 
 1569         CIF_LOCK_INIT(cif);
 1570         cif->cif_ifp = ifp;
 1571         TAILQ_INIT(&cif->cif_vrs);
 1572 
 1573         IF_ADDR_WLOCK(ifp);
 1574         ifp->if_carp = cif;
 1575         if_ref(ifp);
 1576         IF_ADDR_WUNLOCK(ifp);
 1577 
 1578         return (cif);
 1579 }
 1580 
 1581 static void
 1582 carp_free_if(struct carp_if *cif)
 1583 {
 1584         struct ifnet *ifp = cif->cif_ifp;
 1585 
 1586         CIF_LOCK_ASSERT(cif);
 1587         KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
 1588             __func__));
 1589 
 1590         IF_ADDR_WLOCK(ifp);
 1591         ifp->if_carp = NULL;
 1592         IF_ADDR_WUNLOCK(ifp);
 1593 
 1594         CIF_LOCK_DESTROY(cif);
 1595 
 1596         if (cif->cif_flags & CIF_PROMISC)
 1597                 ifpromisc(ifp, 0);
 1598         if_rele(ifp);
 1599 
 1600         free(cif, M_CARP);
 1601 }
 1602 
 1603 static void
 1604 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
 1605 {
 1606 
 1607         CARP_LOCK(sc);
 1608         carpr->carpr_state = sc->sc_state;
 1609         carpr->carpr_vhid = sc->sc_vhid;
 1610         carpr->carpr_advbase = sc->sc_advbase;
 1611         carpr->carpr_advskew = sc->sc_advskew;
 1612         if (priv)
 1613                 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
 1614         else
 1615                 bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
 1616         CARP_UNLOCK(sc);
 1617 }
 1618 
 1619 int
 1620 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
 1621 {
 1622         struct carpreq carpr;
 1623         struct ifnet *ifp;
 1624         struct carp_softc *sc = NULL;
 1625         int error = 0, locked = 0;
 1626 
 1627         if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr)))
 1628                 return (error);
 1629 
 1630         ifp = ifunit_ref(ifr->ifr_name);
 1631         if (ifp == NULL)
 1632                 return (ENXIO);
 1633 
 1634         switch (ifp->if_type) {
 1635         case IFT_ETHER:
 1636         case IFT_L2VLAN:
 1637         case IFT_BRIDGE:
 1638         case IFT_FDDI:
 1639         case IFT_ISO88025:
 1640                 break;
 1641         default:
 1642                 error = EOPNOTSUPP;
 1643                 goto out;
 1644         }
 1645 
 1646         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 1647                 error = EADDRNOTAVAIL;
 1648                 goto out;
 1649         }
 1650 
 1651         sx_xlock(&carp_sx);
 1652         switch (cmd) {
 1653         case SIOCSVH:
 1654                 if ((error = priv_check(td, PRIV_NETINET_CARP)))
 1655                         break;
 1656                 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
 1657                     carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
 1658                         error = EINVAL;
 1659                         break;
 1660                 }
 1661 
 1662                 if (ifp->if_carp) {
 1663                         IFNET_FOREACH_CARP(ifp, sc)
 1664                                 if (sc->sc_vhid == carpr.carpr_vhid)
 1665                                         break;
 1666                 }
 1667                 if (sc == NULL) {
 1668                         sc = carp_alloc(ifp);
 1669                         CARP_LOCK(sc);
 1670                         sc->sc_vhid = carpr.carpr_vhid;
 1671                         LLADDR(&sc->sc_addr)[0] = 0;
 1672                         LLADDR(&sc->sc_addr)[1] = 0;
 1673                         LLADDR(&sc->sc_addr)[2] = 0x5e;
 1674                         LLADDR(&sc->sc_addr)[3] = 0;
 1675                         LLADDR(&sc->sc_addr)[4] = 1;
 1676                         LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
 1677                 } else
 1678                         CARP_LOCK(sc);
 1679                 locked = 1;
 1680                 if (carpr.carpr_advbase > 0) {
 1681                         if (carpr.carpr_advbase > 255 ||
 1682                             carpr.carpr_advbase < CARP_DFLTINTV) {
 1683                                 error = EINVAL;
 1684                                 break;
 1685                         }
 1686                         sc->sc_advbase = carpr.carpr_advbase;
 1687                 }
 1688                 if (carpr.carpr_advskew >= 255) {
 1689                         error = EINVAL;
 1690                         break;
 1691                 }
 1692                 sc->sc_advskew = carpr.carpr_advskew;
 1693                 if (carpr.carpr_key[0] != '\0') {
 1694                         bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
 1695                         carp_hmac_prepare(sc);
 1696                 }
 1697                 if (sc->sc_state != INIT &&
 1698                     carpr.carpr_state != sc->sc_state) {
 1699                         switch (carpr.carpr_state) {
 1700                         case BACKUP:
 1701                                 callout_stop(&sc->sc_ad_tmo);
 1702                                 carp_set_state(sc, BACKUP,
 1703                                     "user requested via ifconfig");
 1704                                 carp_setrun(sc, 0);
 1705                                 carp_delroute(sc);
 1706                                 break;
 1707                         case MASTER:
 1708                                 carp_master_down_locked(sc,
 1709                                     "user requested via ifconfig");
 1710                                 break;
 1711                         default:
 1712                                 break;
 1713                         }
 1714                 }
 1715                 break;
 1716 
 1717         case SIOCGVH:
 1718             {
 1719                 int priveleged;
 1720 
 1721                 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
 1722                         error = EINVAL;
 1723                         break;
 1724                 }
 1725                 if (carpr.carpr_count < 1) {
 1726                         error = EMSGSIZE;
 1727                         break;
 1728                 }
 1729                 if (ifp->if_carp == NULL) {
 1730                         error = ENOENT;
 1731                         break;
 1732                 }
 1733 
 1734                 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
 1735                 if (carpr.carpr_vhid != 0) {
 1736                         IFNET_FOREACH_CARP(ifp, sc)
 1737                                 if (sc->sc_vhid == carpr.carpr_vhid)
 1738                                         break;
 1739                         if (sc == NULL) {
 1740                                 error = ENOENT;
 1741                                 break;
 1742                         }
 1743                         carp_carprcp(&carpr, sc, priveleged);
 1744                         error = copyout(&carpr, ifr_data_get_ptr(ifr),
 1745                             sizeof(carpr));
 1746                 } else  {
 1747                         int i, count;
 1748 
 1749                         count = 0;
 1750                         IFNET_FOREACH_CARP(ifp, sc)
 1751                                 count++;
 1752 
 1753                         if (count > carpr.carpr_count) {
 1754                                 CIF_UNLOCK(ifp->if_carp);
 1755                                 error = EMSGSIZE;
 1756                                 break;
 1757                         }
 1758 
 1759                         i = 0;
 1760                         IFNET_FOREACH_CARP(ifp, sc) {
 1761                                 carp_carprcp(&carpr, sc, priveleged);
 1762                                 carpr.carpr_count = count;
 1763                                 error = copyout(&carpr,
 1764                                     (caddr_t)ifr_data_get_ptr(ifr) +
 1765                                     (i * sizeof(carpr)), sizeof(carpr));
 1766                                 if (error) {
 1767                                         CIF_UNLOCK(ifp->if_carp);
 1768                                         break;
 1769                                 }
 1770                                 i++;
 1771                         }
 1772                 }
 1773                 break;
 1774             }
 1775         default:
 1776                 error = EINVAL;
 1777         }
 1778         sx_xunlock(&carp_sx);
 1779 
 1780 out:
 1781         if (locked)
 1782                 CARP_UNLOCK(sc);
 1783         if_rele(ifp);
 1784 
 1785         return (error);
 1786 }
 1787 
 1788 static int
 1789 carp_get_vhid(struct ifaddr *ifa)
 1790 {
 1791 
 1792         if (ifa == NULL || ifa->ifa_carp == NULL)
 1793                 return (0);
 1794 
 1795         return (ifa->ifa_carp->sc_vhid);
 1796 }
 1797 
 1798 int
 1799 carp_attach(struct ifaddr *ifa, int vhid)
 1800 {
 1801         struct ifnet *ifp = ifa->ifa_ifp;
 1802         struct carp_if *cif = ifp->if_carp;
 1803         struct carp_softc *sc;
 1804         int index, error;
 1805 
 1806         KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa));
 1807 
 1808         switch (ifa->ifa_addr->sa_family) {
 1809 #ifdef INET
 1810         case AF_INET:
 1811 #endif
 1812 #ifdef INET6
 1813         case AF_INET6:
 1814 #endif
 1815                 break;
 1816         default:
 1817                 return (EPROTOTYPE);
 1818         }
 1819 
 1820         sx_xlock(&carp_sx);
 1821         if (ifp->if_carp == NULL) {
 1822                 sx_xunlock(&carp_sx);
 1823                 return (ENOPROTOOPT);
 1824         }
 1825 
 1826         IFNET_FOREACH_CARP(ifp, sc)
 1827                 if (sc->sc_vhid == vhid)
 1828                         break;
 1829         if (sc == NULL) {
 1830                 sx_xunlock(&carp_sx);
 1831                 return (ENOENT);
 1832         }
 1833 
 1834         error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
 1835         if (error) {
 1836                 CIF_FREE(cif);
 1837                 sx_xunlock(&carp_sx);
 1838                 return (error);
 1839         }
 1840 
 1841         index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
 1842         if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
 1843                 carp_grow_ifas(sc);
 1844 
 1845         switch (ifa->ifa_addr->sa_family) {
 1846 #ifdef INET
 1847         case AF_INET:
 1848                 cif->cif_naddrs++;
 1849                 sc->sc_naddrs++;
 1850                 break;
 1851 #endif
 1852 #ifdef INET6
 1853         case AF_INET6:
 1854                 cif->cif_naddrs6++;
 1855                 sc->sc_naddrs6++;
 1856                 break;
 1857 #endif
 1858         }
 1859 
 1860         ifa_ref(ifa);
 1861 
 1862         CARP_LOCK(sc);
 1863         sc->sc_ifas[index - 1] = ifa;
 1864         ifa->ifa_carp = sc;
 1865         carp_hmac_prepare(sc);
 1866         carp_sc_state(sc);
 1867         CARP_UNLOCK(sc);
 1868 
 1869         sx_xunlock(&carp_sx);
 1870 
 1871         return (0);
 1872 }
 1873 
 1874 void
 1875 carp_detach(struct ifaddr *ifa)
 1876 {
 1877         struct ifnet *ifp = ifa->ifa_ifp;
 1878         struct carp_if *cif = ifp->if_carp;
 1879         struct carp_softc *sc = ifa->ifa_carp;
 1880         int i, index;
 1881 
 1882         KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
 1883 
 1884         sx_xlock(&carp_sx);
 1885 
 1886         CARP_LOCK(sc);
 1887         /* Shift array. */
 1888         index = sc->sc_naddrs + sc->sc_naddrs6;
 1889         for (i = 0; i < index; i++)
 1890                 if (sc->sc_ifas[i] == ifa)
 1891                         break;
 1892         KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
 1893         for (; i < index - 1; i++)
 1894                 sc->sc_ifas[i] = sc->sc_ifas[i+1];
 1895         sc->sc_ifas[index - 1] = NULL;
 1896 
 1897         switch (ifa->ifa_addr->sa_family) {
 1898 #ifdef INET
 1899         case AF_INET:
 1900                 cif->cif_naddrs--;
 1901                 sc->sc_naddrs--;
 1902                 break;
 1903 #endif
 1904 #ifdef INET6
 1905         case AF_INET6:
 1906                 cif->cif_naddrs6--;
 1907                 sc->sc_naddrs6--;
 1908                 break;
 1909 #endif
 1910         }
 1911 
 1912         carp_ifa_delroute(ifa);
 1913         carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
 1914 
 1915         ifa->ifa_carp = NULL;
 1916         ifa_free(ifa);
 1917 
 1918         carp_hmac_prepare(sc);
 1919         carp_sc_state(sc);
 1920 
 1921         if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)
 1922                 carp_destroy(sc);
 1923         else
 1924                 CARP_UNLOCK(sc);
 1925 
 1926         CIF_FREE(cif);
 1927 
 1928         sx_xunlock(&carp_sx);
 1929 }
 1930 
 1931 static void
 1932 carp_set_state(struct carp_softc *sc, int state, const char *reason)
 1933 {
 1934 
 1935         CARP_LOCK_ASSERT(sc);
 1936 
 1937         if (sc->sc_state != state) {
 1938                 const char *carp_states[] = { CARP_STATES };
 1939                 char subsys[IFNAMSIZ+5];
 1940 
 1941                 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
 1942                     sc->sc_carpdev->if_xname);
 1943 
 1944                 CARP_LOG("%s: %s -> %s (%s)\n", subsys,
 1945                     carp_states[sc->sc_state], carp_states[state], reason);
 1946 
 1947                 sc->sc_state = state;
 1948 
 1949                 devctl_notify("CARP", subsys, carp_states[state], NULL);
 1950         }
 1951 }
 1952 
 1953 static void
 1954 carp_linkstate(struct ifnet *ifp)
 1955 {
 1956         struct carp_softc *sc;
 1957 
 1958         CIF_LOCK(ifp->if_carp);
 1959         IFNET_FOREACH_CARP(ifp, sc) {
 1960                 CARP_LOCK(sc);
 1961                 carp_sc_state(sc);
 1962                 CARP_UNLOCK(sc);
 1963         }
 1964         CIF_UNLOCK(ifp->if_carp);
 1965 }
 1966 
 1967 static void
 1968 carp_sc_state(struct carp_softc *sc)
 1969 {
 1970 
 1971         CARP_LOCK_ASSERT(sc);
 1972 
 1973         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
 1974             !(sc->sc_carpdev->if_flags & IFF_UP)) {
 1975                 callout_stop(&sc->sc_ad_tmo);
 1976 #ifdef INET
 1977                 callout_stop(&sc->sc_md_tmo);
 1978 #endif
 1979 #ifdef INET6
 1980                 callout_stop(&sc->sc_md6_tmo);
 1981 #endif
 1982                 carp_set_state(sc, INIT, "hardware interface down");
 1983                 carp_setrun(sc, 0);
 1984                 if (!sc->sc_suppress)
 1985                         carp_demote_adj(V_carp_ifdown_adj, "interface down");
 1986                 sc->sc_suppress = 1;
 1987         } else {
 1988                 carp_set_state(sc, INIT, "hardware interface up");
 1989                 carp_setrun(sc, 0);
 1990                 if (sc->sc_suppress)
 1991                         carp_demote_adj(-V_carp_ifdown_adj, "interface up");
 1992                 sc->sc_suppress = 0;
 1993         }
 1994 }
 1995 
 1996 static void
 1997 carp_demote_adj(int adj, char *reason)
 1998 {
 1999         atomic_add_int(&V_carp_demotion, adj);
 2000         CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
 2001         taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
 2002 }
 2003 
 2004 static int
 2005 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
 2006 {
 2007         int new, error;
 2008 
 2009         new = V_carp_demotion;
 2010         error = sysctl_handle_int(oidp, &new, 0, req);
 2011         if (error || !req->newptr)
 2012                 return (error);
 2013 
 2014         carp_demote_adj(new, "sysctl");
 2015 
 2016         return (0);
 2017 }
 2018 
 2019 #ifdef INET
 2020 extern  struct domain inetdomain;
 2021 static struct protosw in_carp_protosw = {
 2022         .pr_type =              SOCK_RAW,
 2023         .pr_domain =            &inetdomain,
 2024         .pr_protocol =          IPPROTO_CARP,
 2025         .pr_flags =             PR_ATOMIC|PR_ADDR,
 2026         .pr_input =             carp_input,
 2027         .pr_output =            rip_output,
 2028         .pr_ctloutput =         rip_ctloutput,
 2029         .pr_usrreqs =           &rip_usrreqs
 2030 };
 2031 #endif
 2032 
 2033 #ifdef INET6
 2034 extern  struct domain inet6domain;
 2035 static struct protosw in6_carp_protosw = {
 2036         .pr_type =              SOCK_RAW,
 2037         .pr_domain =            &inet6domain,
 2038         .pr_protocol =          IPPROTO_CARP,
 2039         .pr_flags =             PR_ATOMIC|PR_ADDR,
 2040         .pr_input =             carp6_input,
 2041         .pr_output =            rip6_output,
 2042         .pr_ctloutput =         rip6_ctloutput,
 2043         .pr_usrreqs =           &rip6_usrreqs
 2044 };
 2045 #endif
 2046 
 2047 static void
 2048 carp_mod_cleanup(void)
 2049 {
 2050 
 2051 #ifdef INET
 2052         if (proto_reg[CARP_INET] == 0) {
 2053                 (void)ipproto_unregister(IPPROTO_CARP);
 2054                 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
 2055                 proto_reg[CARP_INET] = -1;
 2056         }
 2057         carp_iamatch_p = NULL;
 2058 #endif
 2059 #ifdef INET6
 2060         if (proto_reg[CARP_INET6] == 0) {
 2061                 (void)ip6proto_unregister(IPPROTO_CARP);
 2062                 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
 2063                 proto_reg[CARP_INET6] = -1;
 2064         }
 2065         carp_iamatch6_p = NULL;
 2066         carp_macmatch6_p = NULL;
 2067 #endif
 2068         carp_ioctl_p = NULL;
 2069         carp_attach_p = NULL;
 2070         carp_detach_p = NULL;
 2071         carp_get_vhid_p = NULL;
 2072         carp_linkstate_p = NULL;
 2073         carp_forus_p = NULL;
 2074         carp_output_p = NULL;
 2075         carp_demote_adj_p = NULL;
 2076         carp_master_p = NULL;
 2077         mtx_unlock(&carp_mtx);
 2078         taskqueue_drain(taskqueue_swi, &carp_sendall_task);
 2079         mtx_destroy(&carp_mtx);
 2080         sx_destroy(&carp_sx);
 2081 }
 2082 
 2083 static int
 2084 carp_mod_load(void)
 2085 {
 2086         int err;
 2087 
 2088         mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
 2089         sx_init(&carp_sx, "carp_sx");
 2090         LIST_INIT(&carp_list);
 2091         carp_get_vhid_p = carp_get_vhid;
 2092         carp_forus_p = carp_forus;
 2093         carp_output_p = carp_output;
 2094         carp_linkstate_p = carp_linkstate;
 2095         carp_ioctl_p = carp_ioctl;
 2096         carp_attach_p = carp_attach;
 2097         carp_detach_p = carp_detach;
 2098         carp_demote_adj_p = carp_demote_adj;
 2099         carp_master_p = carp_master;
 2100 #ifdef INET6
 2101         carp_iamatch6_p = carp_iamatch6;
 2102         carp_macmatch6_p = carp_macmatch6;
 2103         proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
 2104             (struct protosw *)&in6_carp_protosw);
 2105         if (proto_reg[CARP_INET6]) {
 2106                 printf("carp: error %d attaching to PF_INET6\n",
 2107                     proto_reg[CARP_INET6]);
 2108                 carp_mod_cleanup();
 2109                 return (proto_reg[CARP_INET6]);
 2110         }
 2111         err = ip6proto_register(IPPROTO_CARP);
 2112         if (err) {
 2113                 printf("carp: error %d registering with INET6\n", err);
 2114                 carp_mod_cleanup();
 2115                 return (err);
 2116         }
 2117 #endif
 2118 #ifdef INET
 2119         carp_iamatch_p = carp_iamatch;
 2120         proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
 2121         if (proto_reg[CARP_INET]) {
 2122                 printf("carp: error %d attaching to PF_INET\n",
 2123                     proto_reg[CARP_INET]);
 2124                 carp_mod_cleanup();
 2125                 return (proto_reg[CARP_INET]);
 2126         }
 2127         err = ipproto_register(IPPROTO_CARP);
 2128         if (err) {
 2129                 printf("carp: error %d registering with INET\n", err);
 2130                 carp_mod_cleanup();
 2131                 return (err);
 2132         }
 2133 #endif
 2134         return (0);
 2135 }
 2136 
 2137 static int
 2138 carp_modevent(module_t mod, int type, void *data)
 2139 {
 2140         switch (type) {
 2141         case MOD_LOAD:
 2142                 return carp_mod_load();
 2143                 /* NOTREACHED */
 2144         case MOD_UNLOAD:
 2145                 mtx_lock(&carp_mtx);
 2146                 if (LIST_EMPTY(&carp_list))
 2147                         carp_mod_cleanup();
 2148                 else {
 2149                         mtx_unlock(&carp_mtx);
 2150                         return (EBUSY);
 2151                 }
 2152                 break;
 2153 
 2154         default:
 2155                 return (EINVAL);
 2156         }
 2157 
 2158         return (0);
 2159 }
 2160 
 2161 static moduledata_t carp_mod = {
 2162         "carp",
 2163         carp_modevent,
 2164         0
 2165 };
 2166 
 2167 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);

Cache object: 9c40d3b8b4a569471b65fee1dd5d09f5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.