The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_carp.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2002 Michael Shalayeff.
    5  * Copyright (c) 2003 Ryan McBride.
    6  * Copyright (c) 2011 Gleb Smirnoff <glebius@FreeBSD.org>
    7  * All rights reserved.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   21  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
   22  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   24  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
   28  * THE POSSIBILITY OF SUCH DAMAGE.
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD$");
   33 
   34 #include "opt_bpf.h"
   35 #include "opt_inet.h"
   36 #include "opt_inet6.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/systm.h>
   40 #include <sys/bus.h>
   41 #include <sys/jail.h>
   42 #include <sys/kernel.h>
   43 #include <sys/limits.h>
   44 #include <sys/malloc.h>
   45 #include <sys/mbuf.h>
   46 #include <sys/module.h>
   47 #include <sys/priv.h>
   48 #include <sys/proc.h>
   49 #include <sys/protosw.h>
   50 #include <sys/socket.h>
   51 #include <sys/sockio.h>
   52 #include <sys/sysctl.h>
   53 #include <sys/syslog.h>
   54 #include <sys/taskqueue.h>
   55 #include <sys/counter.h>
   56 
   57 #include <net/ethernet.h>
   58 #include <net/if.h>
   59 #include <net/if_var.h>
   60 #include <net/if_dl.h>
   61 #include <net/if_llatbl.h>
   62 #include <net/if_types.h>
   63 #include <net/route.h>
   64 #include <net/vnet.h>
   65 
   66 #if defined(INET) || defined(INET6)
   67 #include <netinet/in.h>
   68 #include <netinet/in_var.h>
   69 #include <netinet/ip_carp.h>
   70 #include <netinet/ip.h>
   71 #include <machine/in_cksum.h>
   72 #endif
   73 #ifdef INET
   74 #include <netinet/ip_var.h>
   75 #include <netinet/if_ether.h>
   76 #endif
   77 
   78 #ifdef INET6
   79 #include <netinet/icmp6.h>
   80 #include <netinet/ip6.h>
   81 #include <netinet6/in6_var.h>
   82 #include <netinet6/ip6_var.h>
   83 #include <netinet6/scope6_var.h>
   84 #include <netinet6/nd6.h>
   85 #endif
   86 
   87 #include <crypto/sha1.h>
   88 
   89 static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses");
      /* M_CARP malloc type: short name "CARP", description "CARP addresses". */
   90 
   91 struct carp_softc {
      /*
       * State for one virtual host id (vhid) on one parent interface; see
       * the "Brief design of carp(4)" comment further down in this file.
       */
   92         struct ifnet            *sc_carpdev;    /* Pointer to parent ifnet. */
   93         struct ifaddr           **sc_ifas;      /* Our ifaddrs; walked by CARP_FOREACH_IFA(). */
   94         struct sockaddr_dl      sc_addr;        /* Our link level address. */
   95         struct callout          sc_ad_tmo;      /* Advertising timeout. */
   96 #ifdef INET
   97         struct callout          sc_md_tmo;      /* Master down timeout. */
   98 #endif
   99 #ifdef INET6
  100         struct callout          sc_md6_tmo;     /* XXX: Master down timeout. */
  101 #endif
  102         struct mtx              sc_mtx;         /* Softc lock; see CARP_LOCK(). */
  103 
  104         int                     sc_vhid;        /* Virtual host id; its low 8 bits go into the HMAC. */
  105         int                     sc_advskew;     /* Advertisement skew, in 1/256ths of a second. */
  106         int                     sc_advbase;     /* Advertisement base interval, in seconds. */
  107 
      /* sc_naddrs + sc_naddrs6 bounds the walk over sc_ifas. */
  108         int                     sc_naddrs;      /* presumably the IPv4 address count -- TODO confirm */
  109         int                     sc_naddrs6;     /* presumably the IPv6 address count -- TODO confirm */
  110         int                     sc_ifasiz;      /* presumably the allocated size of sc_ifas -- TODO confirm */
  111         enum { INIT = 0, BACKUP, MASTER }       sc_state;
  112         int                     sc_suppress;
  113         int                     sc_sendad_errors;
  114 #define CARP_SENDAD_MAX_ERRORS  3
  115         int                     sc_sendad_success;
  116 #define CARP_SENDAD_MIN_SUCCESS 3
  117 
      /* Nonzero sc_init_counter makes carp_prepare_ad() draw a random sc_counter. */
  118         int                     sc_init_counter;
  119         uint64_t                sc_counter;     /* 64-bit advertisement counter, covered by the HMAC. */
  120 
  121         /* authentication */
  122 #define CARP_HMAC_PAD   64
  123         unsigned char sc_key[CARP_KEY_LEN];     /* HMAC key. */
  124         unsigned char sc_pad[CARP_HMAC_PAD];    /* Outer pad once carp_hmac_prepare() has run. */
  125         SHA1_CTX sc_sha1;                       /* Precomputed first part of the inner hash. */
  126 
  127         TAILQ_ENTRY(carp_softc) sc_list;        /* On the carp_if list. */
  128         LIST_ENTRY(carp_softc)  sc_next;        /* On the global list. */
  129 };
  130 
  131 struct carp_if {
      /*
       * Per-ifnet CARP state, reached through ifp->if_carp.  It anchors
       * the list of softcs (one per vhid) configured on that interface.
       */
  132 #ifdef INET
  133         int     cif_naddrs;
  134 #endif
  135 #ifdef INET6
  136         int     cif_naddrs6;
  137 #endif
  138         TAILQ_HEAD(, carp_softc) cif_vrs;       /* Softcs on this ifnet, linked by sc_list. */
  139 #ifdef INET
  140         struct ip_moptions       cif_imo;       /* presumably IPv4 multicast state -- TODO confirm */
  141 #endif
  142 #ifdef INET6
  143         struct ip6_moptions      cif_im6o;      /* presumably IPv6 multicast state -- TODO confirm */
  144 #endif
  145         struct ifnet    *cif_ifp;               /* presumably the owning ifnet -- TODO confirm */
  146         struct mtx      cif_mtx;                /* Taken by CIF_LOCK() to traverse cif_vrs. */
  147         uint32_t        cif_flags;              /* CIF_* flag bits. */
  148 #define CIF_PROMISC     0x00000001
  149 };
  150 
  151 #define CARP_INET       0       /* presumably the IPv4 slot of proto_reg[] -- TODO confirm */
  152 #define CARP_INET6      1       /* presumably the IPv6 slot of proto_reg[] -- TODO confirm */
  153 static int proto_reg[] = {-1, -1};      /* Two slots, both initially -1. */
  154 
  155 /*
  156  * Brief design of carp(4).
  157  *
  158  * Any carp-capable ifnet may have a list of carp softcs hanging off
  159  * its ifp->if_carp pointer. Each softc represents one unique virtual
  160  * host id, or vhid. The softc has a back pointer to the ifnet. All
  161  * softcs are joined in a global list, which has quite limited use.
  162  *
  163  * Any interface address that takes part in CARP negotiation has a
  164  * pointer to the softc of its vhid, ifa->ifa_carp. That could be either
  165  * AF_INET or AF_INET6 address.
  166  *
  167  * Although, one can get the softc's backpointer to ifnet and traverse
  168  * through its ifp->if_addrhead queue to find all interface addresses
  169  * involved in CARP, we keep a growable array of ifaddr pointers. This
  170  * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that
  171  * do calls into the network stack, thus avoiding LORs.
  172  *
  173  * Locking:
  174  *
  175  * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(),
  176  * callout-driven events and ioctl()s.
  177  *
  178  * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx.
  179  * To traverse the global list we use the mutex carp_mtx.
  180  *
  181  * Known issues with locking:
  182  *
  183  * - Sending ad, we put the pointer to the softc in an mtag, and no reference
  184  *   counting is done on the softc.
  185  * - On module unload we may race (?) with packet processing thread
  186  *   dereferencing our function pointers.
  187  */
  188 
  189 /* Accept incoming CARP packets; when zero, carp_input() and carp6_input() drop them. */
  190 VNET_DEFINE_STATIC(int, carp_allow) = 1;
  191 #define V_carp_allow    VNET(carp_allow)
  192 
  193 /* Set DSCP in outgoing CARP packets. */
  194 VNET_DEFINE_STATIC(int, carp_dscp) = 56;
  195 #define V_carp_dscp     VNET(carp_dscp)
  196 
  197 /* Preempt slower nodes: a BACKUP treats a master that advertises slower than us as down. */
  198 VNET_DEFINE_STATIC(int, carp_preempt) = 0;
  199 #define V_carp_preempt  VNET(carp_preempt)
  200 
  201 /* Log level: above 0 enables CARP_LOG(), above 1 also enables CARP_DEBUG(). */
  202 VNET_DEFINE_STATIC(int, carp_log) = 1;
  203 #define V_carp_log      VNET(carp_log)
  204 
  205 /* Global advskew demotion; added to sc_advskew by DEMOTE_ADVSKEW(). */
  206 VNET_DEFINE_STATIC(int, carp_demotion) = 0;
  207 #define V_carp_demotion VNET(carp_demotion)
  208 
  209 /* Send error demotion factor. */
  210 VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW;
  211 #define V_carp_senderr_adj      VNET(carp_senderr_adj)
  212 
  213 /* Iface down demotion factor. */
  214 VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW;
  215 #define V_carp_ifdown_adj       VNET(carp_ifdown_adj)
  216 
      /* Custom handlers for the allow, dscp and demotion sysctls declared below. */
  217 static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);
  218 static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);
  219 static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
  220 
      /* The net.inet.carp sysctl node and its leaves. */
  221 SYSCTL_NODE(_net_inet, IPPROTO_CARP,    carp,   CTLFLAG_RW, 0,  "CARP");
  222 SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
  223     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_allow_sysctl, "I",
  224     "Accept incoming CARP packets");
  225 SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
  226     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_dscp_sysctl, "I",
  227     "DSCP value for carp packets");
  228 SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
  229     &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
  230 SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
  231     &VNET_NAME(carp_log), 0, "CARP log level");
  232 SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
  233     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
  234     0, 0, carp_demote_adj_sysctl, "I",
  235     "Adjust demotion factor (skew of advskew)");
  236 SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
  237     CTLFLAG_VNET | CTLFLAG_RW,
  238     &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment");
  239 SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor,
  240     CTLFLAG_VNET | CTLFLAG_RW,
  241     &VNET_NAME(carp_ifdown_adj), 0,
  242     "Interface down demotion factor adjustment");
  243 
      /* CARP statistics (struct carpstats), exported below as net.inet.carp.stats. */
  244 VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats);
  245 VNET_PCPUSTAT_SYSINIT(carpstats);
  246 VNET_PCPUSTAT_SYSUNINIT(carpstats);
  247 
      /*
       * Add val to one field of struct carpstats.  The counter is picked by
       * the field's offset within the structure divided by sizeof(uint64_t).
       */
  248 #define CARPSTATS_ADD(name, val)        \
  249     counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \
  250         sizeof(uint64_t)], (val))
  251 #define CARPSTATS_INC(name)             CARPSTATS_ADD(name, 1)
  252 
  253 SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats,
  254     carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
  255 
  256 #define CARP_LOCK_INIT(sc)      mtx_init(&(sc)->sc_mtx, "carp_softc",   \
  257         NULL, MTX_DEF)
      /* CARP_LOCK_INIT above and the CARP_* macros below operate on the per-softc mutex sc_mtx. */
  258 #define CARP_LOCK_DESTROY(sc)   mtx_destroy(&(sc)->sc_mtx)
  259 #define CARP_LOCK_ASSERT(sc)    mtx_assert(&(sc)->sc_mtx, MA_OWNED)
  260 #define CARP_LOCK(sc)           mtx_lock(&(sc)->sc_mtx)
  261 #define CARP_UNLOCK(sc)         mtx_unlock(&(sc)->sc_mtx)
      /* The CIF_* macros operate on the per-interface mutex cif_mtx. */
  262 #define CIF_LOCK_INIT(cif)      mtx_init(&(cif)->cif_mtx, "carp_if",   \
  263         NULL, MTX_DEF)
  264 #define CIF_LOCK_DESTROY(cif)   mtx_destroy(&(cif)->cif_mtx)
  265 #define CIF_LOCK_ASSERT(cif)    mtx_assert(&(cif)->cif_mtx, MA_OWNED)
  266 #define CIF_LOCK(cif)           mtx_lock(&(cif)->cif_mtx)
  267 #define CIF_UNLOCK(cif)         mtx_unlock(&(cif)->cif_mtx)
      /*
       * Release cif if no softc is left on its list.  The lock is taken
       * here; it is dropped here only when the list is not empty, otherwise
       * carp_free_if() is entered with the lock still held.
       */
  268 #define CIF_FREE(cif)   do {                            \
  269                 CIF_LOCK(cif);                          \
  270                 if (TAILQ_EMPTY(&(cif)->cif_vrs))       \
  271                         carp_free_if(cif);              \
  272                 else                                    \
  273                         CIF_UNLOCK(cif);                \
  274 } while (0)
  275 
      /* Log at LOG_INFO with a "carp: " prefix when the log level is above 0. */
  276 #define CARP_LOG(...)   do {                            \
  277         if (V_carp_log > 0)                             \
  278                 log(LOG_INFO, "carp: " __VA_ARGS__);    \
  279 } while (0)
  280 
      /* Log at LOG_DEBUG, without a prefix, when the log level is above 1. */
  281 #define CARP_DEBUG(...) do {                            \
  282         if (V_carp_log > 1)                             \
  283                 log(LOG_DEBUG, __VA_ARGS__);            \
  284 } while (0)
  285 
  286 #define IFNET_FOREACH_IFA(ifp, ifa)                                     \
  287         CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \
  288                 if ((ifa)->ifa_carp != NULL)
  289 
  290 #define CARP_FOREACH_IFA(sc, ifa)                                       \
  291         CARP_LOCK_ASSERT(sc);                                           \
  292         for (int _i = 0;                                                \
  293                 _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 &&              \
  294                 ((ifa) = sc->sc_ifas[_i]) != NULL;                      \
  295                 ++_i)
  296 
  297 #define IFNET_FOREACH_CARP(ifp, sc)                                     \
  298         KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) ||                    \
  299             sx_xlocked(&carp_sx), ("cif_vrs not locked"));              \
  300         TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
  301 
  302 #define DEMOTE_ADVSKEW(sc)                                      \
  303     (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ?      \
  304     CARP_MAXSKEW :                                              \
  305         (((sc)->sc_advskew + V_carp_demotion < 0) ?             \
  306         0 : ((sc)->sc_advskew + V_carp_demotion)))
  307 
  308 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
      /* Forward declarations of the remaining file-local functions. */
  309 static struct carp_softc
  310                 *carp_alloc(struct ifnet *);
  311 static void     carp_destroy(struct carp_softc *);
  312 static struct carp_if
  313                 *carp_alloc_if(struct ifnet *);
  314 static void     carp_free_if(struct carp_if *);
  315 static void     carp_set_state(struct carp_softc *, int, const char* reason);
  316 static void     carp_sc_state(struct carp_softc *);
  317 static void     carp_setrun(struct carp_softc *, sa_family_t);
  318 static void     carp_master_down(void *);
  319 static void     carp_master_down_locked(struct carp_softc *,
  320                     const char* reason);
  321 static void     carp_send_ad(void *);
  322 static void     carp_send_ad_locked(struct carp_softc *);
  323 static void     carp_addroute(struct carp_softc *);
  324 static void     carp_ifa_addroute(struct ifaddr *);
  325 static void     carp_delroute(struct carp_softc *);
  326 static void     carp_ifa_delroute(struct ifaddr *);
  327 static void     carp_send_ad_all(void *, int);
  328 static void     carp_demote_adj(int, char *);
  329 
      /* Global list of all softcs (linked through sc_next); traversed under carp_mtx. */
  330 static LIST_HEAD(, carp_softc) carp_list;
  331 static struct mtx carp_mtx;
      /* Alternative to CIF_LOCK() for traversing an ifnet's softc list. */
  332 static struct sx carp_sx;
      /* Task whose handler is carp_send_ad_all(). */
  333 static struct task carp_sendall_task =
  334     TASK_INITIALIZER(0, carp_send_ad_all, NULL);
  335 
  336 static void
  337 carp_hmac_prepare(struct carp_softc *sc)
  338 {
  339         uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
  340         uint8_t vhid = sc->sc_vhid & 0xff;
  341         struct ifaddr *ifa;
  342         int i, found;
  343 #ifdef INET
  344         struct in_addr last, cur, in;
  345 #endif
  346 #ifdef INET6
  347         struct in6_addr last6, cur6, in6;
  348 #endif
  349 
  350         CARP_LOCK_ASSERT(sc);
  351 
  352         /* Compute ipad from key. */
  353         bzero(sc->sc_pad, sizeof(sc->sc_pad));
  354         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
  355         for (i = 0; i < sizeof(sc->sc_pad); i++)
  356                 sc->sc_pad[i] ^= 0x36;
  357 
  358         /* Precompute first part of inner hash. */
  359         SHA1Init(&sc->sc_sha1);
  360         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
  361         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
  362         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
  363         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
  364 #ifdef INET
  365         cur.s_addr = 0;
  366         do {
  367                 found = 0;
  368                 last = cur;
  369                 cur.s_addr = 0xffffffff;
  370                 CARP_FOREACH_IFA(sc, ifa) {
  371                         in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
  372                         if (ifa->ifa_addr->sa_family == AF_INET &&
  373                             ntohl(in.s_addr) > ntohl(last.s_addr) &&
  374                             ntohl(in.s_addr) < ntohl(cur.s_addr)) {
  375                                 cur.s_addr = in.s_addr;
  376                                 found++;
  377                         }
  378                 }
  379                 if (found)
  380                         SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
  381         } while (found);
  382 #endif /* INET */
  383 #ifdef INET6
  384         memset(&cur6, 0, sizeof(cur6));
  385         do {
  386                 found = 0;
  387                 last6 = cur6;
  388                 memset(&cur6, 0xff, sizeof(cur6));
  389                 CARP_FOREACH_IFA(sc, ifa) {
  390                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
  391                         if (IN6_IS_SCOPE_EMBED(&in6))
  392                                 in6.s6_addr16[1] = 0;
  393                         if (ifa->ifa_addr->sa_family == AF_INET6 &&
  394                             memcmp(&in6, &last6, sizeof(in6)) > 0 &&
  395                             memcmp(&in6, &cur6, sizeof(in6)) < 0) {
  396                                 cur6 = in6;
  397                                 found++;
  398                         }
  399                 }
  400                 if (found)
  401                         SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
  402         } while (found);
  403 #endif /* INET6 */
  404 
  405         /* convert ipad to opad */
  406         for (i = 0; i < sizeof(sc->sc_pad); i++)
  407                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
  408 }
  409 
  410 static void
  411 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
  412     unsigned char md[20])
  413 {
  414         SHA1_CTX sha1ctx;
  415 
  416         CARP_LOCK_ASSERT(sc);
  417 
  418         /* fetch first half of inner hash */
  419         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
  420 
  421         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
  422         SHA1Final(md, &sha1ctx);
  423 
  424         /* outer hash */
  425         SHA1Init(&sha1ctx);
  426         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
  427         SHA1Update(&sha1ctx, md, 20);
  428         SHA1Final(md, &sha1ctx);
  429 }
  430 
  431 static int
  432 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
  433     unsigned char md[20])
  434 {
  435         unsigned char md2[20];
  436 
  437         CARP_LOCK_ASSERT(sc);
  438 
  439         carp_hmac_generate(sc, counter, md2);
  440 
  441         return (bcmp(md, md2, sizeof(md2)));
  442 }
  443 
  444 /*
  445  * process input packet.
  446  * we have rearranged checks order compared to the rfc,
  447  * but it seems more efficient this way or not possible otherwise.
  448  */
  449 #ifdef INET
  450 int
  451 carp_input(struct mbuf **mp, int *offp, int proto)
  452 {
  453         struct mbuf *m = *mp;
  454         struct ip *ip = mtod(m, struct ip *);
  455         struct carp_header *ch;
  456         int iplen, len;
  457 
  458         iplen = *offp;
  459         *mp = NULL;
  460 
  461         CARPSTATS_INC(carps_ipackets);
  462 
  463         if (!V_carp_allow) {
  464                 m_freem(m);
  465                 return (IPPROTO_DONE);
  466         }
  467 
  468         /* verify that the IP TTL is 255.  */
  469         if (ip->ip_ttl != CARP_DFLTTL) {
  470                 CARPSTATS_INC(carps_badttl);
  471                 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
  472                     ip->ip_ttl,
  473                     m->m_pkthdr.rcvif->if_xname);
  474                 m_freem(m);
  475                 return (IPPROTO_DONE);
  476         }
  477 
  478         iplen = ip->ip_hl << 2;
  479 
  480         if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
  481                 CARPSTATS_INC(carps_badlen);
  482                 CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) "
  483                     "on %s\n", __func__, m->m_len - sizeof(struct ip),
  484                     m->m_pkthdr.rcvif->if_xname);
  485                 m_freem(m);
  486                 return (IPPROTO_DONE);
  487         }
  488 
  489         if (iplen + sizeof(*ch) < m->m_len) {
  490                 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
  491                         CARPSTATS_INC(carps_hdrops);
  492                         CARP_DEBUG("%s: pullup failed\n", __func__);
  493                         return (IPPROTO_DONE);
  494                 }
  495                 ip = mtod(m, struct ip *);
  496         }
  497         ch = (struct carp_header *)((char *)ip + iplen);
  498 
  499         /*
  500          * verify that the received packet length is
  501          * equal to the CARP header
  502          */
  503         len = iplen + sizeof(*ch);
  504         if (len > m->m_pkthdr.len) {
  505                 CARPSTATS_INC(carps_badlen);
  506                 CARP_DEBUG("%s: packet too short %d on %s\n", __func__,
  507                     m->m_pkthdr.len,
  508                     m->m_pkthdr.rcvif->if_xname);
  509                 m_freem(m);
  510                 return (IPPROTO_DONE);
  511         }
  512 
  513         if ((m = m_pullup(m, len)) == NULL) {
  514                 CARPSTATS_INC(carps_hdrops);
  515                 return (IPPROTO_DONE);
  516         }
  517         ip = mtod(m, struct ip *);
  518         ch = (struct carp_header *)((char *)ip + iplen);
  519 
  520         /* verify the CARP checksum */
  521         m->m_data += iplen;
  522         if (in_cksum(m, len - iplen)) {
  523                 CARPSTATS_INC(carps_badsum);
  524                 CARP_DEBUG("%s: checksum failed on %s\n", __func__,
  525                     m->m_pkthdr.rcvif->if_xname);
  526                 m_freem(m);
  527                 return (IPPROTO_DONE);
  528         }
  529         m->m_data -= iplen;
  530 
  531         carp_input_c(m, ch, AF_INET);
  532         return (IPPROTO_DONE);
  533 }
  534 #endif
  535 
  536 #ifdef INET6
  537 int
  538 carp6_input(struct mbuf **mp, int *offp, int proto)
  539 {
  540         struct mbuf *m = *mp;
  541         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
  542         struct carp_header *ch;
  543         u_int len;
  544 
  545         CARPSTATS_INC(carps_ipackets6);
  546 
  547         if (!V_carp_allow) {
  548                 m_freem(m);
  549                 return (IPPROTO_DONE);
  550         }
  551 
  552         /* check if received on a valid carp interface */
  553         if (m->m_pkthdr.rcvif->if_carp == NULL) {
  554                 CARPSTATS_INC(carps_badif);
  555                 CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
  556                     __func__, m->m_pkthdr.rcvif->if_xname);
  557                 m_freem(m);
  558                 return (IPPROTO_DONE);
  559         }
  560 
  561         /* verify that the IP TTL is 255 */
  562         if (ip6->ip6_hlim != CARP_DFLTTL) {
  563                 CARPSTATS_INC(carps_badttl);
  564                 CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__,
  565                     ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname);
  566                 m_freem(m);
  567                 return (IPPROTO_DONE);
  568         }
  569 
  570         /* verify that we have a complete carp packet */
  571         if (m->m_len < *offp + sizeof(*ch)) {
  572                 len = m->m_len;
  573                 m = m_pullup(m, *offp + sizeof(*ch));
  574                 if (m == NULL) {
  575                         CARPSTATS_INC(carps_badlen);
  576                         CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
  577                         return (IPPROTO_DONE);
  578                 }
  579         }
  580         ch = (struct carp_header *)(mtod(m, char *) + *offp);
  581 
  582 
  583         /* verify the CARP checksum */
  584         m->m_data += *offp;
  585         if (in_cksum(m, sizeof(*ch))) {
  586                 CARPSTATS_INC(carps_badsum);
  587                 CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
  588                     m->m_pkthdr.rcvif->if_xname);
  589                 m_freem(m);
  590                 return (IPPROTO_DONE);
  591         }
  592         m->m_data -= *offp;
  593 
  594         carp_input_c(m, ch, AF_INET6);
  595         return (IPPROTO_DONE);
  596 }
  597 #endif /* INET6 */
  598 
  599 /*
  600  * This routine should not be necessary at all, but some switches
  601  * (VMWare ESX vswitches) can echo our own packets back at us,
  602  * and we must ignore them or they will cause us to drop out of
  603  * MASTER mode.
  604  *
  605  * We cannot catch all cases of network loops.  Instead, what we
  606  * do here is catch any packet that arrives with a carp header
  607  * with a VHID of 0, that comes from an address that is our own.
  608  * These packets are by definition "from us" (even if they are from
  609  * a misconfigured host that is pretending to be us).
  610  *
  611  * The VHID test is outside this mini-function.
  612  */
  613 static int
  614 carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
  615 {
  616 #ifdef INET
  617         struct ip *ip4;
  618         struct in_addr in4;
  619 #endif
  620 #ifdef INET6
  621         struct ip6_hdr *ip6;
  622         struct in6_addr in6;
  623 #endif
  624 
  625         switch (af) {
  626 #ifdef INET
  627         case AF_INET:
  628                 ip4 = mtod(m, struct ip *);
  629                 in4 = ifatoia(ifa)->ia_addr.sin_addr;
  630                 return (in4.s_addr == ip4->ip_src.s_addr);
  631 #endif
  632 #ifdef INET6
  633         case AF_INET6:
  634                 ip6 = mtod(m, struct ip6_hdr *);
  635                 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
  636                 return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0);
  637 #endif
  638         default:
  639                 break;
  640         }
  641         return (0);
  642 }
  643 
  644 static void
  645 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
  646 {
  647         struct ifnet *ifp = m->m_pkthdr.rcvif;
  648         struct ifaddr *ifa, *match;
  649         struct carp_softc *sc;
  650         uint64_t tmp_counter;
  651         struct timeval sc_tv, ch_tv;
  652         int error;
  653 
  654         /*
  655          * Verify that the VHID is valid on the receiving interface.
  656          *
  657          * There should be just one match.  If there are none
  658          * the VHID is not valid and we drop the packet.  If
  659          * there are multiple VHID matches, take just the first
  660          * one, for compatibility with previous code.  While we're
  661          * scanning, check for obvious loops in the network topology
  662          * (these should never happen, and as noted above, we may
  663          * miss real loops; this is just a double-check).
  664          */
  665         IF_ADDR_RLOCK(ifp);
  666         error = 0;
  667         match = NULL;
  668         IFNET_FOREACH_IFA(ifp, ifa) {
  669                 if (match == NULL && ifa->ifa_carp != NULL &&
  670                     ifa->ifa_addr->sa_family == af &&
  671                     ifa->ifa_carp->sc_vhid == ch->carp_vhid)
  672                         match = ifa;
  673                 if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af))
  674                         error = ELOOP;
  675         }
  676         ifa = error ? NULL : match;
  677         if (ifa != NULL)
  678                 ifa_ref(ifa);
  679         IF_ADDR_RUNLOCK(ifp);
  680 
  681         if (ifa == NULL) {
  682                 if (error == ELOOP) {
  683                         CARP_DEBUG("dropping looped packet on interface %s\n",
  684                             ifp->if_xname);
  685                         CARPSTATS_INC(carps_badif);     /* ??? */
  686                 } else {
  687                         CARPSTATS_INC(carps_badvhid);
  688                 }
  689                 m_freem(m);
  690                 return;
  691         }
  692 
  693         /* verify the CARP version. */
  694         if (ch->carp_version != CARP_VERSION) {
  695                 CARPSTATS_INC(carps_badver);
  696                 CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
  697                     ch->carp_version);
  698                 ifa_free(ifa);
  699                 m_freem(m);
  700                 return;
  701         }
  702 
  703         sc = ifa->ifa_carp;
  704         CARP_LOCK(sc);
  705         ifa_free(ifa);
  706 
  707         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
  708                 CARPSTATS_INC(carps_badauth);
  709                 CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
  710                     sc->sc_vhid, ifp->if_xname);
  711                 goto out;
  712         }
  713 
  714         tmp_counter = ntohl(ch->carp_counter[0]);
  715         tmp_counter = tmp_counter<<32;
  716         tmp_counter += ntohl(ch->carp_counter[1]);
  717 
  718         /* XXX Replay protection goes here */
  719 
  720         sc->sc_init_counter = 0;
  721         sc->sc_counter = tmp_counter;
  722 
  723         sc_tv.tv_sec = sc->sc_advbase;
  724         sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
  725         ch_tv.tv_sec = ch->carp_advbase;
  726         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
  727 
  728         switch (sc->sc_state) {
  729         case INIT:
  730                 break;
  731         case MASTER:
  732                 /*
  733                  * If we receive an advertisement from a master who's going to
  734                  * be more frequent than us, go into BACKUP state.
  735                  */
  736                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
  737                     timevalcmp(&sc_tv, &ch_tv, ==)) {
  738                         callout_stop(&sc->sc_ad_tmo);
  739                         carp_set_state(sc, BACKUP,
  740                             "more frequent advertisement received");
  741                         carp_setrun(sc, 0);
  742                         carp_delroute(sc);
  743                 }
  744                 break;
  745         case BACKUP:
  746                 /*
  747                  * If we're pre-empting masters who advertise slower than us,
  748                  * and this one claims to be slower, treat him as down.
  749                  */
  750                 if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
  751                         carp_master_down_locked(sc,
  752                             "preempting a slower master");
  753                         break;
  754                 }
  755 
  756                 /*
  757                  *  If the master is going to advertise at such a low frequency
  758                  *  that he's guaranteed to time out, we'd might as well just
  759                  *  treat him as timed out now.
  760                  */
  761                 sc_tv.tv_sec = sc->sc_advbase * 3;
  762                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
  763                         carp_master_down_locked(sc, "master will time out");
  764                         break;
  765                 }
  766 
  767                 /*
  768                  * Otherwise, we reset the counter and wait for the next
  769                  * advertisement.
  770                  */
  771                 carp_setrun(sc, af);
  772                 break;
  773         }
  774 
  775 out:
  776         CARP_UNLOCK(sc);
  777         m_freem(m);
  778 }
  779 
  780 static int
  781 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
  782 {
  783         struct m_tag *mtag;
  784 
  785         if (sc->sc_init_counter) {
  786                 /* this could also be seconds since unix epoch */
  787                 sc->sc_counter = arc4random();
  788                 sc->sc_counter = sc->sc_counter << 32;
  789                 sc->sc_counter += arc4random();
  790         } else
  791                 sc->sc_counter++;
  792 
  793         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
  794         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
  795 
  796         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
  797 
  798         /* Tag packet for carp_output */
  799         if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
  800             M_NOWAIT)) == NULL) {
  801                 m_freem(m);
  802                 CARPSTATS_INC(carps_onomem);
  803                 return (ENOMEM);
  804         }
  805         bcopy(&sc, mtag + 1, sizeof(sc));
  806         m_tag_prepend(m, mtag);
  807 
  808         return (0);
  809 }
  810 
  811 /*
  812  * To avoid LORs and possible recursions this function shouldn't
  813  * be called directly, but scheduled via taskqueue.
  814  */
  815 static void
  816 carp_send_ad_all(void *ctx __unused, int pending __unused)
  817 {
  818         struct carp_softc *sc;
  819 
  820         mtx_lock(&carp_mtx);
  821         LIST_FOREACH(sc, &carp_list, sc_next)
  822                 if (sc->sc_state == MASTER) {
  823                         CARP_LOCK(sc);
  824                         CURVNET_SET(sc->sc_carpdev->if_vnet);
  825                         carp_send_ad_locked(sc);
  826                         CURVNET_RESTORE();
  827                         CARP_UNLOCK(sc);
  828                 }
  829         mtx_unlock(&carp_mtx);
  830 }
  831 
  832 /* Send a periodic advertisement, executed in callout context. */
  833 static void
  834 carp_send_ad(void *v)
  835 {
  836         struct carp_softc *sc = v;
  837 
  838         CARP_LOCK_ASSERT(sc);
  839         CURVNET_SET(sc->sc_carpdev->if_vnet);
  840         carp_send_ad_locked(sc);
  841         CURVNET_RESTORE();
  842         CARP_UNLOCK(sc);
  843 }
  844 
  845 static void
  846 carp_send_ad_error(struct carp_softc *sc, int error)
  847 {
  848 
  849         if (error) {
  850                 if (sc->sc_sendad_errors < INT_MAX)
  851                         sc->sc_sendad_errors++;
  852                 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
  853                         static const char fmt[] = "send error %d on %s";
  854                         char msg[sizeof(fmt) + IFNAMSIZ];
  855 
  856                         sprintf(msg, fmt, error, sc->sc_carpdev->if_xname);
  857                         carp_demote_adj(V_carp_senderr_adj, msg);
  858                 }
  859                 sc->sc_sendad_success = 0;
  860         } else {
  861                 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS &&
  862                     ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) {
  863                         static const char fmt[] = "send ok on %s";
  864                         char msg[sizeof(fmt) + IFNAMSIZ];
  865 
  866                         sprintf(msg, fmt, sc->sc_carpdev->if_xname);
  867                         carp_demote_adj(-V_carp_senderr_adj, msg);
  868                         sc->sc_sendad_errors = 0;
  869                 } else
  870                         sc->sc_sendad_errors = 0;
  871         }
  872 }
  873 
  874 /*
  875  * Pick the best ifaddr on the given ifp for sending CARP
  876  * advertisements.
  877  *
  878  * "Best" here is defined by ifa_preferred().  This function is much
  879  * much like ifaof_ifpforaddr() except that we just use ifa_preferred().
  880  *
  881  * (This could be simplified to return the actual address, except that
  882  * it has a different format in AF_INET and AF_INET6.)
  883  */
  884 static struct ifaddr *
  885 carp_best_ifa(int af, struct ifnet *ifp)
  886 {
  887         struct ifaddr *ifa, *best;
  888 
  889         if (af >= AF_MAX)
  890                 return (NULL);
  891         best = NULL;
  892         IF_ADDR_RLOCK(ifp);
  893         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  894                 if (ifa->ifa_addr->sa_family == af &&
  895                     (best == NULL || ifa_preferred(best, ifa)))
  896                         best = ifa;
  897         }
  898         IF_ADDR_RUNLOCK(ifp);
  899         if (best != NULL)
  900                 ifa_ref(best);
  901         return (best);
  902 }
  903 
/*
 * Build and transmit one CARP advertisement for each address family that
 * has addresses configured on this softc, then re-arm the advertisement
 * callout.
 *
 * The softc lock must be held.  Every path, including the error paths,
 * ends at the "resched" label, so the callout is always rescheduled.
 * Note that a failure in the IPv4 section jumps straight to "resched" and
 * therefore skips the IPv6 advertisement for this round.
 */
static void
carp_send_ad_locked(struct carp_softc *sc)
{
        struct carp_header ch;
        struct timeval tv;
        struct ifaddr *ifa;
        struct carp_header *ch_ptr;
        struct mbuf *m;
        int len, advskew;

        CARP_LOCK_ASSERT(sc);

        /* Next advertisement is due in advbase seconds + advskew/256 s. */
        advskew = DEMOTE_ADVSKEW(sc);
        tv.tv_sec = sc->sc_advbase;
        tv.tv_usec = advskew * 1000000 / 256;

        /* Header template copied into both the IPv4 and IPv6 packets. */
        ch.carp_version = CARP_VERSION;
        ch.carp_type = CARP_ADVERTISEMENT;
        ch.carp_vhid = sc->sc_vhid;
        ch.carp_advbase = sc->sc_advbase;
        ch.carp_advskew = advskew;
        ch.carp_authlen = 7;    /* XXX DEFINE */
        ch.carp_pad1 = 0;       /* must be zero */
        ch.carp_cksum = 0;

        /* XXXGL: OpenBSD picks first ifaddr with needed family. */

#ifdef INET
        if (sc->sc_naddrs) {
                struct ip *ip;

                m = m_gethdr(M_NOWAIT, MT_DATA);
                if (m == NULL) {
                        CARPSTATS_INC(carps_onomem);
                        goto resched;
                }
                /* The packet is an IP header followed by the CARP header. */
                len = sizeof(*ip) + sizeof(ch);
                m->m_pkthdr.len = len;
                m->m_pkthdr.rcvif = NULL;
                m->m_len = len;
                M_ALIGN(m, m->m_len);
                m->m_flags |= M_MCAST;
                ip = mtod(m, struct ip *);
                ip->ip_v = IPVERSION;
                ip->ip_hl = sizeof(*ip) >> 2;
                ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET;
                ip->ip_len = htons(len);
                ip->ip_off = htons(IP_DF);
                ip->ip_ttl = CARP_DFLTTL;
                ip->ip_p = IPPROTO_CARP;
                ip->ip_sum = 0;
                ip_fillid(ip);

                /* Source is the preferred IPv4 address, or 0 if none. */
                ifa = carp_best_ifa(AF_INET, sc->sc_carpdev);
                if (ifa != NULL) {
                        ip->ip_src.s_addr =
                            ifatoia(ifa)->ia_addr.sin_addr.s_addr;
                        ifa_free(ifa);
                } else
                        ip->ip_src.s_addr = 0;
                ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);

                ch_ptr = (struct carp_header *)(&ip[1]);
                bcopy(&ch, ch_ptr, sizeof(ch));
                /* On failure carp_prepare_ad() has already freed m. */
                if (carp_prepare_ad(m, sc, ch_ptr))
                        goto resched;

                /*
                 * Temporarily step over the IP header so that the checksum
                 * covers only the CARP header.
                 */
                m->m_data += sizeof(*ip);
                ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip));
                m->m_data -= sizeof(*ip);

                CARPSTATS_INC(carps_opackets);

                /* The output result feeds the send-error accounting. */
                carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT,
                    &sc->sc_carpdev->if_carp->cif_imo, NULL));
        }
#endif /* INET */
#ifdef INET6
        if (sc->sc_naddrs6) {
                struct ip6_hdr *ip6;

                m = m_gethdr(M_NOWAIT, MT_DATA);
                if (m == NULL) {
                        CARPSTATS_INC(carps_onomem);
                        goto resched;
                }
                /* The packet is an IPv6 header followed by the CARP header. */
                len = sizeof(*ip6) + sizeof(ch);
                m->m_pkthdr.len = len;
                m->m_pkthdr.rcvif = NULL;
                m->m_len = len;
                M_ALIGN(m, m->m_len);
                m->m_flags |= M_MCAST;
                ip6 = mtod(m, struct ip6_hdr *);
                bzero(ip6, sizeof(*ip6));
                ip6->ip6_vfc |= IPV6_VERSION;
                /* Traffic class isn't defined in ip6 struct instead
                 * it gets offset into flowid field */
                ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN +
                    IPTOS_DSCP_OFFSET));
                ip6->ip6_hlim = CARP_DFLTTL;
                ip6->ip6_nxt = IPPROTO_CARP;

                /* set the source address */
                ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev);
                if (ifa != NULL) {
                        bcopy(IFA_IN6(ifa), &ip6->ip6_src,
                            sizeof(struct in6_addr));
                        ifa_free(ifa);
                } else
                        /* This should never happen with IPv6. */
                        bzero(&ip6->ip6_src, sizeof(struct in6_addr));

                /* Set the multicast destination. */
                ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
                ip6->ip6_dst.s6_addr8[15] = 0x12;
                if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
                        m_freem(m);
                        CARP_DEBUG("%s: in6_setscope failed\n", __func__);
                        goto resched;
                }

                ch_ptr = (struct carp_header *)(&ip6[1]);
                bcopy(&ch, ch_ptr, sizeof(ch));
                /* On failure carp_prepare_ad() has already freed m. */
                if (carp_prepare_ad(m, sc, ch_ptr))
                        goto resched;

                /*
                 * Temporarily step over the IPv6 header so that the
                 * checksum covers only the CARP header.
                 */
                m->m_data += sizeof(*ip6);
                ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6));
                m->m_data -= sizeof(*ip6);

                CARPSTATS_INC(carps_opackets6);

                /* The output result feeds the send-error accounting. */
                carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0,
                    &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL));
        }
#endif /* INET6 */

resched:
        /* Re-arm the advertisement timer with the interval computed above. */
        callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc);
}
 1044 
 1045 static void
 1046 carp_addroute(struct carp_softc *sc)
 1047 {
 1048         struct ifaddr *ifa;
 1049 
 1050         CARP_FOREACH_IFA(sc, ifa)
 1051                 carp_ifa_addroute(ifa);
 1052 }
 1053 
 1054 static void
 1055 carp_ifa_addroute(struct ifaddr *ifa)
 1056 {
 1057 
 1058         switch (ifa->ifa_addr->sa_family) {
 1059 #ifdef INET
 1060         case AF_INET:
 1061                 in_addprefix(ifatoia(ifa), RTF_UP);
 1062                 ifa_add_loopback_route(ifa,
 1063                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
 1064                 break;
 1065 #endif
 1066 #ifdef INET6
 1067         case AF_INET6:
 1068                 ifa_add_loopback_route(ifa,
 1069                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
 1070                 nd6_add_ifa_lle(ifatoia6(ifa));
 1071                 break;
 1072 #endif
 1073         }
 1074 }
 1075 
 1076 static void
 1077 carp_delroute(struct carp_softc *sc)
 1078 {
 1079         struct ifaddr *ifa;
 1080 
 1081         CARP_FOREACH_IFA(sc, ifa)
 1082                 carp_ifa_delroute(ifa);
 1083 }
 1084 
 1085 static void
 1086 carp_ifa_delroute(struct ifaddr *ifa)
 1087 {
 1088 
 1089         switch (ifa->ifa_addr->sa_family) {
 1090 #ifdef INET
 1091         case AF_INET:
 1092                 ifa_del_loopback_route(ifa,
 1093                     (struct sockaddr *)&ifatoia(ifa)->ia_addr);
 1094                 in_scrubprefix(ifatoia(ifa), LLE_STATIC);
 1095                 break;
 1096 #endif
 1097 #ifdef INET6
 1098         case AF_INET6:
 1099                 ifa_del_loopback_route(ifa,
 1100                     (struct sockaddr *)&ifatoia6(ifa)->ia_addr);
 1101                 nd6_rem_ifa_lle(ifatoia6(ifa), 1);
 1102                 break;
 1103 #endif
 1104         }
 1105 }
 1106 
 1107 int
 1108 carp_master(struct ifaddr *ifa)
 1109 {
 1110         struct carp_softc *sc = ifa->ifa_carp;
 1111 
 1112         return (sc->sc_state == MASTER);
 1113 }
 1114 
 1115 #ifdef INET
 1116 /*
 1117  * Broadcast a gratuitous ARP request containing
 1118  * the virtual router MAC address for each IP address
 1119  * associated with the virtual router.
 1120  */
 1121 static void
 1122 carp_send_arp(struct carp_softc *sc)
 1123 {
 1124         struct ifaddr *ifa;
 1125         struct in_addr addr;
 1126 
 1127         CARP_FOREACH_IFA(sc, ifa) {
 1128                 if (ifa->ifa_addr->sa_family != AF_INET)
 1129                         continue;
 1130                 addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
 1131                 arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr));
 1132         }
 1133 }
 1134 
 1135 int
 1136 carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
 1137 {
 1138         struct carp_softc *sc = ifa->ifa_carp;
 1139 
 1140         if (sc->sc_state == MASTER) {
 1141                 *enaddr = LLADDR(&sc->sc_addr);
 1142                 return (1);
 1143         }
 1144 
 1145         return (0);
 1146 }
 1147 #endif
 1148 
 1149 #ifdef INET6
 1150 static void
 1151 carp_send_na(struct carp_softc *sc)
 1152 {
 1153         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 1154         struct ifaddr *ifa;
 1155         struct in6_addr *in6;
 1156 
 1157         CARP_FOREACH_IFA(sc, ifa) {
 1158                 if (ifa->ifa_addr->sa_family != AF_INET6)
 1159                         continue;
 1160 
 1161                 in6 = IFA_IN6(ifa);
 1162                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
 1163                     ND_NA_FLAG_OVERRIDE, 1, NULL);
 1164                 DELAY(1000);    /* XXX */
 1165         }
 1166 }
 1167 
 1168 /*
 1169  * Returns ifa in case it's a carp address and it is MASTER, or if the address
 1170  * matches and is not a carp address.  Returns NULL otherwise.
 1171  */
 1172 struct ifaddr *
 1173 carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
 1174 {
 1175         struct ifaddr *ifa;
 1176 
 1177         ifa = NULL;
 1178         IF_ADDR_RLOCK(ifp);
 1179         CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 1180                 if (ifa->ifa_addr->sa_family != AF_INET6)
 1181                         continue;
 1182                 if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa)))
 1183                         continue;
 1184                 if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER)
 1185                         ifa = NULL;
 1186                 else
 1187                         ifa_ref(ifa);
 1188                 break;
 1189         }
 1190         IF_ADDR_RUNLOCK(ifp);
 1191 
 1192         return (ifa);
 1193 }
 1194 
 1195 char *
 1196 carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
 1197 {
 1198         struct ifaddr *ifa;
 1199 
 1200         IF_ADDR_RLOCK(ifp);
 1201         IFNET_FOREACH_IFA(ifp, ifa)
 1202                 if (ifa->ifa_addr->sa_family == AF_INET6 &&
 1203                     IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) {
 1204                         struct carp_softc *sc = ifa->ifa_carp;
 1205                         struct m_tag *mtag;
 1206 
 1207                         IF_ADDR_RUNLOCK(ifp);
 1208 
 1209                         mtag = m_tag_get(PACKET_TAG_CARP,
 1210                             sizeof(struct carp_softc *), M_NOWAIT);
 1211                         if (mtag == NULL)
 1212                                 /* Better a bit than nothing. */
 1213                                 return (LLADDR(&sc->sc_addr));
 1214 
 1215                         bcopy(&sc, mtag + 1, sizeof(sc));
 1216                         m_tag_prepend(m, mtag);
 1217 
 1218                         return (LLADDR(&sc->sc_addr));
 1219                 }
 1220         IF_ADDR_RUNLOCK(ifp);
 1221 
 1222         return (NULL);
 1223 }
 1224 #endif /* INET6 */
 1225 
 1226 int
 1227 carp_forus(struct ifnet *ifp, u_char *dhost)
 1228 {
 1229         struct carp_softc *sc;
 1230         uint8_t *ena = dhost;
 1231 
 1232         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
 1233                 return (0);
 1234 
 1235         CIF_LOCK(ifp->if_carp);
 1236         IFNET_FOREACH_CARP(ifp, sc) {
 1237                 /*
 1238                  * CARP_LOCK() is not here, since would protect nothing, but
 1239                  * cause deadlock with if_bridge, calling this under its lock.
 1240                  */
 1241                 if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
 1242                     ETHER_ADDR_LEN)) {
 1243                         CIF_UNLOCK(ifp->if_carp);
 1244                         return (1);
 1245                 }
 1246         }
 1247         CIF_UNLOCK(ifp->if_carp);
 1248 
 1249         return (0);
 1250 }
 1251 
 1252 /* Master down timeout event, executed in callout context. */
 1253 static void
 1254 carp_master_down(void *v)
 1255 {
 1256         struct carp_softc *sc = v;
 1257 
 1258         CARP_LOCK_ASSERT(sc);
 1259 
 1260         CURVNET_SET(sc->sc_carpdev->if_vnet);
 1261         if (sc->sc_state == BACKUP) {
 1262                 carp_master_down_locked(sc, "master timed out");
 1263         }
 1264         CURVNET_RESTORE();
 1265 
 1266         CARP_UNLOCK(sc);
 1267 }
 1268 
 1269 static void
 1270 carp_master_down_locked(struct carp_softc *sc, const char *reason)
 1271 {
 1272 
 1273         CARP_LOCK_ASSERT(sc);
 1274 
 1275         switch (sc->sc_state) {
 1276         case BACKUP:
 1277                 carp_set_state(sc, MASTER, reason);
 1278                 carp_send_ad_locked(sc);
 1279 #ifdef INET
 1280                 carp_send_arp(sc);
 1281 #endif
 1282 #ifdef INET6
 1283                 carp_send_na(sc);
 1284 #endif
 1285                 carp_setrun(sc, 0);
 1286                 carp_addroute(sc);
 1287                 break;
 1288         case INIT:
 1289         case MASTER:
 1290 #ifdef INVARIANTS
 1291                 panic("carp: VHID %u@%s: master_down event in %s state\n",
 1292                     sc->sc_vhid,
 1293                     sc->sc_carpdev->if_xname,
 1294                     sc->sc_state ? "MASTER" : "INIT");
 1295 #endif
 1296                 break;
 1297         }
 1298 }
 1299 
/*
 * (Re)arm the timers appropriate for the softc's current state.
 *
 * When in backup state, af indicates whether to reset the master down timer
 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
 *
 * Nothing is done unless the interface is up, its link state is up, the
 * softc has at least one address and V_carp_allow is set.  The softc lock
 * must be held.
 */
static void
carp_setrun(struct carp_softc *sc, sa_family_t af)
{
        struct timeval tv;

        CARP_LOCK_ASSERT(sc);

        if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 ||
            sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
            (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) ||
            !V_carp_allow)
                return;

        switch (sc->sc_state) {
        case INIT:
                /* Leave INIT for BACKUP, then arm the BACKUP timers. */
                carp_set_state(sc, BACKUP, "initialization complete");
                carp_setrun(sc, 0);
                break;
        case BACKUP:
                /* A backup does not advertise. */
                callout_stop(&sc->sc_ad_tmo);
                /* Master-down timeout: 3 * advbase s + advskew/256 s. */
                tv.tv_sec = 3 * sc->sc_advbase;
                tv.tv_usec = sc->sc_advskew * 1000000 / 256;
                switch (af) {
#ifdef INET
                case AF_INET:
                        callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
                            carp_master_down, sc);
                        break;
#endif
#ifdef INET6
                case AF_INET6:
                        callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
                            carp_master_down, sc);
                        break;
#endif
                default:
                        /* No family given: arm one timer per family in use. */
#ifdef INET
                        if (sc->sc_naddrs)
                                callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
                                    carp_master_down, sc);
#endif
#ifdef INET6
                        if (sc->sc_naddrs6)
                                callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
                                    carp_master_down, sc);
#endif
                        break;
                }
                break;
        case MASTER:
                /* Advertisement interval: advbase s + advskew/256 s. */
                tv.tv_sec = sc->sc_advbase;
                tv.tv_usec = sc->sc_advskew * 1000000 / 256;
                callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
                    carp_send_ad, sc);
                break;
        }
}
 1361 
 1362 /*
 1363  * Setup multicast structures.
 1364  */
 1365 static int
 1366 carp_multicast_setup(struct carp_if *cif, sa_family_t sa)
 1367 {
 1368         struct ifnet *ifp = cif->cif_ifp;
 1369         int error = 0;
 1370 
 1371         switch (sa) {
 1372 #ifdef INET
 1373         case AF_INET:
 1374             {
 1375                 struct ip_moptions *imo = &cif->cif_imo;
 1376                 struct in_mfilter *imf;
 1377                 struct in_addr addr;
 1378 
 1379                 if (ip_mfilter_first(&imo->imo_head) != NULL)
 1380                         return (0);
 1381 
 1382                 imf = ip_mfilter_alloc(M_WAITOK, 0, 0);
 1383                 ip_mfilter_init(&imo->imo_head);
 1384                 imo->imo_multicast_vif = -1;
 1385 
 1386                 addr.s_addr = htonl(INADDR_CARP_GROUP);
 1387                 if ((error = in_joingroup(ifp, &addr, NULL,
 1388                     &imf->imf_inm)) != 0) {
 1389                         ip_mfilter_free(imf);
 1390                         break;
 1391                 }
 1392 
 1393                 ip_mfilter_insert(&imo->imo_head, imf);
 1394                 imo->imo_multicast_ifp = ifp;
 1395                 imo->imo_multicast_ttl = CARP_DFLTTL;
 1396                 imo->imo_multicast_loop = 0;
 1397                 break;
 1398            }
 1399 #endif
 1400 #ifdef INET6
 1401         case AF_INET6:
 1402             {
 1403                 struct ip6_moptions *im6o = &cif->cif_im6o;
 1404                 struct in6_mfilter *im6f[2];
 1405                 struct in6_addr in6;
 1406 
 1407                 if (ip6_mfilter_first(&im6o->im6o_head))
 1408                         return (0);
 1409 
 1410                 im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
 1411                 im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0);
 1412 
 1413                 ip6_mfilter_init(&im6o->im6o_head);
 1414                 im6o->im6o_multicast_hlim = CARP_DFLTTL;
 1415                 im6o->im6o_multicast_ifp = ifp;
 1416 
 1417                 /* Join IPv6 CARP multicast group. */
 1418                 bzero(&in6, sizeof(in6));
 1419                 in6.s6_addr16[0] = htons(0xff02);
 1420                 in6.s6_addr8[15] = 0x12;
 1421                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
 1422                         ip6_mfilter_free(im6f[0]);
 1423                         ip6_mfilter_free(im6f[1]);
 1424                         break;
 1425                 }
 1426                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) {
 1427                         ip6_mfilter_free(im6f[0]);
 1428                         ip6_mfilter_free(im6f[1]);
 1429                         break;
 1430                 }
 1431 
 1432                 /* Join solicited multicast address. */
 1433                 bzero(&in6, sizeof(in6));
 1434                 in6.s6_addr16[0] = htons(0xff02);
 1435                 in6.s6_addr32[1] = 0;
 1436                 in6.s6_addr32[2] = htonl(1);
 1437                 in6.s6_addr32[3] = 0;
 1438                 in6.s6_addr8[12] = 0xff;
 1439 
 1440                 if ((error = in6_setscope(&in6, ifp, NULL)) != 0) {
 1441                         ip6_mfilter_free(im6f[0]);
 1442                         ip6_mfilter_free(im6f[1]);
 1443                         break;
 1444                 }
 1445 
 1446                 if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) {
 1447                         in6_leavegroup(im6f[0]->im6f_in6m, NULL);
 1448                         ip6_mfilter_free(im6f[0]);
 1449                         ip6_mfilter_free(im6f[1]);
 1450                         break;
 1451                 }
 1452                 ip6_mfilter_insert(&im6o->im6o_head, im6f[0]);
 1453                 ip6_mfilter_insert(&im6o->im6o_head, im6f[1]);
 1454                 break;
 1455             }
 1456 #endif
 1457         }
 1458 
 1459         return (error);
 1460 }
 1461 
 1462 /*
 1463  * Free multicast structures.
 1464  */
 1465 static void
 1466 carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa)
 1467 {
 1468 #ifdef INET
 1469         struct ip_moptions *imo = &cif->cif_imo;
 1470         struct in_mfilter *imf;
 1471 #endif
 1472 #ifdef INET6
 1473         struct ip6_moptions *im6o = &cif->cif_im6o;
 1474         struct in6_mfilter *im6f;
 1475 #endif
 1476         sx_assert(&carp_sx, SA_XLOCKED);
 1477 
 1478         switch (sa) {
 1479 #ifdef INET
 1480         case AF_INET:
 1481                 if (cif->cif_naddrs != 0)
 1482                         break;
 1483 
 1484                 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) {
 1485                         ip_mfilter_remove(&imo->imo_head, imf);
 1486                         in_leavegroup(imf->imf_inm, NULL);
 1487                         ip_mfilter_free(imf);
 1488                 }
 1489                 break;
 1490 #endif
 1491 #ifdef INET6
 1492         case AF_INET6:
 1493                 if (cif->cif_naddrs6 != 0)
 1494                         break;
 1495 
 1496                 while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) {
 1497                         ip6_mfilter_remove(&im6o->im6o_head, im6f);
 1498                         in6_leavegroup(im6f->im6f_in6m, NULL);
 1499                         ip6_mfilter_free(im6f);
 1500                 }
 1501                 break;
 1502 #endif
 1503         }
 1504 }
 1505 
 1506 int
 1507 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa)
 1508 {
 1509         struct m_tag *mtag;
 1510         struct carp_softc *sc;
 1511 
 1512         if (!sa)
 1513                 return (0);
 1514 
 1515         switch (sa->sa_family) {
 1516 #ifdef INET
 1517         case AF_INET:
 1518                 break;
 1519 #endif
 1520 #ifdef INET6
 1521         case AF_INET6:
 1522                 break;
 1523 #endif
 1524         default:
 1525                 return (0);
 1526         }
 1527 
 1528         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
 1529         if (mtag == NULL)
 1530                 return (0);
 1531 
 1532         bcopy(mtag + 1, &sc, sizeof(sc));
 1533 
 1534         /* Set the source MAC address to the Virtual Router MAC Address. */
 1535         switch (ifp->if_type) {
 1536         case IFT_ETHER:
 1537         case IFT_BRIDGE:
 1538         case IFT_L2VLAN: {
 1539                         struct ether_header *eh;
 1540 
 1541                         eh = mtod(m, struct ether_header *);
 1542                         eh->ether_shost[0] = 0;
 1543                         eh->ether_shost[1] = 0;
 1544                         eh->ether_shost[2] = 0x5e;
 1545                         eh->ether_shost[3] = 0;
 1546                         eh->ether_shost[4] = 1;
 1547                         eh->ether_shost[5] = sc->sc_vhid;
 1548                 }
 1549                 break;
 1550         default:
 1551                 printf("%s: carp is not supported for the %d interface type\n",
 1552                     ifp->if_xname, ifp->if_type);
 1553                 return (EOPNOTSUPP);
 1554         }
 1555 
 1556         return (0);
 1557 }
 1558 
/*
 * Allocate and initialise a new softc for ifp and link it into both the
 * per-interface list and the global carp_list.
 *
 * The per-interface carp_if is created first if ifp does not have one yet.
 * Allocations use M_WAITOK.  carp_sx must be held exclusively.
 * Returns the new softc.
 */
static struct carp_softc*
carp_alloc(struct ifnet *ifp)
{
        struct carp_softc *sc;
        struct carp_if *cif;

        sx_assert(&carp_sx, SA_XLOCKED);

        if ((cif = ifp->if_carp) == NULL)
                cif = carp_alloc_if(ifp);

        sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);

        sc->sc_advbase = CARP_DFLTINTV;
        sc->sc_vhid = -1;       /* required setting */
        sc->sc_init_counter = 1;
        sc->sc_state = INIT;

        /* Start with room for a single address pointer. */
        sc->sc_ifasiz = sizeof(struct ifaddr *);
        sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);
        sc->sc_carpdev = ifp;

        CARP_LOCK_INIT(sc);
        /*
         * All callouts are tied to the softc mutex and initialised with
         * CALLOUT_RETURNUNLOCKED; the handlers in this file release the
         * lock themselves before returning.
         */
#ifdef INET
        callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
#endif
#ifdef INET6
        callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
#endif
        callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);

        /* Publish the softc on the per-interface list... */
        CIF_LOCK(cif);
        TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
        CIF_UNLOCK(cif);

        /* ...and on the global list. */
        mtx_lock(&carp_mtx);
        LIST_INSERT_HEAD(&carp_list, sc, sc_next);
        mtx_unlock(&carp_mtx);

        return (sc);
}
 1600 
 1601 static void
 1602 carp_grow_ifas(struct carp_softc *sc)
 1603 {
 1604         struct ifaddr **new;
 1605 
 1606         new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO);
 1607         CARP_LOCK(sc);
 1608         bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
 1609         free(sc->sc_ifas, M_CARP);
 1610         sc->sc_ifas = new;
 1611         sc->sc_ifasiz *= 2;
 1612         CARP_UNLOCK(sc);
 1613 }
 1614 
/*
 * Tear down a softc: undo its demotion contribution, unlink it from the
 * per-interface and global lists, drain its callouts and free it.
 *
 * carp_sx must be held exclusively.  The softc lock is released here
 * without being acquired in this function, so the caller is presumably
 * expected to enter with it held -- NOTE(review): confirm against callers.
 */
static void
carp_destroy(struct carp_softc *sc)
{
        struct ifnet *ifp = sc->sc_carpdev;
        struct carp_if *cif = ifp->if_carp;

        sx_assert(&carp_sx, SA_XLOCKED);

        if (sc->sc_suppress)
                carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
        CARP_UNLOCK(sc);

        CIF_LOCK(cif);
        TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
        CIF_UNLOCK(cif);

        mtx_lock(&carp_mtx);
        LIST_REMOVE(sc, sc_next);
        mtx_unlock(&carp_mtx);

        /* Drain every callout before the lock they use is destroyed. */
        callout_drain(&sc->sc_ad_tmo);
#ifdef INET
        callout_drain(&sc->sc_md_tmo);
#endif
#ifdef INET6
        callout_drain(&sc->sc_md6_tmo);
#endif
        CARP_LOCK_DESTROY(sc);

        free(sc->sc_ifas, M_CARP);
        free(sc, M_CARP);
}
 1647 
 1648 static struct carp_if*
 1649 carp_alloc_if(struct ifnet *ifp)
 1650 {
 1651         struct carp_if *cif;
 1652         int error;
 1653 
 1654         cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);
 1655 
 1656         if ((error = ifpromisc(ifp, 1)) != 0)
 1657                 printf("%s: ifpromisc(%s) failed: %d\n",
 1658                     __func__, ifp->if_xname, error);
 1659         else
 1660                 cif->cif_flags |= CIF_PROMISC;
 1661 
 1662         CIF_LOCK_INIT(cif);
 1663         cif->cif_ifp = ifp;
 1664         TAILQ_INIT(&cif->cif_vrs);
 1665 
 1666         IF_ADDR_WLOCK(ifp);
 1667         ifp->if_carp = cif;
 1668         if_ref(ifp);
 1669         IF_ADDR_WUNLOCK(ifp);
 1670 
 1671         return (cif);
 1672 }
 1673 
 1674 static void
 1675 carp_free_if(struct carp_if *cif)
 1676 {
 1677         struct ifnet *ifp = cif->cif_ifp;
 1678 
 1679         CIF_LOCK_ASSERT(cif);
 1680         KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
 1681             __func__));
 1682 
 1683         IF_ADDR_WLOCK(ifp);
 1684         ifp->if_carp = NULL;
 1685         IF_ADDR_WUNLOCK(ifp);
 1686 
 1687         CIF_LOCK_DESTROY(cif);
 1688 
 1689         if (cif->cif_flags & CIF_PROMISC)
 1690                 ifpromisc(ifp, 0);
 1691         if_rele(ifp);
 1692 
 1693         free(cif, M_CARP);
 1694 }
 1695 
 1696 static void
 1697 carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv)
 1698 {
 1699 
 1700         CARP_LOCK(sc);
 1701         carpr->carpr_state = sc->sc_state;
 1702         carpr->carpr_vhid = sc->sc_vhid;
 1703         carpr->carpr_advbase = sc->sc_advbase;
 1704         carpr->carpr_advskew = sc->sc_advskew;
 1705         if (priv)
 1706                 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
 1707         else
 1708                 bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
 1709         CARP_UNLOCK(sc);
 1710 }
 1711 
 1712 int
 1713 carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
 1714 {
 1715         struct carpreq carpr;
 1716         struct ifnet *ifp;
 1717         struct carp_softc *sc = NULL;
 1718         int error = 0, locked = 0;
 1719 
 1720         if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr)))
 1721                 return (error);
 1722 
 1723         ifp = ifunit_ref(ifr->ifr_name);
 1724         if (ifp == NULL)
 1725                 return (ENXIO);
 1726 
 1727         switch (ifp->if_type) {
 1728         case IFT_ETHER:
 1729         case IFT_L2VLAN:
 1730         case IFT_BRIDGE:
 1731                 break;
 1732         default:
 1733                 error = EOPNOTSUPP;
 1734                 goto out;
 1735         }
 1736 
 1737         if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 1738                 error = EADDRNOTAVAIL;
 1739                 goto out;
 1740         }
 1741 
 1742         sx_xlock(&carp_sx);
 1743         switch (cmd) {
 1744         case SIOCSVH:
 1745                 if ((error = priv_check(td, PRIV_NETINET_CARP)))
 1746                         break;
 1747                 if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID ||
 1748                     carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) {
 1749                         error = EINVAL;
 1750                         break;
 1751                 }
 1752 
 1753                 if (ifp->if_carp) {
 1754                         IFNET_FOREACH_CARP(ifp, sc)
 1755                                 if (sc->sc_vhid == carpr.carpr_vhid)
 1756                                         break;
 1757                 }
 1758                 if (sc == NULL) {
 1759                         sc = carp_alloc(ifp);
 1760                         CARP_LOCK(sc);
 1761                         sc->sc_vhid = carpr.carpr_vhid;
 1762                         LLADDR(&sc->sc_addr)[0] = 0;
 1763                         LLADDR(&sc->sc_addr)[1] = 0;
 1764                         LLADDR(&sc->sc_addr)[2] = 0x5e;
 1765                         LLADDR(&sc->sc_addr)[3] = 0;
 1766                         LLADDR(&sc->sc_addr)[4] = 1;
 1767                         LLADDR(&sc->sc_addr)[5] = sc->sc_vhid;
 1768                 } else
 1769                         CARP_LOCK(sc);
 1770                 locked = 1;
 1771                 if (carpr.carpr_advbase > 0) {
 1772                         if (carpr.carpr_advbase > 255 ||
 1773                             carpr.carpr_advbase < CARP_DFLTINTV) {
 1774                                 error = EINVAL;
 1775                                 break;
 1776                         }
 1777                         sc->sc_advbase = carpr.carpr_advbase;
 1778                 }
 1779                 if (carpr.carpr_advskew >= 255) {
 1780                         error = EINVAL;
 1781                         break;
 1782                 }
 1783                 sc->sc_advskew = carpr.carpr_advskew;
 1784                 if (carpr.carpr_key[0] != '\0') {
 1785                         bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
 1786                         carp_hmac_prepare(sc);
 1787                 }
 1788                 if (sc->sc_state != INIT &&
 1789                     carpr.carpr_state != sc->sc_state) {
 1790                         switch (carpr.carpr_state) {
 1791                         case BACKUP:
 1792                                 callout_stop(&sc->sc_ad_tmo);
 1793                                 carp_set_state(sc, BACKUP,
 1794                                     "user requested via ifconfig");
 1795                                 carp_setrun(sc, 0);
 1796                                 carp_delroute(sc);
 1797                                 break;
 1798                         case MASTER:
 1799                                 carp_master_down_locked(sc,
 1800                                     "user requested via ifconfig");
 1801                                 break;
 1802                         default:
 1803                                 break;
 1804                         }
 1805                 }
 1806                 break;
 1807 
 1808         case SIOCGVH:
 1809             {
 1810                 int priveleged;
 1811 
 1812                 if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) {
 1813                         error = EINVAL;
 1814                         break;
 1815                 }
 1816                 if (carpr.carpr_count < 1) {
 1817                         error = EMSGSIZE;
 1818                         break;
 1819                 }
 1820                 if (ifp->if_carp == NULL) {
 1821                         error = ENOENT;
 1822                         break;
 1823                 }
 1824 
 1825                 priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0);
 1826                 if (carpr.carpr_vhid != 0) {
 1827                         IFNET_FOREACH_CARP(ifp, sc)
 1828                                 if (sc->sc_vhid == carpr.carpr_vhid)
 1829                                         break;
 1830                         if (sc == NULL) {
 1831                                 error = ENOENT;
 1832                                 break;
 1833                         }
 1834                         carp_carprcp(&carpr, sc, priveleged);
 1835                         error = copyout(&carpr, ifr_data_get_ptr(ifr),
 1836                             sizeof(carpr));
 1837                 } else  {
 1838                         int i, count;
 1839 
 1840                         count = 0;
 1841                         IFNET_FOREACH_CARP(ifp, sc)
 1842                                 count++;
 1843 
 1844                         if (count > carpr.carpr_count) {
 1845                                 CIF_UNLOCK(ifp->if_carp);
 1846                                 error = EMSGSIZE;
 1847                                 break;
 1848                         }
 1849 
 1850                         i = 0;
 1851                         IFNET_FOREACH_CARP(ifp, sc) {
 1852                                 carp_carprcp(&carpr, sc, priveleged);
 1853                                 carpr.carpr_count = count;
 1854                                 error = copyout(&carpr,
 1855                                     (char *)ifr_data_get_ptr(ifr) +
 1856                                     (i * sizeof(carpr)), sizeof(carpr));
 1857                                 if (error) {
 1858                                         CIF_UNLOCK(ifp->if_carp);
 1859                                         break;
 1860                                 }
 1861                                 i++;
 1862                         }
 1863                 }
 1864                 break;
 1865             }
 1866         default:
 1867                 error = EINVAL;
 1868         }
 1869         sx_xunlock(&carp_sx);
 1870 
 1871 out:
 1872         if (locked)
 1873                 CARP_UNLOCK(sc);
 1874         if_rele(ifp);
 1875 
 1876         return (error);
 1877 }
 1878 
 1879 static int
 1880 carp_get_vhid(struct ifaddr *ifa)
 1881 {
 1882 
 1883         if (ifa == NULL || ifa->ifa_carp == NULL)
 1884                 return (0);
 1885 
 1886         return (ifa->ifa_carp->sc_vhid);
 1887 }
 1888 
 1889 int
 1890 carp_attach(struct ifaddr *ifa, int vhid)
 1891 {
 1892         struct ifnet *ifp = ifa->ifa_ifp;
 1893         struct carp_if *cif = ifp->if_carp;
 1894         struct carp_softc *sc;
 1895         int index, error;
 1896 
 1897         KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa));
 1898 
 1899         switch (ifa->ifa_addr->sa_family) {
 1900 #ifdef INET
 1901         case AF_INET:
 1902 #endif
 1903 #ifdef INET6
 1904         case AF_INET6:
 1905 #endif
 1906                 break;
 1907         default:
 1908                 return (EPROTOTYPE);
 1909         }
 1910 
 1911         sx_xlock(&carp_sx);
 1912         if (ifp->if_carp == NULL) {
 1913                 sx_xunlock(&carp_sx);
 1914                 return (ENOPROTOOPT);
 1915         }
 1916 
 1917         IFNET_FOREACH_CARP(ifp, sc)
 1918                 if (sc->sc_vhid == vhid)
 1919                         break;
 1920         if (sc == NULL) {
 1921                 sx_xunlock(&carp_sx);
 1922                 return (ENOENT);
 1923         }
 1924 
 1925         error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
 1926         if (error) {
 1927                 CIF_FREE(cif);
 1928                 sx_xunlock(&carp_sx);
 1929                 return (error);
 1930         }
 1931 
 1932         index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
 1933         if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
 1934                 carp_grow_ifas(sc);
 1935 
 1936         switch (ifa->ifa_addr->sa_family) {
 1937 #ifdef INET
 1938         case AF_INET:
 1939                 cif->cif_naddrs++;
 1940                 sc->sc_naddrs++;
 1941                 break;
 1942 #endif
 1943 #ifdef INET6
 1944         case AF_INET6:
 1945                 cif->cif_naddrs6++;
 1946                 sc->sc_naddrs6++;
 1947                 break;
 1948 #endif
 1949         }
 1950 
 1951         ifa_ref(ifa);
 1952 
 1953         CARP_LOCK(sc);
 1954         sc->sc_ifas[index - 1] = ifa;
 1955         ifa->ifa_carp = sc;
 1956         carp_hmac_prepare(sc);
 1957         carp_sc_state(sc);
 1958         CARP_UNLOCK(sc);
 1959 
 1960         sx_xunlock(&carp_sx);
 1961 
 1962         return (0);
 1963 }
 1964 
 1965 void
 1966 carp_detach(struct ifaddr *ifa, bool keep_cif)
 1967 {
 1968         struct ifnet *ifp = ifa->ifa_ifp;
 1969         struct carp_if *cif = ifp->if_carp;
 1970         struct carp_softc *sc = ifa->ifa_carp;
 1971         int i, index;
 1972 
 1973         KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));
 1974 
 1975         sx_xlock(&carp_sx);
 1976 
 1977         CARP_LOCK(sc);
 1978         /* Shift array. */
 1979         index = sc->sc_naddrs + sc->sc_naddrs6;
 1980         for (i = 0; i < index; i++)
 1981                 if (sc->sc_ifas[i] == ifa)
 1982                         break;
 1983         KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
 1984         for (; i < index - 1; i++)
 1985                 sc->sc_ifas[i] = sc->sc_ifas[i+1];
 1986         sc->sc_ifas[index - 1] = NULL;
 1987 
 1988         switch (ifa->ifa_addr->sa_family) {
 1989 #ifdef INET
 1990         case AF_INET:
 1991                 cif->cif_naddrs--;
 1992                 sc->sc_naddrs--;
 1993                 break;
 1994 #endif
 1995 #ifdef INET6
 1996         case AF_INET6:
 1997                 cif->cif_naddrs6--;
 1998                 sc->sc_naddrs6--;
 1999                 break;
 2000 #endif
 2001         }
 2002 
 2003         carp_ifa_delroute(ifa);
 2004         carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);
 2005 
 2006         ifa->ifa_carp = NULL;
 2007         ifa_free(ifa);
 2008 
 2009         carp_hmac_prepare(sc);
 2010         carp_sc_state(sc);
 2011 
 2012         if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)
 2013                 carp_destroy(sc);
 2014         else
 2015                 CARP_UNLOCK(sc);
 2016 
 2017         if (!keep_cif)
 2018                 CIF_FREE(cif);
 2019 
 2020         sx_xunlock(&carp_sx);
 2021 }
 2022 
 2023 static void
 2024 carp_set_state(struct carp_softc *sc, int state, const char *reason)
 2025 {
 2026 
 2027         CARP_LOCK_ASSERT(sc);
 2028 
 2029         if (sc->sc_state != state) {
 2030                 const char *carp_states[] = { CARP_STATES };
 2031                 char subsys[IFNAMSIZ+5];
 2032 
 2033                 snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
 2034                     sc->sc_carpdev->if_xname);
 2035 
 2036                 CARP_LOG("%s: %s -> %s (%s)\n", subsys,
 2037                     carp_states[sc->sc_state], carp_states[state], reason);
 2038 
 2039                 sc->sc_state = state;
 2040 
 2041                 devctl_notify("CARP", subsys, carp_states[state], NULL);
 2042         }
 2043 }
 2044 
 2045 static void
 2046 carp_linkstate(struct ifnet *ifp)
 2047 {
 2048         struct carp_softc *sc;
 2049 
 2050         CIF_LOCK(ifp->if_carp);
 2051         IFNET_FOREACH_CARP(ifp, sc) {
 2052                 CARP_LOCK(sc);
 2053                 carp_sc_state(sc);
 2054                 CARP_UNLOCK(sc);
 2055         }
 2056         CIF_UNLOCK(ifp->if_carp);
 2057 }
 2058 
 2059 static void
 2060 carp_sc_state(struct carp_softc *sc)
 2061 {
 2062 
 2063         CARP_LOCK_ASSERT(sc);
 2064 
 2065         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
 2066             !(sc->sc_carpdev->if_flags & IFF_UP) ||
 2067             !V_carp_allow) {
 2068                 callout_stop(&sc->sc_ad_tmo);
 2069 #ifdef INET
 2070                 callout_stop(&sc->sc_md_tmo);
 2071 #endif
 2072 #ifdef INET6
 2073                 callout_stop(&sc->sc_md6_tmo);
 2074 #endif
 2075                 carp_set_state(sc, INIT, "hardware interface down");
 2076                 carp_setrun(sc, 0);
 2077                 if (!sc->sc_suppress)
 2078                         carp_demote_adj(V_carp_ifdown_adj, "interface down");
 2079                 sc->sc_suppress = 1;
 2080         } else {
 2081                 carp_set_state(sc, INIT, "hardware interface up");
 2082                 carp_setrun(sc, 0);
 2083                 if (sc->sc_suppress)
 2084                         carp_demote_adj(-V_carp_ifdown_adj, "interface up");
 2085                 sc->sc_suppress = 0;
 2086         }
 2087 }
 2088 
 2089 static void
 2090 carp_demote_adj(int adj, char *reason)
 2091 {
 2092         atomic_add_int(&V_carp_demotion, adj);
 2093         CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
 2094         taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
 2095 }
 2096 
 2097 static int
 2098 carp_allow_sysctl(SYSCTL_HANDLER_ARGS)
 2099 {
 2100         int new, error;
 2101         struct carp_softc *sc;
 2102 
 2103         new = V_carp_allow;
 2104         error = sysctl_handle_int(oidp, &new, 0, req);
 2105         if (error || !req->newptr)
 2106                 return (error);
 2107 
 2108         if (V_carp_allow != new) {
 2109                 V_carp_allow = new;
 2110 
 2111                 mtx_lock(&carp_mtx);
 2112                 LIST_FOREACH(sc, &carp_list, sc_next) {
 2113                         CARP_LOCK(sc);
 2114                         if (curvnet == sc->sc_carpdev->if_vnet)
 2115                                 carp_sc_state(sc);
 2116                         CARP_UNLOCK(sc);
 2117                 }
 2118                 mtx_unlock(&carp_mtx);
 2119         }
 2120 
 2121         return (0);
 2122 }
 2123 
 2124 static int
 2125 carp_dscp_sysctl(SYSCTL_HANDLER_ARGS)
 2126 {
 2127         int new, error;
 2128 
 2129         new = V_carp_dscp;
 2130         error = sysctl_handle_int(oidp, &new, 0, req);
 2131         if (error || !req->newptr)
 2132                 return (error);
 2133 
 2134         if (new < 0 || new > 63)
 2135                 return (EINVAL);
 2136 
 2137         V_carp_dscp = new;
 2138 
 2139         return (0);
 2140 }
 2141 
 2142 static int
 2143 carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
 2144 {
 2145         int new, error;
 2146 
 2147         new = V_carp_demotion;
 2148         error = sysctl_handle_int(oidp, &new, 0, req);
 2149         if (error || !req->newptr)
 2150                 return (error);
 2151 
 2152         carp_demote_adj(new, "sysctl");
 2153 
 2154         return (0);
 2155 }
 2156 
 2157 #ifdef INET
 2158 extern  struct domain inetdomain;
 2159 static struct protosw in_carp_protosw = {
 2160         .pr_type =              SOCK_RAW,
 2161         .pr_domain =            &inetdomain,
 2162         .pr_protocol =          IPPROTO_CARP,
 2163         .pr_flags =             PR_ATOMIC|PR_ADDR,
 2164         .pr_input =             carp_input,
 2165         .pr_output =            rip_output,
 2166         .pr_ctloutput =         rip_ctloutput,
 2167         .pr_usrreqs =           &rip_usrreqs
 2168 };
 2169 #endif
 2170 
 2171 #ifdef INET6
 2172 extern  struct domain inet6domain;
 2173 static struct protosw in6_carp_protosw = {
 2174         .pr_type =              SOCK_RAW,
 2175         .pr_domain =            &inet6domain,
 2176         .pr_protocol =          IPPROTO_CARP,
 2177         .pr_flags =             PR_ATOMIC|PR_ADDR,
 2178         .pr_input =             carp6_input,
 2179         .pr_output =            rip6_output,
 2180         .pr_ctloutput =         rip6_ctloutput,
 2181         .pr_usrreqs =           &rip6_usrreqs
 2182 };
 2183 #endif
 2184 
 2185 #ifdef VIMAGE
 2186 #if defined(__i386__)
 2187 /*
 2188  * XXX This is a hack to work around an absolute relocation outside
 2189  * set_vnet by one (on the stop symbol) for carpstats.  Add a dummy variable
 2190  * to the end of the file in the hope that the linker will just keep the
 2191  * order (as it seems to do at the moment).  It is understood to be fragile.
 2192  * See PR 230857 for a longer discussion of the problem and the referenced
 2193  * review for possible alternate solutions.  Each is a hack; we just need
 2194  * the least intrusive one for the next release.
 2195  */
 2196 VNET_DEFINE(char, carp_zzz) = 0xde;
 2197 #endif
 2198 #endif
 2199 
 2200 static void
 2201 carp_mod_cleanup(void)
 2202 {
 2203 
 2204 #ifdef INET
 2205         if (proto_reg[CARP_INET] == 0) {
 2206                 (void)ipproto_unregister(IPPROTO_CARP);
 2207                 pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW);
 2208                 proto_reg[CARP_INET] = -1;
 2209         }
 2210         carp_iamatch_p = NULL;
 2211 #endif
 2212 #ifdef INET6
 2213         if (proto_reg[CARP_INET6] == 0) {
 2214                 (void)ip6proto_unregister(IPPROTO_CARP);
 2215                 pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW);
 2216                 proto_reg[CARP_INET6] = -1;
 2217         }
 2218         carp_iamatch6_p = NULL;
 2219         carp_macmatch6_p = NULL;
 2220 #endif
 2221         carp_ioctl_p = NULL;
 2222         carp_attach_p = NULL;
 2223         carp_detach_p = NULL;
 2224         carp_get_vhid_p = NULL;
 2225         carp_linkstate_p = NULL;
 2226         carp_forus_p = NULL;
 2227         carp_output_p = NULL;
 2228         carp_demote_adj_p = NULL;
 2229         carp_master_p = NULL;
 2230         mtx_unlock(&carp_mtx);
 2231         taskqueue_drain(taskqueue_swi, &carp_sendall_task);
 2232         mtx_destroy(&carp_mtx);
 2233         sx_destroy(&carp_sx);
 2234 }
 2235 
 2236 static int
 2237 carp_mod_load(void)
 2238 {
 2239         int err;
 2240 
 2241         mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
 2242         sx_init(&carp_sx, "carp_sx");
 2243         LIST_INIT(&carp_list);
 2244         carp_get_vhid_p = carp_get_vhid;
 2245         carp_forus_p = carp_forus;
 2246         carp_output_p = carp_output;
 2247         carp_linkstate_p = carp_linkstate;
 2248         carp_ioctl_p = carp_ioctl;
 2249         carp_attach_p = carp_attach;
 2250         carp_detach_p = carp_detach;
 2251         carp_demote_adj_p = carp_demote_adj;
 2252         carp_master_p = carp_master;
 2253 #ifdef INET6
 2254         carp_iamatch6_p = carp_iamatch6;
 2255         carp_macmatch6_p = carp_macmatch6;
 2256         proto_reg[CARP_INET6] = pf_proto_register(PF_INET6,
 2257             (struct protosw *)&in6_carp_protosw);
 2258         if (proto_reg[CARP_INET6]) {
 2259                 printf("carp: error %d attaching to PF_INET6\n",
 2260                     proto_reg[CARP_INET6]);
 2261                 carp_mod_cleanup();
 2262                 return (proto_reg[CARP_INET6]);
 2263         }
 2264         err = ip6proto_register(IPPROTO_CARP);
 2265         if (err) {
 2266                 printf("carp: error %d registering with INET6\n", err);
 2267                 carp_mod_cleanup();
 2268                 return (err);
 2269         }
 2270 #endif
 2271 #ifdef INET
 2272         carp_iamatch_p = carp_iamatch;
 2273         proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw);
 2274         if (proto_reg[CARP_INET]) {
 2275                 printf("carp: error %d attaching to PF_INET\n",
 2276                     proto_reg[CARP_INET]);
 2277                 carp_mod_cleanup();
 2278                 return (proto_reg[CARP_INET]);
 2279         }
 2280         err = ipproto_register(IPPROTO_CARP);
 2281         if (err) {
 2282                 printf("carp: error %d registering with INET\n", err);
 2283                 carp_mod_cleanup();
 2284                 return (err);
 2285         }
 2286 #endif
 2287         return (0);
 2288 }
 2289 
 2290 static int
 2291 carp_modevent(module_t mod, int type, void *data)
 2292 {
 2293         switch (type) {
 2294         case MOD_LOAD:
 2295                 return carp_mod_load();
 2296                 /* NOTREACHED */
 2297         case MOD_UNLOAD:
 2298                 mtx_lock(&carp_mtx);
 2299                 if (LIST_EMPTY(&carp_list))
 2300                         carp_mod_cleanup();
 2301                 else {
 2302                         mtx_unlock(&carp_mtx);
 2303                         return (EBUSY);
 2304                 }
 2305                 break;
 2306 
 2307         default:
 2308                 return (EINVAL);
 2309         }
 2310 
 2311         return (0);
 2312 }
 2313 
 2314 static moduledata_t carp_mod = {
 2315         "carp",
 2316         carp_modevent,
 2317         0
 2318 };
 2319 
 2320 DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);

Cache object: 2be1cb2a21e9fb3bc94ba69c7630ddd7


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.