The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_carp.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $FreeBSD: releng/6.2/sys/netinet/ip_carp.c 163211 2006-10-10 18:39:38Z bz $ */
    2 
    3 /*
    4  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
    5  * Copyright (c) 2003 Ryan McBride. All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
   20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
   26  * THE POSSIBILITY OF SUCH DAMAGE.
   27  */
   28 
   29 #include "opt_carp.h"
   30 #include "opt_bpf.h"
   31 #include "opt_inet.h"
   32 #include "opt_inet6.h"
   33 
   34 #include <sys/types.h>
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/conf.h>
   38 #include <sys/kernel.h>
   39 #include <sys/limits.h>
   40 #include <sys/malloc.h>
   41 #include <sys/mbuf.h>
   42 #include <sys/module.h>
   43 #include <sys/time.h>
   44 #include <sys/proc.h>
   45 #include <sys/sysctl.h>
   46 #include <sys/syslog.h>
   47 #include <sys/signalvar.h>
   48 #include <sys/filio.h>
   49 #include <sys/sockio.h>
   50 
   51 #include <sys/socket.h>
   52 #include <sys/vnode.h>
   53 
   54 #include <machine/stdarg.h>
   55 
   56 #include <net/bpf.h>
   57 #include <net/ethernet.h>
   58 #include <net/fddi.h>
   59 #include <net/iso88025.h>
   60 #include <net/if.h>
   61 #include <net/if_clone.h>
   62 #include <net/if_types.h>
   63 #include <net/route.h>
   64 
   65 #ifdef INET
   66 #include <netinet/in.h>
   67 #include <netinet/in_var.h>
   68 #include <netinet/in_systm.h>
   69 #include <netinet/ip.h>
   70 #include <netinet/ip_var.h>
   71 #include <netinet/if_ether.h>
   72 #include <machine/in_cksum.h>
   73 #endif
   74 
   75 #ifdef INET6
   76 #include <netinet/icmp6.h>
   77 #include <netinet/ip6.h>
   78 #include <netinet6/ip6_var.h>
   79 #include <netinet6/scope6_var.h>
   80 #include <netinet6/nd6.h>
   81 #include <net/if_dl.h>
   82 #endif
   83 
   84 #include <crypto/sha1.h>
   85 #include <netinet/ip_carp.h>
   86 
   87 #define CARP_IFNAME     "carp"  /* base name handed to if_initname() for clones */
   88 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); /* malloc type of carp_softc */
   89 SYSCTL_DECL(_net_inet_carp);    /* node the SYSCTL_* entries below hang off */
   90 
   91 struct carp_softc {
      /*
       * Per-virtual-host state.  One instance is allocated for every cloned
       * carp interface (see carp_clone_create()).
       */
   92         struct ifnet            *sc_ifp;        /* ifnet of this carp interface */
   93         struct ifnet            *sc_carpdev;    /* Pointer to parent interface */
   94         struct in_ifaddr        *sc_ia;         /* primary iface address */
   95         struct ip_moptions       sc_imo;        /* imo_multicast_vif is set to -1 at create */
   96 #ifdef INET6
   97         struct in6_ifaddr       *sc_ia6;        /* primary iface address v6 */
   98         struct ip6_moptions      sc_im6o;       /* hop limit is set to CARP_DFLTTL at create */
   99 #endif /* INET6 */
  100         TAILQ_ENTRY(carp_softc)  sc_list;       /* linkage on carp_if.vhif_vrs */
  101 
  102         enum { INIT = 0, BACKUP, MASTER }       sc_state;
  103 
  104         int                      sc_flags_backup;
  105         int                      sc_suppress;   /* nonzero: counted in carp_suppress_preempt */
  106 
  107         int                      sc_sendad_errors;
      /* At or above this error count the softc is counted in carp_suppress_preempt. */
  108 #define CARP_SENDAD_MAX_ERRORS  3
  109         int                      sc_sendad_success;
  110 #define CARP_SENDAD_MIN_SUCCESS 3
  111 
  112         int                      sc_vhid;       /* compared with carp_vhid on input; -1 at create */
  113         int                      sc_advskew;    /* used as advskew/256 of a second */
  114         int                      sc_naddrs;
  115         int                      sc_naddrs6;
  116         int                      sc_advbase;    /* seconds */
  117         int                      sc_init_counter; /* nonzero: next ad draws a random counter */
  118         u_int64_t                sc_counter;    /* counter sent in / learned from advertisements */
  119 
  120         /* authentication */
  121 #define CARP_HMAC_PAD   64
  122         unsigned char sc_key[CARP_KEY_LEN];
  123         unsigned char sc_pad[CARP_HMAC_PAD];    /* key XOR opad after carp_hmac_prepare() */
  124         SHA1_CTX sc_sha1;                       /* precomputed start of the inner hash */
  125 
  126         struct callout           sc_ad_tmo;     /* advertisement timeout */
  127         struct callout           sc_md_tmo;     /* master down timeout */
  128         struct callout           sc_md6_tmo;    /* master down timeout (presumably IPv6 - confirm) */
  129         
  130         LIST_ENTRY(carp_softc)   sc_next;       /* linkage on the global carpif_list */
  131 };
  132 #define SC2IFP(sc)      ((sc)->sc_ifp)
  133 
  134 int carp_suppress_preempt = 0;
      /*
       * carp_suppress_preempt is decremented in carpdetach() for each softc
       * that had sc_suppress set or had reached CARP_SENDAD_MAX_ERRORS; while
       * it is nonzero carp_input_c() compares with an advskew of at least 240.
       *
       * carp_opts[] is indexed by the CARPCTL_* identifiers and exported
       * through the sysctl entries below.
       */
  135 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 };    /* XXX for now */
  136 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
  137     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
  138 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
  139     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
  140 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
  141     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
  142 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
  143     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
  144 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
  145     &carp_suppress_preempt, 0, "Preemption is suppressed");
  146 
      /* Packet and error counters bumped by the input paths in this file. */
  147 struct carpstats carpstats;
  148 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
  149     &carpstats, carpstats,
  150     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
  151 
  152 struct carp_if {
      /*
       * State hung off a parent interface through ifnet.if_carp; freed by
       * carpdetach() when the last softc is removed from it.
       */
  153         TAILQ_HEAD(, carp_softc) vhif_vrs;     /* softcs on this parent, via sc_list */
  154         int vhif_nvrs;                         /* number of entries on vhif_vrs */
  155 
  156         struct ifnet    *vhif_ifp;
  157         struct mtx       vhif_mtx;             /* taken by the CARP_LOCK()/CARP_SCLOCK() macros */
  158 };
  159 
  160 /* Get carp_if from softc. Valid after carp_set_addr{,6}. */
      /* Dereferences sc_carpdev, so callers must make sure it is not NULL. */
  161 #define SC2CIF(sc)              ((struct carp_if *)(sc)->sc_carpdev->if_carp)
  162 
  163 /* lock per carp_if queue */
  164 #define CARP_LOCK_INIT(cif)     mtx_init(&(cif)->vhif_mtx, "carp_if",   \
  165         NULL, MTX_DEF)
  166 #define CARP_LOCK_DESTROY(cif)  mtx_destroy(&(cif)->vhif_mtx)
  167 #define CARP_LOCK_ASSERT(cif)   mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
  168 #define CARP_LOCK(cif)          mtx_lock(&(cif)->vhif_mtx)
  169 #define CARP_UNLOCK(cif)        mtx_unlock(&(cif)->vhif_mtx)
  170 
      /*
       * Same mutex, reached from a softc.  Each use re-evaluates SC2CIF(sc),
       * so lock and unlock only pair up if sc_carpdev is unchanged in between.
       */
  171 #define CARP_SCLOCK(sc)         mtx_lock(&SC2CIF(sc)->vhif_mtx)
  172 #define CARP_SCUNLOCK(sc)       mtx_unlock(&SC2CIF(sc)->vhif_mtx)
  173 #define CARP_SCLOCK_ASSERT(sc)  mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
  174 
      /* Emitted at LOG_INFO when carp_opts[CARPCTL_LOG] is greater than 0. */
  175 #define CARP_LOG(...)   do {                            \
  176         if (carp_opts[CARPCTL_LOG] > 0)                 \
  177                 log(LOG_INFO, __VA_ARGS__);             \
  178 } while (0)
  179 
      /* Emitted at LOG_DEBUG when carp_opts[CARPCTL_LOG] is greater than 1. */
  180 #define CARP_DEBUG(...) do {                            \
  181         if (carp_opts[CARPCTL_LOG] > 1)                 \
  182                 log(LOG_DEBUG, __VA_ARGS__);            \
  183 } while (0)
  184 
  185 static void     carp_hmac_prepare(struct carp_softc *);
      /* Forward declarations of the file-local functions. */
  186 static void     carp_hmac_generate(struct carp_softc *, u_int32_t *,
  187                     unsigned char *);
  188 static int      carp_hmac_verify(struct carp_softc *, u_int32_t *,
  189                     unsigned char *);
  190 static void     carp_setroute(struct carp_softc *, int);
  191 static void     carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
  192 static int      carp_clone_create(struct if_clone *, int);
  193 static void     carp_clone_destroy(struct ifnet *);
  194 static void     carpdetach(struct carp_softc *);
  195 static int      carp_prepare_ad(struct mbuf *, struct carp_softc *,
  196                     struct carp_header *);
  197 static void     carp_send_ad_all(void);
  198 static void     carp_send_ad(void *);
  199 static void     carp_send_ad_locked(struct carp_softc *);
  200 static void     carp_send_arp(struct carp_softc *);
  201 static void     carp_master_down(void *);
  202 static void     carp_master_down_locked(struct carp_softc *);
  203 static int      carp_ioctl(struct ifnet *, u_long, caddr_t);
  204 static int      carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
  205                     struct rtentry *);
  206 static void     carp_start(struct ifnet *);
  207 static void     carp_setrun(struct carp_softc *, sa_family_t);
  208 static void     carp_set_state(struct carp_softc *, int);
  209 static int      carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
      /* Selector passed as the last argument of carp_addrcount(). */
  210 enum    { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
  211 
  212 static void     carp_multicast_cleanup(struct carp_softc *);
  213 static int      carp_set_addr(struct carp_softc *, struct sockaddr_in *);
  214 static int      carp_del_addr(struct carp_softc *, struct sockaddr_in *);
  215 static void     carp_carpdev_state_locked(struct carp_if *);
  216 static void     carp_sc_state_locked(struct carp_softc *);
  217 #ifdef INET6
  218 static void     carp_send_na(struct carp_softc *);
  219 static int      carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
  220 static int      carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
  221 #endif
  222 
      /* Every softc, linked through sc_next; modified with carp_mtx held. */
  223 static LIST_HEAD(, carp_softc) carpif_list;
  224 static struct mtx carp_mtx;
      /* NOTE(review): presumably wires carp_clone_create/destroy to the cloner - confirm. */
  225 IFC_SIMPLE_DECLARE(carp, 0);
  226 
      /* NOTE(review): presumably the handle for carp_ifdetach(); registration is not visible here. */
  227 static eventhandler_tag if_detach_event_tag;
  228 
  229 static __inline u_int16_t
  230 carp_cksum(struct mbuf *m, int len)
  231 {
  232         return (in_cksum(m, len));
  233 }
  234 
  235 static void
  236 carp_hmac_prepare(struct carp_softc *sc)
  237 {
  238         u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
  239         u_int8_t vhid = sc->sc_vhid & 0xff;
  240         struct ifaddr *ifa;
  241         int i;
  242 #ifdef INET6
  243         struct in6_addr in6;
  244 #endif
  245 
  246         if (sc->sc_carpdev)
  247                 CARP_SCLOCK(sc);
  248 
  249         /* XXX: possible race here */
  250 
  251         /* compute ipad from key */
  252         bzero(sc->sc_pad, sizeof(sc->sc_pad));
  253         bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
  254         for (i = 0; i < sizeof(sc->sc_pad); i++)
  255                 sc->sc_pad[i] ^= 0x36;
  256 
  257         /* precompute first part of inner hash */
  258         SHA1Init(&sc->sc_sha1);
  259         SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
  260         SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
  261         SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
  262         SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
  263 #ifdef INET
  264         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
  265                 if (ifa->ifa_addr->sa_family == AF_INET)
  266                         SHA1Update(&sc->sc_sha1,
  267                             (void *)&ifatoia(ifa)->ia_addr.sin_addr.s_addr,
  268                             sizeof(struct in_addr));
  269         }
  270 #endif /* INET */
  271 #ifdef INET6
  272         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
  273                 if (ifa->ifa_addr->sa_family == AF_INET6) {
  274                         in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
  275                         in6_clearscope(&in6);
  276                         SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
  277                 }
  278         }
  279 #endif /* INET6 */
  280 
  281         /* convert ipad to opad */
  282         for (i = 0; i < sizeof(sc->sc_pad); i++)
  283                 sc->sc_pad[i] ^= 0x36 ^ 0x5c;
  284 
  285         if (sc->sc_carpdev)
  286                 CARP_SCUNLOCK(sc);
  287 }
  288 
  289 static void
  290 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
  291     unsigned char md[20])
  292 {
  293         SHA1_CTX sha1ctx;
  294 
  295         /* fetch first half of inner hash */
  296         bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
  297 
  298         SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
  299         SHA1Final(md, &sha1ctx);
  300 
  301         /* outer hash */
  302         SHA1Init(&sha1ctx);
  303         SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
  304         SHA1Update(&sha1ctx, md, 20);
  305         SHA1Final(md, &sha1ctx);
  306 }
  307 
  308 static int
  309 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
  310     unsigned char md[20])
  311 {
  312         unsigned char md2[20];
  313 
  314         CARP_SCLOCK_ASSERT(sc);
  315 
  316         carp_hmac_generate(sc, counter, md2);
  317 
  318         return (bcmp(md, md2, sizeof(md2)));
  319 }
  320 
  321 static void
  322 carp_setroute(struct carp_softc *sc, int cmd)
  323 {
  324         struct ifaddr *ifa;
  325         int s;
  326 
  327         if (sc->sc_carpdev)
  328                 CARP_SCLOCK_ASSERT(sc);
  329 
  330         s = splnet();
  331         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
  332                 if (ifa->ifa_addr->sa_family == AF_INET &&
  333                     sc->sc_carpdev != NULL) {
  334                         int count = carp_addrcount(
  335                             (struct carp_if *)sc->sc_carpdev->if_carp,
  336                             ifatoia(ifa), CARP_COUNT_MASTER);
  337 
  338                         if ((cmd == RTM_ADD && count == 1) ||
  339                             (cmd == RTM_DELETE && count == 0))
  340                                 rtinit(ifa, cmd, RTF_UP | RTF_HOST);
  341                 }
  342 #ifdef INET6
  343                 if (ifa->ifa_addr->sa_family == AF_INET6) {
  344                         if (cmd == RTM_ADD)
  345                                 in6_ifaddloop(ifa);
  346                         else
  347                                 in6_ifremloop(ifa);
  348                 }
  349 #endif /* INET6 */
  350         }
  351         splx(s);
  352 }
  353 
  354 static int
  355 carp_clone_create(struct if_clone *ifc, int unit)
  356 {
  357 
  358         struct carp_softc *sc;
  359         struct ifnet *ifp;
  360 
  361         MALLOC(sc, struct carp_softc *, sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
  362         ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
  363         if (ifp == NULL) {
  364                 FREE(sc, M_CARP);
  365                 return (ENOSPC);
  366         }
  367         
  368         sc->sc_flags_backup = 0;
  369         sc->sc_suppress = 0;
  370         sc->sc_advbase = CARP_DFLTINTV;
  371         sc->sc_vhid = -1;       /* required setting */
  372         sc->sc_advskew = 0;
  373         sc->sc_init_counter = 1;
  374         sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
  375 #ifdef INET6
  376         sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
  377 #endif
  378         sc->sc_imo.imo_multicast_vif = -1;
  379 
  380         callout_init(&sc->sc_ad_tmo, NET_CALLOUT_MPSAFE);
  381         callout_init(&sc->sc_md_tmo, NET_CALLOUT_MPSAFE);
  382         callout_init(&sc->sc_md6_tmo, NET_CALLOUT_MPSAFE);
  383         
  384         ifp->if_softc = sc;
  385         if_initname(ifp, CARP_IFNAME, unit);
  386         ifp->if_mtu = ETHERMTU;
  387         ifp->if_flags = IFF_LOOPBACK;
  388         ifp->if_ioctl = carp_ioctl;
  389         ifp->if_output = carp_looutput;
  390         ifp->if_start = carp_start;
  391         ifp->if_type = IFT_CARP;
  392         ifp->if_snd.ifq_maxlen = ifqmaxlen;
  393         ifp->if_hdrlen = 0;
  394         if_attach(ifp);
  395         bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));
  396         mtx_lock(&carp_mtx);
  397         LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
  398         mtx_unlock(&carp_mtx);
  399         return (0);
  400 }
  401 
  402 static void
  403 carp_clone_destroy(struct ifnet *ifp)
  404 {
  405         struct carp_softc *sc = ifp->if_softc;
  406 
  407         if (sc->sc_carpdev)
  408                 CARP_SCLOCK(sc);
  409         carpdetach(sc); 
  410         if (sc->sc_carpdev)
  411                 CARP_SCUNLOCK(sc);
  412 
  413         mtx_lock(&carp_mtx);
  414         LIST_REMOVE(sc, sc_next);
  415         mtx_unlock(&carp_mtx);
  416         bpfdetach(ifp);
  417         if_detach(ifp);
  418         if_free_type(ifp, IFT_ETHER);
  419         free(sc, M_CARP);
  420 }
  421 
  422 static void
  423 carpdetach(struct carp_softc *sc)
  424 {
  425         struct carp_if *cif;
  426 
  427         callout_stop(&sc->sc_ad_tmo);
  428         callout_stop(&sc->sc_md_tmo);
  429         callout_stop(&sc->sc_md6_tmo);
  430 
  431         if (sc->sc_suppress)
  432                 carp_suppress_preempt--;
  433         sc->sc_suppress = 0;
  434 
  435         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
  436                 carp_suppress_preempt--;
  437         sc->sc_sendad_errors = 0;
  438 
  439         carp_set_state(sc, INIT);
  440         SC2IFP(sc)->if_flags &= ~IFF_UP;
  441         carp_setrun(sc, 0);
  442         carp_multicast_cleanup(sc);
  443 
  444         if (sc->sc_carpdev != NULL) {
  445                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
  446                 CARP_LOCK_ASSERT(cif);
  447                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
  448                 if (!--cif->vhif_nvrs) {
  449                         ifpromisc(sc->sc_carpdev, 0);
  450                         sc->sc_carpdev->if_carp = NULL;
  451                         CARP_LOCK_DESTROY(cif);
  452                         FREE(cif, M_IFADDR);
  453                 }
  454         }
  455         sc->sc_carpdev = NULL;
  456 }
  457 
  458 /* Detach an interface from the carp. */
  459 static void
  460 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
  461 {
  462         struct carp_if *cif = (struct carp_if *)ifp->if_carp;
  463         struct carp_softc *sc, *nextsc;
  464  
  465         if (cif == NULL)
  466                 return;
  467 
  468         /*
  469          * XXX: At the end of for() cycle the lock will be destroyed.
  470          */
  471         CARP_LOCK(cif);
  472         for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
  473                 nextsc = TAILQ_NEXT(sc, sc_list);
  474                 carpdetach(sc);
  475         }
  476 }
  477 
  478 /*
  479  * process input packet.
  480  * we have rearranged checks order compared to the rfc,
  481  * but it seems more efficient this way or not possible otherwise.
  482  */
  483 void
  484 carp_input(struct mbuf *m, int hlen)
  485 {
  486         struct ip *ip = mtod(m, struct ip *);
  487         struct carp_header *ch;
  488         int iplen, len;
  489 
  490         carpstats.carps_ipackets++;
  491 
  492         if (!carp_opts[CARPCTL_ALLOW]) {
  493                 m_freem(m);
  494                 return;
  495         }
  496 
  497         /* check if received on a valid carp interface */
  498         if (m->m_pkthdr.rcvif->if_carp == NULL) {
  499                 carpstats.carps_badif++;
  500                 CARP_LOG("carp_input: packet received on non-carp "
  501                     "interface: %s\n",
  502                     m->m_pkthdr.rcvif->if_xname);
  503                 m_freem(m);
  504                 return;
  505         }
  506 
  507         /* verify that the IP TTL is 255.  */
  508         if (ip->ip_ttl != CARP_DFLTTL) {
  509                 carpstats.carps_badttl++;
  510                 CARP_LOG("carp_input: received ttl %d != 255i on %s\n",
  511                     ip->ip_ttl,
  512                     m->m_pkthdr.rcvif->if_xname);
  513                 m_freem(m);
  514                 return;
  515         }
  516 
  517         iplen = ip->ip_hl << 2;
  518 
  519         if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
  520                 carpstats.carps_badlen++;
  521                 CARP_LOG("carp_input: received len %zd < "
  522                     "sizeof(struct carp_header)\n",
  523                     m->m_len - sizeof(struct ip));
  524                 m_freem(m);
  525                 return;
  526         }
  527 
  528         if (iplen + sizeof(*ch) < m->m_len) {
  529                 if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
  530                         carpstats.carps_hdrops++;
  531                         CARP_LOG("carp_input: pullup failed\n");
  532                         return;
  533                 }
  534                 ip = mtod(m, struct ip *);
  535         }
  536         ch = (struct carp_header *)((char *)ip + iplen);
  537 
  538         /*
  539          * verify that the received packet length is
  540          * equal to the CARP header
  541          */
  542         len = iplen + sizeof(*ch);
  543         if (len > m->m_pkthdr.len) {
  544                 carpstats.carps_badlen++;
  545                 CARP_LOG("carp_input: packet too short %d on %s\n",
  546                     m->m_pkthdr.len,
  547                     m->m_pkthdr.rcvif->if_xname);
  548                 m_freem(m);
  549                 return;
  550         }
  551 
  552         if ((m = m_pullup(m, len)) == NULL) {
  553                 carpstats.carps_hdrops++;
  554                 return;
  555         }
  556         ip = mtod(m, struct ip *);
  557         ch = (struct carp_header *)((char *)ip + iplen);
  558 
  559         /* verify the CARP checksum */
  560         m->m_data += iplen;
  561         if (carp_cksum(m, len - iplen)) {
  562                 carpstats.carps_badsum++;
  563                 CARP_LOG("carp_input: checksum failed on %s\n",
  564                     m->m_pkthdr.rcvif->if_xname);
  565                 m_freem(m);
  566                 return;
  567         }
  568         m->m_data -= iplen;
  569 
  570         carp_input_c(m, ch, AF_INET);
  571 }
  572 
  573 #ifdef INET6
  574 int
  575 carp6_input(struct mbuf **mp, int *offp, int proto)
  576 {
  577         struct mbuf *m = *mp;
  578         struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
  579         struct carp_header *ch;
  580         u_int len;
  581 
  582         carpstats.carps_ipackets6++;
  583 
  584         if (!carp_opts[CARPCTL_ALLOW]) {
  585                 m_freem(m);
  586                 return (IPPROTO_DONE);
  587         }
  588 
  589         /* check if received on a valid carp interface */
  590         if (m->m_pkthdr.rcvif->if_carp == NULL) {
  591                 carpstats.carps_badif++;
  592                 CARP_LOG("carp6_input: packet received on non-carp "
  593                     "interface: %s\n",
  594                     m->m_pkthdr.rcvif->if_xname);
  595                 m_freem(m);
  596                 return (IPPROTO_DONE);
  597         }
  598 
  599         /* verify that the IP TTL is 255 */
  600         if (ip6->ip6_hlim != CARP_DFLTTL) {
  601                 carpstats.carps_badttl++;
  602                 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
  603                     ip6->ip6_hlim,
  604                     m->m_pkthdr.rcvif->if_xname);
  605                 m_freem(m);
  606                 return (IPPROTO_DONE);
  607         }
  608 
  609         /* verify that we have a complete carp packet */
  610         len = m->m_len;
  611         IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
  612         if (ch == NULL) {
  613                 carpstats.carps_badlen++;
  614                 CARP_LOG("carp6_input: packet size %u too small\n", len);
  615                 return (IPPROTO_DONE);
  616         }
  617 
  618 
  619         /* verify the CARP checksum */
  620         m->m_data += *offp;
  621         if (carp_cksum(m, sizeof(*ch))) {
  622                 carpstats.carps_badsum++;
  623                 CARP_LOG("carp6_input: checksum failed, on %s\n",
  624                     m->m_pkthdr.rcvif->if_xname);
  625                 m_freem(m);
  626                 return (IPPROTO_DONE);
  627         }
  628         m->m_data -= *offp;
  629 
  630         carp_input_c(m, ch, AF_INET6);
  631         return (IPPROTO_DONE);
  632 }
  633 #endif /* INET6 */
  634 
  635 static void
  636 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
  637 {
  638         struct ifnet *ifp = m->m_pkthdr.rcvif;
  639         struct carp_softc *sc;
  640         u_int64_t tmp_counter;
  641         struct timeval sc_tv, ch_tv;
  642 
  643         /* verify that the VHID is valid on the receiving interface */
  644         CARP_LOCK(ifp->if_carp);
  645         TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
  646                 if (sc->sc_vhid == ch->carp_vhid)
  647                         break;
  648 
  649         if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
  650             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
  651                 carpstats.carps_badvhid++;
  652                 CARP_UNLOCK(ifp->if_carp);
  653                 m_freem(m);
  654                 return;
  655         }
  656 
  657         getmicrotime(&SC2IFP(sc)->if_lastchange);
  658         SC2IFP(sc)->if_ipackets++;
  659         SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
  660 
  661         if (SC2IFP(sc)->if_bpf) {
  662                 struct ip *ip = mtod(m, struct ip *);
  663                 uint32_t af1 = af;
  664 
  665                 /* BPF wants net byte order */
  666                 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
  667                 ip->ip_off = htons(ip->ip_off);
  668                 bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
  669         }
  670 
  671         /* verify the CARP version. */
  672         if (ch->carp_version != CARP_VERSION) {
  673                 carpstats.carps_badver++;
  674                 SC2IFP(sc)->if_ierrors++;
  675                 CARP_UNLOCK(ifp->if_carp);
  676                 CARP_LOG("%s; invalid version %d\n",
  677                     SC2IFP(sc)->if_xname,
  678                     ch->carp_version);
  679                 m_freem(m);
  680                 return;
  681         }
  682 
  683         /* verify the hash */
  684         if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
  685                 carpstats.carps_badauth++;
  686                 SC2IFP(sc)->if_ierrors++;
  687                 CARP_UNLOCK(ifp->if_carp);
  688                 CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
  689                 m_freem(m);
  690                 return;
  691         }
  692 
  693         tmp_counter = ntohl(ch->carp_counter[0]);
  694         tmp_counter = tmp_counter<<32;
  695         tmp_counter += ntohl(ch->carp_counter[1]);
  696 
  697         /* XXX Replay protection goes here */
  698 
  699         sc->sc_init_counter = 0;
  700         sc->sc_counter = tmp_counter;
  701 
  702         sc_tv.tv_sec = sc->sc_advbase;
  703         if (carp_suppress_preempt && sc->sc_advskew <  240)
  704                 sc_tv.tv_usec = 240 * 1000000 / 256;
  705         else
  706                 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
  707         ch_tv.tv_sec = ch->carp_advbase;
  708         ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
  709 
  710         switch (sc->sc_state) {
  711         case INIT:
  712                 break;
  713         case MASTER:
  714                 /*
  715                  * If we receive an advertisement from a master who's going to
  716                  * be more frequent than us, go into BACKUP state.
  717                  */
  718                 if (timevalcmp(&sc_tv, &ch_tv, >) ||
  719                     timevalcmp(&sc_tv, &ch_tv, ==)) {
  720                         callout_stop(&sc->sc_ad_tmo);
  721                         CARP_DEBUG("%s: MASTER -> BACKUP "
  722                            "(more frequent advertisement received)\n",
  723                            SC2IFP(sc)->if_xname);
  724                         carp_set_state(sc, BACKUP);
  725                         carp_setrun(sc, 0);
  726                         carp_setroute(sc, RTM_DELETE);
  727                 }
  728                 break;
  729         case BACKUP:
  730                 /*
  731                  * If we're pre-empting masters who advertise slower than us,
  732                  * and this one claims to be slower, treat him as down.
  733                  */
  734                 if (carp_opts[CARPCTL_PREEMPT] &&
  735                     timevalcmp(&sc_tv, &ch_tv, <)) {
  736                         CARP_DEBUG("%s: BACKUP -> MASTER "
  737                             "(preempting a slower master)\n",
  738                             SC2IFP(sc)->if_xname);
  739                         carp_master_down_locked(sc);
  740                         break;
  741                 }
  742 
  743                 /*
  744                  *  If the master is going to advertise at such a low frequency
  745                  *  that he's guaranteed to time out, we'd might as well just
  746                  *  treat him as timed out now.
  747                  */
  748                 sc_tv.tv_sec = sc->sc_advbase * 3;
  749                 if (timevalcmp(&sc_tv, &ch_tv, <)) {
  750                         CARP_DEBUG("%s: BACKUP -> MASTER "
  751                             "(master timed out)\n",
  752                             SC2IFP(sc)->if_xname);
  753                         carp_master_down_locked(sc);
  754                         break;
  755                 }
  756 
  757                 /*
  758                  * Otherwise, we reset the counter and wait for the next
  759                  * advertisement.
  760                  */
  761                 carp_setrun(sc, af);
  762                 break;
  763         }
  764 
  765         CARP_UNLOCK(ifp->if_carp);
  766 
  767         m_freem(m);
  768         return;
  769 }
  770 
  771 static int
  772 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
  773 {
  774         struct m_tag *mtag;
  775         struct ifnet *ifp = SC2IFP(sc);
  776 
  777         if (sc->sc_init_counter) {
  778                 /* this could also be seconds since unix epoch */
  779                 sc->sc_counter = arc4random();
  780                 sc->sc_counter = sc->sc_counter << 32;
  781                 sc->sc_counter += arc4random();
  782         } else
  783                 sc->sc_counter++;
  784 
  785         ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
  786         ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
  787 
  788         carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
  789 
  790         /* Tag packet for carp_output */
  791         mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
  792         if (mtag == NULL) {
  793                 m_freem(m);
  794                 SC2IFP(sc)->if_oerrors++;
  795                 return (ENOMEM);
  796         }
  797         bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
  798         m_tag_prepend(m, mtag);
  799 
  800         return (0);
  801 }
  802 
  803 static void
  804 carp_send_ad_all(void)
  805 {
  806         struct carp_softc *sc;
  807 
  808         mtx_lock(&carp_mtx);
  809         LIST_FOREACH(sc, &carpif_list, sc_next) {
  810                 if (sc->sc_carpdev == NULL)
  811                         continue;
  812                 CARP_SCLOCK(sc);
  813                 if ((SC2IFP(sc)->if_flags & IFF_UP) &&
  814                     (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING) &&
  815                      sc->sc_state == MASTER)
  816                         carp_send_ad_locked(sc);
  817                 CARP_SCUNLOCK(sc);
  818         }
  819         mtx_unlock(&carp_mtx);
  820 }
  821 
  822 static void
  823 carp_send_ad(void *v)
  824 {
  825         struct carp_softc *sc = v;
  826 
  827         CARP_SCLOCK(sc);
  828         carp_send_ad_locked(sc);
  829         CARP_SCUNLOCK(sc);
  830 }
  831 
  832 static void
  833 carp_send_ad_locked(struct carp_softc *sc)
  834 {
  835         struct carp_header ch;
  836         struct timeval tv;
  837         struct carp_header *ch_ptr;
  838         struct mbuf *m;
  839         int len, advbase, advskew;
  840 
  841         CARP_SCLOCK_ASSERT(sc);
  842 
  843         /* bow out if we've lost our UPness or RUNNINGuiness */
  844         if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
  845             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
  846                 advbase = 255;
  847                 advskew = 255;
  848         } else {
  849                 advbase = sc->sc_advbase;
  850                 if (!carp_suppress_preempt || sc->sc_advskew > 240)
  851                         advskew = sc->sc_advskew;
  852                 else
  853                         advskew = 240;
  854                 tv.tv_sec = advbase;
  855                 tv.tv_usec = advskew * 1000000 / 256;
  856         }
  857 
  858         ch.carp_version = CARP_VERSION;
  859         ch.carp_type = CARP_ADVERTISEMENT;
  860         ch.carp_vhid = sc->sc_vhid;
  861         ch.carp_advbase = advbase;
  862         ch.carp_advskew = advskew;
  863         ch.carp_authlen = 7;    /* XXX DEFINE */
  864         ch.carp_pad1 = 0;       /* must be zero */
  865         ch.carp_cksum = 0;
  866 
  867 #ifdef INET
  868         if (sc->sc_ia) {
  869                 struct ip *ip;
  870 
  871                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
  872                 if (m == NULL) {
  873                         SC2IFP(sc)->if_oerrors++;
  874                         carpstats.carps_onomem++;
  875                         /* XXX maybe less ? */
  876                         if (advbase != 255 || advskew != 255)
  877                                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
  878                                     carp_send_ad, sc);
  879                         return;
  880                 }
  881                 len = sizeof(*ip) + sizeof(ch);
  882                 m->m_pkthdr.len = len;
  883                 m->m_pkthdr.rcvif = NULL;
  884                 m->m_len = len;
  885                 MH_ALIGN(m, m->m_len);
  886                 m->m_flags |= M_MCAST;
  887                 ip = mtod(m, struct ip *);
  888                 ip->ip_v = IPVERSION;
  889                 ip->ip_hl = sizeof(*ip) >> 2;
  890                 ip->ip_tos = IPTOS_LOWDELAY;
  891                 ip->ip_len = len;
  892                 ip->ip_id = ip_newid();
  893                 ip->ip_off = IP_DF;
  894                 ip->ip_ttl = CARP_DFLTTL;
  895                 ip->ip_p = IPPROTO_CARP;
  896                 ip->ip_sum = 0;
  897                 ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
  898                 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
  899 
  900                 ch_ptr = (struct carp_header *)(&ip[1]);
  901                 bcopy(&ch, ch_ptr, sizeof(ch));
  902                 if (carp_prepare_ad(m, sc, ch_ptr))
  903                         return;
  904 
  905                 m->m_data += sizeof(*ip);
  906                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
  907                 m->m_data -= sizeof(*ip);
  908 
  909                 getmicrotime(&SC2IFP(sc)->if_lastchange);
  910                 SC2IFP(sc)->if_opackets++;
  911                 SC2IFP(sc)->if_obytes += len;
  912                 carpstats.carps_opackets++;
  913 
  914                 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
  915                         SC2IFP(sc)->if_oerrors++;
  916                         if (sc->sc_sendad_errors < INT_MAX)
  917                                 sc->sc_sendad_errors++;
  918                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
  919                                 carp_suppress_preempt++;
  920                                 if (carp_suppress_preempt == 1) {
  921                                         CARP_SCUNLOCK(sc);
  922                                         carp_send_ad_all();
  923                                         CARP_SCLOCK(sc);
  924                                 }
  925                         }
  926                         sc->sc_sendad_success = 0;
  927                 } else {
  928                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
  929                                 if (++sc->sc_sendad_success >=
  930                                     CARP_SENDAD_MIN_SUCCESS) {
  931                                         carp_suppress_preempt--;
  932                                         sc->sc_sendad_errors = 0;
  933                                 }
  934                         } else
  935                                 sc->sc_sendad_errors = 0;
  936                 }
  937         }
  938 #endif /* INET */
  939 #ifdef INET6
  940         if (sc->sc_ia6) {
  941                 struct ip6_hdr *ip6;
  942 
  943                 MGETHDR(m, M_DONTWAIT, MT_HEADER);
  944                 if (m == NULL) {
  945                         SC2IFP(sc)->if_oerrors++;
  946                         carpstats.carps_onomem++;
  947                         /* XXX maybe less ? */
  948                         if (advbase != 255 || advskew != 255)
  949                                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
  950                                     carp_send_ad, sc);
  951                         return;
  952                 }
  953                 len = sizeof(*ip6) + sizeof(ch);
  954                 m->m_pkthdr.len = len;
  955                 m->m_pkthdr.rcvif = NULL;
  956                 m->m_len = len;
  957                 MH_ALIGN(m, m->m_len);
  958                 m->m_flags |= M_MCAST;
  959                 ip6 = mtod(m, struct ip6_hdr *);
  960                 bzero(ip6, sizeof(*ip6));
  961                 ip6->ip6_vfc |= IPV6_VERSION;
  962                 ip6->ip6_hlim = CARP_DFLTTL;
  963                 ip6->ip6_nxt = IPPROTO_CARP;
  964                 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
  965                     sizeof(struct in6_addr));
  966                 /* set the multicast destination */
  967 
  968                 ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
  969                 ip6->ip6_dst.s6_addr8[15] = 0x12;
  970                 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
  971                         SC2IFP(sc)->if_oerrors++;
  972                         m_freem(m);
  973                         CARP_LOG("%s: in6_setscope failed\n", __func__);
  974                         return;
  975                 }
  976 
  977                 ch_ptr = (struct carp_header *)(&ip6[1]);
  978                 bcopy(&ch, ch_ptr, sizeof(ch));
  979                 if (carp_prepare_ad(m, sc, ch_ptr))
  980                         return;
  981 
  982                 m->m_data += sizeof(*ip6);
  983                 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
  984                 m->m_data -= sizeof(*ip6);
  985 
  986                 getmicrotime(&SC2IFP(sc)->if_lastchange);
  987                 SC2IFP(sc)->if_opackets++;
  988                 SC2IFP(sc)->if_obytes += len;
  989                 carpstats.carps_opackets6++;
  990 
  991                 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
  992                         SC2IFP(sc)->if_oerrors++;
  993                         if (sc->sc_sendad_errors < INT_MAX)
  994                                 sc->sc_sendad_errors++;
  995                         if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
  996                                 carp_suppress_preempt++;
  997                                 if (carp_suppress_preempt == 1) {
  998                                         CARP_SCUNLOCK(sc);
  999                                         carp_send_ad_all();
 1000                                         CARP_SCLOCK(sc);
 1001                                 }
 1002                         }
 1003                         sc->sc_sendad_success = 0;
 1004                 } else {
 1005                         if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
 1006                                 if (++sc->sc_sendad_success >=
 1007                                     CARP_SENDAD_MIN_SUCCESS) {
 1008                                         carp_suppress_preempt--;
 1009                                         sc->sc_sendad_errors = 0;
 1010                                 }
 1011                         } else
 1012                                 sc->sc_sendad_errors = 0;
 1013                 }
 1014         }
 1015 #endif /* INET6 */
 1016 
 1017         if (advbase != 255 || advskew != 255)
 1018                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 1019                     carp_send_ad, sc);
 1020 
 1021 }
 1022 
 1023 /*
 1024  * Broadcast a gratuitous ARP request containing
 1025  * the virtual router MAC address for each IP address
 1026  * associated with the virtual router.
 1027  */
 1028 static void
 1029 carp_send_arp(struct carp_softc *sc)
 1030 {
 1031         struct ifaddr *ifa;
 1032 
 1033         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
 1034 
 1035                 if (ifa->ifa_addr->sa_family != AF_INET)
 1036                         continue;
 1037 
 1038 /*              arprequest(sc->sc_carpdev, &in, &in, IFP2ENADDR(sc->sc_ifp)); */
 1039                 arp_ifinit2(sc->sc_carpdev, ifa, IFP2ENADDR(sc->sc_ifp));
 1040 
 1041                 DELAY(1000);    /* XXX */
 1042         }
 1043 }
 1044 
 1045 #ifdef INET6
 1046 static void
 1047 carp_send_na(struct carp_softc *sc)
 1048 {
 1049         struct ifaddr *ifa;
 1050         struct in6_addr *in6;
 1051         static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 1052 
 1053         TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
 1054 
 1055                 if (ifa->ifa_addr->sa_family != AF_INET6)
 1056                         continue;
 1057 
 1058                 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
 1059                 nd6_na_output(sc->sc_carpdev, &mcast, in6,
 1060                     ND_NA_FLAG_OVERRIDE, 1, NULL);
 1061                 DELAY(1000);    /* XXX */
 1062         }
 1063 }
 1064 #endif /* INET6 */
 1065 
 1066 static int
 1067 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
 1068 {
 1069         struct carp_softc *vh;
 1070         struct ifaddr *ifa;
 1071         int count = 0;
 1072 
 1073         CARP_LOCK_ASSERT(cif);
 1074 
 1075         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1076                 if ((type == CARP_COUNT_RUNNING &&
 1077                     (SC2IFP(vh)->if_flags & IFF_UP) &&
 1078                     (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) ||
 1079                     (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
 1080                         TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
 1081                             ifa_list) {
 1082                                 if (ifa->ifa_addr->sa_family == AF_INET &&
 1083                                     ia->ia_addr.sin_addr.s_addr ==
 1084                                     ifatoia(ifa)->ia_addr.sin_addr.s_addr)
 1085                                         count++;
 1086                         }
 1087                 }
 1088         }
 1089         return (count);
 1090 }
 1091 
 1092 int
 1093 carp_iamatch(void *v, struct in_ifaddr *ia,
 1094     struct in_addr *isaddr, u_int8_t **enaddr)
 1095 {
 1096         struct carp_if *cif = v;
 1097         struct carp_softc *vh;
 1098         int index, count = 0;
 1099         struct ifaddr *ifa;
 1100 
 1101         CARP_LOCK(cif);
 1102 
 1103         if (carp_opts[CARPCTL_ARPBALANCE]) {
 1104                 /*
 1105                  * XXX proof of concept implementation.
 1106                  * We use the source ip to decide which virtual host should
 1107                  * handle the request. If we're master of that virtual host,
 1108                  * then we respond, otherwise, just drop the arp packet on
 1109                  * the floor.
 1110                  */
 1111                 count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
 1112                 if (count == 0) {
 1113                         /* should never reach this */
 1114                         CARP_UNLOCK(cif);
 1115                         return (0);
 1116                 }
 1117 
 1118                 /* this should be a hash, like pf_hash() */
 1119                 index = ntohl(isaddr->s_addr) % count;
 1120                 count = 0;
 1121 
 1122                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1123                         if ((SC2IFP(vh)->if_flags & IFF_UP) &&
 1124                             (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING)) {
 1125                                 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
 1126                                     ifa_list) {
 1127                                         if (ifa->ifa_addr->sa_family ==
 1128                                             AF_INET &&
 1129                                             ia->ia_addr.sin_addr.s_addr ==
 1130                                             ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
 1131                                                 if (count == index) {
 1132                                                         if (vh->sc_state ==
 1133                                                             MASTER) {
 1134                                                                 *enaddr = IFP2ENADDR(vh->sc_ifp);
 1135                                                                 CARP_UNLOCK(cif);
 1136                                                                 return (1);
 1137                                                         } else {
 1138                                                                 CARP_UNLOCK(cif);
 1139                                                                 return (0);
 1140                                                         }
 1141                                                 }
 1142                                                 count++;
 1143                                         }
 1144                                 }
 1145                         }
 1146                 }
 1147         } else {
 1148                 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1149                         if ((SC2IFP(vh)->if_flags & IFF_UP) &&
 1150                             (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
 1151                             ia->ia_ifp == SC2IFP(vh) &&
 1152                             vh->sc_state == MASTER) {
 1153                                 *enaddr = IFP2ENADDR(vh->sc_ifp);
 1154                                 CARP_UNLOCK(cif);
 1155                                 return (1);
 1156                         }
 1157                 }
 1158         }
 1159         CARP_UNLOCK(cif);
 1160         return (0);
 1161 }
 1162 
 1163 #ifdef INET6
 1164 struct ifaddr *
 1165 carp_iamatch6(void *v, struct in6_addr *taddr)
 1166 {
 1167         struct carp_if *cif = v;
 1168         struct carp_softc *vh;
 1169         struct ifaddr *ifa;
 1170 
 1171         CARP_LOCK(cif);
 1172         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
 1173                 TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
 1174                         if (IN6_ARE_ADDR_EQUAL(taddr,
 1175                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
 1176                             (SC2IFP(vh)->if_flags & IFF_UP) &&
 1177                             (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
 1178                             vh->sc_state == MASTER) {
 1179                                 CARP_UNLOCK(cif);
 1180                                 return (ifa);
 1181                         }
 1182                 }
 1183         }
 1184         CARP_UNLOCK(cif);
 1185         
 1186         return (NULL);
 1187 }
 1188 
 1189 void *
 1190 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
 1191 {
 1192         struct m_tag *mtag;
 1193         struct carp_if *cif = v;
 1194         struct carp_softc *sc;
 1195         struct ifaddr *ifa;
 1196 
 1197         CARP_LOCK(cif);
 1198         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
 1199                 TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
 1200                         if (IN6_ARE_ADDR_EQUAL(taddr,
 1201                             &ifatoia6(ifa)->ia_addr.sin6_addr) &&
 1202                             (SC2IFP(sc)->if_flags & IFF_UP) &&
 1203                             (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING)) {
 1204                                 struct ifnet *ifp = SC2IFP(sc);
 1205                                 mtag = m_tag_get(PACKET_TAG_CARP,
 1206                                     sizeof(struct ifnet *), M_NOWAIT);
 1207                                 if (mtag == NULL) {
 1208                                         /* better a bit than nothing */
 1209                                         CARP_UNLOCK(cif);
 1210                                         return (IFP2ENADDR(sc->sc_ifp));
 1211                                 }
 1212                                 bcopy(&ifp, (caddr_t)(mtag + 1),
 1213                                     sizeof(struct ifnet *));
 1214                                 m_tag_prepend(m, mtag);
 1215 
 1216                                 CARP_UNLOCK(cif);
 1217                                 return (IFP2ENADDR(sc->sc_ifp));
 1218                         }
 1219                 }
 1220         }
 1221         CARP_UNLOCK(cif);
 1222 
 1223         return (NULL);
 1224 }
 1225 #endif
 1226 
 1227 struct ifnet *
 1228 carp_forus(void *v, void *dhost)
 1229 {
 1230         struct carp_if *cif = v;
 1231         struct carp_softc *vh;
 1232         u_int8_t *ena = dhost;
 1233 
 1234         if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
 1235                 return (NULL);
 1236 
 1237         CARP_LOCK(cif);
 1238         TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
 1239                 if ((SC2IFP(vh)->if_flags & IFF_UP) &&
 1240                     (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
 1241                     vh->sc_state == MASTER &&
 1242                     !bcmp(dhost, IFP2ENADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
 1243                         CARP_UNLOCK(cif);
 1244                         return (SC2IFP(vh));
 1245                 }
 1246 
 1247         CARP_UNLOCK(cif);
 1248         return (NULL);
 1249 }
 1250 
 1251 static void
 1252 carp_master_down(void *v)
 1253 {
 1254         struct carp_softc *sc = v;
 1255 
 1256         CARP_SCLOCK(sc);
 1257         carp_master_down_locked(sc);
 1258         CARP_SCUNLOCK(sc);
 1259 }
 1260 
 1261 static void
 1262 carp_master_down_locked(struct carp_softc *sc)
 1263 {
 1264         if (sc->sc_carpdev)
 1265                 CARP_SCLOCK_ASSERT(sc);
 1266 
 1267         switch (sc->sc_state) {
 1268         case INIT:
 1269                 printf("%s: master_down event in INIT state\n",
 1270                     SC2IFP(sc)->if_xname);
 1271                 break;
 1272         case MASTER:
 1273                 break;
 1274         case BACKUP:
 1275                 carp_set_state(sc, MASTER);
 1276                 carp_send_ad_locked(sc);
 1277                 carp_send_arp(sc);
 1278 #ifdef INET6
 1279                 carp_send_na(sc);
 1280 #endif /* INET6 */
 1281                 carp_setrun(sc, 0);
 1282                 carp_setroute(sc, RTM_ADD);
 1283                 break;
 1284         }
 1285 }
 1286 
 1287 /*
 1288  * When in backup state, af indicates whether to reset the master down timer
 1289  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
 1290  */
 1291 static void
 1292 carp_setrun(struct carp_softc *sc, sa_family_t af)
 1293 {
 1294         struct timeval tv;
 1295 
 1296         if (sc->sc_carpdev == NULL) {
 1297                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 1298                 carp_set_state(sc, INIT);
 1299                 return;
 1300         } else
 1301                 CARP_SCLOCK_ASSERT(sc);
 1302 
 1303         if (SC2IFP(sc)->if_flags & IFF_UP &&
 1304             sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6))
 1305                 SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
 1306         else {
 1307                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 1308                 carp_setroute(sc, RTM_DELETE);
 1309                 return;
 1310         }
 1311 
 1312         switch (sc->sc_state) {
 1313         case INIT:
 1314                 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
 1315                         carp_send_ad_locked(sc);
 1316                         carp_send_arp(sc);
 1317 #ifdef INET6
 1318                         carp_send_na(sc);
 1319 #endif /* INET6 */
 1320                         CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
 1321                             SC2IFP(sc)->if_xname);
 1322                         carp_set_state(sc, MASTER);
 1323                         carp_setroute(sc, RTM_ADD);
 1324                 } else {
 1325                         CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
 1326                         carp_set_state(sc, BACKUP);
 1327                         carp_setroute(sc, RTM_DELETE);
 1328                         carp_setrun(sc, 0);
 1329                 }
 1330                 break;
 1331         case BACKUP:
 1332                 callout_stop(&sc->sc_ad_tmo);
 1333                 tv.tv_sec = 3 * sc->sc_advbase;
 1334                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 1335                 switch (af) {
 1336 #ifdef INET
 1337                 case AF_INET:
 1338                         callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 1339                             carp_master_down, sc);
 1340                         break;
 1341 #endif /* INET */
 1342 #ifdef INET6
 1343                 case AF_INET6:
 1344                         callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 1345                             carp_master_down, sc);
 1346                         break;
 1347 #endif /* INET6 */
 1348                 default:
 1349                         if (sc->sc_naddrs)
 1350                                 callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
 1351                                     carp_master_down, sc);
 1352                         if (sc->sc_naddrs6)
 1353                                 callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
 1354                                     carp_master_down, sc);
 1355                         break;
 1356                 }
 1357                 break;
 1358         case MASTER:
 1359                 tv.tv_sec = sc->sc_advbase;
 1360                 tv.tv_usec = sc->sc_advskew * 1000000 / 256;
 1361                 callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
 1362                     carp_send_ad, sc);
 1363                 break;
 1364         }
 1365 }
 1366 
 1367 void
 1368 carp_multicast_cleanup(struct carp_softc *sc)
 1369 {
 1370         struct ip_moptions *imo = &sc->sc_imo;
 1371 #ifdef INET6
 1372         struct ip6_moptions *im6o = &sc->sc_im6o;
 1373 #endif 
 1374         u_int16_t n = imo->imo_num_memberships;
 1375   
 1376         /* Clean up our own multicast memberships */
 1377         while (n-- > 0) {
 1378                 if (imo->imo_membership[n] != NULL) {
 1379                         in_delmulti(imo->imo_membership[n]);
 1380                         imo->imo_membership[n] = NULL;
 1381                 }
 1382         }
 1383         imo->imo_num_memberships = 0;
 1384         imo->imo_multicast_ifp = NULL;
 1385 
 1386 #ifdef INET6
 1387         while (!LIST_EMPTY(&im6o->im6o_memberships)) {
 1388                 struct in6_multi_mship *imm =
 1389                     LIST_FIRST(&im6o->im6o_memberships);
 1390     
 1391                 LIST_REMOVE(imm, i6mm_chain);
 1392                 in6_leavegroup(imm);
 1393         }
 1394         im6o->im6o_multicast_ifp = NULL;
 1395 #endif
 1396 }
 1397 
 1398 static int
 1399 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
 1400 {
 1401         struct ifnet *ifp;
 1402         struct carp_if *cif;
 1403         struct in_ifaddr *ia, *ia_if;
 1404         struct ip_moptions *imo = &sc->sc_imo;
 1405         struct in_addr addr;
 1406         u_long iaddr = htonl(sin->sin_addr.s_addr);
 1407         int own, error;
 1408 
 1409         if (sin->sin_addr.s_addr == 0) {
 1410                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
 1411                         carp_set_state(sc, INIT);
 1412                 if (sc->sc_naddrs)
 1413                         SC2IFP(sc)->if_flags |= IFF_UP;
 1414                 carp_setrun(sc, 0);
 1415                 return (0);
 1416         }
 1417 
 1418         /* we have to do it by hands to check we won't match on us */
 1419         ia_if = NULL; own = 0;
 1420         TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
 1421                 /* and, yeah, we need a multicast-capable iface too */
 1422                 if (ia->ia_ifp != SC2IFP(sc) &&
 1423                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
 1424                     (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
 1425                         if (!ia_if)
 1426                                 ia_if = ia;
 1427                         if (sin->sin_addr.s_addr ==
 1428                             ia->ia_addr.sin_addr.s_addr)
 1429                                 own++;
 1430                 }
 1431         }
 1432 
 1433         if (!ia_if)
 1434                 return (EADDRNOTAVAIL);
 1435 
 1436         ia = ia_if;
 1437         ifp = ia->ia_ifp;
 1438 
 1439         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
 1440             (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp))
 1441                 return (EADDRNOTAVAIL);
 1442 
 1443         if (imo->imo_num_memberships == 0) {
 1444                 addr.s_addr = htonl(INADDR_CARP_GROUP);
 1445                 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL)
 1446                         return (ENOBUFS);
 1447                 imo->imo_num_memberships++;
 1448                 imo->imo_multicast_ifp = ifp;
 1449                 imo->imo_multicast_ttl = CARP_DFLTTL;
 1450                 imo->imo_multicast_loop = 0;
 1451         }
 1452 
 1453         if (!ifp->if_carp) {
 1454 
 1455                 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
 1456                     M_WAITOK|M_ZERO);
 1457                 if (!cif) {
 1458                         error = ENOBUFS;
 1459                         goto cleanup;
 1460                 }
 1461                 if ((error = ifpromisc(ifp, 1))) {
 1462                         FREE(cif, M_CARP);
 1463                         goto cleanup;
 1464                 }
 1465                 
 1466                 CARP_LOCK_INIT(cif);
 1467                 CARP_LOCK(cif);
 1468                 cif->vhif_ifp = ifp;
 1469                 TAILQ_INIT(&cif->vhif_vrs);
 1470                 ifp->if_carp = cif;
 1471 
 1472         } else {
 1473                 struct carp_softc *vr;
 1474 
 1475                 cif = (struct carp_if *)ifp->if_carp;
 1476                 CARP_LOCK(cif);
 1477                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
 1478                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
 1479                                 CARP_UNLOCK(cif);
 1480                                 error = EINVAL;
 1481                                 goto cleanup;
 1482                         }
 1483         }
 1484         sc->sc_ia = ia;
 1485         sc->sc_carpdev = ifp;
 1486 
 1487         { /* XXX prevent endless loop if already in queue */
 1488         struct carp_softc *vr, *after = NULL;
 1489         int myself = 0;
 1490         cif = (struct carp_if *)ifp->if_carp;
 1491 
 1492         /* XXX: cif should not change, right? So we still hold the lock */
 1493         CARP_LOCK_ASSERT(cif);
 1494 
 1495         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
 1496                 if (vr == sc)
 1497                         myself = 1;
 1498                 if (vr->sc_vhid < sc->sc_vhid)
 1499                         after = vr;
 1500         }
 1501 
 1502         if (!myself) {
 1503                 /* We're trying to keep things in order */
 1504                 if (after == NULL) {
 1505                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
 1506                 } else {
 1507                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
 1508                 }
 1509                 cif->vhif_nvrs++;
 1510         }
 1511         }
 1512 
 1513         sc->sc_naddrs++;
 1514         SC2IFP(sc)->if_flags |= IFF_UP;
 1515         if (own)
 1516                 sc->sc_advskew = 0;
 1517         carp_sc_state_locked(sc);
 1518         carp_setrun(sc, 0);
 1519 
 1520         CARP_UNLOCK(cif);
 1521 
 1522         return (0);
 1523 
 1524 cleanup:
 1525         in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
 1526         return (error);
 1527 }
 1528 
 1529 static int
 1530 carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
 1531 {
 1532         int error = 0;
 1533 
 1534         if (!--sc->sc_naddrs) {
 1535                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
 1536                 struct ip_moptions *imo = &sc->sc_imo;
 1537 
 1538                 CARP_LOCK(cif);
 1539                 callout_stop(&sc->sc_ad_tmo);
 1540                 SC2IFP(sc)->if_flags &= ~IFF_UP;
 1541                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 1542                 sc->sc_vhid = -1;
 1543                 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
 1544                 imo->imo_multicast_ifp = NULL;
 1545                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
 1546                 if (!--cif->vhif_nvrs) {
 1547                         sc->sc_carpdev->if_carp = NULL;
 1548                         CARP_LOCK_DESTROY(cif);
 1549                         FREE(cif, M_IFADDR);
 1550                 } else {
 1551                         CARP_UNLOCK(cif);
 1552                 }
 1553         }
 1554 
 1555         return (error);
 1556 }
 1557 
 1558 #ifdef INET6
 1559 static int
 1560 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
 1561 {
 1562         struct ifnet *ifp;
 1563         struct carp_if *cif;
 1564         struct in6_ifaddr *ia, *ia_if;
 1565         struct ip6_moptions *im6o = &sc->sc_im6o;
 1566         struct in6_multi_mship *imm;
 1567         struct in6_addr in6;
 1568         int own, error;
 1569 
 1570         if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 1571                 if (!(SC2IFP(sc)->if_flags & IFF_UP))
 1572                         carp_set_state(sc, INIT);
 1573                 if (sc->sc_naddrs6)
 1574                         SC2IFP(sc)->if_flags |= IFF_UP;
 1575                 carp_setrun(sc, 0);
 1576                 return (0);
 1577         }
 1578 
 1579         /* we have to do it by hands to check we won't match on us */
 1580         ia_if = NULL; own = 0;
 1581         for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
 1582                 int i;
 1583 
 1584                 for (i = 0; i < 4; i++) {
 1585                         if ((sin6->sin6_addr.s6_addr32[i] &
 1586                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
 1587                             (ia->ia_addr.sin6_addr.s6_addr32[i] &
 1588                             ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
 1589                                 break;
 1590                 }
 1591                 /* and, yeah, we need a multicast-capable iface too */
 1592                 if (ia->ia_ifp != SC2IFP(sc) &&
 1593                     (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
 1594                     (i == 4)) {
 1595                         if (!ia_if)
 1596                                 ia_if = ia;
 1597                         if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
 1598                             &ia->ia_addr.sin6_addr))
 1599                                 own++;
 1600                 }
 1601         }
 1602 
 1603         if (!ia_if)
 1604                 return (EADDRNOTAVAIL);
 1605         ia = ia_if;
 1606         ifp = ia->ia_ifp;
 1607 
 1608         if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
 1609             (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
 1610                 return (EADDRNOTAVAIL);
 1611 
 1612         if (!sc->sc_naddrs6) {
 1613                 im6o->im6o_multicast_ifp = ifp;
 1614 
 1615                 /* join CARP multicast address */
 1616                 bzero(&in6, sizeof(in6));
 1617                 in6.s6_addr16[0] = htons(0xff02);
 1618                 in6.s6_addr8[15] = 0x12;
 1619                 if (in6_setscope(&in6, ifp, NULL) != 0)
 1620                         goto cleanup;
 1621                 if ((imm = in6_joingroup(ifp, &in6, &error, 0)) == NULL)
 1622                         goto cleanup;
 1623                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
 1624 
 1625                 /* join solicited multicast address */
 1626                 bzero(&in6, sizeof(in6));
 1627                 in6.s6_addr16[0] = htons(0xff02);
 1628                 in6.s6_addr32[1] = 0;
 1629                 in6.s6_addr32[2] = htonl(1);
 1630                 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
 1631                 in6.s6_addr8[12] = 0xff;
 1632                 if (in6_setscope(&in6, ifp, NULL) != 0)
 1633                         goto cleanup;
 1634                 if ((imm = in6_joingroup(ifp, &in6, &error, 0)) == NULL)
 1635                         goto cleanup;
 1636                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
 1637         }
 1638 
 1639         if (!ifp->if_carp) {
 1640                 MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
 1641                     M_WAITOK|M_ZERO);
 1642                 if (!cif) {
 1643                         error = ENOBUFS;
 1644                         goto cleanup;
 1645                 }
 1646                 if ((error = ifpromisc(ifp, 1))) {
 1647                         FREE(cif, M_CARP);
 1648                         goto cleanup;
 1649                 }
 1650 
 1651                 CARP_LOCK_INIT(cif);
 1652                 CARP_LOCK(cif);
 1653                 cif->vhif_ifp = ifp;
 1654                 TAILQ_INIT(&cif->vhif_vrs);
 1655                 ifp->if_carp = cif;
 1656 
 1657         } else {
 1658                 struct carp_softc *vr;
 1659 
 1660                 cif = (struct carp_if *)ifp->if_carp;
 1661                 CARP_LOCK(cif);
 1662                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
 1663                         if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
 1664                                 CARP_UNLOCK(cif);
 1665                                 error = EINVAL;
 1666                                 goto cleanup;
 1667                         }
 1668         }
 1669         sc->sc_ia6 = ia;
 1670         sc->sc_carpdev = ifp;
 1671 
 1672         { /* XXX prevent endless loop if already in queue */
 1673         struct carp_softc *vr, *after = NULL;
 1674         int myself = 0;
 1675         cif = (struct carp_if *)ifp->if_carp;
 1676         CARP_LOCK_ASSERT(cif);
 1677 
 1678         TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
 1679                 if (vr == sc)
 1680                         myself = 1;
 1681                 if (vr->sc_vhid < sc->sc_vhid)
 1682                         after = vr;
 1683         }
 1684 
 1685         if (!myself) {
 1686                 /* We're trying to keep things in order */
 1687                 if (after == NULL) {
 1688                         TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
 1689                 } else {
 1690                         TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
 1691                 }
 1692                 cif->vhif_nvrs++;
 1693         }
 1694         }
 1695 
 1696         sc->sc_naddrs6++;
 1697         SC2IFP(sc)->if_flags |= IFF_UP;
 1698         if (own)
 1699                 sc->sc_advskew = 0;
 1700         carp_sc_state_locked(sc);
 1701         carp_setrun(sc, 0);
 1702 
 1703         CARP_UNLOCK(cif);
 1704 
 1705         return (0);
 1706 
 1707 cleanup:
 1708         /* clean up multicast memberships */
 1709         if (!sc->sc_naddrs6) {
 1710                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
 1711                         imm = LIST_FIRST(&im6o->im6o_memberships);
 1712                         LIST_REMOVE(imm, i6mm_chain);
 1713                         in6_leavegroup(imm);
 1714                 }
 1715         }
 1716         return (error);
 1717 }
 1718 
 1719 static int
 1720 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
 1721 {
 1722         int error = 0;
 1723 
 1724         if (!--sc->sc_naddrs6) {
 1725                 struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
 1726                 struct ip6_moptions *im6o = &sc->sc_im6o;
 1727 
 1728                 CARP_LOCK(cif);
 1729                 callout_stop(&sc->sc_ad_tmo);
 1730                 SC2IFP(sc)->if_flags &= ~IFF_UP;
 1731                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 1732                 sc->sc_vhid = -1;
 1733                 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
 1734                         struct in6_multi_mship *imm =
 1735                             LIST_FIRST(&im6o->im6o_memberships);
 1736 
 1737                         LIST_REMOVE(imm, i6mm_chain);
 1738                         in6_leavegroup(imm);
 1739                 }
 1740                 im6o->im6o_multicast_ifp = NULL;
 1741                 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
 1742                 if (!--cif->vhif_nvrs) {
 1743                         CARP_LOCK_DESTROY(cif);
 1744                         sc->sc_carpdev->if_carp = NULL;
 1745                         FREE(cif, M_IFADDR);
 1746                 } else
 1747                         CARP_UNLOCK(cif);
 1748         }
 1749 
 1750         return (error);
 1751 }
 1752 #endif /* INET6 */
 1753 
 1754 static int
 1755 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
 1756 {
 1757         struct carp_softc *sc = ifp->if_softc, *vr;
 1758         struct carpreq carpr;
 1759         struct ifaddr *ifa;
 1760         struct ifreq *ifr;
 1761         struct ifaliasreq *ifra;
 1762         int locked = 0, error = 0;
 1763 
 1764         ifa = (struct ifaddr *)addr;
 1765         ifra = (struct ifaliasreq *)addr;
 1766         ifr = (struct ifreq *)addr;
 1767 
 1768         switch (cmd) {
 1769         case SIOCSIFADDR:
 1770                 switch (ifa->ifa_addr->sa_family) {
 1771 #ifdef INET
 1772                 case AF_INET:
 1773                         SC2IFP(sc)->if_flags |= IFF_UP;
 1774                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
 1775                             sizeof(struct sockaddr));
 1776                         error = carp_set_addr(sc, satosin(ifa->ifa_addr));
 1777                         break;
 1778 #endif /* INET */
 1779 #ifdef INET6
 1780                 case AF_INET6:
 1781                         SC2IFP(sc)->if_flags |= IFF_UP;
 1782                         error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
 1783                         break;
 1784 #endif /* INET6 */
 1785                 default:
 1786                         error = EAFNOSUPPORT;
 1787                         break;
 1788                 }
 1789                 break;
 1790 
 1791         case SIOCAIFADDR:
 1792                 switch (ifa->ifa_addr->sa_family) {
 1793 #ifdef INET
 1794                 case AF_INET:
 1795                         SC2IFP(sc)->if_flags |= IFF_UP;
 1796                         bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
 1797                             sizeof(struct sockaddr));
 1798                         error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
 1799                         break;
 1800 #endif /* INET */
 1801 #ifdef INET6
 1802                 case AF_INET6:
 1803                         SC2IFP(sc)->if_flags |= IFF_UP;
 1804                         error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
 1805                         break;
 1806 #endif /* INET6 */
 1807                 default:
 1808                         error = EAFNOSUPPORT;
 1809                         break;
 1810                 }
 1811                 break;
 1812 
 1813         case SIOCDIFADDR:
 1814                 switch (ifa->ifa_addr->sa_family) {
 1815 #ifdef INET
 1816                 case AF_INET:
 1817                         error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
 1818                         break;
 1819 #endif /* INET */
 1820 #ifdef INET6
 1821                 case AF_INET6:
 1822                         error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
 1823                         break;
 1824 #endif /* INET6 */
 1825                 default:
 1826                         error = EAFNOSUPPORT;
 1827                         break;
 1828                 }
 1829                 break;
 1830 
 1831         case SIOCSIFFLAGS:
 1832                 if (sc->sc_carpdev) {
 1833                         locked = 1;
 1834                         CARP_SCLOCK(sc);
 1835                 }
 1836                 if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
 1837                         callout_stop(&sc->sc_ad_tmo);
 1838                         callout_stop(&sc->sc_md_tmo);
 1839                         callout_stop(&sc->sc_md6_tmo);
 1840                         if (sc->sc_state == MASTER)
 1841                                 carp_send_ad_locked(sc);
 1842                         carp_set_state(sc, INIT);
 1843                         carp_setrun(sc, 0);
 1844                 } else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
 1845                         SC2IFP(sc)->if_flags |= IFF_UP;
 1846                         carp_setrun(sc, 0);
 1847                 }
 1848                 break;
 1849 
 1850         case SIOCSVH:
 1851                 if ((error = suser(curthread)) != 0)
 1852                         break;
 1853                 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
 1854                         break;
 1855                 error = 1;
 1856                 if (sc->sc_carpdev) {
 1857                         locked = 1;
 1858                         CARP_SCLOCK(sc);
 1859                 }
 1860                 if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
 1861                         switch (carpr.carpr_state) {
 1862                         case BACKUP:
 1863                                 callout_stop(&sc->sc_ad_tmo);
 1864                                 carp_set_state(sc, BACKUP);
 1865                                 carp_setrun(sc, 0);
 1866                                 carp_setroute(sc, RTM_DELETE);
 1867                                 break;
 1868                         case MASTER:
 1869                                 carp_master_down_locked(sc);
 1870                                 break;
 1871                         default:
 1872                                 break;
 1873                         }
 1874                 }
 1875                 if (carpr.carpr_vhid > 0) {
 1876                         if (carpr.carpr_vhid > 255) {
 1877                                 error = EINVAL;
 1878                                 break;
 1879                         }
 1880                         if (sc->sc_carpdev) {
 1881                                 struct carp_if *cif;
 1882                                 cif = (struct carp_if *)sc->sc_carpdev->if_carp;
 1883                                 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
 1884                                         if (vr != sc &&
 1885                                             vr->sc_vhid == carpr.carpr_vhid)
 1886                                                 return EEXIST;
 1887                         }
 1888                         sc->sc_vhid = carpr.carpr_vhid;
 1889                         IFP2ENADDR(sc->sc_ifp)[0] = 0;
 1890                         IFP2ENADDR(sc->sc_ifp)[1] = 0;
 1891                         IFP2ENADDR(sc->sc_ifp)[2] = 0x5e;
 1892                         IFP2ENADDR(sc->sc_ifp)[3] = 0;
 1893                         IFP2ENADDR(sc->sc_ifp)[4] = 1;
 1894                         IFP2ENADDR(sc->sc_ifp)[5] = sc->sc_vhid;
 1895                         error--;
 1896                 }
 1897                 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
 1898                         if (carpr.carpr_advskew >= 255) {
 1899                                 error = EINVAL;
 1900                                 break;
 1901                         }
 1902                         if (carpr.carpr_advbase > 255) {
 1903                                 error = EINVAL;
 1904                                 break;
 1905                         }
 1906                         sc->sc_advbase = carpr.carpr_advbase;
 1907                         sc->sc_advskew = carpr.carpr_advskew;
 1908                         error--;
 1909                 }
 1910                 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
 1911                 if (error > 0)
 1912                         error = EINVAL;
 1913                 else {
 1914                         error = 0;
 1915                         carp_setrun(sc, 0);
 1916                 }
 1917                 break;
 1918 
 1919         case SIOCGVH:
 1920                 /* XXX: lockless read */
 1921                 bzero(&carpr, sizeof(carpr));
 1922                 carpr.carpr_state = sc->sc_state;
 1923                 carpr.carpr_vhid = sc->sc_vhid;
 1924                 carpr.carpr_advbase = sc->sc_advbase;
 1925                 carpr.carpr_advskew = sc->sc_advskew;
 1926                 if (suser(curthread) == 0)
 1927                         bcopy(sc->sc_key, carpr.carpr_key,
 1928                             sizeof(carpr.carpr_key));
 1929                 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
 1930                 break;
 1931 
 1932         default:
 1933                 error = EINVAL;
 1934         }
 1935 
 1936         if (locked)
 1937                 CARP_SCUNLOCK(sc);
 1938 
 1939         carp_hmac_prepare(sc);
 1940 
 1941         return (error);
 1942 }
 1943 
 1944 /*
 1945  * XXX: this is looutput. We should eventually use it from there.
 1946  */
 1947 static int
 1948 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
 1949     struct rtentry *rt)
 1950 {
 1951         u_int32_t af;
 1952 
 1953         M_ASSERTPKTHDR(m); /* check if we have the packet header */
 1954 
 1955         if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
 1956                 m_freem(m);
 1957                 return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
 1958                         rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
 1959         }
 1960 
 1961         ifp->if_opackets++;
 1962         ifp->if_obytes += m->m_pkthdr.len;
 1963 
 1964         /* BPF writes need to be handled specially. */
 1965         if (dst->sa_family == AF_UNSPEC) {
 1966                 bcopy(dst->sa_data, &af, sizeof(af));
 1967                 dst->sa_family = af;
 1968         }
 1969 
 1970 #if 1   /* XXX */
 1971         switch (dst->sa_family) {
 1972         case AF_INET:
 1973         case AF_INET6:
 1974         case AF_IPX:
 1975         case AF_APPLETALK:
 1976                 break;
 1977         default:
 1978                 printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
 1979                 m_freem(m);
 1980                 return (EAFNOSUPPORT);
 1981         }
 1982 #endif
 1983         return(if_simloop(ifp, m, dst->sa_family, 0));
 1984 }
 1985 
 1986 /*
 1987  * Start output on carp interface. This function should never be called.
 1988  */
 1989 static void
 1990 carp_start(struct ifnet *ifp)
 1991 {
 1992 #ifdef DEBUG
 1993         printf("%s: start called\n", ifp->if_xname);
 1994 #endif
 1995 }
 1996 
 1997 int
 1998 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
 1999     struct rtentry *rt)
 2000 {
 2001         struct m_tag *mtag;
 2002         struct carp_softc *sc;
 2003         struct ifnet *carp_ifp;
 2004 
 2005         if (!sa)
 2006                 return (0);
 2007 
 2008         switch (sa->sa_family) {
 2009 #ifdef INET
 2010         case AF_INET:
 2011                 break;
 2012 #endif /* INET */
 2013 #ifdef INET6
 2014         case AF_INET6:
 2015                 break;
 2016 #endif /* INET6 */
 2017         default:
 2018                 return (0);
 2019         }
 2020 
 2021         mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
 2022         if (mtag == NULL)
 2023                 return (0);
 2024 
 2025         bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
 2026         sc = carp_ifp->if_softc;
 2027 
 2028         /* Set the source MAC address to Virtual Router MAC Address */
 2029         switch (ifp->if_type) {
 2030         case IFT_ETHER:
 2031         case IFT_L2VLAN: {
 2032                         struct ether_header *eh;
 2033 
 2034                         eh = mtod(m, struct ether_header *);
 2035                         eh->ether_shost[0] = 0;
 2036                         eh->ether_shost[1] = 0;
 2037                         eh->ether_shost[2] = 0x5e;
 2038                         eh->ether_shost[3] = 0;
 2039                         eh->ether_shost[4] = 1;
 2040                         eh->ether_shost[5] = sc->sc_vhid;
 2041                 }
 2042                 break;
 2043         case IFT_FDDI: {
 2044                         struct fddi_header *fh;
 2045 
 2046                         fh = mtod(m, struct fddi_header *);
 2047                         fh->fddi_shost[0] = 0;
 2048                         fh->fddi_shost[1] = 0;
 2049                         fh->fddi_shost[2] = 0x5e;
 2050                         fh->fddi_shost[3] = 0;
 2051                         fh->fddi_shost[4] = 1;
 2052                         fh->fddi_shost[5] = sc->sc_vhid;
 2053                 }
 2054                 break;
 2055         case IFT_ISO88025: {
 2056                         struct iso88025_header *th;
 2057                         th = mtod(m, struct iso88025_header *);
 2058                         th->iso88025_shost[0] = 3;
 2059                         th->iso88025_shost[1] = 0;
 2060                         th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
 2061                         th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
 2062                         th->iso88025_shost[4] = 0;
 2063                         th->iso88025_shost[5] = 0;
 2064                 }
 2065                 break;
 2066         default:
 2067                 printf("%s: carp is not supported for this interface type\n",
 2068                     ifp->if_xname);
 2069                 return (EOPNOTSUPP);
 2070         }
 2071 
 2072         return (0);
 2073 }
 2074 
 2075 static void
 2076 carp_set_state(struct carp_softc *sc, int state)
 2077 {
 2078 
 2079         if (sc->sc_carpdev)
 2080                 CARP_SCLOCK_ASSERT(sc);
 2081 
 2082         if (sc->sc_state == state)
 2083                 return;
 2084 
 2085         sc->sc_state = state;
 2086         switch (state) {
 2087         case BACKUP:
 2088                 SC2IFP(sc)->if_link_state = LINK_STATE_DOWN;
 2089                 break;
 2090         case MASTER:
 2091                 SC2IFP(sc)->if_link_state = LINK_STATE_UP;
 2092                 break;
 2093         default:
 2094                 SC2IFP(sc)->if_link_state = LINK_STATE_UNKNOWN;
 2095                 break;
 2096         }
 2097         rt_ifmsg(SC2IFP(sc));
 2098 }
 2099 
 2100 void
 2101 carp_carpdev_state(void *v)
 2102 {
 2103         struct carp_if *cif = v;
 2104 
 2105         CARP_LOCK(cif);
 2106         carp_carpdev_state_locked(cif);
 2107         CARP_UNLOCK(cif);
 2108 }
 2109 
 2110 static void
 2111 carp_carpdev_state_locked(struct carp_if *cif)
 2112 {
 2113         struct carp_softc *sc;
 2114 
 2115         TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
 2116                 carp_sc_state_locked(sc);
 2117 }
 2118 
 2119 static void
 2120 carp_sc_state_locked(struct carp_softc *sc)
 2121 {
 2122         CARP_SCLOCK_ASSERT(sc);
 2123 
 2124         if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
 2125             !(sc->sc_carpdev->if_flags & IFF_UP)) {
 2126                 sc->sc_flags_backup = SC2IFP(sc)->if_flags;
 2127                 SC2IFP(sc)->if_flags &= ~IFF_UP;
 2128                 SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
 2129                 callout_stop(&sc->sc_ad_tmo);
 2130                 callout_stop(&sc->sc_md_tmo);
 2131                 callout_stop(&sc->sc_md6_tmo);
 2132                 carp_set_state(sc, INIT);
 2133                 carp_setrun(sc, 0);
 2134                 if (!sc->sc_suppress) {
 2135                         carp_suppress_preempt++;
 2136                         if (carp_suppress_preempt == 1) {
 2137                                 CARP_SCUNLOCK(sc);
 2138                                 carp_send_ad_all();
 2139                                 CARP_SCLOCK(sc);
 2140                         }
 2141                 }
 2142                 sc->sc_suppress = 1;
 2143         } else {
 2144                 SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
 2145                 carp_set_state(sc, INIT);
 2146                 carp_setrun(sc, 0);
 2147                 if (sc->sc_suppress)
 2148                         carp_suppress_preempt--;
 2149                 sc->sc_suppress = 0;
 2150         }
 2151 
 2152         return;
 2153 }
 2154 
 2155 static int
 2156 carp_modevent(module_t mod, int type, void *data)
 2157 {
 2158         switch (type) {
 2159         case MOD_LOAD:
 2160                 if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
 2161                     carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
 2162                 if (if_detach_event_tag == NULL)
 2163                         return (ENOMEM);
 2164                 mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
 2165                 LIST_INIT(&carpif_list);
 2166                 if_clone_attach(&carp_cloner);
 2167                 break;
 2168 
 2169         case MOD_UNLOAD:
 2170                 EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
 2171                 if_clone_detach(&carp_cloner);
 2172                 while (!LIST_EMPTY(&carpif_list))
 2173                         carp_clone_destroy(SC2IFP(LIST_FIRST(&carpif_list)));
 2174                 mtx_destroy(&carp_mtx);
 2175                 break;
 2176 
 2177         default:
 2178                 return (EINVAL);
 2179         }
 2180 
 2181         return (0);
 2182 }
 2183 
 2184 static moduledata_t carp_mod = {
 2185         "carp",
 2186         carp_modevent,
 2187         0
 2188 };
 2189 
 2190 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);

Cache object: 97ec73ea28620b98a5bbae71a4ccde44


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.